From a6d417113ba1c25eaf00f8b67e62eceba9fd7da0 Mon Sep 17 00:00:00 2001 From: Nick Liu Date: Wed, 11 Jun 2025 12:10:59 -0500 Subject: [PATCH 1/5] fix(syncoid): Rework snap/bookmark fetching with feature detection The original snapshot fetching relied on a complex state-dependent `getsnaps()` subroutine with a separate `getsnapsfallback()` for older ZFS versions. The first refactor attempt in #818 simplified this but introduced performance regressions by using `zfs get all`, which was inefficient for large datasets. This commit avoids that overhead by integrating proactive `zfs get` feature detection through a new `check_zfs_get_features()` subroutine that determines the command's capabilities by testing for `-t` (type filter) support and the availability of the `createtxg` property. Results are cached per host to avoid redundant checks. `check_zfs_get_features()` came from the #931, which this change supersedes. The `getsnaps()` and `getbookmarks()` subroutines now use this information to build optimized `zfs get` commands that query only necessary properties. As before in #818, the parsing logic is refactored to populate property hashes for each item, eliminating the old multi-loop state-dependent approach and the need for mostly duplicated fallback logic. This resolves both the original complexity and the performance issues from the first attempted fix. Now there is a foundation for fixing the snapshot ordering bug reported in #815. --- syncoid | 263 ++++++++++++++++++++++++-------------------------------- 1 file changed, 112 insertions(+), 151 deletions(-) diff --git a/syncoid b/syncoid index 956f3e7..7a6d6d1 100755 --- a/syncoid +++ b/syncoid @@ -194,6 +194,9 @@ if (length $args{'insecure-direct-connection'}) { # warn user of anything missing, then continue with sync. my %avail = checkcommands(); +# host => { supports_type_filter => 1/0, supported_properties => ['guid', 'creation', ...] } +my %host_zfs_get_features; + my %snaps; my $exitcode = 0; @@ -1392,6 +1395,47 @@ sub checkcommands { return %avail; } +sub check_zfs_get_features { + my ($rhost, $mysudocmd, $zfscmd) = @_; + my $host = $rhost ? (split(/\s+/, $rhost))[-1] : "localhost"; + + return $host_zfs_get_features{$host} if exists $host_zfs_get_features{$host}; + + writelog('DEBUG', "Checking `zfs get` features on host \"$host\"..."); + + $host_zfs_get_features{$host} = { + supports_type_filter => 0, + supported_properties => ['guid', 'creation'] + }; + + my $check_t_option_cmd = "$rhost $mysudocmd $zfscmd get -H -t snapshot '' ''"; + open my $fh_t, "$check_t_option_cmd 2>&1 |"; + my $output_t = <$fh_t>; + close $fh_t; + + if ($output_t !~ /^\Qinvalid option\E/) { + $host_zfs_get_features{$host}->{supports_type_filter} = 1; + } + + writelog('DEBUG', "Host \"$host\" has `zfs get -t`?: $host_zfs_get_features{$host}->{supports_type_filter}"); + + my @properties_to_check = ('createtxg'); + foreach my $prop (@properties_to_check) { + my $check_prop_cmd = "$rhost $mysudocmd $zfscmd get -H $prop ''"; + open my $fh_p, "$check_prop_cmd 2>&1 |"; + my $output_p = <$fh_p>; + close $fh_p; + + if ($output_p !~ /^\Qbad property list: invalid property\E/) { + push @{$host_zfs_get_features{$host}->{supported_properties}}, $prop; + } + } + + writelog('DEBUG', "Host \"$host\" ZFS properties: @{$host_zfs_get_features{$host}->{supported_properties}}"); + + return $host_zfs_get_features{$host}; +} + sub iszfsbusy { my ($rhost,$fs,$isroot) = @_; if ($rhost ne '') { $rhost = "$sshcmd $rhost"; } @@ -1860,21 +1904,30 @@ sub dumphash() { writelog('INFO', Dumper($hash)); } -sub getsnaps() { +sub getsnaps { my ($type,$rhost,$fs,$isroot,%snaps) = @_; my $mysudocmd; my $fsescaped = escapeshellparam($fs); if ($isroot) { $mysudocmd = ''; } else { $mysudocmd = $sudocmd; } - my $rhostOriginal = $rhost; - if ($rhost ne '') { $rhost = "$sshcmd $rhost"; # double escaping needed $fsescaped = escapeshellparam($fsescaped); } - my $getsnapcmd = "$rhost $mysudocmd $zfscmd get -Hpd 1 -t snapshot guid,creation $fsescaped"; + my $host_features = check_zfs_get_features($rhost, $mysudocmd, $zfscmd); + + my @properties = @{$host_features->{supported_properties}}; + my $type_filter = ""; + if ($host_features->{supports_type_filter}) { + $type_filter = "-t snapshot"; + } else { + push @properties, 'type'; + } + my $properties_string = join(',', @properties); + my $getsnapcmd = "$rhost $mysudocmd $zfscmd get -Hpd 1 $type_filter $properties_string $fsescaped"; + if ($debug) { $getsnapcmd = "$getsnapcmd |"; writelog('DEBUG', "getting list of snapshots on $fs using $getsnapcmd..."); @@ -1883,142 +1936,48 @@ sub getsnaps() { } open FH, $getsnapcmd; my @rawsnaps = ; - close FH or do { - # fallback (solaris for example doesn't support the -t option) - return getsnapsfallback($type,$rhostOriginal,$fs,$isroot,%snaps); - }; - - # this is a little obnoxious. get guid,creation returns guid,creation on two separate lines - # as though each were an entirely separate get command. - - my %creationtimes=(); - - foreach my $line (@rawsnaps) { - $line =~ /\Q$fs\E\@(\S*)/; - my $snapname = $1; - - if (!snapisincluded($snapname)) { next; } - - # only import snap guids from the specified filesystem - if ($line =~ /\Q$fs\E\@.*\tguid/) { - chomp $line; - my $guid = $line; - $guid =~ s/^.*\tguid\t*(\d*).*/$1/; - my $snap = $line; - $snap =~ s/^.*\@(.*)\tguid.*$/$1/; - $snaps{$type}{$snap}{'guid'}=$guid; - } - # only import snap creations from the specified filesystem - elsif ($line =~ /\Q$fs\E\@.*\tcreation/) { - chomp $line; - my $creation = $line; - $creation =~ s/^.*\tcreation\t*(\d*).*/$1/; - my $snap = $line; - $snap =~ s/^.*\@(.*)\tcreation.*$/$1/; - - # the accuracy of the creation timestamp is only for a second, but - # snapshots in the same second are highly likely. The list command - # has an ordered output so we append another three digit running number - # to the creation timestamp and make sure those are ordered correctly - # for snapshot with the same creation timestamp - my $counter = 0; - my $creationsuffix; - while ($counter < 999) { - $creationsuffix = sprintf("%s%03d", $creation, $counter); - if (!defined $creationtimes{$creationsuffix}) { - $creationtimes{$creationsuffix} = 1; - last; - } - $counter += 1; - } - - $snaps{$type}{$snap}{'creation'}=$creationsuffix; - } - } - - return %snaps; -} - -sub getsnapsfallback() { - # fallback (solaris for example doesn't support the -t option) - my ($type,$rhost,$fs,$isroot,%snaps) = @_; - my $mysudocmd; - my $fsescaped = escapeshellparam($fs); - if ($isroot) { $mysudocmd = ''; } else { $mysudocmd = $sudocmd; } - - if ($rhost ne '') { - $rhost = "$sshcmd $rhost"; - # double escaping needed - $fsescaped = escapeshellparam($fsescaped); - } - - my $getsnapcmd = "$rhost $mysudocmd $zfscmd get -Hpd 1 type,guid,creation $fsescaped |"; - writelog('WARN', "snapshot listing failed, trying fallback command"); - writelog('DEBUG', "FALLBACK, getting list of snapshots on $fs using $getsnapcmd..."); - open FH, $getsnapcmd; - my @rawsnaps = ; close FH or die "CRITICAL ERROR: snapshots couldn't be listed for $fs (exit code $?)"; - my %creationtimes=(); + my %snap_data; + my %creationtimes; - my $state = 0; - foreach my $line (@rawsnaps) { - if ($state < 0) { - $state++; - next; - } + for my $line (@rawsnaps) { + chomp $line; + my ($dataset, $property, $value) = split /\t/, $line; + next unless defined $value; - if ($state eq 0) { - if ($line !~ /\Q$fs\E\@.*\ttype\s*snapshot/) { - # skip non snapshot type object - $state = -2; - next; - } - } elsif ($state eq 1) { - if ($line !~ /\Q$fs\E\@.*\tguid/) { - die "CRITICAL ERROR: snapshots couldn't be listed for $fs (guid parser error)"; - } + my (undef, $snap) = split /@/, $dataset; + next unless length $snap; - chomp $line; - my $guid = $line; - $guid =~ s/^.*\tguid\t*(\d*).*/$1/; - my $snap = $line; - $snap =~ s/^.*\@(.*)\tguid.*$/$1/; - if (!snapisincluded($snap)) { next; } - $snaps{$type}{$snap}{'guid'}=$guid; - } elsif ($state eq 2) { - if ($line !~ /\Q$fs\E\@.*\tcreation/) { - die "CRITICAL ERROR: snapshots couldn't be listed for $fs (creation parser error)"; - } + if (!snapisincluded($snap)) { next; } + $snap_data{$snap}{$property} = $value; - chomp $line; - my $creation = $line; - $creation =~ s/^.*\tcreation\t*(\d*).*/$1/; - my $snap = $line; - $snap =~ s/^.*\@(.*)\tcreation.*$/$1/; - if (!snapisincluded($snap)) { next; } - - # the accuracy of the creation timestamp is only for a second, but - # snapshots in the same second are highly likely. The list command - # has an ordered output so we append another three digit running number - # to the creation timestamp and make sure those are ordered correctly - # for snapshot with the same creation timestamp + # the accuracy of the creation timestamp is only for a second, but + # snapshots in the same second are highly likely. The list command + # has an ordered output so we append another three digit running number + # to the creation timestamp and make sure those are ordered correctly + # for snapshot with the same creation timestamp + if ($property eq 'creation') { my $counter = 0; my $creationsuffix; while ($counter < 999) { - $creationsuffix = sprintf("%s%03d", $creation, $counter); + $creationsuffix = sprintf("%s%03d", $value, $counter); if (!defined $creationtimes{$creationsuffix}) { $creationtimes{$creationsuffix} = 1; last; } $counter += 1; } - - $snaps{$type}{$snap}{'creation'}=$creationsuffix; - $state = -1; + $snap_data{$snap}{'creation'} = $creationsuffix; } + } - $state++; + for my $snap (keys %snap_data) { + if (length $type_filter || $snap_data{$snap}{'type'} eq 'snapshot') { + $snaps{$type}{$snap}{'guid'} = $snap_data{$snap}{'guid'}; + $snaps{$type}{$snap}{'createtxg'} = $snap_data{$snap}{'createtxg'}; + $snaps{$type}{$snap}{'creation'} = $snap_data{$snap}{'creation'}; + } } return %snaps; @@ -2036,8 +1995,12 @@ sub getbookmarks() { $fsescaped = escapeshellparam($fsescaped); } + my $host_features = check_zfs_get_features($rhost, $mysudocmd, $zfscmd); + my @properties = @{$host_features->{supported_properties}}; + my $properties_string = join(',', @properties); + my $error = 0; - my $getbookmarkcmd = "$rhost $mysudocmd $zfscmd get -Hpd 1 -t bookmark guid,creation $fsescaped 2>&1 |"; + my $getbookmarkcmd = "$rhost $mysudocmd $zfscmd get -Hpd 1 -t bookmark $properties_string $fsescaped 2>&1 |"; writelog('DEBUG', "getting list of bookmarks on $fs using $getbookmarkcmd..."); open FH, $getbookmarkcmd; my @rawbookmarks = ; @@ -2052,48 +2015,46 @@ sub getbookmarks() { die "CRITICAL ERROR: bookmarks couldn't be listed for $fs (exit code $?)"; } - # this is a little obnoxious. get guid,creation returns guid,creation on two separate lines - # as though each were an entirely separate get command. + my %bookmark_data; + my %creationtimes; - my $lastguid; - my %creationtimes=(); + for my $line (@rawbookmarks) { + chomp $line; + my ($dataset, $property, $value) = split /\t/, $line; + next unless defined $value; - foreach my $line (@rawbookmarks) { - # only import bookmark guids, creation from the specified filesystem - if ($line =~ /\Q$fs\E\#.*\tguid/) { - chomp $line; - $lastguid = $line; - $lastguid =~ s/^.*\tguid\t*(\d*).*/$1/; - my $bookmark = $line; - $bookmark =~ s/^.*\#(.*)\tguid.*$/$1/; - $bookmarks{$lastguid}{'name'}=$bookmark; - } elsif ($line =~ /\Q$fs\E\#.*\tcreation/) { - chomp $line; - my $creation = $line; - $creation =~ s/^.*\tcreation\t*(\d*).*/$1/; - my $bookmark = $line; - $bookmark =~ s/^.*\#(.*)\tcreation.*$/$1/; + my (undef, $bookmark) = split /#/, $dataset; + next unless length $bookmark; - # the accuracy of the creation timestamp is only for a second, but - # bookmarks in the same second are possible. The list command - # has an ordered output so we append another three digit running number - # to the creation timestamp and make sure those are ordered correctly - # for bookmarks with the same creation timestamp + $bookmark_data{$bookmark}{$property} = $value; + + # the accuracy of the creation timestamp is only for a second, but + # bookmarks in the same second are possible. The list command + # has an ordered output so we append another three digit running number + # to the creation timestamp and make sure those are ordered correctly + # for bookmarks with the same creation timestamp + if ($property eq 'creation') { my $counter = 0; my $creationsuffix; while ($counter < 999) { - $creationsuffix = sprintf("%s%03d", $creation, $counter); + $creationsuffix = sprintf("%s%03d", $value, $counter); if (!defined $creationtimes{$creationsuffix}) { $creationtimes{$creationsuffix} = 1; last; } $counter += 1; } - - $bookmarks{$lastguid}{'creation'}=$creationsuffix; + $bookmark_data{$bookmark}{'creation'} = $creationsuffix; } } + for my $bookmark (keys %bookmark_data) { + my $guid = $bookmark_data{$bookmark}{'guid'}; + $bookmarks{$guid}{'name'} = $bookmark; + $bookmarks{$guid}{'creation'} = $bookmark_data{$bookmark}{'creation'}; + $bookmarks{$guid}{'createtxg'} = $bookmark_data{$bookmark}{'createtxg'}; + } + return %bookmarks; } From be52f8ab1e9410797d957c09079687c7b0e1ecd2 Mon Sep 17 00:00:00 2001 From: Nick Liu Date: Wed, 11 Jun 2025 12:25:14 -0500 Subject: [PATCH 2/5] feat(syncoid): Sort snapshots reliably using the `createtxg` property System clock adjustments from manual changes or NTP synchronization can cause ZFS snapshot creation timestamps to be non-monotonic. This can cause `syncoid` to select the wrong "oldest" snapshot for initial replication or the wrong "newest" snapshot with `--no-sync-snap`, potentially losing data from the source to the target. This change adds the `sortsnapshots()` helper that prefers to compare the `createtxg` (creation transaction group) property over the `creation` property when available. The `createtxg` property is guaranteed to be strictly sequential within a ZFS pool, unlike the `creation` property, which depends on the system clock's accuracy. Unlike the first iteration of `sortsnapshots()` in #818, the subroutine takes a specific snapshot sub-hash ('source' or 'target') as input rather than both, ensuring createtxg comparisons occur within the same zpool context. The first iteration that took the entire snapshots hash had no mechanism to sort target snapshots, which could have caused issues in usages that expected target snapshots to be sorted. Most snapshot sorting call sites now use the new `sortsnapshots()` subroutine. Two more usages involving bookmarks are updated in a different commit for independent testing of a bookmarks-related refactoring. Fixes: #815 --- syncoid | 33 ++++++++++++++++++++++++--------- 1 file changed, 24 insertions(+), 9 deletions(-) diff --git a/syncoid b/syncoid index 7a6d6d1..2e6762c 100755 --- a/syncoid +++ b/syncoid @@ -441,7 +441,7 @@ sub syncdataset { # Don't send the sync snap if it's filtered out by --exclude-snaps or # --include-snaps if (!snapisincluded($newsyncsnap)) { - $newsyncsnap = getnewestsnapshot($sourcehost,$sourcefs,$sourceisroot); + $newsyncsnap = getnewestsnapshot(\%snaps); if ($newsyncsnap eq 0) { writelog('WARN', "CRITICAL: no snapshots exist on source $sourcefs, and you asked for --no-sync-snap."); if ($exitcode < 1) { $exitcode = 1; } @@ -450,7 +450,7 @@ sub syncdataset { } } else { # we don't want sync snapshots created, so use the newest snapshot we can find. - $newsyncsnap = getnewestsnapshot($sourcehost,$sourcefs,$sourceisroot); + $newsyncsnap = getnewestsnapshot(\%snaps); if ($newsyncsnap eq 0) { writelog('WARN', "CRITICAL: no snapshots exist on source $sourcefs, and you asked for --no-sync-snap."); if ($exitcode < 1) { $exitcode = 1; } @@ -868,7 +868,7 @@ sub syncdataset { %snaps = (%sourcesnaps, %targetsnaps); } - my @to_delete = sort { $snaps{'target'}{$a}{'creation'}<=>$snaps{'target'}{$b}{'creation'} } grep {!exists $snaps{'source'}{$_}} keys %{ $snaps{'target'} }; + my @to_delete = sort { sortsnapshots($snaps{'target'}, $a, $b) } grep {!exists $snaps{'source'}{$_}} keys %{ $snaps{'target'} }; while (@to_delete) { # Create batch of snapshots to remove my $snaps = join ',', splice(@to_delete, 0, 50); @@ -1574,9 +1574,22 @@ sub readablebytes { return $disp; } +sub sortsnapshots { + my ($snapdata, $left, $right) = @_; + if (defined $snapdata->{$left}{'createtxg'} && defined $snapdata->{$right}{'createtxg'}) { + return $snapdata->{$left}{'createtxg'} <=> $snapdata->{$right}{'createtxg'}; + } + + if (defined $snapdata->{$left}{'creation'} && defined $snapdata->{$right}{'creation'}) { + return $snapdata->{$left}{'creation'} <=> $snapdata->{$right}{'creation'}; + } + + return 0; +} + sub getoldestsnapshot { my $snaps = shift; - foreach my $snap ( sort { $snaps{'source'}{$a}{'creation'}<=>$snaps{'source'}{$b}{'creation'} } keys %{ $snaps{'source'} }) { + foreach my $snap (sort { sortsnapshots($snaps{'source'}, $a, $b) } keys %{ $snaps{'source'} }) { # return on first snap found - it's the oldest return $snap; } @@ -1590,7 +1603,7 @@ sub getoldestsnapshot { sub getnewestsnapshot { my $snaps = shift; - foreach my $snap ( sort { $snaps{'source'}{$b}{'creation'}<=>$snaps{'source'}{$a}{'creation'} } keys %{ $snaps{'source'} }) { + foreach my $snap (sort { sortsnapshots($snaps{'source'}, $b, $a) } keys %{ $snaps{'source'} }) { # return on first snap found - it's the newest writelog('DEBUG', "NEWEST SNAPSHOT: $snap"); return $snap; @@ -1769,7 +1782,7 @@ sub pruneoldsyncsnaps { sub getmatchingsnapshot { my ($sourcefs, $targetfs, $snaps) = @_; - foreach my $snap ( sort { $snaps{'source'}{$b}{'creation'}<=>$snaps{'source'}{$a}{'creation'} } keys %{ $snaps{'source'} }) { + foreach my $snap ( sort { sortsnapshots($snaps{'source'}, $b, $a) } keys %{ $snaps{'source'} }) { if (defined $snaps{'target'}{$snap}) { if ($snaps{'source'}{$snap}{'guid'} == $snaps{'target'}{$snap}{'guid'}) { return $snap; @@ -1974,9 +1987,11 @@ sub getsnaps { for my $snap (keys %snap_data) { if (length $type_filter || $snap_data{$snap}{'type'} eq 'snapshot') { - $snaps{$type}{$snap}{'guid'} = $snap_data{$snap}{'guid'}; - $snaps{$type}{$snap}{'createtxg'} = $snap_data{$snap}{'createtxg'}; - $snaps{$type}{$snap}{'creation'} = $snap_data{$snap}{'creation'}; + foreach my $prop (@{$host_features->{supported_properties}}) { + if (exists $snap_data{$snap}{$prop}) { + $snaps{$type}{$snap}{$prop} = $snap_data{$snap}{$prop}; + } + } } } From 258a664dc0b2c2351ee300b18e0c11928b796ef2 Mon Sep 17 00:00:00 2001 From: Nick Liu Date: Wed, 11 Jun 2025 12:40:57 -0500 Subject: [PATCH 3/5] fix(syncoid): Harden bookmark replication against timestamp non-monotony * Replaced direct sorting on the `creation` property with calls to the `sortsnapshots()` helper subroutine. As with other usages, this ensures that when `syncoid` searches for the next snapshot to replicate from a bookmark, it preferentially uses the monotonic `createtxg` for sorting. * Refactored the variables holding bookmark details from separate scalars (`$bookmark`, `$bookmarkcreation`) into a single hash (`%bookmark`). This allows for cleaner handling of all relevant bookmark properties (`name`, `creation`, `createtxg`). * Fixed a code comment that incorrectly described the snapshot search order. The search for a matching target snapshot now correctly states it proceeds from newest-to-oldest to find the most recent common ancestor. --- syncoid | 33 +++++++++++++++++---------------- 1 file changed, 17 insertions(+), 16 deletions(-) diff --git a/syncoid b/syncoid index 2e6762c..76aae39 100755 --- a/syncoid +++ b/syncoid @@ -578,28 +578,26 @@ sub syncdataset { my $targetsize = getzfsvalue($targethost,$targetfs,$targetisroot,'-p used'); - my $bookmark = 0; - my $bookmarkcreation = 0; + my %bookmark = (); $matchingsnap = getmatchingsnapshot($sourcefs, $targetfs, \%snaps); if (! $matchingsnap) { # no matching snapshots, check for bookmarks as fallback my %bookmarks = getbookmarks($sourcehost,$sourcefs,$sourceisroot); - # check for matching guid of source bookmark and target snapshot (oldest first) - foreach my $snap ( sort { $snaps{'target'}{$b}{'creation'}<=>$snaps{'target'}{$a}{'creation'} } keys %{ $snaps{'target'} }) { + # check for matching guid of source bookmark and target snapshot (newest first) + foreach my $snap ( sort { sortsnapshots($snaps{'target'}, $b, $a) } keys %{ $snaps{'target'} }) { my $guid = $snaps{'target'}{$snap}{'guid'}; if (defined $bookmarks{$guid}) { # found a match - $bookmark = $bookmarks{$guid}{'name'}; - $bookmarkcreation = $bookmarks{$guid}{'creation'}; + %bookmark = %{ $bookmarks{$guid} }; $matchingsnap = $snap; last; } } - if (! $bookmark) { + if (! %bookmark) { # force delete is not possible for the root dataset if ($args{'force-delete'} && index($targetfs, '/') != -1) { writelog('INFO', "Removing $targetfs because no matching snapshots were found"); @@ -672,15 +670,18 @@ sub syncdataset { my $nextsnapshot = 0; - if ($bookmark) { - my $bookmarkescaped = escapeshellparam($bookmark); + if (%bookmark) { if (!defined $args{'no-stream'}) { # if intermediate snapshots are needed we need to find the next oldest snapshot, # do an replication to it and replicate as always from oldest to newest # because bookmark sends doesn't support intermediates directly - foreach my $snap ( sort { $snaps{'source'}{$a}{'creation'}<=>$snaps{'source'}{$b}{'creation'} } keys %{ $snaps{'source'} }) { - if ($snaps{'source'}{$snap}{'creation'} >= $bookmarkcreation) { + foreach my $snap ( sort { sortsnapshots($snaps{'source'}, $a, $b) } keys %{ $snaps{'source'} }) { + my $comparisonkey = 'creation'; + if (defined $snaps{'source'}{$snap}{'createtxg'} && defined $bookmark{'createtxg'}) { + $comparisonkey = 'createtxg'; + } + if ($snaps{'source'}{$snap}{$comparisonkey} >= $bookmark{$comparisonkey}) { $nextsnapshot = $snap; last; } @@ -688,13 +689,13 @@ sub syncdataset { } if ($nextsnapshot) { - ($exit, $stdout) = syncbookmark($sourcehost, $sourcefs, $targethost, $targetfs, $bookmark, $nextsnapshot); + ($exit, $stdout) = syncbookmark($sourcehost, $sourcefs, $targethost, $targetfs, $bookmark{'name'}, $nextsnapshot); $exit == 0 or do { if (!$resume && $stdout =~ /\Qcontains partially-complete state\E/) { writelog('WARN', "resetting partially receive state"); resetreceivestate($targethost,$targetfs,$targetisroot); - (my $ret) = syncbookmark($sourcehost, $sourcefs, $targethost, $targetfs, $bookmark, $nextsnapshot); + (my $ret) = syncbookmark($sourcehost, $sourcefs, $targethost, $targetfs, $bookmark{'name'}, $nextsnapshot); $ret == 0 or do { if ($exitcode < 2) { $exitcode = 2; } return 0; @@ -708,13 +709,13 @@ sub syncdataset { $matchingsnap = $nextsnapshot; $matchingsnapescaped = escapeshellparam($matchingsnap); } else { - ($exit, $stdout) = syncbookmark($sourcehost, $sourcefs, $targethost, $targetfs, $bookmark, $newsyncsnap); + ($exit, $stdout) = syncbookmark($sourcehost, $sourcefs, $targethost, $targetfs, $bookmark{'name'}, $newsyncsnap); $exit == 0 or do { if (!$resume && $stdout =~ /\Qcontains partially-complete state\E/) { writelog('WARN', "resetting partially receive state"); resetreceivestate($targethost,$targetfs,$targetisroot); - (my $ret) = syncbookmark($sourcehost, $sourcefs, $targethost, $targetfs, $bookmark, $newsyncsnap); + (my $ret) = syncbookmark($sourcehost, $sourcefs, $targethost, $targetfs, $bookmark{'name'}, $newsyncsnap); $ret == 0 or do { if ($exitcode < 2) { $exitcode = 2; } return 0; @@ -729,7 +730,7 @@ sub syncdataset { # do a normal replication if bookmarks aren't used or if previous # bookmark replication was only done to the next oldest snapshot - if (!$bookmark || $nextsnapshot) { + if (!%bookmark || $nextsnapshot) { if ($matchingsnap eq $newsyncsnap) { # edge case: bookmark replication used the latest snapshot return 0; From f2e1e4f8a0af39581de436e511d089160d6bff82 Mon Sep 17 00:00:00 2001 From: Nick Liu Date: Wed, 11 Jun 2025 13:01:00 -0500 Subject: [PATCH 4/5] test(syncoid): Add test to verify out-of-order snapshot sync This commit adds a regression test for the out-of-order snapshot replication issue. The new test case manipulates the system clock with `setdate` to create snapshots with non-monotonic `creation` timestamps but a correct, sequential `createtxg`. It then runs `syncoid` and verifies that all snapshots were replicated, which is only possible if they are ordered correctly by `createtxg`. See #815 for the original test. --- .../011_sync_out-of-order_snapshots/run.sh | 46 +++++++++++++++++++ 1 file changed, 46 insertions(+) create mode 100755 tests/syncoid/011_sync_out-of-order_snapshots/run.sh diff --git a/tests/syncoid/011_sync_out-of-order_snapshots/run.sh b/tests/syncoid/011_sync_out-of-order_snapshots/run.sh new file mode 100755 index 0000000..2011d03 --- /dev/null +++ b/tests/syncoid/011_sync_out-of-order_snapshots/run.sh @@ -0,0 +1,46 @@ +#!/bin/bash + +# test verifying snapshots with out-of-order snapshot creation datetimes + +set -x +set -e + +. ../../common/lib.sh + +POOL_IMAGE="/tmp/jimsalterjrs_sanoid_815.img" +POOL_SIZE="64M" +POOL_NAME="jimsalterjrs_sanoid_815" + +truncate -s "${POOL_SIZE}" "${POOL_IMAGE}" + +zpool create -m none -f "${POOL_NAME}" "${POOL_IMAGE}" + +function cleanUp { + zpool export "${POOL_NAME}" + rm -f "${POOL_IMAGE}" +} + +# export pool and remove the image in any case +trap cleanUp EXIT + +zfs create "${POOL_NAME}"/before +zfs snapshot "${POOL_NAME}"/before@this-snapshot-should-make-it-into-the-after-dataset + +disableTimeSync +setdate 1155533696 +zfs snapshot "${POOL_NAME}"/before@oldest-snapshot + +zfs snapshot "${POOL_NAME}"/before@another-snapshot-does-not-matter +../../../syncoid --sendoptions="Lec" "${POOL_NAME}"/before "${POOL_NAME}"/after + +# verify +saveSnapshotList "${POOL_NAME}" "snapshot-list.txt" + +grep "${POOL_NAME}/before@this-snapshot-should-make-it-into-the-after-dataset" "snapshot-list.txt" || exit $? +grep "${POOL_NAME}/after@this-snapshot-should-make-it-into-the-after-dataset" "snapshot-list.txt" || exit $? +grep "${POOL_NAME}/before@oldest-snapshot" "snapshot-list.txt" || exit $? +grep "${POOL_NAME}/after@oldest-snapshot" "snapshot-list.txt" || exit $? +grep "${POOL_NAME}/before@another-snapshot-does-not-matter" "snapshot-list.txt" || exit $? +grep "${POOL_NAME}/after@another-snapshot-does-not-matter" "snapshot-list.txt" || exit $? + +exit 0 \ No newline at end of file From 1952e96846bd79a939b69cf044b196e933d00d04 Mon Sep 17 00:00:00 2001 From: Nick Liu Date: Wed, 11 Jun 2025 13:19:07 -0500 Subject: [PATCH 5/5] fix(tests/common/lib.sh): Support set -e in test scripts The `systemctl is-active virtualbox-guest-utils.service` command returns a non-zero exit status when the service is inactive, causing early exit in tests that use `set -e`. This change suppresses that error with `|| true` to prevent test failure when the service check returns non-zero. Fixes this test: ``` root@demo:~/sanoid/tests/syncoid# cat /tmp/syncoid_test_run_011_sync_out-of-order_snapshots.log + set -e + . ../../common/lib.sh +++ uname ++ unamestr=Linux + POOL_IMAGE=/tmp/jimsalterjrs_sanoid_815.img + POOL_SIZE=64M + POOL_NAME=jimsalterjrs_sanoid_815 + truncate -s 64M /tmp/jimsalterjrs_sanoid_815.img + zpool create -m none -f jimsalterjrs_sanoid_815 /tmp/jimsalterjrs_sanoid_815.img + trap cleanUp EXIT + zfs create jimsalterjrs_sanoid_815/before + zfs snapshot jimsalterjrs_sanoid_815/before@this-snapshot-should-make-it-into-the-after-dataset + disableTimeSync + which timedatectl + '[' 0 -eq 0 ']' + timedatectl set-ntp 0 + which systemctl + '[' 0 -eq 0 ']' + systemctl is-active virtualbox-guest-utils.service inactive + cleanUp + zpool export jimsalterjrs_sanoid_815 + rm -f /tmp/jimsalterjrs_sanoid_815.img ``` --- tests/common/lib.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/common/lib.sh b/tests/common/lib.sh index 84b2c63..a204652 100644 --- a/tests/common/lib.sh +++ b/tests/common/lib.sh @@ -57,7 +57,7 @@ function disableTimeSync { which systemctl > /dev/null if [ $? -eq 0 ]; then - systemctl is-active virtualbox-guest-utils.service && systemctl stop virtualbox-guest-utils.service + systemctl is-active virtualbox-guest-utils.service && systemctl stop virtualbox-guest-utils.service || true fi }