Compare commits

...

4 Commits

Author SHA1 Message Date
Jakob Rath 9707a2b779 Merge 114724b0a4 into 940a84e21f 2025-06-16 22:39:22 -05:00
Jim Salter 940a84e21f Merge pull request #1008 from aabccd021/master (Fix readme formatting) 2025-06-12 09:53:57 -04:00
aabccd021 680194fa33 Fix readme formatting 2025-06-12 12:58:26 +07:00
Jakob Rath 114724b0a4 Replicate all dependencies of a dataset first
Assuming we want to replicate the following pool:

```
NAME            USED  AVAIL  REFER  MOUNTPOINT              ORIGIN
testpool1      1.10M  38.2M   288K  /Volumes/testpool1      -
testpool1/A     326K  38.2M   293K  /Volumes/testpool1/A    testpool1/B@b
testpool1/A/D   303K  38.2M   288K  /Volumes/testpool1/A/D  -
testpool1/B    35.5K  38.2M   292K  /Volumes/testpool1/B    testpool1/C@a
testpool1/C     306K  38.2M   290K  /Volumes/testpool1/C    -
```

Note the clone dependencies: `A -> B -> C`.

Currently, syncoid notices that `A` and `B` are clones and defers syncing them.
There are two problems:

1. Syncing `A/D` fails because we have deferred `A`.

2. The clone relation `A -> B` will not be recreated since the list of deferred datasets does not take into account clone relations between them.

This PR solves both of these problems by collecting all dependencies of a dataset and syncing them before the dataset itself.
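For illustration, here is a small self-contained Perl sketch of that idea (the dataset names come from the example pool above, but the script and its data structures are mine, not the code from this PR): each dataset's parent and clone origin are treated as dependencies, collected transitively, and synced before the dataset itself.

```
#!/usr/bin/perl
use strict;
use warnings;

# Hypothetical model of the example pool above: origin is undef for non-clones.
my %datasets = (
    'testpool1'     => { origin => undef },
    'testpool1/A'   => { origin => 'testpool1/B@b' },
    'testpool1/A/D' => { origin => undef },
    'testpool1/B'   => { origin => 'testpool1/C@a' },
    'testpool1/C'   => { origin => undef },
);

my %synced;
foreach my $start (sort keys %datasets) {
    my @todo = ($start);  # datasets whose dependencies still need collecting
    my @tosync;           # sync order for this pass, dependencies first
    my %seen;             # guards against dependency cycles
    while (@todo) {
        my $dataset = shift @todo;
        next if $synced{$dataset} || $seen{$dataset}++;
        unshift @tosync, $dataset;  # dependencies found later end up earlier
        # the parent dataset is a dependency (if it is replicated too)
        if ($dataset =~ m{(.*)/[^/]+$} && exists $datasets{$1}) {
            push @todo, $1;
        }
        # the clone origin (minus the snapshot part) is a dependency, too
        if (defined $datasets{$dataset}{origin}) {
            my ($srcdataset) = split /@/, $datasets{$dataset}{origin}, 2;
            push @todo, $srcdataset if exists $datasets{$srcdataset};
        }
    }
    for my $dataset (@tosync) {
        print "sync $dataset\n";  # stand-in for the real syncdataset() call
        $synced{$dataset} = 1;
    }
}
```

Running this prints the datasets in the order `testpool1`, `testpool1/C`, `testpool1/B`, `testpool1/A`, `testpool1/A/D`, i.e. every clone source before its clone and every parent before its children.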

---

One problematic case remains: if a dataset depends (transitively) on one of its own children, e.g.:

```
NAME            USED  AVAIL  REFER  MOUNTPOINT              ORIGIN
testpool1/E    58.5K  38.7M   298K  /Volumes/testpool1/E    testpool1/E/D@e
testpool1/E/D  37.5K  38.7M   296K  /Volumes/testpool1/E/D  testpool1/A@d
```

Here, the first run of syncoid will fail to sync `E/D`.
I've chosen to ignore this case for now because
1) it seems quite artificial and not like something that would occur in practice very often, and
2) a second run of syncoid will successfully sync `E/D` too (although the clone relation `E -> E/D` is lost).
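To make the cycle concrete, here is a minimal sketch under the same assumptions as above (again my own illustration, not the PR's code): `E` depends on `E/D` through its origin, while `E/D` depends on `E` because `E` is its parent, so the dependency walk needs a visited set to avoid looping forever, and the resulting order places `E/D` before its own parent.

```
#!/usr/bin/perl
use strict;
use warnings;

# Hand-written dependency edges for the cyclic example above:
# testpool1/E depends on testpool1/E/D via its origin testpool1/E/D@e,
# testpool1/E/D depends on testpool1/E because that is its parent dataset.
my %deps = (
    'testpool1/E'   => ['testpool1/E/D'],
    'testpool1/E/D' => ['testpool1/E'],
);

my @todo = ('testpool1/E');
my (%seen, @order);
while (@todo) {
    my $d = shift @todo;
    next if $seen{$d}++;        # breaks the cycle instead of looping forever
    unshift @order, $d;
    push @todo, @{ $deps{$d} };
}
print "@order\n";               # prints: testpool1/E/D testpool1/E
```

So `E/D` is ordered before `E`, and presumably the first run fails on `E/D` because its parent does not yet exist on the target, which is why a second run is needed.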
2024-06-09 14:13:15 +02:00
2 changed files with 72 additions and 38 deletions

README.md

```
@@ -330,6 +330,7 @@ As of 1.4.18, syncoid also automatically supports and enables resume of interrup
This argument tells syncoid to create a zfs bookmark for the newest snapshot after it got replicated successfully. The bookmark name will be equal to the snapshot name. Only works in combination with the --no-sync-snap option. This can be very useful for irregular replication where the last matching snapshot on the source was already deleted but the bookmark remains so a replication is still possible.
+ --use-hold
This argument tells syncoid to add a hold to the newest snapshot on the source and target after replication succeeds and to remove the hold after the next successful replication. Setting a hold prevents the snapshots from being destroyed. The hold name includes the identifier if set. This allows for separate holds in case of replication to multiple targets.
+ --preserve-recordsize
```

syncoid

```
@@ -215,53 +215,86 @@ if (!defined $args{'recursive'}) {
         $exitcode = 2;
     }
-    my @deferred;
-    foreach my $datasetProperties(@datasets) {
+    my %datasetsByName;
+    foreach my $datasetProperties (@datasets) {
         my $dataset = $datasetProperties->{'name'};
         my $origin = $datasetProperties->{'origin'};
-        if ($origin eq "-" || defined $args{'no-clone-handling'}) {
-            $origin = undef;
-        } else {
-            # check if clone source is replicated too
-            my @values = split(/@/, $origin, 2);
-            my $srcdataset = $values[0];
-            my $found = 0;
-            foreach my $datasetProperties(@datasets) {
-                if ($datasetProperties->{'name'} eq $srcdataset) {
-                    $found = 1;
-                    last;
-                }
-            }
-            if ($found == 0) {
-                # clone source is not replicated, do a full replication
-                $origin = undef;
-            } else {
-                # clone source is replicated, defer until all non clones are replicated
-                push @deferred, $datasetProperties;
-                next;
-            }
-        }
-        $dataset =~ s/\Q$sourcefs\E//;
-        chomp $dataset;
-        my $childsourcefs = $sourcefs . $dataset;
-        my $childtargetfs = $targetfs . $dataset;
-        syncdataset($sourcehost, $childsourcefs, $targethost, $childtargetfs, $origin);
-    }
-    # replicate cloned datasets and if this is the initial run, recreate them on the target
-    foreach my $datasetProperties(@deferred) {
-        my $dataset = $datasetProperties->{'name'};
-        my $origin = $datasetProperties->{'origin'};
-        $dataset =~ s/\Q$sourcefs\E//;
-        chomp $dataset;
-        my $childsourcefs = $sourcefs . $dataset;
-        my $childtargetfs = $targetfs . $dataset;
-        syncdataset($sourcehost, $childsourcefs, $targethost, $childtargetfs, $origin);
-    }
+        $datasetsByName{$dataset} = $datasetProperties;
+        # Clean the 'origin' property
+        # (we set 'origin' to undef whenever we don't want to handle it during sync)
+        if ($origin eq "-" || defined $args{'no-clone-handling'}) {
+            $datasetProperties->{'origin'} = undef;
+        }
+    }
+    my %synced;
+    foreach my $dataset1Properties (@datasets) {
+        my $dataset1 = $dataset1Properties->{'name'};
+        # Collect all transitive dependencies of this dataset.
+        # A dataset can have two dependencies:
+        # - the parent dataset
+        # - the origin (if it is a clone)
+        my @todo = ($dataset1); # the datasets whose dependencies we still have to collect
+        my @tosync; # the datasets we have to sync (in the correct order)
+        my %tosyncSet; # set of synced datasets to check for dependency cycles
+        while (@todo) {
+            my $dataset = shift(@todo);
+            if (exists $synced{$dataset}) {
+                # We already synced this dataset, thus also all its dependencies => skip
+                next;
+            }
+            if (exists $tosyncSet{$dataset}) {
+                # We already processed this dataset once during this loop,
+                # so we do not need to do it again.
+                # This check is also necessary to break dependency cycles.
+                #
+                # NOTE:
+                # If there is a cycle, multiple syncoid runs might be necessary to replicate all datasets,
+                # and not all clone relationships will be preserved
+                # (it seems like huge effort to handle this case properly, and it should be quite rare in practice)
+                next;
+            }
+            unshift @tosync, $dataset;
+            $tosyncSet{$dataset} = 1;
+            my ($parent) = $dataset =~ /(.*)\/[^\/]+/;
+            if (defined $parent) {
+                # If parent is replicated too, sync it first
+                if (exists $datasetsByName{$parent}) {
+                    push @todo, $parent;
+                }
+            }
+            my $origin = $datasetsByName{$dataset}->{'origin'};
+            if (defined $origin) {
+                # If clone source is replicated too, sync it first
+                my @values = split(/@/, $origin, 2);
+                my $srcdataset = $values[0];
+                if (exists $datasetsByName{$srcdataset}) {
+                    push @todo, $srcdataset;
+                } else {
+                    $datasetsByName{$dataset}->{'origin'} = undef;
+                }
+            }
+        }
+        foreach my $dataset (@tosync) {
+            my $origin = $datasetsByName{$dataset}->{'origin'};
+            my $datasetPath = $dataset;
+            $datasetPath =~ s/\Q$sourcefs\E//;
+            chomp $datasetPath;
+            my $childsourcefs = $sourcefs . $datasetPath;
+            my $childtargetfs = $targetfs . $datasetPath;
+            syncdataset($sourcehost, $childsourcefs, $targethost, $childtargetfs, $origin);
+            $synced{$dataset} = 1;
+        }
+    }
```