diff --git a/CHANGELIST b/CHANGELIST index 9af90f6..515d05d 100644 --- a/CHANGELIST +++ b/CHANGELIST @@ -1,3 +1,6 @@ +1.4.18 implemented special character handling and support of ZFS resume/receive tokens by default in syncoid, + thank you @phreaker0! + 1.4.17 changed die to warn when unexpectedly unable to remove a snapshot - this allows sanoid to continue taking/removing other snapshots not affected by whatever lock prevented the first from being taken or removed diff --git a/FREEBSD.readme b/FREEBSD.readme index 9960201..732940d 100644 --- a/FREEBSD.readme +++ b/FREEBSD.readme @@ -11,3 +11,14 @@ If you don't want to have to change the shebangs, your other option is to drop a root@bsd:~# ln -s /usr/local/bin/perl /usr/bin/perl After putting this symlink in place, ANY perl script shebanged for Linux will work on your system too. + +Syncoid assumes a Bourne-style shell on remote hosts. Using (t)csh (the default for root under FreeBSD) +has some known issues: + +* If mbuffer is present, syncoid will fail with an "Ambiguous output redirect." error. So if you: + root@bsd:~# ln -s /usr/local/bin/mbuffer /usr/bin/mbuffer + make sure the remote user is using an sh-compatible shell. + +To change to a compatible shell, use the chsh command: + +root@bsd:~# chsh -s /bin/sh diff --git a/INSTALL b/INSTALL index 1492546..15c4896 100644 --- a/INSTALL +++ b/INSTALL @@ -8,8 +8,8 @@ default for SSH transport since v1.4.6. Syncoid runs will fail if one of them is not available on either end of the transport. On Ubuntu: apt install pv lzop mbuffer -On CentOS: yum install lzo pv mbuffer lzop -On FreeBSD: pkg install pv lzop +On CentOS: yum install lzo pv mbuffer lzop perl-Data-Dumper +On FreeBSD: pkg install pv mbuffer lzop FreeBSD notes: FreeBSD may place pv and lzop somewhere other than /usr/bin; syncoid currently does not check the path. @@ -19,6 +19,8 @@ FreeBSD notes: FreeBSD may place pv and lzop in somewhere other than or similar, as appropriate, to create links in /usr/bin to wherever the utilities actually are on your system. + See note about mbuffer in FREEBSD.readme + SANOID ------ @@ -28,4 +30,4 @@ strongly recommends using your distribution's repositories instead. On Ubuntu: apt install libconfig-inifiles-perl On CentOS: yum install perl-Config-IniFiles -On FreeBSD: pkg install p5-Config-Inifiles +On FreeBSD: pkg install p5-Config-IniFiles diff --git a/README.md b/README.md index 9c4e6ba..d62d183 100644 --- a/README.md +++ b/README.md @@ -6,9 +6,11 @@ More prosaically, you can use Sanoid to create, automatically thin, and monitor snapshots and pool health from a single eminently human-readable TOML config file at /etc/sanoid/sanoid.conf. (Sanoid also requires a "defaults" file located at /etc/sanoid/sanoid.defaults.conf, which is not user-editable.) A typical Sanoid system would have a single cron job: ``` -* * * * * /usr/local/bin/sanoid --cron +* * * * * TZ=UTC /usr/local/bin/sanoid --cron ``` +`Note`: Using UTC as the timezone is recommended, to prevent problems with daylight saving time + And its /etc/sanoid/sanoid.conf might look something like this: ``` @@ -26,6 +28,7 @@ And its /etc/sanoid/sanoid.conf might look something like this: ############################# [template_production] + frequently = 0 hourly = 36 daily = 30 monthly = 3 @@ -62,6 +65,10 @@ Which would be enough to tell sanoid to take and keep 36 hourly snapshots, 30 da This option is designed to be run by a Nagios monitoring system. It reports on the health of the zpool your filesystems are on.
It only monitors filesystems that are configured in the sanoid.conf file. ++ --monitor-capacity + + This option is designed to be run by a Nagios monitoring system. It reports on the capacity of the zpool your filesystems are on. It only monitors pools that are configured in the sanoid.conf file. + + --force-update This clears out sanoid's zfs snapshot listing cache. This is normally not needed. @@ -82,6 +89,13 @@ Which would be enough to tell sanoid to take and keep 36 hourly snapshots, 30 da This prints out quite a lot of additional information during a sanoid run, and is normally not needed. ++ --readonly + + Skip creation/deletion of snapshots (simulate). + ++ --help + + Show help message. ---------- @@ -108,6 +122,35 @@ syncoid root@remotehost:data/images/vm backup/images/vm Which would pull-replicate the filesystem from the remote host to the local system over an SSH tunnel. Syncoid supports recursive replication (replication of a dataset and all its child datasets) and uses mbuffer buffering, lzop compression, and pv progress bars if the utilities are available on the systems used. +If ZFS supports resumable send/receive streams on both the source and target, they will be enabled by default. + +As of 1.4.18, syncoid also automatically supports and enables resume of interrupted replication when both source and target support this feature. + +##### Syncoid Dataset Properties + ++ syncoid:sync + + Available values: + + + `true` (default if unset) + + This dataset will be synchronised to all hosts. + + + `false` + + This dataset will not be synchronised to any hosts - it will be skipped. This can be useful for preventing certain datasets from being transferred when recursively handling a tree. + + + `host1,host2,...` + + A comma-separated list of hosts. This dataset will only be synchronised by hosts listed in the property. + + _Note_: this check is performed by the host running `syncoid`, so the local hostname must be present for inclusion during a push operation, and the remote hostname must be present for a pull. + + _Note_: this will also prevent syncoid from handling the dataset if given explicitly on the command line. + + _Note_: syncing a child of a no-sync dataset will currently result in a critical error. + + _Note_: empty properties will be handled as if they were unset. ##### Syncoid Command Line Options @@ -119,13 +162,21 @@ Syncoid supports recursive replication (replication of a dataset and all its chi This is the destination dataset. It can be either local or remote. ++ --identifier= + + Adds the given identifier to the snapshot name after the "syncoid_" prefix and before the hostname. This enables reliable replication to multiple targets from the same host. The following characters are allowed: a-z, A-Z, 0-9, _, -, : and . . + + -r --recursive This will also transfer child datasets. ++ --skip-parent + + This will skip the syncing of the parent dataset. Does nothing without the '--recursive' option. + + --compress - Currently accepted options: gzip, pigz-fast, pigz-slow, lzo (default) & none. If the selected compression method is unavailable on the source and destination, no compression will be used. + Currently accepted options: gzip, pigz-fast, pigz-slow, zstd-fast, zstd-slow, lz4, lzo (default) & none. If the selected compression method is unavailable on the source and destination, no compression will be used. (See the example invocation below.)
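For concreteness, a hypothetical push invocation combining the options above (pool, dataset, and host names are invented for illustration, and `pigz` must be installed on both ends for `pigz-fast` to take effect):

```
# recursively replicate a dataset tree to a second target, skipping the
# bare parent dataset and tagging the sync snapshots with an identifier
# so they don't collide with those created for the first target
syncoid -r --skip-parent --identifier=offsite --compress=pigz-fast data/images root@backuphost2:backup/images
```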
+ --source-bwlimit @@ -147,6 +198,19 @@ Syncoid supports recursive replication (replication of a dataset and all its chi This argument tells syncoid to restrict itself to existing snapshots, instead of creating a semi-ephemeral syncoid snapshot at execution time. Especially useful in multi-target (A->B, A->C) replication schemes, where you might otherwise accumulate a large number of foreign syncoid snapshots. ++ --exclude=REGEX + + The given regular expression will be matched against all datasets which would be synced by this run, and any matching datasets will be excluded. This argument can be specified multiple times. + ++ --no-resume + + This argument tells syncoid not to use resumable zfs send/receive streams. + ++ --no-clone-handling + + This argument tells syncoid not to recreate clones on the target on initial sync, and to do a normal replication instead. + + + --dumpsnaps This prints a list of snapshots during the run. @@ -155,6 +219,14 @@ Syncoid supports recursive replication (replication of a dataset and all its chi Allow sync to/from boxes running SSH on non-standard ports. ++ --sshcipher + + Instruct ssh to use a particular cipher set. + ++ --sshoption + + Passes the given option on to ssh. This argument can be specified multiple times. + + --sshkey Use specified identity file as per ssh -i. @@ -167,6 +239,10 @@ Syncoid supports recursive replication (replication of a dataset and all its chi This prints out quite a lot of additional information during a syncoid run, and is normally not needed. ++ --help + + Show help message. + + --version Print the version and exit. diff --git a/VERSION b/VERSION index 04e0d3f..f689e8c 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -1.4.17 +1.4.18 diff --git a/debian/changelog b/packages/debian/changelog similarity index 53% rename from debian/changelog rename to packages/debian/changelog index ab530b0..2bcf423 100644 --- a/debian/changelog +++ b/packages/debian/changelog @@ -1,3 +1,18 @@ +sanoid (1.4.18) unstable; urgency=medium + + implemented special character handling and support of ZFS resume/receive tokens by default in syncoid, + thank you @phreaker0! + + -- Jim Salter Wed, 25 Apr 2018 16:24:00 -0400 + +sanoid (1.4.17) unstable; urgency=medium + + changed die to warn when unexpectedly unable to remove a snapshot - this + allows sanoid to continue taking/removing other snapshots not affected by + whatever lock prevented the first from being taken or removed + + -- Jim Salter Wed, 8 Nov 2017 15:25:00 -0400 + sanoid (1.4.16) unstable; urgency=medium * merged @hrast01's extended fix to support -o option1=val,option2=val passthrough to SSH.
merged @JakobR's diff --git a/debian/compat b/packages/debian/compat similarity index 100% rename from debian/compat rename to packages/debian/compat diff --git a/debian/control b/packages/debian/control similarity index 100% rename from debian/control rename to packages/debian/control diff --git a/debian/copyright b/packages/debian/copyright similarity index 100% rename from debian/copyright rename to packages/debian/copyright diff --git a/debian/rules b/packages/debian/rules similarity index 67% rename from debian/rules rename to packages/debian/rules index 83eb475..ddd77b0 100755 --- a/debian/rules +++ b/packages/debian/rules @@ -16,4 +16,14 @@ override_dh_auto_install: @mkdir -p $(DESTDIR)/usr/share/doc/sanoid; \ cp sanoid.conf $(DESTDIR)/usr/share/doc/sanoid/sanoid.conf.example; @mkdir -p $(DESTDIR)/lib/systemd/system; \ - cp debian/sanoid.timer $(DESTDIR)/lib/systemd/system; + cp debian/sanoid-prune.service $(DESTDIR)/lib/systemd/system; + +override_dh_installinit: + dh_installinit --noscripts + +override_dh_systemd_enable: + dh_systemd_enable sanoid.timer + dh_systemd_enable sanoid-prune.service + +override_dh_systemd_start: + dh_systemd_start sanoid.timer diff --git a/packages/debian/sanoid-prune.service b/packages/debian/sanoid-prune.service new file mode 100644 index 0000000..c956bd5 --- /dev/null +++ b/packages/debian/sanoid-prune.service @@ -0,0 +1,13 @@ +[Unit] +Description=Cleanup ZFS Pool +Requires=zfs.target +After=zfs.target sanoid.service +ConditionFileNotEmpty=/etc/sanoid/sanoid.conf + +[Service] +Environment=TZ=UTC +Type=oneshot +ExecStart=/usr/sbin/sanoid --prune-snapshots + +[Install] +WantedBy=sanoid.service diff --git a/debian/sanoid.README.Debian b/packages/debian/sanoid.README.Debian similarity index 100% rename from debian/sanoid.README.Debian rename to packages/debian/sanoid.README.Debian diff --git a/debian/sanoid.docs b/packages/debian/sanoid.docs similarity index 100% rename from debian/sanoid.docs rename to packages/debian/sanoid.docs diff --git a/debian/sanoid.service b/packages/debian/sanoid.service similarity index 69% rename from debian/sanoid.service rename to packages/debian/sanoid.service index b54c586..e146354 100644 --- a/debian/sanoid.service +++ b/packages/debian/sanoid.service @@ -5,5 +5,6 @@ After=zfs.target ConditionFileNotEmpty=/etc/sanoid/sanoid.conf [Service] +Environment=TZ=UTC Type=oneshot -ExecStart=/usr/sbin/sanoid --cron +ExecStart=/usr/sbin/sanoid --take-snapshots diff --git a/debian/sanoid.timer b/packages/debian/sanoid.timer similarity index 100% rename from debian/sanoid.timer rename to packages/debian/sanoid.timer diff --git a/debian/source/format b/packages/debian/source/format similarity index 100% rename from debian/source/format rename to packages/debian/source/format diff --git a/packages/rhel/sanoid-1.4.18.tar.gz b/packages/rhel/sanoid-1.4.18.tar.gz new file mode 100644 index 0000000..df137cd Binary files /dev/null and b/packages/rhel/sanoid-1.4.18.tar.gz differ diff --git a/sanoid.spec b/packages/rhel/sanoid.spec similarity index 90% rename from sanoid.spec rename to packages/rhel/sanoid.spec index c0f33ed..7d4995d 100644 --- a/sanoid.spec +++ b/packages/rhel/sanoid.spec @@ -1,4 +1,4 @@ -%global version 1.4.14 +%global version 1.4.18 %global git_tag v%{version} # Enable with systemctl "enable sanoid.timer" @@ -6,15 +6,15 @@ Name: sanoid Version: %{version} -Release: 2%{?dist} +Release: 1%{?dist} BuildArch: noarch Summary: A policy-driven snapshot management tool for ZFS file systems Group: Applications/System License: GPLv3 
URL: https://github.com/jimsalterjrs/sanoid -Source0: https://github.com/jimsalterjrs/%{name}/archive/%{git_tag}/%{name}-%{version}.tar.gz +Source0: https://github.com/jimsalterjrs/%{name}/archive/%{git_tag}/%{name}-%{version}.tar.gz -Requires: perl, mbuffer, lzop, pv +Requires: perl, mbuffer, lzop, pv, perl-Config-IniFiles %if 0%{?_with_systemd} Requires: systemd >= 212 @@ -58,6 +58,7 @@ Requires=zfs.target After=zfs.target [Service] +Environment=TZ=UTC Type=oneshot ExecStart=%{_sbindir}/sanoid --cron EOF @@ -110,6 +111,8 @@ echo "* * * * * root %{_sbindir}/sanoid --cron" > %{buildroot}%{_docdir}/%{name} %endif %changelog +* Sat Apr 28 2018 Dominic Robinson - 1.4.18-1 +- Bump to 1.4.18 * Thu Aug 31 2017 Dominic Robinson - 1.4.14-2 - Add systemd timers * Wed Aug 30 2017 Dominic Robinson - 1.4.14-1 @@ -121,6 +124,5 @@ echo "* * * * * root %{_sbindir}/sanoid --cron" > %{buildroot}%{_docdir}/%{name} - Version bump - Clean up variables and macros - Compatible with both Fedora and Red Hat - * Sat Feb 13 2016 Thomas M. Lapp - 1.4.4-1 - Initial RPM Package diff --git a/packages/rhel/sources b/packages/rhel/sources new file mode 100644 index 0000000..d6068d4 --- /dev/null +++ b/packages/rhel/sources @@ -0,0 +1 @@ +cf0ec23c310d2f9416ebabe48f5edb73 sanoid-1.4.18.tar.gz diff --git a/sanoid b/sanoid index 792497a..59d61cc 100755 --- a/sanoid +++ b/sanoid @@ -4,7 +4,8 @@ # from http://www.gnu.org/licenses/gpl-3.0.html on 2014-11-17. A copy should also be available in this # project's Git repository at https://github.com/jimsalterjrs/sanoid/blob/master/LICENSE. -$::VERSION = '1.4.17'; +$::VERSION = '1.4.18'; +my $MINIMUM_DEFAULTS_VERSION = 2; use strict; use warnings; @@ -18,7 +19,8 @@ use Time::Local; # to parse dates in reverse my %args = ("configdir" => "/etc/sanoid"); GetOptions(\%args, "verbose", "debug", "cron", "readonly", "quiet", "monitor-health", "force-update", "configdir=s", - "monitor-snapshots", "take-snapshots", "prune-snapshots" + "monitor-snapshots", "take-snapshots", "prune-snapshots", + "monitor-capacity" ) or pod2usage(2); # If only config directory (or nothing) has been specified, default to --cron --verbose @@ -30,6 +32,7 @@ if (keys %args < 2) { my $pscmd = '/bin/ps'; my $zfs = '/sbin/zfs'; +my $zpool = '/sbin/zpool'; my $conf_file = "$args{'configdir'}/sanoid.conf"; my $default_conf_file = "$args{'configdir'}/sanoid.defaults.conf"; @@ -39,8 +42,11 @@ my %config = init($conf_file,$default_conf_file); # if we call getsnaps(%config,1) it will forcibly update the cache, TTL or no TTL my $forcecacheupdate = 0; +my $cache = '/var/cache/sanoidsnapshots.txt'; my $cacheTTL = 900; # 15 minutes my %snaps = getsnaps( \%config, $cacheTTL, $forcecacheupdate ); +my %pruned; +my %capacitycache; my %snapsbytype = getsnapsbytype( \%config, \%snaps ); @@ -52,6 +58,7 @@ my @params = ( \%config, \%snaps, \%snapsbytype, \%snapsbypath ); if ($args{'debug'}) { $args{'verbose'}=1; blabber (@params); } if ($args{'monitor-snapshots'}) { monitor_snapshots(@params); } if ($args{'monitor-health'}) { monitor_health(@params); } +if ($args{'monitor-capacity'}) { monitor_capacity(@params); } if ($args{'force-update'}) { my $snaps = getsnaps( \%config, $cacheTTL, 1 ); } if ($args{'cron'}) { @@ -121,12 +128,14 @@ sub monitor_snapshots { my $path = $config{$section}{'path'}; push @paths, $path; - my @types = ('yearly','monthly','weekly','daily','hourly'); + my @types = ('yearly','monthly','weekly','daily','hourly','frequently'); foreach my $type (@types) { + if ($config{$section}{$type} == 0) { next; } my 
$smallerperiod = 0; # we need to set the period length in seconds first - if ($type eq 'hourly') { $smallerperiod = 60; } + if ($type eq 'frequently') { $smallerperiod = 1; } + elsif ($type eq 'hourly') { $smallerperiod = 60; } elsif ($type eq 'daily') { $smallerperiod = 60*60; } elsif ($type eq 'weekly') { $smallerperiod = 60*60*24; } elsif ($type eq 'monthly') { $smallerperiod = 60*60*24*7; } @@ -175,6 +184,61 @@ sub monitor_snapshots { exit $errorlevel; } + +#################################################################################### +#################################################################################### +#################################################################################### + +sub monitor_capacity { + my ($config, $snaps, $snapsbytype, $snapsbypath) = @_; + my %pools; + my @messages; + my $errlevel=0; + + # build pool list with corresponding capacity limits + foreach my $section (keys %config) { + my @pool = split ('/',$section); + + if (scalar @pool == 1 || !defined($pools{$pool[0]}) ) { + my %capacitylimits; + + if (!check_capacity_limit($config{$section}{'capacity_warn'})) { + die "ERROR: invalid zpool capacity warning limit!\n"; + } + + if ($config{$section}{'capacity_warn'} != 0) { + $capacitylimits{'warn'} = $config{$section}{'capacity_warn'}; + } + + if (!check_capacity_limit($config{$section}{'capacity_crit'})) { + die "ERROR: invalid zpool capacity critical limit!\n"; + } + + if ($config{$section}{'capacity_crit'} != 0) { + $capacitylimits{'crit'} = $config{$section}{'capacity_crit'}; + } + + if (%capacitylimits) { + $pools{$pool[0]} = \%capacitylimits; + } + } + } + + foreach my $pool (keys %pools) { + my $capacitylimitsref = $pools{$pool}; + + my ($exitcode, $msg) = check_zpool_capacity($pool,\%$capacitylimitsref); + if ($exitcode > $errlevel) { $errlevel = $exitcode; } + chomp $msg; + push (@messages, $msg); + } + + my @warninglevels = ('','*** WARNING *** ','*** CRITICAL *** '); + my $message = $warninglevels[$errlevel] . join (', ',@messages); + print "$message\n"; + exit $errlevel; +} + #################################################################################### #################################################################################### #################################################################################### @@ -196,12 +260,17 @@ sub prune_snapshots { my $path = $config{$section}{'path'}; my $period = 0; + if (check_prune_defer($config, $section)) { + if ($args{'verbose'}) { print "INFO: deferring snapshot pruning ($section)...\n"; } + next; + } foreach my $type (keys %{ $config{$section} }){ unless ($type =~ /ly$/) { next; } # we need to set the period length in seconds first - if ($type eq 'hourly') { $period = 60*60; } + if ($type eq 'frequently') { $period = 60 * $config{$section}{'frequent_period'}; } + elsif ($type eq 'hourly') { $period = 60*60; } elsif ($type eq 'daily') { $period = 60*60*24; } elsif ($type eq 'weekly') { $period = 60*60*24*7; } elsif ($type eq 'monthly') { $period = 60*60*24*31; } @@ -237,23 +306,41 @@ sub prune_snapshots { foreach my $snap( @prunesnaps ){ if ($args{'verbose'}) { print "INFO: pruning $snap ... \n"; } if (iszfsbusy($path)) { - print "INFO: deferring pruning of $snap - $path is currently in zfs send or receive.\n"; + if ($args{'verbose'}) { print "INFO: deferring pruning of $snap - $path is currently in zfs send or receive.\n"; } } else { - if (! $args{'readonly'}) { system($zfs, "destroy",$snap) == 0 or warn "could not remove $snap : $?"; } + if (! 
$args{'readonly'}) { + if (system($zfs, "destroy", $snap) == 0) { + $pruned{$snap} = 1; + my $dataset = (split '@', $snap)[0]; + my $snapname = (split '@', $snap)[1]; + if ($config{$dataset}{'pruning_script'}) { + $ENV{'SANOID_TARGET'} = $dataset; + $ENV{'SANOID_SNAPNAME'} = $snapname; + if ($args{'verbose'}) { print "executing pruning_script '".$config{$dataset}{'pruning_script'}."' on dataset '$dataset'\n"; } + my $ret = runscript('pruning_script',$dataset); + + delete $ENV{'SANOID_TARGET'}; + delete $ENV{'SANOID_SNAPNAME'}; + } + } else { + warn "could not remove $snap : $?"; + } + } } } removelock('sanoid_pruning'); - $forcecacheupdate = 1; - %snaps = getsnaps(%config,$cacheTTL,$forcecacheupdate); + removecachedsnapshots(0); } else { - print "INFO: deferring snapshot pruning - valid pruning lock held by other sanoid process.\n"; + if ($args{'verbose'}) { print "INFO: deferring snapshot pruning - valid pruning lock held by other sanoid process.\n"; } } } } } } - + # if there were any deferred cache updates, + # do them now and wait if necessary + removecachedsnapshots(1); } # end prune_snapshots @@ -270,6 +357,19 @@ sub take_snapshots { my @newsnaps; + # get utc timestamp of the current day for DST check + my $daystartUtc = timelocal(0, 0, 0, $datestamp{'mday'}, ($datestamp{'mon'}-1), $datestamp{'year'}); + my ($isdst) = (localtime($daystartUtc))[8]; + my $dstOffset = 0; + + if ($isdst ne $datestamp{'isdst'}) { + # current dst is different than at the beginning of the day + if ($isdst) { + # DST ended in the current day + $dstOffset = 60*60; + } + } + if ($args{'verbose'}) { print "INFO: taking snapshots...\n"; } foreach my $section (keys %config) { if ($section =~ /^template/) { next; } @@ -293,7 +393,21 @@ sub take_snapshots { my @preferredtime; my $lastpreferred; - if ($type eq 'hourly') { + # to avoid duplicates with DST + my $dateSuffix = ""; + + if ($type eq 'frequently') { + my $frequentslice = int($datestamp{'min'} / $config{$section}{'frequent_period'}); + + push @preferredtime,0; # try to hit 0 seconds + push @preferredtime,$frequentslice * $config{$section}{'frequent_period'}; + push @preferredtime,$datestamp{'hour'}; + push @preferredtime,$datestamp{'mday'}; + push @preferredtime,($datestamp{'mon'}-1); # january is month 0 + push @preferredtime,$datestamp{'year'}; + $lastpreferred = timelocal(@preferredtime); + if ($lastpreferred > time()) { $lastpreferred -= 60 * $config{$section}{'frequent_period'}; } # preferred time is later this frequent period - so look at last frequent period + } elsif ($type eq 'hourly') { push @preferredtime,0; # try to hit 0 seconds push @preferredtime,$config{$section}{'hourly_min'}; push @preferredtime,$datestamp{'hour'}; @@ -301,6 +415,13 @@ push @preferredtime,$datestamp{'mday'}; push @preferredtime,($datestamp{'mon'}-1); # january is month 0 push @preferredtime,$datestamp{'year'}; $lastpreferred = timelocal(@preferredtime); + + if ($dstOffset ne 0) { + # timelocal doesn't take DST into account + $lastpreferred += $dstOffset; + # DST ended, avoid duplicates + $dateSuffix = "_y"; + } if ($lastpreferred > time()) { $lastpreferred -= 60*60; } # preferred time is later this hour - so look at last hour's } elsif ($type eq 'daily') { push @preferredtime,0; # try to hit 0 seconds @@ -310,7 +431,29 @@ push @preferredtime,($datestamp{'mon'}-1); # january is month 0 push @preferredtime,$datestamp{'year'}; $lastpreferred = timelocal(@preferredtime); - if ($lastpreferred > time()) { $lastpreferred -= 60*60*24; } # preferred time is later today - so look
at yesterday's + + # timelocal doesn't take DST into account + $lastpreferred += $dstOffset; + + # check if the planned time has different DST flag than the current + my ($isdst) = (localtime($lastpreferred))[8]; + if ($isdst ne $datestamp{'isdst'}) { + if (!$isdst) { + # correct DST difference + $lastpreferred -= 60*60; + } + } + + if ($lastpreferred > time()) { + $lastpreferred -= 60*60*24; + + if ($dstOffset ne 0) { + # because we are going back one day + # the DST difference has to be accounted + # for in reverse now + $lastpreferred -= 2*$dstOffset; + } + } # preferred time is later today - so look at yesterday's } elsif ($type eq 'weekly') { # calculate offset in seconds for the desired weekday my $offset = 0; @@ -328,7 +471,7 @@ sub take_snapshots { push @preferredtime,$datestamp{'year'}; $lastpreferred = timelocal(@preferredtime); $lastpreferred += $offset; - if ($lastpreferred > time()) { $lastpreferred -= 60*60*24*7; } # preferred time is later today - so look at yesterday's + if ($lastpreferred > time()) { $lastpreferred -= 60*60*24*7; } # preferred time is later this week - so look at last week's } elsif ($type eq 'monthly') { push @preferredtime,0; # try to hit 0 seconds push @preferredtime,$config{$section}{'monthly_min'}; @@ -347,6 +490,9 @@ sub take_snapshots { push @preferredtime,$datestamp{'year'}; $lastpreferred = timelocal(@preferredtime); if ($lastpreferred > time()) { $lastpreferred -= 60*60*24*31*365.25; } # preferred time is later this year - so look at last year + } else { + warn "WARN: unknown interval type $type in config!"; + next; } # reconstruct our human-formatted most recent preferred snapshot time into an epoch time, to compare with the epoch of our most recent snapshot @@ -356,7 +502,7 @@ sub take_snapshots { # update to most current possible datestamp %datestamp = get_date(); # print "we should have had a $type snapshot of $path $maxage seconds ago; most recent is $newestage seconds old.\n"; - push(@newsnaps, "$path\@autosnap_$datestamp{'sortable'}_$type"); + push(@newsnaps, "$path\@autosnap_$datestamp{'sortable'}${dateSuffix}_$type"); } } } @@ -364,6 +510,24 @@ sub take_snapshots { if ( (scalar(@newsnaps)) > 0) { foreach my $snap ( @newsnaps ) { + my $dataset = (split '@', $snap)[0]; + my $snapname = (split '@', $snap)[1]; + my $presnapshotfailure = 0; + if ($config{$dataset}{'pre_snapshot_script'} and !$args{'readonly'}) { + $ENV{'SANOID_TARGET'} = $dataset; + $ENV{'SANOID_SNAPNAME'} = $snapname; + if ($args{'verbose'}) { print "executing pre_snapshot_script '".$config{$dataset}{'pre_snapshot_script'}."' on dataset '$dataset'\n"; } + my $ret = runscript('pre_snapshot_script',$dataset); + + delete $ENV{'SANOID_TARGET'}; + delete $ENV{'SANOID_SNAPNAME'}; + + if ($ret != 0) { + # warning was already thrown by runscript function + $config{$dataset}{'no_inconsistent_snapshot'} and next; + $presnapshotfailure = 1; + } + } if ($args{'verbose'}) { print "taking snapshot $snap\n"; } if (!$args{'readonly'}) { system($zfs, "snapshot", "$snap") == 0 @@ -371,6 +535,17 @@ sub take_snapshots { # make sure we don't end up with multiple snapshots with the same ctime sleep 1; } + if ($config{$dataset}{'post_snapshot_script'} and !$args{'readonly'}) { + if (!$presnapshotfailure or $config{$dataset}{'force_post_snapshot_script'}) { + $ENV{'SANOID_TARGET'} = $dataset; + $ENV{'SANOID_SNAPNAME'} = $snapname; + if ($args{'verbose'}) { print "executing post_snapshot_script '".$config{$dataset}{'post_snapshot_script'}."' on dataset '$dataset'\n"; } + 
runscript('post_snapshot_script',$dataset); + + delete $ENV{'SANOID_TARGET'}; + delete $ENV{'SANOID_SNAPNAME'}; + } + } } $forcecacheupdate = 1; %snaps = getsnaps(%config,$cacheTTL,$forcecacheupdate); @@ -401,16 +576,20 @@ sub blabber { my $path = $config{$section}{'path'}; print "Filesystem $path has:\n"; print " $snapsbypath{$path}{'numsnaps'} total snapshots "; - print "(newest: "; - my $newest = sprintf("%.1f",$snapsbypath{$path}{'newest'} / 60 / 60); - print "$newest hours old)\n"; + if ($snapsbypath{$path}{'numsnaps'} == 0) { + print "(no current snapshots)" + } else { + print "(newest: "; + my $newest = sprintf("%.1f",$snapsbypath{$path}{'newest'} / 60 / 60); + print "$newest hours old)\n"; - foreach my $type (keys %{ $snapsbytype{$path} }){ - print " $snapsbytype{$path}{$type}{'numsnaps'} $type\n"; - print " desired: $config{$section}{$type}\n"; - print " newest: "; - my $newest = sprintf("%.1f",($snapsbytype{$path}{$type}{'newest'} / 60 / 60)); - print "$newest hours old, named $snapsbytype{$path}{$type}{'newestname'}\n"; + foreach my $type (keys %{ $snapsbytype{$path} }){ + print " $snapsbytype{$path}{$type}{'numsnaps'} $type\n"; + print " desired: $config{$section}{$type}\n"; + print " newest: "; + my $newest = sprintf("%.1f",($snapsbytype{$path}{$type}{'newest'} / 60 / 60)); + print "$newest hours old, named $snapsbytype{$path}{$type}{'newestname'}\n"; + } } print "\n\n"; } @@ -504,7 +683,6 @@ sub getsnaps { my ($config, $cacheTTL, $forcecacheupdate) = @_; - my $cache = '/var/cache/sanoidsnapshots.txt'; my @rawsnaps; my ($dev, $ino, $mode, $nlink, $uid, $gid, $rdev, $size, $atime, $mtime, $ctime, $blksize, $blocks) = stat($cache); @@ -541,7 +719,7 @@ sub getsnaps { } foreach my $snap (@rawsnaps) { - my ($fs,$snapname,$snapdate) = ($snap =~ m/(.*)\@(.*ly)\s*creation\s*(\d*)/); + my ($fs,$snapname,$snapdate) = ($snap =~ m/(.*)\@(.*ly)\t*creation\t*(\d*)/); # avoid pissing off use warnings if (defined $snapname) { @@ -571,10 +749,21 @@ sub init { tie my %ini, 'Config::IniFiles', ( -file => $conf_file ) or die "FATAL: cannot load $conf_file - please create a valid local config file before running sanoid!"; # we'll use these later to normalize potentially true and false values on any toggle keys - my @toggles = ('autosnap','autoprune','monitor_dont_warn','monitor_dont_crit','monitor','recursive','process_children_only'); + my @toggles = ('autosnap','autoprune','monitor_dont_warn','monitor_dont_crit','monitor','recursive','process_children_only','skip_children','no_inconsistent_snapshot','force_post_snapshot_script'); my @istrue=(1,"true","True","TRUE","yes","Yes","YES","on","On","ON"); my @isfalse=(0,"false","False","FALSE","no","No","NO","off","Off","OFF"); + # check if default configuration file is up to date + my $defaults_version = 1; + if (defined $defaults{'version'}{'version'}) { + $defaults_version = $defaults{'version'}{'version'}; + delete $defaults{'version'}; + } + + if ($defaults_version < $MINIMUM_DEFAULTS_VERSION) { + die "FATAL: you're using sanoid.defaults.conf v$defaults_version, this version of sanoid requires a minimum sanoid.defaults.conf v$MINIMUM_DEFAULTS_VERSION"; + } + foreach my $section (keys %ini) { # first up - die with honor if unknown parameters are set in any modules or templates by the user. @@ -628,7 +817,7 @@ sub init { # override with any locally set values in the module itself foreach my $key (keys %{$ini{$section}} ) { - if (! ($key =~ /template|recursive/)) { + if (! 
($key =~ /template|recursive|skip_children/)) { if ($args{'debug'}) { print "DEBUG: overriding $key on $section with value directly set in module.\n"; } $config{$section}{$key} = $ini{$section}{$key}; } @@ -653,10 +842,17 @@ sub init { # how 'bout some recursion? =) my @datasets; - if ($ini{$section}{'recursive'}) { + if ($ini{$section}{'recursive'} || $ini{$section}{'skip_children'}) { @datasets = getchilddatasets($config{$section}{'path'}); - foreach my $dataset(@datasets) { + DATASETS: foreach my $dataset(@datasets) { chomp $dataset; + + if ($ini{$section}{'skip_children'}) { + if ($args{'debug'}) { print "DEBUG: ignoring $dataset.\n"; } + delete $config{$dataset}; + next DATASETS; + } + foreach my $key (keys %{$config{$section}} ) { if (! ($key =~ /template|recursive|children_only/)) { if ($args{'debug'}) { print "DEBUG: recursively setting $key from $section to $dataset.\n"; } @@ -782,7 +978,7 @@ sub check_zpool() { exit $ERRORS{$state}; } - my $statcommand="/sbin/zpool list -o name,size,cap,health,free $pool"; + my $statcommand="$zpool list -o name,size,cap,health,free $pool"; if (! open STAT, "$statcommand|") { print ("$state '$statcommand' command returns no result! NOTE: This plugin needs OS support for ZFS, and execution with root privileges.\n"); @@ -830,7 +1026,7 @@ sub check_zpool() { ## flag to detect section of zpool status involving our zpool my $poolfind=0; - $statcommand="/sbin/zpool status $pool"; + $statcommand="$zpool status $pool"; if (! open STAT, "$statcommand|") { $state = 'CRITICAL'; print ("$state '$statcommand' command returns no result! NOTE: This plugin needs OS support for ZFS, and execution with root privileges.\n"); @@ -884,7 +1080,12 @@ sub check_zpool() { } ## other cases - my ($dev, $sta) = /^\s+(\S+)\s+(\S+)/; + my ($dev, $sta, $read, $write, $cksum) = /^\s+(\S+)\s+(\S+)\s+(\S+)\s+(\S+)\s+(\S+)/; + + if (!defined($sta)) { + # cache and logs are special and don't have a status + next; + } ## pool online, not degraded thanks to dead/corrupted disk if ($state eq "OK" && $sta eq "UNAVAIL") { @@ -899,8 +1100,21 @@ sub check_zpool() { ## no display for verbose level 1 next if ($verbose==1); ## don't display working devices for verbose level 2 - next if ($verbose==2 && $state eq "OK"); - next if ($verbose==2 && ($sta eq "ONLINE" || $sta eq "AVAIL" || $sta eq "INUSE")); + if ($verbose==2 && ($state eq "OK" || $sta eq "ONLINE" || $sta eq "AVAIL" || $sta eq "INUSE")) { + # check for io/checksum errors + + my @vdeverr = (); + if ($read != 0) { push @vdeverr, "read" }; + if ($write != 0) { push @vdeverr, "write" }; + if ($cksum != 0) { push @vdeverr, "cksum" }; + + if (scalar @vdeverr) { + $dmge=$dmge . "(" . $dev . ":" . join(", ", @vdeverr) . " errors) "; + if ($state eq "OK") { $state = "WARNING" }; + } + + next; + } ## show everything else if (/^\s{3}(\S+)/) { @@ -920,6 +1134,128 @@ sub check_zpool() { return ($ERRORS{$state},$msg); } # end check_zpool() +sub check_capacity_limit { + my $value = shift; + + if (!defined($value) || $value !~ /^\d+\z/) { + return undef; + } + + if ($value < 0 || $value > 100) { + return undef; + } + + return 1 +} + +sub check_zpool_capacity() { + my %ERRORS=('DEPENDENT'=>4,'UNKNOWN'=>3,'OK'=>0,'WARNING'=>1,'CRITICAL'=>2); + my $state="UNKNOWN"; + my $msg="FAILURE"; + + my $pool=shift; + my $capacitylimitsref=shift; + my %capacitylimits=%$capacitylimitsref; + + my $statcommand="$zpool list -H -o cap $pool"; + + if (! 
open STAT, "$statcommand|") { + print ("$state '$statcommand' command returns no result!\n"); + exit $ERRORS{$state}; + } + + my $line = ; + close(STAT); + + chomp $line; + my @row = split(/ +/, $line); + my $cap=$row[0]; + + ## check for valid capacity value + if ($cap !~ m/^[0-9]{1,3}%$/ ) { + $state = "CRITICAL"; + $msg = sprintf "ZPOOL {%s} does not exist and/or is not responding!\n", $pool; + print $state, " ", $msg; + exit ($ERRORS{$state}); + } + + $state="OK"; + + # check capacity + my $capn = $cap; + $capn =~ s/\D//g; + + if (defined($capacitylimits{"warn"})) { + if ($capn >= $capacitylimits{"warn"}) { + $state = "WARNING"; + } + } + + if (defined($capacitylimits{"crit"})) { + if ($capn >= $capacitylimits{"crit"}) { + $state = "CRITICAL"; + } + } + + $msg = sprintf "ZPOOL %s : %s\n", $pool, $cap; + $msg = "$state $msg"; + return ($ERRORS{$state},$msg); +} # end check_zpool_capacity() + +sub check_prune_defer { + my ($config, $section) = @_; + + my $limit = $config{$section}{"prune_defer"}; + + if (!check_capacity_limit($limit)) { + die "ERROR: invalid prune_defer limit!\n"; + } + + if ($limit eq 0) { + return 0; + } + + my @parts = split /\//, $section, 2; + my $pool = $parts[0]; + + if (exists $capacitycache{$pool}) { + } else { + $capacitycache{$pool} = get_zpool_capacity($pool); + } + + if ($limit < $capacitycache{$pool}) { + return 0; + } + + return 1; +} + +sub get_zpool_capacity { + my $pool = shift; + + my $statcommand="$zpool list -H -o cap $pool"; + + if (! open STAT, "$statcommand|") { + die "ERROR: '$statcommand' command returns no result!\n"; + } + + my $line = ; + close(STAT); + + chomp $line; + my @row = split(/ +/, $line); + my $cap=$row[0]; + + ## check for valid capacity value + if ($cap !~ m/^[0-9]{1,3}%$/ ) { + die "ERROR: '$statcommand' command returned invalid capacity value ($cap)!\n"; + } + + $cap =~ s/\D//g; + + return $cap; +} + ###################################################################################################### ###################################################################################################### ###################################################################################################### @@ -950,13 +1286,22 @@ sub checklock { # no lockfile return 1; } + # make sure lockfile contains something + if ( -z $lockfile) { + # zero size lockfile, something is wrong + die "ERROR: something is wrong! $lockfile is empty\n"; + } # lockfile exists. read pid and mutex from it. see if it's our pid. if not, see if # there's still a process running with that pid and with the same mutex. - open FH, "< $lockfile"; + open FH, "< $lockfile" or die "ERROR: unable to open $lockfile"; my @lock = ; close FH; + # if we didn't get exactly 2 items from the lock file there is a problem + if (scalar(@lock) != 2) { + die "ERROR: $lockfile is invalid.\n" + } my $lockmutex = pop(@lock); my $lockpid = pop(@lock); @@ -968,7 +1313,6 @@ sub checklock { # we own the lockfile. no need to check any further. 
return 2; } - open PL, "$pscmd -p $lockpid -o args= |"; my @processlist = <PL>; close PL; @@ -1073,9 +1417,96 @@ sub getchilddatasets { my @children = <FH>; close FH; + # parent dataset is the first element + shift @children; + return @children; } +#######################################################################################################################3 +#######################################################################################################################3 +#######################################################################################################################3 + +sub removecachedsnapshots { + my $wait = shift; + + if (not %pruned) { + return; + } + + my $unlocked = checklock('sanoid_cacheupdate'); + + if ($wait != 1 && not $unlocked) { + if ($args{'verbose'}) { print "INFO: deferring cache update (snapshot removal) - valid cache update lock held by another sanoid process.\n"; } + return; + } + + # wait until we can get a lock to do our cache changes + while (not $unlocked) { + if ($args{'verbose'}) { print "INFO: waiting for cache update lock held by another sanoid process.\n"; } + sleep(10); + $unlocked = checklock('sanoid_cacheupdate'); + } + + writelock('sanoid_cacheupdate'); + + if ($args{'verbose'}) { + print "INFO: removing destroyed snapshots from cache.\n"; + } + open FH, "< $cache"; + my @rawsnaps = <FH>; + close FH; + + open FH, "> $cache" or die "Could not write to $cache!\n"; + foreach my $snapline ( @rawsnaps ) { + my @columns = split("\t", $snapline); + my $snap = $columns[0]; + print FH $snapline unless ( exists($pruned{$snap}) ); + } + close FH; + + removelock('sanoid_cacheupdate'); + %snaps = getsnaps(\%config,$cacheTTL,$forcecacheupdate); + + # clear hash + undef %pruned; +} + +#######################################################################################################################3 +#######################################################################################################################3 +#######################################################################################################################3 + +sub runscript { + my $key=shift; + my $dataset=shift; + + my $timeout=$config{$dataset}{'script_timeout'}; + + my $ret; + eval { + if ($timeout gt 0) { + local $SIG{ALRM} = sub { die "alarm\n" }; + alarm $timeout; + } + $ret = system($config{$dataset}{$key}); + alarm 0; + }; + if ($@) { + if ($@ eq "alarm\n") { + warn "WARN: $key didn't finish in the allowed time!"; + } else { + warn "CRITICAL ERROR: $@"; + } + return -1; + } else { + if ($ret != 0) { + warn "WARN: $key failed, $?"; + } + } + + return $ret; +} + __END__ =head1 NAME @@ -1099,6 +1530,7 @@ Options: --force-update Clears out sanoid's zfs snapshot cache --monitor-health Reports on zpool "health", in a Nagios compatible format + --monitor-capacity Reports on zpool capacity, in a Nagios compatible format --monitor-snapshots Reports on snapshot "health", in a Nagios compatible format --take-snapshots Creates snapshots as specified in sanoid.conf --prune-snapshots Purges expired snapshots as specified in sanoid.conf diff --git a/sanoid.conf b/sanoid.conf index 9b1f19d..9f13105 100644 --- a/sanoid.conf +++ b/sanoid.conf @@ -40,6 +40,7 @@ daily = 60 [template_production] + frequently = 0 hourly = 36 daily = 30 monthly = 3 @@ -49,6 +50,7 @@ [template_backup] autoprune = yes + frequently = 0 hourly = 30 daily = 90 monthly = 12 @@ -67,6 +69,21 @@ daily_warn = 48 daily_crit = 60 +[template_scripts] + ### dataset and snapshot name will be supplied as
environment variables + ### for all pre/post/prune scripts ($SANOID_TARGET, $SANOID_SNAPNAME) + ### run script before snapshot + pre_snapshot_script = /path/to/script.sh + ### run script after snapshot + post_snapshot_script = /path/to/script.sh + ### run script after pruning snapshot + pruning_script = /path/to/script.sh + ### don't take an inconsistent snapshot (skip if pre script fails) + #no_inconsistent_snapshot = yes + ### run post_snapshot_script when pre_snapshot_script is failing + #force_post_snapshot_script = yes + ### limit allowed execution time of scripts before continuing (<= 0: infinite) + script_timeout = 5 + ### (see the sample pruning script below) [template_ignore] autoprune = no diff --git a/sanoid.defaults.conf b/sanoid.defaults.conf index ff79ebb..4139393 100644 --- a/sanoid.defaults.conf +++ b/sanoid.defaults.conf @@ -5,6 +5,8 @@ # # # you have been warned. # ################################################################################### +[version] +version = 2 [template_default] @@ -15,6 +17,26 @@ path = recursive = use_template = process_children_only = +skip_children = + +pre_snapshot_script = +post_snapshot_script = +pruning_script = +script_timeout = 5 +no_inconsistent_snapshot = +force_post_snapshot_script = + +# for snapshot periods shorter than one hour, the period duration must be defined +# in minutes. Because they are executed within a full hour, the selected +# value should divide 60 minutes without remainder, so that snapshots +# are taken at equal intervals. Values larger than 59 aren't practical +# as only one snapshot will be taken on each full hour in this case. +# examples: +# frequent_period = 15 -> four snapshots each hour, 15 minutes apart +# frequent_period = 5 -> twelve snapshots each hour, 5 minutes apart +# frequent_period = 45 -> two snapshots each hour with different time gaps +# between them: 45 minutes and 15 minutes in this case +frequent_period = 15 # If any snapshot type is set to 0, we will not take snapshots for it - and will immediately # prune any of those type snapshots already present. # Otherwise, if autoprune is set, we will prune any snapshots of that type which are older # than (setting * periodicity) - so if daily = 90, we'll prune any dailies older than 90 days. autoprune = yes +frequently = 0 hourly = 48 daily = 90 weekly = 0 monthly = 6 yearly = 0 -min_percent_free = 10 +# pruning can be skipped based on the used capacity of the pool +# (0: always prune, 1-100: only prune if used capacity is greater than this value) +prune_defer = 0 # We will automatically take snapshots if autosnap is on, at the desired times configured # below (or immediately, if we don't have one since the last preferred time for that type). @@ -67,6 +92,8 @@ yearly_min = 0 monitor = yes monitor_dont_warn = no monitor_dont_crit = no +frequently_warn = 0 +frequently_crit = 0 hourly_warn = 90 hourly_crit = 360 daily_warn = 28 @@ -77,3 +104,7 @@ monthly_warn = 5 monthly_crit = 6 yearly_warn = 0 yearly_crit = 0 + +# default limits for capacity checks (if set to 0, limit will not be checked) +capacity_warn = 80 +capacity_crit = 95 diff --git a/syncoid b/syncoid index d927cba..c5ef3fd 100755 --- a/syncoid +++ b/syncoid @@ -4,56 +4,55 @@ # from http://www.gnu.org/licenses/gpl-3.0.html on 2014-11-17. A copy should also be available in this # project's Git repository at https://github.com/jimsalterjrs/sanoid/blob/master/LICENSE.
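To make the [template_scripts] hooks above concrete, here is a minimal sketch of a `pruning_script`; the log path is hypothetical, while `SANOID_TARGET` and `SANOID_SNAPNAME` are the environment variables sanoid exports around each hook, as documented above:

```
#!/bin/sh
# invoked by sanoid after it destroys a snapshot; SANOID_TARGET holds
# the dataset name and SANOID_SNAPNAME the name of the pruned snapshot
echo "$(date) pruned ${SANOID_TARGET}@${SANOID_SNAPNAME}" >> /var/log/sanoid-prune.log
```

Keep such hooks quick: per script_timeout above, sanoid gives up on any script that runs longer than the configured number of seconds (5 by default).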
-my $version = '1.4.16'; +$::VERSION = '1.4.18'; use strict; use warnings; use Data::Dumper; +use Getopt::Long qw(:config auto_version auto_help); +use Pod::Usage; use Time::Local; use Sys::Hostname; -my %args = getargs(@ARGV); +# Blank defaults to use ssh client's default +# TODO: Merge into a single "sshflags" option? +my %args = ('sshkey' => '', 'sshport' => '', 'sshcipher' => '', 'sshoption' => [], 'target-bwlimit' => '', 'source-bwlimit' => ''); +GetOptions(\%args, "no-command-checks", "monitor-version", "compress=s", "dumpsnaps", "recursive|r", + "source-bwlimit=s", "target-bwlimit=s", "sshkey=s", "sshport=i", "sshcipher|c=s", "sshoption|o=s@", + "debug", "quiet", "no-stream", "no-sync-snap", "no-resume", "exclude=s@", "skip-parent", "identifier=s", + "no-clone-handling") or pod2usage(2); -if ($args{'version'}) { - print "Syncoid version: $version\n"; - exit 0; +my %compressargs = %{compressargset($args{'compress'} || 'default')}; # Can't be done with GetOptions arg, as default still needs to be set + +# TODO Expand to accept multiple sources? +if (scalar(@ARGV) != 2) { + print("Source or target not found!\n"); + pod2usage(2); + exit 127; +} else { + $args{'source'} = $ARGV[0]; + $args{'target'} = $ARGV[1]; } -if (!(defined $args{'source'} && defined $args{'target'})) { - print 'usage: syncoid [src_user@src_host:]src_pool/src_dataset [dst_user@dst_host:]dst_pool/dst_dataset'."\n"; - exit 127; +# Could possibly merge these into an options function +if (length $args{'source-bwlimit'}) { + $args{'source-bwlimit'} = "-R $args{'source-bwlimit'}"; } +if (length $args{'target-bwlimit'}) { + $args{'target-bwlimit'} = "-r $args{'target-bwlimit'}"; +} +$args{'streamarg'} = (defined $args{'no-stream'} ? '-i' : '-I'); my $rawsourcefs = $args{'source'}; my $rawtargetfs = $args{'target'}; my $debug = $args{'debug'}; my $quiet = $args{'quiet'}; +my $resume = !$args{'no-resume'}; my $zfscmd = '/sbin/zfs'; my $sshcmd = '/usr/bin/ssh'; my $pscmd = '/bin/ps'; -my $sshcipher; -if (defined $args{'c'}) { - $sshcipher = "-c $args{'c'}"; -} else { - # default to no cipher specified now that SSH - # has not defaulted to a cripplingly slow cipher - # in a very long time - $sshcipher = ''; -} -my $sshport = '-p 22'; -my $sshoption; -if (defined $args{'o'}) { - my @options = split(',', $args{'o'}); - foreach my $option (@options) { - $sshoption .= " -o $option"; - if ($option eq "NoneSwitch=yes") { - $sshcipher = ""; - } - } -} else { - $sshoption = ""; -} + my $pvcmd = '/usr/bin/pv'; my $mbuffercmd = '/usr/bin/mbuffer'; my $sudocmd = '/usr/bin/sudo'; @@ -62,23 +61,36 @@ my $mbufferoptions = '-q -s 128k -m 16M 2>/dev/null'; # being present on remote machines. my $lscmd = '/bin/ls'; -if ( $args{'sshport'} ) { - $sshport = "-p $args{'sshport'}"; +if (length $args{'sshcipher'}) { + $args{'sshcipher'} = "-c $args{'sshcipher'}"; } +if (length $args{'sshport'}) { + $args{'sshport'} = "-p $args{'sshport'}"; +} +if (length $args{'sshkey'}) { + $args{'sshkey'} = "-i $args{'sshkey'}"; +} +my $sshoptions = join " ", map { "-o " . $_ } @{$args{'sshoption'}}; # deref required + +my $identifier = ""; +if (length $args{'identifier'}) { + if ($args{'identifier'} !~ /^[a-zA-Z0-9-_:.]+$/) { + # invalid extra identifier + print("CRITICAL: extra identifier contains invalid chars!\n"); + pod2usage(2); + exit 127; + } + $identifier = "$args{'identifier'}_"; +} + # figure out if source and/or target are remote. 
-if ( $args{'sshkey'} ) { - $sshcmd = "$sshcmd $sshoption $sshcipher $sshport -i $args{'sshkey'}"; -} -else { - $sshcmd = "$sshcmd $sshoption $sshcipher $sshport"; -} +$sshcmd = "$sshcmd $args{'sshcipher'} $sshoptions $args{'sshport'} $args{'sshkey'}"; +if ($debug) { print "DEBUG: SSHCMD: $sshcmd\n"; } my ($sourcehost,$sourcefs,$sourceisroot) = getssh($rawsourcefs); my ($targethost,$targetfs,$targetisroot) = getssh($rawtargetfs); -my $sourcesudocmd; -my $targetsudocmd; -if ($sourceisroot) { $sourcesudocmd = ''; } else { $sourcesudocmd = $sudocmd; } -if ($targetisroot) { $targetsudocmd = ''; } else { $targetsudocmd = $sudocmd; } +my $sourcesudocmd = $sourceisroot ? '' : $sudocmd; +my $targetsudocmd = $targetisroot ? '' : $sudocmd; # figure out whether compression, mbuffering, pv # are available on source, target, local machines. @@ -86,23 +98,66 @@ if ($targetisroot) { $targetsudocmd = ''; } else { $targetsudocmd = $sudocmd; } my %avail = checkcommands(); my %snaps; +my $exitcode = 0; ## break here to call replication individually so that we ## ## can loop across children separately, for recursive ## ## replication ## -if (! $args{'recursive'}) { - syncdataset($sourcehost, $sourcefs, $targethost, $targetfs); +if (!defined $args{'recursive'}) { + syncdataset($sourcehost, $sourcefs, $targethost, $targetfs, undef); } else { if ($debug) { print "DEBUG: recursive sync of $sourcefs.\n"; } my @datasets = getchilddatasets($sourcehost, $sourcefs, $sourceisroot); - foreach my $dataset(@datasets) { - $dataset =~ s/$sourcefs//; + + my @deferred; + + foreach my $datasetProperties(@datasets) { + my $dataset = $datasetProperties->{'name'}; + my $origin = $datasetProperties->{'origin'}; + if ($origin eq "-" || defined $args{'no-clone-handling'}) { + $origin = undef; + } else { + # check if clone source is replicated too + my @values = split(/@/, $origin, 2); + my $srcdataset = $values[0]; + + my $found = 0; + foreach my $datasetProperties(@datasets) { + if ($datasetProperties->{'name'} eq $srcdataset) { + $found = 1; + last; + } + } + + if ($found == 0) { + # clone source is not replicated, do a full replication + $origin = undef; + } else { + # clone source is replicated, defer until all non clones are replicated + push @deferred, $datasetProperties; + next; + } + } + + $dataset =~ s/\Q$sourcefs\E//; chomp $dataset; my $childsourcefs = $sourcefs . $dataset; my $childtargetfs = $targetfs . $dataset; # print "syncdataset($sourcehost, $childsourcefs, $targethost, $childtargetfs); \n"; - syncdataset($sourcehost, $childsourcefs, $targethost, $childtargetfs); + syncdataset($sourcehost, $childsourcefs, $targethost, $childtargetfs, $origin); + } + + # replicate cloned datasets and if this is the initial run, recreate them on the target + foreach my $datasetProperties(@deferred) { + my $dataset = $datasetProperties->{'name'}; + my $origin = $datasetProperties->{'origin'}; + + $dataset =~ s/\Q$sourcefs\E//; + chomp $dataset; + my $childsourcefs = $sourcefs . $dataset; + my $childtargetfs = $targetfs . 
$dataset; + syncdataset($sourcehost, $childsourcefs, $targethost, $childtargetfs, $origin); } } @@ -116,7 +171,7 @@ if ($targethost ne '') { close FH; } -exit 0; +exit $exitcode; ############################################################################## ############################################################################## @@ -126,14 +181,52 @@ exit 0. sub getchilddatasets { my ($rhost,$fs,$isroot,%snaps) = @_; my $mysudocmd; + my $fsescaped = escapeshellparam($fs); if ($isroot) { $mysudocmd = ''; } else { $mysudocmd = $sudocmd; } - if ($rhost ne '') { $rhost = "$sshcmd $rhost"; } + if ($rhost ne '') { + $rhost = "$sshcmd $rhost"; + # double escaping needed + $fsescaped = escapeshellparam($fsescaped); + } - my $getchildrencmd = "$rhost $mysudocmd $zfscmd list -o name -t filesystem,volume -Hr $fs |"; + my $getchildrencmd = "$rhost $mysudocmd $zfscmd list -o name,origin -t filesystem,volume -Hr $fsescaped |"; if ($debug) { print "DEBUG: getting list of child datasets on $fs using $getchildrencmd...\n"; } - open FH, $getchildrencmd; - my @children = <FH>; + if (! open FH, $getchildrencmd) { + die "ERROR: list command failed!\n"; + } + + my @children; + my $first = 1; + + DATASETS: while(<FH>) { + chomp; + + if (defined $args{'skip-parent'} && $first eq 1) { + # parent dataset is the first element + $first = 0; + next; + } + + my ($dataset, $origin) = /^([^\t]+)\t([^\t]+)/; + + if (defined $args{'exclude'}) { + my $excludes = $args{'exclude'}; + foreach (@$excludes) { + print("$dataset\n"); + if ($dataset =~ /$_/) { + if ($debug) { print "DEBUG: excluded $dataset because of $_\n"; } + next DATASETS; + } + } + } + + my %properties; + $properties{'name'} = $dataset; + $properties{'origin'} = $origin; + + push @children, \%properties; + } close FH; return @children; @@ -141,44 +234,95 @@ sub getchilddatasets { sub syncdataset { - my ($sourcehost, $sourcefs, $targethost, $targetfs) = @_; + my ($sourcehost, $sourcefs, $targethost, $targetfs, $origin) = @_; + + my $sourcefsescaped = escapeshellparam($sourcefs); + my $targetfsescaped = escapeshellparam($targetfs); if ($debug) { print "DEBUG: syncing source $sourcefs to target $targetfs.\n"; } + my $sync = getzfsvalue($sourcehost,$sourcefs,$sourceisroot,'syncoid:sync'); + + if ($sync eq 'true' || $sync eq '-' || $sync eq '') { + # empty is handled the same as unset (aka: '-') + # definitely sync this dataset - if a host is called 'true' or '-', then you're special + } elsif ($sync eq 'false') { + if (!$quiet) { print "INFO: Skipping dataset (syncoid:sync=false): $sourcefs...\n"; } + return 0; + } else { + my $hostid = hostname(); + my @hosts = split(/,/,$sync); + if (!(grep $hostid eq $_, @hosts)) { + if (!$quiet) { print "INFO: Skipping dataset (syncoid:sync doesn't include $hostid): $sourcefs...\n"; } + return 0; + } + } + # make sure target is not currently in receive. if (iszfsbusy($targethost,$targetfs,$targetisroot)) { warn "Cannot sync now: $targetfs is already target of a zfs receive process.\n"; + if ($exitcode < 1) { $exitcode = 1; } return 0; } # does the target filesystem exist yet? my $targetexists = targetexists($targethost,$targetfs,$targetisroot); - # build hashes of the snaps on the source and target filesystems.
+ my $receiveextraargs = ""; + my $receivetoken; + if ($resume) { + # save state of interrupted receive stream + $receiveextraargs = "-s"; - %snaps = getsnaps('source',$sourcehost,$sourcefs,$sourceisroot); + if ($targetexists) { + # check remote dataset for receive resume token (interrupted receive) + $receivetoken = getreceivetoken($targethost,$targetfs,$targetisroot); - if ($targetexists) { - my %targetsnaps = getsnaps('target',$targethost,$targetfs,$targetisroot); - my %sourcesnaps = %snaps; - %snaps = (%sourcesnaps, %targetsnaps); - } - - if ($args{'dumpsnaps'}) { print "merged snapshot list: \n"; dumphash(\%snaps); print "\n\n\n"; } - - # create a new syncoid snapshot on the source filesystem. - my $newsyncsnap; - if (!defined ($args{'no-sync-snap'}) ) { - $newsyncsnap = newsyncsnap($sourcehost,$sourcefs,$sourceisroot); - } else { - # we don't want sync snapshots created, so use the newest snapshot we can find. - $newsyncsnap = getnewestsnapshot($sourcehost,$sourcefs,$sourceisroot); - if ($newsyncsnap eq 0) { - warn "CRITICAL: no snapshots exist on source, and you asked for --no-sync-snap.\n"; - return 0; + if ($debug && defined($receivetoken)) { + print "DEBUG: got receive resume token: $receivetoken: \n"; + } } } + my $newsyncsnap; + + # skip snapshot checking/creation in case of resumed receive + if (!defined($receivetoken)) { + # build hashes of the snaps on the source and target filesystems. + + %snaps = getsnaps('source',$sourcehost,$sourcefs,$sourceisroot); + + if ($targetexists) { + my %targetsnaps = getsnaps('target',$targethost,$targetfs,$targetisroot); + my %sourcesnaps = %snaps; + %snaps = (%sourcesnaps, %targetsnaps); + } + + if (defined $args{'dumpsnaps'}) { + print "merged snapshot list of $targetfs: \n"; + dumphash(\%snaps); + print "\n\n\n"; + } + + if (!defined $args{'no-sync-snap'}) { + # create a new syncoid snapshot on the source filesystem. + $newsyncsnap = newsyncsnap($sourcehost,$sourcefs,$sourceisroot); + if (!$newsyncsnap) { + # we already whined about the error + return 0; + } + } else { + # we don't want sync snapshots created, so use the newest snapshot we can find. + $newsyncsnap = getnewestsnapshot($sourcehost,$sourcefs,$sourceisroot); + if ($newsyncsnap eq 0) { + warn "CRITICAL: no snapshots exist on source $sourcefs, and you asked for --no-sync-snap.\n"; + if ($exitcode < 1) { $exitcode = 1; } + return 0; + } + } + } + my $newsyncsnapescaped = escapeshellparam($newsyncsnap); + # there is currently (2014-09-01) a bug in ZFS on Linux # that causes readonly to always show on if it's EVER # been turned on... even when it's off... unless and @@ -201,6 +345,11 @@ sub syncdataset { } my $oldestsnap = getoldestsnapshot(\%snaps); if (! $oldestsnap) { + if (defined ($args{'no-sync-snap'}) ) { + # we already whined about the missing snapshots + return 0; + } + # getoldestsnapshot() returned false, so use new sync snapshot if ($debug) { print "DEBUG: getoldestsnapshot() returned false, so using $newsyncsnap.\n"; } $oldestsnap = $newsyncsnap; @@ -208,15 +357,30 @@ sub syncdataset { # if --no-stream is specified, our full needs to be the newest snapshot, not the oldest. 
if (defined $args{'no-stream'}) { $oldestsnap = getnewestsnapshot(\%snaps); } + my $oldestsnapescaped = escapeshellparam($oldestsnap); - my $sendcmd = "$sourcesudocmd $zfscmd send $sourcefs\@$oldestsnap"; - my $recvcmd = "$targetsudocmd $zfscmd receive -F $targetfs"; + my $sendcmd = "$sourcesudocmd $zfscmd send $sourcefsescaped\@$oldestsnapescaped"; + my $recvcmd = "$targetsudocmd $zfscmd receive $receiveextraargs -F $targetfsescaped"; + + my $pvsize; + if (defined $origin) { + my $originescaped = escapeshellparam($origin); + $sendcmd = "$sourcesudocmd $zfscmd send -i $originescaped $sourcefsescaped\@$oldestsnapescaped"; + my $streamargBackup = $args{'streamarg'}; + $args{'streamarg'} = "-i"; + $pvsize = getsendsize($sourcehost,$origin,"$sourcefs\@$oldestsnap",$sourceisroot); + $args{'streamarg'} = $streamargBackup; + } else { + $pvsize = getsendsize($sourcehost,"$sourcefs\@$oldestsnap",0,$sourceisroot); + } - my $pvsize = getsendsize($sourcehost,"$sourcefs\@$oldestsnap",0,$sourceisroot); my $disp_pvsize = readablebytes($pvsize); if ($pvsize == 0) { $disp_pvsize = 'UNKNOWN'; } my $synccmd = buildsynccmd($sendcmd,$recvcmd,$pvsize,$sourceisroot,$targetisroot); if (!$quiet) { + if (defined $origin) { + print "INFO: Clone is recreated on target $targetfs based on $origin\n"; + } if (!defined ($args{'no-stream'}) ) { print "INFO: Sending oldest full snapshot $sourcefs\@$oldestsnap (~ $disp_pvsize) to new target filesystem:\n"; } else { @@ -228,10 +392,14 @@ sub syncdataset { # make sure target is (still) not currently in receive. if (iszfsbusy($targethost,$targetfs,$targetisroot)) { warn "Cannot sync now: $targetfs is already target of a zfs receive process.\n"; + if ($exitcode < 1) { $exitcode = 1; } return 0; } - system($synccmd) == 0 - or die "CRITICAL ERROR: $synccmd failed: $?"; + system($synccmd) == 0 or do { + warn "CRITICAL ERROR: $synccmd failed: $?"; + if ($exitcode < 2) { $exitcode = 2; } + return 0; + }; # now do an -I to the new sync snapshot, assuming there were any snapshots # other than the new sync snapshot to begin with, of course - and that we @@ -245,7 +413,7 @@ sub syncdataset { # $originaltargetreadonly = getzfsvalue($targethost,$targetfs,$targetisroot,'readonly'); # setzfsvalue($targethost,$targetfs,$targetisroot,'readonly','on'); - $sendcmd = "$sourcesudocmd $zfscmd send $args{'streamarg'} $sourcefs\@$oldestsnap $sourcefs\@$newsyncsnap"; + $sendcmd = "$sourcesudocmd $zfscmd send $args{'streamarg'} $sourcefsescaped\@$oldestsnapescaped $sourcefsescaped\@$newsyncsnapescaped"; $pvsize = getsendsize($sourcehost,"$sourcefs\@$oldestsnap","$sourcefs\@$newsyncsnap",$sourceisroot); $disp_pvsize = readablebytes($pvsize); if ($pvsize == 0) { $disp_pvsize = "UNKNOWN"; } @@ -254,6 +422,7 @@ sub syncdataset { # make sure target is (still) not currently in receive. 
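# Illustrative sketch (hypothetical names): for a clone, the origin branch above
# swaps the full send for an incremental from the clone's origin, i.e.
#
#   zfs send -i 'pool/base@snap1' 'pool/clone@syncoid_...' | zfs receive -s -F 'backup/clone'
#
# so the target recreates the clone against the already-replicated origin
# instead of receiving a second full copy.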
if (iszfsbusy($targethost,$targetfs,$targetisroot)) { warn "Cannot sync now: $targetfs is already target of a zfs receive process.\n"; + if ($exitcode < 1) { $exitcode = 1; } return 0; } @@ -261,9 +430,12 @@ sub syncdataset { if ($debug) { print "DEBUG: $synccmd\n"; } if ($oldestsnap ne $newsyncsnap) { - system($synccmd) == 0 - or warn "CRITICAL ERROR: $synccmd failed: $?"; + my $ret = system($synccmd); + if ($ret != 0) { + warn "CRITICAL ERROR: $synccmd failed: $?"; + if ($exitcode < 1) { $exitcode = 1; } return 0; + } } else { if (!$quiet) { print "INFO: no incremental sync needed; $oldestsnap is already the newest available snapshot.\n"; } } @@ -274,6 +446,30 @@ sub syncdataset { # setzfsvalue($targethost,$targetfs,$targetisroot,'readonly',$originaltargetreadonly); } } else { + # resume interrupted receive if there is a valid resume token + # and because this will only resume the receive to the next + # snapshot, do a normal sync after that + if (defined($receivetoken)) { + my $sendcmd = "$sourcesudocmd $zfscmd send -t $receivetoken"; + my $recvcmd = "$targetsudocmd $zfscmd receive $receiveextraargs -F $targetfsescaped"; + my $pvsize = getsendsize($sourcehost,"","",$sourceisroot,$receivetoken); + my $disp_pvsize = readablebytes($pvsize); + if ($pvsize == 0) { $disp_pvsize = "UNKNOWN"; } + my $synccmd = buildsynccmd($sendcmd,$recvcmd,$pvsize,$sourceisroot,$targetisroot); + + if (!$quiet) { print "Resuming interrupted zfs send/receive from $sourcefs to $targetfs (~ $disp_pvsize remaining):\n"; } + if ($debug) { print "DEBUG: $synccmd\n"; } + system("$synccmd") == 0 or do { + warn "CRITICAL ERROR: $synccmd failed: $?"; + if ($exitcode < 2) { $exitcode = 2; } + return 0; + }; + + # a resumed transfer will only be done to the next snapshot, + # so do a normal sync cycle + return syncdataset($sourcehost, $sourcefs, $targethost, $targetfs, undef); + } + # find most recent matching snapshot and do an -I # to the new snapshot @@ -285,7 +481,7 @@ sub syncdataset { my $targetsize = getzfsvalue($targethost,$targetfs,$targetisroot,'-p used'); - my $matchingsnap = getmatchingsnapshot($targetsize, \%snaps); + my $matchingsnap = getmatchingsnapshot($sourcefs, $targetfs, $targetsize, \%snaps); if (! $matchingsnap) { # no matching snapshot; we whined piteously already, but let's go ahead and return false # now in case more child datasets need replication. @@ -295,6 +491,7 @@ sub syncdataset { # make sure target is (still) not currently in receive. if (iszfsbusy($targethost,$targetfs,$targetisroot)) { warn "Cannot sync now: $targetfs is already target of a zfs receive process.\n"; + if ($exitcode < 1) { $exitcode = 1; } return 0; } @@ -302,18 +499,19 @@ sub syncdataset { # barf some text but don't touch the filesystem if (!$quiet) { print "INFO: no snapshots on source newer than $newsyncsnap on target. Nothing to do, not syncing.\n"; } } else { + my $matchingsnapescaped = escapeshellparam($matchingsnap); # rollback target to matchingsnap if ($debug) { print "DEBUG: rolling back target to $targetfs\@$matchingsnap...\n"; } if ($targethost ne '') { - if ($debug) { print "$sshcmd $targethost $targetsudocmd $zfscmd rollback -R $targetfs\@$matchingsnap\n"; } - system ("$sshcmd $targethost $targetsudocmd $zfscmd rollback -R $targetfs\@$matchingsnap"); + if ($debug) { print "$sshcmd $targethost $targetsudocmd $zfscmd rollback -R $targetfsescaped\@$matchingsnapescaped\n"; } + system ("$sshcmd $targethost " .
escapeshellparam("$targetsudocmd $zfscmd rollback -R $targetfsescaped\@$matchingsnapescaped")); } else { - if ($debug) { print "$targetsudocmd $zfscmd rollback -R $targetfs\@$matchingsnap\n"; } - system ("$targetsudocmd $zfscmd rollback -R $targetfs\@$matchingsnap"); + if ($debug) { print "$targetsudocmd $zfscmd rollback -R $targetfsescaped\@$matchingsnapescaped\n"; } + system ("$targetsudocmd $zfscmd rollback -R $targetfsescaped\@$matchingsnapescaped"); } - my $sendcmd = "$sourcesudocmd $zfscmd send $args{'streamarg'} $sourcefs\@$matchingsnap $sourcefs\@$newsyncsnap"; - my $recvcmd = "$targetsudocmd $zfscmd receive -F $targetfs"; + my $sendcmd = "$sourcesudocmd $zfscmd send $args{'streamarg'} $sourcefsescaped\@$matchingsnapescaped $sourcefsescaped\@$newsyncsnapescaped"; + my $recvcmd = "$targetsudocmd $zfscmd receive $receiveextraargs -F $targetfsescaped"; my $pvsize = getsendsize($sourcehost,"$sourcefs\@$matchingsnap","$sourcefs\@$newsyncsnap",$sourceisroot); my $disp_pvsize = readablebytes($pvsize); if ($pvsize == 0) { $disp_pvsize = "UNKNOWN"; } @@ -321,8 +519,11 @@ sub syncdataset { if (!$quiet) { print "Sending incremental $sourcefs\@$matchingsnap ... $newsyncsnap (~ $disp_pvsize):\n"; } if ($debug) { print "DEBUG: $synccmd\n"; } - system("$synccmd") == 0 - or die "CRITICAL ERROR: $synccmd failed: $?"; + system("$synccmd") == 0 or do { + warn "CRITICAL ERROR: $synccmd failed: $?"; + if ($exitcode < 2) { $exitcode = 2; } + return 0; + }; # restore original readonly value to target after sync complete # dyking this functionality out for the time being due to buggy mount/unmount behavior @@ -331,117 +532,80 @@ sub syncdataset { } } - # prune obsolete sync snaps on source and target. - pruneoldsyncsnaps($sourcehost,$sourcefs,$newsyncsnap,$sourceisroot,keys %{ $snaps{'source'}}); - pruneoldsyncsnaps($targethost,$targetfs,$newsyncsnap,$targetisroot,keys %{ $snaps{'target'}}); + if (!defined $args{'no-sync-snap'}) { + # prune obsolete sync snaps on source and target (only if this run created them).
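# Illustrative note: the sync snapshots pruned here carry this host's own
# prefix; e.g. a hypothetical host "backuphost" with no --identifier creates
#
#   syncoid_backuphost_2018-04-01:12:00:00
#
# and pruneoldsyncsnaps() only ever matches that "syncoid_<identifier><hostname>"
# prefix, never pruning the snapshot created by the current run.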
+ pruneoldsyncsnaps($sourcehost,$sourcefs,$newsyncsnap,$sourceisroot,keys %{ $snaps{'source'}}); + pruneoldsyncsnaps($targethost,$targetfs,$newsyncsnap,$targetisroot,keys %{ $snaps{'target'}}); + } } # end syncdataset() +sub compressargset { + my ($value) = @_; + my $DEFAULT_COMPRESSION = 'lzo'; + my %COMPRESS_ARGS = ( + 'none' => { + rawcmd => '', + args => '', + decomrawcmd => '', + decomargs => '', + }, + 'gzip' => { + rawcmd => '/bin/gzip', + args => '-3', + decomrawcmd => '/bin/zcat', + decomargs => '', + }, + 'pigz-fast' => { + rawcmd => '/usr/bin/pigz', + args => '-3', + decomrawcmd => '/usr/bin/pigz', + decomargs => '-dc', + }, + 'pigz-slow' => { + rawcmd => '/usr/bin/pigz', + args => '-9', + decomrawcmd => '/usr/bin/pigz', + decomargs => '-dc', + }, + 'zstd-fast' => { + rawcmd => '/usr/bin/zstd', + args => '-3', + decomrawcmd => '/usr/bin/zstd', + decomargs => '-dc', + }, + 'zstd-slow' => { + rawcmd => '/usr/bin/zstd', + args => '-19', + decomrawcmd => '/usr/bin/zstd', + decomargs => '-dc', + }, + 'lzo' => { + rawcmd => '/usr/bin/lzop', + args => '', + decomrawcmd => '/usr/bin/lzop', + decomargs => '-dfc', + }, + 'lz4' => { + rawcmd => '/usr/bin/lz4', + args => '', + decomrawcmd => '/usr/bin/lz4', + decomargs => '-dc', + }, + ); -sub getargs { - my @args = @_; - my %args; - - my %novaluearg; - my %validarg; - push my @validargs, ('debug','nocommandchecks','version','monitor-version','compress','c','o','source-bwlimit','target-bwlimit','dumpsnaps','recursive','r','sshkey','sshport','quiet','no-stream','no-sync-snap'); - foreach my $item (@validargs) { $validarg{$item} = 1; } - push my @novalueargs, ('debug','nocommandchecks','version','monitor-version','dumpsnaps','recursive','r','quiet','no-stream','no-sync-snap'); - foreach my $item (@novalueargs) { $novaluearg{$item} = 1; } - - while (my $rawarg = shift(@args)) { - my $arg = $rawarg; - my $argvalue = ''; - if ($rawarg =~ /=/) { - # user specified the value for a CLI argument with = - # instead of with blank space. separate appropriately. - $argvalue = $arg; - $arg =~ s/=.*$//; - $argvalue =~ s/^.*=//; - } - if ($rawarg =~ /^--/) { - # doubledash arg - $arg =~ s/^--//; - if (! $validarg{$arg}) { die "ERROR: don't understand argument $rawarg.\n"; } - if ($novaluearg{$arg}) { - $args{$arg} = 1; - } else { - # if this CLI arg takes a user-specified value and - # we don't already have it, then the user must have - # specified with a space, so pull in the next value - # from the array as this value rather than as the - # next argument. - if ($argvalue eq '') { $argvalue = shift(@args); } - $args{$arg} = $argvalue; - } - } elsif ($arg =~ /^-/) { - # singledash arg - $arg =~ s/^-//; - if (! $validarg{$arg}) { die "ERROR: don't understand argument $rawarg.\n"; } - if ($novaluearg{$arg}) { - $args{$arg} = 1; - } else { - # if this CLI arg takes a user-specified value and - # we don't already have it, then the user must have - # specified with a space, so pull in the next value - # from the array as this value rather than as the - # next argument. - if ($argvalue eq '') { $argvalue = shift(@args); } - $args{$arg} = $argvalue; - } - } else { - # bare arg - if (defined $args{'source'}) { - if (! 
defined $args{'target'}) { - $args{'target'} = $arg; - } else { - die "ERROR: don't know what to do with third bare argument $rawarg.\n"; - } - } else { - $args{'source'} = $arg; - } - } + if ($value eq 'default') { + $value = $DEFAULT_COMPRESSION; + } elsif (!(grep $value eq $_, ('gzip', 'pigz-fast', 'pigz-slow', 'zstd-fast', 'zstd-slow', 'lz4', 'lzo', 'default', 'none'))) { + warn "Unrecognised compression value $value, defaulting to $DEFAULT_COMPRESSION"; + $value = $DEFAULT_COMPRESSION; } - if (defined $args{'source-bwlimit'}) { $args{'source-bwlimit'} = "-R $args{'source-bwlimit'}"; } else { $args{'source-bwlimit'} = ''; } - if (defined $args{'target-bwlimit'}) { $args{'target-bwlimit'} = "-r $args{'target-bwlimit'}"; } else { $args{'target-bwlimit'} = ''; } - - if (defined $args{'no-stream'}) { $args{'streamarg'} = '-i'; } else { $args{'streamarg'} = '-I'; } - - if ($args{'r'}) { $args{'recursive'} = $args{'r'}; } - - if (!defined $args{'compress'}) { $args{'compress'} = 'default'; } - - if ($args{'compress'} eq 'gzip') { - $args{'rawcompresscmd'} = '/bin/gzip'; - $args{'compressargs'} = '-3'; - $args{'rawdecompresscmd'} = '/bin/zcat'; - $args{'decompressargs'} = ''; - } elsif ( ($args{'compress'} eq 'pigz-fast')) { - $args{'rawcompresscmd'} = '/usr/bin/pigz'; - $args{'compressargs'} = '-3'; - $args{'rawdecompresscmd'} = '/usr/bin/pigz'; - $args{'decompressargs'} = '-dc'; - } elsif ( ($args{'compress'} eq 'pigz-slow')) { - $args{'rawcompresscmd'} = '/usr/bin/pigz'; - $args{'compressargs'} = '-9'; - $args{'rawdecompresscmd'} = '/usr/bin/pigz'; - $args{'decompressargs'} = '-dc'; - } elsif ( ($args{'compress'} eq 'lzo') || ($args{'compress'} eq 'default') ) { - $args{'rawcompresscmd'} = '/usr/bin/lzop'; - $args{'compressargs'} = ''; - $args{'rawdecompresscmd'} = '/usr/bin/lzop'; - $args{'decompressargs'} = '-dfc'; - } else { - $args{'rawcompresscmd'} = ''; - $args{'compressargs'} = ''; - $args{'rawdecompresscmd'} = ''; - $args{'decompressargs'} = ''; - } - $args{'compresscmd'} = "$args{'rawcompresscmd'} $args{'compressargs'}"; - $args{'decompresscmd'} = "$args{'rawdecompresscmd'} $args{'decompressargs'}"; - - return %args; + my %comargs = %{$COMPRESS_ARGS{$value}}; # copy + $comargs{'compress'} = $value; + $comargs{'cmd'} = "$comargs{'rawcmd'} $comargs{'args'}"; + $comargs{'decomcmd'} = "$comargs{'decomrawcmd'} $comargs{'decomargs'}"; + return \%comargs; } sub checkcommands { @@ -460,6 +624,8 @@ sub checkcommands { $avail{'localmbuffer'} = 1; $avail{'sourcembuffer'} = 1; $avail{'targetmbuffer'} = 1; + $avail{'sourceresume'} = 1; + $avail{'targetresume'} = 1; return %avail; } @@ -471,24 +637,15 @@ sub checkcommands { # if raw compress command is null, we must have specified no compression. 
otherwise, # make sure that compression is available everywhere we need it - if ($args{'rawcompresscmd'} eq '') { - $avail{'sourcecompress'} = 0; - $avail{'sourcecompress'} = 0; - $avail{'localcompress'} = 0; - if ($args{'compress'} eq 'none' || - $args{'compress'} eq 'no' || - $args{'compress'} eq '0') { - if ($debug) { print "DEBUG: compression forced off from command line arguments.\n"; } - } else { - print "WARN: value $args{'compress'} for argument --compress not understood, proceeding without compression.\n"; - } + if ($compressargs{'compress'} eq 'none') { + if ($debug) { print "DEBUG: compression forced off from command line arguments.\n"; } } else { - if ($debug) { print "DEBUG: checking availability of $args{'rawcompresscmd'} on source...\n"; } - $avail{'sourcecompress'} = `$sourcessh $lscmd $args{'rawcompresscmd'} 2>/dev/null`; - if ($debug) { print "DEBUG: checking availability of $args{'rawcompresscmd'} on target...\n"; } - $avail{'targetcompress'} = `$targetssh $lscmd $args{'rawcompresscmd'} 2>/dev/null`; - if ($debug) { print "DEBUG: checking availability of $args{'rawcompresscmd'} on local machine...\n"; } - $avail{'localcompress'} = `$lscmd $args{'rawcompresscmd'} 2>/dev/null`; + if ($debug) { print "DEBUG: checking availability of $compressargs{'rawcmd'} on source...\n"; } + $avail{'sourcecompress'} = `$sourcessh $lscmd $compressargs{'rawcmd'} 2>/dev/null`; + if ($debug) { print "DEBUG: checking availability of $compressargs{'rawcmd'} on target...\n"; } + $avail{'targetcompress'} = `$targetssh $lscmd $compressargs{'rawcmd'} 2>/dev/null`; + if ($debug) { print "DEBUG: checking availability of $compressargs{'rawcmd'} on local machine...\n"; } + $avail{'localcompress'} = `$lscmd $compressargs{'rawcmd'} 2>/dev/null`; } my ($s,$t); @@ -514,14 +671,14 @@ sub checkcommands { if ($avail{'sourcecompress'} eq '') { - if ($args{'rawcompresscmd'} ne '') { - print "WARN: $args{'compresscmd'} not available on source $s- sync will continue without compression.\n"; + if ($compressargs{'rawcmd'} ne '') { + print "WARN: $compressargs{'rawcmd'} not available on source $s - sync will continue without compression.\n"; } $avail{'compress'} = 0; } if ($avail{'targetcompress'} eq '') { - if ($args{'rawcompresscmd'} ne '') { - print "WARN: $args{'compresscmd'} not available on target $t - sync will continue without compression.\n"; + if ($compressargs{'rawcmd'} ne '') { + print "WARN: $compressargs{'rawcmd'} not available on target $t - sync will continue without compression.\n"; } $avail{'compress'} = 0; } @@ -533,8 +690,8 @@ sub checkcommands { # corner case - if source AND target are BOTH remote, we have to check for local compress too if ($sourcehost ne '' && $targethost ne '' && $avail{'localcompress'} eq '') { - if ($args{'rawcompresscmd'} ne '') { - print "WARN: $args{'compresscmd'} not available on local machine - sync will continue without compression.\n"; + if ($compressargs{'rawcmd'} ne '') { + print "WARN: $compressargs{'rawcmd'} not available on local machine - sync will continue without compression.\n"; } $avail{'compress'} = 0; } @@ -542,7 +699,7 @@ sub checkcommands { if ($debug) { print "DEBUG: checking availability of $mbuffercmd on source...\n"; } $avail{'sourcembuffer'} = `$sourcessh $lscmd $mbuffercmd 2>/dev/null`; if ($avail{'sourcembuffer'} eq '') { - print "WARN: $mbuffercmd not available on source $s - sync will continue without source buffering.\n"; + if (!$quiet) { print "WARN: $mbuffercmd not available on source $s - sync will continue without source buffering.\n"; }
$avail{'sourcembuffer'} = 0; } else { $avail{'sourcembuffer'} = 1; @@ -551,7 +708,7 @@ sub checkcommands { if ($debug) { print "DEBUG: checking availability of $mbuffercmd on target...\n"; } $avail{'targetmbuffer'} = `$targetssh $lscmd $mbuffercmd 2>/dev/null`; if ($avail{'targetmbuffer'} eq '') { - print "WARN: $mbuffercmd not available on target $t - sync will continue without target buffering.\n"; + if (!$quiet) { print "WARN: $mbuffercmd not available on target $t - sync will continue without target buffering.\n"; } $avail{'targetmbuffer'} = 0; } else { $avail{'targetmbuffer'} = 1; @@ -563,19 +720,50 @@ sub checkcommands { $avail{'localmbuffer'} = `$lscmd $mbuffercmd 2>/dev/null`; if ($avail{'localmbuffer'} eq '') { $avail{'localmbuffer'} = 0; - print "WARN: $mbuffercmd not available on local machine - sync will continue without local buffering.\n"; + if (!$quiet) { print "WARN: $mbuffercmd not available on local machine - sync will continue without local buffering.\n"; } } } if ($debug) { print "DEBUG: checking availability of $pvcmd on local machine...\n"; } $avail{'localpv'} = `$lscmd $pvcmd 2>/dev/null`; if ($avail{'localpv'} eq '') { - print "WARN: $pvcmd not available on local machine - sync will continue without progress bar.\n"; + if (!$quiet) { print "WARN: $pvcmd not available on local machine - sync will continue without progress bar.\n"; } $avail{'localpv'} = 0; } else { $avail{'localpv'} = 1; } + # check for ZFS resume feature support + if ($resume) { + my $resumechkcmd = "$zfscmd get -d 0 receive_resume_token"; + + if ($debug) { print "DEBUG: checking availability of zfs resume feature on source...\n"; } + $avail{'sourceresume'} = system("$sourcessh $resumechkcmd >/dev/null 2>&1"); + $avail{'sourceresume'} = $avail{'sourceresume'} == 0 ? 1 : 0; + + if ($debug) { print "DEBUG: checking availability of zfs resume feature on target...\n"; } + $avail{'targetresume'} = system("$targetssh $resumechkcmd >/dev/null 2>&1"); + $avail{'targetresume'} = $avail{'targetresume'} == 0 ? 
1 : 0; + + if ($avail{'sourceresume'} == 0 || $avail{'targetresume'} == 0) { + # disable resume + $resume = ''; + + my @hosts = (); + if ($avail{'sourceresume'} == 0) { + push @hosts, 'source'; + } + if ($avail{'targetresume'} == 0) { + push @hosts, 'target'; + } + my $affected = join(" and ", @hosts); + print "WARN: ZFS resume feature not available on $affected machine - sync will continue without resume support.\n"; + } + } else { + $avail{'sourceresume'} = 0; + $avail{'targetresume'} = 0; + } + return %avail; } @@ -590,7 +778,7 @@ sub iszfsbusy { foreach my $process (@processes) { # if ($debug) { print "DEBUG: checking process $process...\n"; } - if ($process =~ /zfs *(receive|recv).*$fs/) { + if ($process =~ /zfs *(receive|recv).*\Q$fs\E/) { # there's already a zfs receive process for our target filesystem - return true if ($debug) { print "DEBUG: process $process matches target $fs!\n"; } return 1; @@ -603,27 +791,43 @@ sub setzfsvalue { my ($rhost,$fs,$isroot,$property,$value) = @_; - if ($rhost ne '') { $rhost = "$sshcmd $rhost"; } + + my $fsescaped = escapeshellparam($fs); + + if ($rhost ne '') { + $rhost = "$sshcmd $rhost"; + # double escaping needed + $fsescaped = escapeshellparam($fsescaped); + } + if ($debug) { print "DEBUG: setting $property to $value on $fs...\n"; } my $mysudocmd; if ($isroot) { $mysudocmd = ''; } else { $mysudocmd = $sudocmd; } - if ($debug) { print "$rhost $mysudocmd $zfscmd set $property=$value $fs\n"; } - system("$rhost $mysudocmd $zfscmd set $property=$value $fs") == 0 - or warn "WARNING: $rhost $mysudocmd $zfscmd set $property=$value $fs died: $?, proceeding anyway.\n"; + if ($debug) { print "$rhost $mysudocmd $zfscmd set $property=$value $fsescaped\n"; } + system("$rhost $mysudocmd $zfscmd set $property=$value $fsescaped") == 0 + or warn "WARNING: $rhost $mysudocmd $zfscmd set $property=$value $fsescaped died: $?, proceeding anyway.\n"; return; } sub getzfsvalue { my ($rhost,$fs,$isroot,$property) = @_; - if ($rhost ne '') { $rhost = "$sshcmd $rhost"; } + + my $fsescaped = escapeshellparam($fs); + + if ($rhost ne '') { + $rhost = "$sshcmd $rhost"; + # double escaping needed + $fsescaped = escapeshellparam($fsescaped); + } + if ($debug) { print "DEBUG: getting current value of $property on $fs...\n"; } my $mysudocmd; if ($isroot) { $mysudocmd = ''; } else { $mysudocmd = $sudocmd; } - if ($debug) { print "$rhost $mysudocmd $zfscmd get -H $property $fs\n"; } - open FH, "$rhost $mysudocmd $zfscmd get -H $property $fs |"; + if ($debug) { print "$rhost $mysudocmd $zfscmd get -H $property $fsescaped\n"; } + open FH, "$rhost $mysudocmd $zfscmd get -H $property $fsescaped |"; my $value = <FH>; close FH; - my @values = split(/\s/,$value); + my @values = split(/\t/,$value); $value = $values[2]; return $value; } @@ -651,7 +855,7 @@ sub getoldestsnapshot { # must not have had any snapshots on source - luckily, we already made one, amirite? if (defined ($args{'no-sync-snap'}) ) { # well, actually we set --no-sync-snap, so no we *didn't* already make one. Whoops.
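# The "double escaping needed" pattern above in a nutshell (illustrative,
# hypothetical dataset name containing a space):
#
#   my $fs  = "pool/my fs";
#   my $esc = escapeshellparam($fs);                # 'pool/my fs' - survives one shell
#   system("zfs list $esc");                        # local: one shell parses the command
#   $esc = escapeshellparam(escapeshellparam($fs)); # remote: local shell, then remote shell
#   system("ssh host zfs list $esc");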
- die "CRIT: --no-sync-snap is set, and getoldestsnapshot() could not find any snapshots on source!\n"; + warn "CRIT: --no-sync-snap is set, and getoldestsnapshot() could not find any snapshots on source!\n"; } return 0; } @@ -660,7 +864,7 @@ sub getnewestsnapshot { my $snaps = shift; foreach my $snap ( sort { $snaps{'source'}{$b}{'creation'}<=>$snaps{'source'}{$a}{'creation'} } keys %{ $snaps{'source'} }) { # return on first snap found - it's the newest - print "NEWEST SNAPSHOT: $snap\n"; + if (!$quiet) { print "NEWEST SNAPSHOT: $snap\n"; } return $snap; } # must not have had any snapshots on source - looks like we'd better create one! @@ -673,6 +877,7 @@ sub getnewestsnapshot { # we also probably need an argument to mute this WARN, for people who deliberately exclude # datasets from recursive replication this way. warn "WARN: --no-sync-snap is set, and getnewestsnapshot() could not find any snapshots on source for current dataset. Continuing.\n"; + if ($exitcode < 2) { $exitcode = 2; } } return 0; } @@ -690,9 +895,9 @@ sub buildsynccmd { $synccmd = "$sendcmd |"; # avoid confusion - accept either source-bwlimit or target-bwlimit as the bandwidth limiting option here my $bwlimit = ''; - if (defined $args{'source-bwlimit'}) { + if (length $args{'source-bwlimit'}) { $bwlimit = $args{'source-bwlimit'}; - } elsif (defined $args{'target-bwlimit'}) { + } elsif (length $args{'target-bwlimit'}) { $bwlimit = $args{'target-bwlimit'}; } @@ -701,48 +906,67 @@ sub buildsynccmd { $synccmd .= " $recvcmd"; } elsif ($sourcehost eq '') { # local source, remote target. - #$synccmd = "$sendcmd | $pvcmd | $args{'compresscmd'} | $mbuffercmd | $sshcmd $targethost '$args{'decompresscmd'} | $mbuffercmd | $recvcmd'"; + #$synccmd = "$sendcmd | $pvcmd | $compressargs{'cmd'} | $mbuffercmd | $sshcmd $targethost '$compressargs{'decomcmd'} | $mbuffercmd | $recvcmd'"; $synccmd = "$sendcmd |"; if ($avail{'localpv'} && !$quiet) { $synccmd .= " $pvcmd -s $pvsize |"; } - if ($avail{'compress'}) { $synccmd .= " $args{'compresscmd'} |"; } + if ($avail{'compress'}) { $synccmd .= " $compressargs{'cmd'} |"; } if ($avail{'sourcembuffer'}) { $synccmd .= " $mbuffercmd $args{'source-bwlimit'} $mbufferoptions |"; } - $synccmd .= " $sshcmd $targethost '"; - if ($avail{'targetmbuffer'}) { $synccmd .= " $mbuffercmd $args{'target-bwlimit'} $mbufferoptions |"; } - if ($avail{'compress'}) { $synccmd .= " $args{'decompresscmd'} |"; } - $synccmd .= " $recvcmd'"; + $synccmd .= " $sshcmd $targethost "; + + my $remotecmd = ""; + if ($avail{'targetmbuffer'}) { $remotecmd .= " $mbuffercmd $args{'target-bwlimit'} $mbufferoptions |"; } + if ($avail{'compress'}) { $remotecmd .= " $compressargs{'decomcmd'} |"; } + $remotecmd .= " $recvcmd"; + + $synccmd .= escapeshellparam($remotecmd); } elsif ($targethost eq '') { # remote source, local target. 
- #$synccmd = "$sshcmd $sourcehost '$sendcmd | $args{'compresscmd'} | $mbuffercmd' | $args{'decompresscmd'} | $mbuffercmd | $pvcmd | $recvcmd"; - $synccmd = "$sshcmd $sourcehost '$sendcmd"; - if ($avail{'compress'}) { $synccmd .= " | $args{'compresscmd'}"; } - if ($avail{'sourcembuffer'}) { $synccmd .= " | $mbuffercmd $args{'source-bwlimit'} $mbufferoptions"; } - $synccmd .= "' | "; + #$synccmd = "$sshcmd $sourcehost '$sendcmd | $compressargs{'cmd'} | $mbuffercmd' | $compressargs{'decomcmd'} | $mbuffercmd | $pvcmd | $recvcmd"; + + my $remotecmd = $sendcmd; + if ($avail{'compress'}) { $remotecmd .= " | $compressargs{'cmd'}"; } + if ($avail{'sourcembuffer'}) { $remotecmd .= " | $mbuffercmd $args{'source-bwlimit'} $mbufferoptions"; } + + $synccmd = "$sshcmd $sourcehost " . escapeshellparam($remotecmd); + $synccmd .= " | "; if ($avail{'targetmbuffer'}) { $synccmd .= "$mbuffercmd $args{'target-bwlimit'} $mbufferoptions | "; } - if ($avail{'compress'}) { $synccmd .= "$args{'decompresscmd'} | "; } + if ($avail{'compress'}) { $synccmd .= "$compressargs{'decomcmd'} | "; } if ($avail{'localpv'} && !$quiet) { $synccmd .= "$pvcmd -s $pvsize | "; } $synccmd .= "$recvcmd"; } else { #remote source, remote target... weird, but whatever, I'm not here to judge you. - #$synccmd = "$sshcmd $sourcehost '$sendcmd | $args{'compresscmd'} | $mbuffercmd' | $args{'decompresscmd'} | $pvcmd | $args{'compresscmd'} | $mbuffercmd | $sshcmd $targethost '$args{'decompresscmd'} | $mbuffercmd | $recvcmd'"; - $synccmd = "$sshcmd $sourcehost '$sendcmd"; - if ($avail{'compress'}) { $synccmd .= " | $args{'compresscmd'}"; } - if ($avail{'sourcembuffer'}) { $synccmd .= " | $mbuffercmd $args{'source-bwlimit'} $mbufferoptions"; } - $synccmd .= "' | "; - if ($avail{'compress'}) { $synccmd .= "$args{'decompresscmd'} | "; } + #$synccmd = "$sshcmd $sourcehost '$sendcmd | $compressargs{'cmd'} | $mbuffercmd' | $compressargs{'decomcmd'} | $pvcmd | $compressargs{'cmd'} | $mbuffercmd | $sshcmd $targethost '$compressargs{'decomcmd'} | $mbuffercmd | $recvcmd'"; + + my $remotecmd = $sendcmd; + if ($avail{'compress'}) { $remotecmd .= " | $compressargs{'cmd'}"; } + if ($avail{'sourcembuffer'}) { $remotecmd .= " | $mbuffercmd $args{'source-bwlimit'} $mbufferoptions"; } + + $synccmd = "$sshcmd $sourcehost " . 
escapeshellparam($remotecmd); + $synccmd .= " | "; + + if ($avail{'compress'}) { $synccmd .= "$compressargs{'decomcmd'} | "; } if ($avail{'localpv'} && !$quiet) { $synccmd .= "$pvcmd -s $pvsize | "; } - if ($avail{'compress'}) { $synccmd .= "$args{'compresscmd'} | "; } + if ($avail{'compress'}) { $synccmd .= "$compressargs{'cmd'} | "; } if ($avail{'localmbuffer'}) { $synccmd .= "$mbuffercmd $mbufferoptions | "; } - $synccmd .= "$sshcmd $targethost '"; - if ($avail{'targetmbuffer'}) { $synccmd .= "$mbuffercmd $args{'target-bwlimit'} $mbufferoptions | "; } - if ($avail{'compress'}) { $synccmd .= "$args{'decompresscmd'} | "; } - $synccmd .= "$recvcmd'"; + $synccmd .= "$sshcmd $targethost "; + + $remotecmd = ""; + if ($avail{'targetmbuffer'}) { $remotecmd .= " $mbuffercmd $args{'target-bwlimit'} $mbufferoptions |"; } + if ($avail{'compress'}) { $remotecmd .= " $compressargs{'decomcmd'} |"; } + $remotecmd .= " $recvcmd"; + + $synccmd .= escapeshellparam($remotecmd); } return $synccmd; } sub pruneoldsyncsnaps { my ($rhost,$fs,$newsyncsnap,$isroot,@snaps) = @_; + + my $fsescaped = escapeshellparam($fs); + if ($rhost ne '') { $rhost = "$sshcmd $rhost"; } + my $hostid = hostname(); my $mysudocmd; @@ -752,7 +976,7 @@ sub pruneoldsyncsnaps { # only prune snaps beginning with syncoid and our own hostname foreach my $snap(@snaps) { - if ($snap =~ /^syncoid_$hostid/) { + if ($snap =~ /^syncoid_\Q$identifier$hostid\E/) { # no matter what, we categorically refuse to # prune the new sync snap we created for this run if ($snap ne $newsyncsnap) { @@ -768,14 +992,16 @@ sub pruneoldsyncsnaps { my $prunecmd; foreach my $snap(@prunesnaps) { $counter ++; - $prunecmd .= "$mysudocmd $zfscmd destroy $fs\@$snap; "; + $prunecmd .= "$mysudocmd $zfscmd destroy $fsescaped\@$snap; "; if ($counter > $maxsnapspercmd) { $prunecmd =~ s/\; $//; - if ($rhost ne '') { $prunecmd = '"' . $prunecmd . '"'; } if ($debug) { print "DEBUG: pruning up to $maxsnapspercmd obsolete sync snapshots...\n"; } if ($debug) { print "DEBUG: $rhost $prunecmd\n"; } + if ($rhost ne '') { + $prunecmd = escapeshellparam($prunecmd); + } system("$rhost $prunecmd") == 0 - or warn "CRITICAL ERROR: $rhost $prunecmd failed: $?"; + or warn "WARNING: $rhost $prunecmd failed: $?"; $prunecmd = ''; $counter = 0; } @@ -784,9 +1010,11 @@ sub pruneoldsyncsnaps { # the loop, commit 'em now if ($counter) { $prunecmd =~ s/\; $//; - if ($rhost ne '') { $prunecmd = '"' . $prunecmd . '"'; } if ($debug) { print "DEBUG: pruning up to $maxsnapspercmd obsolete sync snapshots...\n"; } if ($debug) { print "DEBUG: $rhost $prunecmd\n"; } + if ($rhost ne '') { + $prunecmd = escapeshellparam($prunecmd); + } system("$rhost $prunecmd") == 0 or warn "WARNING: $rhost $prunecmd failed: $?"; } @@ -794,7 +1022,7 @@ sub pruneoldsyncsnaps { } sub getmatchingsnapshot { - my ($targetsize, $snaps) = shift; + my ($sourcefs, $targetfs, $targetsize, $snaps) = @_; foreach my $snap ( sort { $snaps{'source'}{$b}{'creation'}<=>$snaps{'source'}{$a}{'creation'} } keys %{ $snaps{'source'} }) { if (defined $snaps{'target'}{$snap}{'guid'}) { if ($snaps{'source'}{$snap}{'guid'} == $snaps{'target'}{$snap}{'guid'}) { @@ -804,9 +1032,10 @@ sub getmatchingsnapshot { } # if we got this far, we failed to find a matching snapshot. 
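# Illustrative note: "matching" means identical ZFS guids, not just identical
# names; conceptually getmatchingsnapshot() compares the output of
#
#   zfs get -Hpd 1 guid pool/ds       # source (hypothetical names)
#   zfs get -Hpd 1 guid backup/ds     # target
#
# walking source snapshots newest-first; the first snapshot name present on
# both sides with the same guid becomes the incremental base.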
+ if ($exitcode < 2) { $exitcode = 2; } print "\n"; - print "CRITICAL ERROR: Target exists but has no matching snapshots!\n"; + print "CRITICAL ERROR: Target $targetfs exists but has no snapshots matching with $sourcefs!\n"; print " Replication to target would require destroying existing\n"; print " target. Cowardly refusing to destroy your existing target.\n\n"; @@ -814,7 +1043,7 @@ sub getmatchingsnapshot { # zfs create targetpool/targetsnap ; syncoid sourcepool/sourcesnap targetpool/targetsnap ... if ( $targetsize < (64*1024*1024) ) { - print " NOTE: Target dataset is < 64MB used - did you mistakenly run\n"; + print " NOTE: Target $targetfs dataset is < 64MB used - did you mistakenly run\n"; print " \`zfs create $args{'target'}\` on the target? ZFS initial\n"; print " replication must be to a NON EXISTENT DATASET, which will\n"; print " then be CREATED BY the initial replication process.\n\n"; @@ -824,30 +1053,44 @@ sub newsyncsnap { my ($rhost,$fs,$isroot) = @_; - if ($rhost ne '') { $rhost = "$sshcmd $rhost"; } + my $fsescaped = escapeshellparam($fs); + if ($rhost ne '') { + $rhost = "$sshcmd $rhost"; + # double escaping needed + $fsescaped = escapeshellparam($fsescaped); + } my $mysudocmd; if ($isroot) { $mysudocmd = ''; } else { $mysudocmd = $sudocmd; } my $hostid = hostname(); my %date = getdate(); - my $snapname = "syncoid\_$hostid\_$date{'stamp'}"; - my $snapcmd = "$rhost $mysudocmd $zfscmd snapshot $fs\@$snapname\n"; - system($snapcmd) == 0 - or die "CRITICAL ERROR: $snapcmd failed: $?"; + my $snapname = "syncoid\_$identifier$hostid\_$date{'stamp'}"; + my $snapcmd = "$rhost $mysudocmd $zfscmd snapshot $fsescaped\@$snapname\n"; + system($snapcmd) == 0 or do { + warn "CRITICAL ERROR: $snapcmd failed: $?"; + if ($exitcode < 2) { $exitcode = 2; } + return 0; + }; + return $snapname; } sub targetexists { my ($rhost,$fs,$isroot) = @_; - if ($rhost ne '') { $rhost = "$sshcmd $rhost"; } + my $fsescaped = escapeshellparam($fs); + if ($rhost ne '') { + $rhost = "$sshcmd $rhost"; + # double escaping needed + $fsescaped = escapeshellparam($fsescaped); + } my $mysudocmd; if ($isroot) { $mysudocmd = ''; } else { $mysudocmd = $sudocmd; } - my $checktargetcmd = "$rhost $mysudocmd $zfscmd get -H name $fs"; + my $checktargetcmd = "$rhost $mysudocmd $zfscmd get -H name $fsescaped"; if ($debug) { print "DEBUG: checking to see if target filesystem exists using \"$checktargetcmd 2>&1 |\"...\n"; } open FH, "$checktargetcmd 2>&1 |"; my $targetexists = <FH>; close FH; my $exit = $?; - $targetexists = ( $targetexists =~ /^$fs/ && $exit == 0 ); + $targetexists = ( $targetexists =~ /^\Q$fs\E/ && $exit == 0 ); return $targetexists; } @@ -862,13 +1105,13 @@ sub getssh { if ($fs =~ /\@/) { $rhost = $fs; $fs =~ s/^\S*\@\S*://; - $rhost =~ s/:$fs$//; + $rhost =~ s/:\Q$fs\E$//; my $remoteuser = $rhost; $remoteuser =~ s/\@.*$//; if ($remoteuser eq 'root') { $isroot = 1; } else { $isroot = 0; } # now we need to establish a persistent master SSH connection $socket = "/tmp/syncoid-$remoteuser-$rhost-" .
time(); - open FH, "$sshcmd -M -S $socket -o ControlPersist=1m $sshport $rhost exit |"; + open FH, "$sshcmd -M -S $socket -o ControlPersist=1m $args{'sshport'} $rhost exit |"; close FH; $rhost = "-S $socket $rhost"; } else { @@ -888,11 +1131,16 @@ sub dumphash() { sub getsnaps() { my ($type,$rhost,$fs,$isroot,%snaps) = @_; my $mysudocmd; + my $fsescaped = escapeshellparam($fs); if ($isroot) { $mysudocmd = ''; } else { $mysudocmd = $sudocmd; } - if ($rhost ne '') { $rhost = "$sshcmd $rhost"; } + if ($rhost ne '') { + $rhost = "$sshcmd $rhost"; + # double escaping needed + $fsescaped = escapeshellparam($fsescaped); + } - my $getsnapcmd = "$rhost $mysudocmd $zfscmd get -Hpd 1 -t snapshot guid,creation $fs |"; + my $getsnapcmd = "$rhost $mysudocmd $zfscmd get -Hpd 1 -t snapshot guid,creation $fsescaped |"; if ($debug) { print "DEBUG: getting list of snapshots on $fs using $getsnapcmd...\n"; } open FH, $getsnapcmd; my @rawsnaps = <FH>; @@ -903,24 +1151,24 @@ sub getsnaps() { foreach my $line (@rawsnaps) { # only import snap guids from the specified filesystem - if ($line =~ /$fs\@.*guid/) { + if ($line =~ /\Q$fs\E\@.*guid/) { chomp $line; my $guid = $line; - $guid =~ s/^.*\sguid\s*(\d*).*/$1/; + $guid =~ s/^.*\tguid\t*(\d*).*/$1/; my $snap = $line; - $snap =~ s/^\S*\@(\S*)\s*guid.*$/$1/; + $snap =~ s/^.*\@(.*)\tguid.*$/$1/; $snaps{$type}{$snap}{'guid'}=$guid; } } foreach my $line (@rawsnaps) { # only import snap creations from the specified filesystem - if ($line =~ /$fs\@.*creation/) { + if ($line =~ /\Q$fs\E\@.*creation/) { chomp $line; my $creation = $line; - $creation =~ s/^.*\screation\s*(\d*).*/$1/; + $creation =~ s/^.*\tcreation\t*(\d*).*/$1/; my $snap = $line; - $snap =~ s/^\S*\@(\S*)\s*creation.*$/$1/; + $snap =~ s/^.*\@(.*)\tcreation.*$/$1/; $snaps{$type}{$snap}{'creation'}=$creation; } } @@ -930,22 +1178,37 @@ sub getsendsize { - my ($sourcehost,$snap1,$snap2,$isroot) = @_; + my ($sourcehost,$snap1,$snap2,$isroot,$receivetoken) = @_; + + my $snap1escaped = escapeshellparam($snap1); + my $snap2escaped = escapeshellparam($snap2); my $mysudocmd; if ($isroot) { $mysudocmd = ''; } else { $mysudocmd = $sudocmd; } + my $sourcessh; + if ($sourcehost ne '') { + $sourcessh = "$sshcmd $sourcehost"; + $snap1escaped = escapeshellparam($snap1escaped); + $snap2escaped = escapeshellparam($snap2escaped); + } else { + $sourcessh = ''; + } + my $snaps; if ($snap2) { # if we got a $snap2 argument, we want an incremental send estimate from $snap1 to $snap2. - $snaps = "$args{'streamarg'} $snap1 $snap2"; + $snaps = "$args{'streamarg'} $snap1escaped $snap2escaped"; } else { # if we didn't get a $snap2 arg, we want a full send estimate for $snap1.
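# Illustrative sketch (hypothetical snapshot names): the estimate fed to pv
# comes from a dry-run send,
#
#   zfs send -nP -I 'pool/ds@old' 'pool/ds@new'
#
# whose final "size <bytes>" line getsendsize() reduces to a plain byte count;
# for a resumed receive the dry run is "zfs send -nP -t <token>" instead, whose
# output is formatted differently.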
- $snaps = "$snap1"; + $snaps = "$snap1escaped"; } - my $sourcessh; - if ($sourcehost ne '') { $sourcessh = "$sshcmd $sourcehost"; } else { $sourcessh = ''; } + # in case of a resumed receive, get the remaining + # size based on the resume token + if (defined($receivetoken)) { + $snaps = "-t $receivetoken"; + } my $getsendsizecmd = "$sourcessh $mysudocmd $zfscmd send -nP $snaps"; if ($debug) { print "DEBUG: getting estimated transfer size from source $sourcehost using \"$getsendsizecmd 2>&1 |\"...\n"; } @@ -959,9 +1222,20 @@ sub getsendsize { # size of proposed xfer in bytes, but we need to remove # human-readable crap from it my $sendsize = pop(@rawsize); - $sendsize =~ s/^size\s*//; + # the output format is different in case of + # a resumed receive + if (defined($receivetoken)) { + $sendsize =~ s/.*\t([0-9]+)$/$1/; + } else { + $sendsize =~ s/^size\t*//; + } chomp $sendsize; + # check for valid value + if ($sendsize !~ /^\d+$/) { + $sendsize = ''; + } + # to avoid confusion with a zero size pv, give sendsize # a minimum 4K value - or if empty, make sure it reads UNKNOWN if ($debug) { print "DEBUG: sendsize = $sendsize\n"; } @@ -987,3 +1261,75 @@ sub getdate { $date{'stamp'} = "$date{'year'}-$date{'mon'}-$date{'mday'}:$date{'hour'}:$date{'min'}:$date{'sec'}"; return %date; } + +sub escapeshellparam { + my ($par) = @_; + # avoid use of uninitialized string in regex + if (length($par)) { + # "escape" all single quotes + $par =~ s/'/'"'"'/g; + } else { + # avoid use of uninitialized string in concatenation below + $par = ''; + } + # single-quote entire string + return "'$par'"; +} + +sub getreceivetoken() { + my ($rhost,$fs,$isroot) = @_; + my $token = getzfsvalue($rhost,$fs,$isroot,"receive_resume_token"); + + if ($token ne '-' && $token ne '') { + return $token; + } + + if ($debug) { + print "DEBUG: no receive token found\n"; + } + + return +} + +__END__ + +=head1 NAME + +syncoid - ZFS snapshot replication tool + +=head1 SYNOPSIS + + syncoid [options]... SOURCE TARGET + or syncoid [options]... SOURCE USER@HOST:TARGET + or syncoid [options]... USER@HOST:SOURCE TARGET + or syncoid [options]... USER@HOST:SOURCE USER@HOST:TARGET + + SOURCE Source ZFS dataset. Can be either local or remote + TARGET Target ZFS dataset. Can be either local or remote + +Options: + + --compress=FORMAT Compresses data during transfer. Currently accepted options are gzip, pigz-fast, pigz-slow, zstd-fast, zstd-slow, lz4, lzo (default) & none + --identifier=EXTRA Extra identifier which is included in the snapshot name. Can be used for replicating to multiple targets. + --recursive|r Also transfers child datasets + --skip-parent Skips syncing of the parent dataset. Does nothing without '--recursive' option. + --source-bwlimit= Bandwidth limit on the source transfer + --target-bwlimit= Bandwidth limit on the target transfer + --no-stream Replicates using newest snapshot instead of intermediates + --no-sync-snap Does not create new snapshot, only transfers existing + --exclude=REGEX Exclude specific datasets which match the given regular expression. Can be specified multiple times + + --sshkey=FILE Specifies an ssh key to use to connect + --sshport=PORT Connects to remote on a particular port + --sshcipher|c=CIPHER Passes CIPHER to ssh to use a particular cipher set + --sshoption|o=OPTION Passes OPTION to ssh for remote usage.
Can be specified multiple times + + --help Prints this help text + --version Prints the version number + --debug Prints out a lot of additional information during a syncoid run + --monitor-version Currently does nothing + --quiet Suppresses non-error output + --dumpsnaps Dumps a list of snapshots during the run + --no-command-checks Do not check command existence before attempting transfer. Not recommended + --no-resume Don't use the ZFS resume feature if available + --no-clone-handling Don't try to recreate clones on target diff --git a/tests/1_one_year/run.sh b/tests/1_one_year/run.sh new file mode 100755 index 0000000..1cae7b4 --- /dev/null +++ b/tests/1_one_year/run.sh @@ -0,0 +1,49 @@ +#!/bin/bash +set -x + +# this test will take hourly, daily and monthly snapshots +# for the whole year of 2017 in the timezone Europe/Vienna +# sanoid is run hourly and no snapshots are pruned + +. ../common/lib.sh + +POOL_NAME="sanoid-test-1" +POOL_TARGET="" # root +RESULT="/tmp/sanoid_test_result" +RESULT_CHECKSUM="68c67161a59d0e248094a66061972f53613067c9db52ad981030f36bc081fed7" + +# UTC timestamp of start and end +START="1483225200" +END="1514761199" + +# prepare +setup +checkEnvironment +disableTimeSync + +# set timezone +ln -sf /usr/share/zoneinfo/Europe/Vienna /etc/localtime + +timestamp=$START + +mkdir -p "${POOL_TARGET}" +truncate -s 5120M "${POOL_TARGET}"/zpool.img + +zpool create -f "${POOL_NAME}" "${POOL_TARGET}"/zpool.img + +function cleanUp { + zpool export "${POOL_NAME}" +} + +# export pool in any case +trap cleanUp EXIT + +while [ $timestamp -le $END ]; do + date --utc --set @$timestamp; date; "${SANOID}" --cron --verbose + timestamp=$((timestamp+3600)) +done + +saveSnapshotList "${POOL_NAME}" "${RESULT}" + +# hourly daily monthly +verifySnapshotList "${RESULT}" 8760 365 12 "${RESULT_CHECKSUM}" diff --git a/tests/1_one_year/sanoid.conf b/tests/1_one_year/sanoid.conf new file mode 100644 index 0000000..f5692f0 --- /dev/null +++ b/tests/1_one_year/sanoid.conf @@ -0,0 +1,10 @@ +[sanoid-test-1] + use_template = production + +[template_production] + hourly = 36 + daily = 30 + monthly = 3 + yearly = 0 + autosnap = yes + autoprune = no diff --git a/tests/2_dst_handling/run.sh b/tests/2_dst_handling/run.sh new file mode 100755 index 0000000..eba21ed --- /dev/null +++ b/tests/2_dst_handling/run.sh @@ -0,0 +1,54 @@ +#!/bin/bash +set -x + +# this test will check the behaviour around a date where DST ends +# with hourly, daily and monthly snapshots checked in a 15 minute interval + +# Daylight saving time 2017 in Europe/Vienna began at 02:00 on Sunday, 26 March +# and ended at 03:00 on Sunday, 29 October. All times are in +# Central European Time. +ic.
../common/lib.sh + +POOL_NAME="sanoid-test-2" +POOL_TARGET="" # root +RESULT="/tmp/sanoid_test_result" +RESULT_CHECKSUM="a916d9cd46f4b80f285d069f3497d02671bbb1bfd12b43ef93531cbdaf89d55c" + +# UTC timestamp of start and end +START="1509141600" +END="1509400800" + +# prepare +setup +checkEnvironment +disableTimeSync + +# set timezone +ln -sf /usr/share/zoneinfo/Europe/Vienna /etc/localtime + +timestamp=$START + +mkdir -p "${POOL_TARGET}" +truncate -s 512M "${POOL_TARGET}"/zpool2.img + +zpool create -f "${POOL_NAME}" "${POOL_TARGET}"/zpool2.img + +function cleanUp { + zpool export "${POOL_NAME}" +} + +# export pool in any case +trap cleanUp EXIT + +while [ $timestamp -le $END ]; do + date --utc --set @$timestamp; date; "${SANOID}" --cron --verbose + timestamp=$((timestamp+900)) +done + +saveSnapshotList "${POOL_NAME}" "${RESULT}" + +# hourly daily monthly +verifySnapshotList "${RESULT}" 73 3 1 "${RESULT_CHECKSUM}" + +# one more hour because of DST diff --git a/tests/2_dst_handling/sanoid.conf b/tests/2_dst_handling/sanoid.conf new file mode 100644 index 0000000..7ded3f8 --- /dev/null +++ b/tests/2_dst_handling/sanoid.conf @@ -0,0 +1,10 @@ +[sanoid-test-2] + use_template = production + +[template_production] + hourly = 36 + daily = 30 + monthly = 3 + yearly = 0 + autosnap = yes + autoprune = no diff --git a/tests/common/lib.sh b/tests/common/lib.sh new file mode 100644 index 0000000..78f128b --- /dev/null +++ b/tests/common/lib.sh @@ -0,0 +1,107 @@ +#!/bin/bash + +function setup { + export LANG=C + export LANGUAGE=C + export LC_ALL=C + + export SANOID="../../sanoid" + + # make sure that there is no cache file + rm -f /var/cache/sanoidsnapshots.txt + + # install needed sanoid configuration files + [ -f sanoid.conf ] && cp sanoid.conf /etc/sanoid/sanoid.conf + cp ../../sanoid.defaults.conf /etc/sanoid/sanoid.defaults.conf +} + +function checkEnvironment { + ASK=1 + + which systemd-detect-virt > /dev/null + if [ $? -eq 0 ]; then + systemd-detect-virt --vm > /dev/null + if [ $? -eq 0 ]; then + # we are in a vm + ASK=0 + fi + fi + + if [ $ASK -eq 1 ]; then + set +x + echo "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!" + echo "you should be running this test in a" + echo "dedicated vm, as it will mess with your system!" + echo "Are you sure you want to continue? (y)" + echo "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!" + set -x + + read -n 1 c + if [ "$c" != "y" ]; then + exit 1 + fi + fi +} + +function disableTimeSync { + # disable ntp sync + which timedatectl > /dev/null + if [ $?
-eq 0 ]; then + timedatectl set-ntp 0 + fi +} + +function saveSnapshotList { + POOL_NAME="$1" + RESULT="$2" + + zfs list -t snapshot -o name -Hr "${POOL_NAME}" | sort > "${RESULT}" + + # clear the seconds for comparing + sed -i 's/\(autosnap_[0-9][0-9][0-9][0-9]-[0-9][0-9]-[0-9][0-9]_[0-9][0-9]:[0-9][0-9]:\)[0-9][0-9]_/\100_/g' "${RESULT}" +} + +function verifySnapshotList { + RESULT="$1" + HOURLY_COUNT=$2 + DAILY_COUNT=$3 + MONTHLY_COUNT=$4 + CHECKSUM="$5" + + failed=0 + message="" + + hourly_count=$(grep -c "autosnap_.*_hourly" < "${RESULT}") + daily_count=$(grep -c "autosnap_.*_daily" < "${RESULT}") + monthly_count=$(grep -c "autosnap_.*_monthly" < "${RESULT}") + + if [ "${hourly_count}" -ne "${HOURLY_COUNT}" ]; then + failed=1 + message="${message}hourly snapshot count is wrong: ${hourly_count}\n" + fi + + if [ "${daily_count}" -ne "${DAILY_COUNT}" ]; then + failed=1 + message="${message}daily snapshot count is wrong: ${daily_count}\n" + fi + + if [ "${monthly_count}" -ne "${MONTHLY_COUNT}" ]; then + failed=1 + message="${message}monthly snapshot count is wrong: ${monthly_count}\n" + fi + + checksum=$(sha256sum "${RESULT}" | cut -d' ' -f1) + if [ "${checksum}" != "${CHECKSUM}" ]; then + failed=1 + message="${message}result checksum mismatch\n" + fi + + if [ "${failed}" -eq 0 ]; then + exit 0 + fi + + echo "TEST FAILED:" >&2 + echo -n -e "${message}" >&2 + + exit 1 +} diff --git a/tests/run-tests.sh b/tests/run-tests.sh new file mode 100755 index 0000000..a8469e9 --- /dev/null +++ b/tests/run-tests.sh @@ -0,0 +1,27 @@ +#!/bin/bash + +# runs all the available tests + +for test in */; do + if [ ! -x "${test}/run.sh" ]; then + continue + fi + + testName="${test%/}" + + LOGFILE=/tmp/sanoid_test_run_"${testName}".log + + pushd . > /dev/null + + echo -n "Running test ${testName} ... " + cd "${test}" + echo | bash run.sh > "${LOGFILE}" 2>&1 + + if [ $? -eq 0 ]; then + echo "[PASS]" + else + echo "[FAILED] (see ${LOGFILE})" + fi + + popd > /dev/null +done
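As a usage sketch (assuming a dedicated VM, as the tests themselves insist), the whole suite is driven from the tests directory:

    cd tests
    ./run-tests.sh
    # Running test 1_one_year ... [PASS]
    # Running test 2_dst_handling ... [PASS]

Each test's full output lands in /tmp/sanoid_test_run_<name>.log.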