From 4a3e93372c3502a7a4d9e2202d147ab00fd23559 Mon Sep 17 00:00:00 2001 From: Jason Lewis Date: Mon, 20 Nov 2017 15:16:43 +1100 Subject: [PATCH 01/29] check for empty lockfile --- sanoid | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/sanoid b/sanoid index d6e58ce..889696f 100755 --- a/sanoid +++ b/sanoid @@ -930,13 +930,22 @@ sub checklock { # no lockfile return 1; } + # make sure lockfile contains something + if ( -z $lockfile) { + # zero size lockfile, something is wrong + die "ERROR: something is wrong! $lockfile is empty\n"; + } # lockfile exists. read pid and mutex from it. see if it's our pid. if not, see if # there's still a process running with that pid and with the same mutex. - open FH, "< $lockfile"; + open FH, "< $lockfile" or die "ERROR: unable to open $lockfile"; my @lock = <FH>; close FH; + # if we didn't get exactly 2 items from the lock file there is a problem + if (scalar(@lock) != 2) { + die "ERROR: $lockfile is invalid.\n" + } my $lockmutex = pop(@lock); my $lockpid = pop(@lock); @@ -948,7 +957,6 @@ sub checklock { # we own the lockfile. no need to check any further. 
return 2; } - open PL, "$pscmd -p $lockpid -o args= |"; my @processlist = <PL>; close PL; From 31da53140fda7c5bfc79d063e3c6da8d8f49300c Mon Sep 17 00:00:00 2001 From: Christoph Klaffl Date: Wed, 6 Dec 2017 19:00:44 +0100 Subject: [PATCH 02/29] implemented a simple test which will take snapshots over a whole year and checks the resulting snapshot list --- tests/1_one_year/run.sh | 55 ++++++++++++++++++ tests/1_one_year/sanoid.conf | 10 ++++ tests/common/lib.sh | 106 +++++++++++++++++++++++++++++++++++ tests/run-tests.sh | 27 +++++++++ 4 files changed, 198 insertions(+) create mode 100755 tests/1_one_year/run.sh create mode 100644 tests/1_one_year/sanoid.conf create mode 100644 tests/common/lib.sh create mode 100755 tests/run-tests.sh diff --git a/tests/1_one_year/run.sh b/tests/1_one_year/run.sh new file mode 100755 index 0000000..7cec813 --- /dev/null +++ b/tests/1_one_year/run.sh @@ -0,0 +1,55 @@ +#!/bin/bash +set -x + +# this test will take hourly, daily and monthly snapshots +# for the whole year of 2017 in the timezone Europe/Vienna +# sanoid is run hourly and no snapshots are pruned + +. 
../common/lib.sh + +POOL_NAME="sanoid-test-1" +POOL_TARGET="" # root +RESULT="/tmp/sanoid_test_result" +RESULT_CHECKSUM="aa15e5595b0ed959313289ecb70323dad9903328ac46e881da5c4b0f871dd7cf" + +# UTC timestamp of start and end +START="1483225200" +END="1514761199" + +# prepare +setup +checkEnvironment +disableTimeSync + +# set timezone +ln -sf /usr/share/zoneinfo/Europe/Vienna /etc/localtime + +timestamp=$START + +mkdir -p "${POOL_TARGET}" +truncate -s 5120M "${POOL_TARGET}"/zpool.img + +zpool create -f "${POOL_NAME}" "${POOL_TARGET}"/zpool.img + +function cleanUp { + zpool export "${POOL_NAME}" +} + +# export pool in any case +trap cleanUp EXIT + +while [ $timestamp -le $END ]; do + date --utc --set @$timestamp; date; "${SANOID}" --cron --verbose + timestamp=$((timestamp+3600)) +done + +saveSnapshotList "${POOL_NAME}" "${RESULT}" + +# hourly daily monthly +verifySnapshotList "${RESULT}" 8759 366 12 "${RESULT_CHECKSUM}" + +# hourly count should be 8760 but one hour get's lost because of DST + +# daily count should be 365 but one additional daily is taken +# because the DST change leads to a day with 25 hours +# which will trigger an additional daily snapshot diff --git a/tests/1_one_year/sanoid.conf b/tests/1_one_year/sanoid.conf new file mode 100644 index 0000000..f5692f0 --- /dev/null +++ b/tests/1_one_year/sanoid.conf @@ -0,0 +1,10 @@ +[sanoid-test-1] + use_template = production + +[template_production] + hourly = 36 + daily = 30 + monthly = 3 + yearly = 0 + autosnap = yes + autoprune = no diff --git a/tests/common/lib.sh b/tests/common/lib.sh new file mode 100644 index 0000000..2c15e9b --- /dev/null +++ b/tests/common/lib.sh @@ -0,0 +1,106 @@ +#!/bin/bash + +function setup { + export LANG=C + export LANGUAGE=C + export LC_ALL=C + + export SANOID="../../sanoid" + + # make sure that there is no cache file + rm -f /var/cache/sanoidsnapshots.txt + + # install needed sanoid configuration files + [ -f sanoid.conf ] && cp sanoid.conf /etc/sanoid/sanoid.conf + cp 
../../sanoid.defaults.conf /etc/sanoid/sanoid.defaults.conf +} + +function checkEnvironment { + ASK=1 + + which systemd-detect-virt > /dev/null + if [ $? -eq 0 ]; then + systemd-detect-virt --vm > /dev/null + if [ $? -eq 0 ]; then + # we are in a vm + ASK=0 + fi + fi + + if [ $ASK -eq 1 ]; then + set +x + echo "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!" + echo "you should be running this test in a" + echo "dedicated vm, as it will mess with your system!" + echo "Are you sure you wan't to continue? (y)" + echo "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!" + set -x + + read -n 1 c + if [ "$c" != "y" ]; then + exit 1 + fi + fi +} + +function disableTimeSync { + # disable ntp sync + which timedatectl > /dev/null + if [ $? -eq 0 ]; then + timedatectl set-ntp 0 + fi +} + +function saveSnapshotList { + POOL_NAME="$1" + RESULT="$2" + + zfs list -t snapshot -o name -Hr "${POOL_NAME}" | sort > "${RESULT}" + + # clear the seconds for comparing + sed -i 's/\(autosnap_[0-9][0-9][0-9][0-9]-[0-9][0-9]-[0-9][0-9]_[0-9][0-9]:[0-9][0-9]:\)[0-9][0-9]_/\100_/g' "${RESULT}" +} + +function verifySnapshotList { + RESULT="$1" + HOURLY_COUNT=$2 + DAILY_COUNT=$3 + MONTHLY_COUNT=$4 + CHECKSUM="$5" + + failed=0 + message="" + + hourly_count=$(grep -c "autosnap_.*_hourly" < "${RESULT}") + daily_count=$(grep -c "autosnap_.*_daily" < "${RESULT}") + monthly_count=$(grep -c "autosnap_.*_monthly" < "${RESULT}") + + if [ "${hourly_count}" -ne "${HOURLY_COUNT}" ]; then + failed=1 + message="${message}hourly snapshot count is wrong: ${hourly_count}\n" + fi + + if [ "${daily_count}" -ne "${DAILY_COUNT}" ]; then + failed=1 + message="${message}daily snapshot count is wrong: ${daily_count}\n" + fi + + if [ "${monthly_count}" -ne "${MONTHLY_COUNT}" ]; then + failed=1 + message="${message}monthly snapshot count is wrong: ${monthly_count}\n" + fi + + checksum=$(sha256sum "${RESULT}" | cut -d' ' -f1) + if [ "${checksum}" != "${CHECKSUM}" ]; then + failed=1 + message="${message}result 
checksum mismatch\n" + fi + + if [ "${failed}" -eq 0 ]; then + exit 0 + fi + + echo "TEST FAILED:" >&2 + echo -n -e "${message}" >&2 + +} diff --git a/tests/run-tests.sh b/tests/run-tests.sh new file mode 100755 index 0000000..a8469e9 --- /dev/null +++ b/tests/run-tests.sh @@ -0,0 +1,27 @@ +#!/bin/bash + +# run's all the available tests + +for test in */; do + if [ ! -x "${test}/run.sh" ]; then + continue + fi + + testName="${test%/}" + + LOGFILE=/tmp/sanoid_test_run_"${testName}".log + + pushd . > /dev/null + + echo -n "Running test ${testName} ... " + cd "${test}" + echo | bash run.sh > "${LOGFILE}" 2>&1 + + if [ $? -eq 0 ]; then + echo "[PASS]" + else + echo "[FAILED] (see ${LOGFILE})" + fi + + popd > /dev/null +done From 9a6cdb85438eb730348a35e23fd4fdf7b41b60f3 Mon Sep 17 00:00:00 2001 From: Christoph Klaffl Date: Wed, 6 Dec 2017 20:15:29 +0100 Subject: [PATCH 03/29] handle DST (daylight saving times) properly for hourly and daily snapshots, fixes #155 --- sanoid | 49 +++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 47 insertions(+), 2 deletions(-) diff --git a/sanoid b/sanoid index d6e58ce..030fea2 100755 --- a/sanoid +++ b/sanoid @@ -268,6 +268,19 @@ sub take_snapshots { my @newsnaps; + # get utc timestamp of the current day for DST check + my $daystartUtc = timelocal(0, 0, 0, $datestamp{'mday'}, ($datestamp{'mon'}-1), $datestamp{'year'}); + my ($isdst) = (localtime($daystartUtc))[8]; + my $dstOffset = 0; + + if ($isdst ne $datestamp{'isdst'}) { + # current dst is different then at the beginning og the day + if ($isdst) { + # DST ended in the current day + $dstOffset = 60*60; + } + } + if ($args{'verbose'}) { print "INFO: taking snapshots...\n"; } foreach my $section (keys %config) { if ($section =~ /^template/) { next; } @@ -291,6 +304,9 @@ sub take_snapshots { my @preferredtime; my $lastpreferred; + # to avoid duplicates with DST + my $dateSuffix = ""; + if ($type eq 'hourly') { push @preferredtime,0; # try to hit 0 seconds push 
@preferredtime,$config{$section}{'hourly_min'}; @@ -299,6 +315,13 @@ sub take_snapshots { push @preferredtime,($datestamp{'mon'}-1); # january is month 0 push @preferredtime,$datestamp{'year'}; $lastpreferred = timelocal(@preferredtime); + + if ($dstOffset ne 0) { + # timelocal doesn't take DST into account + $lastpreferred += $dstOffset; + # DST ended, avoid duplicates + $dateSuffix = "_y"; + } if ($lastpreferred > time()) { $lastpreferred -= 60*60; } # preferred time is later this hour - so look at last hour's } elsif ($type eq 'daily') { push @preferredtime,0; # try to hit 0 seconds @@ -308,7 +331,29 @@ sub take_snapshots { push @preferredtime,($datestamp{'mon'}-1); # january is month 0 push @preferredtime,$datestamp{'year'}; $lastpreferred = timelocal(@preferredtime); - if ($lastpreferred > time()) { $lastpreferred -= 60*60*24; } # preferred time is later today - so look at yesterday's + + # timelocal doesn't take DST into account + $lastpreferred += $dstOffset; + + # check if the planned time has different DST flag than the current + my ($isdst) = (localtime($lastpreferred))[8]; + if ($isdst ne $datestamp{'isdst'}) { + if (!$isdst) { + # correct DST difference + $lastpreferred -= 60*60; + } + } + + if ($lastpreferred > time()) { + $lastpreferred -= 60*60*24; + + if ($dstOffset ne 0) { + # because we are going back one day + # the DST difference has to be accounted + # for in reverse now + $lastpreferred -= 2*$dstOffset; + } + } # preferred time is later today - so look at yesterday's } elsif ($type eq 'monthly') { push @preferredtime,0; # try to hit 0 seconds push @preferredtime,$config{$section}{'monthly_min'}; @@ -336,7 +381,7 @@ sub take_snapshots { # update to most current possible datestamp %datestamp = get_date(); # print "we should have had a $type snapshot of $path $maxage seconds ago; most recent is $newestage seconds old.\n"; - push(@newsnaps, "$path\@autosnap_$datestamp{'sortable'}_$type"); + push(@newsnaps, 
"$path\@autosnap_$datestamp{'sortable'}_${dateSuffix}_$type"); } } } From c1f7cd4241cefcafc1951b1e005b4c0a566ac062 Mon Sep 17 00:00:00 2001 From: Christoph Klaffl Date: Wed, 6 Dec 2017 20:23:52 +0100 Subject: [PATCH 04/29] fixed snapshot suffix --- sanoid | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sanoid b/sanoid index 030fea2..1c470c3 100755 --- a/sanoid +++ b/sanoid @@ -381,7 +381,7 @@ sub take_snapshots { # update to most current possible datestamp %datestamp = get_date(); # print "we should have had a $type snapshot of $path $maxage seconds ago; most recent is $newestage seconds old.\n"; - push(@newsnaps, "$path\@autosnap_$datestamp{'sortable'}_${dateSuffix}_$type"); + push(@newsnaps, "$path\@autosnap_$datestamp{'sortable'}${dateSuffix}_$type"); } } } From e61ccf1c9dcccb1e156ed684413e0c0a5671664a Mon Sep 17 00:00:00 2001 From: Christoph Klaffl Date: Wed, 6 Dec 2017 20:24:54 +0100 Subject: [PATCH 05/29] added test for checking for correct DST handling behaviour --- tests/2_dst_handling/run.sh | 54 ++++++++++++++++++++++++++++++++ tests/2_dst_handling/sanoid.conf | 10 ++++++ 2 files changed, 64 insertions(+) create mode 100755 tests/2_dst_handling/run.sh create mode 100644 tests/2_dst_handling/sanoid.conf diff --git a/tests/2_dst_handling/run.sh b/tests/2_dst_handling/run.sh new file mode 100755 index 0000000..eba21ed --- /dev/null +++ b/tests/2_dst_handling/run.sh @@ -0,0 +1,54 @@ +#!/bin/bash +set -x + +# this test will check the behaviour arround a date where DST ends +# with hourly, daily and monthly snapshots checked in a 15 minute interval + +# Daylight saving time 2017 in Europe/Vienna began at 02:00 on Sunday, 26 March +# and ended at 03:00 on Sunday, 29 October. All times are in +# Central European Time. + +. 
../common/lib.sh + +POOL_NAME="sanoid-test-2" +POOL_TARGET="" # root +RESULT="/tmp/sanoid_test_result" +RESULT_CHECKSUM="a916d9cd46f4b80f285d069f3497d02671bbb1bfd12b43ef93531cbdaf89d55c" + +# UTC timestamp of start and end +START="1509141600" +END="1509400800" + +# prepare +setup +checkEnvironment +disableTimeSync + +# set timezone +ln -sf /usr/share/zoneinfo/Europe/Vienna /etc/localtime + +timestamp=$START + +mkdir -p "${POOL_TARGET}" +truncate -s 512M "${POOL_TARGET}"/zpool2.img + +zpool create -f "${POOL_NAME}" "${POOL_TARGET}"/zpool2.img + +function cleanUp { + zpool export "${POOL_NAME}" +} + +# export pool in any case +trap cleanUp EXIT + +while [ $timestamp -le $END ]; do + date --utc --set @$timestamp; date; "${SANOID}" --cron --verbose + timestamp=$((timestamp+900)) +done + +saveSnapshotList "${POOL_NAME}" "${RESULT}" + +# hourly daily monthly +verifySnapshotList "${RESULT}" 73 3 1 "${RESULT_CHECKSUM}" + +# one more hour because of DST diff --git a/tests/2_dst_handling/sanoid.conf b/tests/2_dst_handling/sanoid.conf new file mode 100644 index 0000000..7ded3f8 --- /dev/null +++ b/tests/2_dst_handling/sanoid.conf @@ -0,0 +1,10 @@ +[sanoid-test-2] + use_template = production + +[template_production] + hourly = 36 + daily = 30 + monthly = 3 + yearly = 0 + autosnap = yes + autoprune = no From 8d4484a2d1641789c1e667c03e69773e96cde6ea Mon Sep 17 00:00:00 2001 From: Christoph Klaffl Date: Wed, 6 Dec 2017 23:24:34 +0100 Subject: [PATCH 06/29] exit with error code upon failure --- debian/changelog | 4 ++++ tests/common/lib.sh | 1 + 2 files changed, 5 insertions(+) diff --git a/debian/changelog b/debian/changelog index ab530b0..beb6584 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,7 @@ +sanoid (1.4.17-SNAPSHOT) unstable; urgency=medium + +-- Jim Salter Wed, 9 Aug 2017 12:28:49 -0400 + sanoid (1.4.16) unstable; urgency=medium * merged @hrast01's extended fix to support -o option1=val,option2=val passthrough to SSH. 
merged @JakobR's diff --git a/tests/common/lib.sh b/tests/common/lib.sh index 2c15e9b..78f128b 100644 --- a/tests/common/lib.sh +++ b/tests/common/lib.sh @@ -103,4 +103,5 @@ function verifySnapshotList { echo "TEST FAILED:" >&2 echo -n -e "${message}" >&2 + exit 1 } From 53894b2855016e6398609995ee383c173fc2ff99 Mon Sep 17 00:00:00 2001 From: Christoph Klaffl Date: Wed, 6 Dec 2017 23:36:13 +0100 Subject: [PATCH 07/29] indentation fix --- sanoid | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sanoid b/sanoid index 1c470c3..12d746d 100755 --- a/sanoid +++ b/sanoid @@ -317,7 +317,7 @@ sub take_snapshots { $lastpreferred = timelocal(@preferredtime); if ($dstOffset ne 0) { - # timelocal doesn't take DST into account + # timelocal doesn't take DST into account $lastpreferred += $dstOffset; # DST ended, avoid duplicates $dateSuffix = "_y"; From ceb1397ef084941b92bc2348d85b39020cac49a7 Mon Sep 17 00:00:00 2001 From: Christoph Klaffl Date: Wed, 6 Dec 2017 23:40:23 +0100 Subject: [PATCH 08/29] Revert "exit with error code upon failure" This reverts commit 8d4484a2d1641789c1e667c03e69773e96cde6ea. --- debian/changelog | 4 ---- tests/common/lib.sh | 1 - 2 files changed, 5 deletions(-) diff --git a/debian/changelog b/debian/changelog index beb6584..ab530b0 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,7 +1,3 @@ -sanoid (1.4.17-SNAPSHOT) unstable; urgency=medium - --- Jim Salter Wed, 9 Aug 2017 12:28:49 -0400 - sanoid (1.4.16) unstable; urgency=medium * merged @hrast01's extended fix to support -o option1=val,option2=val passthrough to SSH. 
merged @JakobR's diff --git a/tests/common/lib.sh b/tests/common/lib.sh index 78f128b..2c15e9b 100644 --- a/tests/common/lib.sh +++ b/tests/common/lib.sh @@ -103,5 +103,4 @@ function verifySnapshotList { echo "TEST FAILED:" >&2 echo -n -e "${message}" >&2 - exit 1 } From 371f8ff318ad749bcb9374b8acc723ac77da841d Mon Sep 17 00:00:00 2001 From: Christoph Klaffl Date: Wed, 6 Dec 2017 23:41:15 +0100 Subject: [PATCH 09/29] exit with error code upon failure --- tests/common/lib.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/common/lib.sh b/tests/common/lib.sh index 2c15e9b..78f128b 100644 --- a/tests/common/lib.sh +++ b/tests/common/lib.sh @@ -103,4 +103,5 @@ function verifySnapshotList { echo "TEST FAILED:" >&2 echo -n -e "${message}" >&2 + exit 1 } From 742db32e686520dc22ea11a8f831ed15618fdf9c Mon Sep 17 00:00:00 2001 From: Martin van Wingerden Date: Fri, 29 Dec 2017 20:02:01 +0100 Subject: [PATCH 10/29] Made two INFO prints mutable Added a quiet check for two non-controllable INFO prints Fixes: #182 Signed-off-by: Martin van Wingerden --- sanoid | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sanoid b/sanoid index d6e58ce..b995924 100755 --- a/sanoid +++ b/sanoid @@ -235,7 +235,7 @@ sub prune_snapshots { foreach my $snap( @prunesnaps ){ if ($args{'verbose'}) { print "INFO: pruning $snap ... \n"; } if (iszfsbusy($path)) { - print "INFO: deferring pruning of $snap - $path is currently in zfs send or receive.\n"; + if ($args{'verbose'}) { print "INFO: deferring pruning of $snap - $path is currently in zfs send or receive.\n"; } } else { if (! 
$args{'readonly'}) { system($zfs, "destroy",$snap) == 0 or warn "could not remove $snap : $?"; } } @@ -244,7 +244,7 @@ sub prune_snapshots { $forcecacheupdate = 1; %snaps = getsnaps(%config,$cacheTTL,$forcecacheupdate); } else { - print "INFO: deferring snapshot pruning - valid pruning lock held by other sanoid process.\n"; + if ($args{'verbose'}) { print "INFO: deferring snapshot pruning - valid pruning lock held by other sanoid process.\n"; } } } } From 1f64c9c35aac5d45c433833af56bba4126a0bcbd Mon Sep 17 00:00:00 2001 From: Christoph Klaffl Date: Tue, 20 Feb 2018 18:16:35 +0100 Subject: [PATCH 11/29] let monitor-health check the capacity too --- sanoid | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/sanoid b/sanoid index d6e58ce..6b1257e 100755 --- a/sanoid +++ b/sanoid @@ -798,6 +798,17 @@ sub check_zpool() { ## determine health of zpool and subsequent error status if ($health eq "ONLINE" ) { $state = "OK"; + + # check capacity + my $capn = $cap; + $capn =~ s/\D//g; + + if ($capn >= 80) { + $state = "WARNING"; + } + if ($capn >= 95) { + $state = "CRITICAL"; + } } else { if ($health eq "DEGRADED") { $state = "WARNING"; From 06d029db684f49a577a4d912f82c4ea0462e137e Mon Sep 17 00:00:00 2001 From: Christoph Klaffl Date: Thu, 22 Feb 2018 16:53:30 +0100 Subject: [PATCH 12/29] remove destroyed snapshots from cache file instead of regenerating the whole thing (which can take very long on systems with many snapshots and/or datasets) --- sanoid | 47 +++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 43 insertions(+), 4 deletions(-) diff --git a/sanoid b/sanoid index d6e58ce..238f955 100755 --- a/sanoid +++ b/sanoid @@ -39,6 +39,7 @@ my %config = init($conf_file,$default_conf_file); # if we call getsnaps(%config,1) it will forcibly update the cache, TTL or no TTL my $forcecacheupdate = 0; +my $cache = '/var/cache/sanoidsnapshots.txt'; my $cacheTTL = 900; # 15 minutes my %snaps = getsnaps( \%config, $cacheTTL, $forcecacheupdate ); @@ -232,17 
+233,23 @@ sub prune_snapshots { # print "found some snaps to prune!\n" if (checklock('sanoid_pruning')) { writelock('sanoid_pruning'); + my @pruned; foreach my $snap( @prunesnaps ){ if ($args{'verbose'}) { print "INFO: pruning $snap ... \n"; } if (iszfsbusy($path)) { print "INFO: deferring pruning of $snap - $path is currently in zfs send or receive.\n"; } else { - if (! $args{'readonly'}) { system($zfs, "destroy",$snap) == 0 or warn "could not remove $snap : $?"; } + if (! $args{'readonly'}) { + if (system($zfs, "destroy", $snap) == 0) { + push(@pruned, $snap); + } else { + warn "could not remove $snap : $?"; + } + } } } removelock('sanoid_pruning'); - $forcecacheupdate = 1; - %snaps = getsnaps(%config,$cacheTTL,$forcecacheupdate); + removecachedsnapshots(@pruned); } else { print "INFO: deferring snapshot pruning - valid pruning lock held by other sanoid process.\n"; } @@ -484,7 +491,6 @@ sub getsnaps { my ($config, $cacheTTL, $forcecacheupdate) = @_; - my $cache = '/var/cache/sanoidsnapshots.txt'; my @rawsnaps; my ($dev, $ino, $mode, $nlink, $uid, $gid, $rdev, $size, $atime, $mtime, $ctime, $blksize, $blocks) = stat($cache); @@ -1056,6 +1062,39 @@ sub getchilddatasets { return @children; } +#######################################################################################################################3 +#######################################################################################################################3 +#######################################################################################################################3 + +sub removecachedsnapshots { + my @prunedlist = shift; + my %pruned = map { $_ => 1 } @prunedlist; + + if (checklock('sanoid_cacheupdate')) { + writelock('sanoid_cacheupdate'); + + if ($args{'verbose'}) { + print "INFO: removing destroyed snapshots from cache.\n"; + } + open FH, "< $cache"; + my @rawsnaps = ; + close FH; + + open FH, "> $cache" or die 'Could not write to $cache!\n'; + foreach my $snapline ( 
@rawsnaps ) { + my @columns = split("\t", $snapline); + my $snap = $columns[0]; + print FH $snapline unless ( exists($pruned{$snap}) ); + } + close FH; + + removelock('sanoid_cacheupdate'); + %snaps = getsnaps(\%config,$cacheTTL,$forcecacheupdate); + } else { + if ($args{'verbose'}) { print "WARN: skipping cache update (snapshot removal) - valid cache update lock held by another sanoid process.\n"; } + } +} + __END__ =head1 NAME From d0f1445784c4e54470b72588d9d8954d0e2bf258 Mon Sep 17 00:00:00 2001 From: Christoph Klaffl Date: Thu, 22 Feb 2018 17:29:58 +0100 Subject: [PATCH 13/29] defer cache updates after snapshot pruning and do them after all pruning is done (waiting for the cache update lock if necessary) --- sanoid | 74 ++++++++++++++++++++++++++++++++++++---------------------- 1 file changed, 46 insertions(+), 28 deletions(-) diff --git a/sanoid b/sanoid index 238f955..e73b4a4 100755 --- a/sanoid +++ b/sanoid @@ -42,6 +42,7 @@ my $forcecacheupdate = 0; my $cache = '/var/cache/sanoidsnapshots.txt'; my $cacheTTL = 900; # 15 minutes my %snaps = getsnaps( \%config, $cacheTTL, $forcecacheupdate ); +my %pruned; my %snapsbytype = getsnapsbytype( \%config, \%snaps ); @@ -233,7 +234,6 @@ sub prune_snapshots { # print "found some snaps to prune!\n" if (checklock('sanoid_pruning')) { writelock('sanoid_pruning'); - my @pruned; foreach my $snap( @prunesnaps ){ if ($args{'verbose'}) { print "INFO: pruning $snap ... \n"; } if (iszfsbusy($path)) { @@ -241,7 +241,7 @@ sub prune_snapshots { } else { if (! 
$args{'readonly'}) { if (system($zfs, "destroy", $snap) == 0) { - push(@pruned, $snap); + $pruned{$snap} = 1; } else { warn "could not remove $snap : $?"; } @@ -249,7 +249,7 @@ sub prune_snapshots { } } removelock('sanoid_pruning'); - removecachedsnapshots(@pruned); + removecachedsnapshots(0); } else { print "INFO: deferring snapshot pruning - valid pruning lock held by other sanoid process.\n"; } @@ -258,7 +258,9 @@ sub prune_snapshots { } } - + # if there were any deferred cache updates, + # do them now and wait if necessary + removecachedsnapshots(1); } # end prune_snapshots @@ -1067,32 +1069,48 @@ sub getchilddatasets { #######################################################################################################################3 sub removecachedsnapshots { - my @prunedlist = shift; - my %pruned = map { $_ => 1 } @prunedlist; + my $wait = shift; - if (checklock('sanoid_cacheupdate')) { - writelock('sanoid_cacheupdate'); - - if ($args{'verbose'}) { - print "INFO: removing destroyed snapshots from cache.\n"; - } - open FH, "< $cache"; - my @rawsnaps = ; - close FH; - - open FH, "> $cache" or die 'Could not write to $cache!\n'; - foreach my $snapline ( @rawsnaps ) { - my @columns = split("\t", $snapline); - my $snap = $columns[0]; - print FH $snapline unless ( exists($pruned{$snap}) ); - } - close FH; - - removelock('sanoid_cacheupdate'); - %snaps = getsnaps(\%config,$cacheTTL,$forcecacheupdate); - } else { - if ($args{'verbose'}) { print "WARN: skipping cache update (snapshot removal) - valid cache update lock held by another sanoid process.\n"; } + if (not %pruned) { + return; } + + my $unlocked = checklock('sanoid_cacheupdate'); + + if ($wait != 1 && not $unlocked) { + if ($args{'verbose'}) { print "INFO: deferring cache update (snapshot removal) - valid cache update lock held by another sanoid process.\n"; } + return; + } + + # wait until we can get a lock to do our cache changes + while (not $unlocked) { + if ($args{'verbose'}) { print "INFO: waiting 
for cache update lock held by another sanoid process.\n"; } + sleep(10); + $unlocked = checklock('sanoid_cacheupdate'); + } + + writelock('sanoid_cacheupdate'); + + if ($args{'verbose'}) { + print "INFO: removing destroyed snapshots from cache.\n"; + } + open FH, "< $cache"; + my @rawsnaps = ; + close FH; + + open FH, "> $cache" or die 'Could not write to $cache!\n'; + foreach my $snapline ( @rawsnaps ) { + my @columns = split("\t", $snapline); + my $snap = $columns[0]; + print FH $snapline unless ( exists($pruned{$snap}) ); + } + close FH; + + removelock('sanoid_cacheupdate'); + %snaps = getsnaps(\%config,$cacheTTL,$forcecacheupdate); + + # clear hash + undef %pruned; } __END__ From 6c695f1a86274d8c7763833076b3b17e3a415b1b Mon Sep 17 00:00:00 2001 From: Christoph Klaffl Date: Mon, 26 Feb 2018 18:16:06 +0100 Subject: [PATCH 14/29] allow monitor-health to optionally check zpool capacity too by providing limits along the flag --- README.md | 2 +- sanoid | 89 ++++++++++++++++++++++++++++++++++++++++--------------- 2 files changed, 66 insertions(+), 25 deletions(-) diff --git a/README.md b/README.md index 9c4e6ba..6239b79 100644 --- a/README.md +++ b/README.md @@ -60,7 +60,7 @@ Which would be enough to tell sanoid to take and keep 36 hourly snapshots, 30 da + --monitor-health - This option is designed to be run by a Nagios monitoring system. It reports on the health of the zpool your filesystems are on. It only monitors filesystems that are configured in the sanoid.conf file. + This option is designed to be run by a Nagios monitoring system. It reports on the health of the zpool your filesystems are on. It only monitors filesystems that are configured in the sanoid.conf file. Optionally it can check capacity limits too by appending them like '=80,95" (>= 80% warning, >=95% critical). 
+ --force-update diff --git a/sanoid b/sanoid index 6b1257e..9dd6198 100755 --- a/sanoid +++ b/sanoid @@ -17,7 +17,7 @@ use Time::Local; # to parse dates in reverse my %args = ("configdir" => "/etc/sanoid"); GetOptions(\%args, "verbose", "debug", "cron", "readonly", "quiet", - "monitor-health", "force-update", "configdir=s", + "monitor-health:s", "force-update", "configdir=s", "monitor-snapshots", "take-snapshots", "prune-snapshots" ) or pod2usage(2); @@ -51,7 +51,7 @@ my @params = ( \%config, \%snaps, \%snapsbytype, \%snapsbypath ); if ($args{'debug'}) { $args{'verbose'}=1; blabber (@params); } if ($args{'monitor-snapshots'}) { monitor_snapshots(@params); } -if ($args{'monitor-health'}) { monitor_health(@params); } +if (defined($args{'monitor-health'})) { monitor_health(@params); } if ($args{'force-update'}) { my $snaps = getsnaps( \%config, $cacheTTL, 1 ); } if ($args{'cron'}) { @@ -76,13 +76,32 @@ sub monitor_health { my @messages; my $errlevel=0; + my %capacitylimits; + + # if provided, parse capacity limits + if ($args{'monitor-health'} ne "") { + my @values = split(',', $args{'monitor-health'}); + + if (!check_capacity_limit($values[0])) { + die "ERROR: invalid zpool capacity warning limit!\n"; + } + $capacitylimits{"warn"} = $values[0]; + + if (scalar @values > 1) { + if (!check_capacity_limit($values[1])) { + die "ERROR: invalid zpool capacity critical limit!\n"; + } + $capacitylimits{"crit"} = $values[1]; + } + } + foreach my $path (keys %{ $snapsbypath}) { my @pool = split ('/',$path); $pools{$pool[0]}=1; } foreach my $pool (keys %pools) { - my ($exitcode, $msg) = check_zpool($pool,2); + my ($exitcode, $msg) = check_zpool($pool,2,\%capacitylimits); if ($exitcode > $errlevel) { $errlevel = $exitcode; } chomp $msg; push (@messages, $msg); @@ -748,6 +767,8 @@ sub check_zpool() { my $pool=shift; my $verbose=shift; + my $capacitylimitsref=shift; + my %capacitylimits=%$capacitylimitsref; my $size=""; my $used=""; @@ -799,15 +820,21 @@ sub check_zpool() { if 
($health eq "ONLINE" ) { $state = "OK"; - # check capacity - my $capn = $cap; - $capn =~ s/\D//g; + if (%capacitylimits) { + # check capacity + my $capn = $cap; + $capn =~ s/\D//g; - if ($capn >= 80) { - $state = "WARNING"; - } - if ($capn >= 95) { - $state = "CRITICAL"; + if ($capacitylimits{"warn"}) { + if ($capn >= $capacitylimits{"warn"}) { + $state = "WARNING"; + } + } + if ($capacitylimits{"crit"}) { + if ($capn >= $capacitylimits{"crit"}) { + $state = "CRITICAL"; + } + } } } else { if ($health eq "DEGRADED") { @@ -911,6 +938,20 @@ sub check_zpool() { return ($ERRORS{$state},$msg); } # end check_zpool() +sub check_capacity_limit() { + my $value = shift; + + if ($value !~ /^\d+\z/) { + return undef; + } + + if ($value < 1 || $value > 100) { + return undef; + } + + return 1 +} + ###################################################################################################### ###################################################################################################### ###################################################################################################### @@ -1081,19 +1122,19 @@ Assumes --cron --verbose if no other arguments (other than configdir) are specif Options: - --configdir=DIR Specify a directory to find config file sanoid.conf + --configdir=DIR Specify a directory to find config file sanoid.conf - --cron Creates snapshots and purges expired snapshots - --verbose Prints out additional information during a sanoid run - --readonly Simulates creation/deletion of snapshots - --quiet Suppresses non-error output - --force-update Clears out sanoid's zfs snapshot cache + --cron Creates snapshots and purges expired snapshots + --verbose Prints out additional information during a sanoid run + --readonly Simulates creation/deletion of snapshots + --quiet Suppresses non-error output + --force-update Clears out sanoid's zfs snapshot cache - --monitor-health Reports on zpool "health", in a Nagios compatible format - --monitor-snapshots 
Reports on snapshot "health", in a Nagios compatible format - --take-snapshots Creates snapshots as specified in sanoid.conf - --prune-snapshots Purges expired snapshots as specified in sanoid.conf + --monitor-health[=wlimit[,climit]] Reports on zpool "health", in a Nagios compatible format + --monitor-snapshots Reports on snapshot "health", in a Nagios compatible format + --take-snapshots Creates snapshots as specified in sanoid.conf + --prune-snapshots Purges expired snapshots as specified in sanoid.conf - --help Prints this helptext - --version Prints the version number - --debug Prints out a lot of additional information during a sanoid run + --help Prints this helptext + --version Prints the version number + --debug Prints out a lot of additional information during a sanoid run From 6f29bed441aa0c4db015271ceb3b24931286cc3b Mon Sep 17 00:00:00 2001 From: Christoph Klaffl Date: Tue, 27 Feb 2018 17:53:04 +0100 Subject: [PATCH 15/29] Revert "allow monitor-health to optionally check zpool capacity too by providing limits along the flag" This reverts commit 6c695f1a86274d8c7763833076b3b17e3a415b1b. --- README.md | 2 +- sanoid | 89 +++++++++++++++---------------------------------------- 2 files changed, 25 insertions(+), 66 deletions(-) diff --git a/README.md b/README.md index 6239b79..9c4e6ba 100644 --- a/README.md +++ b/README.md @@ -60,7 +60,7 @@ Which would be enough to tell sanoid to take and keep 36 hourly snapshots, 30 da + --monitor-health - This option is designed to be run by a Nagios monitoring system. It reports on the health of the zpool your filesystems are on. It only monitors filesystems that are configured in the sanoid.conf file. Optionally it can check capacity limits too by appending them like '=80,95" (>= 80% warning, >=95% critical). + This option is designed to be run by a Nagios monitoring system. It reports on the health of the zpool your filesystems are on. It only monitors filesystems that are configured in the sanoid.conf file. 
+ --force-update diff --git a/sanoid b/sanoid index 9dd6198..6b1257e 100755 --- a/sanoid +++ b/sanoid @@ -17,7 +17,7 @@ use Time::Local; # to parse dates in reverse my %args = ("configdir" => "/etc/sanoid"); GetOptions(\%args, "verbose", "debug", "cron", "readonly", "quiet", - "monitor-health:s", "force-update", "configdir=s", + "monitor-health", "force-update", "configdir=s", "monitor-snapshots", "take-snapshots", "prune-snapshots" ) or pod2usage(2); @@ -51,7 +51,7 @@ my @params = ( \%config, \%snaps, \%snapsbytype, \%snapsbypath ); if ($args{'debug'}) { $args{'verbose'}=1; blabber (@params); } if ($args{'monitor-snapshots'}) { monitor_snapshots(@params); } -if (defined($args{'monitor-health'})) { monitor_health(@params); } +if ($args{'monitor-health'}) { monitor_health(@params); } if ($args{'force-update'}) { my $snaps = getsnaps( \%config, $cacheTTL, 1 ); } if ($args{'cron'}) { @@ -76,32 +76,13 @@ sub monitor_health { my @messages; my $errlevel=0; - my %capacitylimits; - - # if provided, parse capacity limits - if ($args{'monitor-health'} ne "") { - my @values = split(',', $args{'monitor-health'}); - - if (!check_capacity_limit($values[0])) { - die "ERROR: invalid zpool capacity warning limit!\n"; - } - $capacitylimits{"warn"} = $values[0]; - - if (scalar @values > 1) { - if (!check_capacity_limit($values[1])) { - die "ERROR: invalid zpool capacity critical limit!\n"; - } - $capacitylimits{"crit"} = $values[1]; - } - } - foreach my $path (keys %{ $snapsbypath}) { my @pool = split ('/',$path); $pools{$pool[0]}=1; } foreach my $pool (keys %pools) { - my ($exitcode, $msg) = check_zpool($pool,2,\%capacitylimits); + my ($exitcode, $msg) = check_zpool($pool,2); if ($exitcode > $errlevel) { $errlevel = $exitcode; } chomp $msg; push (@messages, $msg); @@ -767,8 +748,6 @@ sub check_zpool() { my $pool=shift; my $verbose=shift; - my $capacitylimitsref=shift; - my %capacitylimits=%$capacitylimitsref; my $size=""; my $used=""; @@ -820,21 +799,15 @@ sub check_zpool() { if 
($health eq "ONLINE" ) { $state = "OK"; - if (%capacitylimits) { - # check capacity - my $capn = $cap; - $capn =~ s/\D//g; + # check capacity + my $capn = $cap; + $capn =~ s/\D//g; - if ($capacitylimits{"warn"}) { - if ($capn >= $capacitylimits{"warn"}) { - $state = "WARNING"; - } - } - if ($capacitylimits{"crit"}) { - if ($capn >= $capacitylimits{"crit"}) { - $state = "CRITICAL"; - } - } + if ($capn >= 80) { + $state = "WARNING"; + } + if ($capn >= 95) { + $state = "CRITICAL"; } } else { if ($health eq "DEGRADED") { @@ -938,20 +911,6 @@ sub check_zpool() { return ($ERRORS{$state},$msg); } # end check_zpool() -sub check_capacity_limit() { - my $value = shift; - - if ($value !~ /^\d+\z/) { - return undef; - } - - if ($value < 1 || $value > 100) { - return undef; - } - - return 1 -} - ###################################################################################################### ###################################################################################################### ###################################################################################################### @@ -1122,19 +1081,19 @@ Assumes --cron --verbose if no other arguments (other than configdir) are specif Options: - --configdir=DIR Specify a directory to find config file sanoid.conf + --configdir=DIR Specify a directory to find config file sanoid.conf - --cron Creates snapshots and purges expired snapshots - --verbose Prints out additional information during a sanoid run - --readonly Simulates creation/deletion of snapshots - --quiet Suppresses non-error output - --force-update Clears out sanoid's zfs snapshot cache + --cron Creates snapshots and purges expired snapshots + --verbose Prints out additional information during a sanoid run + --readonly Simulates creation/deletion of snapshots + --quiet Suppresses non-error output + --force-update Clears out sanoid's zfs snapshot cache - --monitor-health[=wlimit[,climit]] Reports on zpool "health", in a Nagios compatible format - 
--monitor-snapshots Reports on snapshot "health", in a Nagios compatible format - --take-snapshots Creates snapshots as specified in sanoid.conf - --prune-snapshots Purges expired snapshots as specified in sanoid.conf + --monitor-health Reports on zpool "health", in a Nagios compatible format + --monitor-snapshots Reports on snapshot "health", in a Nagios compatible format + --take-snapshots Creates snapshots as specified in sanoid.conf + --prune-snapshots Purges expired snapshots as specified in sanoid.conf - --help Prints this helptext - --version Prints the version number - --debug Prints out a lot of additional information during a sanoid run + --help Prints this helptext + --version Prints the version number + --debug Prints out a lot of additional information during a sanoid run From 01398789f60f31f67002666daacce715c6e626a5 Mon Sep 17 00:00:00 2001 From: Christoph Klaffl Date: Tue, 27 Feb 2018 17:53:32 +0100 Subject: [PATCH 16/29] Revert "let monitor-health check the capacity too" This reverts commit 1f64c9c35aac5d45c433833af56bba4126a0bcbd. 
--- sanoid | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/sanoid b/sanoid index 6b1257e..d6e58ce 100755 --- a/sanoid +++ b/sanoid @@ -798,17 +798,6 @@ sub check_zpool() { ## determine health of zpool and subsequent error status if ($health eq "ONLINE" ) { $state = "OK"; - - # check capacity - my $capn = $cap; - $capn =~ s/\D//g; - - if ($capn >= 80) { - $state = "WARNING"; - } - if ($capn >= 95) { - $state = "CRITICAL"; - } } else { if ($health eq "DEGRADED") { $state = "WARNING"; From f961a9f447344b0b5558b3c8382aec3c2b74632e Mon Sep 17 00:00:00 2001 From: Christoph Klaffl Date: Tue, 27 Feb 2018 17:58:51 +0100 Subject: [PATCH 17/29] implemented monitor-capacity flag for checking zpool capacity limits in the nagios monitoring format --- README.md | 5 ++ sanoid | 146 ++++++++++++++++++++++++++++++++++++++++++++++++------ 2 files changed, 137 insertions(+), 14 deletions(-) diff --git a/README.md b/README.md index 9c4e6ba..1b3b37f 100644 --- a/README.md +++ b/README.md @@ -62,6 +62,11 @@ Which would be enough to tell sanoid to take and keep 36 hourly snapshots, 30 da This option is designed to be run by a Nagios monitoring system. It reports on the health of the zpool your filesystems are on. It only monitors filesystems that are configured in the sanoid.conf file. ++ --monitor-capacity + + This option is designed to be run by a Nagios monitoring system. It reports on the capacity of the zpool your filesystems are on. It only monitors pools that are configured in the sanoid.conf file. The default limits are 80% for the warning and 95% for the critical state. Those can be overridden by providing them + along like '=80,95". + + --force-update This clears out sanoid's zfs snapshot listing cache. This is normally not needed. 
diff --git a/sanoid b/sanoid index d6e58ce..660dfb2 100755 --- a/sanoid +++ b/sanoid @@ -18,7 +18,8 @@ use Time::Local; # to parse dates in reverse my %args = ("configdir" => "/etc/sanoid"); GetOptions(\%args, "verbose", "debug", "cron", "readonly", "quiet", "monitor-health", "force-update", "configdir=s", - "monitor-snapshots", "take-snapshots", "prune-snapshots" + "monitor-snapshots", "take-snapshots", "prune-snapshots", + "monitor-capacity:s" ) or pod2usage(2); # If only config directory (or nothing) has been specified, default to --cron --verbose @@ -52,6 +53,7 @@ my @params = ( \%config, \%snaps, \%snapsbytype, \%snapsbypath ); if ($args{'debug'}) { $args{'verbose'}=1; blabber (@params); } if ($args{'monitor-snapshots'}) { monitor_snapshots(@params); } if ($args{'monitor-health'}) { monitor_health(@params); } +if (defined($args{'monitor-capacity'})) { monitor_capacity(@params); } if ($args{'force-update'}) { my $snaps = getsnaps( \%config, $cacheTTL, 1 ); } if ($args{'cron'}) { @@ -174,6 +176,57 @@ sub monitor_snapshots { exit $errorlevel; } + +#################################################################################### +#################################################################################### +#################################################################################### + +sub monitor_capacity { + my ($config, $snaps, $snapsbytype, $snapsbypath) = @_; + my %pools; + my @messages; + my $errlevel=0; + + my %capacitylimits = ( + "warn" => 80, + "crit" => 95 + ); + + # if provided, parse capacity limits + if ($args{'monitor-capacity'} ne "") { + my @values = split(',', $args{'monitor-capacity'}); + + if (!check_capacity_limit($values[0])) { + die "ERROR: invalid zpool capacity warning limit!\n"; + } + $capacitylimits{"warn"} = $values[0]; + + if (scalar @values > 1) { + if (!check_capacity_limit($values[1])) { + die "ERROR: invalid zpool capacity critical limit!\n"; + } + $capacitylimits{"crit"} = $values[1]; + } + } + + foreach my $path 
(keys %{ $snapsbypath}) { + my @pool = split ('/',$path); + $pools{$pool[0]}=1; + } + + foreach my $pool (keys %pools) { + my ($exitcode, $msg) = check_zpool_capacity($pool,\%capacitylimits); + if ($exitcode > $errlevel) { $errlevel = $exitcode; } + chomp $msg; + push (@messages, $msg); + } + + my @warninglevels = ('','*** WARNING *** ','*** CRITICAL *** '); + my $message = $warninglevels[$errlevel] . join (', ',@messages); + print "$message\n"; + exit $errlevel; +} + #################################################################################### #################################################################################### #################################################################################### @@ -900,6 +953,70 @@ sub check_zpool() { return ($ERRORS{$state},$msg); } # end check_zpool() +sub check_capacity_limit() { + my $value = shift; + + if ($value !~ /^\d+\z/) { + return undef; + } + + if ($value < 1 || $value > 100) { + return undef; + } + + return 1 +} + +sub check_zpool_capacity() { + my %ERRORS=('DEPENDENT'=>4,'UNKNOWN'=>3,'OK'=>0,'WARNING'=>1,'CRITICAL'=>2); + my $state="UNKNOWN"; + my $msg="FAILURE"; + + my $pool=shift; + my $capacitylimitsref=shift; + my %capacitylimits=%$capacitylimitsref; + + my $statcommand="/sbin/zpool list -H -o cap $pool"; + + if (! 
open STAT, "$statcommand|") { + print ("$state '$statcommand' command returns no result!\n"); + exit $ERRORS{$state}; + } + + my $line = ; + close(STAT); + + chomp $line; + my @row = split(/ +/, $line); + my $cap=$row[0]; + + ## check for valid capacity value + if ($cap !~ m/^[0-9]{1,3}%$/ ) { + $state = "CRITICAL"; + $msg = sprintf "ZPOOL {%s} does not exist and/or is not responding!\n", $pool; + print $state, " ", $msg; + exit ($ERRORS{$state}); + } + + $state="OK"; + + # check capacity + my $capn = $cap; + $capn =~ s/\D//g; + + if ($capn >= $capacitylimits{"warn"}) { + $state = "WARNING"; + } + + if ($capn >= $capacitylimits{"crit"}) { + $state = "CRITICAL"; + } + + $msg = sprintf "ZPOOL %s : %s\n", $pool, $cap; + $msg = "$state $msg"; + return ($ERRORS{$state},$msg); +} # end check_zpool_capacity() + ###################################################################################################### ###################################################################################################### ###################################################################################################### @@ -1070,19 +1187,20 @@ Assumes --cron --verbose if no other arguments (other than configdir) are specif Options: - --configdir=DIR Specify a directory to find config file sanoid.conf + --configdir=DIR Specify a directory to find config file sanoid.conf - --cron Creates snapshots and purges expired snapshots - --verbose Prints out additional information during a sanoid run - --readonly Simulates creation/deletion of snapshots - --quiet Suppresses non-error output - --force-update Clears out sanoid's zfs snapshot cache + --cron Creates snapshots and purges expired snapshots + --verbose Prints out additional information during a sanoid run + --readonly Simulates creation/deletion of snapshots + --quiet Suppresses non-error output + --force-update Clears out sanoid's zfs snapshot cache - --monitor-health Reports on zpool "health", in a Nagios compatible format - 
--monitor-snapshots Reports on snapshot "health", in a Nagios compatible format - --take-snapshots Creates snapshots as specified in sanoid.conf - --prune-snapshots Purges expired snapshots as specified in sanoid.conf + --monitor-health Reports on zpool "health", in a Nagios compatible format + --monitor-capacity[=wlimit[,climit]] Reports on zpool capacity, in a Nagios compatible format + --monitor-snapshots Reports on snapshot "health", in a Nagios compatible format + --take-snapshots Creates snapshots as specified in sanoid.conf + --prune-snapshots Purges expired snapshots as specified in sanoid.conf - --help Prints this helptext - --version Prints the version number - --debug Prints out a lot of additional information during a sanoid run + --help Prints this helptext + --version Prints the version number + --debug Prints out a lot of additional information during a sanoid run From b405b589801dd05ea12be500766ef333638b3e80 Mon Sep 17 00:00:00 2001 From: Christoph Klaffl Date: Thu, 1 Mar 2018 09:22:22 +0100 Subject: [PATCH 18/29] let monitor-capacity parse the limits from the configuration file --- README.md | 3 +- sanoid | 92 ++++++++++++++++++++++++-------------------- sanoid.defaults.conf | 4 ++ 3 files changed, 55 insertions(+), 44 deletions(-) diff --git a/README.md b/README.md index 1b3b37f..324fd30 100644 --- a/README.md +++ b/README.md @@ -64,8 +64,7 @@ Which would be enough to tell sanoid to take and keep 36 hourly snapshots, 30 da + --monitor-capacity - This option is designed to be run by a Nagios monitoring system. It reports on the capacity of the zpool your filesystems are on. It only monitors pools that are configured in the sanoid.conf file. The default limits are 80% for the warning and 95% for the critical state. Those can be overridden by providing them - along like '=80,95". + This option is designed to be run by a Nagios monitoring system. It reports on the capacity of the zpool your filesystems are on. 
It only monitors pools that are configured in the sanoid.conf file. + --force-update diff --git a/sanoid b/sanoid index 660dfb2..a4a256a 100755 --- a/sanoid +++ b/sanoid @@ -19,7 +19,7 @@ my %args = ("configdir" => "/etc/sanoid"); GetOptions(\%args, "verbose", "debug", "cron", "readonly", "quiet", "monitor-health", "force-update", "configdir=s", "monitor-snapshots", "take-snapshots", "prune-snapshots", - "monitor-capacity:s" + "monitor-capacity" ) or pod2usage(2); # If only config directory (or nothing) has been specified, default to --cron --verbose @@ -53,7 +53,7 @@ my @params = ( \%config, \%snaps, \%snapsbytype, \%snapsbypath ); if ($args{'debug'}) { $args{'verbose'}=1; blabber (@params); } if ($args{'monitor-snapshots'}) { monitor_snapshots(@params); } if ($args{'monitor-health'}) { monitor_health(@params); } -if (defined($args{'monitor-capacity'})) { monitor_capacity(@params); } +if ($args{'monitor-capacity'}) { monitor_capacity(@params); } if ($args{'force-update'}) { my $snaps = getsnaps( \%config, $cacheTTL, 1 ); } if ($args{'cron'}) { @@ -187,35 +187,39 @@ sub monitor_capacity { my @messages; my $errlevel=0; - my %capacitylimits = ( - "warn" => 80, - "crit" => 95 - ); + # build pool list with corresponding capacity limits + foreach my $section (keys %config) { + my @pool = split ('/',$section); - # if provided, parse capacity limits - if ($args{'monitor-capacity'} ne "") { - my @values = split(',', $args{'monitor-capacity'}); + if (scalar @pool == 1 || !defined($pools{$pool[0]}) ) { + my %capacitylimits; - if (!check_capacity_limit($values[0])) { - die "ERROR: invalid zpool capacity warning limit!\n"; - } - $capacitylimits{"warn"} = $values[0]; + if (!check_capacity_limit($config{$section}{'capacity_warn'})) { + die "ERROR: invalid zpool capacity warning limit!\n"; + } - if (scalar @values > 1) { - if (!check_capacity_limit($values[1])) { + if ($config{$section}{'capacity_warn'} != 0) { + $capacitylimits{'warn'} = $config{$section}{'capacity_warn'}; + } + 
+ if (!check_capacity_limit($config{$section}{'capacity_crit'})) { die "ERROR: invalid zpool capacity critical limit!\n"; } - $capacitylimits{"crit"} = $values[1]; - } - } - foreach my $path (keys %{ $snapsbypath}) { - my @pool = split ('/',$path); - $pools{$pool[0]}=1; + if ($config{$section}{'capacity_crit'} != 0) { + $capacitylimits{'crit'} = $config{$section}{'capacity_crit'}; + } + + if (%capacitylimits) { + $pools{$pool[0]} = \%capacitylimits; + } + } } foreach my $pool (keys %pools) { - my ($exitcode, $msg) = check_zpool_capacity($pool,\%capacitylimits); + my $capacitylimitsref = $pools{$pool}; + + my ($exitcode, $msg) = check_zpool_capacity($pool,\%$capacitylimitsref); if ($exitcode > $errlevel) { $errlevel = $exitcode; } chomp $msg; push (@messages, $msg); @@ -956,11 +960,11 @@ sub check_zpool() { sub check_capacity_limit() { my $value = shift; - if ($value !~ /^\d+\z/) { + if (!defined($value) || $value !~ /^\d+\z/) { return undef; } - if ($value < 1 || $value > 100) { + if ($value < 0 || $value > 100) { return undef; } @@ -1004,12 +1008,16 @@ sub check_zpool_capacity() { my $capn = $cap; $capn =~ s/\D//g; - if ($capn >= $capacitylimits{"warn"}) { - $state = "WARNING"; + if (defined($capacitylimits{"warn"})) { + if ($capn >= $capacitylimits{"warn"}) { + $state = "WARNING"; + } } - if ($capn >= $capacitylimits{"crit"}) { - $state = "CRITICAL"; + if (defined($capacitylimits{"crit"})) { + if ($capn >= $capacitylimits{"crit"}) { + $state = "CRITICAL"; + } } $msg = sprintf "ZPOOL %s : %s\n", $pool, $cap; @@ -1187,20 +1195,20 @@ Assumes --cron --verbose if no other arguments (other than configdir) are specif Options: - --configdir=DIR Specify a directory to find config file sanoid.conf + --configdir=DIR Specify a directory to find config file sanoid.conf - --cron Creates snapshots and purges expired snapshots - --verbose Prints out additional information during a sanoid run - --readonly Simulates creation/deletion of snapshots - --quiet Suppresses non-error 
output - --force-update Clears out sanoid's zfs snapshot cache + --cron Creates snapshots and purges expired snapshots + --verbose Prints out additional information during a sanoid run + --readonly Simulates creation/deletion of snapshots + --quiet Suppresses non-error output + --force-update Clears out sanoid's zfs snapshot cache - --monitor-health Reports on zpool "health", in a Nagios compatible format - --monitor-capacity[=wlimit[,climit]] Reports on zpool capacity, in a Nagios compatible format - --monitor-snapshots Reports on snapshot "health", in a Nagios compatible format - --take-snapshots Creates snapshots as specified in sanoid.conf - --prune-snapshots Purges expired snapshots as specified in sanoid.conf + --monitor-health Reports on zpool "health", in a Nagios compatible format + --monitor-capacity Reports on zpool capacity, in a Nagios compatible format + --monitor-snapshots Reports on snapshot "health", in a Nagios compatible format + --take-snapshots Creates snapshots as specified in sanoid.conf + --prune-snapshots Purges expired snapshots as specified in sanoid.conf - --help Prints this helptext - --version Prints the version number - --debug Prints out a lot of additional information during a sanoid run + --help Prints this helptext + --version Prints the version number + --debug Prints out a lot of additional information during a sanoid run diff --git a/sanoid.defaults.conf b/sanoid.defaults.conf index 35c804d..d86cc47 100644 --- a/sanoid.defaults.conf +++ b/sanoid.defaults.conf @@ -70,3 +70,7 @@ monthly_warn = 32 monthly_crit = 35 yearly_warn = 0 yearly_crit = 0 + +# default limits for capacity checks (if set to 0, limit will not be checked) +capacity_warn = 80 +capacity_crit = 95 From 8dfdd1a7169b97a8b6d2a6a6c84b6ac3826caa49 Mon Sep 17 00:00:00 2001 From: Janne Savikko Date: Thu, 15 Mar 2018 13:50:12 +0200 Subject: [PATCH 19/29] syncoid: fix --version in options list of helptext syncoid does not have verbose option. 
Replace verbose with version in options list of helptext. --- syncoid | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/syncoid b/syncoid index 7337f5b..29f0176 100755 --- a/syncoid +++ b/syncoid @@ -956,7 +956,7 @@ Options: --sshoption|o=OPTION Passes OPTION to ssh for remote usage. Can be specified multiple times --help Prints this helptext - --verbose Prints the version number + --version Prints the version number --debug Prints out a lot of additional information during a syncoid run --monitor-version Currently does nothing --quiet Suppresses non-error output From ecf2a852b5e0fa54131c878597c9b24cffb8a663 Mon Sep 17 00:00:00 2001 From: danielewood Date: Thu, 12 Apr 2018 17:43:08 -0700 Subject: [PATCH 20/29] Added support for ZStandard compression. Available in all major distros with a simple yum/apt-get/pkg. References: ZSTD Compression by Allan Jude - https://www.youtube.com/watch?v=hWnWEitDPlM Zstandard - https://facebook.github.io/zstd/ --- syncoid | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/syncoid b/syncoid index 7337f5b..baf5053 100755 --- a/syncoid +++ b/syncoid @@ -368,6 +368,18 @@ sub compressargset { decomrawcmd => '/usr/bin/pigz', decomargs => '-dc', }, + 'zstd-fast' => { + rawcmd => '/usr/bin/zstd', + args => '-3', + decomrawcmd => '/usr/bin/zstd', + decomargs => '-dc', + }, + 'zstd-slow' => { + rawcmd => '/usr/bin/zstd', + args => '-19', + decomrawcmd => '/usr/bin/zstd', + decomargs => '-dc', + }, 'lzo' => { rawcmd => '/usr/bin/lzop', args => '', @@ -378,7 +390,7 @@ sub compressargset { if ($value eq 'default') { $value = $DEFAULT_COMPRESSION; - } elsif (!(grep $value eq $_, ('gzip', 'pigz-fast', 'pigz-slow', 'lzo', 'default', 'none'))) { + } elsif (!(grep $value eq $_, ('gzip', 'pigz-fast', 'pigz-slow', 'zstd-fast', 'zstd-slow', 'lzo', 'default', 'none'))) { warn "Unrecognised compression value $value, defaulting to $DEFAULT_COMPRESSION"; $value = $DEFAULT_COMPRESSION; } From 
bb654c1cf6d59f06727be8b93c66bbdd2a42b9d1 Mon Sep 17 00:00:00 2001 From: Christoph Klaffl Date: Tue, 5 Jun 2018 11:41:48 +0200 Subject: [PATCH 21/29] use utc for timestamps as default --- README.md | 4 +++- packages/debian/sanoid.service | 1 + packages/rhel/sanoid.spec | 1 + 3 files changed, 5 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index cc75bdf..be6195d 100644 --- a/README.md +++ b/README.md @@ -6,9 +6,11 @@ More prosaically, you can use Sanoid to create, automatically thin, and monitor snapshots and pool health from a single eminently human-readable TOML config file at /etc/sanoid/sanoid.conf. (Sanoid also requires a "defaults" file located at /etc/sanoid/sanoid.defaults.conf, which is not user-editable.) A typical Sanoid system would have a single cron job: ``` -* * * * * /usr/local/bin/sanoid --cron +* * * * * TZ=UTC /usr/local/bin/sanoid --cron ``` +`Note`: Using UTC as timezone is recommend to prevent problems with daylight saving times + And its /etc/sanoid/sanoid.conf might look something like this: ``` diff --git a/packages/debian/sanoid.service b/packages/debian/sanoid.service index b54c586..2d01bbf 100644 --- a/packages/debian/sanoid.service +++ b/packages/debian/sanoid.service @@ -5,5 +5,6 @@ After=zfs.target ConditionFileNotEmpty=/etc/sanoid/sanoid.conf [Service] +Environment=TZ=UTC Type=oneshot ExecStart=/usr/sbin/sanoid --cron diff --git a/packages/rhel/sanoid.spec b/packages/rhel/sanoid.spec index ab299a5..3a9412f 100644 --- a/packages/rhel/sanoid.spec +++ b/packages/rhel/sanoid.spec @@ -58,6 +58,7 @@ Requires=zfs.target After=zfs.target [Service] +Environment=TZ=UTC Type=oneshot ExecStart=%{_sbindir}/sanoid --cron EOF From 52661afb4125ba350ce0ec54e3928a4ab2140f33 Mon Sep 17 00:00:00 2001 From: Christoph Klaffl Date: Tue, 5 Jun 2018 12:45:13 +0200 Subject: [PATCH 22/29] updated debian changelog --- packages/debian/changelog | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/packages/debian/changelog 
b/packages/debian/changelog index ab530b0..2bcf423 100644 --- a/packages/debian/changelog +++ b/packages/debian/changelog @@ -1,3 +1,18 @@ +sanoid (1.4.18) unstable; urgency=medium + + implemented special character handling and support of ZFS resume/receive tokens by default in syncoid, + thank you @phreaker0! + + -- Jim Salter Wed, 25 Apr 2018 16:24:00 -0400 + +sanoid (1.4.17) unstable; urgency=medium + + changed die to warn when unexpectedly unable to remove a snapshot - this + allows sanoid to continue taking/removing other snapshots not affected by + whatever lock prevented the first from being taken or removed + + -- Jim Salter Wed, 8 Nov 2017 15:25:00 -0400 + sanoid (1.4.16) unstable; urgency=medium * merged @hrast01's extended fix to support -o option1=val,option2=val passthrough to SSH. merged @JakobR's From e65879d1f8c4a1304ee0d16d4393645b5e07de1e Mon Sep 17 00:00:00 2001 From: Lucas Salibian Date: Fri, 8 Jun 2018 11:52:52 -0400 Subject: [PATCH 23/29] Fix --help typo --- syncoid | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/syncoid b/syncoid index 999ee79..a15f872 100755 --- a/syncoid +++ b/syncoid @@ -1148,7 +1148,7 @@ Options: --sshoption|o=OPTION Passes OPTION to ssh for remote usage. 
Can be specified multiple times --help Prints this helptext - --verbose Prints the version number + --version Prints the version number --debug Prints out a lot of additional information during a syncoid run --monitor-version Currently does nothing --quiet Suppresses non-error output From c0c30500765395cd22f975abb198ce3d4fb1d1f3 Mon Sep 17 00:00:00 2001 From: Christoph Klaffl Date: Tue, 19 Jun 2018 18:21:06 +0200 Subject: [PATCH 24/29] added option for skipping the parent dataset in recursive replication --- README.md | 4 ++++ syncoid | 8 +++++++- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index cc75bdf..7392fa7 100644 --- a/README.md +++ b/README.md @@ -126,6 +126,10 @@ As of 1.4.18, syncoid also automatically supports and enables resume of interrup This will also transfer child datasets. ++ --skip-parent + + This will skip the syncing of the parent dataset. Does nothing without '--recursive' option. + + --compress Currently accepted options: gzip, pigz-fast, pigz-slow, lzo (default) & none. If the selected compression method is unavailable on the source and destination, no compression will be used. 
diff --git a/syncoid b/syncoid index 999ee79..5cbc1c4 100755 --- a/syncoid +++ b/syncoid @@ -19,7 +19,7 @@ use Sys::Hostname; my %args = ('sshkey' => '', 'sshport' => '', 'sshcipher' => '', 'sshoption' => [], 'target-bwlimit' => '', 'source-bwlimit' => ''); GetOptions(\%args, "no-command-checks", "monitor-version", "compress=s", "dumpsnaps", "recursive|r", "source-bwlimit=s", "target-bwlimit=s", "sshkey=s", "sshport=i", "sshcipher|c=s", "sshoption|o=s@", - "debug", "quiet", "no-stream", "no-sync-snap", "no-resume") or pod2usage(2); + "debug", "quiet", "no-stream", "no-sync-snap", "no-resume", "skip-parent") or pod2usage(2); my %compressargs = %{compressargset($args{'compress'} || 'default')}; # Can't be done with GetOptions arg, as default still needs to be set @@ -141,6 +141,11 @@ sub getchilddatasets { my @children = ; close FH; + if (defined $args{'skip-parent'}) { + # parent dataset is the first element + shift @children; + } + return @children; } @@ -1137,6 +1142,7 @@ Options: --compress=FORMAT Compresses data during transfer. Currently accepted options are gzip, pigz-fast, pigz-slow, lzo (default) & none --recursive|r Also transfers child datasets + --skip-parent Skipp the syncing of the parent dataset. Doesg nothing without '--recursive' option. --source-bwlimit= Bandwidth limit on the source transfer --target-bwlimit= Bandwidth limit on the target transfer --no-stream Replicates using newest snapshot instead of intermediates From 70b259ac3cb9ef9167776eac0424d0a78ea0c0c4 Mon Sep 17 00:00:00 2001 From: Christoph Klaffl Date: Tue, 19 Jun 2018 18:24:34 +0200 Subject: [PATCH 25/29] typos --- syncoid | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/syncoid b/syncoid index 5cbc1c4..8ab3a41 100755 --- a/syncoid +++ b/syncoid @@ -1142,7 +1142,7 @@ Options: --compress=FORMAT Compresses data during transfer. 
Currently accepted options are gzip, pigz-fast, pigz-slow, lzo (default) & none --recursive|r Also transfers child datasets - --skip-parent Skipp the syncing of the parent dataset. Doesg nothing without '--recursive' option. + --skip-parent Skips syncing of the parent dataset. Does nothing without '--recursive' option. --source-bwlimit= Bandwidth limit on the source transfer --target-bwlimit= Bandwidth limit on the target transfer --no-stream Replicates using newest snapshot instead of intermediates From 34b942ea45cbc686c1e0fe16c8dd6c4515115826 Mon Sep 17 00:00:00 2001 From: Christoph Klaffl Date: Thu, 21 Jun 2018 18:18:28 +0200 Subject: [PATCH 26/29] correctly parse zfs column output (space can be included in the values) --- sanoid | 2 +- syncoid | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/sanoid b/sanoid index b6dc9fe..c07e090 100755 --- a/sanoid +++ b/sanoid @@ -521,7 +521,7 @@ sub getsnaps { } foreach my $snap (@rawsnaps) { - my ($fs,$snapname,$snapdate) = ($snap =~ m/(.*)\@(.*ly)\s*creation\s*(\d*)/); + my ($fs,$snapname,$snapdate) = ($snap =~ m/(.*)\@(.*ly)\t*creation\t*(\d*)/); # avoid pissing off use warnings if (defined $snapname) { diff --git a/syncoid b/syncoid index 999ee79..ca7328f 100755 --- a/syncoid +++ b/syncoid @@ -664,7 +664,7 @@ sub getzfsvalue { open FH, "$rhost $mysudocmd $zfscmd get -H $property $fsescaped |"; my $value = ; close FH; - my @values = split(/\s/,$value); + my @values = split(/\t/,$value); $value = $values[2]; return $value; } @@ -985,7 +985,7 @@ sub getsnaps() { if ($line =~ /\Q$fs\E\@.*guid/) { chomp $line; my $guid = $line; - $guid =~ s/^.*\sguid\s*(\d*).*/$1/; + $guid =~ s/^.*\tguid\t*(\d*).*/$1/; my $snap = $line; $snap =~ s/^.*\@(.*)\tguid.*$/$1/; $snaps{$type}{$snap}{'guid'}=$guid; @@ -997,7 +997,7 @@ sub getsnaps() { if ($line =~ /\Q$fs\E\@.*creation/) { chomp $line; my $creation = $line; - $creation =~ s/^.*\screation\s*(\d*).*/$1/; + $creation =~ s/^.*\tcreation\t*(\d*).*/$1/; my $snap 
= $line; $snap =~ s/^.*\@(.*)\tcreation.*$/$1/; $snaps{$type}{$snap}{'creation'}=$creation; @@ -1056,9 +1056,9 @@ sub getsendsize { # the output format is different in case of # a resumed receive if (defined($receivetoken)) { - $sendsize =~ s/.*\s([0-9]+)$/$1/; + $sendsize =~ s/.*\t([0-9]+)$/$1/; } else { - $sendsize =~ s/^size\s*//; + $sendsize =~ s/^size\t*//; } chomp $sendsize; From 1f885801993eec2ce259d3ba4beb7704c6eb496d Mon Sep 17 00:00:00 2001 From: Christoph Klaffl Date: Thu, 28 Jun 2018 17:45:18 +0200 Subject: [PATCH 27/29] implemented support for excluding datasets from replication with a regular expression --- README.md | 4 ++++ syncoid | 17 ++++++++++++++++- 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index cc75bdf..e98763b 100644 --- a/README.md +++ b/README.md @@ -150,6 +150,10 @@ As of 1.4.18, syncoid also automatically supports and enables resume of interrup This argument tells syncoid to restrict itself to existing snapshots, instead of creating a semi-ephemeral syncoid snapshot at execution time. Especially useful in multi-target (A->B, A->C) replication schemes, where you might otherwise accumulate a large number of foreign syncoid snapshots. ++ --exclude=REGEX + + The given regular expression will be matched against all datasets which would be synced by this run and excludes them. This argument can be specified multiple times. + + --no-resume This argument tells syncoid to not use resumeable zfs send/receive streams. 
diff --git a/syncoid b/syncoid index 999ee79..5cd925c 100755 --- a/syncoid +++ b/syncoid @@ -19,7 +19,7 @@ use Sys::Hostname; my %args = ('sshkey' => '', 'sshport' => '', 'sshcipher' => '', 'sshoption' => [], 'target-bwlimit' => '', 'source-bwlimit' => ''); GetOptions(\%args, "no-command-checks", "monitor-version", "compress=s", "dumpsnaps", "recursive|r", "source-bwlimit=s", "target-bwlimit=s", "sshkey=s", "sshport=i", "sshcipher|c=s", "sshoption|o=s@", - "debug", "quiet", "no-stream", "no-sync-snap", "no-resume") or pod2usage(2); + "debug", "quiet", "no-stream", "no-sync-snap", "no-resume", "exclude=s@") or pod2usage(2); my %compressargs = %{compressargset($args{'compress'} || 'default')}; # Can't be done with GetOptions arg, as default still needs to be set @@ -141,6 +141,20 @@ sub getchilddatasets { my @children = ; close FH; + if (defined $args{'exclude'}) { + my $excludes = $args{'exclude'}; + foreach (@$excludes) { + for my $i ( 0 .. $#children ) { + if ($children[$i] =~ /$_/) { + if ($debug) { print "DEBUG: excluded $children[$i] because of $_\n"; } + undef $children[$i] + } + } + + @children = grep{ defined }@children; + } + } + return @children; } @@ -1141,6 +1155,7 @@ Options: --target-bwlimit= Bandwidth limit on the target transfer --no-stream Replicates using newest snapshot instead of intermediates --no-sync-snap Does not create new snapshot, only transfers existing + --exclude=REGEX Exclude specific datasets which match the given regular expression. 
Can be specified multiple times --sshkey=FILE Specifies a ssh public key to use to connect --sshport=PORT Connects to remote on a particular port From ba3836ec520efc30689238e294de1d1c2026fc1b Mon Sep 17 00:00:00 2001 From: Christoph Klaffl Date: Fri, 6 Jul 2018 15:52:54 +0200 Subject: [PATCH 28/29] fixed monitor-health command for pools containing cache and log devices --- sanoid | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/sanoid b/sanoid index 9cd9d33..485ee08 100755 --- a/sanoid +++ b/sanoid @@ -976,6 +976,11 @@ sub check_zpool() { ## other cases my ($dev, $sta) = /^\s+(\S+)\s+(\S+)/; + if (!defined($sta)) { + # cache and logs are special and don't have a status + next; + } + ## pool online, not degraded thanks to dead/corrupted disk if ($state eq "OK" && $sta eq "UNAVAIL") { $state="WARNING"; @@ -1111,7 +1116,7 @@ sub checklock { # make sure lockfile contains something if ( -z $lockfile) { # zero size lockfile, something is wrong - die "ERROR: something is wrong! $lockfile is empty\n"; + die "ERROR: something is wrong! $lockfile is empty\n"; } # lockfile exists. read pid and mutex from it. see if it's our pid. if not, see if From f9c1cbb74a3c07fc9e0368721cdcedcc0b2b4a0f Mon Sep 17 00:00:00 2001 From: Jim Salter Date: Sat, 7 Jul 2018 12:06:35 -0400 Subject: [PATCH 29/29] Update INSTALL --- INSTALL | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/INSTALL b/INSTALL index 33b510d..f0de17b 100644 --- a/INSTALL +++ b/INSTALL @@ -8,7 +8,7 @@ default for SSH transport since v1.4.6. Syncoid runs will fail if one of them is not available on either end of the transport. On Ubuntu: apt install pv lzop mbuffer -On CentOS: yum install lzo pv mbuffer lzop +On CentOS: yum install lzo pv mbuffer lzop perl-Data-Dumper On FreeBSD: pkg install pv mbuffer lzop FreeBSD notes: FreeBSD may place pv and lzop in somewhere other than