Merge pull request #201 from phreaker0/monitor-capacity

implemented monitor-capacity flag for checking zpool capacity limits
This commit is contained in:
Jim Salter 2018-06-28 16:03:25 -04:00 committed by GitHub
commit 8ee41752c7
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 135 additions and 1 deletions

View File

@ -64,6 +64,10 @@ Which would be enough to tell sanoid to take and keep 36 hourly snapshots, 30 da
This option is designed to be run by a Nagios monitoring system. It reports on the health of the zpool your filesystems are on. It only monitors filesystems that are configured in the sanoid.conf file.
+ --monitor-capacity
This option is designed to be run by a Nagios monitoring system. It reports on the capacity of the zpool your filesystems are on. It only monitors pools that are configured in the sanoid.conf file.
+ --force-update
This clears out sanoid's zfs snapshot listing cache. This is normally not needed.

128
sanoid
View File

@ -18,7 +18,8 @@ use Time::Local; # to parse dates in reverse
my %args = ("configdir" => "/etc/sanoid"); my %args = ("configdir" => "/etc/sanoid");
GetOptions(\%args, "verbose", "debug", "cron", "readonly", "quiet", GetOptions(\%args, "verbose", "debug", "cron", "readonly", "quiet",
"monitor-health", "force-update", "configdir=s", "monitor-health", "force-update", "configdir=s",
"monitor-snapshots", "take-snapshots", "prune-snapshots" "monitor-snapshots", "take-snapshots", "prune-snapshots",
"monitor-capacity"
) or pod2usage(2); ) or pod2usage(2);
# If only config directory (or nothing) has been specified, default to --cron --verbose # If only config directory (or nothing) has been specified, default to --cron --verbose
@ -52,6 +53,7 @@ my @params = ( \%config, \%snaps, \%snapsbytype, \%snapsbypath );
if ($args{'debug'}) { $args{'verbose'}=1; blabber (@params); } if ($args{'debug'}) { $args{'verbose'}=1; blabber (@params); }
if ($args{'monitor-snapshots'}) { monitor_snapshots(@params); } if ($args{'monitor-snapshots'}) { monitor_snapshots(@params); }
if ($args{'monitor-health'}) { monitor_health(@params); } if ($args{'monitor-health'}) { monitor_health(@params); }
if ($args{'monitor-capacity'}) { monitor_capacity(@params); }
if ($args{'force-update'}) { my $snaps = getsnaps( \%config, $cacheTTL, 1 ); } if ($args{'force-update'}) { my $snaps = getsnaps( \%config, $cacheTTL, 1 ); }
if ($args{'cron'}) { if ($args{'cron'}) {
@ -174,6 +176,61 @@ sub monitor_snapshots {
exit $errorlevel; exit $errorlevel;
} }
####################################################################################
####################################################################################
####################################################################################
# Nagios-style capacity check for every zpool that has capacity limits
# configured.
#
# Arguments: the same list every monitor_* sub is called with. Only the
# first element (the config hashref, keyed by section name) is used here.
#
# For each section the pool name is the part before the first '/'. Limits
# are read from the 'capacity_warn' / 'capacity_crit' keys; a value of 0
# disables that limit, and a pool with both limits disabled is not checked.
#
# Prints a single line made of the per-pool messages joined with ', ',
# prefixed with a WARNING/CRITICAL banner when applicable, then exits with
# the highest status code returned by check_zpool_capacity(). Dies if a
# configured limit is rejected by check_capacity_limit().
sub monitor_capacity {
    my ($config) = @_;
    my %pools;
    my @messages;
    my $errlevel = 0;

    # build pool list with corresponding capacity limits
    #
    # Sections are walked in sorted order so the outcome never depends on
    # hash ordering: a pool's own section sorts before its datasets and so
    # supplies the limits; when the pool has no usable limits of its own,
    # the alphabetically first dataset section that has limits is used.
    foreach my $section (sort keys %{$config}) {
        my @pool = split('/', $section);

        # Dataset sections only contribute while the pool has no limits yet;
        # a section naming the pool itself is always evaluated.
        next unless scalar @pool == 1 || !defined($pools{$pool[0]});

        my $warn = $config->{$section}{'capacity_warn'};
        my $crit = $config->{$section}{'capacity_crit'};

        if (!check_capacity_limit($warn)) {
            die "ERROR: invalid zpool capacity warning limit!\n";
        }
        if (!check_capacity_limit($crit)) {
            die "ERROR: invalid zpool capacity critical limit!\n";
        }

        # A limit of 0 means "do not check this level".
        my %capacitylimits;
        $capacitylimits{'warn'} = $warn if $warn != 0;
        $capacitylimits{'crit'} = $crit if $crit != 0;

        if (%capacitylimits) {
            $pools{$pool[0]} = \%capacitylimits;
        }
    }

    # Sorted as well, so the report lists pools in a stable order.
    foreach my $pool (sort keys %pools) {
        my ($exitcode, $msg) = check_zpool_capacity($pool, $pools{$pool});
        if ($exitcode > $errlevel) { $errlevel = $exitcode; }
        chomp $msg;
        push(@messages, $msg);
    }

    # Indexed by the worst exit code seen (0 = OK, 1 = WARNING, 2 = CRITICAL).
    my @warninglevels = ('', '*** WARNING *** ', '*** CRITICAL *** ');
    my $message = $warninglevels[$errlevel] . join(', ', @messages);
    print "$message\n";
    exit $errlevel;
}
#################################################################################### ####################################################################################
#################################################################################### ####################################################################################
#################################################################################### ####################################################################################
@ -900,6 +957,74 @@ sub check_zpool() {
return ($ERRORS{$state},$msg); return ($ERRORS{$state},$msg);
} # end check_zpool() } # end check_zpool()
# Validates a capacity limit read from the configuration.
#
# Argument: the raw value (may be undef).
# Returns 1 when the value is a whole number from 0 to 100 written with
# plain ASCII digits; otherwise returns nothing (undef in scalar context).
#
# No prototype is declared: the sub takes one argument, and an empty "()"
# prototype would make calls compiled after this definition a syntax error.
sub check_capacity_limit {
    my ($value) = @_;

    # [0-9] rather than \d: \d also matches non-ASCII Unicode digits, which
    # numify to 0 and would slip through the range check below.
    return unless defined($value) && $value =~ /^[0-9]+\z/;

    # The pattern already rules out negative numbers, so only the upper
    # bound needs checking.
    return if $value > 100;

    return 1;
}
# Queries the used-capacity percentage of one zpool and rates it against
# optional warning / critical limits.
#
# Arguments:
#   $pool              - name of the zpool to query
#   $capacitylimitsref - hashref with optional 'warn' and 'crit' keys holding
#                        percentages; a missing key disables that level
#
# Returns a two element list: (numeric status, message). The status is
# 0 (OK), 1 (WARNING) or 2 (CRITICAL); the message has the form
# "STATE ZPOOL <pool> : <cap>\n".
#
# Does not return on failure: if the zpool command cannot be started it
# prints an UNKNOWN line and exits 3; if the output is not a percentage it
# prints a CRITICAL line and exits 2.
#
# No prototype is declared: the sub takes two arguments, and an empty "()"
# prototype would make calls compiled after this definition a syntax error.
sub check_zpool_capacity {
    my ($pool, $capacitylimitsref) = @_;
    my %ERRORS = ('DEPENDENT'=>4,'UNKNOWN'=>3,'OK'=>0,'WARNING'=>1,'CRITICAL'=>2);
    my $state = "UNKNOWN";
    my %capacitylimits = %{ $capacitylimitsref || {} };

    # The command is kept as a list so it is executed directly, without a
    # shell ever seeing the pool name. The joined form is only used for the
    # error message.
    my @statcommand = ('/sbin/zpool', 'list', '-H', '-o', 'cap', $pool);
    my $statcommand = join(' ', @statcommand);

    my $stat;
    if (! open $stat, '-|', @statcommand) {
        print ("$state '$statcommand' command returns no result!\n");
        exit $ERRORS{$state};
    }
    my $line = <$stat>;
    close($stat);

    # zpool prints nothing on stdout for an unknown pool; treat that as an
    # empty value so it is reported below instead of tripping over undef.
    $line = '' unless defined $line;
    chomp $line;

    my ($cap) = split(/ +/, $line);
    $cap = '' unless defined $cap;

    ## check for valid capacity value
    if ($cap !~ m/^[0-9]{1,3}%$/ ) {
        $state = "CRITICAL";
        my $msg = sprintf "ZPOOL {%s} does not exist and/or is not responding!\n", $pool;
        print $state, " ", $msg;
        exit ($ERRORS{$state});
    }

    $state = "OK";

    # check capacity: strip the '%' and compare numerically. The critical
    # test runs last so it overrides a warning.
    my $capn = $cap;
    $capn =~ s/\D//g;

    if (defined($capacitylimits{"warn"}) && $capn >= $capacitylimits{"warn"}) {
        $state = "WARNING";
    }
    if (defined($capacitylimits{"crit"}) && $capn >= $capacitylimits{"crit"}) {
        $state = "CRITICAL";
    }

    my $msg = sprintf "ZPOOL %s : %s\n", $pool, $cap;
    $msg = "$state $msg";
    return ($ERRORS{$state},$msg);
} # end check_zpool_capacity()
###################################################################################################### ######################################################################################################
###################################################################################################### ######################################################################################################
###################################################################################################### ######################################################################################################
@ -1079,6 +1204,7 @@ Options:
--force-update Clears out sanoid's zfs snapshot cache --force-update Clears out sanoid's zfs snapshot cache
--monitor-health Reports on zpool "health", in a Nagios compatible format --monitor-health Reports on zpool "health", in a Nagios compatible format
--monitor-capacity Reports on zpool capacity, in a Nagios compatible format
--monitor-snapshots Reports on snapshot "health", in a Nagios compatible format --monitor-snapshots Reports on snapshot "health", in a Nagios compatible format
--take-snapshots Creates snapshots as specified in sanoid.conf --take-snapshots Creates snapshots as specified in sanoid.conf
--prune-snapshots Purges expired snapshots as specified in sanoid.conf --prune-snapshots Purges expired snapshots as specified in sanoid.conf

View File

@ -70,3 +70,7 @@ monthly_warn = 32
monthly_crit = 35 monthly_crit = 35
yearly_warn = 0 yearly_warn = 0
yearly_crit = 0 yearly_crit = 0
# default limits for capacity checks (if set to 0, limit will not be checked)
capacity_warn = 80
capacity_crit = 95