mirror of
https://github.com/samba-team/samba.git
synced 2024-12-22 13:34:15 +03:00
b41c1bdaa1
Signed-off-by: Martin Schwenke <martin@meltin.net> Reviewed-by: Amitay Isaacs <amitay@gmail.com>
280 lines
8.7 KiB
Perl
Executable File
280 lines
8.7 KiB
Perl
Executable File
#!/usr/bin/perl -w
|
|
# Nagios plugin to monitor CTDB (Clustered Trivial Database)
|
|
#
|
|
# License: GPL
|
|
# Copyright (c) 2011 Nantes Metropole
|
|
# Author: Mathieu Parent <math.parent@gmail.com>
|
|
# Contributor(s): -
|
|
#
|
|
# This program is free software; you can redistribute it and/or modify
|
|
# it under the terms of the GNU General Public License version 2 as
|
|
# published by the Free Software Foundation.
|
|
#
|
|
# This program is distributed in the hope that it will be useful,
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
# GNU General Public License for more details.
|
|
#
|
|
# You should have received a copy of the GNU General Public License
|
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
#
|
|
|
|
use strict;
|
|
use warnings;
|
|
use vars qw($PROGNAME $VERSION $output $values $result);
|
|
use Nagios::Plugin;
|
|
use File::Basename;
|
|
|
|
# Plugin identity: the name is derived from the path the script was
# invoked with, the version is fixed here.
($PROGNAME, $VERSION) = (basename($0), '0.4');

# The plugin object used for option parsing, thresholds, perfdata and the
# final exit.  The multi-line usage and help texts are assembled from one
# list entry per output line.
my $np = Nagios::Plugin->new(
    usage => join("\n",
        'Usage: %s -i <info>',
        ' [ -t <timeout> ] [ -w <warn_range> ] [ -c <crit_range> ]',
        ' [ -H <host> ] [-s] [ -l <login_name> ]',
        ' [ -V ] [ -h ]',
    ),
    version   => $VERSION,
    plugin    => $PROGNAME,
    shortname => uc($PROGNAME),
    blurb     => 'CTDB plugin',
    extra     => join("\n",
        'Supported commands:',
        ' * scriptstatus :',
        ' check the ctdb scriptstatus command and return CRITICAL if one of the',
        ' scripts fails.',
        ' Perfdata count the number of scripts by state (ok, disabled, error,',
        ' total).',
        ' * ping :',
        ' check the ctdb ping command.',
        ' Perfdata count the number of nodes, the total ping time and the number',
        ' of clients.',
        ' Thresholds are checked against the number of nodes.',
        # Two empty entries reproduce the blank lines before the copyright.
        '',
        '',
        'Copyright (c) 2011 Nantes Metropole',
    ),
    timeout => 30,
);
|
|
|
|
# Command-line options understood by the plugin, in addition to the ones
# Nagios::Plugin provides itself.  The placeholders in the help texts
# match the usage line: -H takes <host>, -l takes <login_name>.

# Which check to run (mandatory).
$np->add_arg(
    spec     => 'info|i=s',
    help     => "-i, --info=<info>\n"
              . ' Information: One of scriptstatus or ping.',
    required => 1,
);

# Remote host; when given, commands are run through ssh.
$np->add_arg(
    spec     => 'hostname|H=s',
    help     => "-H, --hostname=<host>\n"
              . ' Host name or IP Address.',
    required => 0,
);

# Prefix the command with sudo.
$np->add_arg(
    spec     => 'sudo|s',
    help     => "-s, --sudo\n"
              . ' Use sudo.',
    required => 0,
);

# Remote user name passed to ssh with -l.
$np->add_arg(
    spec     => 'login|l=s',
    help     => "-l, --login=<login_name>\n"
              . ' The user to log in as on the remote machine.',
    required => 0,
);

# Warning threshold.
$np->add_arg(
    spec     => 'warning|w=s',
    help     => "-w, --warning=THRESHOLD\n"
              . " Warning threshold. See\n"
              . " http://nagiosplug.sourceforge.net/developer-guidelines.html#THRESHOLDFORMAT\n"
              . ' for the threshold format.',
    required => 0,
);

# Critical threshold.
$np->add_arg(
    spec     => 'critical|c=s',
    help     => "-c, --critical=THRESHOLD\n"
              . " Critical threshold. See\n"
              . " http://nagiosplug.sourceforge.net/developer-guidelines.html#THRESHOLDFORMAT\n"
              . ' for the threshold format.',
    required => 0,
);
|
|
|
|
# Parse the command line.
$np->getopts;

# Copy the parsed option values into file-level lexicals; each accessor is
# called in scalar context.
my ($info, $hostname, $login, $sudo, $warning, $critical) =
    map { scalar($np->opts->$_()) } qw(info hostname login sudo warning critical);

# Flags reported by check_percantage() for the warning / critical
# thresholds.
my $percw;
my $percc;

# Text accumulated for the final plugin message.
$output = "";

# Run each supplied threshold through check_percantage(); a threshold that
# ends up empty is treated as "not given".
if (defined $critical) {
    ($percc, $critical) = check_percantage($critical);
    undef $critical unless length $critical;
}

if (defined $warning) {
    ($percw, $warning) = check_percantage($warning);
    undef $warning unless length $warning;
}

$np->set_thresholds(critical => $critical, warning => $warning);

# In-memory buffer that safe_open_command() redirects STDERR into.
my $stderr;
|
|
|
|
# Start a command and attach its standard output to the PIPE filehandle.
#
# Arguments: the command and its arguments as a list (passed to the
# list form of open).  "sudo" is prepended when --sudo was given, and the
# whole command is wrapped in "ssh [-l <login>] <host>" when --hostname
# was given.
#
# Before the command is started, STDERR is saved in OLDERR and replaced by
# a handle that appends to the in-memory buffer $stderr.  The saved handle
# is put back by safe_close_command(), or right here when the command
# cannot be started.
#
# On failure $result is set to CRITICAL and an explanation is appended to
# $output.  Returns 1 on success and 0 on failure.
sub safe_open_command {
    # Work on a copy so the caller's argument list is left alone.
    my @command = @_;

    unshift @command, "sudo" if $sudo;
    if ($hostname) {
        unshift @command, $hostname;
        unshift @command, "-l", $login if $login;
        unshift @command, "ssh";
    }

    # Save the real STDERR, then point STDERR at the $stderr buffer.
    open(OLDERR, ">&", \*STDERR) or die "Can't dup STDERR: $!";
    $stderr = "";
    close STDERR;
    open(STDERR, ">>", \$stderr) or die "Can't open STDERR: $!";

    if ($np->opts->verbose) {
        print "Executing: @command\n";
    }

    if (!open(PIPE, '-|', @command)) {
        $result = CRITICAL;
        $output .= "Cannot open command '@command': $! ($stderr). ";
        # restore STDERR: duplicate the saved handle (">&"); a plain ">"
        # open would not bring the original stream back.
        open(STDERR, ">&", \*OLDERR) or die "Can't dup OLDERR: $!";
        return 0;
    }

    return 1;
}
|
|
|
|
# Close the PIPE opened by safe_open_command(), translate the child's wait
# status into $result / $output, and put the saved STDERR (OLDERR) back.
#
#   status -1          -> CRITICAL, "failed to execute"
#   killed by a signal -> CRITICAL, signal number and core-dump flag
#   exit code 255      -> WARNING (message only if $output is still empty)
#   other non-zero     -> CRITICAL, exit code
#   exit code 0        -> $result and $output are left untouched
sub safe_close_command {
    close(PIPE);

    my $wait_status = $?;

    if ($wait_status == -1) {
        $result = CRITICAL;
        $output .= "failed to execute: $!. ";
    }
    elsif (my $signal = $wait_status & 127) {
        # Low seven bits hold the signal number, bit 128 the core-dump flag.
        my $core = ($wait_status & 128) ? 'with' : 'without';
        $result = CRITICAL;
        $output .= sprintf("child died with signal %d, %s coredump. ",
                           $signal, $core);
    }
    elsif (my $exit_code = $wait_status >> 8) {
        if ($exit_code != 255) {
            $result = CRITICAL;
            $output .= sprintf("child exited with value %d. ", $exit_code);
        }
        else {
            # ctdb returns -1=255 if any node is disconnected
            $result = WARNING;
            $output .= sprintf("child exited with value %d. ", $exit_code)
                if $output eq "";
        }
    }

    # restore STDERR
    open(STDERR, ">&OLDERR") or die "Can't dup OLDERR: $!";
}
|
|
|
|
# main :
|
|
|
|
# Dispatch on the requested check.  Every branch ends in nagios_exit().
if ($info eq "scriptstatus") {
    # Run "ctdb -X scriptstatus" and classify each reported script by its
    # status column.
    $result = OK;
    safe_open_command('ctdb', '-X', 'scriptstatus');
    if ($result == OK) {
        my $script_count          = 0;
        my $ok_script_count       = 0;
        my $disabled_script_count = 0;
        my $error_script_count    = 0;

        while (<PIPE>) {
            next if $. == 1; # Header
            $script_count++;
            chomp;

            # Fields are separated by a literal '|'.  The separator has to
            # be an escaped regex: a bare "|" string is compiled as an
            # empty alternation and would split the line into characters.
            my ($col0, $type, $name, $code, $status, $start, $end, @error)
                = split(/\|/, $_);
            if ($col0 ne '') {
                # Old version, before 30 Aug 2011 and commit a779d83a6213
                ($type, $name, $code, $status, $start, $end, @error)
                    = ($col0, $type, $name, $code, $status, $start, $end, @error);
            }

            # Any error text reported for a script is added to the message
            # and raises the result to at least WARNING.
            my $error = join(':', @error);
            if ($error ne "") {
                $output = "$output ;; " if $output;
                $output = "$output$name ($status=$code): $error ";
                if ($result != CRITICAL) {
                    $result = WARNING;
                }
            }

            if ($status eq "OK") {
                $ok_script_count++;
                next;
            }
            if ($status eq "DISABLED") {
                $disabled_script_count++;
                next;
            }

            # Anything that is neither OK nor DISABLED counts as an error.
            # NOTE(review): the plugin help text says CRITICAL for a failed
            # script, this sets WARNING - confirm the intended severity.
            $error_script_count++;
            $result = WARNING;
        }
        safe_close_command();

        $np->add_perfdata(label => "ok", value => $ok_script_count, uom => '',
            min => 0, max => $script_count);
        $np->add_perfdata(label => "disabled", value => $disabled_script_count, uom => '',
            min => 0, max => $script_count);
        $np->add_perfdata(label => "error", value => $error_script_count, uom => '',
            min => 0, max => $script_count, warning => '0', critical => '0');
        $np->add_perfdata(label => "total", value => $script_count, uom => '',
            min => 0, max => $script_count);

        if ($result == OK) {
            $result = $np->check_threshold(check => $error_script_count, warning => '0', critical => '0');
        }
    }
    $np->nagios_exit($result, $output);
} elsif ($info eq "ping") {
    # Get expected nodes count
    $result = OK;
    safe_open_command('cat', '/etc/ctdb/nodes');

    # Count with an explicit counter instead of $., which is stale or
    # undefined when the command could not be opened.
    my $max_nodes_count = 0;
    if ($result == OK) {
        while (my $node_line = <PIPE>) {
            # Blank lines are not nodes; '#'-commented entries are skipped
            # as well (the ctdb documentation describes commenting a line
            # out as the way to mark a node as deleted).
            next if $node_line !~ /\S/;
            next if $node_line =~ /^\s*#/;
            $max_nodes_count++;
        }
        safe_close_command();
    }

    # ctdb ping
    # The result is reset on purpose: a problem reading the nodes file is
    # kept in $output but does not prevent the ping check itself.
    $result = OK;
    safe_open_command('ctdb', '-n', 'all', 'ping');
    if ($result == OK) {
        my $nodes_count   = 0;
        my $time_total    = 0.0;
        my $clients_count = 0;

        while (<PIPE>) {
            chomp;
            # Whitespace before the client count is matched loosely so
            # that the amount of padding does not matter.
            if ($_ =~ /^response from (\d+) time=([0-9.]+) sec\s+\((\d+) clients\)$/) {
                my ($time, $clients) = ($2, $3);
                $nodes_count   += 1;
                $time_total    += $time;
                $clients_count += $clients;
            } elsif ($_ =~ /^Unable to get ping response from node (\d+)$/) {
                # Unreachable node: it is simply not counted, and shows up
                # in the "missing nodes" message below.
            } else {
                $result = CRITICAL;
                $output .= "'$_' doesn't match regexp. ";
            }
        }

        $output .= sprintf("%d missing nodes. ", $max_nodes_count - $nodes_count)
            if $nodes_count < $max_nodes_count;
        safe_close_command();

        $np->add_perfdata(label => "nodes", value => $nodes_count, uom => '',
            min => 0, max => $max_nodes_count, warning => $warning, critical => $critical);
        $np->add_perfdata(label => "ping_time", value => $time_total, uom => 's',
            min => 0, max => undef);
        $np->add_perfdata(label => "clients", value => $clients_count, uom => '',
            min => 0, max => undef);

        # The -w / -c thresholds are checked against the number of
        # responding nodes.
        if ($result == OK) {
            $result = $np->check_threshold(check => $nodes_count);
        }
    }
    $np->nagios_exit($result, $output);
} else {
    $np->nagios_exit(UNKNOWN, "Unknown command: '$info'");
}
|
|
|
|
# Strip percent signs from a threshold specification.
#
# Argument: the threshold string as given on the command line.
#
# Returns a two-element list: a flag that is 1 when at least one '%' was
# removed and false otherwise, followed by the threshold with every '%'
# removed.  All occurrences are removed so that a range such as "10%:20%"
# becomes "10:20" rather than "10:20%".
sub check_percantage
{
    my ($number) = @_;

    # s///g yields the number of substitutions, or false when there was none.
    my $perc = $number =~ s/%//g;

    # Keep the flag a plain 1 however many signs were removed.
    $perc = 1 if $perc;

    return ($perc, $number);
}
|
|
|