1
0
mirror of https://github.com/samba-team/samba.git synced 2025-02-04 17:47:26 +03:00
samba-mirror/ctdb/tools/ctdb_diagnostics
Martin Schwenke 0f18859a6c Add some command-line options to ctdb_diagnostics.
In some contexts ctdb_diagnostics generates too many errors when it is
run on heterogeneous and machine-configured clusters.  In some
clusters some nodes are expected to be differently configured and also
machine-generated configuration files can have comments containing
timestamps.

This adds some command-line options that can be used to reduce the
number of errors reported:

    -n <nodes>  Comma separated list of nodes to operate on
    -c          Ignore comment lines (starting with '#') in file comparisons
    -w          Ignore whitespace in file comparisons
    --no-ads    Do not use commands that assume an Active Directory Server

The -n option simply allows ctdb_diagnostics to operate on a subset of
nodes, avoiding file comparisons with and data collection on nodes
that are differently configured.  For file comparisons, instead of
showing each file on the current node and then comparing other nodes
to that file, the file from the first (available or requested) node
is shown and then other nodes are compared to that.  That has resulted
in changes in output - that is, ctdb_diagnostics no longer prints
messages referencing the current node.

-c and -w are used to weaken comparisons between configuration files.

--no-ads can be used to avoid running ADS-specific commands if a
cluster uses LDAP (or other non-ADS) configuration.

This also fixes a number of bugs in related code:

* A call to onnode was losing the >> NODE ...  << lines because they
  now go to stderr.  This was changed in onnode long ago but
  ctdb_diagnostics was never updated to match.

* ctdb_diagnostics was counting lines in /etc/ctdb/nodes to determine
  what nodes to operate on.  For some time the nodes file has
  supported syntax that makes this invalid.  "ctdb listnodes -Y" is
  now used to list available nodes.

Signed-off-by: Martin Schwenke <martin@meltin.net>

(This used to be ctdb commit 36c8244a0f68c7c9bbee40982f230e9d14d3c0ea)
2010-08-06 11:10:56 +10:00

324 lines
7.9 KiB
Bash
Executable File

#!/bin/sh
# a script to test the basic setup of a CTDB/Samba install
# tridge@samba.org September 2007
# martin@meltin.net August 2010
# Print the command-line help on stderr and terminate the script with
# exit status 1.
usage ()
{
    printf '%s\n' \
        "Usage: ctdb_diagnostics [OPTION] ..." \
        "options:" \
        "-n <nodes> Comma separated list of nodes to operate on" \
        "-c Ignore comment lines (starting with '#') in file comparisons" \
        "-w Ignore whitespace in file comparisons" \
        "--no-ads Do not use commands that assume an Active Directory Server" \
        >&2
    exit 1
}
# Defaults for the settings that parse_options may override.
no_ads=false
diff_opts=""
# Second colon-separated field of every "ctdb listnodes -Y" output line.
nodes=$(ctdb listnodes -Y | cut -d ':' -f 2)
# Parse the command line and update the global settings it controls.
#
# Arguments: the script's command-line arguments ("$@")
# Globals written:
#   nodes      replaced by the -n list with commas turned into spaces
#   diff_opts  gains " -I ^#.*" for -c and " -w" for -w
#   no_ads     set to true by --no-ads
# Calls usage for -h/--help, for anything getopt rejects and for
# leftover non-option arguments.  Returns 0 after a successful parse.
parse_options ()
{
    temp=$(getopt -n "ctdb_diagnostics" -o "n:cwh" -l no-ads,help -- "$@") || usage

    # Re-load the normalised option list produced by getopt into the
    # positional parameters of this function.
    eval set -- "$temp"

    while true ; do
        case "$1" in
        -n) nodes=$(echo "$2" | sed -e 's@,@ @g') ; shift 2 ;;
        -c) diff_opts="${diff_opts} -I ^#.*" ; shift ;;
        -w) diff_opts="${diff_opts} -w" ; shift ;;
        --no-ads) no_ads=true ; shift ;;
        --) shift ; break ;;
        -h|--help|*) usage ;;
        esac
    done

    # Written as "if" rather than "[ ... ] && usage": the short-circuit
    # form leaves the function with status 1 whenever parsing succeeded.
    if [ $# -ne 0 ] ; then
        usage
    fi
}
parse_options "$@"

# Comma-separated form of $nodes, used when one onnode call targets all
# selected nodes.  $nodes is left unquoted on purpose: word splitting
# collapses newlines and repeated blanks into single spaces before sed
# turns each remaining whitespace character into a comma.
nodes_comma=$(echo $nodes | sed -e 's@[[:space:]]@,@g')

PATH="$PATH:/sbin:/usr/sbin:/usr/lpp/mmfs/bin"

# list of config files that must exist and that we check are the same
# on the nodes
CONFIG_FILES_MUST="/etc/krb5.conf /etc/hosts /etc/ctdb/nodes /etc/sysconfig/ctdb /etc/resolv.conf /etc/nsswitch.conf /etc/sysctl.conf /etc/samba/smb.conf /etc/fstab /etc/multipath.conf /etc/pam.d/system-auth /etc/sysconfig/nfs /etc/exports /etc/vsftpd/vsftpd.conf"

# list of config files that may exist and should be checked that they
# are the same on the nodes
CONFIG_FILES_MAY="/etc/ctdb/public_addresses /etc/ctdb/static-routes"

# Send stderr to stdout for the remainder of the script so that error
# output ends up in the same stream as the rest of the report.  A
# redirection on a line of its own affects no command; "exec" is what
# makes it apply to the running shell.
exec 2>&1

cat <<EOF
--------------------------------------------------------------------
ctdb_diagnostics starting. This script will gather information about
your ctdb cluster. You should send the output of this script along
with any ctdb or clustered Samba bug reports.
--------------------------------------------------------------------
EOF
date
# Record one problem: bump the global NUM_ERRORS counter, print the
# message on stdout and append a numbered copy to the file named by
# $ERRORS.
#
# Arguments: $1 - message text
error() {
    msg="$1"
    NUM_ERRORS=$(expr $NUM_ERRORS + 1)
    echo "ERROR: $msg"
    echo " ERROR[$NUM_ERRORS]: $msg" >> $ERRORS
}
# Print a file from the local node, framed by separator lines: its name,
# its "ls -l" entry and its contents, each content line indented by one
# space.  Error messages from ls and cat are part of the output.
#
# Arguments: $1 - path of the file to show
show_file() {
    fname="$1"
    echo " ================================"
    echo " File: $fname"
    # The path is quoted so that names containing blanks or glob
    # characters reach ls as a single, unmodified argument.
    echo " $(ls -l "$fname" 2>&1)"
    cat "$fname" 2>&1 | sed 's/^/ /'
    echo " ================================"
}
# Run a command on all selected nodes through onnode and print what it
# produces, preceded by a "running ..." line.
#
# Arguments: the command to run.  It is normally given as one string;
#            when several arguments are passed they are joined with
#            spaces into a single command line instead of everything
#            after the first being dropped.
# Globals:   nodes_comma (read) - node list handed to onnode
# The command string sent to onnode prints hostname and date first and
# pipes the command's stdout+stderr through sed to indent it by one
# space.
show_all() {
    echo "running $* on nodes $nodes_comma"
    onnode $nodes_comma "hostname; date; $* 2>&1 | sed 's/^/ /'" 2>&1
}
# Show each named file as it exists on the first node in $nodes and
# report every other node whose copy differs from it.
#
# Arguments:
#   $1     printf format for the message used when a file is not readable
#          on the first node; it is given the file name and node number
#   $2...  files to show and compare
# Globals:
#   nodes      (read) whitespace-separated list of node numbers
#   diff_opts  (read) extra options for diff; intentionally expanded
#              unquoted so that it splits into separate options
# Problems are reported through error().  A file that is not readable on
# the first node is reported and then skipped entirely.
show_and_compare_files () {
    fmt="$1" ; shift

    # Per-node copies live in a private directory.  Fixed names placed
    # directly in /tmp could be pre-created or symlinked by another user
    # and the redirections below would then write through them.
    cmpdir=$(mktemp -d "${TMPDIR:-/tmp}/ctdb_diag.XXXXXX") || {
        error "Unable to create temporary directory for file comparisons"
        return 1
    }

    for f ; do
        first=true
        for n in $nodes ; do
            if $first ; then
                onnode $n [ -r "$f" ] || {
                    msg=$(printf "$fmt" "$f" $n)
                    error "$msg"
                    # Nothing to compare against: go on to the next file.
                    continue 2
                }
                # Reference copy: fetched from the first node, including
                # anything the fetch wrote to stderr.
                fstf="$cmpdir/$(basename "$f").node$n"
                onnode $n cat "$f" > "$fstf" 2>&1
                echo " ================================"
                echo " File (on node $n): $f"
                echo " $(onnode $n ls -l "$f" 2>&1)"
                sed 's/^/ /' "$fstf"
                echo " ================================"
                first=false
            else
                echo "Testing for same config file $f on node $n"
                tmpf="$cmpdir/$(basename "$f").node$n"
                onnode $n cat "$f" > "$tmpf" 2>&1
                # Quiet diff decides; the unified diff is only printed
                # when there is something to report.
                diff $diff_opts "$fstf" "$tmpf" >/dev/null 2>&1 || {
                    error "File $f is different on node $n"
                    diff -u $diff_opts "$fstf" "$tmpf"
                }
                rm -f "$tmpf"
            fi
        done
    done

    # Removes the reference copies together with the directory.
    rm -rf "$cmpdir"
}
# State used by error(): the file that collects the numbered error
# summary and the running error count.  The file is created with mktemp
# rather than under a name that can be guessed from the process id.
ERRORS=$(mktemp "${TMPDIR:-/tmp}/diag_err.XXXXXX") || {
    echo "ctdb_diagnostics: unable to create temporary file" >&2
    exit 1
}
NUM_ERRORS=0

cat <<EOF
Diagnosis started on these nodes:
$nodes_comma
For reference, here is the nodes file on the current node...
EOF
show_file /etc/ctdb/nodes

cat <<EOF
--------------------------------------------------------------------
Comparing critical config files on nodes $nodes_comma
EOF
# $CONFIG_FILES_MUST and $CONFIG_FILES_MAY are left unquoted so that each
# listed path becomes a separate argument.
show_and_compare_files \
    "%s is missing on node %d" \
    $CONFIG_FILES_MUST
show_and_compare_files \
    "Optional file %s is not present on node %d" \
    $CONFIG_FILES_MAY
cat <<EOF
--------------------------------------------------------------------
Checking for clock drift
EOF
# Compare every node's clock (seconds since the epoch) with the local
# one and report differences of more than 30 seconds either way.
for i in $nodes; do
    # Take the local reference per node so that time spent querying
    # earlier nodes is not counted as drift for later ones.
    t=$(date +%s)
    t2=$(onnode $i date +%s)
    # Without a plain number from the node there is nothing to compare.
    case "$t2" in
    ''|*[!0-9]*)
        error "unable to determine time on node $i"
        continue
        ;;
    esac
    d=$((t2 - t))
    if [ "$d" -gt 30 ] || [ "$d" -lt -30 ]; then
        error "time on node $i differs by $d seconds"
    fi
done
cat <<EOF
--------------------------------------------------------------------
Showing software versions
EOF
show_all "uname -a"
# The package queries are chosen by which package tool is executable on
# the node running this script.
if [ -x /bin/rpm ]; then
    # grep -E instead of egrep: newer GNU grep prints an obsolescence
    # warning for egrep, which would end up in the collected output.
    show_all "rpm -qa | grep -E 'samba|ctdb|gpfs'"
fi
if [ -x /usr/bin/dpkg-query ]; then
    show_all "/usr/bin/dpkg-query --show 'ctdb'"
    show_all "/usr/bin/dpkg-query --show 'samba'"
    #show_all "/usr/bin/dpkg-query --show 'gpfs'"
fi
cat <<EOF
--------------------------------------------------------------------
Showing ctdb status and recent log entries
EOF
show_all "ctdb status; ctdb ip"
show_all "ctdb statistics"
show_all "ctdb uptime"

# Last 100 lines of the ctdb log, where the log file exists.
echo "Showing log.ctdb"
show_all "test -f /var/log/log.ctdb && tail -100 /var/log/log.ctdb"

show_all "tail -200 /var/log/messages"
show_all "tail -200 /etc/ctdb/state/vacuum.log"
show_all "ls -lRs /var/ctdb"
show_all "ls -lRs /etc/ctdb"
cat <<EOF
--------------------------------------------------------------------
Showing system and process status
EOF
# Storage, sessions, processes and hardware.
show_all "df"
show_all "df -i"
show_all "mount"
show_all "w"
show_all "ps axfwu"
show_all "dmesg"
show_all "/sbin/lspci"
show_all "dmidecode"
show_all "cat /proc/partitions"
show_all "cat /proc/cpuinfo"
show_all "cat /proc/scsi/scsi"
# Network configuration and statistics.
show_all "/sbin/ifconfig -a"
show_all "/sbin/ip addr list"
show_all "/sbin/route -n"
show_all "netstat -s"
show_all "free"
show_all "crontab -l"
show_all "sysctl -a"
show_all "iptables -L -n"
show_all "iptables -L -n -t nat"
# RPC and NFS state.
show_all "/usr/sbin/rpcinfo -p"
show_all "/usr/sbin/showmount -a"
show_all "/usr/sbin/showmount -e"
show_all "/usr/sbin/nfsstat -v"
# The remaining commands are only run when the corresponding tool or
# directory is present on the node running this script.
[ -x /sbin/multipath ] && {
    show_all "/sbin/multipath -ll"
}
[ -x /sbin/chkconfig ] && {
    show_all "/sbin/chkconfig --list"
}
[ -x /usr/sbin/getenforce ] && {
    show_all "/usr/sbin/getenforce"
}
[ -d /proc/net/bonding ] && {
    for f in /proc/net/bonding/*; do
        show_all "cat $f"
    done
}
# GPFS section; skipped unless /usr/lpp/mmfs exists on the node running
# this script.
if [ -d /usr/lpp/mmfs ]; then
    cat <<EOF
--------------------------------------------------------------------
Showing GPFS status and recent log entries
EOF
    show_all "tail -100 /var/adm/ras/mmfs.log.latest"

    # Cluster-wide queries, each run by full path.
    for mmcmd in \
        "mmlsconfig" \
        "mmlsfs all" \
        "mmlsnsd" \
        "mmlsnsd -X" \
        "mmfsadm dump version" \
        "mmfsadm dump waiters" \
        "mmlsmount all" \
        "mmlsquota" \
        "mmlscluster" \
        "mmlsmgr"
    do
        show_all "/usr/lpp/mmfs/bin/$mmcmd"
    done

    # Per-device queries for every device name extracted from the
    # "mmlsfs all" output.
    devlist=$(mmlsfs all|grep ^File.system.attributes | cut -d/ -f3 | cut -d: -f1)
    for d in $devlist; do
        for mmcmd in mmdf mmlsdisk mmlsfileset mmlspolicy mmlssnapshot; do
            show_all "$mmcmd $d"
        done
    done

    # Per-filesystem queries for the first field of every "mount" output
    # line matching type.gpfs.
    fslist=$(mount|grep type.gpfs|awk '{print $1}')
    for fs in $fslist; do
        for mmcmd in mmlssnapshot mmlsdisk mmlsfileset; do
            show_all "/usr/lpp/mmfs/bin/$mmcmd $fs"
        done
    done
fi
cat <<EOF
--------------------------------------------------------------------
Showing Samba status
EOF
show_all "smbstatus -n -B"
# The "net ads ..." commands are skipped when --no-ads was given.
if $no_ads ; then
    echo
    echo "Skipping \"net ads testjoin\" as requested"
    echo
else
    show_all "net ads testjoin"
fi
show_all "net conf list"
show_all "lsof -n | grep smbd"
show_all "lsof -n | grep ctdbd"
show_all "netstat -tan"
if $no_ads ; then
    echo
    echo "Skipping \"net ads info\" as requested"
    echo
else
    show_all "net ads info"
fi
show_all "date"
show_all "smbclient -U% -L 127.0.0.1"
# Workgroup as reported by testparm on the node running this script.
WORKGROUP=$(testparm -s --parameter-name=WORKGROUP 2> /dev/null)
# The whole command goes in one string: show_all is called with a single
# command-line argument everywhere else, and a separate second argument
# would leave only a bare "id" to run.
show_all "id $WORKGROUP/Administrator"
show_all "wbinfo -p"
show_all "wbinfo --online-status"
show_all "smbd -b"
date
echo "Diagnostics finished with $NUM_ERRORS errors"
# Print the numbered error summary written by error(), then delete it.
# $ERRORS is quoted so that an empty value makes the test false instead
# of degenerating into "[ -r ]", which is true.
[ -r "$ERRORS" ] && {
    cat "$ERRORS"
    rm -f "$ERRORS"
}
# The exit status is the number of errors.  Exit statuses are reduced
# modulo 256, so the count is capped to keep a large number of errors
# (256, 512, ...) from being reported as success.
if [ "${NUM_ERRORS:-0}" -gt 255 ]; then
    exit 255
fi
exit $NUM_ERRORS