#!/bin/sh
# a script to test the basic setup of a CTDB/Samba install
# tridge@samba.org September 2007
#
# Prints a report about the local node and, through onnode, about the
# other nodes of the cluster.  The report is written to stdout.

PATH="$PATH:/sbin:/usr/sbin:/usr/lpp/mmfs/bin"

# list of config files that must exist and that we check are the same
# on all nodes
CONFIG_FILES_MUST="/etc/krb5.conf /etc/hosts /etc/ctdb/nodes /etc/sysconfig/ctdb /etc/resolv.conf /etc/nsswitch.conf /etc/sysctl.conf /etc/samba/smb.conf /etc/fstab /etc/multipath.conf /etc/pam.d/system-auth /etc/sysconfig/nfs /etc/exports /etc/vsftpd/vsftpd.conf"

# list of config files that may exist and should be checked that they
# are the same on all nodes
CONFIG_FILES_MAY="/etc/ctdb/public_addresses /etc/ctdb/static-routes"

# Merge stderr into stdout for the remainder of the script so that error
# output of the commands below lands in the report.  A redirection without
# a command has no lasting effect; "exec" applies it to the shell itself.
exec 2>&1

cat <<EOF
--------------------------------------------------------------------
ctdb_diagnostics starting. This script will gather information about
your ctdb cluster. You should send the output of this script along
with any ctdb or clustered Samba bug reports.
--------------------------------------------------------------------
EOF
date
# Record one diagnostic failure.
#   $1 - description of the problem
# Globals: NUM_ERRORS is incremented; a numbered copy of the message is
#          appended to the file named by ERRORS; msg is set to $1.
# Output:  "ERROR: <description>" on stdout.
error() {
    msg="$1"
    echo "ERROR: $msg"
    # shell arithmetic; no need to fork expr for an increment
    NUM_ERRORS=$((NUM_ERRORS + 1))
    # quoted so the redirect target survives a path containing whitespace
    echo " ERROR[$NUM_ERRORS]: $msg" >> "$ERRORS"
}
# Print a file between two separator lines: its name, its "ls -l" entry
# and its contents, each content line prefixed with one space.
#   $1 - path of the file to show
# Errors from ls and cat are included in the output rather than hidden.
# Globals: fname is set to $1.
show_file() {
    fname="$1"
    echo " ================================"
    echo " File: $fname"
    # quoted so a path containing whitespace is passed to ls as one argument
    echo " $(ls -l "$fname" 2>&1)"
    cat "$fname" 2>&1 | sed 's/^/ /'
    echo " ================================"
}
# Announce a command and hand it to "onnode all", preceded by hostname and
# date, with the command's stderr merged into its output and each line of
# that output prefixed with one space.
#   $@ - the command line; several arguments are joined with spaces, so
#        both  show_all "id foo"  and  show_all id foo  run "id foo".
show_all() {
    echo "running $* on all nodes"
    onnode all "hostname; date; $* 2>&1 | sed 's/^/ /'"
}
# File that error() appends to; it is printed and removed at the end of
# the run.  mktemp creates it under an unpredictable name, so a file or
# symlink planted in advance at a guessable path cannot be written through.
ERRORS=$(mktemp "${TMPDIR:-/tmp}/diag_err.XXXXXX") || {
    echo "ERROR: unable to create a temporary file for the error list"
    exit 1
}
# also remove the file if the script stops before reaching its end
trap 'rm -f "$ERRORS"' EXIT

# one line per node in the nodes file; nodes are numbered from 0
NUM_NODES=$(wc -l < /etc/ctdb/nodes)
MAX_NODE=$((NUM_NODES - 1))
NUM_ERRORS=0

cat <<EOF
Diagnosis started on a $NUM_NODES node cluster. The following node list will be used:
EOF
show_file /etc/ctdb/nodes
cat <<EOF
--------------------------------------------------------------------
Comparing critical config files on all nodes
EOF

# Show the local copy of a config file, then fetch the same path from each
# node 0..MAX_NODE with onnode and compare it with the local copy.  Every
# node whose copy differs is reported through error() followed by a
# unified diff.
#   $1 - path of the config file (must be readable locally)
compare_on_all_nodes() {
    cfg="$1"
    show_file "$cfg"
    for node in $(seq 0 "$MAX_NODE"); do
        echo "Testing for same config file $cfg on node $node"
        # mktemp instead of a guessable /tmp name, so the fetched copy
        # cannot be redirected through a pre-created file or symlink
        remote_copy=$(mktemp "${TMPDIR:-/tmp}/$(basename "$cfg").node$node.XXXXXX") || {
            error "cannot create temporary file to compare $cfg on node $node"
            continue
        }
        onnode "$node" cat "$cfg" > "$remote_copy" 2>&1
        cmp "$cfg" "$remote_copy" 2>&1 || {
            error "File $cfg is different on node $node"
            diff -u "$cfg" "$remote_copy"
        }
        rm -f "$remote_copy"
    done
}

# mandatory files: a missing local copy is itself an error
for f in $CONFIG_FILES_MUST; do
    [ -r "$f" ] || {
        error "$f is missing on this node"
        continue
    }
    compare_on_all_nodes "$f"
done

# optional files: a missing local copy is only mentioned
for f in $CONFIG_FILES_MAY; do
    [ -r "$f" ] || {
        echo "Optional file $f is not present on local node"
        continue
    }
    compare_on_all_nodes "$f"
done
cat <<EOF
--------------------------------------------------------------------
Checking for clock drift
EOF

# Ask each node 0..MAX_NODE for its time in seconds since the epoch (via
# onnode) and report, through error(), every node that is more than 30
# seconds away from the local clock or whose reply is not a number.
check_clock_drift() {
    for node in $(seq 0 "$MAX_NODE"); do
        # Take the local reference per node: with a single sample before
        # the loop, time spent querying earlier nodes would be counted as
        # drift of later ones.
        local_time=$(date +%s)
        remote_time=$(onnode "$node" date +%s)
        case "$remote_time" in
            '' | *[!0-9]*)
                # empty or non-numeric reply, e.g. the node was unreachable
                error "unable to read time on node $node"
                continue
                ;;
        esac
        drift=$((remote_time - local_time))
        if [ "$drift" -gt 30 ] || [ "$drift" -lt -30 ]; then
            error "time on node $node differs by $drift seconds"
        fi
    done
}
check_clock_drift
cat <<EOF
--------------------------------------------------------------------
Showing software versions
EOF
show_all "uname -a"

# The package manager is probed on the local node only; the query itself
# is then run on all nodes.
[ -x /bin/rpm ] && {
    # grep -E rather than the obsolescent egrep alias
    show_all "rpm -qa | grep -E 'samba|ctdb|gpfs'"
}
[ -x /usr/bin/dpkg-query ] && {
    show_all "/usr/bin/dpkg-query --show 'ctdb'"
    show_all "/usr/bin/dpkg-query --show 'samba'"
    #show_all "/usr/bin/dpkg-query --show 'gpfs'"
}
cat <<EOF
--------------------------------------------------------------------
Showing ctdb status and recent log entries
EOF
show_all "ctdb status; ctdb ip"
show_all "ctdb statistics"
show_all "ctdb uptime"

# The log is dumped once; the "test -f" keeps tail from being run on nodes
# that have no /var/log/log.ctdb.
echo "Showing log.ctdb"
show_all "test -f /var/log/log.ctdb && tail -100 /var/log/log.ctdb"

show_all "tail -200 /var/log/messages"
show_all "tail -200 /etc/ctdb/state/vacuum.log"
show_all "ls -lRs /var/ctdb"
show_all "ls -lRs /etc/ctdb"
cat <<EOF
--------------------------------------------------------------------
Showing system and process status
EOF

# filesystems, sessions and processes
show_all "df"
show_all "df -i"
show_all "mount"
show_all "w"
show_all "ps axfwu"

# kernel messages and hardware
show_all "dmesg"
show_all "/sbin/lspci"
show_all "dmidecode"
show_all "cat /proc/partitions"
show_all "cat /proc/cpuinfo"
show_all "cat /proc/scsi/scsi"

# network
show_all "/sbin/ifconfig -a"
show_all "/sbin/ip addr list"
show_all "/sbin/route -n"
show_all "netstat -s"

# memory, cron, kernel parameters and firewall
show_all "free"
show_all "crontab -l"
show_all "sysctl -a"
show_all "iptables -L -n"
show_all "iptables -L -n -t nat"

# RPC / NFS
show_all "/usr/sbin/rpcinfo -p"
show_all "/usr/sbin/showmount -a"
show_all "/usr/sbin/showmount -e"
show_all "/usr/sbin/nfsstat -v"

# The remaining tools are optional; each is probed on the local node and,
# if present, run on all nodes.
[ -x /sbin/multipath ] && {
    show_all "/sbin/multipath -ll"
}
[ -x /sbin/chkconfig ] && {
    show_all "/sbin/chkconfig --list"
}
[ -x /usr/sbin/getenforce ] && {
    show_all "/usr/sbin/getenforce"
}
[ -d /proc/net/bonding ] && {
    for f in /proc/net/bonding/*; do
        # an unmatched glob is left as the literal pattern; skip it
        [ -e "$f" ] || continue
        show_all "cat $f"
    done
}
# GPFS section: only produced when /usr/lpp/mmfs exists on the local node.
[ -d /usr/lpp/mmfs ] && {
    cat <<EOF
--------------------------------------------------------------------
Showing GPFS status and recent log entries
EOF
    show_all "tail -100 /var/adm/ras/mmfs.log.latest"
    show_all "/usr/lpp/mmfs/bin/mmlsconfig"
    show_all "/usr/lpp/mmfs/bin/mmlsfs all"
    show_all "/usr/lpp/mmfs/bin/mmlsnsd"
    show_all "/usr/lpp/mmfs/bin/mmlsnsd -X"
    show_all "/usr/lpp/mmfs/bin/mmfsadm dump version"
    show_all "/usr/lpp/mmfs/bin/mmfsadm dump waiters"
    show_all "/usr/lpp/mmfs/bin/mmlsmount all"
    show_all "/usr/lpp/mmfs/bin/mmlsquota"
    show_all "/usr/lpp/mmfs/bin/mmlscluster"
    show_all "/usr/lpp/mmfs/bin/mmlsmgr"

    # Device names are cut out of the "File system attributes" header
    # lines printed by a local "mmlsfs all": third "/"-separated field,
    # up to the ":".
    devlist=$(mmlsfs all | grep '^File.system.attributes' | cut -d/ -f3 | cut -d: -f1)
    for d in $devlist; do
        # Full paths, like every other GPFS command in this section: the
        # PATH extended at the top of this script applies to the local
        # shell only.
        show_all "/usr/lpp/mmfs/bin/mmdf $d"
        show_all "/usr/lpp/mmfs/bin/mmlsdisk $d"
        show_all "/usr/lpp/mmfs/bin/mmlsfileset $d"
        show_all "/usr/lpp/mmfs/bin/mmlspolicy $d"
        show_all "/usr/lpp/mmfs/bin/mmlssnapshot $d"
    done

    # first column of the local mount table for entries of type gpfs
    fslist=$(mount | grep type.gpfs | awk '{print $1}')
    for fs in $fslist; do
        show_all "/usr/lpp/mmfs/bin/mmlssnapshot $fs"
        show_all "/usr/lpp/mmfs/bin/mmlsdisk $fs"
        show_all "/usr/lpp/mmfs/bin/mmlsfileset $fs"
    done
}
cat <<EOF
--------------------------------------------------------------------
Showing Samba status
EOF
show_all "smbstatus -n -B"
show_all "net ads testjoin"
show_all "net conf list"
show_all "lsof -n | grep smbd"
show_all "lsof -n | grep ctdbd"
show_all "netstat -tan"
show_all "net ads info"
show_all "date"
show_all "smbclient -U% -L 127.0.0.1"

# Workgroup name as reported by the local testparm; used to look up the
# Administrator account on every node.
WORKGROUP=$(testparm -s --parameter-name=WORKGROUP 2> /dev/null)
# One string: passing "id" and the account as two separate arguments would
# drop the account when show_all only reads its first argument.
show_all "id $WORKGROUP/Administrator"
show_all "wbinfo -p"
show_all "wbinfo --online-status"
show_all "smbd -b"

date
echo "Diagnostics finished with $NUM_ERRORS errors"

# Repeat the collected error messages as a summary, then drop the file.
[ -r "$ERRORS" ] && {
    cat "$ERRORS"
    rm -f "$ERRORS"
}

# The exit status is the error count.  A status is truncated to 8 bits, so
# cap it: otherwise exactly 256 errors would be reported as success.
[ "$NUM_ERRORS" -gt 255 ] && exit 255
exit "$NUM_ERRORS"