mirror of
https://github.com/samba-team/samba.git
synced 2025-02-04 17:47:26 +03:00
0f18859a6c
In some contexts ctdb_diagnostics generates too many errors when it is run on heterogeneous and machine-configured clusters. In some clusters some nodes are expected to be differently configured and also machine-generated configuration files can have comments containing timestamps.

This adds some command-line options that can be used to reduce the number of errors reported:

  -n <nodes>  Comma separated list of nodes to operate on
  -c          Ignore comment lines (starting with '#') in file comparisons
  -w          Ignore whitespace in file comparisons
  --no-ads    Do not use commands that assume an Active Directory Server

The -n option simply allows ctdb_diagnostics to operate on a subset of nodes, avoiding file comparisons with and data collection on nodes that are differently configured.

For file comparisons, instead of showing each file on the current node and then comparing other nodes to that file, the file from the first (available or requested) node is shown and then other nodes are compared to that. That has resulted in changes in output - that is, ctdb_diagnostics no longer prints messages referencing the current node.

-c and -w are used to weaken comparisons between configuration files.

--no-ads can be used to avoid running ADS-specific commands if a cluster uses LDAP (or other non-ADS) configuration.

This also fixes a number of bugs in related code:

* A call to onnode was losing the >> NODE ... << lines because they now go to stderr. This was changed in onnode long ago but ctdb_diagnostics was never updated to match.

* ctdb_diagnostics was counting lines in /etc/ctdb/nodes to determine what nodes to operate on. For some time the nodes file has supported syntax that makes this invalid. "ctdb listnodes -Y" is now used to list available nodes.

Signed-off-by: Martin Schwenke <martin@meltin.net>

(This used to be ctdb commit 36c8244a0f68c7c9bbee40982f230e9d14d3c0ea)
324 lines
7.9 KiB
Bash
Executable File
324 lines
7.9 KiB
Bash
Executable File
#!/bin/sh
# a script to test the basic setup of a CTDB/Samba install
# tridge@samba.org September 2007
# martin@meltin.net August 2010

# usage: print the option summary on stderr and terminate the script.
#
# Takes no arguments.  Never returns: it always exits with status 1,
# including when it is reached through -h/--help.
usage ()
{
    cat >&2 <<EOF
Usage: ctdb_diagnostics [OPTION] ...
  options:
    -n <nodes>  Comma separated list of nodes to operate on
    -c          Ignore comment lines (starting with '#') in file comparisons
    -w          Ignore whitespace in file comparisons
    --no-ads    Do not use commands that assume an Active Directory Server
EOF
    exit 1
}

# Defaults for the settings that parse_options may override.
#
# nodes:     whitespace-separated node list; by default field 2 of each
#            colon-separated line printed by "ctdb listnodes -Y".
# diff_opts: extra options handed to diff when comparing files.
# no_ads:    "true" or "false"; it is executed as a command in
#            "if $no_ads" tests later in the script.
nodes=$(ctdb listnodes -Y | cut -d: -f2)
diff_opts=
no_ads=false

# parse_options: process the script's command-line arguments.
#
# Arguments: the script's own "$@".
# Globals written:
#   nodes     - replaced by the -n list, with commas turned into spaces
#   diff_opts - gains " -I ^#" for -c and " -w" for -w
#   no_ads    - set to "true" by --no-ads
# Calls usage (which exits with status 1) for -h/--help, when getopt
# rejects the arguments, and when non-option arguments are left over.
parse_options ()
{
    temp=$(getopt -n "ctdb_diagnostics" -o "n:cwh" -l no-ads,help -- "$@") ||
        usage

    # Re-read getopt's output as this function's positional parameters so
    # the loop below can consume one option at a time up to "--".
    eval set -- "$temp"

    while true ; do
        case "$1" in
        -n)
            nodes=$(echo "$2" | sed -e 's@,@ @g')
            shift 2
            ;;
        -c)
            # diff_opts is expanded unquoted where diff is run, so the
            # pattern must not contain glob characters.  "^#" selects
            # the same lines as "^#.*" would.
            diff_opts="${diff_opts} -I ^#"
            shift
            ;;
        -w)
            diff_opts="${diff_opts} -w"
            shift
            ;;
        --no-ads)
            no_ads=true
            shift
            ;;
        --)
            shift
            break
            ;;
        -h|--help|*)
            usage
            ;;
        esac
    done

    # Anything left after "--" is an unexpected positional argument.
    [ $# -ne 0 ] && usage
}

parse_options "$@"

# Comma-separated form of the node list.  $nodes is deliberately left
# unquoted: echo then joins its words with single spaces, so each gap
# becomes exactly one comma.
nodes_comma=$(echo $nodes | sed -e 's@[[:space:]]@,@g')

PATH="$PATH:/sbin:/usr/sbin:/usr/lpp/mmfs/bin"

# list of config files that must exist and that we check are the same
# on the nodes
CONFIG_FILES_MUST="/etc/krb5.conf /etc/hosts /etc/ctdb/nodes /etc/sysconfig/ctdb /etc/resolv.conf /etc/nsswitch.conf /etc/sysctl.conf /etc/samba/smb.conf /etc/fstab /etc/multipath.conf /etc/pam.d/system-auth /etc/sysconfig/nfs /etc/exports /etc/vsftpd/vsftpd.conf"

# list of config files that may exist and should be checked that they
# are the same on the nodes
CONFIG_FILES_MAY="/etc/ctdb/public_addresses /etc/ctdb/static-routes"

# Send stderr to wherever stdout goes for the rest of the script, so
# that error output lands in the same report.  A redirection with no
# command (a bare "2>&1") does not persist; exec makes it stick.
exec 2>&1

cat <<EOF
--------------------------------------------------------------------
ctdb_diagnostics starting. This script will gather information about
your ctdb cluster. You should send the output of this script along
with any ctdb or clustered Samba bug reports.
--------------------------------------------------------------------
EOF

date

# error: report one problem and record it for the end-of-run summary.
#
# Arguments: $1 - the message text.
# Globals:   NUM_ERRORS (incremented), ERRORS (path of the file the
#            numbered message is appended to), msg (overwritten).
# Outputs:   "ERROR: <message>" on stdout.
error() {
    msg="$1"
    printf 'ERROR: %s\n' "$msg"
    NUM_ERRORS=$((NUM_ERRORS + 1))
    # Quoted so a path containing whitespace is still a single file.
    printf ' ERROR[%s]: %s\n' "$NUM_ERRORS" "$msg" >> "$ERRORS"
}

# show_file: print a local file between separator lines.
#
# Arguments: $1 - path of the file to show.
# Globals:   fname (overwritten).
# Outputs:   a separator, "File: <path>", the "ls -l" line for the file,
#            the file's contents with each line prefixed by one space,
#            and a closing separator.  Errors from ls and cat are
#            captured into the output rather than aborting.
show_file() {
    fname="$1"
    echo " ================================"
    printf ' File: %s\n' "$fname"
    # Quoted so that a path containing whitespace is listed as one file.
    printf ' %s\n' "$(ls -l "$fname" 2>&1)"
    # cat stays in the pipeline so that its error text is prefixed too.
    cat "$fname" 2>&1 | sed 's/^/ /'
    echo " ================================"
}

# show_all: run a command on every selected node and print its output.
#
# Arguments: the command line to run.  It is normally passed as a single
#            string; if several arguments are given they are joined with
#            spaces, so 'show_all id user' behaves like 'show_all "id user"'.
# Globals:   nodes_comma (read) - comma-separated nodes handed to onnode.
# Outputs:   a "running ..." header, then for each node whatever onnode
#            prints for: hostname, date, and the command's combined
#            stdout/stderr with each line prefixed by one space.
show_all() {
    cmd="$*"
    echo "running $cmd on nodes $nodes_comma"
    # onnode's own stderr is folded into stdout here; the change
    # description for this script notes that onnode's ">> NODE ... <<"
    # lines go to stderr.
    onnode "$nodes_comma" "hostname; date; $cmd 2>&1 | sed 's/^/ /'" 2>&1
}

# show_and_compare_files: show each file as found on the first node and
# report every other node whose copy differs.
#
# Arguments: $1   - printf format used when the first node cannot read a
#                   file; it receives the file name and the node number.
#            $2.. - paths of the files to check.
# Globals:   nodes (read), diff_opts (read, expanded unquoted on purpose
#            so that it splits into separate diff options); fmt, f,
#            first, n, msg, fstf and tmpf are overwritten.
# Uses onnode to read the files and error() to record problems.
# Scratch copies are created with mktemp and removed before returning.
show_and_compare_files () {

    fmt="$1" ; shift

    for f ; do

        first=true
        fstf=

        for n in $nodes ; do

            if $first ; then
                # If the first node cannot read the file, report it and
                # move on to the next file.
                onnode "$n" [ -r "$f" ] || {
                    msg=$(printf "$fmt" "$f" "$n")
                    error "$msg"
                    continue 2
                }

                fstf=$(mktemp "${TMPDIR:-/tmp}/ctdb_diag.XXXXXX") || {
                    error "Unable to create a temporary file for $f"
                    continue 2
                }
                onnode "$n" cat "$f" > "$fstf" 2>&1

                echo " ================================"
                echo " File (on node $n): $f"
                echo " $(onnode "$n" ls -l "$f" 2>&1)"
                sed 's/^/ /' < "$fstf"
                echo " ================================"
                first=false
            else
                echo "Testing for same config file $f on node $n"
                tmpf=$(mktemp "${TMPDIR:-/tmp}/ctdb_diag.XXXXXX") || {
                    error "Unable to create a temporary file for $f"
                    continue
                }
                onnode "$n" cat "$f" > "$tmpf" 2>&1
                # First diff only decides; the second prints the details.
                diff $diff_opts "$fstf" "$tmpf" >/dev/null 2>&1 || {
                    error "File $f is different on node $n"
                    diff -u $diff_opts "$fstf" "$tmpf"
                }
                rm -f "$tmpf"
            fi
        done

        [ -n "$fstf" ] && rm -f "$fstf"
    done
}

# State used by error(): NUM_ERRORS counts the problems found and the
# file named by ERRORS collects their numbered messages.  The file is
# created with mktemp so that its name cannot be predicted; if mktemp
# is unavailable the previous PID-based name is used instead.
NUM_ERRORS=0
ERRORS=$(mktemp "${TMPDIR:-/tmp}/diag_err.XXXXXX" 2>/dev/null) ||
    ERRORS="/tmp/diag_err.$$"
# Do not leave the file behind if the script stops early.
trap 'rm -f "$ERRORS"' EXIT

cat <<EOF
Diagnosis started on these nodes:
$nodes_comma

For reference, here is the nodes file on the current node...
EOF

show_file /etc/ctdb/nodes

cat <<EOF
--------------------------------------------------------------------
Comparing critical config files on nodes $nodes_comma
EOF

# The file lists are left unquoted on purpose: each whitespace-separated
# path becomes its own argument.  The first argument is the printf
# format used when the first node cannot read a file.
show_and_compare_files \
    "%s is missing on node %d" \
    $CONFIG_FILES_MUST

show_and_compare_files \
    "Optional file %s is not present on node %d" \
    $CONFIG_FILES_MAY

cat <<EOF
--------------------------------------------------------------------
Checking for clock drift
EOF

# Compare each node's clock (seconds since the epoch) with the local
# clock and report differences of more than 30 seconds either way.
for i in $nodes; do
    # Sample the local time for every node, so that the time spent
    # querying earlier nodes is not counted as drift on later ones.
    t=$(date +%s)
    t2=$(onnode "$i" date +%s)

    # Anything other than a plain number means the node did not answer
    # usefully; report that instead of feeding it to the arithmetic.
    case "$t2" in
    ''|*[!0-9]*)
        error "unable to read the time on node $i"
        continue
        ;;
    esac

    d=$((t2 - t))
    if [ "$d" -gt 30 ] || [ "$d" -lt -30 ]; then
        error "time on node $i differs by $d seconds"
    fi
done

cat <<EOF
--------------------------------------------------------------------
Showing software versions
EOF

show_all "uname -a"

# Package queries are chosen by which package tool exists on the node
# running this script.
if [ -x /bin/rpm ]; then
    # grep -E is the POSIX spelling of egrep.
    show_all "rpm -qa | grep -E 'samba|ctdb|gpfs'"
fi

if [ -x /usr/bin/dpkg-query ]; then
    show_all "/usr/bin/dpkg-query --show 'ctdb'"
    show_all "/usr/bin/dpkg-query --show 'samba'"
    #show_all "/usr/bin/dpkg-query --show 'gpfs'"
fi

cat <<EOF
--------------------------------------------------------------------
Showing ctdb status and recent log entries
EOF

show_all "ctdb status; ctdb ip"
show_all "ctdb statistics"
show_all "ctdb uptime"

# The last 100 lines of the ctdb log, on nodes where the log exists.
# (This stanza used to appear twice in a row; once is enough.)
echo "Showing log.ctdb"
show_all "test -f /var/log/log.ctdb && tail -100 /var/log/log.ctdb"

show_all "tail -200 /var/log/messages"
show_all "tail -200 /etc/ctdb/state/vacuum.log"
show_all "ls -lRs /var/ctdb"
show_all "ls -lRs /etc/ctdb"

cat <<EOF
--------------------------------------------------------------------
Showing system and process status
EOF

show_all "df"
show_all "df -i"
show_all "mount"
show_all "w"
show_all "ps axfwu"
show_all "dmesg"
show_all "/sbin/lspci"
show_all "dmidecode"
show_all "cat /proc/partitions"
show_all "cat /proc/cpuinfo"
show_all "cat /proc/scsi/scsi"
# (The ifconfig call used to be listed twice; it is run once now.)
show_all "/sbin/ifconfig -a"
show_all "/sbin/ip addr list"
show_all "/sbin/route -n"
show_all "netstat -s"
show_all "free"
show_all "crontab -l"
show_all "sysctl -a"
show_all "iptables -L -n"
show_all "iptables -L -n -t nat"
show_all "/usr/sbin/rpcinfo -p"
show_all "/usr/sbin/showmount -a"
show_all "/usr/sbin/showmount -e"
show_all "/usr/sbin/nfsstat -v"

# The remaining commands are only run when the corresponding tool or
# directory exists on the node running this script.
if [ -x /sbin/multipath ]; then
    show_all "/sbin/multipath -ll"
fi

if [ -x /sbin/chkconfig ]; then
    show_all "/sbin/chkconfig --list"
fi

if [ -x /usr/sbin/getenforce ]; then
    show_all "/usr/sbin/getenforce"
fi

if [ -d /proc/net/bonding ]; then
    for f in /proc/net/bonding/*; do
        # An unmatched glob is left as the literal pattern; skip it.
        [ -e "$f" ] || continue
        show_all "cat $f"
    done
fi

# GPFS section: only run when /usr/lpp/mmfs exists on the node running
# this script.
if [ -d /usr/lpp/mmfs ]; then
    cat <<EOF
--------------------------------------------------------------------
Showing GPFS status and recent log entries
EOF

    show_all "tail -100 /var/adm/ras/mmfs.log.latest"
    show_all "/usr/lpp/mmfs/bin/mmlsconfig"
    show_all "/usr/lpp/mmfs/bin/mmlsfs all"
    show_all "/usr/lpp/mmfs/bin/mmlsnsd"
    show_all "/usr/lpp/mmfs/bin/mmlsnsd -X"
    show_all "/usr/lpp/mmfs/bin/mmfsadm dump version"
    show_all "/usr/lpp/mmfs/bin/mmfsadm dump waiters"
    show_all "/usr/lpp/mmfs/bin/mmlsmount all"
    show_all "/usr/lpp/mmfs/bin/mmlsquota"
    show_all "/usr/lpp/mmfs/bin/mmlscluster"
    show_all "/usr/lpp/mmfs/bin/mmlsmgr"

    # Device names are cut out of the "File system attributes" lines of
    # the local "mmlsfs all" output: the third "/"-separated field, up
    # to the first ":".
    devlist=$(mmlsfs all | grep '^File.system.attributes' |
              cut -d/ -f3 | cut -d: -f1)
    for d in $devlist; do
        show_all "mmdf $d"
        show_all "mmlsdisk $d"
        show_all "mmlsfileset $d"
        show_all "mmlspolicy $d"
        show_all "mmlssnapshot $d"
    done

    # First column of every local mount line that matches "type.gpfs".
    fslist=$(mount | awk '/type.gpfs/ {print $1}')
    for fs in $fslist; do
        show_all "/usr/lpp/mmfs/bin/mmlssnapshot $fs"
        show_all "/usr/lpp/mmfs/bin/mmlsdisk $fs"
        show_all "/usr/lpp/mmfs/bin/mmlsfileset $fs"
    done
fi

cat <<EOF
--------------------------------------------------------------------
Showing Samba status
EOF

show_all "smbstatus -n -B"

# The "net ads" commands are skipped when --no-ads was given.
if $no_ads ; then
    echo
    echo "Skipping \"net ads testjoin\" as requested"
    echo
else
    show_all "net ads testjoin"
fi

show_all "net conf list"
show_all "lsof -n | grep smbd"
show_all "lsof -n | grep ctdbd"
show_all "netstat -tan"

if $no_ads ; then
    echo
    echo "Skipping \"net ads info\" as requested"
    echo
else
    show_all "net ads info"
fi

show_all "date"
show_all "smbclient -U% -L 127.0.0.1"

WORKGROUP=$(testparm -s --parameter-name=WORKGROUP 2> /dev/null)
# The whole command is passed as one string: with "id" and the account
# as separate arguments only a bare "id" was run on the nodes.
show_all "id $WORKGROUP/Administrator"
show_all "wbinfo -p"
show_all "wbinfo --online-status"
show_all "smbd -b"

date
echo "Diagnostics finished with $NUM_ERRORS errors"

# Replay the collected error messages, then remove the collection file.
# The name is checked and quoted so that an empty value can never leave
# cat reading from standard input.
if [ -n "$ERRORS" ] && [ -r "$ERRORS" ]; then
    cat "$ERRORS"
    rm -f "$ERRORS"
fi

# The exit status is the number of errors.  Statuses wrap modulo 256,
# so cap the value: 256 errors must not turn into a "success" exit.
if [ "$NUM_ERRORS" -gt 255 ]; then
    exit 255
fi
exit "$NUM_ERRORS"