mirror of
https://github.com/samba-team/samba.git
synced 2025-02-24 13:57:43 +03:00
ctdb-scripts: Change statd-callout to be more scalable
Updating ctdb.tdb on each add-client, del-client and each delete during notify was too ambitious. Persistent transactions do not perform well enough to do this.

Revert to having add-client and del-client create touch files. Each monitor event calls "statd-callout update" to convert touch files into ctdb.tdb records.

Update testcases to do the "update" and add an extra test.

Signed-off-by: Martin Schwenke <martin@meltin.net>
Pair-programmed-with: Amitay Isaacs <amitay@gmail.com>
Reviewed-by: Amitay Isaacs <amitay@gmail.com>
This commit is contained in:
parent
032441d9a2
commit
500c6e194b
@ -221,6 +221,7 @@ case "$1" in
|
||||
} || exit $?
|
||||
|
||||
update_tickles 2049
|
||||
nfs_update_lock_info
|
||||
|
||||
# check that statd responds to rpc requests
|
||||
# if statd is not running we try to restart it
|
||||
|
@ -91,6 +91,7 @@ case "$1" in
|
||||
} || exit $?
|
||||
|
||||
update_tickles 2049
|
||||
nfs_update_lock_info
|
||||
|
||||
nfs_check_rpc_services
|
||||
|
||||
|
@ -690,6 +690,16 @@ get_tcp_connections_for_ip ()
|
||||
{print $4" "$5}'
|
||||
}
|
||||
|
||||
##################################################################
|
||||
# use statd-callout to update NFS lock info
|
||||
##################################################################
|
||||
nfs_update_lock_info ()
|
||||
{
|
||||
# Takes no arguments. Only invoke statd-callout when it exists and is
# executable; otherwise this function is a silent no-op.
if [ -x "$CTDB_BASE/statd-callout" ] ; then
|
||||
# "update" is the statd-callout subcommand that pushes the pending
# statd-state@* files into ctdb.tdb.
"$CTDB_BASE/statd-callout" update
|
||||
fi
|
||||
}
|
||||
|
||||
########################################################
|
||||
# start/stop the Ganesha nfs service
|
||||
########################################################
|
||||
|
@ -30,7 +30,19 @@ loadconfig nfs
|
||||
nl="
|
||||
"
|
||||
|
||||
ctdb_setup_service_state_dir "statd-callout"
|
||||
|
||||
cd "$service_state_dir" || \
|
||||
die "Failed to change directory to \"${service_state_dir}\""
|
||||
|
||||
case "$1" in
|
||||
# Keep a single file to keep track of the last "add-client" or
|
||||
# "del-client". These get pushed to ctdb.tdb during "update",
|
||||
# which will generally be run once each "monitor" cycle. In this
|
||||
# way we avoid scalability problems with flood of persistent
|
||||
# transactions after a "notify" when all the clients re-take their
|
||||
# locks.
|
||||
|
||||
add-client)
|
||||
# statd does not tell us to which IP the client connected so
|
||||
# we must add it to all the IPs that we serve
|
||||
@ -38,42 +50,47 @@ case "$1" in
|
||||
pnn=$(ctdb xpnn | sed -e 's/.*://')
|
||||
date=$(date '+%s')
|
||||
ctdb ip -X |
|
||||
tail -n +2 | {
|
||||
# This all needs to be in the end of the pipe so it
|
||||
# doesn't get lost
|
||||
items=""
|
||||
while IFS="|" read x sip node x ; do
|
||||
[ "$node" = "$pnn" ] || continue # not us
|
||||
key="statd-state@${sip}@${cip}"
|
||||
item="\"${key}\" \"${date}\""
|
||||
items="${items}${items:+${nl}}${item}"
|
||||
done
|
||||
if ! echo "$items" | ctdb ptrans "ctdb.tdb" ; then
|
||||
die "Failed to add clients"
|
||||
fi
|
||||
}
|
||||
tail -n +2 |
|
||||
while IFS="|" read x sip node x ; do
|
||||
[ "$node" = "$pnn" ] || continue # not us
|
||||
key="statd-state@${sip}@${cip}"
|
||||
echo "\"${key}\" \"${date}\"" >"$key"
|
||||
done
|
||||
;;
|
||||
del-client)
|
||||
|
||||
del-client)
|
||||
# statd does not tell us from which IP the client disconnected
|
||||
# so we must delete it for all the IPs that we serve
|
||||
cip="$2"
|
||||
pnn=$(ctdb xpnn | sed -e 's/.*://')
|
||||
ctdb ip -X |
|
||||
tail -n +2 | {
|
||||
# This all needs to be in the end of the pipe so it
|
||||
# doesn't get lost
|
||||
items=""
|
||||
while IFS="|" read x sip node x ; do
|
||||
[ "$node" = "$pnn" ] || continue # not us
|
||||
key="statd-state@${sip}@${cip}"
|
||||
item="\"${key}\" \"\""
|
||||
items="${items}${items:+${nl}}${item}"
|
||||
done
|
||||
if ! echo "$items" | ctdb ptrans "ctdb.tdb" ; then
|
||||
die "Failed to delete clients"
|
||||
fi
|
||||
}
|
||||
tail -n +2 |
|
||||
while IFS="|" read x sip node x ; do
|
||||
[ "$node" = "$pnn" ] || continue # not us
|
||||
key="statd-state@${sip}@${cip}"
|
||||
echo "\"${key}\" \"\"" >"$key"
|
||||
done
|
||||
;;
|
||||
|
||||
update)
|
||||
files=$(echo statd-state@*)
|
||||
if [ "$files" = "statd-state@*" ] ; then
|
||||
# No files!
|
||||
exit 0
|
||||
fi
|
||||
# Filter out lines for any IP addresses that are not currently
|
||||
# hosted public IP addresses.
|
||||
pnn=$(ctdb xpnn | sed -e 's/.*://')
|
||||
ctdb_ips=$(ctdb ip | tail -n +2)
|
||||
sed_expr=$(echo "$ctdb_ips" |
|
||||
awk -v pnn=$pnn 'pnn == $2 { \
|
||||
ip = $1; gsub(/\./, "\\.", ip); \
|
||||
printf "/statd-state@%s@/p\n", ip }')
|
||||
if cat $files | sed -n "$sed_expr" | ctdb ptrans "ctdb.tdb" ; then
|
||||
rm $files
|
||||
fi
|
||||
;;
|
||||
|
||||
notify)
|
||||
# we must restart the lockmanager (on all nodes) so that we get
|
||||
# a clusterwide grace period (so other clients don't take out
|
||||
@ -144,7 +161,8 @@ case "$1" in
|
||||
# Construct a sed expression to take catdb output and produce pairs of:
|
||||
# server-IP client-IP
|
||||
# but only for the server-IPs that are hosted on this node.
|
||||
sed_expr=$(ctdb ip | tail -n +2 |
|
||||
ctdb_all_ips=$(ctdb ip -n all | tail -n +2)
|
||||
sed_expr=$(echo "$ctdb_all_ips" |
|
||||
awk -v pnn=$pnn 'pnn == $2 { \
|
||||
ip = $1; gsub(/\./, "\\.", ip); \
|
||||
printf "s/^key.*=.*statd-state@\\(%s\\)@\\([^\"]*\\).*/\\1 \\2/p\n", ip }')
|
||||
@ -152,34 +170,42 @@ case "$1" in
|
||||
statd_state=$(ctdb catdb ctdb.tdb | sed -n "$sed_expr" | sort)
|
||||
[ -n "$statd_state" ] || exit 0
|
||||
|
||||
# The following is dangerous if this script times out before
|
||||
# all of the smnotify commands are run. Revert to individual
|
||||
# pdelete commands for now and consider optimising smnotify to
|
||||
# read all the data from stdin and then run it in the
|
||||
# background.
|
||||
#
|
||||
# Delete all the items from the TDB
|
||||
#if ! echo "$statd_state" | \
|
||||
# awk '{ printf "\"statd-state@%s@%s\" \"\"\n", $1, $2 }') | \
|
||||
# ctdb ptrans ctdb.tdb ; then
|
||||
|
||||
# die "Yikes!"
|
||||
#fi
|
||||
|
||||
prev=""
|
||||
echo "$statd_state" |
|
||||
while read sip cip ; do
|
||||
# Delete the entry from the DB
|
||||
ctdb pdelete ctdb.tdb "statd-state@${sip}@${cip}"
|
||||
# Reset stateval for each serverip
|
||||
[ "$sip" = "$prev" ] || stateval="$state_even"
|
||||
# Send notifies for server shutdown
|
||||
smnotify --client=$cip --ip=$sip --server=$sip --stateval=$stateval
|
||||
smnotify --client=$cip --ip=$sip --server=$NFS_HOSTNAME --stateval=$stateval
|
||||
# Send notifies for server startup
|
||||
stateval=$(($stateval + 1))
|
||||
smnotify --client=$cip --ip=$sip --server=$sip --stateval=$stateval
|
||||
smnotify --client=$cip --ip=$sip --server=$NFS_HOSTNAME --stateval=$stateval
|
||||
done
|
||||
echo "$statd_state" | {
|
||||
# This all needs to be in the same command group at the
|
||||
# end of the pipe so it doesn't get lost when the loop
|
||||
# completes.
|
||||
items=""
|
||||
while read sip cip ; do
|
||||
# Collect item to delete from the DB
|
||||
key="statd-state@${sip}@${cip}"
|
||||
item="\"${key}\" \"\""
|
||||
items="${items}${items:+${nl}}${item}"
|
||||
|
||||
# NOTE: Consider optimising smnotify to read all the
|
||||
# data from stdin and then run it in the background.
|
||||
|
||||
# Reset stateval for each serverip
|
||||
[ "$sip" = "$prev" ] || stateval="$state_even"
|
||||
# Send notifies for server shutdown
|
||||
smnotify --client=$cip --ip=$sip --server=$sip --stateval=$stateval
|
||||
smnotify --client=$cip --ip=$sip --server=$NFS_HOSTNAME --stateval=$stateval
|
||||
# Send notifies for server startup
|
||||
stateval=$(($stateval + 1))
|
||||
smnotify --client=$cip --ip=$sip --server=$sip --stateval=$stateval
|
||||
smnotify --client=$cip --ip=$sip --server=$NFS_HOSTNAME --stateval=$stateval
|
||||
done
|
||||
|
||||
echo "$items" | ctdb ptrans "ctdb.tdb"
|
||||
}
|
||||
|
||||
# Remove any stale touch files (i.e. for IPs not currently
|
||||
# hosted on this node and created since the last "update").
|
||||
# There's nothing else we can do with them at this stage.
|
||||
echo "$ctdb_all_ips" |
|
||||
awk -v pnn=$pnn 'pnn != $2 { print $1 }' |
|
||||
while read sip ; do
|
||||
rm -f "statd-state@${sip}@"*
|
||||
done
|
||||
;;
|
||||
esac
|
||||
|
@ -10,5 +10,6 @@ FAKE_DATE_OUTPUT="1234565789"
|
||||
|
||||
ok_null
|
||||
simple_test_event "add-client" "192.168.123.45"
|
||||
simple_test_event "update"
|
||||
|
||||
check_ctdb_tdb_statd_state "192.168.123.45"
|
||||
|
@ -2,7 +2,7 @@
|
||||
|
||||
. "${TEST_SCRIPTS_DIR}/unit.sh"
|
||||
|
||||
define_test "2 x add-client"
|
||||
define_test "2 x add-client, update"
|
||||
|
||||
setup_ctdb
|
||||
|
||||
@ -11,5 +11,6 @@ FAKE_DATE_OUTPUT="1234565789"
|
||||
ok_null
|
||||
simple_test_event "add-client" "192.168.123.45"
|
||||
simple_test_event "add-client" "192.168.123.46"
|
||||
simple_test_event "update"
|
||||
|
||||
check_ctdb_tdb_statd_state "192.168.123.45" "192.168.123.46"
|
||||
|
@ -2,7 +2,7 @@
|
||||
|
||||
. "${TEST_SCRIPTS_DIR}/unit.sh"
|
||||
|
||||
define_test "add-client, del-client"
|
||||
define_test "add-client, update, del-client, update"
|
||||
|
||||
setup_ctdb
|
||||
|
||||
@ -10,6 +10,9 @@ FAKE_DATE_OUTPUT="1234565789"
|
||||
|
||||
ok_null
|
||||
simple_test_event "add-client" "192.168.123.45"
|
||||
simple_test_event "update"
|
||||
|
||||
simple_test_event "del-client" "192.168.123.45"
|
||||
simple_test_event "update"
|
||||
|
||||
check_ctdb_tdb_statd_state
|
||||
|
@ -10,6 +10,7 @@ FAKE_DATE_OUTPUT="1234565789"
|
||||
|
||||
ok_null
|
||||
simple_test_event "add-client" "192.168.123.45"
|
||||
simple_test_event "update"
|
||||
|
||||
check_ctdb_tdb_statd_state "192.168.123.45"
|
||||
|
||||
|
@ -10,11 +10,13 @@ FAKE_DATE_OUTPUT="1234565789"
|
||||
|
||||
ok_null
|
||||
simple_test_event "add-client" "192.168.123.45"
|
||||
simple_test_event "update"
|
||||
|
||||
FAKE_CTDB_PNN=1
|
||||
|
||||
ok_null
|
||||
simple_test_event "add-client" "192.168.123.46"
|
||||
simple_test_event "update"
|
||||
|
||||
FAKE_CTDB_PNN=0
|
||||
|
||||
|
@ -10,11 +10,13 @@ FAKE_DATE_OUTPUT="1234565789"
|
||||
|
||||
ok_null
|
||||
simple_test_event "add-client" "192.168.123.45"
|
||||
simple_test_event "update"
|
||||
|
||||
FAKE_CTDB_PNN=1
|
||||
|
||||
ok_null
|
||||
simple_test_event "add-client" "192.168.123.46"
|
||||
simple_test_event "update"
|
||||
|
||||
FAKE_CTDB_PNN=0
|
||||
|
||||
|
16
ctdb/tests/eventscripts/statd-callout.007.sh
Executable file
16
ctdb/tests/eventscripts/statd-callout.007.sh
Executable file
@ -0,0 +1,16 @@
|
||||
#!/bin/sh
|
||||
|
||||
# Unit test: an add-client followed by a del-client for the same client,
# with only a single "update" run afterwards.
. "${TEST_SCRIPTS_DIR}/unit.sh"
|
||||
|
||||
define_test "add-client, del-client, update"
|
||||
|
||||
setup_ctdb
|
||||
|
||||
# Fixed timestamp so the value recorded by add-client is deterministic
# (presumably consumed by a stubbed "date" in the test harness - confirm).
FAKE_DATE_OUTPUT="1234565789"
|
||||
|
||||
ok_null
|
||||
# Both client events happen before "update", so the del-client lands
# before anything has been pushed to ctdb.tdb.
simple_test_event "add-client" "192.168.123.45"
|
||||
simple_test_event "del-client" "192.168.123.45"
|
||||
simple_test_event "update"
|
||||
|
||||
# No client IPs are passed here - presumably this asserts that no
# statd-state records remain; verify against the helper's definition.
check_ctdb_tdb_statd_state
|
Loading…
x
Reference in New Issue
Block a user