mirror of
https://github.com/samba-team/samba.git
synced 2025-01-06 13:18:07 +03:00
86cdaf5a2e
This database isn't used throughout CTDB, so name it more specifically. Note that this might cause locks to be lost during upgrade to the first version containing this change. For testing, a different name is chosen to exercise related functionality. Signed-off-by: Martin Schwenke <mschwenke@ddn.com> Reviewed-by: Amitay Isaacs <amitay@gmail.com> Autobuild-User(master): Amitay Isaacs <amitay@samba.org> Autobuild-Date(master): Fri Dec 13 15:01:10 UTC 2024 on atb-devel-224
464 lines
11 KiB
Bash
Executable File
464 lines
11 KiB
Bash
Executable File
#!/bin/sh

# statd must be configured to use statd_callout, CTDB's binary
# counterpart to this script, as its availability call-out.
#
# Modern NFS utils versions use /etc/nfs.conf:
#
#   [statd]
#     name = mycluster
#     ha-callout = /usr/local/libexec/ctdb/statd_callout
#
# Older Linux versions may use something like the following...
#
# /etc/sysconfig/nfs (Red Hat) or /etc/default/nfs-common (Debian):
#   STATD_HOSTNAME="mycluster -H /usr/local/libexec/ctdb/statd_callout"
#
# If using Linux kernel NFS then the following should also be set in
# /etc/nfs.conf:
#
#   [sm-notify]
#     lift-grace = n
#
# See sm-notify(8) for details.  This doesn't matter when using
# NFS-Ganesha because sm-notify's attempt to lift grace will fail
# silently if /proc/fs/lockd/nlm_end_grace is not found.
#

# Fall back to the default configuration directory when the caller
# has not provided one
if [ -z "$CTDB_BASE" ]; then
	export CTDB_BASE="/usr/local/etc/ctdb"
fi

. "${CTDB_BASE}/functions"
|
|
|
|
# Overwrite this so we get some logging
#
# Pass all arguments to script_log, tagged "statd_callout_helper",
# then terminate the script with exit status 1.
die()
{
	script_log "statd_callout_helper" "$@"
	exit 1
}
|
|
|
|
load_script_options "service" "60.nfs"

############################################################

ctdb_setup_state_dir "service" "nfs"
|
|
|
|
# Print the path of the statd "sm" directory on stdout.
#
# In test mode (CTDB_TEST_MODE is non-empty) this is
# ${CTDB_TEST_TMP_DIR}/sm, which is created along with its ".bak"
# sibling.  Otherwise it is the first of the candidate system
# directories that exists.
#
# Nothing is printed when no candidate exists and the exit status is
# still 0, so callers must check for empty output.
find_statd_sm_dir()
{
	if [ -n "$CTDB_TEST_MODE" ]; then
		_f="${CTDB_TEST_TMP_DIR}/sm"
		mkdir -p "$_f" "${_f}.bak"
		echo "$_f"
		return
	fi

	for _sm_dir in /var/lib/nfs/statd/sm /var/lib/nfs/sm; do
		if [ -d "$_sm_dir" ]; then
			echo "$_sm_dir"
			break
		fi
	done
}
|
|
|
|
# Ensure the state directory exists and can be written when called as
# a non-root user.  Assume the user to run as is the owner of the
# system statd sm directory, since both rpc.statd and sm-notify run as
# this directory's owner, so it can read and modify the directory.
#
# $1 - directory to create
#
# Does nothing if the directory already exists.  Calls die() if the
# directory can not be created or no statd sm directory is found.
create_add_del_client_dir()
{
	_dir="$1"

	if [ ! -d "$_dir" ]; then
		mkdir -p "$_dir" || die "Failed to create directory \"${_dir}\""
		_ref=$(find_statd_sm_dir)
		[ -n "$_ref" ] || die "Failed to find statd sm directory"
		# Copy ownership from the statd sm directory
		chown --reference="$_ref" "$_dir"
	fi
}
|
|
|
|
# script_state_dir set by ctdb_setup_state_dir()
# shellcheck disable=SC2154
statd_callout_state_dir="${script_state_dir}/statd_callout"

# Set default value, if necessary
: "${CTDB_STATD_CALLOUT_SHARED_STORAGE:=persistent_db}"

# The setting has the form <mode>[:<location>]
statd_callout_mode="${CTDB_STATD_CALLOUT_SHARED_STORAGE%%:*}"
statd_callout_location="${CTDB_STATD_CALLOUT_SHARED_STORAGE#*:}"
# If not given then mode determines the default location.  With no
# ':' present the prefix strip above leaves the value unchanged.
if [ "$statd_callout_location" = "$CTDB_STATD_CALLOUT_SHARED_STORAGE" ]; then
	statd_callout_location=""
fi

case "$statd_callout_mode" in
persistent_db)
	statd_callout_db="${statd_callout_location:-ctdb_statd_callout.tdb}"
	statd_callout_queue_dir="${statd_callout_state_dir}/queue"
	;;
shared_dir)
	statd_callout_shared_dir="${statd_callout_location:-statd}"
	case "$statd_callout_shared_dir" in
	/*)
		# Absolute path, use as is
		:
		;;
	*)
		# Relative path, interpreted relative to
		# CTDB_NFS_SHARED_STATE_DIR
		if [ -z "$CTDB_NFS_SHARED_STATE_DIR" ]; then
			die "CTDB_NFS_SHARED_STATE_DIR is not set"
		fi
		t="${CTDB_NFS_SHARED_STATE_DIR}/${statd_callout_shared_dir}"
		statd_callout_shared_dir="$t"
		;;
	esac

	# In test mode relocate the directory under the test
	# temporary directory
	if [ -n "$CTDB_TEST_MODE" ]; then
		t="${CTDB_TEST_TMP_DIR}${statd_callout_shared_dir}"
		statd_callout_shared_dir="$t"
	fi
	;;
none)
	:
	;;
*)
	mode="$statd_callout_mode"
	die "error: unknown CTDB_STATD_CALLOUT_SHARED_STORAGE mode ${mode}"
	;;
esac
|
|
|
|
############################################################
|
|
|
|
# Read pairs of:
#   server-IP client-IP
# from stdin and send associated SM_NOTIFY packets.
#
# Lines that do not provide both fields are ignored.
send_notifies()
{
	# State must monotonically increase, across the entire
	# cluster.  Use seconds since epoch and assume the time is in
	# sync across nodes.  Even numbers mean service is shut down,
	# odd numbers mean service is up.  However, sm-notify always
	# reads the state and converts it to odd (if necessary, by
	# adding 1 when it is even) because it only sends "up"
	# notifications.  Note that there is a 2038 issue here but we
	# will get to that later.
	_state=$(date '+%s')

	_helper="${CTDB_HELPER_BINDIR}/ctdb_smnotify_helper"

	_notify_dir="${statd_callout_state_dir}/sm-notify"
	mkdir -p "$_notify_dir"

	while read -r _sip _cip; do
		# An empty server IP would create "sm" and "sm.bak"
		# directly under $_notify_dir, where the loop below
		# would treat them as server IPs.  An empty client IP
		# leaves no file name to write to.
		if [ -z "$_sip" ] || [ -z "$_cip" ]; then
			continue
		fi

		# Create a directory per server IP containing a file
		# for each client IP
		mkdir -p \
			"${_notify_dir}/${_sip}/sm" \
			"${_notify_dir}/${_sip}/sm.bak"

		_out="${_notify_dir}/${_sip}/sm/${_cip}"
		"$_helper" "monitor" "$_cip" "$_sip" >"$_out"
	done

	# Send notifications for server startup
	_ref=$(find_statd_sm_dir)
	for _sip_dir in "$_notify_dir"/*; do
		# Unexpanded pattern: there are no server directories
		if [ "$_sip_dir" = "${_notify_dir}/*" ]; then
			break
		fi

		_sip="${_sip_dir##*/}" # basename

		# Write the state as a host order 32-bit integer.  See
		# note at top of function about state.
		_out="${_sip_dir}/state"
		"$_helper" "state" "$_state" >"$_out"

		# The ownership of the directory and contents should
		# match the system's statd sm directory, so that
		# sm-notify drops privileges and switches to run as
		# the directory owner.  Nothing to copy from if no
		# statd sm directory was found.
		if [ -n "$_ref" ]; then
			chown -R --reference="$_ref" "$_sip_dir"
		fi
		timeout 10 sm-notify -d -f -m 0 -n -P "$_sip_dir" -v "$_sip"

		rm -rf "$_sip_dir"
	done
}
|
|
|
|
############################################################
|
|
|
|
# Use file/key names of the form statd-state@<server-IP>@<client-IP>
# to track the last "add-client" or "del-client".  These files contain
# the key and a value, quoted and ready to pass to "ctdb ptrans".  For
# add-client the value is the date (for debugging) and for del-client
# the value is empty (representing a delete).  These get pushed to
# $statd_callout_db during "update", which will generally be run once each
# "monitor" cycle.  In this way we avoid scalability problems with
# flood of persistent transactions after a "notify" when all the
# clients reclaim their locks.

# Create the local queue directory, attach the persistent database
# and write the configuration file.
#
# $1 - configuration file to write: mode, queue directory and public
#      IPs cache file, one per line
startup_persistent_db()
{
	_config_file="$1"

	create_add_del_client_dir "$statd_callout_queue_dir"

	$CTDB attach "$statd_callout_db" persistent

	cat >"$_config_file" <<EOF
persistent_db
${statd_callout_queue_dir}
${CTDB_MY_PUBLIC_IPS_CACHE}
EOF
}
|
|
|
|
# Used via 'grep -F -f "$persistent_db_grep_filter"' to match database
# keys for currently hosted public IPs
persistent_db_grep_filter="${statd_callout_state_dir}/.grep_filter"

# Write one fixed-string pattern "statd-state@<IP>@" to
# $persistent_db_grep_filter for each line of the file named by
# $CTDB_MY_PUBLIC_IPS_CACHE.
persistent_db_make_grep_filter()
{
	while read -r _ip; do
		echo "statd-state@${_ip}@"
	done <"$CTDB_MY_PUBLIC_IPS_CACHE" >"$persistent_db_grep_filter"
}
|
|
|
|
# Push queued add-client/del-client items for currently hosted public
# IPs to $statd_callout_db in a single "ptrans" invocation.  On
# success every queue file that was found is removed.
#
# Exits the script with status 0 if the queue is empty.
update_persistent_db()
{
	_files="${statd_callout_state_dir}/.file_list"
	find "$statd_callout_queue_dir" -name "statd-state@*" >"$_files"
	if [ ! -s "$_files" ]; then
		# No files!
		rm "$_files"
		exit 0
	fi

	persistent_db_make_grep_filter

	# Use cat instead of direct grep since POSIX grep does not
	# have -h
	_items="${statd_callout_state_dir}/.items"
	xargs cat <"$_files" | grep -F -f "$persistent_db_grep_filter" >"$_items"

	if [ -s "$_items" ]; then
		# Only drop the queue files once their contents have
		# been handed over successfully
		if $CTDB ptrans "$statd_callout_db" <"$_items"; then
			xargs rm -f <"$_files"
		fi
	fi

	rm -f "$_files" "$persistent_db_grep_filter" "$_items"
}
|
|
|
|
# Write "server-IP client-IP" pairs to stdout for every key in
# $statd_callout_db that belongs to a currently hosted public IP.
list_records_persistent_db()
{
	persistent_db_make_grep_filter

	# Extract the quoted key from each "key(N) = ..." line of
	# catdb output, keep keys matching the filter and split each
	# one into its two IP addresses
	$CTDB catdb "$statd_callout_db" |
		sed -n -e 's|^key([0-9]*) = "\([^"]*\)".*|\1|p' |
		grep -F -f "$persistent_db_grep_filter" |
		sed -e 's|statd-state@\([^@]*\)@\(.*\)|\1 \2|'

	rm -f "$persistent_db_grep_filter"
}
|
|
|
|
# Read "server-IP client-IP" pairs from stdin and pass the matching
# keys, each with an empty value (representing a delete), to a single
# "ptrans" invocation on $statd_callout_db.
delete_records_persistent_db()
{
	while read -r _sip _cip; do
		_key="statd-state@${_sip}@${_cip}"
		echo "\"${_key}\" \"\""
	done | $CTDB ptrans "$statd_callout_db"
}
|
|
|
|
# Delete queue files for public IPs that "ctdb ip all" reports as
# assigned to a node other than this one.
cleanup_persistent_db()
{
	# Remove any stale touch files (i.e. for IPs not currently
	# hosted on this node and created since the last "update").
	# There's nothing else we can do with them at this stage.
	_pnn=$(ctdb_get_pnn)
	# Drop the first (header) line of the output
	_ctdb_all_ips=$($CTDB ip all | tail -n +2)
	echo "$_ctdb_all_ips" |
		awk -v pnn="$_pnn" 'pnn != $2 { print $1 }' |
		while read -r _sip; do
			rm -f "${statd_callout_queue_dir}/statd-state@${_sip}@"*
		done
}
|
|
|
|
############################################################
|
|
|
|
# Use file/key names of the form statd-state@<server-IP>@<client-IP>
# to track the "add-client" and "del-client".  statd_callout adds and
# removes files directly in $statd_callout_shared_dir.  This may
# result in performance problems if thousands of clients reclaim locks
# after failover and the cluster filesystem is unable to handle the
# load.

# Create the shared directory and write the configuration file.
#
# $1 - configuration file to write: mode, shared directory and public
#      IPs cache file, one per line
startup_shared_dir()
{
	_config_file="$1"

	create_add_del_client_dir "$statd_callout_shared_dir"

	cat >"$_config_file" <<EOF
shared_dir
${statd_callout_shared_dir}
${CTDB_MY_PUBLIC_IPS_CACHE}
EOF
}
|
|
|
|
# No-op for shared_dir mode
update_shared_dir()
{
	:
}
|
|
|
|
# Write "server-IP client-IP" pairs to stdout, one for each regular
# file named statd-state@<server-IP>@<client-IP> in
# $statd_callout_shared_dir whose server IP is listed in the file
# named by $CTDB_MY_PUBLIC_IPS_CACHE.
list_records_shared_dir()
{
	while read -r _ip; do
		# Expand the pattern directly instead of parsing ls
		# output, so an IP without any files is silent
		for _f in "${statd_callout_shared_dir}/statd-state@${_ip}@"*; do
			# Also skips the unexpanded pattern when
			# nothing matches
			if [ ! -f "$_f" ]; then
				continue
			fi
			_t="${_f#"${statd_callout_shared_dir}/statd-state@"}"
			_sip="${_t%@*}"
			_cip="${_t#*@}"
			echo "$_sip" "$_cip"
		done
	done <"$CTDB_MY_PUBLIC_IPS_CACHE"
}
|
|
|
|
# Read "server-IP client-IP" pairs from stdin and remove the matching
# statd-state@<server-IP>@<client-IP> files from
# $statd_callout_shared_dir.
delete_records_shared_dir()
{
	while read -r _sip _cip; do
		echo "${statd_callout_shared_dir}/statd-state@${_sip}@${_cip}"
	done | xargs rm -f
}
|
|
|
|
# No-op for shared_dir mode
cleanup_shared_dir()
{
	:
}
|
|
|
|
############################################################
|
|
|
|
# No-op implementation

# Write a configuration file containing only the mode name.
#
# $1 - configuration file to write
startup_none()
{
	_config_file="$1"

	cat >"$_config_file" <<EOF
none
EOF
}
|
|
|
|
# No-op for none mode
update_none()
{
	:
}
|
|
|
|
# No-op for none mode: writes no records
list_records_none()
{
	:
}
|
|
|
|
# No-op for none mode: stdin is not read
delete_records_none()
{
	:
}
|
|
|
|
# No-op for none mode
cleanup_none()
{
	:
}
|
|
|
|
############################################################
|
|
|
|
# Per-mode initialisation
#
# Create the state directory and call the startup function for the
# configured mode, passing the configuration file to write.  This is
# $CTDB_STATD_CALLOUT_CONFIG_FILE if set and non-empty, otherwise
# statd_callout.conf in $CTDB_SCRIPT_VARDIR.
startup()
{
	_default="${CTDB_SCRIPT_VARDIR}/statd_callout.conf"
	_config_file="${CTDB_STATD_CALLOUT_CONFIG_FILE:-"${_default}"}"

	mkdir -p "$statd_callout_state_dir"

	"startup_${statd_callout_mode}" "$_config_file"
}
|
|
|
|
# Process a record queue in local storage and use it to update cluster
# storage.  For implementations that update cluster storage directly,
# this will be a no-op.
update()
{
	"update_${statd_callout_mode}"
}
|
|
|
|
# Query cluster storage for entries matching this node's server IPs
# and write sorted pairs of:
#   server-IP client-IP
# to stdout.
list_records()
{
	"list_records_${statd_callout_mode}" | sort
}
|
|
|
|
# Read pairs of:
#   server-IP client-IP
# from stdin and delete associated records during notify.
delete_records()
{
	"delete_records_${statd_callout_mode}"
}
|
|
|
|
# Do any required cleanup
cleanup()
{
	"cleanup_${statd_callout_mode}"
}
|
|
|
|
############################################################
|
|
|
|
# Handle the "notify" action: clear statd's own notification list,
# restart the lock manager, then send notifications for every
# recorded client of this node's server IPs and delete those records.
#
# Exits the script with status 0 if there are no records.
notify()
{
	# we must restart the lockmanager (on all nodes) so that we get
	# a clusterwide grace period (so other clients don't take out
	# conflicting locks through other nodes before all locks have been
	# reclaimed)

	# Delete the notification list for statd, we don't want it to
	# ping any clients.  Skip this when no sm directory was found:
	# with an empty $dir the patterns below would expand to "/*"
	# and "/.bak/*".
	dir=$(find_statd_sm_dir)
	if [ -n "$dir" ]; then
		rm -f "${dir}/"* "${dir}.bak/"*
	fi

	# We must also let some time pass between stopping and
	# restarting the lock manager.  Otherwise there is a window
	# where the lock manager will respond "strangely" immediately
	# after restarting it, which causes clients to fail to reclaim
	# their locks.
	nfs_callout_init
	"$CTDB_NFS_CALLOUT" "stop" "nlockmgr" >/dev/null 2>&1
	sleep 2
	"$CTDB_NFS_CALLOUT" "start" "nlockmgr" >/dev/null 2>&1

	statd_state="${statd_callout_state_dir}/.statd_state"
	list_records >"$statd_state"

	if [ ! -s "$statd_state" ]; then
		rm -f "$statd_state"
		exit 0
	fi

	delete_records <"$statd_state"
	send_notifies <"$statd_state"

	rm -f "$statd_state"

	cleanup
}

# Dispatch on the requested action.  Any other value is ignored.
case "$1" in
startup)
	startup
	;;

update)
	update
	;;

notify)
	notify
	;;
esac
|