1
0
mirror of https://github.com/samba-team/samba.git synced 2025-02-28 01:58:17 +03:00

ctdb-scripts: Make initial statistics output empty

This makes initial failure to retrieve statistics less likely to
result in a statistics change.  To help with this, statistics
retrieval stderr now goes to the log - only stdout goes to the file.

This means that the test code for checking statistics changes needs to
be redone to actually run the statistics command and check.  As with
rpcinfo output, this output needs to behave as deterministically in
the test code as it does in the event script.

Signed-off-by: Martin Schwenke <mschwenke@ddn.com>
Reviewed-by: Amitay Isaacs <amitay@gmail.com>
This commit is contained in:
Martin Schwenke 2024-06-29 19:24:25 +10:00 committed by Martin Schwenke
parent 032b7b49c9
commit 18a29ed367
7 changed files with 56 additions and 35 deletions

View File

@ -164,20 +164,22 @@ nfs_check_service()
if [ -f "$_curr" ]; then
mv -f "$_curr" "$_prev"
else
# Make initial stats empty, so a
# failed attempt to retrieve them on
# service stall is less likely to
# result in a false stats change
: >"$_prev"
fi
eval "$service_stats_cmd" >"$_curr" 2>&1
eval "$service_stats_cmd" >"$_curr"
# Only consider statistics on timeout. This
# is done below by checking if this string is
# contained in $_err.
_t="rpcinfo: RPC: Timed out"
if ! $_ok &&
[ "${_err#*"${_t}"}" != "$_err" ] &&
! cmp "$_prev" "$_curr" >/dev/null 2>&1; then
# Stats always implicitly change on
# the first monitor event, since
# previous stats don't exist...
echo "WARNING: statistics changed but ${_err}"
_ok=true
fi

View File

@ -18,7 +18,4 @@ service_debug_cmd="program_stack_traces nfsd 5"
service_stats_cmd="date --rfc-3339=ns | grep ."
EOF
# Test flag to indicate that stats are expected to change
nfs_stats_set_changed "nfs" "status"
nfs_iterate_test 10 "nfs"

View File

@ -18,7 +18,4 @@ service_debug_cmd="program_stack_traces nfsd 5"
service_stats_cmd="echo 'hello world' | grep ."
EOF
# Test flag to indicate that stats are expected to change
nfs_stats_set_changed "status"
nfs_iterate_test 10 "nfs"

View File

@ -18,7 +18,4 @@ service_debug_cmd="program_stack_traces nfsd 5"
service_stats_cmd="date --rfc-3339=ns | grep ."
EOF
# Test flag to indicate that stats are expected to change
nfs_stats_set_changed "nfs" "status"
nfs_iterate_test 10 "nfs:TIMEOUT"

View File

@ -18,7 +18,4 @@ service_debug_cmd="program_stack_traces nfsd 5"
service_stats_cmd="echo 'hello world' | grep ."
EOF
# Test flag to indicate that stats are expected to change
nfs_stats_set_changed "status"
nfs_iterate_test 10 "nfs:TIMEOUT"

View File

@ -0,0 +1,23 @@
#!/bin/sh
# Unit test: the "nfs" RPC service check times out while the configured
# statistics command ("false") fails without printing anything.
. "${TEST_SCRIPTS_DIR}/unit.sh"
define_test "NFS RPC service timeout, silent stats error, 10 iterations"
# It would be nice to have a non-silent stats error... but that's a
# bit hard for the current test code to handle. :-(
setup
# Write the check definition for the "nfs" service.  The heredoc
# delimiter is unquoted, so \$CTDB_NFS_CALLOUT is escaped to keep the
# variable reference literal in the generated file.  service_stats_cmd
# is "false": it exits non-zero and produces no output, which is the
# "silent stats error" named in the test title.
cat >"${CTDB_BASE}/nfs-checks.d/20.nfs.check" <<EOF
# nfs
version="3"
restart_every=10
unhealthy_after=2
service_stop_cmd="\$CTDB_NFS_CALLOUT stop nfs"
service_start_cmd="\$CTDB_NFS_CALLOUT start nfs"
service_debug_cmd="program_stack_traces nfsd 5"
service_stats_cmd="false"
EOF
# Run 10 iterations; "nfs:TIMEOUT" presumably tells the test helper to
# make the nfs RPC check time out -- confirm against nfs_iterate_test.
nfs_iterate_test 10 "nfs:TIMEOUT"

View File

@ -136,29 +136,27 @@ nfs_setup_fake_threads()
esac
}
# Test flag setter: store all arguments, joined by "$*" and padded with
# a leading and a trailing space, in the global FAKE_NFS_STATS_CHANGED.
# The padding lets a later check match a name preceded by a space.
nfs_stats_set_changed()
{
FAKE_NFS_STATS_CHANGED=" $* "
}
nfs_stats_check_changed()
{
_rpc_service="$1"
_iteration="$2"
_cmd="$2"
_t="$FAKE_NFS_STATS_CHANGED"
if [ -z "$_t" ]; then
if [ -z "$_cmd" ]; then
# No stats command, statistics don't change...
return 1
fi
if [ "${_t#* "${_rpc_service}"}" != "$_t" ]; then
return 0
fi
# Statistics always change on the first iteration
if [ "$_iteration" -eq 1 ]; then
return 0
_curr="${CTDB_TEST_TMP_DIR}/${_rpc_service}.stats"
_prev="${_curr}.prev"
: >"$_prev"
if [ -e "$_curr" ]; then
mv "$_curr" "$_prev"
fi
return 1
eval "$_cmd" >"$_curr"
! diff "$_prev" "$_curr" >/dev/null
}
rpcinfo_timed_out()
@ -344,6 +342,7 @@ rpc_set_service_failure_response()
# Unused, but for completeness, possible future use
service_check_cmd=""
service_debug_cmd=""
service_stats_cmd=""
# Don't bother syntax checking, eventscript does that...
. "$_file"
@ -360,6 +359,17 @@ rpc_set_service_failure_response()
esac
fi
# It doesn't matter here if the statistics have
# changed. However, this generates the current
# statistics, which needs to happen, regardless of
# service health, so they can be compared when they
# matter...
_stats_changed=false
if nfs_stats_check_changed \
"$_rpc_service" "$service_stats_cmd"; then
_stats_changed=true
fi
_why=""
_ri_out=$(rpcinfo -T tcp localhost "$_rpc_service" 2>&1)
# Check exit code separately for readability
@ -370,9 +380,7 @@ rpc_set_service_failure_response()
elif rpcinfo_timed_out "$_ri_out"; then
_why="Timed out"
if nfs_stats_check_changed \
"$_rpc_service" "$_iteration"; then
if $_stats_changed; then
rpc_failure \
"WARNING: statistics changed but" \
"$_rpc_service" \