1
0
mirror of https://github.com/samba-team/samba.git synced 2024-12-23 17:34:34 +03:00

eventscripts: Become unhealthy faster on nfsd failure

Anecdotal evidence suggests that most nfsd RPC check failures are due
to cluster filesystem or storage problems.  Apparently these are rarely
helped by attempting to restart the NFS service because the restart
tends to hang.

Fail after 2 nfsd RPC check failures, instead of waiting for 6
failures.  Restart on every 10th failure to try to bring the node back
to good health.

Update unit tests to match.

Signed-off-by: Martin Schwenke <martin@meltin.net>

(This used to be ctdb commit e9ef93f7b6dad59eabaa32124df81f3e74c651ef)
This commit is contained in:
Martin Schwenke 2013-08-12 11:36:25 +10:00 committed by Amitay Isaacs
parent 4cb3e2cd78
commit fec69034ee
4 changed files with 5 additions and 15 deletions

View File

@ -1,3 +1,2 @@
-ge 6 verbose unhealthy
-eq 4 verbose restart
-eq 2 restart:b
% 10 verbose restart:b unhealthy
-ge 2 verbose unhealthy

View File

@ -9,7 +9,4 @@ define_test "knfsd down, 6 iterations"
setup_nfs
rpc_services_down "nfs"
iterate_test 6 'ok_null' \
2 'rpc_set_service_failure_response "nfsd"' \
4 'rpc_set_service_failure_response "nfsd"' \
6 'rpc_set_service_failure_response "nfsd"'
iterate_test 10 'rpc_set_service_failure_response "nfsd"'

View File

@ -12,7 +12,4 @@ rpc_services_down "nfs"
CTDB_NFS_DUMP_STUCK_THREADS=5
FAKE_NFSD_THREAD_PIDS=""
iterate_test 6 'ok_null' \
2 'rpc_set_service_failure_response "nfsd"' \
4 'rpc_set_service_failure_response "nfsd"' \
6 'rpc_set_service_failure_response "nfsd"'
iterate_test 10 'rpc_set_service_failure_response "nfsd"'

View File

@ -12,7 +12,4 @@ rpc_services_down "nfs"
CTDB_NFS_DUMP_STUCK_THREADS=5
FAKE_NFSD_THREAD_PIDS="1001 1002 1003"
iterate_test 6 'ok_null' \
2 'rpc_set_service_failure_response "nfsd"' \
4 'rpc_set_service_failure_response "nfsd"' \
6 'rpc_set_service_failure_response "nfsd"'
iterate_test 10 'rpc_set_service_failure_response "nfsd"'