1
0
mirror of https://github.com/samba-team/samba.git synced 2024-12-22 13:34:15 +03:00

ctdb-scripts: Drop script configuration variable CTDB_MONITOR_SWAP_USAGE

CTDB's system memory monitoring in 05.system.script monitors both main
memory and swap.  The swap monitoring was originally based on
the (possibly incorrect, see below) idea that swap space stacks on top
of main memory, so that swap space starting to fill is supposed to be
a good sign that the system is running out of
memory.  Additionally, performance on a Linux system tends to be
destroyed by the I/O associated with a lot of swapping to spinning
disks.

However, some platforms default to creating only 4GB of swap space
even when there is 128GB of main memory.  With such a small swap to
main memory ratio, memory pressure can force swap to be nearly full
even when a significant amount of main memory is still available and
the system is performing well.  This suggests that checking swap
utilisation might be less than useful in many circumstances.

So, remove the separate swap space checking and change the memory
check to cover the total of main memory and swap space.

Test function set_mem_usage() still takes an argument for each of main
memory and swap space utilisation.  For simplicity, the same number is
now passed twice, so that the combined utilisation equals that number
and the intended results are comprehensible.  This could be changed
later.

A couple of tests are cleaned up to no longer use hard-coded
/proc/meminfo and ps output.

Signed-off-by: Martin Schwenke <martin@meltin.net>
Reviewed-by: Amitay Isaacs <amitay@gmail.com>
This commit is contained in:
Martin Schwenke 2019-03-29 11:19:55 +11:00 committed by Amitay Isaacs
parent 8108b3134c
commit b80967f5dc
11 changed files with 35 additions and 170 deletions

View File

@ -132,9 +132,6 @@ monitor_memory_usage ()
if [ -z "$CTDB_MONITOR_MEMORY_USAGE" ] ; then
CTDB_MONITOR_MEMORY_USAGE=80
fi
if [ -z "$CTDB_MONITOR_SWAP_USAGE" ] ; then
CTDB_MONITOR_SWAP_USAGE=25
fi
_meminfo=$(get_proc "meminfo")
# Intentional word splitting here
@ -149,21 +146,19 @@ $1 == "SwapFree:" { swapfree = $2 }
$1 == "SwapTotal:" { swaptotal = $2 }
END {
if (memavail != 0) { memfree = memavail ; }
if (memtotal != 0) { print int((memtotal - memfree) / memtotal * 100) ; } else { print 0 ; }
if (swaptotal != 0) { print int((swaptotal - swapfree) / swaptotal * 100) ; } else { print 0 ; }
if (memtotal + swaptotal != 0) {
usedtotal = memtotal - memfree + swaptotal - swapfree
print int(usedtotal / (memtotal + swaptotal) * 100)
} else {
print 0
}
}')
_mem_usage="$1"
_swap_usage="$2"
check_thresholds "System memory" \
"$CTDB_MONITOR_MEMORY_USAGE" \
"$_mem_usage" \
dump_memory_info
check_thresholds "System swap" \
"$CTDB_MONITOR_SWAP_USAGE" \
"$_swap_usage" \
dump_memory_info
}

View File

@ -964,27 +964,6 @@ CTDB_PER_IP_ROUTING_TABLE_ID_HIGH=9000
</listitem>
</varlistentry>
<varlistentry>
<term>
CTDB_MONITOR_SWAP_USAGE=<parameter>SWAP-LIMITS</parameter>
</term>
<listitem>
<para>
SWAP-LIMITS takes the form
<parameter>WARN_LIMIT</parameter><optional>:<parameter>UNHEALTHY_LIMIT</parameter></optional>
indicating that warnings should be logged if
swap usage reaches WARN_LIMIT%. If usage reaches
UNHEALTHY_LIMIT then the node should be flagged
unhealthy. Either WARN_LIMIT or UNHEALTHY_LIMIT may be
left blank, meaning that check will be omitted.
</para>
<para>
Default is 25, so warnings will be logged when swap
usage reaches 25%.
</para>
</listitem>
</varlistentry>
</variablelist>
</refsect2>

View File

@ -209,6 +209,7 @@ CTDB_NOTIFY_SCRIPT
CTDB_PUBLIC_INTERFACE
CTDB_MAX_PERSISTENT_CHECK_ERRORS
CTDB_SHUTDOWN_TIMEOUT
CTDB_MONITOR_SWAP_USAGE
EOF
}
@ -262,7 +263,6 @@ CTDB_MAX_CORRUPT_DB_BACKUPS
# 05.system
CTDB_MONITOR_FILESYSTEM_USAGE
CTDB_MONITOR_MEMORY_USAGE
CTDB_MONITOR_SWAP_USAGE
# debug_hung_scripts.sh
CTDB_DEBUG_HUNG_SCRIPT_STACKPAT
EOF

View File

@ -2,13 +2,12 @@
. "${TEST_SCRIPTS_DIR}/unit.sh"
define_test "Memory check, bad situation, default checks enabled"
define_test "Memory check (default), warning situation"
setup
set_mem_usage 100 100
ok <<EOF
WARNING: System memory utilization 100% >= threshold 80%
WARNING: System swap utilization 100% >= threshold 25%
EOF
simple_test

View File

@ -2,13 +2,12 @@
. "${TEST_SCRIPTS_DIR}/unit.sh"
define_test "Memory check, good situation, all memory checks enabled"
define_test "Memory check (custom, both), good situation"
setup
setup_script_options <<EOF
CTDB_MONITOR_MEMORY_USAGE="80:90"
CTDB_MONITOR_SWAP_USAGE="1:50"
EOF
ok_null

View File

@ -1,21 +0,0 @@
#!/bin/sh
. "${TEST_SCRIPTS_DIR}/unit.sh"
define_test "Memory check, bad situation, custom swap critical"
setup
setup_script_options <<EOF
CTDB_MONITOR_SWAP_USAGE=":50"
EOF
set_mem_usage 100 90
required_result 1 <<EOF
WARNING: System memory utilization 100% >= threshold 80%
ERROR: System swap utilization 90% >= threshold 50%
$FAKE_PROC_MEMINFO
$(ps foobar)
EOF
simple_test

View File

@ -2,7 +2,7 @@
. "${TEST_SCRIPTS_DIR}/unit.sh"
define_test "Memory check, bad memory situation, custom memory warning"
define_test "Memory check (custom, warning only), warning situation"
setup
@ -10,7 +10,7 @@ setup_script_options <<EOF
CTDB_MONITOR_MEMORY_USAGE="85:"
EOF
set_mem_usage 90 10
set_mem_usage 90 90
ok <<EOF
WARNING: System memory utilization 90% >= threshold 85%
EOF

View File

@ -2,7 +2,7 @@
. "${TEST_SCRIPTS_DIR}/unit.sh"
define_test "Memory check, bad situation, custom memory critical"
define_test "Memory check (custom, error only), error situation"
setup
@ -10,7 +10,7 @@ setup_script_options <<EOF
CTDB_MONITOR_MEMORY_USAGE=":85"
EOF
set_mem_usage 90 0
set_mem_usage 90 90
required_result 1 <<EOF
ERROR: System memory utilization 90% >= threshold 85%
$FAKE_PROC_MEMINFO

View File

@ -1,19 +0,0 @@
#!/bin/sh
. "${TEST_SCRIPTS_DIR}/unit.sh"
define_test "Memory check, bad situation, both memory checks, causes warning"
setup
setup_script_options <<EOF
CTDB_MONITOR_MEMORY_USAGE="80:90"
CTDB_MONITOR_SWAP_USAGE=""
EOF
set_mem_usage 87 0
ok <<EOF
WARNING: System memory utilization 87% >= threshold 80%
EOF
simple_test

View File

@ -2,7 +2,7 @@
. "${TEST_SCRIPTS_DIR}/unit.sh"
define_test "Memory check, bad situation, both custom memory checks, causes unhealthy"
define_test "Memory check (custom, both), error situation"
setup
@ -10,33 +10,11 @@ setup_script_options <<EOF
CTDB_MONITOR_MEMORY_USAGE="70:80"
EOF
set_mem_usage 87 0
set_mem_usage 87 87
required_result 1 <<EOF
ERROR: System memory utilization 87% >= threshold 80%
MemTotal: 3940712 kB
MemFree: 225268 kB
Buffers: 146120 kB
Cached: 140904 kB
SwapCached: 56016 kB
Active: 2422104 kB
Inactive: 1019928 kB
Active(anon): 1917580 kB
Inactive(anon): 523080 kB
Active(file): 504524 kB
Inactive(file): 496848 kB
Unevictable: 4844 kB
Mlocked: 4844 kB
SwapTotal: 5857276 kB
SwapFree: 5857276 kB
...
USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND
root 2 0.0 0.0 0 0 ? S Aug28 0:00 [kthreadd]
root 3 0.0 0.0 0 0 ? S Aug28 0:43 \_ [ksoftirqd/0]
...
root 1 0.0 0.0 2976 624 ? Ss Aug28 0:07 init [2]
root 495 0.0 0.0 3888 1640 ? Ss Aug28 0:00 udevd --daemon
...
[MORE FAKE ps OUTPUT]
$FAKE_PROC_MEMINFO
$(ps foobar)
EOF
simple_test

View File

@ -2,126 +2,81 @@
. "${TEST_SCRIPTS_DIR}/unit.sh"
define_test "Check throttling of warnings"
define_test "Memory check (custom, both), check throttling of warnings"
setup
setup_script_options <<EOF
CTDB_MONITOR_MEMORY_USAGE="70:80"
CTDB_MONITOR_SWAP_USAGE=""
EOF
# Below threshold, nothing logged
set_mem_usage 67 0
set_mem_usage 67 67
ok_null
simple_test
set_mem_usage 71 0
set_mem_usage 71 71
ok "WARNING: System memory utilization 71% >= threshold 70%"
simple_test
# 2nd time at same level, nothing logged
set_mem_usage 71 0
set_mem_usage 71 71
ok_null
simple_test
set_mem_usage 73 0
set_mem_usage 73 73
ok "WARNING: System memory utilization 73% >= threshold 70%"
simple_test
# 2nd time at same level, nothing logged
set_mem_usage 73 0
set_mem_usage 73 73
ok_null
simple_test
set_mem_usage 79 0
set_mem_usage 79 79
ok "WARNING: System memory utilization 79% >= threshold 70%"
simple_test
set_mem_usage 80 0
set_mem_usage 80 80
required_result 1 <<EOF
ERROR: System memory utilization 80% >= threshold 80%
MemTotal: 3940712 kB
MemFree: 225268 kB
Buffers: 146120 kB
Cached: 416754 kB
SwapCached: 56016 kB
Active: 2422104 kB
Inactive: 1019928 kB
Active(anon): 1917580 kB
Inactive(anon): 523080 kB
Active(file): 504524 kB
Inactive(file): 496848 kB
Unevictable: 4844 kB
Mlocked: 4844 kB
SwapTotal: 5857276 kB
SwapFree: 5857276 kB
...
USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND
root 2 0.0 0.0 0 0 ? S Aug28 0:00 [kthreadd]
root 3 0.0 0.0 0 0 ? S Aug28 0:43 \_ [ksoftirqd/0]
...
root 1 0.0 0.0 2976 624 ? Ss Aug28 0:07 init [2]
root 495 0.0 0.0 3888 1640 ? Ss Aug28 0:00 udevd --daemon
...
[MORE FAKE ps OUTPUT]
$FAKE_PROC_MEMINFO
$(ps foobar)
EOF
simple_test
# Fall back into warning at same level as last warning... should log
set_mem_usage 79 0
set_mem_usage 79 79
ok "WARNING: System memory utilization 79% >= threshold 70%"
simple_test
# Below threshold, notice
set_mem_usage 69 0
set_mem_usage 69 69
ok <<EOF
NOTICE: System memory utilization 69% < threshold 70%
EOF
simple_test
# Further reduction, nothing logged
set_mem_usage 68 0
set_mem_usage 68 68
ok_null
simple_test
# Back up into warning at same level as last warning... should log
set_mem_usage 79 0
set_mem_usage 79 79
ok "WARNING: System memory utilization 79% >= threshold 70%"
simple_test
# Back up above critical threshold... unhealthy
set_mem_usage 81 0
set_mem_usage 81 81
required_result 1 <<EOF
ERROR: System memory utilization 81% >= threshold 80%
MemTotal: 3940712 kB
MemFree: 225268 kB
Buffers: 146120 kB
Cached: 377347 kB
SwapCached: 56016 kB
Active: 2422104 kB
Inactive: 1019928 kB
Active(anon): 1917580 kB
Inactive(anon): 523080 kB
Active(file): 504524 kB
Inactive(file): 496848 kB
Unevictable: 4844 kB
Mlocked: 4844 kB
SwapTotal: 5857276 kB
SwapFree: 5857276 kB
...
USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND
root 2 0.0 0.0 0 0 ? S Aug28 0:00 [kthreadd]
root 3 0.0 0.0 0 0 ? S Aug28 0:43 \_ [ksoftirqd/0]
...
root 1 0.0 0.0 2976 624 ? Ss Aug28 0:07 init [2]
root 495 0.0 0.0 3888 1640 ? Ss Aug28 0:00 udevd --daemon
...
[MORE FAKE ps OUTPUT]
$FAKE_PROC_MEMINFO
$(ps foobar)
EOF
simple_test
# Straight back down to a good level... notice
set_mem_usage 65 0
set_mem_usage 65 65
ok "NOTICE: System memory utilization 65% < threshold 70%"
simple_test