mirror of
https://github.com/samba-team/samba.git
synced 2024-12-22 13:34:15 +03:00
ctdb-scripts: Drop script configuration variable CTDB_MONITOR_SWAP_USAGE
CTDB's system memory monitoring in 05.system.script monitors both main memory and swap. The swap monitoring was originally based on the (possibly incorrect, see below) idea that swap space stacks on top of main memory, so that a system starting to fill swap space is supposedly a good sign that it is running out of memory. Additionally, performance on a Linux system tends to be destroyed by the I/O associated with heavy swapping to spinning disks. However, some platforms default to creating only 4GB of swap space even when there is 128GB of main memory. With such a small ratio of swap to main memory, memory pressure can force swap to be nearly full even when a significant amount of main memory is still available and the system is performing well. This suggests that checking swap utilisation on its own is not useful in many circumstances. So, remove the separate swap space check and change the memory check to cover the combined total of main memory and swap space. The test function set_mem_usage() still takes one argument each for main memory and swap space utilisation. For simplicity, the same number is now passed for both so that the expected results are easy to follow. This could be changed later. A couple of tests are also cleaned up so that they no longer use hard-coded /proc/meminfo and ps output. Signed-off-by: Martin Schwenke <martin@meltin.net> Reviewed-by: Amitay Isaacs <amitay@gmail.com>
This commit is contained in:
parent
8108b3134c
commit
b80967f5dc
@ -132,9 +132,6 @@ monitor_memory_usage ()
|
||||
if [ -z "$CTDB_MONITOR_MEMORY_USAGE" ] ; then
|
||||
CTDB_MONITOR_MEMORY_USAGE=80
|
||||
fi
|
||||
if [ -z "$CTDB_MONITOR_SWAP_USAGE" ] ; then
|
||||
CTDB_MONITOR_SWAP_USAGE=25
|
||||
fi
|
||||
|
||||
_meminfo=$(get_proc "meminfo")
|
||||
# Intentional word splitting here
|
||||
@ -149,21 +146,19 @@ $1 == "SwapFree:" { swapfree = $2 }
|
||||
$1 == "SwapTotal:" { swaptotal = $2 }
|
||||
END {
|
||||
if (memavail != 0) { memfree = memavail ; }
|
||||
if (memtotal != 0) { print int((memtotal - memfree) / memtotal * 100) ; } else { print 0 ; }
|
||||
if (swaptotal != 0) { print int((swaptotal - swapfree) / swaptotal * 100) ; } else { print 0 ; }
|
||||
if (memtotal + swaptotal != 0) {
|
||||
usedtotal = memtotal - memfree + swaptotal - swapfree
|
||||
print int(usedtotal / (memtotal + swaptotal) * 100)
|
||||
} else {
|
||||
print 0
|
||||
}
|
||||
}')
|
||||
_mem_usage="$1"
|
||||
_swap_usage="$2"
|
||||
|
||||
check_thresholds "System memory" \
|
||||
"$CTDB_MONITOR_MEMORY_USAGE" \
|
||||
"$_mem_usage" \
|
||||
dump_memory_info
|
||||
|
||||
check_thresholds "System swap" \
|
||||
"$CTDB_MONITOR_SWAP_USAGE" \
|
||||
"$_swap_usage" \
|
||||
dump_memory_info
|
||||
}
|
||||
|
||||
|
||||
|
@ -964,27 +964,6 @@ CTDB_PER_IP_ROUTING_TABLE_ID_HIGH=9000
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
<term>
|
||||
CTDB_MONITOR_SWAP_USAGE=<parameter>SWAP-LIMITS</parameter>
|
||||
</term>
|
||||
<listitem>
|
||||
<para>
|
||||
SWAP-LIMITS takes the form
|
||||
<parameter>WARN_LIMIT</parameter><optional>:<parameter>UNHEALTHY_LIMIT</parameter></optional>
|
||||
indicating that warnings should be logged if
|
||||
swap usage reaches WARN_LIMIT%. If usage reaches
|
||||
UNHEALTHY_LIMIT then the node should be flagged
|
||||
unhealthy. Either WARN_LIMIT or UNHEALTHY_LIMIT may be
|
||||
left blank, meaning that check will be omitted.
|
||||
</para>
|
||||
<para>
|
||||
Default is 25, so warnings will be logged when swap
|
||||
usage reaches 25%.
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
</variablelist>
|
||||
</refsect2>
|
||||
|
||||
|
@ -209,6 +209,7 @@ CTDB_NOTIFY_SCRIPT
|
||||
CTDB_PUBLIC_INTERFACE
|
||||
CTDB_MAX_PERSISTENT_CHECK_ERRORS
|
||||
CTDB_SHUTDOWN_TIMEOUT
|
||||
CTDB_MONITOR_SWAP_USAGE
|
||||
EOF
|
||||
}
|
||||
|
||||
@ -262,7 +263,6 @@ CTDB_MAX_CORRUPT_DB_BACKUPS
|
||||
# 05.system
|
||||
CTDB_MONITOR_FILESYSTEM_USAGE
|
||||
CTDB_MONITOR_MEMORY_USAGE
|
||||
CTDB_MONITOR_SWAP_USAGE
|
||||
# debug_hung_scripts.sh
|
||||
CTDB_DEBUG_HUNG_SCRIPT_STACKPAT
|
||||
EOF
|
||||
|
@ -2,13 +2,12 @@
|
||||
|
||||
. "${TEST_SCRIPTS_DIR}/unit.sh"
|
||||
|
||||
define_test "Memory check, bad situation, default checks enabled"
|
||||
define_test "Memory check (default), warning situation"
|
||||
|
||||
setup
|
||||
|
||||
set_mem_usage 100 100
|
||||
ok <<EOF
|
||||
WARNING: System memory utilization 100% >= threshold 80%
|
||||
WARNING: System swap utilization 100% >= threshold 25%
|
||||
EOF
|
||||
simple_test
|
||||
|
@ -2,13 +2,12 @@
|
||||
|
||||
. "${TEST_SCRIPTS_DIR}/unit.sh"
|
||||
|
||||
define_test "Memory check, good situation, all memory checks enabled"
|
||||
define_test "Memory check (custom, both), good situation"
|
||||
|
||||
setup
|
||||
|
||||
setup_script_options <<EOF
|
||||
CTDB_MONITOR_MEMORY_USAGE="80:90"
|
||||
CTDB_MONITOR_SWAP_USAGE="1:50"
|
||||
EOF
|
||||
|
||||
ok_null
|
||||
|
@ -1,21 +0,0 @@
|
||||
#!/bin/sh
|
||||
|
||||
. "${TEST_SCRIPTS_DIR}/unit.sh"
|
||||
|
||||
define_test "Memory check, bad situation, custom swap critical"
|
||||
|
||||
setup
|
||||
|
||||
setup_script_options <<EOF
|
||||
CTDB_MONITOR_SWAP_USAGE=":50"
|
||||
EOF
|
||||
|
||||
set_mem_usage 100 90
|
||||
required_result 1 <<EOF
|
||||
WARNING: System memory utilization 100% >= threshold 80%
|
||||
ERROR: System swap utilization 90% >= threshold 50%
|
||||
$FAKE_PROC_MEMINFO
|
||||
$(ps foobar)
|
||||
EOF
|
||||
|
||||
simple_test
|
@ -2,7 +2,7 @@
|
||||
|
||||
. "${TEST_SCRIPTS_DIR}/unit.sh"
|
||||
|
||||
define_test "Memory check, bad memory situation, custom memory warning"
|
||||
define_test "Memory check (custom, warning only), warning situation"
|
||||
|
||||
setup
|
||||
|
||||
@ -10,7 +10,7 @@ setup_script_options <<EOF
|
||||
CTDB_MONITOR_MEMORY_USAGE="85:"
|
||||
EOF
|
||||
|
||||
set_mem_usage 90 10
|
||||
set_mem_usage 90 90
|
||||
ok <<EOF
|
||||
WARNING: System memory utilization 90% >= threshold 85%
|
||||
EOF
|
||||
|
@ -2,7 +2,7 @@
|
||||
|
||||
. "${TEST_SCRIPTS_DIR}/unit.sh"
|
||||
|
||||
define_test "Memory check, bad situation, custom memory critical"
|
||||
define_test "Memory check (custom, error only), error situation"
|
||||
|
||||
setup
|
||||
|
||||
@ -10,7 +10,7 @@ setup_script_options <<EOF
|
||||
CTDB_MONITOR_MEMORY_USAGE=":85"
|
||||
EOF
|
||||
|
||||
set_mem_usage 90 0
|
||||
set_mem_usage 90 90
|
||||
required_result 1 <<EOF
|
||||
ERROR: System memory utilization 90% >= threshold 85%
|
||||
$FAKE_PROC_MEMINFO
|
||||
|
@ -1,19 +0,0 @@
|
||||
#!/bin/sh
|
||||
|
||||
. "${TEST_SCRIPTS_DIR}/unit.sh"
|
||||
|
||||
define_test "Memory check, bad situation, both memory checks, causes warning"
|
||||
|
||||
setup
|
||||
|
||||
setup_script_options <<EOF
|
||||
CTDB_MONITOR_MEMORY_USAGE="80:90"
|
||||
CTDB_MONITOR_SWAP_USAGE=""
|
||||
EOF
|
||||
|
||||
set_mem_usage 87 0
|
||||
ok <<EOF
|
||||
WARNING: System memory utilization 87% >= threshold 80%
|
||||
EOF
|
||||
|
||||
simple_test
|
@ -2,7 +2,7 @@
|
||||
|
||||
. "${TEST_SCRIPTS_DIR}/unit.sh"
|
||||
|
||||
define_test "Memory check, bad situation, both custom memory checks, causes unhealthy"
|
||||
define_test "Memory check (custom, both), error situation"
|
||||
|
||||
setup
|
||||
|
||||
@ -10,33 +10,11 @@ setup_script_options <<EOF
|
||||
CTDB_MONITOR_MEMORY_USAGE="70:80"
|
||||
EOF
|
||||
|
||||
set_mem_usage 87 0
|
||||
set_mem_usage 87 87
|
||||
required_result 1 <<EOF
|
||||
ERROR: System memory utilization 87% >= threshold 80%
|
||||
MemTotal: 3940712 kB
|
||||
MemFree: 225268 kB
|
||||
Buffers: 146120 kB
|
||||
Cached: 140904 kB
|
||||
SwapCached: 56016 kB
|
||||
Active: 2422104 kB
|
||||
Inactive: 1019928 kB
|
||||
Active(anon): 1917580 kB
|
||||
Inactive(anon): 523080 kB
|
||||
Active(file): 504524 kB
|
||||
Inactive(file): 496848 kB
|
||||
Unevictable: 4844 kB
|
||||
Mlocked: 4844 kB
|
||||
SwapTotal: 5857276 kB
|
||||
SwapFree: 5857276 kB
|
||||
...
|
||||
USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND
|
||||
root 2 0.0 0.0 0 0 ? S Aug28 0:00 [kthreadd]
|
||||
root 3 0.0 0.0 0 0 ? S Aug28 0:43 \_ [ksoftirqd/0]
|
||||
...
|
||||
root 1 0.0 0.0 2976 624 ? Ss Aug28 0:07 init [2]
|
||||
root 495 0.0 0.0 3888 1640 ? Ss Aug28 0:00 udevd --daemon
|
||||
...
|
||||
[MORE FAKE ps OUTPUT]
|
||||
$FAKE_PROC_MEMINFO
|
||||
$(ps foobar)
|
||||
EOF
|
||||
|
||||
simple_test
|
||||
|
@ -2,126 +2,81 @@
|
||||
|
||||
. "${TEST_SCRIPTS_DIR}/unit.sh"
|
||||
|
||||
define_test "Check throttling of warnings"
|
||||
define_test "Memory check (custom, both), check throttling of warnings"
|
||||
|
||||
setup
|
||||
|
||||
setup_script_options <<EOF
|
||||
CTDB_MONITOR_MEMORY_USAGE="70:80"
|
||||
CTDB_MONITOR_SWAP_USAGE=""
|
||||
EOF
|
||||
|
||||
# Below threshold, nothing logged
|
||||
set_mem_usage 67 0
|
||||
set_mem_usage 67 67
|
||||
ok_null
|
||||
simple_test
|
||||
|
||||
set_mem_usage 71 0
|
||||
set_mem_usage 71 71
|
||||
ok "WARNING: System memory utilization 71% >= threshold 70%"
|
||||
simple_test
|
||||
|
||||
# 2nd time at same level, nothing logged
|
||||
set_mem_usage 71 0
|
||||
set_mem_usage 71 71
|
||||
ok_null
|
||||
simple_test
|
||||
|
||||
set_mem_usage 73 0
|
||||
set_mem_usage 73 73
|
||||
ok "WARNING: System memory utilization 73% >= threshold 70%"
|
||||
simple_test
|
||||
|
||||
# 2nd time at same level, nothing logged
|
||||
set_mem_usage 73 0
|
||||
set_mem_usage 73 73
|
||||
ok_null
|
||||
simple_test
|
||||
|
||||
set_mem_usage 79 0
|
||||
set_mem_usage 79 79
|
||||
ok "WARNING: System memory utilization 79% >= threshold 70%"
|
||||
simple_test
|
||||
|
||||
set_mem_usage 80 0
|
||||
set_mem_usage 80 80
|
||||
required_result 1 <<EOF
|
||||
ERROR: System memory utilization 80% >= threshold 80%
|
||||
MemTotal: 3940712 kB
|
||||
MemFree: 225268 kB
|
||||
Buffers: 146120 kB
|
||||
Cached: 416754 kB
|
||||
SwapCached: 56016 kB
|
||||
Active: 2422104 kB
|
||||
Inactive: 1019928 kB
|
||||
Active(anon): 1917580 kB
|
||||
Inactive(anon): 523080 kB
|
||||
Active(file): 504524 kB
|
||||
Inactive(file): 496848 kB
|
||||
Unevictable: 4844 kB
|
||||
Mlocked: 4844 kB
|
||||
SwapTotal: 5857276 kB
|
||||
SwapFree: 5857276 kB
|
||||
...
|
||||
USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND
|
||||
root 2 0.0 0.0 0 0 ? S Aug28 0:00 [kthreadd]
|
||||
root 3 0.0 0.0 0 0 ? S Aug28 0:43 \_ [ksoftirqd/0]
|
||||
...
|
||||
root 1 0.0 0.0 2976 624 ? Ss Aug28 0:07 init [2]
|
||||
root 495 0.0 0.0 3888 1640 ? Ss Aug28 0:00 udevd --daemon
|
||||
...
|
||||
[MORE FAKE ps OUTPUT]
|
||||
$FAKE_PROC_MEMINFO
|
||||
$(ps foobar)
|
||||
EOF
|
||||
simple_test
|
||||
|
||||
# Fall back into warning at same level as last warning... should log
|
||||
set_mem_usage 79 0
|
||||
set_mem_usage 79 79
|
||||
ok "WARNING: System memory utilization 79% >= threshold 70%"
|
||||
simple_test
|
||||
|
||||
# Below threshold, notice
|
||||
set_mem_usage 69 0
|
||||
set_mem_usage 69 69
|
||||
ok <<EOF
|
||||
NOTICE: System memory utilization 69% < threshold 70%
|
||||
EOF
|
||||
simple_test
|
||||
|
||||
# Further reduction, nothing logged
|
||||
set_mem_usage 68 0
|
||||
set_mem_usage 68 68
|
||||
ok_null
|
||||
simple_test
|
||||
|
||||
# Back up into warning at same level as last warning... should log
|
||||
set_mem_usage 79 0
|
||||
set_mem_usage 79 79
|
||||
ok "WARNING: System memory utilization 79% >= threshold 70%"
|
||||
simple_test
|
||||
|
||||
# Back up above critical threshold... unhealthy
|
||||
set_mem_usage 81 0
|
||||
set_mem_usage 81 81
|
||||
required_result 1 <<EOF
|
||||
ERROR: System memory utilization 81% >= threshold 80%
|
||||
MemTotal: 3940712 kB
|
||||
MemFree: 225268 kB
|
||||
Buffers: 146120 kB
|
||||
Cached: 377347 kB
|
||||
SwapCached: 56016 kB
|
||||
Active: 2422104 kB
|
||||
Inactive: 1019928 kB
|
||||
Active(anon): 1917580 kB
|
||||
Inactive(anon): 523080 kB
|
||||
Active(file): 504524 kB
|
||||
Inactive(file): 496848 kB
|
||||
Unevictable: 4844 kB
|
||||
Mlocked: 4844 kB
|
||||
SwapTotal: 5857276 kB
|
||||
SwapFree: 5857276 kB
|
||||
...
|
||||
USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND
|
||||
root 2 0.0 0.0 0 0 ? S Aug28 0:00 [kthreadd]
|
||||
root 3 0.0 0.0 0 0 ? S Aug28 0:43 \_ [ksoftirqd/0]
|
||||
...
|
||||
root 1 0.0 0.0 2976 624 ? Ss Aug28 0:07 init [2]
|
||||
root 495 0.0 0.0 3888 1640 ? Ss Aug28 0:00 udevd --daemon
|
||||
...
|
||||
[MORE FAKE ps OUTPUT]
|
||||
$FAKE_PROC_MEMINFO
|
||||
$(ps foobar)
|
||||
EOF
|
||||
simple_test
|
||||
|
||||
# Straight back down to a good level... notice
|
||||
set_mem_usage 65 0
|
||||
set_mem_usage 65 65
|
||||
ok "NOTICE: System memory utilization 65% < threshold 70%"
|
||||
simple_test
|
||||
|
Loading…
Reference in New Issue
Block a user