#!/usr/bin/env bash
# SPDX-License-Identifier: LGPL-2.1-or-later
#
# Integration test for systemd-oomd.
#
# Strict mode: abort on the first failing command (-e), on use of an unset variable (-u), and when any
# stage of a pipeline fails (pipefail); -x traces every command into the test log.
set -eux
set -o pipefail

# Helpers that are used below but not defined in this file (run_testcases, assert_in,
# get_cgroup_hierarchy, cgroupfs_supports_user_xattrs) presumably come from these two files, which are
# looked up next to this script.
# shellcheck source=test/units/test-control.sh
. "$(dirname "$0")"/test-control.sh
# shellcheck source=test/units/util.sh
. "$(dirname "$0")"/util.sh

# Raise the log level for the duration of the test; it is set back to "info" at the end of the script.
systemd-analyze log-level debug
# Ensure that the init.scope.d drop-in is applied on boot
test "$(cat /sys/fs/cgroup/init.scope/memory.high)" != "max"

# Loose checks to ensure the environment has the necessary features for systemd-oomd.
# Every missing feature appends a reason to /skipped; a non-empty /skipped then ends the test with
# exit status 77.
[[ -e /proc/pressure ]] || echo "no PSI" >>/skipped
[[ "$(get_cgroup_hierarchy)" == "unified" ]] || echo "no cgroupsv2" >>/skipped
[[ -x /usr/lib/systemd/systemd-oomd ]] || echo "no oomd" >>/skipped
if [[ -s /skipped ]]; then
    exit 77
fi
# Activate swap file if we are in a VM
if systemd-detect-virt --vm --quiet; then
    # Start from a clean state: drop whatever swap is currently active.
    swapoff --all

    # Create a 64M swap file. btrfs gets its dedicated mkswapfile command, every other file system
    # gets a zero-filled file with 0600 permissions that is formatted with mkswap.
    if [[ "$(findmnt -n -o FSTYPE /)" == btrfs ]]; then
        btrfs filesystem mkswapfile -s 64M /swapfile
    else
        dd if=/dev/zero of=/swapfile bs=1M count=64
        chmod 0600 /swapfile
        mkswap /swapfile
    fi

    swapon /swapfile
    # Print the active swap areas into the test log.
    swapon --show
fi
# Configure oomd explicitly to avoid conflicts with distro dropins

# oomd itself: default memory pressure duration of 2s (checked via oomctl further below).
mkdir -p /run/systemd/oomd.conf.d/
cat >/run/systemd/oomd.conf.d/99-oomd-test.conf <<EOF
[OOM]
DefaultMemoryPressureDurationSec=2s
EOF

# Root slice: ManagedOOMSwap=auto.
mkdir -p /run/systemd/system/-.slice.d/
cat >/run/systemd/system/-.slice.d/99-oomd-test.conf <<EOF
[Slice]
ManagedOOMSwap=auto
EOF

# Per-user service manager: ManagedOOMMemoryPressure=auto with a 0% limit.
mkdir -p /run/systemd/system/user@.service.d/
cat >/run/systemd/system/user@.service.d/99-oomd-test.conf <<EOF
[Service]
ManagedOOMMemoryPressure=auto
ManagedOOMMemoryPressureLimit=0%
EOF

# Run systemd-oomd itself with debug logging.
mkdir -p /run/systemd/system/systemd-oomd.service.d/
cat >/run/systemd/system/systemd-oomd.service.d/debug.conf <<EOF
[Service]
Environment=SYSTEMD_LOG_LEVEL=debug
EOF

# Make the service manager pick up the drop-ins written above.
systemctl daemon-reload
# enable the service to ensure dbus-org.freedesktop.oom1.service exists
# and D-Bus activation works
systemctl enable systemd-oomd.service

# If oomd is already running for some reason, restart it to make sure the settings above are applied.
if systemctl is-active systemd-oomd.service; then
    systemctl restart systemd-oomd.service
fi

# Check if the oomd.conf drop-in config is loaded.
assert_in 'Default Memory Pressure Duration: 2s' "$(oomctl)"
if [[ -v ASAN_OPTIONS || -v UBSAN_OPTIONS ]]; then
    # If we're running with sanitizers, sd-executor might pull in quite a significant chunk of shared
    # libraries, which in turn causes a lot of pressure that can put us in the front when sd-oomd decides to
    # go on a killing spree. This fact is exacerbated further on Arch Linux which ships unstripped gcc-libs,
    # so sd-executor pulls in over 30M of libs on startup. Let's make the MemoryHigh= limit a bit more
    # generous when running with sanitizers to make the test happy.
    systemctl edit --runtime --stdin --drop-in=99-MemoryHigh.conf TEST-55-OOMD-testchill.service <<EOF
[Service]
MemoryHigh=60M
EOF
    # Do the same for the user instance as well
    mkdir -p /run/systemd/user/
    cp -rfv /run/systemd/system/TEST-55-OOMD-testchill.service.d/ /run/systemd/user/
else
    # Ensure that we can start services even with a very low hard memory cap without oom-kills, but skip
    # under sanitizers as they balloon memory usage.
    #
    # Background: services used to be spawned by forking PID1 and doing a bunch of work before exec'ing
    # the service executable, so the child's memory footprint was that of PID1 while being subject to the
    # cgroup memory limits of the unit. On systems with hard memory caps and no swap this made services
    # with a low MemoryMax= fail to start once PID1 was under load. systemd-executor receives the
    # required state via memfd and is spawned with posix_spawn (CLONE_VM + CLONE_VFORK), so no
    # copy-on-write of PID1's memory is triggered.
    systemd-run -t -p MemoryMax=10M -p MemorySwapMax=0 -p MemoryZSwapMax=0 /bin/true
fi
# Check that a unit has been killed, i.e. that "systemctl status" fails for it.
#
# Arguments:
#   $1  - name of the unit to check (required)
#   ... - extra arguments placed before the "status" verb (e.g. --machine/--user)
# Returns:
#   0 if "systemctl status" fails for the unit,
#   1 if the unit is still alive and the kernel log shows no PSI inconsistency.
# Exits the whole script with status 77 (after writing the reason to /skipped) if the unit is still
# alive and the kernel log contains 'psi: inconsistent task state!'.
check_killed() {
    local unit="${1:?}"
    shift

    systemctl "$@" status "$unit" || return 0 # Yay! The service has been expectedly killed.

    # Workaround for the regression in kernel 6.12-rcX, explained in issue #32730.
    if journalctl --no-hostname -k -t kernel --grep 'psi: inconsistent task state!'; then
        echo "$unit is unexpectedly still alive, and inconsistency in PSI is reported by the kernel, skipping." >/skipped
        exit 77
    fi

    return 1 # Huh? Something borked.
}
# Basic scenario: start testchill and testbloat, then expect testbloat to get killed while testchill
# keeps running.
#
# Arguments:
#   $1  - cgroup path that oomctl is expected to list as monitored (required)
#   ... - extra arguments placed before every systemctl verb (e.g. --machine/--user)
# Exits the whole script with 42 if testbloat was not killed, or 24 if testchill is not running.
test_basic() {
    local cgroup_path="${1:?}"
    shift

    systemctl "$@" start TEST-55-OOMD-testchill.service
    systemctl "$@" status TEST-55-OOMD-testchill.service
    systemctl "$@" status TEST-55-OOMD-workload.slice

    # Verify systemd-oomd is monitoring the expected units.
    timeout 1m bash -xec "until oomctl | grep -q -F 'Path: $cgroup_path'; do sleep 1; done"
    # Only look at the part of the oomctl output starting at the last "Memory Pressure Monitored
    # CGroups:" header (tac | sed q | tac cuts off everything before it), then check the limit listed
    # for our cgroup.
    assert_in 'Memory Pressure Limit: 20.00%' \
              "$(oomctl | tac | sed -e '/Memory Pressure Monitored CGroups:/q' | tac | grep -A8 "Path: $cgroup_path")"

    systemctl "$@" start TEST-55-OOMD-testbloat.service

    # systemd-oomd watches for elevated pressure for 2 seconds before acting.
    # It can take time to build up pressure so either wait 2 minutes or for the service to fail.
    for _ in {0..59}; do
        if ! systemctl "$@" status TEST-55-OOMD-testbloat.service; then
            break
        fi
        oomctl
        sleep 2
    done

    # testbloat should be killed and testchill should be fine
    if ! check_killed TEST-55-OOMD-testbloat.service "$@"; then exit 42; fi
    if ! systemctl "$@" status TEST-55-OOMD-testchill.service; then exit 24; fi

    # Best-effort kill (failure ignored), then stop everything that was started.
    systemctl "$@" kill --signal=KILL TEST-55-OOMD-testbloat.service || :
    systemctl "$@" stop TEST-55-OOMD-testbloat.service
    systemctl "$@" stop TEST-55-OOMD-testchill.service
    systemctl "$@" stop TEST-55-OOMD-workload.slice
}
# Run the basic scenario against the system service manager, using the system-level cgroup path of
# the workload slice.
testcase_basic_system() {
    test_basic /TEST.slice/TEST-55.slice/TEST-55-OOMD.slice/TEST-55-OOMD-workload.slice
}
# Run the basic scenario against the user service manager of "testuser".
testcase_basic_user() {
    local uid

    # Make sure we also work correctly on user units.
    loginctl enable-linger testuser

    # Look the UID up once; as a plain assignment a failing lookup aborts the test under "set -e"
    # instead of silently producing a cgroup path with an empty UID.
    uid="$(id -u testuser)"

    test_basic "/user.slice/user-${uid}.slice/user@${uid}.service/TEST.slice/TEST-55.slice/TEST-55-OOMD.slice/TEST-55-OOMD-workload.slice" \
               --machine "testuser@.host" --user

    loginctl disable-linger testuser
}
# ManagedOOMPreference=avoid: with the preference set on testbloat, testmunch is expected to be killed
# instead, while testbloat and testchill keep running.
#
# Returns 0 without doing anything if the cgroup file system does not support user xattrs.
# Exits the whole script with 25 (testbloat not running), 43 (testmunch not killed) or
# 24 (testchill not running).
testcase_preference_avoid() {
    # only run this portion of the test if we can set xattrs
    if ! cgroupfs_supports_user_xattrs; then
        echo "cgroup does not support user xattrs, skipping test for ManagedOOMPreference=avoid"
        return 0
    fi

    mkdir -p /run/systemd/system/TEST-55-OOMD-testbloat.service.d/
    cat >/run/systemd/system/TEST-55-OOMD-testbloat.service.d/99-managed-oom-preference.conf <<EOF
[Service]
ManagedOOMPreference=avoid
EOF

    systemctl daemon-reload
    systemctl start TEST-55-OOMD-testchill.service
    systemctl start TEST-55-OOMD-testmunch.service
    systemctl start TEST-55-OOMD-testbloat.service

    # Poll every 2 seconds, at most 60 times, until testmunch is no longer running.
    for _ in {0..59}; do
        if ! systemctl status TEST-55-OOMD-testmunch.service; then
            break
        fi
        oomctl
        sleep 2
    done

    # testmunch should be killed since testbloat had the avoid xattr on it
    if ! systemctl status TEST-55-OOMD-testbloat.service; then exit 25; fi
    if ! check_killed TEST-55-OOMD-testmunch.service; then exit 43; fi
    if ! systemctl status TEST-55-OOMD-testchill.service; then exit 24; fi

    # Best-effort kills (failures ignored), then stop everything that was started.
    systemctl kill --signal=KILL TEST-55-OOMD-testbloat.service || :
    systemctl kill --signal=KILL TEST-55-OOMD-testmunch.service || :
    systemctl stop TEST-55-OOMD-testbloat.service
    systemctl stop TEST-55-OOMD-testmunch.service
    systemctl stop TEST-55-OOMD-testchill.service
    systemctl stop TEST-55-OOMD-workload.slice

    # clean up overrides since test cases can be run in any order
    # and overrides shouldn't affect other tests
    rm -rf /run/systemd/system/TEST-55-OOMD-testbloat.service.d
    systemctl daemon-reload
}
# Check with "systemd-analyze verify" that ManagedOOMMemoryPressureDurationSec= accepts 1s and
# rejects 0. The two unit files are written to /tmp and removed again afterwards.
testcase_duration_analyze() {
    # Verify memory pressure duration is valid if >= 1 second
    cat <<EOF >/tmp/TEST-55-OOMD-valid-duration.service
[Service]
ExecStart=echo hello
ManagedOOMMemoryPressureDurationSec=1s
EOF

    # Verify memory pressure duration is invalid if < 1 second
    cat <<EOF >/tmp/TEST-55-OOMD-invalid-duration.service
[Service]
ExecStart=echo hello
ManagedOOMMemoryPressureDurationSec=0
EOF

    systemd-analyze --recursive-errors=no verify /tmp/TEST-55-OOMD-valid-duration.service
    # The subshell with "!" inverts the result: verification of the invalid unit has to fail.
    (! systemd-analyze --recursive-errors=no verify /tmp/TEST-55-OOMD-invalid-duration.service)

    rm -f /tmp/TEST-55-OOMD-valid-duration.service
    rm -f /tmp/TEST-55-OOMD-invalid-duration.service
}
# Per-unit ManagedOOMMemoryPressureDurationSec=: testmunch overrides the duration to 3s, testchill
# sets it to empty and is expected to show the 2s default in oomctl. testmunch is then expected to be
# killed while testchill keeps running.
#
# Exits the whole script with 44 (testmunch not killed) or 23 (testchill not running).
testcase_duration_override() {
    # Verify memory pressure duration can be overridden to non-zero values
    mkdir -p /run/systemd/system/TEST-55-OOMD-testmunch.service.d/
    cat >/run/systemd/system/TEST-55-OOMD-testmunch.service.d/99-duration-test.conf <<EOF
[Service]
ManagedOOMMemoryPressureDurationSec=3s
ManagedOOMMemoryPressure=kill
EOF

    # Verify memory pressure duration will use default if set to empty
    mkdir -p /run/systemd/system/TEST-55-OOMD-testchill.service.d/
    cat >/run/systemd/system/TEST-55-OOMD-testchill.service.d/99-duration-test.conf <<EOF
[Service]
ManagedOOMMemoryPressureDurationSec=
ManagedOOMMemoryPressure=kill
EOF

    systemctl daemon-reload
    systemctl start TEST-55-OOMD-testmunch.service
    systemctl start TEST-55-OOMD-testchill.service

    # Wait (up to a minute each) until oomctl lists the unit, then check the duration it reports.
    timeout 1m bash -xec 'until oomctl | grep "/TEST-55-OOMD-testmunch.service"; do sleep 1; done'
    oomctl | grep -A 2 "/TEST-55-OOMD-testmunch.service" | grep "Memory Pressure Duration: 3s"
    timeout 1m bash -xec 'until oomctl | grep "/TEST-55-OOMD-testchill.service"; do sleep 1; done'
    oomctl | grep -A 2 "/TEST-55-OOMD-testchill.service" | grep "Memory Pressure Duration: 2s"

    # The unit properties must show the explicit value, respectively "[not set]" for the empty one.
    [[ "$(systemctl show -P ManagedOOMMemoryPressureDurationUSec TEST-55-OOMD-testmunch.service)" == "3s" ]]
    [[ "$(systemctl show -P ManagedOOMMemoryPressureDurationUSec TEST-55-OOMD-testchill.service)" == "[not set]" ]]

    # Poll every 2 seconds, at most 60 times, until testmunch is no longer running.
    for _ in {0..59}; do
        if ! systemctl status TEST-55-OOMD-testmunch.service; then
            break
        fi
        oomctl
        sleep 2
    done

    if ! check_killed TEST-55-OOMD-testmunch.service; then exit 44; fi
    if ! systemctl status TEST-55-OOMD-testchill.service; then exit 23; fi

    # Best-effort kill (failure ignored), then stop everything that was started.
    systemctl kill --signal=KILL TEST-55-OOMD-testmunch.service || :
    systemctl stop TEST-55-OOMD-testmunch.service
    systemctl stop TEST-55-OOMD-testchill.service
    systemctl stop TEST-55-OOMD-workload.slice

    # clean up overrides since test cases can be run in any order
    # and overrides shouldn't affect other tests
    rm -rf /run/systemd/system/TEST-55-OOMD-testmunch.service.d
    rm -rf /run/systemd/system/TEST-55-OOMD-testchill.service.d
    systemctl daemon-reload
}
# run_testcases is not defined in this file; it presumably runs the testcase_* functions defined above.
run_testcases

# Undo the "log-level debug" set at the top of the script.
systemd-analyze log-level info

# Marker file signalling that the whole test passed.
touch /testok