mirror of
https://github.com/systemd/systemd.git
synced 2024-12-28 11:21:59 +03:00
f53d56f1a3
In a couple of recent CI runs I noticed TEST-36 failing due to a missed service exit notification and a subsequent fail of the `grep` command: ``` [ 257.112153] H systemd[1]: Started numa-test.service. [ 257.114343] H systemd[899]: numa-test.service: Failed to set NUMA memory policy: Invalid argument [ 257.118270] H systemd[899]: numa-test.service: Failed at step NUMA_POLICY spawning /bin/sleep: Invalid argument [ 257.126170] H systemd[1]: Bus private-bus-connection: changing state RUNNING → CLOSING [ 257.130290] H systemd[1]: numa-test.service: Failed to send unit change signal for numa-test.service: Connection reset by peer [ 257.131567] H systemd[1]: Received SIGCHLD from PID 899 ((sleep)). [ 257.132870] H systemd[1]: Got disconnect on private connection. [ 257.134299] H systemd[1]: systemd-journald.service: Got notification message from PID 498 (FDSTORE=1) [ 257.135611] H systemd[1]: systemd-journald.service: Added fd 38 (n/a) to fd store. [ 257.136999] H systemd[1]: systemd-journald.service: Received EPOLLHUP on stored fd 38 (stored), closing. [ 257.366996] H testsuite-36.sh[536]: + stopJournalctl [ 257.366996] H testsuite-36.sh[536]: + local unit=init.scope [ 257.366996] H testsuite-36.sh[536]: + echo 'Force journald to write all queued messages' [ 257.366996] H testsuite-36.sh[536]: Force journald to write all queued messages [ 257.366996] H testsuite-36.sh[536]: + journalctl --sync [ 257.488642] H systemd-journald[498]: Received client request to rotate journal. 
[ 257.520821] H testsuite-36.sh[536]: + journalctl -u init.scope --cursor-file=jounalCursorFile [ 257.981399] H testsuite-36.sh[536]: + pid1StopUnit numa-test.service [ 257.984533] H testsuite-36.sh[536]: + systemctl stop numa-test.service [ 258.173656] H systemd[1]: Bus private-bus-connection: changing state AUTHENTICATING → RUNNING [ 258.180710] H systemd[1]: numa-test.service: Trying to enqueue job numa-test.service/stop/replace [ 258.182424] H systemd[1]: Added job numa-test.service/stop to transaction. [ 258.185234] H systemd[1]: numa-test.service: Installed new job numa-test.service/stop as 738 [ 258.187017] H systemd[1]: numa-test.service: Enqueued job numa-test.service/stop as 738 [ 258.239930] H testsuite-36.sh[536]: + grep 'numa-test.service: Main process exited, code=exited, status=242/NUMA' journal.log ``` Let's mitigate this by checking the test service exit status directly instead of relying on the notification.
354 lines
12 KiB
Bash
Executable File
354 lines
12 KiB
Bash
Executable File
#!/usr/bin/env bash
|
|
# SPDX-License-Identifier: LGPL-2.1-or-later
|
|
# Strict mode: abort on errors and unset variables, trace every command, and
# make a pipeline fail when any of its stages fails
set -eux -o pipefail
|
|
|
|
# EXIT trap handler: when the script is terminating with a non-zero exit code,
# print the strace and journal logs (if they have been defined and exist) so
# the failure is easier to debug. Does nothing on a successful exit.
at_exit() {
    local rc=$?
    local artifact

    # Clean exit, nothing to dump
    if (( rc == 0 )); then
        return 0
    fi

    # The log variables may not be assigned yet if we fail very early, hence
    # the empty default; an empty path never passes the -f check
    for artifact in "${straceLog-}" "${journalLog-}"; do
        [[ -f "$artifact" ]] && cat "$artifact"
    done
}
|
|
|
|
# Run at_exit whenever the script terminates
trap at_exit EXIT

# Switch the manager to debug logging and send its log output to the journal
systemd-analyze log-level debug
systemd-analyze log-target journal

# Log files
straceLog="strace.log"
journalLog="journal.log"

# Systemd config files
testUnit="numa-test.service"
testUnitFile="/run/systemd/system/${testUnit}"
testUnitNUMAConf="${testUnitFile}.d/numa.conf"

# Sleep constants (we should probably figure out something better but nothing comes to mind)
sleepAfterStart=1

# Journal cursor for easier navigation
journalCursorFile="jounalCursorFile"
|
|
|
|
# Attach strace (as a coprocess) to PID 1, recording set_mempolicy() calls
# into $straceLog.
# Arguments: $1 - optional extra strace option (e.g. '-f')
startStrace() {
    coproc strace -qq -p 1 -o "$straceLog" -e set_mempolicy -s 1024 ${1:+"$1"}

    # Wait for strace to properly "initialize", i.e. until PID 1 has the
    # TracerPid field set to the current strace's PID
    until awk -v spid="$COPROC_PID" '/^TracerPid:/ {exit !($2 == spid);}' /proc/1/status; do
        sleep 0.1
    done
}
|
|
|
|
# Terminate the strace coprocess started by startStrace and wait until it is
# really gone.
# Returns: non-zero when there is no coprocess to stop
stopStrace() {
    local tracer_pid

    if ! [[ -v COPROC_PID ]]; then
        return 1
    fi

    tracer_pid=$COPROC_PID
    kill -s TERM "$tracer_pid"

    # Make sure the strace process is indeed dead
    while kill -0 "$tracer_pid" 2>/dev/null; do
        sleep 0.1
    done
}
|
|
|
|
# Remember the current end of the journal in $journalCursorFile so that
# stopJournalctl can later pass the same cursor file back to journalctl.
startJournalctl() {
    # Start from an empty cursor file
    : >"$journalCursorFile"

    # Save journal's cursor for later navigation
    journalctl --no-pager --lines=0 --output=cat --cursor-file="$journalCursorFile"
}
|
|
|
|
# Flush the journal and write the given unit's messages, read via the cursor
# file written by startJournalctl, into $journalLog.
# Arguments: $1 - unit to collect messages for (defaults to init.scope)
stopJournalctl() {
    local unit="${1:-init.scope}"

    # Using journalctl --sync should be better than using SIGRTMIN+1, as
    # the --sync wait until the synchronization is complete
    printf '%s\n' "Force journald to write all queued messages"
    journalctl --sync

    journalctl --unit="$unit" --cursor-file="$journalCursorFile" >"$journalLog"
}
|
|
|
|
# Succeeds when the machine exposes NUMA topology in sysfs.
checkNUMA() {
    # NUMA enabled system should have at least NUMA node0
    [[ -e /sys/devices/system/node/node0 ]]
}
|
|
|
|
# Write a [Manager] drop-in with the requested NUMA settings into
# $confDir/numa.conf.
# Arguments: $1 - NUMAPolicy value (mandatory)
#            $2 - NUMAMask value (optional, empty when omitted)
writePID1NUMAPolicy() {
    local policy="${1:?}"
    local mask="${2:-}"

    cat >"$confDir/numa.conf" <<EOF
[Manager]
NUMAPolicy=$policy
NUMAMask=$mask
EOF
}
|
|
|
|
# Create the test service unit (a long sleep) together with its, still empty,
# drop-in directory.
writeTestUnit() {
    mkdir -p "$testUnitFile.d/"

    cat >"$testUnitFile" <<EOF
[Service]
ExecStart=/bin/sleep 3600
EOF
}
|
|
|
|
# Write the NUMA drop-in for the test unit and make the manager pick it up.
# Arguments: $1 - NUMAPolicy value (mandatory)
#            $2 - NUMAMask value (optional, empty when omitted)
writeTestUnitNUMAPolicy() {
    local policy="${1:?}"
    local mask="${2:-}"

    cat >"$testUnitNUMAConf" <<EOF
[Service]
NUMAPolicy=$policy
NUMAMask=$mask
EOF

    # Reload so the new drop-in takes effect
    systemctl daemon-reload
}
|
|
|
|
# Reload the manager configuration while strace is attached to PID 1, so the
# set_mempolicy() calls made during the reload land in $straceLog.
pid1ReloadWithStrace() {
    startStrace
    systemctl daemon-reload
    # Give PID 1 a moment before detaching strace
    sleep "$sleepAfterStart"
    stopStrace
}
|
|
|
|
# Reload the manager configuration and collect the journal messages logged
# for init.scope during the reload into $journalLog.
pid1ReloadWithJournal() {
    startJournalctl
    systemctl daemon-reload
    stopJournalctl
}
|
|
|
|
# Start a unit while strace is attached to PID 1; '-f' is passed to strace so
# forked children are traced as well.
# Arguments: $1 - unit to start (mandatory)
pid1StartUnitWithStrace() {
    startStrace '-f'
    systemctl start "${1:?}"
    # Give the unit a moment before detaching strace
    sleep "$sleepAfterStart"
    stopStrace
}
|
|
|
|
# Start a unit and collect the journal messages logged for init.scope in the
# meantime into $journalLog.
# Arguments: $1 - unit to start (mandatory)
pid1StartUnitWithJournal() {
    startJournalctl
    systemctl start "${1:?}"
    # Give the unit a moment before collecting the journal
    sleep "$sleepAfterStart"
    stopJournalctl
}
|
|
|
|
# Stop the given unit.
# Arguments: $1 - unit to stop (mandatory)
pid1StopUnit() {
    local unit="${1:?}"

    systemctl stop "$unit"
}
|
|
|
|
# Verify the NUMA related properties reported by "systemctl show" for a unit.
# Matching property lines are printed to stdout.
# Arguments: $1 - unit name (mandatory)
#            $2 - expected NUMAPolicy value (mandatory)
#            $3 - expected NUMAMask value (optional; the mask is not checked
#                 when omitted or empty)
# Returns:   0 when every checked property matches, non-zero otherwise
systemctlCheckNUMAProperties() {
    local UNIT_NAME="${1:?}"
    local NUMA_POLICY="${2:?}"
    local NUMA_MASK="${3:-""}"
    local PROPERTY

    # Keep the property in a variable instead of a mktemp file, so nothing is
    # left behind in the temporary directory after each call. The explicit
    # "|| return" makes a mismatch fail the function even when the caller
    # runs it in a context where errexit is ignored.
    PROPERTY="$(systemctl show -p NUMAPolicy "$UNIT_NAME")" || return
    grep "NUMAPolicy=$NUMA_POLICY" <<<"$PROPERTY" || return

    if [[ -n "$NUMA_MASK" ]]; then
        PROPERTY="$(systemctl show -p NUMAMask "$UNIT_NAME")" || return
        grep "NUMAMask=$NUMA_MASK" <<<"$PROPERTY" || return
    fi
}
|
|
|
|
# Create systemd config drop-in directory
confDir="/run/systemd/system.conf.d/"
mkdir -p "$confDir"

# Install the test service unit
writeTestUnit
|
|
|
|
# Main test body. Without NUMA support only check that the NUMA settings are
# ignored with a log message; otherwise exercise every NUMAPolicy=/NUMAMask=
# combination for PID 1 itself, for a unit file and for systemd-run.
if ! checkNUMA; then
    echo >&2 "NUMA is not supported on this machine, switching to a simple sanity check"

    echo "PID1 NUMAPolicy=default && NUMAMask=0 check without NUMA support"
    writePID1NUMAPolicy "default" "0"
    startJournalctl
    systemctl daemon-reload
    stopJournalctl
    grep "NUMA support not available, ignoring" "$journalLog"

    echo "systemd-run NUMAPolicy=default && NUMAMask=0 check without NUMA support"
    runUnit='numa-systemd-run-test.service'
    startJournalctl
    systemd-run -p NUMAPolicy=default -p NUMAMask=0 --unit "$runUnit" sleep 1000
    sleep "$sleepAfterStart"
    pid1StopUnit "$runUnit"
    stopJournalctl "$runUnit"
    grep "NUMA support not available, ignoring" "$journalLog"

else
    echo "PID1 NUMAPolicy support - Default policy w/o mask"
    writePID1NUMAPolicy "default"
    pid1ReloadWithStrace
    # Kernel requires that nodemask argument is set to NULL when setting default policy
    grep "set_mempolicy(MPOL_DEFAULT, NULL" "$straceLog"

    echo "PID1 NUMAPolicy support - Default policy w/ mask"
    writePID1NUMAPolicy "default" "0"
    pid1ReloadWithStrace
    grep "set_mempolicy(MPOL_DEFAULT, NULL" "$straceLog"

    echo "PID1 NUMAPolicy support - Bind policy w/o mask"
    writePID1NUMAPolicy "bind"
    pid1ReloadWithJournal
    grep "Failed to set NUMA memory policy, ignoring: Invalid argument" "$journalLog"

    echo "PID1 NUMAPolicy support - Bind policy w/ mask"
    writePID1NUMAPolicy "bind" "0"
    pid1ReloadWithStrace
    grep -P "set_mempolicy\(MPOL_BIND, \[0x0*1\]" "$straceLog"

    echo "PID1 NUMAPolicy support - Interleave policy w/o mask"
    writePID1NUMAPolicy "interleave"
    pid1ReloadWithJournal
    grep "Failed to set NUMA memory policy, ignoring: Invalid argument" "$journalLog"

    echo "PID1 NUMAPolicy support - Interleave policy w/ mask"
    writePID1NUMAPolicy "interleave" "0"
    pid1ReloadWithStrace
    grep -P "set_mempolicy\(MPOL_INTERLEAVE, \[0x0*1\]" "$straceLog"

    echo "PID1 NUMAPolicy support - Preferred policy w/o mask"
    writePID1NUMAPolicy "preferred"
    pid1ReloadWithJournal
    # Preferred policy with empty node mask is actually allowed and should reset allocation policy to default
    grep "Failed to set NUMA memory policy, ignoring: Invalid argument" "$journalLog" && { echo >&2 "unexpected pass"; exit 1; }

    echo "PID1 NUMAPolicy support - Preferred policy w/ mask"
    writePID1NUMAPolicy "preferred" "0"
    pid1ReloadWithStrace
    grep -P "set_mempolicy\(MPOL_PREFERRED, \[0x0*1\]" "$straceLog"

    echo "PID1 NUMAPolicy support - Local policy w/o mask"
    writePID1NUMAPolicy "local"
    pid1ReloadWithStrace
    # The check below expects a NULL nodemask for the local policy, just like
    # for the default policy.
    # The unpatched versions of strace don't recognize the MPOL_LOCAL constant and
    # return a numerical constant instead (with a comment):
    #   set_mempolicy(0x4 /* MPOL_??? */, NULL, 0) = 0
    # Let's cover this scenario as well
    grep -E "set_mempolicy\((MPOL_LOCAL|0x4 [^,]*), NULL" "$straceLog"

    echo "PID1 NUMAPolicy support - Local policy w/ mask"
    writePID1NUMAPolicy "local" "0"
    pid1ReloadWithStrace
    grep -E "set_mempolicy\((MPOL_LOCAL|0x4 [^,]*), NULL" "$straceLog"

    echo "Unit file NUMAPolicy support - Default policy w/o mask"
    writeTestUnitNUMAPolicy "default"
    pid1StartUnitWithStrace "$testUnit"
    systemctlCheckNUMAProperties "$testUnit" "default"
    pid1StopUnit "$testUnit"
    grep "set_mempolicy(MPOL_DEFAULT, NULL" "$straceLog"

    echo "Unit file NUMAPolicy support - Default policy w/ mask"
    writeTestUnitNUMAPolicy "default" "0"
    pid1StartUnitWithStrace "$testUnit"
    systemctlCheckNUMAProperties "$testUnit" "default" "0"
    pid1StopUnit "$testUnit"
    # Mask must be ignored
    grep "set_mempolicy(MPOL_DEFAULT, NULL" "$straceLog"

    echo "Unit file NUMAPolicy support - Bind policy w/o mask"
    writeTestUnitNUMAPolicy "bind"
    pid1StartUnitWithJournal "$testUnit"
    pid1StopUnit "$testUnit"
    # Check the recorded exit status of the main process directly (expected
    # to be 242) instead of searching the journal for the exit message
    [[ $(systemctl show "$testUnit" -P ExecMainStatus) == "242" ]]

    echo "Unit file NUMAPolicy support - Bind policy w/ mask"
    writeTestUnitNUMAPolicy "bind" "0"
    pid1StartUnitWithStrace "$testUnit"
    systemctlCheckNUMAProperties "$testUnit" "bind" "0"
    pid1StopUnit "$testUnit"
    grep -P "set_mempolicy\(MPOL_BIND, \[0x0*1\]" "$straceLog"

    echo "Unit file NUMAPolicy support - Interleave policy w/o mask"
    writeTestUnitNUMAPolicy "interleave"
    pid1StartUnitWithStrace "$testUnit"
    pid1StopUnit "$testUnit"
    [[ $(systemctl show "$testUnit" -P ExecMainStatus) == "242" ]]

    echo "Unit file NUMAPolicy support - Interleave policy w/ mask"
    writeTestUnitNUMAPolicy "interleave" "0"
    pid1StartUnitWithStrace "$testUnit"
    systemctlCheckNUMAProperties "$testUnit" "interleave" "0"
    pid1StopUnit "$testUnit"
    grep -P "set_mempolicy\(MPOL_INTERLEAVE, \[0x0*1\]" "$straceLog"

    echo "Unit file NUMAPolicy support - Preferred policy w/o mask"
    writeTestUnitNUMAPolicy "preferred"
    pid1StartUnitWithJournal "$testUnit"
    systemctlCheckNUMAProperties "$testUnit" "preferred"
    pid1StopUnit "$testUnit"
    # The unit must NOT have failed with exit status 242 in this case
    [[ $(systemctl show "$testUnit" -P ExecMainStatus) == "242" ]] && { echo >&2 "unexpected pass"; exit 1; }

    echo "Unit file NUMAPolicy support - Preferred policy w/ mask"
    writeTestUnitNUMAPolicy "preferred" "0"
    pid1StartUnitWithStrace "$testUnit"
    systemctlCheckNUMAProperties "$testUnit" "preferred" "0"
    pid1StopUnit "$testUnit"
    grep -P "set_mempolicy\(MPOL_PREFERRED, \[0x0*1\]" "$straceLog"

    echo "Unit file NUMAPolicy support - Local policy w/o mask"
    writeTestUnitNUMAPolicy "local"
    pid1StartUnitWithStrace "$testUnit"
    systemctlCheckNUMAProperties "$testUnit" "local"
    pid1StopUnit "$testUnit"
    grep -E "set_mempolicy\((MPOL_LOCAL|0x4 [^,]*), NULL" "$straceLog"

    echo "Unit file NUMAPolicy support - Local policy w/ mask"
    writeTestUnitNUMAPolicy "local" "0"
    pid1StartUnitWithStrace "$testUnit"
    systemctlCheckNUMAProperties "$testUnit" "local" "0"
    pid1StopUnit "$testUnit"
    # Mask must be ignored
    grep -E "set_mempolicy\((MPOL_LOCAL|0x4 [^,]*), NULL" "$straceLog"

    echo "Unit file CPUAffinity=NUMA support"
    writeTestUnitNUMAPolicy "bind" "0"
    echo "CPUAffinity=numa" >>"$testUnitNUMAConf"
    systemctl daemon-reload
    systemctl start "$testUnit"
    systemctlCheckNUMAProperties "$testUnit" "bind" "0"
    # The effective CPU affinity must equal the CPU list of NUMA node 0
    cpulist="$(cat /sys/devices/system/node/node0/cpulist)"
    affinity_systemd="$(systemctl show --value -p CPUAffinity "$testUnit")"
    [ "$cpulist" = "$affinity_systemd" ]
    pid1StopUnit "$testUnit"

    echo "systemd-run NUMAPolicy support"
    runUnit='numa-systemd-run-test.service'

    systemd-run -p NUMAPolicy=default --unit "$runUnit" sleep 1000
    systemctlCheckNUMAProperties "$runUnit" "default"
    pid1StopUnit "$runUnit"

    systemd-run -p NUMAPolicy=default -p NUMAMask=0 --unit "$runUnit" sleep 1000
    systemctlCheckNUMAProperties "$runUnit" "default" ""
    pid1StopUnit "$runUnit"

    systemd-run -p NUMAPolicy=bind -p NUMAMask=0 --unit "$runUnit" sleep 1000
    systemctlCheckNUMAProperties "$runUnit" "bind" "0"
    pid1StopUnit "$runUnit"

    systemd-run -p NUMAPolicy=interleave -p NUMAMask=0 --unit "$runUnit" sleep 1000
    systemctlCheckNUMAProperties "$runUnit" "interleave" "0"
    pid1StopUnit "$runUnit"

    systemd-run -p NUMAPolicy=preferred -p NUMAMask=0 --unit "$runUnit" sleep 1000
    systemctlCheckNUMAProperties "$runUnit" "preferred" "0"
    pid1StopUnit "$runUnit"

    systemd-run -p NUMAPolicy=local --unit "$runUnit" sleep 1000
    systemctlCheckNUMAProperties "$runUnit" "local"
    pid1StopUnit "$runUnit"

    systemd-run -p NUMAPolicy=local -p NUMAMask=0 --unit "$runUnit" sleep 1000
    systemctlCheckNUMAProperties "$runUnit" "local" ""
    pid1StopUnit "$runUnit"

    systemd-run -p NUMAPolicy=local -p NUMAMask=0 -p CPUAffinity=numa --unit "$runUnit" sleep 1000
    systemctlCheckNUMAProperties "$runUnit" "local" ""
    # Let grep consume the whole output instead of using -q: with pipefail an
    # early exit of grep could kill the writer with SIGPIPE and fail the
    # pipeline even though the pattern matched
    systemctl cat "$runUnit" | grep 'CPUAffinity=numa' >/dev/null
    pid1StopUnit "$runUnit"
fi
|
|
|
|
# Cleanup: drop the manager drop-in directory created above and reload
rm -rf "$confDir"
systemctl daemon-reload

# Set the manager log level to info
systemd-analyze log-level info

# Leave the success marker
printf '%s\n' OK >/testok

exit 0
|