1
0
mirror of https://github.com/systemd/systemd.git synced 2025-01-12 13:18:14 +03:00
systemd/test/units/testsuite-04.bsod.sh
Frantisek Sumsal 7bd41e63f7 test: properly preserve journal from sd-bsod tests
I (incorrectly) assumed that --relinquish-var is the exact reverse of
--flush, i.e. that it also moves the already existing journals from
/var/log/journal/ to /run/log/journal/, but that's not the case. To
actually do that we need to shuffle things manually, so let's do just that.

This should make issues like #31334 easier to debug, since with this
patch we now have a coredump in the test journal as well:

~# make -C test/TEST-04-JOURNAL/ clean setup run TEST_MATCH_SUBTEST=bsod BUILD_DIR=$PWD/build TEST_NO_NSPAWN=1
...
[   12.176089] testsuite-04.sh[712]: + echo 'Subtest /usr/lib/systemd/tests/testdata/units/testsuite-04.bsod.sh failed'
[   12.176089] testsuite-04.sh[712]: Subtest /usr/lib/systemd/tests/testdata/units/testsuite-04.bsod.sh failed
[   12.176089] testsuite-04.sh[712]: + return 1
[   12.177347] systemd[1]: testsuite-04.service: Failed with result 'exit-code'.
[   12.220580] systemd[1]: Failed to start testsuite-04.service.
Spawning getter /home/mrc0mmand/repos/@systemd/systemd/build/journalctl -o export -D /var/tmp/systemd-tests/systemd-test.Qtqmmr/root/var/log/journal...
Finishing after writing 7649 entries
TEST-04-JOURNAL: (failed; see logs)
-rw-r----- 1 root root 16777216 Feb 15 21:13 /var/tmp/systemd-tests/systemd-test.Qtqmmr/system.journal
...

~# coredumpctl --file /var/tmp/systemd-tests/systemd-test.Qtqmmr/system.journal
TIME                        PID UID GID SIG     COREFILE EXE                           SIZE
Thu 2024-02-15 21:13:38 CET 812   0   0 SIGABRT journal  /usr/lib/systemd/systemd-bsod    -
2024-02-16 10:37:37 +01:00

119 lines
4.4 KiB
Bash
Executable File

#!/usr/bin/env bash
# SPDX-License-Identifier: LGPL-2.1-or-later
# Abort on errors and on unset variables, and trace every executed command
set -o errexit -o nounset -o xtrace
# A pipeline fails if any of its stages fails
set -o pipefail

# Skip the test (successfully) when running in a container
if systemd-detect-virt --container --quiet; then
    echo "This test requires a VM, skipping the test"
    exit 0
fi
# EXIT trap handler: prints the last console dump if the script failed and, if
# /var/log/journal is a mount point, moves the journal written during the test
# back to the real persistent journal. Always returns 0.
# shellcheck disable=SC2317
at_exit() {
    # Grab the exit status before any other command overwrites $?
    local exit_status=$?
    local machine_id

    # On failure show the last console dump (if there is one) to ease debugging
    if [[ $exit_status -ne 0 && -e /tmp/console.dump ]]; then
        cat /tmp/console.dump
    fi

    # Nothing else to do unless something is mounted on /var/log/journal
    if ! mountpoint -q /var/log/journal; then
        return 0
    fi

    # In order to preserve the journal from the just run test we need to do a little dance, as
    # --relinquish-var is not a "true" opposite of --flush, meaning that it won't move the existing
    # journal(s) from /var/log/ to /run/log/. So:
    #
    #   1) rotate the journal, so all important bits end up in the archived journal(s)
    journalctl --rotate
    #   2) instruct sd-journald to write further entries to the runtime journal
    journalctl --relinquish-var
    #   3) make sure there are no outstanding writes to the persistent journal that might block us
    #      from unmounting the tmpfs
    journalctl --sync
    #   4) move the archived journals to the runtime storage
    machine_id="$(</etc/machine-id)"
    mv -v "/var/log/journal/$machine_id"/system@*.journal "/run/log/journal/$machine_id/"
    #   5) get rid of the tmpfs on /var/log/journal/
    umount /var/log/journal
    #   6) flush everything to the "real" persistent journal, so we can collect it after the test
    #      finishes
    journalctl --flush

    return 0
}
# Dump the virtual console into /tmp/console.dump and check that it shows the
# systemd-bsod screen together with the expected message.
#
# Arguments: $1 - message expected on the console; matched as a literal string
# Returns:   0 as soon as a dump contains both systemd-bsod banner lines and the
#            message, 1 if that didn't happen in any of the 10 attempts
# Note:      /tmp/console.dump is overwritten on every attempt
vcs_dump_and_check() {
    local expected_message="${1:?}"

    # It might take a while before the systemd-bsod stuff appears on the VCS,
    # so try it a couple of times
    for _ in {0..9}; do
        setterm --term linux --dump --file /tmp/console.dump
        # The message is arbitrary text, not a pattern, hence -F; the "--" guards
        # against messages starting with a dash being parsed as grep options
        if grep -aq "Press any key to exit" /tmp/console.dump &&
           grep -aqF -- "$expected_message" /tmp/console.dump &&
           grep -aq "The current boot has failed" /tmp/console.dump; then
            return 0
        fi

        sleep .5
    done

    return 1
}
# Since systemd-bsod always fetches only the first emergency message from the
# current boot, let's temporarily overmount /var/log/journal with a tmpfs,
# as we're going to wipe it multiple times, but we need to keep the original
# journal intact for the other tests to work correctly.
#
# The EXIT trap is installed before mounting the tmpfs, so at_exit() gets a
# chance to undo the mount no matter where the test bails out.
trap at_exit EXIT
mount -t tmpfs tmpfs /var/log/journal
systemctl restart systemd-journald
systemctl stop systemd-bsod

# Since we just wiped the journal, there should be no emergency messages and
# systemd-bsod should be just a no-op
timeout 10s /usr/lib/systemd/systemd-bsod
setterm --term linux --dump --file /tmp/console.dump
(! grep "The current boot has failed" /tmp/console.dump)

# systemd-bsod should pick up emergency messages only with UID=0, so let's check
# that as well
systemd-run --user --machine testuser@ --wait --pipe systemd-cat -p emerg echo "User emergency message"
systemd-cat -p emerg echo "Root emergency message"
journalctl --sync
# Set $SYSTEMD_COLORS so systemd-bsod also prints out the QR code
SYSTEMD_COLORS=256 /usr/lib/systemd/systemd-bsod &
PID=$!
vcs_dump_and_check "Root emergency message"
grep -aq "Scan the QR code" /tmp/console.dump
# TODO: check if systemd-bsod exits on a key press (didn't figure this one out yet)
kill "$PID"
# Wait (up to 10s) until the killed systemd-bsod process is really gone
timeout 10 bash -c "while kill -0 $PID; do sleep .5; done"
# Wipe the journal
journalctl --vacuum-size=1 --rotate
(! journalctl -q -b -p emerg --grep .)

# Check the systemd-bsod.service as well
# Note: the systemd-bsod.service unit has ConditionVirtualization=no, so let's
# temporarily override it just for the test
# (use -p, so a leftover drop-in directory doesn't fail the test under set -e)
mkdir -p /run/systemd/system/systemd-bsod.service.d
printf '[Unit]\nConditionVirtualization=\n' >/run/systemd/system/systemd-bsod.service.d/99-override.conf
systemctl daemon-reload
systemctl start systemd-bsod
systemd-cat -p emerg echo "Service emergency message"
vcs_dump_and_check "Service emergency message"
systemctl status systemd-bsod
systemctl stop systemd-bsod
# Wipe the journal
journalctl --vacuum-size=1 --rotate
(! journalctl -q -b -p emerg --grep .)

# Same as above, but make sure the service responds to signals even when there are
# no "emerg" messages, see systemd/systemd#30084
(! systemctl is-active systemd-bsod)
systemctl start systemd-bsod
timeout 5s bash -xec 'until systemctl is-active systemd-bsod; do sleep .5; done'
timeout 5s systemctl stop systemd-bsod
timeout 5s bash -xec 'while systemctl is-active systemd-bsod; do sleep .5; done'

# The ConditionVirtualization= override was meant to be temporary, so drop it
# again once all checks passed to leave the unit in its original state
rm -f /run/systemd/system/systemd-bsod.service.d/99-override.conf
systemctl daemon-reload