From b1fd5cd4eda02a323db93d7daa97f5138f89677d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zbigniew=20J=C4=99drzejewski-Szmek?= Date: Sun, 6 Nov 2022 20:34:21 +0100 Subject: [PATCH 1/5] Revert "initrd: extend SYSTEMD_IN_INITRD to accept non-ramfs rootfs" This reverts commit 1f22621ba33f8089d2ae5fbcaf8b3970dd68aaf0. As described in the reverted commit, we don't want to get rid of the check completely. But the check requires opting-in by setting SYSTEMD_IN_INITRD=lenient, which is cumbersome and doesn't seem to actually happen. https://bugzilla.redhat.com/show_bug.cgi?id=2137631 is caused by systemd refusing to treat the system as an initrd because overlayfs is used. Let's revert this approach and do something that doesn't require opt-in instead. I don't think it makes sense to keep support for "SYSTEMD_IN_INITRD=lenient" or "SYSTEMD_IN_INITRD=auto". To get "auto" behaviour, just unset the option. And "lenient" will be reimplemented as a better check. Thus the changes to the option interface are completely reverted. --- docs/ENVIRONMENT.md | 10 +++------ src/basic/initrd-util.c | 47 +++++++---------------------------------- 2 files changed, 11 insertions(+), 46 deletions(-) diff --git a/docs/ENVIRONMENT.md b/docs/ENVIRONMENT.md index a840dd0c90..ab3add6031 100644 --- a/docs/ENVIRONMENT.md +++ b/docs/ENVIRONMENT.md @@ -73,13 +73,9 @@ All tools: (relevant in particular for the system manager and `systemd-hostnamed`). Must be a valid hostname (either a single label or a FQDN). -* `$SYSTEMD_IN_INITRD=[auto|lenient|0|1]` — if set, specifies initrd detection - method. Defaults to `auto`. Behavior is defined as follows: - `auto`: Checks if `/etc/initrd-release` exists, and a temporary fs is mounted - on `/`. If both conditions meet, then it's in initrd. - `lenient`: Similar to `auto`, but the rootfs check is skipped. - `0|1`: Simply overrides initrd detection. This is useful for debugging and - testing initrd-only programs in the main system. +* `$SYSTEMD_IN_INITRD` — takes a boolean. If set, overrides initrd detection. + This is useful for debugging and testing initrd-only programs in the main + system. * `$SYSTEMD_BUS_TIMEOUT=SECS` — specifies the maximum time to wait for method call completion. If no time unit is specified, assumes seconds. The usual other units diff --git a/src/basic/initrd-util.c b/src/basic/initrd-util.c index 2b6809aea5..3624dcca56 100644 --- a/src/basic/initrd-util.c +++ b/src/basic/initrd-util.c @@ -12,13 +12,11 @@ static int saved_in_initrd = -1; bool in_initrd(void) { int r; - const char *e; - bool lenient = false; if (saved_in_initrd >= 0) return saved_in_initrd; - /* We have two checks here: + /* We make two checks here: * * 1. the flag file /etc/initrd-release must exist * 2. the root file system must be a memory file system @@ -26,46 +24,17 @@ bool in_initrd(void) { * The second check is extra paranoia, since misdetecting an * initrd can have bad consequences due the initrd * emptying when transititioning to the main systemd. - * - * If env var $SYSTEMD_IN_INITRD is not set or set to "auto", - * both checks are used. If it's set to "lenient", only check - * 1 is used. If set to a boolean value, then the boolean - * value is returned. */ - e = secure_getenv("SYSTEMD_IN_INITRD"); - if (e) { - if (streq(e, "lenient")) - lenient = true; - else if (!streq(e, "auto")) { - r = parse_boolean(e); - if (r >= 0) { - saved_in_initrd = r > 0; - return saved_in_initrd; - } - log_debug_errno(r, "Failed to parse $SYSTEMD_IN_INITRD, ignoring: %m"); - } - } - - if (!lenient) { - r = path_is_temporary_fs("/"); - if (r < 0) - log_debug_errno(r, "Couldn't determine if / is a temporary file system: %m"); + r = getenv_bool_secure("SYSTEMD_IN_INITRD"); + if (r < 0 && r != -ENXIO) + log_debug_errno(r, "Failed to parse $SYSTEMD_IN_INITRD, ignoring: %m"); + if (r >= 0) saved_in_initrd = r > 0; - } - - r = access("/etc/initrd-release", F_OK); - if (r >= 0) { - if (saved_in_initrd == 0) - log_debug("/etc/initrd-release exists, but it's not an initrd."); - else - saved_in_initrd = 1; - } else { - if (errno != ENOENT) - log_debug_errno(errno, "Failed to test if /etc/initrd-release exists: %m"); - saved_in_initrd = 0; - } + else + saved_in_initrd = access("/etc/initrd-release", F_OK) >= 0 && + path_is_temporary_fs("/") > 0; return saved_in_initrd; } From 3ca4ec20120a88de815200400607e679df7f0783 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zbigniew=20J=C4=99drzejewski-Szmek?= Date: Mon, 7 Nov 2022 11:21:29 +0100 Subject: [PATCH 2/5] shared/mount-util: fix comment Just typos and grammar. In the end didn't add a use of a function, but I read the comment carefully, and this commit is the result of that. --- src/shared/mount-util.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/src/shared/mount-util.c b/src/shared/mount-util.c index 6742b7c755..1f827e2061 100644 --- a/src/shared/mount-util.c +++ b/src/shared/mount-util.c @@ -718,20 +718,18 @@ int mount_option_mangle( _cleanup_free_ char *ret = NULL; int r; - /* This extracts mount flags from the mount options, and store + /* This extracts mount flags from the mount options, and stores * non-mount-flag options to '*ret_remaining_options'. * E.g., * "rw,nosuid,nodev,relatime,size=1630748k,mode=700,uid=1000,gid=1000" * is split to MS_NOSUID|MS_NODEV|MS_RELATIME and * "size=1630748k,mode=700,uid=1000,gid=1000". - * See more examples in test-mount-utils.c. + * See more examples in test-mount-util.c. * - * Note that if 'options' does not contain any non-mount-flag options, + * If 'options' does not contain any non-mount-flag options, * then '*ret_remaining_options' is set to NULL instead of empty string. - * Note that this does not check validity of options stored in - * '*ret_remaining_options'. - * Note that if 'options' is NULL, then this just copies 'mount_flags' - * to '*ret_mount_flags'. */ + * The validity of options stored in '*ret_remaining_options' is not checked. + * If 'options' is NULL, this just copies 'mount_flags' to *ret_mount_flags. */ assert(ret_mount_flags); assert(ret_remaining_options); From 08221c5743490f4a14f05767aa45de25a1827817 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zbigniew=20J=C4=99drzejewski-Szmek?= Date: Mon, 7 Nov 2022 11:49:36 +0100 Subject: [PATCH 3/5] shared: make libmount_parse() non-inline Back in e2857b3d87306d93f0fba526f3e79f4f6806fb02 I added this function as static inline in order to avoid linking libmount into libshared. Nevertheless, a dependency on libmount was added to libbasic in 9e7f941acb0d8fe7a31eec7826ff2c9c6af7044f, and later moved to libshared in 77c772f227d866331560a8d0487fba12dd128dd4. So the shenanigan with an inline function is not useful, let's make it a normal function. --- src/shared/libmount-util.c | 40 ++++++++++++++++++++++++++++++++++++++ src/shared/libmount-util.h | 35 ++------------------------------- src/shared/meson.build | 1 + 3 files changed, 43 insertions(+), 33 deletions(-) create mode 100644 src/shared/libmount-util.c diff --git a/src/shared/libmount-util.c b/src/shared/libmount-util.c new file mode 100644 index 0000000000..bcb21b5aba --- /dev/null +++ b/src/shared/libmount-util.c @@ -0,0 +1,40 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include + +#include "libmount-util.h" + +int libmount_parse( + const char *path, + FILE *source, + struct libmnt_table **ret_table, + struct libmnt_iter **ret_iter) { + + _cleanup_(mnt_free_tablep) struct libmnt_table *table = NULL; + _cleanup_(mnt_free_iterp) struct libmnt_iter *iter = NULL; + int r; + + /* Older libmount seems to require this. */ + assert(!source || path); + + table = mnt_new_table(); + iter = mnt_new_iter(MNT_ITER_FORWARD); + if (!table || !iter) + return -ENOMEM; + + /* If source or path are specified, we use on the functions which ignore utab. + * Only if both are empty, we use mnt_table_parse_mtab(). */ + + if (source) + r = mnt_table_parse_stream(table, source, path); + else if (path) + r = mnt_table_parse_file(table, path); + else + r = mnt_table_parse_mtab(table, NULL); + if (r < 0) + return r; + + *ret_table = TAKE_PTR(table); + *ret_iter = TAKE_PTR(iter); + return 0; +} diff --git a/src/shared/libmount-util.h b/src/shared/libmount-util.h index cf19c56ed1..6f6f36ad52 100644 --- a/src/shared/libmount-util.h +++ b/src/shared/libmount-util.h @@ -1,8 +1,6 @@ /* SPDX-License-Identifier: LGPL-2.1-or-later */ #pragma once -#include - /* This needs to be after sys/mount.h */ #include @@ -11,37 +9,8 @@ DEFINE_TRIVIAL_CLEANUP_FUNC_FULL(struct libmnt_table*, mnt_free_table, NULL); DEFINE_TRIVIAL_CLEANUP_FUNC_FULL(struct libmnt_iter*, mnt_free_iter, NULL); -static inline int libmount_parse( +int libmount_parse( const char *path, FILE *source, struct libmnt_table **ret_table, - struct libmnt_iter **ret_iter) { - - _cleanup_(mnt_free_tablep) struct libmnt_table *table = NULL; - _cleanup_(mnt_free_iterp) struct libmnt_iter *iter = NULL; - int r; - - /* Older libmount seems to require this. */ - assert(!source || path); - - table = mnt_new_table(); - iter = mnt_new_iter(MNT_ITER_FORWARD); - if (!table || !iter) - return -ENOMEM; - - /* If source or path are specified, we use on the functions which ignore utab. - * Only if both are empty, we use mnt_table_parse_mtab(). */ - - if (source) - r = mnt_table_parse_stream(table, source, path); - else if (path) - r = mnt_table_parse_file(table, path); - else - r = mnt_table_parse_mtab(table, NULL); - if (r < 0) - return r; - - *ret_table = TAKE_PTR(table); - *ret_iter = TAKE_PTR(iter); - return 0; -} + struct libmnt_iter **ret_iter); diff --git a/src/shared/meson.build b/src/shared/meson.build index bdf1701ba9..5f66b865de 100644 --- a/src/shared/meson.build +++ b/src/shared/meson.build @@ -193,6 +193,7 @@ shared_sources = files( 'libcrypt-util.h', 'libfido2-util.c', 'libfido2-util.h', + 'libmount-util.c', 'libmount-util.h', 'linux/auto_dev-ioctl.h', 'linux/bpf.h', From 3ed332e77a9c85a28529cdd267d8681cc652f42c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zbigniew=20J=C4=99drzejewski-Szmek?= Date: Tue, 8 Nov 2022 14:18:46 +0100 Subject: [PATCH 4/5] test-fd-util: fix typos and use log_tests_skipped() --- src/test/test-fd-util.c | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/src/test/test-fd-util.c b/src/test/test-fd-util.c index 5b5a712469..2b85ceab82 100644 --- a/src/test/test-fd-util.c +++ b/src/test/test-fd-util.c @@ -322,8 +322,8 @@ TEST(close_all_fds) { int r; /* Runs the test four times. Once as is. Once with close_range() syscall blocked via seccomp, once - * with /proc overmounted, and once with the combination of both. This should trigger all fallbacks in - * the close_range_all() function. */ + * with /proc/ overmounted, and once with the combination of both. This should trigger all fallbacks + * in the close_range_all() function. */ r = safe_fork("(caf-plain)", FORK_CLOSE_ALL_FDS|FORK_DEATHSIG|FORK_LOG|FORK_WAIT, NULL); if (r == 0) { @@ -332,26 +332,22 @@ TEST(close_all_fds) { } assert_se(r >= 0); - if (geteuid() != 0) { - log_notice("Lacking privileges, skipping running tests with blocked close_range() and with /proc/ overnmounted."); - return; - } + if (geteuid() != 0) + return (void) log_tests_skipped("Lacking privileges for test with close_range() blocked and /proc/ overmounted"); r = safe_fork("(caf-noproc)", FORK_CLOSE_ALL_FDS|FORK_DEATHSIG|FORK_LOG|FORK_WAIT|FORK_NEW_MOUNTNS|FORK_MOUNTNS_SLAVE, NULL); if (r == 0) { r = mount_nofollow_verbose(LOG_WARNING, "tmpfs", "/proc", "tmpfs", 0, NULL); if (r < 0) - log_notice("Overmounting /proc didn#t work, skipping close_all_fds() with masked /proc/."); + log_notice("Overmounting /proc/ didn't work, skipping close_all_fds() with masked /proc/."); else test_close_all_fds_inner(); _exit(EXIT_SUCCESS); } assert_se(r >= 0); - if (!is_seccomp_available()) { - log_notice("Seccomp not available, skipping seccomp tests in %s", __func__); - return; - } + if (!is_seccomp_available()) + return (void) log_tests_skipped("Seccomp not available"); r = safe_fork("(caf-seccomp)", FORK_CLOSE_ALL_FDS|FORK_DEATHSIG|FORK_LOG|FORK_WAIT, NULL); if (r == 0) { @@ -373,7 +369,7 @@ TEST(close_all_fds) { else { r = mount_nofollow_verbose(LOG_WARNING, "tmpfs", "/proc", "tmpfs", 0, NULL); if (r < 0) - log_notice("Overmounting /proc didn#t work, skipping close_all_fds() with masked /proc/."); + log_notice("Overmounting /proc/ didn't work, skipping close_all_fds() with masked /proc/."); else test_close_all_fds_inner(); } From a940f507fbe1c81d6787dc0b7ce232c39818eec9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zbigniew=20J=C4=99drzejewski-Szmek?= Date: Mon, 7 Nov 2022 12:40:20 +0100 Subject: [PATCH 5/5] pid1: skip cleanup if root is not tmpfs/ramfs in_initrd() was really doing two things: checking if we're in the initrd, and also verifying that the initrd is set up correctly. But this second check is complicated, in particular it would return false for overlayfs, even with an upper tmpfs layer. It also doesn't support the use case of having an initial initrd with tmpfs, and then transitioning into an intermediate initrd that is e.g. a DDI, i.e. a filesystem possibly with verity arranged as a disk image. We don't need to check if we're in initrd in every program. Instead, concerns are separated: - in_initrd() just does a simple check for /etc/initrd-release. - When doing cleanup, pid1 checks if it's on a tmpfs before starting to wipe the old root. The only case where we want to remove the old root is when we're on a plain tempory filesystem. With an overlay, we'd be creating whiteout files, which is not very useful. (*) This should resolve https://bugzilla.redhat.com/show_bug.cgi?id=2137631 which is caused by systemd refusing to treat the system as an initrd because overlayfs is used. (*) I think the idea of keeping the initrd fs around for shutdown is outdated. We should just have a completely separate exitrd that is unpacked when we want to shut down. This way, we don't waste memory at runtime, and we also don't transition to a potentially older version of systemd. But we don't have support for this yet. This replaces 0fef5b0f0bd9ded1ae7bcb3e4e4b2893e36c51a6. --- src/basic/initrd-util.c | 20 +++++++++----------- src/shared/switch-root.c | 22 ++++++++++++---------- 2 files changed, 21 insertions(+), 21 deletions(-) diff --git a/src/basic/initrd-util.c b/src/basic/initrd-util.c index 3624dcca56..03ccfbe483 100644 --- a/src/basic/initrd-util.c +++ b/src/basic/initrd-util.c @@ -3,6 +3,7 @@ #include #include "env-util.h" +#include "errno-util.h" #include "initrd-util.h" #include "parse-util.h" #include "stat-util.h" @@ -16,14 +17,8 @@ bool in_initrd(void) { if (saved_in_initrd >= 0) return saved_in_initrd; - /* We make two checks here: - * - * 1. the flag file /etc/initrd-release must exist - * 2. the root file system must be a memory file system - * - * The second check is extra paranoia, since misdetecting an - * initrd can have bad consequences due the initrd - * emptying when transititioning to the main systemd. + /* If /etc/initrd-release exists, we're in an initrd. + * This can be overridden by setting SYSTEMD_IN_INITRD=0|1. */ r = getenv_bool_secure("SYSTEMD_IN_INITRD"); @@ -32,9 +27,12 @@ bool in_initrd(void) { if (r >= 0) saved_in_initrd = r > 0; - else - saved_in_initrd = access("/etc/initrd-release", F_OK) >= 0 && - path_is_temporary_fs("/") > 0; + else { + r = RET_NERRNO(access("/etc/initrd-release", F_OK)); + if (r < 0 && r != -ENOENT) + log_debug_errno(r, "Failed to check if /etc/initrd-release exists, assuming it does not: %m"); + saved_in_initrd = r >= 0; + } return saved_in_initrd; } diff --git a/src/shared/switch-root.c b/src/shared/switch-root.c index 0b93312bbf..8d4c07150e 100644 --- a/src/shared/switch-root.c +++ b/src/shared/switch-root.c @@ -32,7 +32,6 @@ int switch_root(const char *new_root, _cleanup_free_ char *resolved_old_root_after = NULL; _cleanup_close_ int old_root_fd = -1; - bool old_root_remove; int r; assert(new_root); @@ -42,12 +41,16 @@ int switch_root(const char *new_root, return 0; /* Check if we shall remove the contents of the old root */ - old_root_remove = in_initrd(); - if (old_root_remove) { - old_root_fd = open("/", O_RDONLY|O_NONBLOCK|O_CLOEXEC|O_NOCTTY|O_DIRECTORY); - if (old_root_fd < 0) - return log_error_errno(errno, "Failed to open root directory: %m"); - } + old_root_fd = open("/", O_RDONLY | O_CLOEXEC | O_DIRECTORY); + if (old_root_fd < 0) + return log_error_errno(errno, "Failed to open root directory: %m"); + r = fd_is_temporary_fs(old_root_fd); + if (r < 0) + return log_error_errno(r, "Failed to stat root directory: %m"); + if (r > 0) + log_debug("Root directory is on tmpfs, will do cleanup later."); + else + old_root_fd = safe_close(old_root_fd); /* Determine where we shall place the old root after the transition */ r = chase_symlinks(old_root_after, new_root, CHASE_PREFIX_ROOT|CHASE_NONEXISTENT, &resolved_old_root_after, NULL); @@ -117,9 +120,8 @@ int switch_root(const char *new_root, struct stat rb; if (fstat(old_root_fd, &rb) < 0) - log_warning_errno(errno, "Failed to stat old root directory, leaving: %m"); - else - (void) rm_rf_children(TAKE_FD(old_root_fd), 0, &rb); /* takes possession of the dir fd, even on failure */ + return log_error_errno(errno, "Failed to stat old root directory: %m"); + (void) rm_rf_children(TAKE_FD(old_root_fd), 0, &rb); /* takes possession of the dir fd, even on failure */ } return 0;