1
0
mirror of https://github.com/systemd/systemd.git synced 2025-03-28 02:50:16 +03:00

mount-setup: generalize cgroupfs_recursiveprot_supported()

This commit is contained in:
Mike Yuan 2025-03-06 12:56:29 +01:00
parent 104587314f
commit 39b0e764bd
No known key found for this signature in database
GPG Key ID: 417471C0A40F58B3
4 changed files with 24 additions and 36 deletions

3
README
View File

@ -52,7 +52,8 @@ REQUIREMENTS:
# FIXME: actually drop compat glue before v258
Linux kernel ≥ 5.6 for getrandom() GRND_INSECURE
≥ 5.7 for CLONE_INTO_CGROUP, BPF links and the BPF LSM hook
≥ 5.7 for CLONE_INTO_CGROUP, cgroup2fs memory_recursiveprot option,
BPF links and the BPF LSM hook
⚠️ Kernel versions below 5.7 ("recommended baseline") have significant gaps
in functionality and are not recommended for use with this version

View File

@ -26,6 +26,7 @@
#include "loopback-setup.h"
#include "missing_syscall.h"
#include "mkdir-label.h"
#include "mount-setup.h"
#include "mount-util.h"
#include "mountpoint-util.h"
#include "namespace-util.h"
@ -1373,34 +1374,16 @@ static int mount_private_sysfs(const MountEntry *m, const NamespaceParameters *p
return mount_private_apivfs("sysfs", mount_entry_path(m), "/sys", /* opts = */ NULL, p->runtime_scope);
}
/* Probes whether the "cgroup2" filesystem type accepts the 'memory_recursiveprot' mount option.
 * Returns true only when mount_option_supported() yields a positive result; a negative (error)
 * or zero result is logged at debug level and reported as false. */
static bool check_recursiveprot_supported(void) {
int r;
/* memory_recursiveprot is only supported for kernels >= 5.7. Note mount_option_supported uses fsopen()
* and fsconfig() which are supported for kernels >= 5.2. So if mount_option_supported() returns an
* error, we can assume memory_recursiveprot is not supported. */
r = mount_option_supported("cgroup2", "memory_recursiveprot", NULL);
/* A failed probe is not propagated to the caller: it is only logged and then treated as "not supported". */
if (r < 0)
log_debug_errno(r, "Failed to determine whether the 'memory_recursiveprot' mount option is supported, assuming not: %m");
else if (r == 0)
log_debug("This kernel version does not support 'memory_recursiveprot', not using mount option.");
return r > 0;
}
static int mount_private_cgroup2fs(const MountEntry *m, const NamespaceParameters *p) {
_cleanup_free_ char *opts = NULL;
assert(m);
assert(p);
if (check_recursiveprot_supported()) {
opts = strdup(strempty(mount_entry_options(m)));
if (cgroupfs_recursiveprot_supported()) {
opts = strextend_with_separator(NULL, ",", mount_entry_options(m) ?: POINTER_MAX, "memory_recursiveprot");
if (!opts)
return -ENOMEM;
if (!strextend_with_separator(&opts, ",", "memory_recursiveprot"))
return -ENOMEM;
}
return mount_private_apivfs("cgroup2", mount_entry_path(m), "/sys/fs/cgroup", opts ?: mount_entry_options(m), p->runtime_scope);

View File

@ -52,27 +52,19 @@ typedef struct MountPoint {
MountMode mode;
} MountPoint;
/* The first three entries we might need before SELinux is up. The
* fourth (securityfs) is needed by IMA to load a custom policy. The
* other ones we can delay until SELinux and IMA are loaded. When
* SMACK is enabled we need smackfs, too, so it's a fifth one. */
#if ENABLE_SMACK
#define N_EARLY_MOUNT 5
#else
#define N_EARLY_MOUNT 4
#endif
static bool check_recursiveprot_supported(void) {
bool cgroupfs_recursiveprot_supported(void) {
int r;
if (!cg_is_unified_wanted())
return false;
r = mount_option_supported("cgroup2", "memory_recursiveprot", NULL);
/* Added in kernel 5.7 */
r = mount_option_supported("cgroup2", "memory_recursiveprot", /* value = */ NULL);
if (r < 0)
log_debug_errno(r, "Failed to determine whether the 'memory_recursiveprot' mount option is supported, assuming not: %m");
log_debug_errno(r, "Failed to determine whether cgroupfs supports 'memory_recursiveprot' mount option, assuming not: %m");
else if (r == 0)
log_debug("This kernel version does not support 'memory_recursiveprot', not using mount option.");
log_debug("'memory_recursiveprot' not supported by cgroupfs, not using mount option.");
return r > 0;
}
@ -103,7 +95,7 @@ static const MountPoint mount_table[] = {
{ "tmpfs", "/run", "tmpfs", "mode=0755" TMPFS_LIMITS_RUN, MS_NOSUID|MS_NODEV|MS_STRICTATIME,
NULL, MNT_FATAL|MNT_IN_CONTAINER },
{ "cgroup2", "/sys/fs/cgroup", "cgroup2", "nsdelegate,memory_recursiveprot", MS_NOSUID|MS_NOEXEC|MS_NODEV,
check_recursiveprot_supported, MNT_IN_CONTAINER|MNT_CHECK_WRITABLE },
cgroupfs_recursiveprot_supported, MNT_IN_CONTAINER|MNT_CHECK_WRITABLE },
{ "cgroup2", "/sys/fs/cgroup", "cgroup2", "nsdelegate", MS_NOSUID|MS_NOEXEC|MS_NODEV,
cg_is_unified_wanted, MNT_IN_CONTAINER|MNT_CHECK_WRITABLE },
{ "cgroup2", "/sys/fs/cgroup", "cgroup2", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV,
@ -120,6 +112,16 @@ static const MountPoint mount_table[] = {
NULL, MNT_NONE, },
};
/* The first three entries we might need before SELinux is up. The
* fourth (securityfs) is needed by IMA to load a custom policy. The
* other ones we can delay until SELinux and IMA are loaded. When
* SMACK is enabled we need smackfs, too, so it's a fifth one. */
#if ENABLE_SMACK
#define N_EARLY_MOUNT 5
#else
#define N_EARLY_MOUNT 4
#endif
assert_cc(N_EARLY_MOUNT <= ELEMENTSOF(mount_table));
bool mount_point_is_api(const char *path) {

View File

@ -10,3 +10,5 @@ int mount_setup_early(void);
int mount_setup(bool loaded_policy, bool leave_propagation);
int mount_cgroup_legacy_controllers(bool loaded_policy);
bool cgroupfs_recursiveprot_supported(void);