1
0
mirror of https://github.com/systemd/systemd.git synced 2025-03-08 08:58:27 +03:00

Merge pull request #28764 from yuwata/core-namespace

core/namespace: cleanups
This commit is contained in:
Yu Watanabe 2023-08-23 12:43:29 +09:00 committed by GitHub
commit 244f518f1d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 72 additions and 295 deletions

View File

@ -209,26 +209,30 @@ static const MountEntry protect_system_strict_table[] = {
};
static const char * const mount_mode_table[_MOUNT_MODE_MAX] = {
[INACCESSIBLE] = "inaccessible",
[OVERLAY_MOUNT] = "overlay",
[BIND_MOUNT] = "bind",
[BIND_MOUNT_RECURSIVE] = "rbind",
[PRIVATE_TMP] = "private-tmp",
[PRIVATE_DEV] = "private-dev",
[BIND_DEV] = "bind-dev",
[EMPTY_DIR] = "empty",
[PRIVATE_SYSFS] = "private-sysfs",
[BIND_SYSFS] = "bind-sysfs",
[PROCFS] = "procfs",
[READONLY] = "read-only",
[READWRITE] = "read-write",
[TMPFS] = "tmpfs",
[MOUNT_IMAGES] = "mount-images",
[READWRITE_IMPLICIT] = "rw-implicit",
[EXEC] = "exec",
[NOEXEC] = "noexec",
[MQUEUEFS] = "mqueuefs",
[MKDIR] = "mkdir",
[INACCESSIBLE] = "inaccessible",
[OVERLAY_MOUNT] = "overlay",
[MOUNT_IMAGES] = "mount-images",
[BIND_MOUNT] = "bind",
[BIND_MOUNT_RECURSIVE] = "rbind",
[PRIVATE_TMP] = "private-tmp",
[PRIVATE_TMP_READONLY] = "private-tmp-read-only",
[PRIVATE_DEV] = "private-dev",
[BIND_DEV] = "bind-dev",
[EMPTY_DIR] = "empty",
[PRIVATE_SYSFS] = "private-sysfs",
[BIND_SYSFS] = "bind-sysfs",
[PROCFS] = "procfs",
[READONLY] = "read-only",
[READWRITE] = "read-write",
[NOEXEC] = "noexec",
[EXEC] = "exec",
[TMPFS] = "tmpfs",
[RUN] = "run",
[EXTENSION_DIRECTORIES] = "extension-directories",
[EXTENSION_IMAGES] = "extension-images",
[MQUEUEFS] = "mqueuefs",
[READWRITE_IMPLICIT] = "read-write-implicit",
[MKDIR] = "mkdir",
};
/* Helper struct for naming simplicity and reusability */
@ -1047,34 +1051,7 @@ static int mount_bind_dev(const MountEntry *m) {
if (r > 0) /* make this a NOP if /dev is already a mount point */
return 0;
r = mount_nofollow_verbose(LOG_DEBUG, "/dev", mount_entry_path(m), NULL, MS_BIND|MS_REC, NULL);
if (r < 0)
return r;
return 1;
}
static int mount_private_sysfs(const MountEntry *m) {
const char *p = mount_entry_path(ASSERT_PTR(m));
int r;
(void) mkdir_p_label(p, 0755);
r = remount_sysfs(p);
if (r < 0 && (ERRNO_IS_PRIVILEGE(r) || ERRNO_IS_NOT_SUPPORTED(r))) {
/* Running with an unprivileged user (PrivateUsers=yes), or the kernel seems old. Falling
* back to bind mount the host's version so that we get all child mounts of it, too. */
log_debug_errno(r, "Failed to remount sysfs on %s, falling back to bind mount: %m", p);
(void) umount_recursive(p, 0);
r = mount_nofollow_verbose(LOG_DEBUG, "/sys", p, NULL, MS_BIND|MS_REC, NULL);
}
if (r < 0)
return log_debug_errno(r, "Failed to remount sysfs on %s: %m", p);
return 1;
return mount_nofollow_verbose(LOG_DEBUG, "/dev", mount_entry_path(m), NULL, MS_BIND|MS_REC, NULL);
}
static int mount_bind_sysfs(const MountEntry *m) {
@ -1091,11 +1068,34 @@ static int mount_bind_sysfs(const MountEntry *m) {
return 0;
/* Bind mount the host's version so that we get all child mounts of it, too. */
r = mount_nofollow_verbose(LOG_DEBUG, "/sys", mount_entry_path(m), NULL, MS_BIND|MS_REC, NULL);
if (r < 0)
return mount_nofollow_verbose(LOG_DEBUG, "/sys", mount_entry_path(m), NULL, MS_BIND|MS_REC, NULL);
}
static int mount_private_sysfs(const MountEntry *m) {
const char *entry_path = mount_entry_path(ASSERT_PTR(m));
int r, n;
(void) mkdir_p_label(entry_path, 0755);
n = umount_recursive(entry_path, 0);
r = mount_nofollow_verbose(LOG_DEBUG, "sysfs", entry_path, "sysfs", MS_NOSUID|MS_NOEXEC|MS_NODEV, NULL);
if (ERRNO_IS_NEG_PRIVILEGE(r)) {
/* When we do not have enough privileges to mount sysfs, fall back to use existing /sys. */
if (n > 0)
/* /sys or some of sub-mounts are umounted in the above. Refuse incomplete tree.
* Propagate the original error code returned by mount() in the above. */
return r;
return mount_bind_sysfs(m);
} else if (r < 0)
return r;
return 1;
/* We mounted a new instance now. Let's bind mount the children over now. */
(void) bind_mount_submounts("/sys", entry_path);
return 0;
}
static int mount_procfs(const MountEntry *m, const NamespaceInfo *ns_info) {
@ -1154,34 +1154,32 @@ static int mount_procfs(const MountEntry *m, const NamespaceInfo *ns_info) {
* means we really don't want to use it, since it would affect our host's /proc
* mount. Hence let's gracefully fallback to a classic, unrestricted version. */
r = mount_nofollow_verbose(LOG_DEBUG, "proc", entry_path, "proc", MS_NOSUID|MS_NOEXEC|MS_NODEV, NULL);
if (r == -EPERM) {
/* When we do not have enough privileges to mount /proc, fallback to use existing /proc. */
if (ERRNO_IS_NEG_PRIVILEGE(r)) {
/* When we do not have enough privileges to mount /proc, fall back to use existing /proc. */
if (n > 0)
/* /proc or some of sub-mounts are umounted in the above. Refuse incomplete tree.
* Propagate the original error code returned by mount() in the above. */
return -EPERM;
return r;
r = path_is_mount_point(entry_path, NULL, 0);
if (r < 0)
return log_debug_errno(r, "Unable to determine whether /proc is already mounted: %m");
if (r == 0) {
/* We lack permissions to mount a new instance of /proc, and it is not already
* mounted. But we can access the host's, so as a final fallback bind-mount it to
* the destination, as most likely we are inside a user manager in an unprivileged
* user namespace. */
r = mount_nofollow_verbose(LOG_DEBUG, "/proc", entry_path, NULL, MS_BIND|MS_REC, NULL);
if (r < 0)
return -EPERM;
}
if (r > 0)
return 0;
/* We lack permissions to mount a new instance of /proc, and it is not already mounted. But
* we can access the host's, so as a final fallback bind-mount it to the destination, as most
* likely we are inside a user manager in an unprivileged user namespace. */
return mount_nofollow_verbose(LOG_DEBUG, "/proc", entry_path, NULL, MS_BIND|MS_REC, NULL);
} else if (r < 0)
return r;
else
/* We mounted a new instance now. Let's bind mount the children over now. This matters for
* nspawn where a bunch of files are overmounted, in particular the boot id */
(void) bind_mount_submounts("/proc", entry_path);
return 1;
/* We mounted a new instance now. Let's bind mount the children over now. This matters for nspawn
* where a bunch of files are overmounted, in particular the boot id */
(void) bind_mount_submounts("/proc", entry_path);
return 0;
}
static int mount_tmpfs(const MountEntry *m) {
@ -1207,7 +1205,7 @@ static int mount_tmpfs(const MountEntry *m) {
if (r < 0)
return log_debug_errno(r, "Failed to fix label of '%s' as '%s': %m", entry_path, inner_path);
return 1;
return 0;
}
static int mount_run(const MountEntry *m) {
@ -1305,7 +1303,7 @@ static int mount_image(
if (r < 0)
return log_debug_errno(r, "Failed to mount image %s on %s: %m", mount_entry_source(m), mount_entry_path(m));
return 1;
return 0;
}
static int mount_overlay(const MountEntry *m) {
@ -1321,10 +1319,8 @@ static int mount_overlay(const MountEntry *m) {
r = mount_nofollow_verbose(LOG_DEBUG, "overlay", mount_entry_path(m), "overlay", MS_RDONLY, options);
if (r == -ENOENT && m->ignore)
return 0;
if (r < 0)
return r;
return 1;
return r;
}
static int follow_symlink(

View File

@ -1235,7 +1235,6 @@ static void sub_mount_drop(SubMount *s, size_t n) {
static int get_sub_mounts(
const char *prefix,
bool clone_tree,
SubMount **ret_mounts,
size_t *ret_n_mounts) {
_cleanup_(mnt_free_tablep) struct libmnt_table *table = NULL;
@ -1287,10 +1286,7 @@ static int get_sub_mounts(
continue;
}
if (clone_tree)
mount_fd = open_tree(AT_FDCWD, path, OPEN_TREE_CLONE | OPEN_TREE_CLOEXEC | AT_RECURSIVE);
else
mount_fd = open(path, O_CLOEXEC|O_PATH);
mount_fd = open(path, O_CLOEXEC|O_PATH);
if (mount_fd < 0) {
if (errno == ENOENT) /* The path may be hidden by another over-mount or already unmounted. */
continue;
@ -1319,66 +1315,6 @@ static int get_sub_mounts(
return 0;
}
/* Walks the array of collected sub-mounts and attaches each one, via its mount_fd, onto the path
 * recorded for it. Slots that carry no path or no valid fd are passed over. Stops at the first
 * move_mount() failure and returns what log_debug_errno() yields for it; returns 0 otherwise. */
static int move_sub_mounts(SubMount *mounts, size_t n) {
        assert(mounts || n == 0);

        for (size_t idx = 0; idx < n; idx++) {
                SubMount *sub = &mounts[idx];

                /* Nothing usable recorded in this slot. */
                if (!sub->path || sub->mount_fd < 0)
                        continue;

                /* Create the target directory if needed; the result is deliberately ignored. */
                (void) mkdir_p_label(sub->path, 0755);

                if (move_mount(sub->mount_fd, "", AT_FDCWD, sub->path, MOVE_MOUNT_F_EMPTY_PATH) < 0)
                        return log_debug_errno(errno, "Failed to move mount_fd to '%s': %m", sub->path);
        }

        return 0;
}
/* Mounts 'what' (filesystem 'type', with 'flags' and 'options') on 'where'. If 'where' already is a
 * mount point, its sub-mounts are collected beforehand, the old tree is unmounted, and the collected
 * sub-mounts are moved onto the freshly mounted filesystem. Returns a negative value on failure. */
int remount_and_move_sub_mounts(
                const char *what,
                const char *where,
                const char *type,
                unsigned long flags,
                const char *options) {

        SubMount *sub_mounts = NULL;
        size_t n_sub_mounts = 0;
        int r;

        /* Release the collected sub-mount array on every return path. */
        CLEANUP_ARRAY(sub_mounts, n_sub_mounts, sub_mount_array_free);

        assert(where);

        /* Why this exists: after creating a new network namespace sysfs has to be mounted anew (unlike
         * procfs), because the old sysfs instance still exposes the properties of the network
         * interfaces of the main network namespace, e.g. /sys/class/net/eth0. Everything that was
         * mounted below the old sysfs is carried over onto the new instance. */

        r = path_is_mount_point(where, NULL, 0);
        if (r < 0)
                return log_debug_errno(r, "Failed to determine if '%s' is a mountpoint: %m", where);
        if (r == 0)
                /* Not a mount point yet, so there is nothing to preserve: just mount. */
                return mount_nofollow_verbose(LOG_DEBUG, what, where, type, flags, options);

        /* Step 1: take duplicates of all sub-mounts below 'where'. */
        r = get_sub_mounts(where, /* clone_tree= */ true, &sub_mounts, &n_sub_mounts);
        if (r < 0)
                return r;

        /* Step 2: unmount the existing mount together with its sub-mounts (result ignored). */
        (void) umount_recursive(where, 0);

        /* Step 3: establish the new mount on the now free mount point. */
        r = mount_nofollow_verbose(LOG_DEBUG, what, where, type, flags, options);
        if (r < 0)
                return r;

        /* Step 4: put the duplicated sub-mounts back on top of the new mount. */
        return move_sub_mounts(sub_mounts, n_sub_mounts);
}
int bind_mount_submounts(
const char *source,
const char *target) {
@ -1395,7 +1331,7 @@ int bind_mount_submounts(
CLEANUP_ARRAY(mounts, n, sub_mount_array_free);
r = get_sub_mounts(source, /* clone_tree= */ false, &mounts, &n);
r = get_sub_mounts(source, &mounts, &n);
if (r < 0)
return r;
@ -1430,10 +1366,6 @@ int bind_mount_submounts(
return ret;
}
/* Convenience wrapper: mounts a "sysfs" instance on 'where' through remount_and_move_sub_mounts()
 * and hands back its return value unchanged. */
int remount_sysfs(const char *where) {
        /* Mount flags passed for the sysfs instance. */
        const unsigned long sysfs_flags = MS_NOSUID|MS_NOEXEC|MS_NODEV;

        return remount_and_move_sub_mounts("sysfs", where, "sysfs", sysfs_flags, NULL);
}
int make_mount_point_inode_from_stat(const struct stat *st, const char *dest, mode_t mode) {
assert(st);
assert(dest);

View File

@ -124,14 +124,6 @@ int make_userns(uid_t uid_shift, uid_t uid_range, uid_t owner, RemountIdmapping
int remount_idmap_fd(const char *p, int userns_fd);
int remount_idmap(const char *p, uid_t uid_shift, uid_t uid_range, uid_t owner, RemountIdmapping idmapping);
/* Mounts 'what' of filesystem 'type' on 'where' with the given 'flags' and 'options'. When 'where'
 * already is a mount point, its sub-mounts are collected first, the old tree is unmounted, and the
 * sub-mounts are moved onto the new mount afterwards. */
int remount_and_move_sub_mounts(
const char *what,
const char *where,
const char *type,
unsigned long flags,
const char *options);
/* Shorthand for remount_and_move_sub_mounts() with "sysfs" as source and type and
 * MS_NOSUID|MS_NOEXEC|MS_NODEV as flags. */
int remount_sysfs(const char *where);
int bind_mount_submounts(
const char *source,
const char *target);

View File

@ -25,136 +25,6 @@
#include "tests.h"
#include "tmpfile-util.h"
/* Checks that remount_and_move_sub_mounts() replaces the top-level mount while the visible sub-mounts
 * stay in place. The body runs in a forked child with its own mount namespace. The test is skipped
 * when not running privileged, or when the call under test reports -EINVAL or a not-supported error. */
TEST(remount_and_move_sub_mounts) {
        int r;

        if (geteuid() != 0 || have_effective_cap(CAP_SYS_ADMIN) <= 0)
                return (void) log_tests_skipped("not running privileged");

        r = safe_fork("(remount-and-move-sub-mounts)",
                      FORK_RESET_SIGNALS |
                      FORK_CLOSE_ALL_FDS |
                      FORK_DEATHSIG |
                      FORK_WAIT |
                      FORK_REOPEN_LOG |
                      FORK_LOG |
                      FORK_NEW_MOUNTNS |
                      FORK_MOUNTNS_SLAVE,
                      NULL);
        assert_se(r >= 0);

        if (r == 0) {
                _cleanup_free_ char *d = NULL, *fn = NULL;

                /* Top-level tmpfs with a marker file that must be gone after the remount. */
                assert_se(mkdtemp_malloc(NULL, &d) >= 0);

                assert_se(mount_nofollow_verbose(LOG_DEBUG, "tmpfs", d, "tmpfs", MS_NOSUID|MS_NODEV, NULL) >= 0);

                assert_se(fn = path_join(d, "memo"));
                assert_se(write_string_file(fn, d, WRITE_STRING_FILE_CREATE|WRITE_STRING_FILE_AVOID_NEWLINE) >= 0);
                assert_se(access(fn, F_OK) >= 0);

                /* Create fs tree: every node is its own tmpfs holding a "memo" file with its own path. */
                FOREACH_STRING(p, "sub1", "sub1/hoge", "sub1/foo", "sub2", "sub2/aaa", "sub2/bbb") {
                        _cleanup_free_ char *where = NULL, *filename = NULL;

                        assert_se(where = path_join(d, p));
                        assert_se(mkdir_p(where, 0755) >= 0);
                        assert_se(mount_nofollow_verbose(LOG_DEBUG, "tmpfs", where, "tmpfs", MS_NOSUID|MS_NODEV, NULL) >= 0);

                        assert_se(filename = path_join(where, "memo"));
                        assert_se(write_string_file(filename, where, WRITE_STRING_FILE_CREATE|WRITE_STRING_FILE_AVOID_NEWLINE) >= 0);
                        assert_se(access(filename, F_OK) >= 0);
                }

                /* Hide sub1: mount another tmpfs over it and create a different set of children. */
                FOREACH_STRING(p, "sub1", "sub1/hogehoge", "sub1/foofoo") {
                        _cleanup_free_ char *where = NULL, *filename = NULL;

                        assert_se(where = path_join(d, p));
                        assert_se(mkdir_p(where, 0755) >= 0);
                        assert_se(mount_nofollow_verbose(LOG_DEBUG, "tmpfs", where, "tmpfs", MS_NOSUID|MS_NODEV, NULL) >= 0);

                        assert_se(filename = path_join(where, "memo"));
                        assert_se(write_string_file(filename, where, WRITE_STRING_FILE_CREATE|WRITE_STRING_FILE_AVOID_NEWLINE) >= 0);
                        assert_se(access(filename, F_OK) >= 0);
                }

                /* Remount the main fs. */
                r = remount_and_move_sub_mounts("tmpfs", d, "tmpfs", MS_NOSUID|MS_NODEV, NULL);
                if (r == -EINVAL || (r < 0 && ERRNO_IS_NOT_SUPPORTED(r))) {
                        log_tests_skipped_errno(r, "The kernel seems too old: %m");
                        _exit(EXIT_SUCCESS);
                }

                /* Any other failure of the function under test is a test failure in its own right;
                 * report it here rather than through a follow-up check. */
                assert_se(r >= 0);

                /* Check the file in the main fs does not exist. */
                assert_se(access(fn, F_OK) < 0 && errno == ENOENT);

                /* Check the files in sub-mounts are kept. */
                FOREACH_STRING(p, "sub1", "sub1/hogehoge", "sub1/foofoo", "sub2", "sub2/aaa", "sub2/bbb") {
                        _cleanup_free_ char *where = NULL, *filename = NULL, *content = NULL;

                        assert_se(where = path_join(d, p));
                        assert_se(filename = path_join(where, "memo"));
                        assert_se(read_full_file(filename, &content, NULL) >= 0);
                        assert_se(streq(content, where));
                }

                /* The children created below the first sub1 mount, which were hidden by the second
                 * sub1 mount before the remount, must not be reachable afterwards. */
                FOREACH_STRING(p, "sub1/hoge", "sub1/foo") {
                        _cleanup_free_ char *where = NULL;

                        assert_se(where = path_join(d, p));
                        assert_se(access(where, F_OK) < 0 && errno == ENOENT);
                }

                _exit(EXIT_SUCCESS);
        }
}
/* Checks that remount_sysfs() on /sys, performed after unsharing the network namespace, hides the
 * network interfaces of the main namespace. Needs privileges, a sysfs on /sys and the
 * "dummy-test-mnt" interface; skipped otherwise, and also when the call under test reports
 * -EINVAL or a not-supported error. */
TEST(remount_sysfs) {
        int r;

        if (geteuid() != 0 || have_effective_cap(CAP_SYS_ADMIN) <= 0)
                return (void) log_tests_skipped("not running privileged");

        if (path_is_fs_type("/sys", SYSFS_MAGIC) <= 0)
                return (void) log_tests_skipped("sysfs is not mounted on /sys");

        if (access("/sys/class/net/dummy-test-mnt", F_OK) < 0)
                return (void) log_tests_skipped_errno(errno, "The network interface dummy-test-mnt does not exist");

        r = safe_fork("(remount-sysfs)",
                      FORK_RESET_SIGNALS |
                      FORK_CLOSE_ALL_FDS |
                      FORK_DEATHSIG |
                      FORK_WAIT |
                      FORK_REOPEN_LOG |
                      FORK_LOG |
                      FORK_NEW_MOUNTNS |
                      FORK_MOUNTNS_SLAVE,
                      NULL);
        assert_se(r >= 0);

        if (r == 0) {
                assert_se(unshare(CLONE_NEWNET) >= 0);

                /* Even after unshare(), the interfaces of the main namespace can still be accessed
                 * through the old sysfs instance. */
                assert_se(access("/sys/class/net/lo", F_OK) >= 0);
                assert_se(access("/sys/class/net/dummy-test-mnt", F_OK) >= 0);

                r = remount_sysfs("/sys");
                if (r == -EINVAL || (r < 0 && ERRNO_IS_NOT_SUPPORTED(r))) {
                        log_tests_skipped_errno(r, "The kernel seems too old: %m");
                        _exit(EXIT_SUCCESS);
                }

                /* Any other failure of the function under test is a test failure in its own right. */
                assert_se(r >= 0);

                /* After remounting sysfs, the interfaces in the main namespace cannot be accessed. */
                assert_se(access("/sys/class/net/lo", F_OK) >= 0);
                assert_se(access("/sys/class/net/dummy-test-mnt", F_OK) < 0 && errno == ENOENT);

                _exit(EXIT_SUCCESS);
        }
}
TEST(mount_option_mangle) {
char *opts = NULL;
unsigned long f;
@ -636,17 +506,4 @@ TEST(bind_mount_submounts) {
assert_se(umount_recursive(b, 0) >= 0);
}
/* Intro callback handed to DEFINE_TEST_MAIN_FULL(): tries to create the dummy network interface
 * that the remount_sysfs() test looks for. The command's outcome is ignored; always returns 0. */
static int intro(void) {
        static const char add_cmd[] = "ip link add dummy-test-mnt type dummy";

        (void) system(add_cmd);

        return 0;
}
/* Outro callback handed to DEFINE_TEST_MAIN_FULL(): tries to delete the "dummy-test-mnt" network
 * interface again. The command's outcome is ignored; always returns 0. */
static int outro(void) {
        static const char del_cmd[] = "ip link del dummy-test-mnt";

        (void) system(del_cmd);

        return 0;
}
DEFINE_TEST_MAIN_FULL(LOG_DEBUG, intro, outro);
DEFINE_TEST_MAIN(LOG_DEBUG);