From 9014fd82f7ca088b5548d27138c7ffd78aa317e4 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Fri, 30 Sep 2022 14:59:38 +0200 Subject: [PATCH 1/6] missing_sched: add CLONE_NEWTIME --- src/basic/missing_sched.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/basic/missing_sched.h b/src/basic/missing_sched.h index 6a889f66d95..bcd5b771206 100644 --- a/src/basic/missing_sched.h +++ b/src/basic/missing_sched.h @@ -7,6 +7,11 @@ #define CLONE_NEWCGROUP 0x02000000 #endif +/* 769071ac9f20b6a447410c7eaa55d1a5233ef40c (5.8) */ +#ifndef CLONE_NEWTIME +#define CLONE_NEWTIME 0x00000080 +#endif + /* Not exposed yet. Defined at include/linux/sched.h */ #ifndef PF_KTHREAD #define PF_KTHREAD 0x00200000 From c3b9c418c0e688892284aa83fefaea313fdabccc Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Fri, 30 Sep 2022 14:21:01 +0200 Subject: [PATCH 2/6] namespace-util: add namespace_info --- src/basic/namespace-util.c | 26 ++++++++++++++++++++++---- src/basic/namespace-util.h | 19 +++++++++++++++++++ src/core/namespace.c | 1 + src/core/namespace.h | 13 +------------ 4 files changed, 43 insertions(+), 16 deletions(-) diff --git a/src/basic/namespace-util.c b/src/basic/namespace-util.c index 4da9cb4cae7..b330e2a11d1 100644 --- a/src/basic/namespace-util.c +++ b/src/basic/namespace-util.c @@ -9,12 +9,30 @@ #include "fileio.h" #include "missing_fs.h" #include "missing_magic.h" +#include "missing_sched.h" #include "namespace-util.h" #include "process-util.h" #include "stat-util.h" #include "stdio-util.h" #include "user-util.h" +const struct namespace_info namespace_info[] = { + [NAMESPACE_CGROUP] = { "cgroup", "ns/cgroup", CLONE_NEWCGROUP, }, + [NAMESPACE_IPC] = { "ipc", "ns/ipc", CLONE_NEWIPC, }, + [NAMESPACE_NET] = { "net", "ns/net", CLONE_NEWNET, }, + /* So, the mount namespace flag is called CLONE_NEWNS for historical + * reasons. Let's expose it here under a more explanatory name: "mnt". + * This is in-line with how the kernel exposes namespaces in /proc/$PID/ns. */ + [NAMESPACE_MOUNT] = { "mnt", "ns/mnt", CLONE_NEWNS, }, + [NAMESPACE_PID] = { "pid", "ns/pid", CLONE_NEWPID, }, + [NAMESPACE_USER] = { "user", "ns/user", CLONE_NEWUSER, }, + [NAMESPACE_UTS] = { "uts", "ns/uts", CLONE_NEWUTS, }, + [NAMESPACE_TIME] = { "time", "ns/time", CLONE_NEWTIME, }, + { /* Allow callers to iterate over the array without using _NAMESPACE_TYPE_MAX. */ }, +}; + +#define pid_namespace_path(pid, type) procfs_file_alloca(pid, namespace_info[type].proc_path) + int namespace_open(pid_t pid, int *pidns_fd, int *mntns_fd, int *netns_fd, int *userns_fd, int *root_fd) { _cleanup_close_ int pidnsfd = -1, mntnsfd = -1, netnsfd = -1, usernsfd = -1; int rfd = -1; @@ -24,7 +42,7 @@ int namespace_open(pid_t pid, int *pidns_fd, int *mntns_fd, int *netns_fd, int * if (mntns_fd) { const char *mntns; - mntns = procfs_file_alloca(pid, "ns/mnt"); + mntns = pid_namespace_path(pid, NAMESPACE_MOUNT); mntnsfd = open(mntns, O_RDONLY|O_NOCTTY|O_CLOEXEC); if (mntnsfd < 0) return -errno; @@ -33,7 +51,7 @@ int namespace_open(pid_t pid, int *pidns_fd, int *mntns_fd, int *netns_fd, int * if (pidns_fd) { const char *pidns; - pidns = procfs_file_alloca(pid, "ns/pid"); + pidns = pid_namespace_path(pid, NAMESPACE_PID); pidnsfd = open(pidns, O_RDONLY|O_NOCTTY|O_CLOEXEC); if (pidnsfd < 0) return -errno; @@ -42,7 +60,7 @@ int namespace_open(pid_t pid, int *pidns_fd, int *mntns_fd, int *netns_fd, int * if (netns_fd) { const char *netns; - netns = procfs_file_alloca(pid, "ns/net"); + netns = pid_namespace_path(pid, NAMESPACE_NET); netnsfd = open(netns, O_RDONLY|O_NOCTTY|O_CLOEXEC); if (netnsfd < 0) return -errno; @@ -51,7 +69,7 @@ int namespace_open(pid_t pid, int *pidns_fd, int *mntns_fd, int *netns_fd, int * if (userns_fd) { const char *userns; - userns = procfs_file_alloca(pid, "ns/user"); + userns = pid_namespace_path(pid, NAMESPACE_USER); usernsfd = open(userns, O_RDONLY|O_NOCTTY|O_CLOEXEC); if (usernsfd < 0 && errno != ENOENT) return -errno; diff --git a/src/basic/namespace-util.h b/src/basic/namespace-util.h index 24dce0939ec..5c1912985d8 100644 --- a/src/basic/namespace-util.h +++ b/src/basic/namespace-util.h @@ -3,6 +3,25 @@ #include +typedef enum NamespaceType { + NAMESPACE_CGROUP, + NAMESPACE_IPC, + NAMESPACE_NET, + NAMESPACE_MOUNT, + NAMESPACE_PID, + NAMESPACE_USER, + NAMESPACE_UTS, + NAMESPACE_TIME, + _NAMESPACE_TYPE_MAX, + _NAMESPACE_TYPE_INVALID = -EINVAL, +} NamespaceType; + +extern const struct namespace_info { + const char *proc_name; + const char *proc_path; + unsigned int clone_flag; +} namespace_info[_NAMESPACE_TYPE_MAX + 1]; + int namespace_open(pid_t pid, int *pidns_fd, int *mntns_fd, int *netns_fd, int *userns_fd, int *root_fd); int namespace_enter(int pidns_fd, int mntns_fd, int netns_fd, int userns_fd, int root_fd); diff --git a/src/core/namespace.c b/src/core/namespace.c index 1911c413917..b66340437ae 100644 --- a/src/core/namespace.c +++ b/src/core/namespace.c @@ -2952,6 +2952,7 @@ static const char* const namespace_type_table[] = { [NAMESPACE_USER] = "user", [NAMESPACE_PID] = "pid", [NAMESPACE_NET] = "net", + [NAMESPACE_TIME] = "time", }; DEFINE_STRING_TABLE_LOOKUP(namespace_type, NamespaceType); diff --git a/src/core/namespace.h b/src/core/namespace.h index 3ef41d2c628..2ba59701597 100644 --- a/src/core/namespace.h +++ b/src/core/namespace.h @@ -15,6 +15,7 @@ typedef struct MountImage MountImage; #include "dissect-image.h" #include "fs-util.h" #include "macro.h" +#include "namespace-util.h" #include "string-util.h" typedef enum ProtectHome { @@ -26,18 +27,6 @@ typedef enum ProtectHome { _PROTECT_HOME_INVALID = -EINVAL, } ProtectHome; -typedef enum NamespaceType { - NAMESPACE_MOUNT, - NAMESPACE_CGROUP, - NAMESPACE_UTS, - NAMESPACE_IPC, - NAMESPACE_USER, - NAMESPACE_PID, - NAMESPACE_NET, - _NAMESPACE_TYPE_MAX, - _NAMESPACE_TYPE_INVALID = -EINVAL, -} NamespaceType; - typedef enum ProtectSystem { PROTECT_SYSTEM_NO, PROTECT_SYSTEM_YES, From 241b15779be7621db5ea20a9c5611c6c8082afd9 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Fri, 30 Sep 2022 15:02:18 +0200 Subject: [PATCH 3/6] nsflags: replace namespace_flag_map with general namespace_info introduced earlier --- src/shared/nsflags.c | 32 ++++++++++---------------------- src/shared/nsflags.h | 7 ------- src/shared/seccomp-util.c | 9 +++++---- 3 files changed, 15 insertions(+), 33 deletions(-) diff --git a/src/shared/nsflags.c b/src/shared/nsflags.c index b5bba809151..d4cee069dd6 100644 --- a/src/shared/nsflags.c +++ b/src/shared/nsflags.c @@ -4,22 +4,10 @@ #include "alloc-util.h" #include "extract-word.h" +#include "namespace-util.h" #include "nsflags.h" #include "string-util.h" -const struct namespace_flag_map namespace_flag_map[] = { - { CLONE_NEWCGROUP, "cgroup" }, - { CLONE_NEWIPC, "ipc" }, - { CLONE_NEWNET, "net" }, - /* So, the mount namespace flag is called CLONE_NEWNS for historical reasons. Let's expose it here under a more - * explanatory name: "mnt". This is in-line with how the kernel exposes namespaces in /proc/$PID/ns. */ - { CLONE_NEWNS, "mnt" }, - { CLONE_NEWPID, "pid" }, - { CLONE_NEWUSER, "user" }, - { CLONE_NEWUTS, "uts" }, - {} -}; - int namespace_flags_from_string(const char *name, unsigned long *ret) { unsigned long flags = 0; int r; @@ -37,9 +25,9 @@ int namespace_flags_from_string(const char *name, unsigned long *ret) { if (r == 0) break; - for (i = 0; namespace_flag_map[i].name; i++) - if (streq(word, namespace_flag_map[i].name)) { - f = namespace_flag_map[i].flag; + for (i = 0; namespace_info[i].proc_name; i++) + if (streq(word, namespace_info[i].proc_name)) { + f = namespace_info[i].clone_flag; break; } @@ -57,11 +45,11 @@ int namespace_flags_to_string(unsigned long flags, char **ret) { _cleanup_free_ char *s = NULL; unsigned i; - for (i = 0; namespace_flag_map[i].name; i++) { - if ((flags & namespace_flag_map[i].flag) != namespace_flag_map[i].flag) + for (i = 0; namespace_info[i].proc_name; i++) { + if ((flags & namespace_info[i].clone_flag) != namespace_info[i].clone_flag) continue; - if (!strextend_with_separator(&s, " ", namespace_flag_map[i].name)) + if (!strextend_with_separator(&s, " ", namespace_info[i].proc_name)) return -ENOMEM; } @@ -71,9 +59,9 @@ int namespace_flags_to_string(unsigned long flags, char **ret) { } const char *namespace_single_flag_to_string(unsigned long flag) { - for (unsigned i = 0; namespace_flag_map[i].name; i++) - if (namespace_flag_map[i].flag == flag) - return namespace_flag_map[i].name; + for (unsigned i = 0; namespace_info[i].proc_name; i++) + if (namespace_info[i].clone_flag == flag) + return namespace_info[i].proc_name; return NULL; } diff --git a/src/shared/nsflags.h b/src/shared/nsflags.h index a35332dd970..b59740c0095 100644 --- a/src/shared/nsflags.h +++ b/src/shared/nsflags.h @@ -21,10 +21,3 @@ int namespace_flags_from_string(const char *name, unsigned long *ret); int namespace_flags_to_string(unsigned long flags, char **ret); const char *namespace_single_flag_to_string(unsigned long flag); - -struct namespace_flag_map { - unsigned long flag; - const char *name; -}; - -extern const struct namespace_flag_map namespace_flag_map[]; diff --git a/src/shared/seccomp-util.c b/src/shared/seccomp-util.c index cd0915e2b26..52ee315dda7 100644 --- a/src/shared/seccomp-util.c +++ b/src/shared/seccomp-util.c @@ -18,6 +18,7 @@ #include "env-util.h" #include "errno-list.h" #include "macro.h" +#include "namespace-util.h" #include "nsflags.h" #include "nulstr-util.h" #include "process-util.h" @@ -1289,16 +1290,16 @@ int seccomp_restrict_namespaces(unsigned long retain) { continue; } - for (unsigned i = 0; namespace_flag_map[i].name; i++) { + for (unsigned i = 0; namespace_info[i].proc_name; i++) { unsigned long f; - f = namespace_flag_map[i].flag; + f = namespace_info[i].clone_flag; if (FLAGS_SET(retain, f)) { - log_debug("Permitting %s.", namespace_flag_map[i].name); + log_debug("Permitting %s.", namespace_info[i].proc_name); continue; } - log_debug("Blocking %s.", namespace_flag_map[i].name); + log_debug("Blocking %s.", namespace_info[i].proc_name); r = seccomp_rule_add_exact( seccomp, From 2fe299a320757bf5a3e0362a00e570d3bf713eab Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Fri, 30 Sep 2022 15:02:52 +0200 Subject: [PATCH 4/6] namespace-util: add in_same_namespace() Add a helper for the canonical way to determine whether two namespaces are identical. --- src/basic/namespace-util.c | 24 ++++++++++++++++++++++++ src/basic/namespace-util.h | 1 + 2 files changed, 25 insertions(+) diff --git a/src/basic/namespace-util.c b/src/basic/namespace-util.c index b330e2a11d1..a87a875943c 100644 --- a/src/basic/namespace-util.c +++ b/src/basic/namespace-util.c @@ -236,3 +236,27 @@ int userns_acquire(const char *uid_map, const char *gid_map) { return TAKE_FD(userns_fd); } + +int in_same_namespace(pid_t pid1, pid_t pid2, NamespaceType type) { + const char *ns_path; + struct stat ns_st1, ns_st2; + + if (pid1 == 0) + pid1 = getpid_cached(); + + if (pid2 == 0) + pid2 = getpid_cached(); + + if (pid1 == pid2) + return 1; + + ns_path = pid_namespace_path(pid1, type); + if (stat(ns_path, &ns_st1) < 0) + return -errno; + + ns_path = pid_namespace_path(pid2, type); + if (stat(ns_path, &ns_st2) < 0) + return -errno; + + return stat_inode_same(&ns_st1, &ns_st2); +} diff --git a/src/basic/namespace-util.h b/src/basic/namespace-util.h index 5c1912985d8..be5b2281d3c 100644 --- a/src/basic/namespace-util.h +++ b/src/basic/namespace-util.h @@ -45,3 +45,4 @@ static inline bool userns_shift_range_valid(uid_t shift, uid_t range) { } int userns_acquire(const char *uid_map, const char *gid_map); +int in_same_namespace(pid_t pid1, pid_t pid2, NamespaceType type); From f7a2dc3dd507ffa04b2c337c163f0a6e523eb765 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Fri, 30 Sep 2022 15:05:02 +0200 Subject: [PATCH 5/6] nspawn: use in_same_namespace() helper --- src/nspawn/nspawn.c | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/src/nspawn/nspawn.c b/src/nspawn/nspawn.c index 056d4f1bc5d..01a67b5553a 100644 --- a/src/nspawn/nspawn.c +++ b/src/nspawn/nspawn.c @@ -5387,8 +5387,6 @@ static int initialize_rlimits(void) { } static int cant_be_in_netns(void) { - char udev_path[STRLEN("/proc//ns/net") + DECIMAL_STR_MAX(pid_t)]; - _cleanup_free_ char *udev_ns = NULL, *our_ns = NULL; _cleanup_close_ int fd = -1; struct ucred ucred; int r; @@ -5417,16 +5415,10 @@ static int cant_be_in_netns(void) { if (r < 0) return log_error_errno(r, "Failed to determine peer of udev control socket: %m"); - xsprintf(udev_path, "/proc/" PID_FMT "/ns/net", ucred.pid); - r = readlink_malloc(udev_path, &udev_ns); + r = in_same_namespace(ucred.pid, 0, NAMESPACE_NET); if (r < 0) - return log_error_errno(r, "Failed to read network namespace of udev: %m"); - - r = readlink_malloc("/proc/self/ns/net", &our_ns); - if (r < 0) - return log_error_errno(r, "Failed to read our own network namespace: %m"); - - if (!streq(our_ns, udev_ns)) + return log_error_errno(r, "Failed to determine network namespace of udev: %m"); + if (r == 0) return log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP), "Sorry, but --image= is only supported in the main network namespace, since we need access to udev/AF_NETLINK."); return 0; From 4b00e738d57b607643ba1ebafbfae1e217785b32 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Sat, 1 Oct 2022 12:03:25 +0200 Subject: [PATCH 6/6] mount-util: use in_same_namespace() Signed-off-by: Christian Brauner (Microsoft) --- src/shared/mount-util.c | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) diff --git a/src/shared/mount-util.c b/src/shared/mount-util.c index d6466341384..c504ce8feef 100644 --- a/src/shared/mount-util.c +++ b/src/shared/mount-util.c @@ -787,13 +787,13 @@ static int mount_in_namespace( bool is_image) { _cleanup_close_pair_ int errno_pipe_fd[2] = { -1, -1 }; - _cleanup_close_ int self_mntns_fd = -1, mntns_fd = -1, root_fd = -1, pidns_fd = -1, chased_src_fd = -1; + _cleanup_close_ int mntns_fd = -1, root_fd = -1, pidns_fd = -1, chased_src_fd = -1; char mount_slave[] = "/tmp/propagate.XXXXXX", *mount_tmp, *mount_outside, *p; bool mount_slave_created = false, mount_slave_mounted = false, mount_tmp_created = false, mount_tmp_mounted = false, mount_outside_created = false, mount_outside_mounted = false; _cleanup_free_ char *chased_src_path = NULL; - struct stat st, self_mntns_st; + struct stat st; pid_t child; int r; @@ -808,18 +808,11 @@ static int mount_in_namespace( if (r < 0) return log_debug_errno(r, "Failed to retrieve FDs of the target process' namespace: %m"); - if (fstat(mntns_fd, &st) < 0) - return log_debug_errno(errno, "Failed to fstat mount namespace FD of target process: %m"); - - r = namespace_open(0, NULL, &self_mntns_fd, NULL, NULL, NULL); + r = in_same_namespace(target, 0, NAMESPACE_MOUNT); if (r < 0) - return log_debug_errno(r, "Failed to retrieve FDs of systemd's namespace: %m"); - - if (fstat(self_mntns_fd, &self_mntns_st) < 0) - return log_debug_errno(errno, "Failed to fstat mount namespace FD of systemd: %m"); - + return log_debug_errno(r, "Failed to determine if mount namespaces are equal: %m"); /* We can't add new mounts at runtime if the process wasn't started in a namespace */ - if (stat_inode_same(&st, &self_mntns_st)) + if (r > 0) return log_debug_errno(SYNTHETIC_ERRNO(EINVAL), "Failed to activate bind mount in target, not running in a mount namespace"); /* One day, when bind mounting /proc/self/fd/n works across namespace boundaries we should rework