1
0
mirror of https://github.com/systemd/systemd.git synced 2025-01-11 09:18:07 +03:00

Merge pull request #24877 from brauner/namespace_utils

namespace-util: add an initial set of tweaks
This commit is contained in:
Luca Boccassi 2022-10-04 21:59:48 +01:00 committed by GitHub
commit e96180a88d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
10 changed files with 96 additions and 72 deletions

View File

@ -7,6 +7,11 @@
#define CLONE_NEWCGROUP 0x02000000
#endif
/* 769071ac9f20b6a447410c7eaa55d1a5233ef40c (5.8) */
/* Fallback: only defined here when the headers included so far do not already provide
 * CLONE_NEWTIME themselves.
 * NOTE(review): the hash and "(5.8)" above presumably name the kernel commit and release
 * that introduced this flag — confirm against the kernel history. */
#ifndef CLONE_NEWTIME
#define CLONE_NEWTIME 0x00000080
#endif
/* Not exposed yet. Defined at include/linux/sched.h */
#ifndef PF_KTHREAD
#define PF_KTHREAD 0x00200000

View File

@ -9,12 +9,30 @@
#include "fileio.h"
#include "missing_fs.h"
#include "missing_magic.h"
#include "missing_sched.h"
#include "namespace-util.h"
#include "process-util.h"
#include "stat-util.h"
#include "stdio-util.h"
#include "user-util.h"
/* Per-namespace-type metadata, indexed by NamespaceType. Each entry carries the short name of
 * the namespace, its path relative to /proc/$PID/, and the matching CLONE_NEW* flag. */
const struct namespace_info namespace_info[] = {
        [NAMESPACE_CGROUP] = { .proc_name = "cgroup", .proc_path = "ns/cgroup", .clone_flag = CLONE_NEWCGROUP, },
        [NAMESPACE_IPC]    = { .proc_name = "ipc",    .proc_path = "ns/ipc",    .clone_flag = CLONE_NEWIPC,    },
        [NAMESPACE_NET]    = { .proc_name = "net",    .proc_path = "ns/net",    .clone_flag = CLONE_NEWNET,    },
        /* For historical reasons the mount namespace flag is named CLONE_NEWNS. We expose it under
         * the more descriptive name "mnt", in line with how the kernel names the entry in
         * /proc/$PID/ns. */
        [NAMESPACE_MOUNT]  = { .proc_name = "mnt",    .proc_path = "ns/mnt",    .clone_flag = CLONE_NEWNS,     },
        [NAMESPACE_PID]    = { .proc_name = "pid",    .proc_path = "ns/pid",    .clone_flag = CLONE_NEWPID,    },
        [NAMESPACE_USER]   = { .proc_name = "user",   .proc_path = "ns/user",   .clone_flag = CLONE_NEWUSER,   },
        [NAMESPACE_UTS]    = { .proc_name = "uts",    .proc_path = "ns/uts",    .clone_flag = CLONE_NEWUTS,    },
        [NAMESPACE_TIME]   = { .proc_name = "time",   .proc_path = "ns/time",   .clone_flag = CLONE_NEWTIME,   },
        /* Empty terminating entry: lets callers walk the array until proc_name is NULL instead
         * of relying on _NAMESPACE_TYPE_MAX. */
        { },
};
/* Builds the procfs path of the namespace entry of the given type for process 'pid', by passing
 * the proc_path of namespace_info[type] to procfs_file_alloca(). 'type' is used directly as an
 * array index, so it must be a valid NamespaceType.
 * NOTE(review): judging by its name, procfs_file_alloca() presumably returns stack memory that
 * is only valid within the calling function — confirm before storing or returning the result. */
#define pid_namespace_path(pid, type) procfs_file_alloca(pid, namespace_info[type].proc_path)
int namespace_open(pid_t pid, int *pidns_fd, int *mntns_fd, int *netns_fd, int *userns_fd, int *root_fd) {
_cleanup_close_ int pidnsfd = -1, mntnsfd = -1, netnsfd = -1, usernsfd = -1;
int rfd = -1;
@ -24,7 +42,7 @@ int namespace_open(pid_t pid, int *pidns_fd, int *mntns_fd, int *netns_fd, int *
if (mntns_fd) {
const char *mntns;
mntns = procfs_file_alloca(pid, "ns/mnt");
mntns = pid_namespace_path(pid, NAMESPACE_MOUNT);
mntnsfd = open(mntns, O_RDONLY|O_NOCTTY|O_CLOEXEC);
if (mntnsfd < 0)
return -errno;
@ -33,7 +51,7 @@ int namespace_open(pid_t pid, int *pidns_fd, int *mntns_fd, int *netns_fd, int *
if (pidns_fd) {
const char *pidns;
pidns = procfs_file_alloca(pid, "ns/pid");
pidns = pid_namespace_path(pid, NAMESPACE_PID);
pidnsfd = open(pidns, O_RDONLY|O_NOCTTY|O_CLOEXEC);
if (pidnsfd < 0)
return -errno;
@ -42,7 +60,7 @@ int namespace_open(pid_t pid, int *pidns_fd, int *mntns_fd, int *netns_fd, int *
if (netns_fd) {
const char *netns;
netns = procfs_file_alloca(pid, "ns/net");
netns = pid_namespace_path(pid, NAMESPACE_NET);
netnsfd = open(netns, O_RDONLY|O_NOCTTY|O_CLOEXEC);
if (netnsfd < 0)
return -errno;
@ -51,7 +69,7 @@ int namespace_open(pid_t pid, int *pidns_fd, int *mntns_fd, int *netns_fd, int *
if (userns_fd) {
const char *userns;
userns = procfs_file_alloca(pid, "ns/user");
userns = pid_namespace_path(pid, NAMESPACE_USER);
usernsfd = open(userns, O_RDONLY|O_NOCTTY|O_CLOEXEC);
if (usernsfd < 0 && errno != ENOENT)
return -errno;
@ -218,3 +236,27 @@ int userns_acquire(const char *uid_map, const char *gid_map) {
return TAKE_FD(userns_fd);
}
/* Determines whether two processes are members of the same namespace of the given type, by
 * stat()ing the respective namespace entries below /proc/ and comparing the resulting inodes.
 *
 * pid1, pid2: the processes to compare; 0 refers to the calling process.
 * type:       the namespace type to compare; used as index into namespace_info[].
 *
 * Returns the result of stat_inode_same() on success (callers treat > 0 as "same namespace" and
 * 0 as "different namespace"), -EINVAL if 'type' is not a valid namespace type, or a negative
 * errno-style error if stat() on either namespace entry fails. */
int in_same_namespace(pid_t pid1, pid_t pid2, NamespaceType type) {
        const char *ns_path;
        struct stat ns_st1, ns_st2;

        /* 'type' indexes namespace_info[]. Refuse values such as _NAMESPACE_TYPE_INVALID or
         * _NAMESPACE_TYPE_MAX, which would read outside of the table or hit the terminating
         * entry that has no path. */
        if (type < 0 || type >= _NAMESPACE_TYPE_MAX)
                return -EINVAL;

        if (pid1 == 0)
                pid1 = getpid_cached();

        if (pid2 == 0)
                pid2 = getpid_cached();

        /* A process trivially shares every namespace with itself, no need to look at /proc/. */
        if (pid1 == pid2)
                return 1;

        ns_path = pid_namespace_path(pid1, type);
        if (stat(ns_path, &ns_st1) < 0)
                return -errno;

        ns_path = pid_namespace_path(pid2, type);
        if (stat(ns_path, &ns_st2) < 0)
                return -errno;

        return stat_inode_same(&ns_st1, &ns_st2);
}

View File

@ -3,6 +3,25 @@
#include <sys/types.h>
/* The namespace types known to this code. The non-negative values are consecutive, starting at
 * zero, so that they can be used as array indexes. */
typedef enum NamespaceType {
        NAMESPACE_CGROUP = 0,
        NAMESPACE_IPC,
        NAMESPACE_NET,
        NAMESPACE_MOUNT,
        NAMESPACE_PID,
        NAMESPACE_USER,
        NAMESPACE_UTS,
        NAMESPACE_TIME,
        _NAMESPACE_TYPE_MAX,                /* one past the last valid type, not a type itself */
        _NAMESPACE_TYPE_INVALID = -EINVAL,  /* negative marker for "no valid type" */
} NamespaceType;
/* Describes a single namespace type. */
struct namespace_info {
        const char *proc_name;  /* short name of the namespace, e.g. "mnt" */
        const char *proc_path;  /* path of the namespace entry relative to /proc/$PID/, e.g. "ns/mnt" */
        unsigned clone_flag;    /* the CLONE_NEW* flag belonging to this namespace type */
};

/* One entry per NamespaceType, plus one extra slot at the end. */
extern const struct namespace_info namespace_info[_NAMESPACE_TYPE_MAX + 1];
int namespace_open(pid_t pid, int *pidns_fd, int *mntns_fd, int *netns_fd, int *userns_fd, int *root_fd);
int namespace_enter(int pidns_fd, int mntns_fd, int netns_fd, int userns_fd, int root_fd);
@ -26,3 +45,4 @@ static inline bool userns_shift_range_valid(uid_t shift, uid_t range) {
}
int userns_acquire(const char *uid_map, const char *gid_map);
int in_same_namespace(pid_t pid1, pid_t pid2, NamespaceType type);

View File

@ -2952,6 +2952,7 @@ static const char* const namespace_type_table[] = {
[NAMESPACE_USER] = "user",
[NAMESPACE_PID] = "pid",
[NAMESPACE_NET] = "net",
[NAMESPACE_TIME] = "time",
};
DEFINE_STRING_TABLE_LOOKUP(namespace_type, NamespaceType);

View File

@ -15,6 +15,7 @@ typedef struct MountImage MountImage;
#include "dissect-image.h"
#include "fs-util.h"
#include "macro.h"
#include "namespace-util.h"
#include "string-util.h"
typedef enum ProtectHome {
@ -26,18 +27,6 @@ typedef enum ProtectHome {
_PROTECT_HOME_INVALID = -EINVAL,
} ProtectHome;
typedef enum NamespaceType {
NAMESPACE_MOUNT,
NAMESPACE_CGROUP,
NAMESPACE_UTS,
NAMESPACE_IPC,
NAMESPACE_USER,
NAMESPACE_PID,
NAMESPACE_NET,
_NAMESPACE_TYPE_MAX,
_NAMESPACE_TYPE_INVALID = -EINVAL,
} NamespaceType;
typedef enum ProtectSystem {
PROTECT_SYSTEM_NO,
PROTECT_SYSTEM_YES,

View File

@ -5387,8 +5387,6 @@ static int initialize_rlimits(void) {
}
static int cant_be_in_netns(void) {
char udev_path[STRLEN("/proc//ns/net") + DECIMAL_STR_MAX(pid_t)];
_cleanup_free_ char *udev_ns = NULL, *our_ns = NULL;
_cleanup_close_ int fd = -1;
struct ucred ucred;
int r;
@ -5417,16 +5415,10 @@ static int cant_be_in_netns(void) {
if (r < 0)
return log_error_errno(r, "Failed to determine peer of udev control socket: %m");
xsprintf(udev_path, "/proc/" PID_FMT "/ns/net", ucred.pid);
r = readlink_malloc(udev_path, &udev_ns);
r = in_same_namespace(ucred.pid, 0, NAMESPACE_NET);
if (r < 0)
return log_error_errno(r, "Failed to read network namespace of udev: %m");
r = readlink_malloc("/proc/self/ns/net", &our_ns);
if (r < 0)
return log_error_errno(r, "Failed to read our own network namespace: %m");
if (!streq(our_ns, udev_ns))
return log_error_errno(r, "Failed to determine network namespace of udev: %m");
if (r == 0)
return log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP),
"Sorry, but --image= is only supported in the main network namespace, since we need access to udev/AF_NETLINK.");
return 0;

View File

@ -787,13 +787,13 @@ static int mount_in_namespace(
bool is_image) {
_cleanup_close_pair_ int errno_pipe_fd[2] = { -1, -1 };
_cleanup_close_ int self_mntns_fd = -1, mntns_fd = -1, root_fd = -1, pidns_fd = -1, chased_src_fd = -1;
_cleanup_close_ int mntns_fd = -1, root_fd = -1, pidns_fd = -1, chased_src_fd = -1;
char mount_slave[] = "/tmp/propagate.XXXXXX", *mount_tmp, *mount_outside, *p;
bool mount_slave_created = false, mount_slave_mounted = false,
mount_tmp_created = false, mount_tmp_mounted = false,
mount_outside_created = false, mount_outside_mounted = false;
_cleanup_free_ char *chased_src_path = NULL;
struct stat st, self_mntns_st;
struct stat st;
pid_t child;
int r;
@ -808,18 +808,11 @@ static int mount_in_namespace(
if (r < 0)
return log_debug_errno(r, "Failed to retrieve FDs of the target process' namespace: %m");
if (fstat(mntns_fd, &st) < 0)
return log_debug_errno(errno, "Failed to fstat mount namespace FD of target process: %m");
r = namespace_open(0, NULL, &self_mntns_fd, NULL, NULL, NULL);
r = in_same_namespace(target, 0, NAMESPACE_MOUNT);
if (r < 0)
return log_debug_errno(r, "Failed to retrieve FDs of systemd's namespace: %m");
if (fstat(self_mntns_fd, &self_mntns_st) < 0)
return log_debug_errno(errno, "Failed to fstat mount namespace FD of systemd: %m");
return log_debug_errno(r, "Failed to determine if mount namespaces are equal: %m");
/* We can't add new mounts at runtime if the process wasn't started in a namespace */
if (stat_inode_same(&st, &self_mntns_st))
if (r > 0)
return log_debug_errno(SYNTHETIC_ERRNO(EINVAL), "Failed to activate bind mount in target, not running in a mount namespace");
/* One day, when bind mounting /proc/self/fd/n works across namespace boundaries we should rework

View File

@ -4,22 +4,10 @@
#include "alloc-util.h"
#include "extract-word.h"
#include "namespace-util.h"
#include "nsflags.h"
#include "string-util.h"
const struct namespace_flag_map namespace_flag_map[] = {
{ CLONE_NEWCGROUP, "cgroup" },
{ CLONE_NEWIPC, "ipc" },
{ CLONE_NEWNET, "net" },
/* So, the mount namespace flag is called CLONE_NEWNS for historical reasons. Let's expose it here under a more
* explanatory name: "mnt". This is in-line with how the kernel exposes namespaces in /proc/$PID/ns. */
{ CLONE_NEWNS, "mnt" },
{ CLONE_NEWPID, "pid" },
{ CLONE_NEWUSER, "user" },
{ CLONE_NEWUTS, "uts" },
{}
};
int namespace_flags_from_string(const char *name, unsigned long *ret) {
unsigned long flags = 0;
int r;
@ -37,9 +25,9 @@ int namespace_flags_from_string(const char *name, unsigned long *ret) {
if (r == 0)
break;
for (i = 0; namespace_flag_map[i].name; i++)
if (streq(word, namespace_flag_map[i].name)) {
f = namespace_flag_map[i].flag;
for (i = 0; namespace_info[i].proc_name; i++)
if (streq(word, namespace_info[i].proc_name)) {
f = namespace_info[i].clone_flag;
break;
}
@ -57,11 +45,11 @@ int namespace_flags_to_string(unsigned long flags, char **ret) {
_cleanup_free_ char *s = NULL;
unsigned i;
for (i = 0; namespace_flag_map[i].name; i++) {
if ((flags & namespace_flag_map[i].flag) != namespace_flag_map[i].flag)
for (i = 0; namespace_info[i].proc_name; i++) {
if ((flags & namespace_info[i].clone_flag) != namespace_info[i].clone_flag)
continue;
if (!strextend_with_separator(&s, " ", namespace_flag_map[i].name))
if (!strextend_with_separator(&s, " ", namespace_info[i].proc_name))
return -ENOMEM;
}
@ -71,9 +59,9 @@ int namespace_flags_to_string(unsigned long flags, char **ret) {
}
const char *namespace_single_flag_to_string(unsigned long flag) {
for (unsigned i = 0; namespace_flag_map[i].name; i++)
if (namespace_flag_map[i].flag == flag)
return namespace_flag_map[i].name;
for (unsigned i = 0; namespace_info[i].proc_name; i++)
if (namespace_info[i].clone_flag == flag)
return namespace_info[i].proc_name;
return NULL;
}

View File

@ -21,10 +21,3 @@
int namespace_flags_from_string(const char *name, unsigned long *ret);
int namespace_flags_to_string(unsigned long flags, char **ret);
const char *namespace_single_flag_to_string(unsigned long flag);
struct namespace_flag_map {
unsigned long flag;
const char *name;
};
extern const struct namespace_flag_map namespace_flag_map[];

View File

@ -18,6 +18,7 @@
#include "env-util.h"
#include "errno-list.h"
#include "macro.h"
#include "namespace-util.h"
#include "nsflags.h"
#include "nulstr-util.h"
#include "process-util.h"
@ -1289,16 +1290,16 @@ int seccomp_restrict_namespaces(unsigned long retain) {
continue;
}
for (unsigned i = 0; namespace_flag_map[i].name; i++) {
for (unsigned i = 0; namespace_info[i].proc_name; i++) {
unsigned long f;
f = namespace_flag_map[i].flag;
f = namespace_info[i].clone_flag;
if (FLAGS_SET(retain, f)) {
log_debug("Permitting %s.", namespace_flag_map[i].name);
log_debug("Permitting %s.", namespace_info[i].proc_name);
continue;
}
log_debug("Blocking %s.", namespace_flag_map[i].name);
log_debug("Blocking %s.", namespace_info[i].proc_name);
r = seccomp_rule_add_exact(
seccomp,