mirror of
https://github.com/systemd/systemd.git
synced 2024-12-22 17:35:35 +03:00
Merge pull request #24877 from brauner/namespace_utils
namespace-util: add an initial set of tweaks
This commit is contained in:
commit
e96180a88d
@ -7,6 +7,11 @@
|
||||
#define CLONE_NEWCGROUP 0x02000000
|
||||
#endif
|
||||
|
||||
/* Fallback definition of the time namespace clone flag for headers that do not provide it.
 * Reference quoted by the original note: 769071ac9f20b6a447410c7eaa55d1a5233ef40c (5.8) — presumably the
 * Linux kernel commit and release that introduced CLONE_NEWTIME; verify against kernel history. */
|
||||
#ifndef CLONE_NEWTIME
|
||||
#define CLONE_NEWTIME 0x00000080
|
||||
#endif
|
||||
|
||||
/* Not exposed yet. Defined at include/linux/sched.h */
|
||||
#ifndef PF_KTHREAD
|
||||
#define PF_KTHREAD 0x00200000
|
||||
|
@ -9,12 +9,30 @@
|
||||
#include "fileio.h"
|
||||
#include "missing_fs.h"
|
||||
#include "missing_magic.h"
|
||||
#include "missing_sched.h"
|
||||
#include "namespace-util.h"
|
||||
#include "process-util.h"
|
||||
#include "stat-util.h"
|
||||
#include "stdio-util.h"
|
||||
#include "user-util.h"
|
||||
|
||||
const struct namespace_info namespace_info[] = {
|
||||
[NAMESPACE_CGROUP] = { "cgroup", "ns/cgroup", CLONE_NEWCGROUP, },
|
||||
[NAMESPACE_IPC] = { "ipc", "ns/ipc", CLONE_NEWIPC, },
|
||||
[NAMESPACE_NET] = { "net", "ns/net", CLONE_NEWNET, },
|
||||
/* So, the mount namespace flag is called CLONE_NEWNS for historical
|
||||
* reasons. Let's expose it here under a more explanatory name: "mnt".
|
||||
* This is in-line with how the kernel exposes namespaces in /proc/$PID/ns. */
|
||||
[NAMESPACE_MOUNT] = { "mnt", "ns/mnt", CLONE_NEWNS, },
|
||||
[NAMESPACE_PID] = { "pid", "ns/pid", CLONE_NEWPID, },
|
||||
[NAMESPACE_USER] = { "user", "ns/user", CLONE_NEWUSER, },
|
||||
[NAMESPACE_UTS] = { "uts", "ns/uts", CLONE_NEWUTS, },
|
||||
[NAMESPACE_TIME] = { "time", "ns/time", CLONE_NEWTIME, },
|
||||
{ /* Allow callers to iterate over the array without using _NAMESPACE_TYPE_MAX. */ },
|
||||
};
|
||||
|
||||
/* Builds the procfs path of the namespace entry of the given NamespaceType for process 'pid', by passing the
 * table's proc_path (e.g. "ns/mnt") to procfs_file_alloca().
 * NOTE(review): 'type' indexes namespace_info[] without a bounds check, so callers must pass a valid type.
 * The helper's name suggests the result is alloca()-backed and only valid inside the calling function —
 * confirm against procfs_file_alloca(). */
#define pid_namespace_path(pid, type) procfs_file_alloca(pid, namespace_info[type].proc_path)
|
||||
|
||||
int namespace_open(pid_t pid, int *pidns_fd, int *mntns_fd, int *netns_fd, int *userns_fd, int *root_fd) {
|
||||
_cleanup_close_ int pidnsfd = -1, mntnsfd = -1, netnsfd = -1, usernsfd = -1;
|
||||
int rfd = -1;
|
||||
@ -24,7 +42,7 @@ int namespace_open(pid_t pid, int *pidns_fd, int *mntns_fd, int *netns_fd, int *
|
||||
if (mntns_fd) {
|
||||
const char *mntns;
|
||||
|
||||
mntns = procfs_file_alloca(pid, "ns/mnt");
|
||||
mntns = pid_namespace_path(pid, NAMESPACE_MOUNT);
|
||||
mntnsfd = open(mntns, O_RDONLY|O_NOCTTY|O_CLOEXEC);
|
||||
if (mntnsfd < 0)
|
||||
return -errno;
|
||||
@ -33,7 +51,7 @@ int namespace_open(pid_t pid, int *pidns_fd, int *mntns_fd, int *netns_fd, int *
|
||||
if (pidns_fd) {
|
||||
const char *pidns;
|
||||
|
||||
pidns = procfs_file_alloca(pid, "ns/pid");
|
||||
pidns = pid_namespace_path(pid, NAMESPACE_PID);
|
||||
pidnsfd = open(pidns, O_RDONLY|O_NOCTTY|O_CLOEXEC);
|
||||
if (pidnsfd < 0)
|
||||
return -errno;
|
||||
@ -42,7 +60,7 @@ int namespace_open(pid_t pid, int *pidns_fd, int *mntns_fd, int *netns_fd, int *
|
||||
if (netns_fd) {
|
||||
const char *netns;
|
||||
|
||||
netns = procfs_file_alloca(pid, "ns/net");
|
||||
netns = pid_namespace_path(pid, NAMESPACE_NET);
|
||||
netnsfd = open(netns, O_RDONLY|O_NOCTTY|O_CLOEXEC);
|
||||
if (netnsfd < 0)
|
||||
return -errno;
|
||||
@ -51,7 +69,7 @@ int namespace_open(pid_t pid, int *pidns_fd, int *mntns_fd, int *netns_fd, int *
|
||||
if (userns_fd) {
|
||||
const char *userns;
|
||||
|
||||
userns = procfs_file_alloca(pid, "ns/user");
|
||||
userns = pid_namespace_path(pid, NAMESPACE_USER);
|
||||
usernsfd = open(userns, O_RDONLY|O_NOCTTY|O_CLOEXEC);
|
||||
if (usernsfd < 0 && errno != ENOENT)
|
||||
return -errno;
|
||||
@ -218,3 +236,27 @@ int userns_acquire(const char *uid_map, const char *gid_map) {
|
||||
return TAKE_FD(userns_fd);
|
||||
|
||||
}
|
||||
|
||||
/* Checks whether two processes are in the same namespace of the given type.
 *
 * A PID of 0 stands for the calling process. The comparison stat()s the procfs namespace entry of each
 * process and compares the resulting inodes.
 *
 * Returns 1 right away if both PIDs refer to the same process, otherwise the result of stat_inode_same()
 * (callers treat > 0 as "same namespace" and 0 as "different"). On failure a negative errno-style value is
 * returned: -EINVAL for a namespace type outside the table, or the errno of the failing stat(). */
int in_same_namespace(pid_t pid1, pid_t pid2, NamespaceType type) {
        const char *ns_path;
        struct stat ns_st1, ns_st2;

        /* 'type' is used as an index into namespace_info[]. Refuse anything outside the valid range
         * (negative values such as _NAMESPACE_TYPE_INVALID, and the terminating slot whose proc_path is
         * NULL) instead of reading out of bounds or passing a NULL path on. */
        if (type < 0 || type >= _NAMESPACE_TYPE_MAX)
                return -EINVAL;

        if (pid1 == 0)
                pid1 = getpid_cached();

        if (pid2 == 0)
                pid2 = getpid_cached();

        /* A process trivially shares every namespace with itself; no need to touch procfs. */
        if (pid1 == pid2)
                return 1;

        ns_path = pid_namespace_path(pid1, type);
        if (stat(ns_path, &ns_st1) < 0)
                return -errno;

        ns_path = pid_namespace_path(pid2, type);
        if (stat(ns_path, &ns_st2) < 0)
                return -errno;

        return stat_inode_same(&ns_st1, &ns_st2);
}
|
||||
|
@ -3,6 +3,25 @@
|
||||
|
||||
#include <sys/types.h>
|
||||
|
||||
/* Identifies one kind of namespace. The regular values double as indexes into namespace_info[], so they
 * must stay dense and start at zero. */
typedef enum NamespaceType {
        NAMESPACE_CGROUP,                       /* "cgroup" */
        NAMESPACE_IPC,                          /* "ipc" */
        NAMESPACE_NET,                          /* "net" */
        NAMESPACE_MOUNT,                        /* "mnt" */
        NAMESPACE_PID,                          /* "pid" */
        NAMESPACE_USER,                         /* "user" */
        NAMESPACE_UTS,                          /* "uts" */
        NAMESPACE_TIME,                         /* "time" */
        _NAMESPACE_TYPE_MAX,                    /* number of regular namespace types, not a type itself */
        _NAMESPACE_TYPE_INVALID = -EINVAL,      /* negative marker for "no valid type" */
} NamespaceType;
|
||||
|
||||
/* Static description of one namespace type. */
struct namespace_info {
        const char *proc_name;          /* namespace name, e.g. "mnt" */
        const char *proc_path;          /* path fragment for the process' procfs entry, e.g. "ns/mnt" */
        unsigned int clone_flag;        /* matching CLONE_NEW* flag */
};

/* Indexed by NamespaceType. The array has one slot more than there are namespace types. */
extern const struct namespace_info namespace_info[_NAMESPACE_TYPE_MAX + 1];
|
||||
|
||||
int namespace_open(pid_t pid, int *pidns_fd, int *mntns_fd, int *netns_fd, int *userns_fd, int *root_fd);
|
||||
int namespace_enter(int pidns_fd, int mntns_fd, int netns_fd, int userns_fd, int root_fd);
|
||||
|
||||
@ -26,3 +45,4 @@ static inline bool userns_shift_range_valid(uid_t shift, uid_t range) {
|
||||
}
|
||||
|
||||
int userns_acquire(const char *uid_map, const char *gid_map);
|
||||
int in_same_namespace(pid_t pid1, pid_t pid2, NamespaceType type);
|
||||
|
@ -2952,6 +2952,7 @@ static const char* const namespace_type_table[] = {
|
||||
[NAMESPACE_USER] = "user",
|
||||
[NAMESPACE_PID] = "pid",
|
||||
[NAMESPACE_NET] = "net",
|
||||
[NAMESPACE_TIME] = "time",
|
||||
};
|
||||
|
||||
/* Instantiates the string lookup helpers for NamespaceType on top of namespace_type_table. The names and
 * signatures of the generated functions come from the DEFINE_STRING_TABLE_LOOKUP macro, which is defined
 * elsewhere. */
DEFINE_STRING_TABLE_LOOKUP(namespace_type, NamespaceType);
|
||||
|
@ -15,6 +15,7 @@ typedef struct MountImage MountImage;
|
||||
#include "dissect-image.h"
|
||||
#include "fs-util.h"
|
||||
#include "macro.h"
|
||||
#include "namespace-util.h"
|
||||
#include "string-util.h"
|
||||
|
||||
typedef enum ProtectHome {
|
||||
@ -26,18 +27,6 @@ typedef enum ProtectHome {
|
||||
_PROTECT_HOME_INVALID = -EINVAL,
|
||||
} ProtectHome;
|
||||
|
||||
typedef enum NamespaceType {
|
||||
NAMESPACE_MOUNT,
|
||||
NAMESPACE_CGROUP,
|
||||
NAMESPACE_UTS,
|
||||
NAMESPACE_IPC,
|
||||
NAMESPACE_USER,
|
||||
NAMESPACE_PID,
|
||||
NAMESPACE_NET,
|
||||
_NAMESPACE_TYPE_MAX,
|
||||
_NAMESPACE_TYPE_INVALID = -EINVAL,
|
||||
} NamespaceType;
|
||||
|
||||
typedef enum ProtectSystem {
|
||||
PROTECT_SYSTEM_NO,
|
||||
PROTECT_SYSTEM_YES,
|
||||
|
@ -5387,8 +5387,6 @@ static int initialize_rlimits(void) {
|
||||
}
|
||||
|
||||
static int cant_be_in_netns(void) {
|
||||
char udev_path[STRLEN("/proc//ns/net") + DECIMAL_STR_MAX(pid_t)];
|
||||
_cleanup_free_ char *udev_ns = NULL, *our_ns = NULL;
|
||||
_cleanup_close_ int fd = -1;
|
||||
struct ucred ucred;
|
||||
int r;
|
||||
@ -5417,16 +5415,10 @@ static int cant_be_in_netns(void) {
|
||||
if (r < 0)
|
||||
return log_error_errno(r, "Failed to determine peer of udev control socket: %m");
|
||||
|
||||
xsprintf(udev_path, "/proc/" PID_FMT "/ns/net", ucred.pid);
|
||||
r = readlink_malloc(udev_path, &udev_ns);
|
||||
r = in_same_namespace(ucred.pid, 0, NAMESPACE_NET);
|
||||
if (r < 0)
|
||||
return log_error_errno(r, "Failed to read network namespace of udev: %m");
|
||||
|
||||
r = readlink_malloc("/proc/self/ns/net", &our_ns);
|
||||
if (r < 0)
|
||||
return log_error_errno(r, "Failed to read our own network namespace: %m");
|
||||
|
||||
if (!streq(our_ns, udev_ns))
|
||||
return log_error_errno(r, "Failed to determine network namespace of udev: %m");
|
||||
if (r == 0)
|
||||
return log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP),
|
||||
"Sorry, but --image= is only supported in the main network namespace, since we need access to udev/AF_NETLINK.");
|
||||
return 0;
|
||||
|
@ -787,13 +787,13 @@ static int mount_in_namespace(
|
||||
bool is_image) {
|
||||
|
||||
_cleanup_close_pair_ int errno_pipe_fd[2] = { -1, -1 };
|
||||
_cleanup_close_ int self_mntns_fd = -1, mntns_fd = -1, root_fd = -1, pidns_fd = -1, chased_src_fd = -1;
|
||||
_cleanup_close_ int mntns_fd = -1, root_fd = -1, pidns_fd = -1, chased_src_fd = -1;
|
||||
char mount_slave[] = "/tmp/propagate.XXXXXX", *mount_tmp, *mount_outside, *p;
|
||||
bool mount_slave_created = false, mount_slave_mounted = false,
|
||||
mount_tmp_created = false, mount_tmp_mounted = false,
|
||||
mount_outside_created = false, mount_outside_mounted = false;
|
||||
_cleanup_free_ char *chased_src_path = NULL;
|
||||
struct stat st, self_mntns_st;
|
||||
struct stat st;
|
||||
pid_t child;
|
||||
int r;
|
||||
|
||||
@ -808,18 +808,11 @@ static int mount_in_namespace(
|
||||
if (r < 0)
|
||||
return log_debug_errno(r, "Failed to retrieve FDs of the target process' namespace: %m");
|
||||
|
||||
if (fstat(mntns_fd, &st) < 0)
|
||||
return log_debug_errno(errno, "Failed to fstat mount namespace FD of target process: %m");
|
||||
|
||||
r = namespace_open(0, NULL, &self_mntns_fd, NULL, NULL, NULL);
|
||||
r = in_same_namespace(target, 0, NAMESPACE_MOUNT);
|
||||
if (r < 0)
|
||||
return log_debug_errno(r, "Failed to retrieve FDs of systemd's namespace: %m");
|
||||
|
||||
if (fstat(self_mntns_fd, &self_mntns_st) < 0)
|
||||
return log_debug_errno(errno, "Failed to fstat mount namespace FD of systemd: %m");
|
||||
|
||||
return log_debug_errno(r, "Failed to determine if mount namespaces are equal: %m");
|
||||
/* We can't add new mounts at runtime if the process wasn't started in a namespace */
|
||||
if (stat_inode_same(&st, &self_mntns_st))
|
||||
if (r > 0)
|
||||
return log_debug_errno(SYNTHETIC_ERRNO(EINVAL), "Failed to activate bind mount in target, not running in a mount namespace");
|
||||
|
||||
/* One day, when bind mounting /proc/self/fd/n works across namespace boundaries we should rework
|
||||
|
@ -4,22 +4,10 @@
|
||||
|
||||
#include "alloc-util.h"
|
||||
#include "extract-word.h"
|
||||
#include "namespace-util.h"
|
||||
#include "nsflags.h"
|
||||
#include "string-util.h"
|
||||
|
||||
const struct namespace_flag_map namespace_flag_map[] = {
|
||||
{ CLONE_NEWCGROUP, "cgroup" },
|
||||
{ CLONE_NEWIPC, "ipc" },
|
||||
{ CLONE_NEWNET, "net" },
|
||||
/* So, the mount namespace flag is called CLONE_NEWNS for historical reasons. Let's expose it here under a more
|
||||
* explanatory name: "mnt". This is in-line with how the kernel exposes namespaces in /proc/$PID/ns. */
|
||||
{ CLONE_NEWNS, "mnt" },
|
||||
{ CLONE_NEWPID, "pid" },
|
||||
{ CLONE_NEWUSER, "user" },
|
||||
{ CLONE_NEWUTS, "uts" },
|
||||
{}
|
||||
};
|
||||
|
||||
int namespace_flags_from_string(const char *name, unsigned long *ret) {
|
||||
unsigned long flags = 0;
|
||||
int r;
|
||||
@ -37,9 +25,9 @@ int namespace_flags_from_string(const char *name, unsigned long *ret) {
|
||||
if (r == 0)
|
||||
break;
|
||||
|
||||
for (i = 0; namespace_flag_map[i].name; i++)
|
||||
if (streq(word, namespace_flag_map[i].name)) {
|
||||
f = namespace_flag_map[i].flag;
|
||||
for (i = 0; namespace_info[i].proc_name; i++)
|
||||
if (streq(word, namespace_info[i].proc_name)) {
|
||||
f = namespace_info[i].clone_flag;
|
||||
break;
|
||||
}
|
||||
|
||||
@ -57,11 +45,11 @@ int namespace_flags_to_string(unsigned long flags, char **ret) {
|
||||
_cleanup_free_ char *s = NULL;
|
||||
unsigned i;
|
||||
|
||||
for (i = 0; namespace_flag_map[i].name; i++) {
|
||||
if ((flags & namespace_flag_map[i].flag) != namespace_flag_map[i].flag)
|
||||
for (i = 0; namespace_info[i].proc_name; i++) {
|
||||
if ((flags & namespace_info[i].clone_flag) != namespace_info[i].clone_flag)
|
||||
continue;
|
||||
|
||||
if (!strextend_with_separator(&s, " ", namespace_flag_map[i].name))
|
||||
if (!strextend_with_separator(&s, " ", namespace_info[i].proc_name))
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
@ -71,9 +59,9 @@ int namespace_flags_to_string(unsigned long flags, char **ret) {
|
||||
}
|
||||
|
||||
const char *namespace_single_flag_to_string(unsigned long flag) {
|
||||
for (unsigned i = 0; namespace_flag_map[i].name; i++)
|
||||
if (namespace_flag_map[i].flag == flag)
|
||||
return namespace_flag_map[i].name;
|
||||
for (unsigned i = 0; namespace_info[i].proc_name; i++)
|
||||
if (namespace_info[i].clone_flag == flag)
|
||||
return namespace_info[i].proc_name;
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
@ -21,10 +21,3 @@
|
||||
int namespace_flags_from_string(const char *name, unsigned long *ret);
|
||||
int namespace_flags_to_string(unsigned long flags, char **ret);
|
||||
const char *namespace_single_flag_to_string(unsigned long flag);
|
||||
|
||||
struct namespace_flag_map {
|
||||
unsigned long flag;
|
||||
const char *name;
|
||||
};
|
||||
|
||||
extern const struct namespace_flag_map namespace_flag_map[];
|
||||
|
@ -18,6 +18,7 @@
|
||||
#include "env-util.h"
|
||||
#include "errno-list.h"
|
||||
#include "macro.h"
|
||||
#include "namespace-util.h"
|
||||
#include "nsflags.h"
|
||||
#include "nulstr-util.h"
|
||||
#include "process-util.h"
|
||||
@ -1289,16 +1290,16 @@ int seccomp_restrict_namespaces(unsigned long retain) {
|
||||
continue;
|
||||
}
|
||||
|
||||
for (unsigned i = 0; namespace_flag_map[i].name; i++) {
|
||||
for (unsigned i = 0; namespace_info[i].proc_name; i++) {
|
||||
unsigned long f;
|
||||
|
||||
f = namespace_flag_map[i].flag;
|
||||
f = namespace_info[i].clone_flag;
|
||||
if (FLAGS_SET(retain, f)) {
|
||||
log_debug("Permitting %s.", namespace_flag_map[i].name);
|
||||
log_debug("Permitting %s.", namespace_info[i].proc_name);
|
||||
continue;
|
||||
}
|
||||
|
||||
log_debug("Blocking %s.", namespace_flag_map[i].name);
|
||||
log_debug("Blocking %s.", namespace_info[i].proc_name);
|
||||
|
||||
r = seccomp_rule_add_exact(
|
||||
seccomp,
|
||||
|
Loading…
Reference in New Issue
Block a user