diff --git a/src/core/mount-setup.c b/src/core/mount-setup.c index 7df1562c8ab..048bd37a6c8 100644 --- a/src/core/mount-setup.c +++ b/src/core/mount-setup.c @@ -536,8 +536,17 @@ int mount_setup(bool loaded_policy, bool leave_propagation) { (void) mkdir_label("/run/systemd/system", 0755); /* Also create /run/systemd/inaccessible nodes, so that we always have something to mount - * inaccessible nodes from. */ - (void) make_inaccessible_nodes(NULL, UID_INVALID, GID_INVALID); + * inaccessible nodes from. If we run in a container the host might have created these for us already + * in /run/host/inaccessible/. Use those if we can, since that way we likely get access to block/char + * device nodes that are inaccessible, and if userns is used to nodes that are on mounts owned by a + * userns outside the container and thus nicely read-only and not remountable. */ + if (access("/run/host/inaccessible/", F_OK) < 0) { + if (errno != ENOENT) + log_debug_errno(errno, "Failed to check if /run/host/inaccessible exists, ignoring: %m"); + + (void) make_inaccessible_nodes("/run/systemd", UID_INVALID, GID_INVALID); + } else + (void) symlink("../host/inaccessible", "/run/systemd/inaccessible"); return 0; } diff --git a/src/login/user-runtime-dir.c b/src/login/user-runtime-dir.c index 38058d7b2a3..a56c527df8a 100644 --- a/src/login/user-runtime-dir.c +++ b/src/login/user-runtime-dir.c @@ -49,6 +49,7 @@ static int user_mkdir_runtime_path( uint64_t runtime_dir_size, uint64_t runtime_dir_inodes) { + const char *p; int r; assert(runtime_path); @@ -99,7 +100,9 @@ static int user_mkdir_runtime_path( } /* Set up inaccessible nodes now so they're available if we decide to use them with user namespaces. 
*/ - (void) make_inaccessible_nodes(runtime_path, uid, gid); + p = strjoina(runtime_path, "/systemd"); + (void) mkdir(p, 0755); + (void) make_inaccessible_nodes(p, uid, gid); return 0; fail: diff --git a/src/nspawn/nspawn.c b/src/nspawn/nspawn.c index c28868d4155..c263b0ff708 100644 --- a/src/nspawn/nspawn.c +++ b/src/nspawn/nspawn.c @@ -3523,7 +3523,7 @@ static int outer_child( (void) dev_setup(directory, arg_uid_shift, arg_uid_shift); - p = prefix_roota(directory, "/run"); + p = prefix_roota(directory, "/run/host"); (void) make_inaccessible_nodes(p, arg_uid_shift, arg_uid_shift); r = setup_pts(directory); diff --git a/src/shared/dev-setup.c b/src/shared/dev-setup.c index 6e57e2a99d3..528440b82fa 100644 --- a/src/shared/dev-setup.c +++ b/src/shared/dev-setup.c @@ -57,7 +57,7 @@ int dev_setup(const char *prefix, uid_t uid, gid_t gid) { } int make_inaccessible_nodes( - const char *runtime_dir, + const char *parent_dir, uid_t uid, gid_t gid) { @@ -65,28 +65,26 @@ int make_inaccessible_nodes( const char *name; mode_t mode; } table[] = { - { "/systemd", S_IFDIR | 0755 }, - { "/systemd/inaccessible", S_IFDIR | 0000 }, - { "/systemd/inaccessible/reg", S_IFREG | 0000 }, - { "/systemd/inaccessible/dir", S_IFDIR | 0000 }, - { "/systemd/inaccessible/fifo", S_IFIFO | 0000 }, - { "/systemd/inaccessible/sock", S_IFSOCK | 0000 }, + { "inaccessible", S_IFDIR | 0755 }, + { "inaccessible/reg", S_IFREG | 0000 }, + { "inaccessible/dir", S_IFDIR | 0000 }, + { "inaccessible/fifo", S_IFIFO | 0000 }, + { "inaccessible/sock", S_IFSOCK | 0000 }, /* The following two are likely to fail if we lack the privs for it (for example in an userns * environment, if CAP_SYS_MKNOD is missing, or if a device node policy prohibit major/minor of 0 * device nodes to be created). But that's entirely fine. Consumers of these files should carry * fallback to use a different node then, for example /inaccessible/sock, which is close * enough in behaviour and semantics for most uses. 
*/ - { "/systemd/inaccessible/chr", S_IFCHR | 0000 }, - { "/systemd/inaccessible/blk", S_IFBLK | 0000 }, + { "inaccessible/chr", S_IFCHR | 0000 }, + { "inaccessible/blk", S_IFBLK | 0000 }, }; _cleanup_umask_ mode_t u; - size_t i; int r; - if (!runtime_dir) - runtime_dir = "/run"; + if (!parent_dir) + parent_dir = "/run/systemd"; u = umask(0000); @@ -95,10 +93,10 @@ int make_inaccessible_nodes( * to lock down these nodes as much as we can, but otherwise try to match them as closely as possible with the * underlying file, i.e. in the best case we offer the same node type as the underlying node. */ - for (i = 0; i < ELEMENTSOF(table); i++) { + for (size_t i = 0; i < ELEMENTSOF(table); i++) { _cleanup_free_ char *path = NULL; - path = path_join(runtime_dir, table[i].name); + path = path_join(parent_dir, table[i].name); if (!path) return log_oom(); @@ -107,8 +105,7 @@ int make_inaccessible_nodes( else r = mknod_label(path, table[i].mode, makedev(0, 0)); if (r < 0) { - if (r != -EEXIST) - log_debug_errno(r, "Failed to create '%s', ignoring: %m", path); + log_debug_errno(r, "Failed to create '%s', ignoring: %m", path); continue; } diff --git a/src/shared/dev-setup.h b/src/shared/dev-setup.h index 72b90ec4de8..437c0e96e65 100644 --- a/src/shared/dev-setup.h +++ b/src/shared/dev-setup.h @@ -5,4 +5,4 @@ int dev_setup(const char *prefix, uid_t uid, gid_t gid); -int make_inaccessible_nodes(const char *root, uid_t uid, gid_t gid); +int make_inaccessible_nodes(const char *parent_dir, uid_t uid, gid_t gid); diff --git a/src/test/test-dev-setup.c b/src/test/test-dev-setup.c index 038484e4759..11196cd4d6a 100644 --- a/src/test/test-dev-setup.c +++ b/src/test/test-dev-setup.c @@ -3,6 +3,7 @@ #include "capability-util.h" #include "dev-setup.h" #include "fs-util.h" +#include "mkdir.h" #include "path-util.h" #include "rm-rf.h" #include "tmpfile-util.h" @@ -17,8 +18,8 @@ int main(int argc, char *argv[]) { assert_se(mkdtemp_malloc("/tmp/test-dev-setupXXXXXX", &p) >= 0); - f = 
prefix_roota(p, "/run"); - assert_se(mkdir(f, 0755) >= 0); + f = prefix_roota(p, "/run/systemd"); + assert_se(mkdir_p(f, 0755) >= 0); assert_se(make_inaccessible_nodes(f, 1, 1) >= 0);