mirror of
https://github.com/systemd/systemd-stable.git
synced 2024-12-23 17:34:00 +03:00
nspawn,pid1: pass "inaccessible" nodes from cntr mgr to pid1 payload via /run/host
Let's make /run/host the sole place we pass stuff from host to container in and place the "inaccessible" nodes in /run/host too. In contrast to the previous two commits this is a minor compat break, but not a relevant one I think. Previously the container manager would place these nodes in /run/systemd/inaccessible/ and that's where PID 1 in the container would try to add them too when missing. Container manager and PID 1 in the container would thus manage the same dir together. With this change the container manager now passes an immutable directory to the container and leaves /run/systemd entirely untouched, and managed exclusively by PID 1 inside the container, which is nice since it gives us a clear separation of who manages what. In order to make sure systemd then uses the /run/host/inaccessible/ nodes this commit changes PID 1 to look for that dir and, if it exists, symlink it to /run/systemd/inaccessible. Now, this will work fine if new nspawn and new PID 1 in the container work together, as then the symlink is created and the difference between the two dirs won't matter. For the case where an old nspawn invokes a new PID 1: in this case things work as they always worked: the dir is managed together. For the case where a different container manager invokes a new PID 1: in this case the nodes aren't typically passed in, and PID 1 in the container will try to create them and will likely fail partially (though gracefully) when trying to create char/block device nodes. This is fine though as there are fallbacks in place for that case. For the case where a new nspawn invokes an old PID 1: this is where the (minor) incompatibility happens: in this case new nspawn will place the nodes in the /run/host/inaccessible/ subdir, but the PID 1 in the container won't look for them there. Since the nodes are also not pre-created in /run/systemd/inaccessible/ PID 1 will try to create them there as if a different container manager sets them up. 
This is of course not sexy, but is not a total loss, since as mentioned fallbacks are in place anyway. Hence I think it's OK to accept this minor incompatibility.
This commit is contained in:
parent
e96ceabac9
commit
9fac502920
@ -536,8 +536,17 @@ int mount_setup(bool loaded_policy, bool leave_propagation) {
|
||||
(void) mkdir_label("/run/systemd/system", 0755);
|
||||
|
||||
/* Also create /run/systemd/inaccessible nodes, so that we always have something to mount
|
||||
* inaccessible nodes from. */
|
||||
(void) make_inaccessible_nodes(NULL, UID_INVALID, GID_INVALID);
|
||||
* inaccessible nodes from. If we run in a container the host might have created these for us already
|
||||
* in /run/host/inaccessible/. Use those if we can, since that way we likely get access to block/char
|
||||
* device nodes that are inaccessible, and if userns is used to nodes that are on mounts owned by a
|
||||
* userns outside the container and thus nicely read-only and not remountable. */
|
||||
if (access("/run/host/inaccessible/", F_OK) < 0) {
|
||||
if (errno != ENOENT)
|
||||
log_debug_errno(errno, "Failed to check if /run/host/inaccessible exists, ignoring: %m");
|
||||
|
||||
(void) make_inaccessible_nodes("/run/systemd", UID_INVALID, GID_INVALID);
|
||||
} else
|
||||
(void) symlink("../host/inaccessible", "/run/systemd/inaccessible");
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
@ -49,6 +49,7 @@ static int user_mkdir_runtime_path(
|
||||
uint64_t runtime_dir_size,
|
||||
uint64_t runtime_dir_inodes) {
|
||||
|
||||
const char *p;
|
||||
int r;
|
||||
|
||||
assert(runtime_path);
|
||||
@ -99,7 +100,9 @@ static int user_mkdir_runtime_path(
|
||||
}
|
||||
|
||||
/* Set up inaccessible nodes now so they're available if we decide to use them with user namespaces. */
|
||||
(void) make_inaccessible_nodes(runtime_path, uid, gid);
|
||||
p = strjoina(runtime_path, "/systemd");
|
||||
(void) mkdir(p, 0755);
|
||||
(void) make_inaccessible_nodes(p, uid, gid);
|
||||
return 0;
|
||||
|
||||
fail:
|
||||
|
@ -3523,7 +3523,7 @@ static int outer_child(
|
||||
|
||||
(void) dev_setup(directory, arg_uid_shift, arg_uid_shift);
|
||||
|
||||
p = prefix_roota(directory, "/run");
|
||||
p = prefix_roota(directory, "/run/host");
|
||||
(void) make_inaccessible_nodes(p, arg_uid_shift, arg_uid_shift);
|
||||
|
||||
r = setup_pts(directory);
|
||||
|
@ -57,7 +57,7 @@ int dev_setup(const char *prefix, uid_t uid, gid_t gid) {
|
||||
}
|
||||
|
||||
int make_inaccessible_nodes(
|
||||
const char *runtime_dir,
|
||||
const char *parent_dir,
|
||||
uid_t uid,
|
||||
gid_t gid) {
|
||||
|
||||
@ -65,28 +65,26 @@ int make_inaccessible_nodes(
|
||||
const char *name;
|
||||
mode_t mode;
|
||||
} table[] = {
|
||||
{ "/systemd", S_IFDIR | 0755 },
|
||||
{ "/systemd/inaccessible", S_IFDIR | 0000 },
|
||||
{ "/systemd/inaccessible/reg", S_IFREG | 0000 },
|
||||
{ "/systemd/inaccessible/dir", S_IFDIR | 0000 },
|
||||
{ "/systemd/inaccessible/fifo", S_IFIFO | 0000 },
|
||||
{ "/systemd/inaccessible/sock", S_IFSOCK | 0000 },
|
||||
{ "inaccessible", S_IFDIR | 0755 },
|
||||
{ "inaccessible/reg", S_IFREG | 0000 },
|
||||
{ "inaccessible/dir", S_IFDIR | 0000 },
|
||||
{ "inaccessible/fifo", S_IFIFO | 0000 },
|
||||
{ "inaccessible/sock", S_IFSOCK | 0000 },
|
||||
|
||||
/* The following two are likely to fail if we lack the privs for it (for example in an userns
|
||||
* environment, if CAP_SYS_MKNOD is missing, or if a device node policy prohibits major/minor of 0
|
||||
* device nodes to be created). But that's entirely fine. Consumers of these files should carry
|
||||
* fallback to use a different node then, for example <root>/inaccessible/sock, which is close
|
||||
* enough in behaviour and semantics for most uses. */
|
||||
{ "/systemd/inaccessible/chr", S_IFCHR | 0000 },
|
||||
{ "/systemd/inaccessible/blk", S_IFBLK | 0000 },
|
||||
{ "inaccessible/chr", S_IFCHR | 0000 },
|
||||
{ "inaccessible/blk", S_IFBLK | 0000 },
|
||||
};
|
||||
|
||||
_cleanup_umask_ mode_t u;
|
||||
size_t i;
|
||||
int r;
|
||||
|
||||
if (!runtime_dir)
|
||||
runtime_dir = "/run";
|
||||
if (!parent_dir)
|
||||
parent_dir = "/run/systemd";
|
||||
|
||||
u = umask(0000);
|
||||
|
||||
@ -95,10 +93,10 @@ int make_inaccessible_nodes(
|
||||
* to lock down these nodes as much as we can, but otherwise try to match them as closely as possible with the
|
||||
* underlying file, i.e. in the best case we offer the same node type as the underlying node. */
|
||||
|
||||
for (i = 0; i < ELEMENTSOF(table); i++) {
|
||||
for (size_t i = 0; i < ELEMENTSOF(table); i++) {
|
||||
_cleanup_free_ char *path = NULL;
|
||||
|
||||
path = path_join(runtime_dir, table[i].name);
|
||||
path = path_join(parent_dir, table[i].name);
|
||||
if (!path)
|
||||
return log_oom();
|
||||
|
||||
@ -107,8 +105,7 @@ int make_inaccessible_nodes(
|
||||
else
|
||||
r = mknod_label(path, table[i].mode, makedev(0, 0));
|
||||
if (r < 0) {
|
||||
if (r != -EEXIST)
|
||||
log_debug_errno(r, "Failed to create '%s', ignoring: %m", path);
|
||||
log_debug_errno(r, "Failed to create '%s', ignoring: %m", path);
|
||||
continue;
|
||||
}
|
||||
|
||||
|
@ -5,4 +5,4 @@
|
||||
|
||||
int dev_setup(const char *prefix, uid_t uid, gid_t gid);
|
||||
|
||||
int make_inaccessible_nodes(const char *root, uid_t uid, gid_t gid);
|
||||
int make_inaccessible_nodes(const char *parent_dir, uid_t uid, gid_t gid);
|
||||
|
@ -3,6 +3,7 @@
|
||||
#include "capability-util.h"
|
||||
#include "dev-setup.h"
|
||||
#include "fs-util.h"
|
||||
#include "mkdir.h"
|
||||
#include "path-util.h"
|
||||
#include "rm-rf.h"
|
||||
#include "tmpfile-util.h"
|
||||
@ -17,8 +18,8 @@ int main(int argc, char *argv[]) {
|
||||
|
||||
assert_se(mkdtemp_malloc("/tmp/test-dev-setupXXXXXX", &p) >= 0);
|
||||
|
||||
f = prefix_roota(p, "/run");
|
||||
assert_se(mkdir(f, 0755) >= 0);
|
||||
f = prefix_roota(p, "/run/systemd");
|
||||
assert_se(mkdir_p(f, 0755) >= 0);
|
||||
|
||||
assert_se(make_inaccessible_nodes(f, 1, 1) >= 0);
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user