From 90adaa25e894a580930ef2c3e65ab8db8295515a Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Tue, 17 Feb 2015 17:19:57 +0100 Subject: [PATCH] machined: move logic for bind mounting into containers from machinectl to machined This extends the bus interface, adding BindMountMachine() for bind mounting directories from the host into the container. --- src/machine/machine-dbus.c | 208 +++++++++++++++++++++++++++++ src/machine/machine-dbus.h | 1 + src/machine/machinectl.c | 211 ++++-------------------------- src/machine/machined-dbus.c | 22 ++++ units/systemd-machined.service.in | 11 +- 5 files changed, 263 insertions(+), 190 deletions(-) diff --git a/src/machine/machine-dbus.c b/src/machine/machine-dbus.c index b46f0a8dac..246c2cfd05 100644 --- a/src/machine/machine-dbus.c +++ b/src/machine/machine-dbus.c @@ -22,6 +22,7 @@ #include #include #include +#include #include "bus-util.h" #include "bus-label.h" @@ -32,6 +33,7 @@ #include "in-addr-util.h" #include "local-addresses.h" #include "path-util.h" +#include "mkdir.h" #include "bus-internal.h" #include "machine.h" #include "machine-dbus.h" @@ -518,6 +520,211 @@ int bus_machine_method_open_login(sd_bus *bus, sd_bus_message *message, void *us return sd_bus_send(bus, reply, NULL); } +int bus_machine_method_bind_mount(sd_bus *bus, sd_bus_message *message, void *userdata, sd_bus_error *error) { + _cleanup_close_pair_ int errno_pipe_fd[2] = { -1, -1 }; + char mount_slave[] = "/tmp/propagate.XXXXXX", *mount_tmp, *mount_outside, *p; + bool mount_slave_created = false, mount_slave_mounted = false, + mount_tmp_created = false, mount_tmp_mounted = false, + mount_outside_created = false, mount_outside_mounted = false; + const char *dest, *src; + Machine *m = userdata; + int read_only, make_directory; + pid_t child; + siginfo_t si; + int r; + + if (m->class != MACHINE_CONTAINER) + return sd_bus_error_setf(error, SD_BUS_ERROR_NOT_SUPPORTED, "Bind mounting is only supported on container machines."); + + r =
sd_bus_message_read(message, "ssbb", &src, &dest, &read_only, &make_directory); + if (r < 0) + return r; + + if (!path_is_absolute(src) || !path_is_safe(src)) + return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Source path must be absolute and not contain ../."); + + if (isempty(dest)) + dest = src; + else if (!path_is_absolute(dest) || !path_is_safe(dest)) + return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Destination path must be absolute and not contain ../."); + + /* One day, when bind mounting /proc/self/fd/n works across + * namespace boundaries we should rework this logic to make + * use of it... */ + + p = strjoina("/run/systemd/nspawn/propagate/", m->name, "/"); + if (laccess(p, F_OK) < 0) + return sd_bus_error_setf(error, SD_BUS_ERROR_NOT_SUPPORTED, "Container does not allow propagation of mount points."); + + /* Our goal is to install a new bind mount into the container, + possibly read-only. This is irritatingly complex + unfortunately, currently. + + First, we start by creating a private playground in /tmp, + that we can mount MS_SLAVE. (Which is necessary, since + MS_MOVE cannot be applied to mounts with MS_SHARED parent + mounts.) */ + + if (!mkdtemp(mount_slave)) + return sd_bus_error_set_errnof(error, errno, "Failed to create playground %s: %m", mount_slave); + + mount_slave_created = true; + + if (mount(mount_slave, mount_slave, NULL, MS_BIND, NULL) < 0) { + r = sd_bus_error_set_errnof(error, errno, "Failed to make bind mount %s: %m", mount_slave); + goto finish; + } + + mount_slave_mounted = true; + + if (mount(NULL, mount_slave, NULL, MS_SLAVE, NULL) < 0) { + r = sd_bus_error_set_errnof(error, errno, "Failed to remount slave %s: %m", mount_slave); + goto finish; + } + + /* Second, we mount the source directory to a directory inside + of our MS_SLAVE playground.
*/ + mount_tmp = strjoina(mount_slave, "/mount"); + if (mkdir(mount_tmp, 0700) < 0) { + r = sd_bus_error_set_errnof(error, errno, "Failed to create temporary mount point %s: %m", mount_tmp); + goto finish; + } + + mount_tmp_created = true; + + if (mount(src, mount_tmp, NULL, MS_BIND, NULL) < 0) { + r = sd_bus_error_set_errnof(error, errno, "Failed to overmount %s: %m", mount_tmp); + goto finish; + } + + mount_tmp_mounted = true; + + /* Third, we remount the new bind mount read-only if requested. */ + if (read_only) + if (mount(NULL, mount_tmp, NULL, MS_BIND|MS_REMOUNT|MS_RDONLY, NULL) < 0) { + r = sd_bus_error_set_errnof(error, errno, "Failed to remount read-only %s: %m", mount_tmp); + goto finish; + } + + /* Fourth, we move the new bind mount into the propagation + * directory. This way it will appear there read-only + * right-away. */ + + mount_outside = strjoina("/run/systemd/nspawn/propagate/", m->name, "/XXXXXX"); + if (!mkdtemp(mount_outside)) { + r = sd_bus_error_set_errnof(error, errno, "Cannot create propagation directory %s: %m", mount_outside); + goto finish; + } + + mount_outside_created = true; + + if (mount(mount_tmp, mount_outside, NULL, MS_MOVE, NULL) < 0) { + r = sd_bus_error_set_errnof(error, errno, "Failed to move %s to %s: %m", mount_tmp, mount_outside); + goto finish; + } + + mount_outside_mounted = true; + mount_tmp_mounted = false; + + (void) rmdir(mount_tmp); + mount_tmp_created = false; + + (void) umount(mount_slave); + mount_slave_mounted = false; + + (void) rmdir(mount_slave); + mount_slave_created = false; + + if (pipe2(errno_pipe_fd, O_CLOEXEC|O_NONBLOCK) < 0) { + r = sd_bus_error_set_errnof(error, errno, "Failed to create pipe: %m"); + goto finish; + } + + child = fork(); + if (child < 0) { + r = sd_bus_error_set_errnof(error, errno, "Failed to fork(): %m"); + goto finish; + } + + if (child == 0) { + const char *mount_inside; + int mntfd; + const char *q; + + errno_pipe_fd[0] = safe_close(errno_pipe_fd[0]); + + q =
procfs_file_alloca(m->leader, "ns/mnt"); + mntfd = open(q, O_RDONLY|O_NOCTTY|O_CLOEXEC); + if (mntfd < 0) { + r = log_error_errno(errno, "Failed to open mount namespace of leader: %m"); + goto child_fail; + } + + if (setns(mntfd, CLONE_NEWNS) < 0) { + r = log_error_errno(errno, "Failed to join namespace of leader: %m"); + goto child_fail; + } + + if (make_directory) + (void) mkdir_p(dest, 0755); + + /* Fifth, move the mount to the right place inside */ + mount_inside = strjoina("/run/systemd/nspawn/incoming/", basename(mount_outside)); + if (mount(mount_inside, dest, NULL, MS_MOVE, NULL) < 0) { + r = log_error_errno(errno, "Failed to mount: %m"); + goto child_fail; + } + + _exit(EXIT_SUCCESS); + + child_fail: + (void) write(errno_pipe_fd[1], &r, sizeof(r)); + errno_pipe_fd[1] = safe_close(errno_pipe_fd[1]); + + _exit(EXIT_FAILURE); + } + + errno_pipe_fd[1] = safe_close(errno_pipe_fd[1]); + + r = wait_for_terminate(child, &si); + if (r < 0) { + r = sd_bus_error_set_errnof(error, r, "Failed to wait for client: %m"); + goto finish; + } + if (si.si_code != CLD_EXITED) { + r = sd_bus_error_setf(error, SD_BUS_ERROR_FAILED, "Client died abnormally."); + goto finish; + } + if (si.si_status != EXIT_SUCCESS) { + + if (read(errno_pipe_fd[0], &r, sizeof(r)) == sizeof(r)) + r = sd_bus_error_set_errnof(error, r, "Failed to mount in container: %m"); + else + r = sd_bus_error_setf(error, SD_BUS_ERROR_FAILED, "Client failed."); + goto finish; + } + + r = sd_bus_reply_method_return(message, NULL); + +finish: + if (mount_outside_mounted) + umount(mount_outside); + if (mount_outside_created) + rmdir(mount_outside); + + if (mount_tmp_mounted) + umount(mount_tmp); + if (mount_tmp_created) + rmdir(mount_tmp); + + if (mount_slave_mounted) + umount(mount_slave); + if (mount_slave_created) + rmdir(mount_slave); + + return r; +} + const sd_bus_vtable machine_vtable[] = { SD_BUS_VTABLE_START(0), SD_BUS_PROPERTY("Name", "s", NULL, offsetof(Machine, name), SD_BUS_VTABLE_PROPERTY_CONST), @@
-537,6 +744,7 @@ const sd_bus_vtable machine_vtable[] = { SD_BUS_METHOD("GetOSRelease", NULL, "a{ss}", bus_machine_method_get_os_release, SD_BUS_VTABLE_UNPRIVILEGED), SD_BUS_METHOD("OpenPTY", NULL, "hs", bus_machine_method_open_pty, 0), SD_BUS_METHOD("OpenLogin", NULL, "hs", bus_machine_method_open_login, SD_BUS_VTABLE_UNPRIVILEGED), + SD_BUS_METHOD("BindMount", "ssbb", NULL, bus_machine_method_bind_mount, 0), SD_BUS_VTABLE_END }; diff --git a/src/machine/machine-dbus.h b/src/machine/machine-dbus.h index 601252722d..474fec7b10 100644 --- a/src/machine/machine-dbus.h +++ b/src/machine/machine-dbus.h @@ -36,6 +36,7 @@ int bus_machine_method_get_addresses(sd_bus *bus, sd_bus_message *message, void int bus_machine_method_get_os_release(sd_bus *bus, sd_bus_message *message, void *userdata, sd_bus_error *error); int bus_machine_method_open_pty(sd_bus *bus, sd_bus_message *message, void *userdata, sd_bus_error *error); int bus_machine_method_open_login(sd_bus *bus, sd_bus_message *message, void *userdata, sd_bus_error *error); +int bus_machine_method_bind_mount(sd_bus *bus, sd_bus_message *message, void *userdata, sd_bus_error *error); int machine_send_signal(Machine *m, bool new_machine); int machine_send_create_reply(Machine *m, sd_bus_error *error); diff --git a/src/machine/machinectl.c b/src/machine/machinectl.c index 9f8c68b184..c1cc5e7eb5 100644 --- a/src/machine/machinectl.c +++ b/src/machine/machinectl.c @@ -901,14 +901,14 @@ static int show_image(int argc, char *argv[], void *userdata) { const char *path = NULL; r = sd_bus_call_method( - bus, - "org.freedesktop.machine1", - "/org/freedesktop/machine1", - "org.freedesktop.machine1.Manager", - "GetImage", - &error, - &reply, - "s", argv[i]); + bus, + "org.freedesktop.machine1", + "/org/freedesktop/machine1", + "org.freedesktop.machine1.Manager", + "GetImage", + &error, + &reply, + "s", argv[i]); if (r < 0) { log_error("Could not get path to image: %s", bus_error_message(&error, -r)); return r; @@ -930,7 +930,7 @@
static int show_image(int argc, char *argv[], void *userdata) { static int kill_machine(int argc, char *argv[], void *userdata) { _cleanup_bus_error_free_ sd_bus_error error = SD_BUS_ERROR_NULL; sd_bus *bus = userdata; - int i; + int r, i; assert(bus); @@ -940,8 +940,6 @@ static int kill_machine(int argc, char *argv[], void *userdata) { arg_kill_who = "all"; for (i = 1; i < argc; i++) { - int r; - r = sd_bus_call_method( bus, "org.freedesktop.machine1", @@ -1143,187 +1141,32 @@ static int copy_files(int argc, char *argv[], void *userdata) { } static int bind_mount(int argc, char *argv[], void *userdata) { - char mount_slave[] = "/tmp/propagate.XXXXXX", *mount_tmp, *mount_outside, *p; + _cleanup_bus_error_free_ sd_bus_error error = SD_BUS_ERROR_NULL; sd_bus *bus = userdata; - pid_t child, leader; - const char *dest; - siginfo_t si; - bool mount_slave_created = false, mount_slave_mounted = false, - mount_tmp_created = false, mount_tmp_mounted = false, - mount_outside_created = false, mount_outside_mounted = false; int r; assert(bus); - /* One day, when bind mounting /proc/self/fd/n works across - * namespace boundaries we should rework this logic to make - * use of it... */ - - dest = argv[3] ?: argv[2]; - if (!path_is_absolute(dest)) { - log_error("Destination path not absolute."); - return -EINVAL; - } - - p = strjoina("/run/systemd/nspawn/propagate/", argv[1], "/"); - if (access(p, F_OK) < 0) { - log_error("Container does not allow propagation of mount points."); - return -ENOTSUP; - } - - r = machine_get_leader(bus, argv[1], &leader); - if (r < 0) - return r; - - /* Our goal is to install a new bind mount into the container, - possibly read-only. This is irritatingly complex - unfortunately, currently. - - First, we start by creating a private playground in /tmp, - that we can mount MS_SLAVE. (Which is necessary, since - MS_MOUNT cannot be applied to mounts with MS_SHARED parent - mounts.) 
*/ - - if (!mkdtemp(mount_slave)) - return log_error_errno(errno, "Failed to create playground: %m"); - - mount_slave_created = true; - - if (mount(mount_slave, mount_slave, NULL, MS_BIND, NULL) < 0) { - r = log_error_errno(errno, "Failed to make bind mount: %m"); - goto finish; - } - - mount_slave_mounted = true; - - if (mount(NULL, mount_slave, NULL, MS_SLAVE, NULL) < 0) { - r = log_error_errno(errno, "Failed to remount slave: %m"); - goto finish; - } - - /* Second, we mount the source directory to a directory inside - of our MS_SLAVE playground. */ - mount_tmp = strjoina(mount_slave, "/mount"); - if (mkdir(mount_tmp, 0700) < 0) { - r = log_error_errno(errno, "Failed to create temporary mount: %m"); - goto finish; - } - - mount_tmp_created = true; - - if (mount(argv[2], mount_tmp, NULL, MS_BIND, NULL) < 0) { - r = log_error_errno(errno, "Failed to overmount: %m"); - goto finish; - } - - mount_tmp_mounted = true; - - /* Third, we remount the new bind mount read-only if requested. */ - if (arg_read_only) - if (mount(NULL, mount_tmp, NULL, MS_BIND|MS_REMOUNT|MS_RDONLY, NULL) < 0) { - r = log_error_errno(errno, "Failed to mark read-only: %m"); - goto finish; - } - - /* Fourth, we move the new bind mount into the propagation - * directory. This way it will appear there read-only - * right-away. 
*/ - - mount_outside = strjoina("/run/systemd/nspawn/propagate/", argv[1], "/XXXXXX"); - if (!mkdtemp(mount_outside)) { - r = log_error_errno(errno, "Cannot create propagation directory: %m"); - goto finish; - } - - mount_outside_created = true; - - if (mount(mount_tmp, mount_outside, NULL, MS_MOVE, NULL) < 0) { - r = log_error_errno(errno, "Failed to move: %m"); - goto finish; - } - - mount_outside_mounted = true; - mount_tmp_mounted = false; - - (void) rmdir(mount_tmp); - mount_tmp_created = false; - - (void) umount(mount_slave); - mount_slave_mounted = false; - - (void) rmdir(mount_slave); - mount_slave_created = false; - - child = fork(); - if (child < 0) { - r = log_error_errno(errno, "Failed to fork(): %m"); - goto finish; - } - - if (child == 0) { - const char *mount_inside; - int mntfd; - const char *q; - - q = procfs_file_alloca(leader, "ns/mnt"); - mntfd = open(q, O_RDONLY|O_NOCTTY|O_CLOEXEC); - if (mntfd < 0) { - log_error_errno(errno, "Failed to open mount namespace of leader: %m"); - _exit(EXIT_FAILURE); - } - - if (setns(mntfd, CLONE_NEWNS) < 0) { - log_error_errno(errno, "Failed to join namespace of leader: %m"); - _exit(EXIT_FAILURE); - } - - if (arg_mkdir) - mkdir_p(dest, 0755); - - /* Fifth, move the mount to the right place inside */ - mount_inside = strjoina("/run/systemd/nspawn/incoming/", basename(mount_outside)); - if (mount(mount_inside, dest, NULL, MS_MOVE, NULL) < 0) { - log_error_errno(errno, "Failed to mount: %m"); - _exit(EXIT_FAILURE); - } - - _exit(EXIT_SUCCESS); - } - - r = wait_for_terminate(child, &si); + r = sd_bus_call_method( + bus, + "org.freedesktop.machine1", + "/org/freedesktop/machine1", + "org.freedesktop.machine1.Manager", + "BindMountMachine", + &error, + NULL, + "sssbb", + argv[1], + argv[2], + argv[3], + arg_read_only, + arg_mkdir); if (r < 0) { - log_error_errno(r, "Failed to wait for client: %m"); - goto finish; - } - if (si.si_code != CLD_EXITED) { - log_error("Client died abnormally."); - r = -EIO; - goto finish; - 
} - if (si.si_status != EXIT_SUCCESS) { - r = -EIO; - goto finish; + log_error("Failed to bind mount: %s", bus_error_message(&error, -r)); + return r; } - r = 0; - -finish: - if (mount_outside_mounted) - umount(mount_outside); - if (mount_outside_created) - rmdir(mount_outside); - - if (mount_tmp_mounted) - umount(mount_tmp); - if (mount_tmp_created) - umount(mount_tmp); - - if (mount_slave_mounted) - umount(mount_slave); - if (mount_slave_created) - umount(mount_slave); - - return r; + return 0; } static int on_machine_removed(sd_bus *bus, sd_bus_message *m, void *userdata, sd_bus_error *ret_error) { diff --git a/src/machine/machined-dbus.c b/src/machine/machined-dbus.c index ac19695c92..2b06d994a8 100644 --- a/src/machine/machined-dbus.c +++ b/src/machine/machined-dbus.c @@ -559,6 +559,27 @@ static int method_open_machine_login(sd_bus *bus, sd_bus_message *message, void return bus_machine_method_open_login(bus, message, machine, error); } +static int method_bind_mount_machine(sd_bus *bus, sd_bus_message *message, void *userdata, sd_bus_error *error) { + Manager *m = userdata; + Machine *machine; + const char *name; + int r; + + assert(bus); + assert(message); + assert(m); + + r = sd_bus_message_read(message, "s", &name); + if (r < 0) + return r; + + machine = hashmap_get(m->machines, name); + if (!machine) + return sd_bus_error_setf(error, BUS_ERROR_NO_SUCH_MACHINE, "No machine '%s' known", name); + + return bus_machine_method_bind_mount(bus, message, machine, error); +} + static int method_remove_image(sd_bus *bus, sd_bus_message *message, void *userdata, sd_bus_error *error) { _cleanup_(image_unrefp) Image* i = NULL; const char *name; @@ -672,6 +693,7 @@ const sd_bus_vtable manager_vtable[] = { SD_BUS_METHOD("RenameImage", "ss", NULL, method_rename_image, 0), SD_BUS_METHOD("CloneImage", "ssb", NULL, method_clone_image, 0), SD_BUS_METHOD("MarkImageReadOnly", "sb", NULL, method_mark_image_read_only, 0), + SD_BUS_METHOD("BindMountMachine", "sssbb", NULL, 
method_bind_mount_machine, 0), SD_BUS_SIGNAL("MachineNew", "so", 0), SD_BUS_SIGNAL("MachineRemoved", "so", 0), SD_BUS_VTABLE_END diff --git a/units/systemd-machined.service.in b/units/systemd-machined.service.in index 15f34d9db7..19c33959d6 100644 --- a/units/systemd-machined.service.in +++ b/units/systemd-machined.service.in @@ -15,10 +15,9 @@ After=machine.slice [Service] ExecStart=@rootlibexecdir@/systemd-machined BusName=org.freedesktop.machine1 -CapabilityBoundingSet=CAP_KILL CAP_SYS_PTRACE CAP_SYS_ADMIN CAP_SETGID CAP_SYS_CHROOT CAP_DAC_READ_SEARCH +CapabilityBoundingSet=CAP_KILL CAP_SYS_PTRACE CAP_SYS_ADMIN CAP_SETGID CAP_SYS_CHROOT CAP_DAC_READ_SEARCH CAP_DAC_OVERRIDE WatchdogSec=1min -PrivateTmp=yes -PrivateDevices=yes -PrivateNetwork=yes -ProtectSystem=full -ProtectHome=yes + +# Note that machined cannot be placed in a mount namespace, since it +# needs access to the host's mount namespace in order to implement the +# "machinectl bind" operation.