1
1
mirror of https://github.com/systemd/systemd-stable.git synced 2024-12-23 17:34:00 +03:00

machine/basic: factor out helper function to add airlocked mount to namespace

This commit is contained in:
Luca Boccassi 2020-08-13 14:01:34 +01:00
parent 2a613b34cc
commit 6af52c3a45
3 changed files with 227 additions and 206 deletions

View File

@ -810,17 +810,9 @@ int bus_machine_method_open_shell(sd_bus_message *message, void *userdata, sd_bu
}
int bus_machine_method_bind_mount(sd_bus_message *message, void *userdata, sd_bus_error *error) {
_cleanup_close_pair_ int errno_pipe_fd[2] = { -1, -1 };
char mount_slave[] = "/tmp/propagate.XXXXXX", *mount_tmp, *mount_outside, *p;
bool mount_slave_created = false, mount_slave_mounted = false,
mount_tmp_created = false, mount_tmp_mounted = false,
mount_outside_created = false, mount_outside_mounted = false;
_cleanup_free_ char *chased_src = NULL;
int read_only, make_file_or_directory;
const char *dest, *src;
const char *dest, *src, *propagate_directory;
Machine *m = userdata;
struct stat st;
pid_t child;
uid_t uid;
int r;
@ -862,205 +854,15 @@ int bus_machine_method_bind_mount(sd_bus_message *message, void *userdata, sd_bu
if (uid != 0)
return sd_bus_error_setf(error, SD_BUS_ERROR_NOT_SUPPORTED, "Can't bind mount on container with user namespacing applied.");
/* One day, when bind mounting /proc/self/fd/n works across
* namespace boundaries we should rework this logic to make
* use of it... */
p = strjoina("/run/systemd/nspawn/propagate/", m->name, "/");
if (laccess(p, F_OK) < 0)
return sd_bus_error_setf(error, SD_BUS_ERROR_NOT_SUPPORTED, "Container does not allow propagation of mount points.");
r = chase_symlinks(src, NULL, CHASE_TRAIL_SLASH, &chased_src, NULL);
propagate_directory = strjoina("/run/systemd/nspawn/propagate/", m->name);
r = bind_mount_in_namespace(m->leader,
propagate_directory,
"/run/host/incoming/",
src, dest, read_only, make_file_or_directory);
if (r < 0)
return sd_bus_error_set_errnof(error, r, "Failed to resolve source path: %m");
return sd_bus_error_set_errnof(error, r, "Failed to mount %s on %s in machine's namespace: %m", src, dest);
if (lstat(chased_src, &st) < 0)
return sd_bus_error_set_errnof(error, errno, "Failed to stat() source path: %m");
if (S_ISLNK(st.st_mode)) /* This shouldn't really happen, given that we just chased the symlinks above, but let's better be safe… */
return sd_bus_error_setf(error, SD_BUS_ERROR_NOT_SUPPORTED, "Source directory can't be a symbolic link");
/* Our goal is to install a new bind mount into the container,
possibly read-only. This is irritatingly complex
unfortunately, currently.
First, we start by creating a private playground in /tmp,
that we can mount MS_SLAVE. (Which is necessary, since
MS_MOVE cannot be applied to mounts with MS_SHARED parent
mounts.) */
if (!mkdtemp(mount_slave))
return sd_bus_error_set_errnof(error, errno, "Failed to create playground %s: %m", mount_slave);
mount_slave_created = true;
r = mount_nofollow_verbose(LOG_DEBUG, mount_slave, mount_slave, NULL, MS_BIND, NULL);
if (r < 0) {
sd_bus_error_set_errnof(error, r, "Failed to make bind mount %s: %m", mount_slave);
goto finish;
}
mount_slave_mounted = true;
r = mount_nofollow_verbose(LOG_DEBUG, NULL, mount_slave, NULL, MS_SLAVE, NULL);
if (r < 0) {
sd_bus_error_set_errnof(error, r, "Failed to remount slave %s: %m", mount_slave);
goto finish;
}
/* Second, we mount the source file or directory to a directory inside of our MS_SLAVE playground. */
mount_tmp = strjoina(mount_slave, "/mount");
r = make_mount_point_inode_from_stat(&st, mount_tmp, 0700);
if (r < 0) {
sd_bus_error_set_errnof(error, r, "Failed to create temporary mount point %s: %m", mount_tmp);
goto finish;
}
mount_tmp_created = true;
r = mount_nofollow_verbose(LOG_DEBUG, chased_src, mount_tmp, NULL, MS_BIND, NULL);
if (r < 0) {
sd_bus_error_set_errnof(error, r, "Failed to mount %s: %m", chased_src);
goto finish;
}
mount_tmp_mounted = true;
/* Third, we remount the new bind mount read-only if requested. */
if (read_only) {
r = mount_nofollow_verbose(LOG_DEBUG, NULL, mount_tmp, NULL, MS_BIND|MS_REMOUNT|MS_RDONLY, NULL);
if (r < 0) {
sd_bus_error_set_errnof(error, r, "Failed to remount read-only %s: %m", mount_tmp);
goto finish;
}
}
/* Fourth, we move the new bind mount into the propagation directory. This way it will appear there read-only
* right-away. */
mount_outside = strjoina("/run/systemd/nspawn/propagate/", m->name, "/XXXXXX");
if (S_ISDIR(st.st_mode))
r = mkdtemp(mount_outside) ? 0 : -errno;
else {
r = mkostemp_safe(mount_outside);
safe_close(r);
}
if (r < 0) {
sd_bus_error_set_errnof(error, r, "Cannot create propagation file or directory %s: %m", mount_outside);
goto finish;
}
mount_outside_created = true;
r = mount_nofollow_verbose(LOG_DEBUG, mount_tmp, mount_outside, NULL, MS_MOVE, NULL);
if (r < 0) {
sd_bus_error_set_errnof(error, r, "Failed to move %s to %s: %m", mount_tmp, mount_outside);
goto finish;
}
mount_outside_mounted = true;
mount_tmp_mounted = false;
if (S_ISDIR(st.st_mode))
(void) rmdir(mount_tmp);
else
(void) unlink(mount_tmp);
mount_tmp_created = false;
(void) umount_verbose(LOG_DEBUG, mount_slave, UMOUNT_NOFOLLOW);
mount_slave_mounted = false;
(void) rmdir(mount_slave);
mount_slave_created = false;
if (pipe2(errno_pipe_fd, O_CLOEXEC|O_NONBLOCK) < 0) {
r = sd_bus_error_set_errnof(error, errno, "Failed to create pipe: %m");
goto finish;
}
r = safe_fork("(sd-bindmnt)", FORK_RESET_SIGNALS, &child);
if (r < 0) {
sd_bus_error_set_errnof(error, r, "Failed to fork(): %m");
goto finish;
}
if (r == 0) {
const char *mount_inside, *q;
int mntfd;
errno_pipe_fd[0] = safe_close(errno_pipe_fd[0]);
q = procfs_file_alloca(m->leader, "ns/mnt");
mntfd = open(q, O_RDONLY|O_NOCTTY|O_CLOEXEC);
if (mntfd < 0) {
r = log_error_errno(errno, "Failed to open mount namespace of leader: %m");
goto child_fail;
}
if (setns(mntfd, CLONE_NEWNS) < 0) {
r = log_error_errno(errno, "Failed to join namespace of leader: %m");
goto child_fail;
}
if (make_file_or_directory) {
(void) mkdir_parents(dest, 0755);
(void) make_mount_point_inode_from_stat(&st, dest, 0700);
}
mount_inside = strjoina("/run/host/incoming/", basename(mount_outside));
r = mount_nofollow_verbose(LOG_ERR, mount_inside, dest, NULL, MS_MOVE, NULL);
if (r < 0)
goto child_fail;
_exit(EXIT_SUCCESS);
child_fail:
(void) write(errno_pipe_fd[1], &r, sizeof(r));
errno_pipe_fd[1] = safe_close(errno_pipe_fd[1]);
_exit(EXIT_FAILURE);
}
errno_pipe_fd[1] = safe_close(errno_pipe_fd[1]);
r = wait_for_terminate_and_check("(sd-bindmnt)", child, 0);
if (r < 0) {
r = sd_bus_error_set_errnof(error, r, "Failed to wait for child: %m");
goto finish;
}
if (r != EXIT_SUCCESS) {
if (read(errno_pipe_fd[0], &r, sizeof(r)) == sizeof(r))
r = sd_bus_error_set_errnof(error, r, "Failed to mount: %m");
else
r = sd_bus_error_setf(error, SD_BUS_ERROR_FAILED, "Child failed.");
goto finish;
}
r = sd_bus_reply_method_return(message, NULL);
finish:
if (mount_outside_mounted)
(void) umount_verbose(LOG_DEBUG, mount_outside, UMOUNT_NOFOLLOW);
if (mount_outside_created) {
if (S_ISDIR(st.st_mode))
(void) rmdir(mount_outside);
else
(void) unlink(mount_outside);
}
if (mount_tmp_mounted)
(void) umount_verbose(LOG_DEBUG, mount_tmp, UMOUNT_NOFOLLOW);
if (mount_tmp_created) {
if (S_ISDIR(st.st_mode))
(void) rmdir(mount_tmp);
else
(void) unlink(mount_tmp);
}
if (mount_slave_mounted)
(void) umount_verbose(LOG_DEBUG, mount_slave, UMOUNT_NOFOLLOW);
if (mount_slave_created)
(void) rmdir(mount_slave);
return r;
return sd_bus_reply_method_return(message, NULL);
}
int bus_machine_method_copy(sd_bus_message *message, void *userdata, sd_bus_error *error) {

View File

@ -14,15 +14,18 @@
#include "fs-util.h"
#include "hashmap.h"
#include "libmount-util.h"
#include "mkdir.h"
#include "mount-util.h"
#include "mountpoint-util.h"
#include "parse-util.h"
#include "path-util.h"
#include "process-util.h"
#include "set.h"
#include "stat-util.h"
#include "stdio-util.h"
#include "string-util.h"
#include "strv.h"
#include "tmpfile-util.h"
int mount_fd(const char *source,
int target_fd,
@ -742,3 +745,217 @@ int mount_option_mangle(
return 0;
}
/* Bind mounts 'src' (a path in the caller's mount namespace) onto 'dest' inside the mount namespace of
 * the process 'target'.
 *
 * The mount is first staged below 'propagate_path' on the calling side. A forked child then joins the
 * target's mount namespace and moves the mount from below 'incoming_path' (the location where the staged
 * mount is expected to show up inside the target) to 'dest'. 'incoming_path' is concatenated verbatim
 * with the name of the staged entry, hence it must end in a slash.
 *
 * If 'read_only' is true the bind mount is remounted read-only before it is staged. If
 * 'make_file_or_directory' is true the child tries to create 'dest' (and its parents) before mounting;
 * failures to do so are ignored and surface as a failure of the final move instead.
 *
 * Returns 0 on success, a negative errno-style error on failure. */
int bind_mount_in_namespace(
                pid_t target,
                const char *propagate_path,
                const char *incoming_path,
                const char *src,
                const char *dest,
                bool read_only,
                bool make_file_or_directory) {

        _cleanup_close_pair_ int errno_pipe_fd[2] = { -1, -1 };
        char mount_slave[] = "/tmp/propagate.XXXXXX", *mount_tmp, *mount_outside, *p;
        bool mount_slave_created = false, mount_slave_mounted = false,
                mount_tmp_created = false, mount_tmp_mounted = false,
                mount_outside_created = false, mount_outside_mounted = false;
        _cleanup_free_ char *chased_src = NULL;
        struct stat st;
        pid_t child;
        int r;

        assert(target > 0);
        assert(propagate_path);
        assert(incoming_path);
        assert(src);
        assert(dest);

        /* One day, when bind mounting /proc/self/fd/n works across
         * namespace boundaries we should rework this logic to make
         * use of it... */

        p = strjoina(propagate_path, "/");
        r = laccess(p, F_OK);
        if (r < 0)
                /* A missing propagation directory is reported as "not supported" rather than "not found" */
                return log_debug_errno(r == -ENOENT ? SYNTHETIC_ERRNO(EOPNOTSUPP) : r, "Target does not allow propagation of mount points");

        r = chase_symlinks(src, NULL, CHASE_TRAIL_SLASH, &chased_src, NULL);
        if (r < 0)
                return log_debug_errno(r, "Failed to resolve source path of %s: %m", src);

        if (lstat(chased_src, &st) < 0)
                return log_debug_errno(errno, "Failed to stat() resolved source path %s: %m", chased_src);
        if (S_ISLNK(st.st_mode)) /* This shouldn't really happen, given that we just chased the symlinks above, but let's better be safe… */
                return log_debug_errno(SYNTHETIC_ERRNO(EOPNOTSUPP), "Source directory %s can't be a symbolic link", chased_src);

        /* Our goal is to install a new bind mount into the container,
           possibly read-only. This is irritatingly complex
           unfortunately, currently.

           First, we start by creating a private playground in /tmp,
           that we can mount MS_SLAVE. (Which is necessary, since
           MS_MOVE cannot be applied to mounts with MS_SHARED parent
           mounts.) */

        if (!mkdtemp(mount_slave))
                return log_debug_errno(errno, "Failed to create playground %s: %m", mount_slave);

        mount_slave_created = true;

        r = mount_nofollow_verbose(LOG_DEBUG, mount_slave, mount_slave, NULL, MS_BIND, NULL);
        if (r < 0)
                goto finish;

        mount_slave_mounted = true;

        r = mount_nofollow_verbose(LOG_DEBUG, NULL, mount_slave, NULL, MS_SLAVE, NULL);
        if (r < 0)
                goto finish;

        /* Second, we mount the source file or directory to a directory inside of our MS_SLAVE playground. */
        mount_tmp = strjoina(mount_slave, "/mount");
        r = make_mount_point_inode_from_stat(&st, mount_tmp, 0700);
        if (r < 0) {
                log_debug_errno(r, "Failed to create temporary mount point %s: %m", mount_tmp);
                goto finish;
        }

        mount_tmp_created = true;

        r = mount_nofollow_verbose(LOG_DEBUG, chased_src, mount_tmp, NULL, MS_BIND, NULL);
        if (r < 0)
                goto finish;

        mount_tmp_mounted = true;

        /* Third, we remount the new bind mount read-only if requested. */
        if (read_only) {
                r = mount_nofollow_verbose(LOG_DEBUG, NULL, mount_tmp, NULL, MS_BIND|MS_REMOUNT|MS_RDONLY, NULL);
                if (r < 0)
                        goto finish;
        }

        /* Fourth, we move the new bind mount into the propagation directory. This way it will appear there read-only
         * right-away. */

        mount_outside = strjoina(propagate_path, "/XXXXXX");
        if (S_ISDIR(st.st_mode))
                r = mkdtemp(mount_outside) ? 0 : -errno;
        else {
                /* We only need the inode to exist as a mount point, not the open file descriptor */
                r = mkostemp_safe(mount_outside);
                safe_close(r);
        }
        if (r < 0) {
                log_debug_errno(r, "Cannot create propagation file or directory %s: %m", mount_outside);
                goto finish;
        }

        mount_outside_created = true;

        r = mount_nofollow_verbose(LOG_DEBUG, mount_tmp, mount_outside, NULL, MS_MOVE, NULL);
        if (r < 0)
                goto finish;

        mount_outside_mounted = true;
        mount_tmp_mounted = false;

        /* The playground has served its purpose, tear it down before forking */
        if (S_ISDIR(st.st_mode))
                (void) rmdir(mount_tmp);
        else
                (void) unlink(mount_tmp);
        mount_tmp_created = false;

        (void) umount_verbose(LOG_DEBUG, mount_slave, UMOUNT_NOFOLLOW);
        mount_slave_mounted = false;

        (void) rmdir(mount_slave);
        mount_slave_created = false;

        if (pipe2(errno_pipe_fd, O_CLOEXEC|O_NONBLOCK) < 0) {
                /* Propagate the error; otherwise the 0 left over from the successful move above would be
                 * returned and the caller would assume the mount was established. */
                r = log_debug_errno(errno, "Failed to create pipe: %m");
                goto finish;
        }

        r = safe_fork("(sd-bindmnt)", FORK_RESET_SIGNALS, &child);
        if (r < 0)
                goto finish;
        if (r == 0) {
                const char *mount_inside, *q;
                int mntfd;

                /* Child: report failures to the parent through the write end of the pipe */
                errno_pipe_fd[0] = safe_close(errno_pipe_fd[0]);

                q = procfs_file_alloca(target, "ns/mnt");
                mntfd = open(q, O_RDONLY|O_NOCTTY|O_CLOEXEC);
                if (mntfd < 0) {
                        r = log_error_errno(errno, "Failed to open mount namespace of leader: %m");
                        goto child_fail;
                }

                if (setns(mntfd, CLONE_NEWNS) < 0) {
                        r = log_error_errno(errno, "Failed to join namespace of leader: %m");
                        goto child_fail;
                }

                if (make_file_or_directory) {
                        /* Best effort only: if this fails the MS_MOVE below fails and reports the error */
                        (void) mkdir_parents(dest, 0755);
                        (void) make_mount_point_inode_from_stat(&st, dest, 0700);
                }

                /* Fifth, move the mount to the right place inside */
                mount_inside = strjoina(incoming_path, basename(mount_outside));
                r = mount_nofollow_verbose(LOG_ERR, mount_inside, dest, NULL, MS_MOVE, NULL);
                if (r < 0)
                        goto child_fail;

                _exit(EXIT_SUCCESS);

        child_fail:
                (void) write(errno_pipe_fd[1], &r, sizeof(r));
                errno_pipe_fd[1] = safe_close(errno_pipe_fd[1]);

                _exit(EXIT_FAILURE);
        }

        errno_pipe_fd[1] = safe_close(errno_pipe_fd[1]);

        r = wait_for_terminate_and_check("(sd-bindmnt)", child, 0);
        if (r < 0) {
                log_debug_errno(r, "Failed to wait for child: %m");
                goto finish;
        }
        if (r != EXIT_SUCCESS) {
                int child_error;

                /* Read into a separate variable so that a short or failed read cannot leave a half-written
                 * or positive value in r, which callers would mistake for success. */
                if (read(errno_pipe_fd[0], &child_error, sizeof(child_error)) == sizeof(child_error))
                        r = log_debug_errno(child_error, "Failed to mount: %m");
                else
                        r = log_debug_errno(SYNTHETIC_ERRNO(EPROTO), "Child failed.");
                goto finish;
        }

finish:
        /* Undo whatever is still in place on the calling side, in reverse order of creation. On success this
         * removes the staging mount below the propagation directory again. */
        if (mount_outside_mounted)
                (void) umount_verbose(LOG_DEBUG, mount_outside, UMOUNT_NOFOLLOW);
        if (mount_outside_created) {
                if (S_ISDIR(st.st_mode))
                        (void) rmdir(mount_outside);
                else
                        (void) unlink(mount_outside);
        }

        if (mount_tmp_mounted)
                (void) umount_verbose(LOG_DEBUG, mount_tmp, UMOUNT_NOFOLLOW);
        if (mount_tmp_created) {
                if (S_ISDIR(st.st_mode))
                        (void) rmdir(mount_tmp);
                else
                        (void) unlink(mount_tmp);
        }

        if (mount_slave_mounted)
                (void) umount_verbose(LOG_DEBUG, mount_slave, UMOUNT_NOFOLLOW);
        if (mount_slave_created)
                (void) rmdir(mount_slave);

        return r;
}

View File

@ -97,3 +97,5 @@ static inline char* umount_and_rmdir_and_free(char *p) {
return mfree(p);
}
DEFINE_TRIVIAL_CLEANUP_FUNC(char*, umount_and_rmdir_and_free);
/* Bind mounts 'src' onto 'dest' inside the mount namespace of process 'target'. The mount is staged
 * below 'propagate_path' on the calling side and picked up from below 'incoming_path' after joining the
 * target's mount namespace. 'read_only' requests a read-only bind mount; 'make_file_or_directory' asks
 * for 'dest' to be created before mounting. Callers detect failure by checking for a negative return. */
int bind_mount_in_namespace(pid_t target, const char *propagate_path, const char *incoming_path, const char *src, const char *dest, bool read_only, bool make_file_or_directory);