1
0
mirror of https://github.com/systemd/systemd.git synced 2025-03-10 16:58:28 +03:00

Small fixes to nspawn and other stuff (#35686)

Split out ouf #35685.
This commit is contained in:
Yu Watanabe 2024-12-20 11:03:59 +09:00 committed by GitHub
commit a3fecea5e2
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 32 additions and 19 deletions

View File

@ -475,7 +475,8 @@ int mount_sysfs(const char *dest, MountSettingsMask mount_settings) {
if (!full)
return log_oom();
(void) mkdir(full, 0755);
if (mkdir(full, 0755) < 0 && errno != EEXIST)
return log_error_errno(errno, "Failed to create directory '%s': %m", full);
if (FLAGS_SET(mount_settings, MOUNT_APPLY_APIVFS_RO))
extra_flags |= MS_RDONLY;
@ -1405,9 +1406,11 @@ done:
#define NSPAWN_PRIVATE_FULLY_VISIBLE_PROCFS "/run/host/proc"
#define NSPAWN_PRIVATE_FULLY_VISIBLE_SYSFS "/run/host/sys"
int pin_fully_visible_fs(void) {
int pin_fully_visible_api_fs(void) {
int r;
log_debug("Pinning fully visible API FS");
(void) mkdir_p(NSPAWN_PRIVATE_FULLY_VISIBLE_PROCFS, 0755);
(void) mkdir_p(NSPAWN_PRIVATE_FULLY_VISIBLE_SYSFS, 0755);
@ -1422,7 +1425,7 @@ int pin_fully_visible_fs(void) {
return 0;
}
static int do_wipe_fully_visible_fs(void) {
static int do_wipe_fully_visible_api_fs(void) {
if (umount2(NSPAWN_PRIVATE_FULLY_VISIBLE_PROCFS, MNT_DETACH) < 0)
return log_error_errno(errno, "Failed to unmount temporary proc: %m");
@ -1438,10 +1441,12 @@ static int do_wipe_fully_visible_fs(void) {
return 0;
}
int wipe_fully_visible_fs(int mntns_fd) {
int wipe_fully_visible_api_fs(int mntns_fd) {
_cleanup_close_ int orig_mntns_fd = -EBADF;
int r, rr;
log_debug("Wiping fully visible API FS");
r = namespace_open(0,
/* ret_pidns_fd = */ NULL,
&orig_mntns_fd,
@ -1459,7 +1464,7 @@ int wipe_fully_visible_fs(int mntns_fd) {
if (r < 0)
return log_error_errno(r, "Failed to enter mount namespace: %m");
rr = do_wipe_fully_visible_fs();
rr = do_wipe_fully_visible_api_fs();
r = namespace_enter(/* pidns_fd = */ -EBADF,
orig_mntns_fd,

View File

@ -73,5 +73,6 @@ int pivot_root_parse(char **pivot_root_new, char **pivot_root_old, const char *s
int setup_pivot_root(const char *directory, const char *pivot_root_new, const char *pivot_root_old);
int tmpfs_patch_options(const char *options,uid_t uid_shift, const char *selinux_apifs_context, char **ret);
int pin_fully_visible_fs(void);
int wipe_fully_visible_fs(int mntns_fd);
int pin_fully_visible_api_fs(void);
int wipe_fully_visible_api_fs(int mntns_fd);

View File

@ -42,6 +42,7 @@
#include "copy.h"
#include "cpu-set-util.h"
#include "dev-setup.h"
#include "devnum-util.h"
#include "discover-image.h"
#include "dissect-image.h"
#include "env-util.h"
@ -1250,7 +1251,7 @@ static int parse_argv(int argc, char *argv[]) {
arg_uid_range = UINT32_C(0x10000);
} else if (streq(optarg, "identity")) {
/* identity: User namespaces on, UID range is map the 0…0xFFFF range to
/* identity: User namespaces on, UID range is map of the 0…0xFFFF range to
* itself, i.e. we don't actually map anything, but do take benefit of
* isolation of capability sets. */
arg_userns_mode = USER_NAMESPACE_FIXED;
@ -2323,7 +2324,7 @@ static int copy_devnode_one(const char *dest, const char *node, bool ignore_mkno
return log_error_errno(r, "Failed to create '%s': %m", dn);
_cleanup_free_ char *sl = NULL;
if (asprintf(&sl, "%s/%u:%u", dn, major(st.st_rdev), minor(st.st_rdev)) < 0)
if (asprintf(&sl, "%s/" DEVNUM_FORMAT_STR, dn, DEVNUM_FORMAT_VAL(st.st_rdev)) < 0)
return log_oom();
_cleanup_free_ char *prefixed = path_join(dest, sl);
@ -2847,7 +2848,7 @@ static int reset_audit_loginuid(void) {
if (!arg_privileged)
return 0;
r = read_one_line_file("/proc/self/loginuid", &p);
r = read_virtual_file("/proc/self/loginuid", SIZE_MAX, &p, /* ret_size= */ NULL);
if (r == -ENOENT)
return 0;
if (r < 0)
@ -3254,6 +3255,7 @@ static int chase_and_update(char **p, unsigned flags) {
}
static int determine_uid_shift(const char *directory) {
assert(directory);
if (arg_userns_mode == USER_NAMESPACE_NO) {
arg_uid_shift = 0;
@ -3932,7 +3934,7 @@ static int outer_child(
_cleanup_(bind_user_context_freep) BindUserContext *bind_user_context = NULL;
_cleanup_strv_free_ char **os_release_pairs = NULL;
_cleanup_close_ int fd = -EBADF, mntns_fd = -EBADF;
_cleanup_close_ int mntns_fd = -EBADF;
bool idmap = false, enable_fuse;
const char *p;
pid_t pid;
@ -4325,6 +4327,7 @@ static int outer_child(
* visible. Hence there we do it the other way round: we first allocate a new set of namespaces
* (and fork for it) for which we then mount sysfs/procfs, and only then switch root. */
_cleanup_close_ int notify_fd = -EBADF;
if (arg_privileged) {
/* Mark everything as shared so our mounts get propagated down. This is required to make new
* bind mounts available in systemd services inside the container that create a new mount
@ -4355,16 +4358,16 @@ static int outer_child(
* Note, the inner child wouldn't be able to unmount the instances on its own since
* it doesn't own the originating mount namespace. IOW, the outer child needs to do
* this. */
r = pin_fully_visible_fs();
r = pin_fully_visible_api_fs();
if (r < 0)
return r;
}
fd = setup_notify_child(NULL);
notify_fd = setup_notify_child(NULL);
} else
fd = setup_notify_child(directory);
if (fd < 0)
return fd;
notify_fd = setup_notify_child(directory);
if (notify_fd < 0)
return notify_fd;
pid = raw_clone(SIGCHLD|CLONE_NEWNS|
arg_clone_ns_flags |
@ -4429,7 +4432,7 @@ static int outer_child(
return log_error_errno(SYNTHETIC_ERRNO(EIO),
"Short write while sending machine ID.");
l = send_one_fd(fd_outer_socket, fd, 0);
l = send_one_fd(fd_outer_socket, notify_fd, 0);
if (l < 0)
return log_error_errno(l, "Failed to send notify fd: %m");
@ -5623,7 +5626,7 @@ static int run_container(
return log_error_errno(SYNTHETIC_ERRNO(ESRCH), "Child died too early.");
if (arg_userns_mode != USER_NAMESPACE_NO) {
r = wipe_fully_visible_fs(mntns_fd);
r = wipe_fully_visible_api_fs(mntns_fd);
if (r < 0)
return r;
mntns_fd = safe_close(mntns_fd);

View File

@ -536,6 +536,8 @@ int mount_switch_root_full(const char *path, unsigned long mount_propagation_fla
}
}
log_debug("Successfully switched root to '%s'.", path);
/* Finally, let's establish the requested propagation flags. */
if (mount_propagation_flag == 0)
return 0;
@ -1319,7 +1321,7 @@ int make_userns(uid_t uid_shift, uid_t uid_range, uid_t source_owner, uid_t dest
* process whose only purpose is to give us a new user namespace. It's killed when we got it. */
if (!userns_shift_range_valid(uid_shift, uid_range))
return -EINVAL;
return log_debug_errno(SYNTHETIC_ERRNO(EINVAL), "Invalid UID range for user namespace.");
if (IN_SET(idmapping, REMOUNT_IDMAPPING_NONE, REMOUNT_IDMAPPING_HOST_ROOT)) {
if (asprintf(&line, UID_FMT " " UID_FMT " " UID_FMT "\n", 0u, uid_shift, uid_range) < 0)

View File

@ -87,6 +87,8 @@ int nsresource_allocate_userns(const char *name, uint64_t size) {
SD_JSON_BUILD_PAIR("userNamespaceFileDescriptor", SD_JSON_BUILD_UNSIGNED(userns_fd_idx)));
if (r < 0)
return log_debug_errno(r, "Failed to call AllocateUserRange() varlink call: %m");
if (streq_ptr(error_id, "io.systemd.NamespaceResource.UserNamespaceInterfaceNotSupported"))
return log_debug_errno(SYNTHETIC_ERRNO(EOPNOTSUPP), "Unprivileged user namespace delegation is not supported on this system.");
if (error_id)
return log_debug_errno(sd_varlink_error_to_errno(error_id, reply), "Failed to allocate user namespace with %" PRIu64 " users: %s", size, error_id);