mirror of
https://github.com/systemd/systemd-stable.git
synced 2024-12-31 17:17:43 +03:00
nspawn: add new --bind-user= option for binding a host user into the container
This new option does three things for a host user specified via --bind-user=: 1. Bind mount the home directory from the host into /run/host/home/<username>. 2. Install an additional user namespace UID/GID mapping, mapping the host UID/GID of the host user to an unused one from the container in the range 60514…60577. 3. Synthesize a user/group record for the user/group under the same name as on the host, with minimized information, and the UID/GID set to the mapped UID/GID. This data is written to /run/host/userdb/ where nss-systemd will pick it up. This should make sharing users and home directories from host into the container pretty seamless, under some conditions: 1. User namespacing must be used. 2. The host UID/GID of the user/group cannot be in the range assigned to the container (the kernel already refuses this, as this would mean two host UIDs/GIDs might end up being mapped to the same container UID/GID). 3. There's a free UID/GID in the aforementioned range in the container, and the name of the user/group is not used in the container. 4. Container payload is new enough to include an nss-systemd version that picks up records from /run/host/userdb/.
This commit is contained in:
parent
91181e075b
commit
2f89304490
@ -1,6 +1,8 @@
|
||||
# SPDX-License-Identifier: LGPL-2.1-or-later
|
||||
|
||||
libnspawn_core_sources = files('''
|
||||
nspawn-bind-user.c
|
||||
nspawn-bind-user.h
|
||||
nspawn-cgroup.c
|
||||
nspawn-cgroup.h
|
||||
nspawn-creds.c
|
||||
|
479
src/nspawn/nspawn-bind-user.c
Normal file
479
src/nspawn/nspawn-bind-user.c
Normal file
@ -0,0 +1,479 @@
|
||||
/* SPDX-License-Identifier: LGPL-2.1-or-later */
|
||||
|
||||
#include "fd-util.h"
|
||||
#include "fileio.h"
|
||||
#include "format-util.h"
|
||||
#include "fs-util.h"
|
||||
#include "nspawn-bind-user.h"
|
||||
#include "nspawn.h"
|
||||
#include "path-util.h"
|
||||
#include "user-util.h"
|
||||
#include "userdb.h"
|
||||
|
||||
#define MAP_UID_START 60514
|
||||
#define MAP_UID_END 60577
|
||||
|
||||
static int check_etc_passwd_collisions(
|
||||
const char *directory,
|
||||
const char *name,
|
||||
uid_t uid) {
|
||||
|
||||
_cleanup_fclose_ FILE *f = NULL;
|
||||
int r;
|
||||
|
||||
assert(directory);
|
||||
assert(name || uid_is_valid(uid));
|
||||
|
||||
r = chase_symlinks_and_fopen_unlocked("/etc/passwd", directory, CHASE_PREFIX_ROOT, "re", &f, NULL);
|
||||
if (r == -ENOENT)
|
||||
return 0; /* no user database? then no user, hence no collision */
|
||||
if (r < 0)
|
||||
return log_error_errno(r, "Failed to open /etc/passwd of container: %m");
|
||||
|
||||
for (;;) {
|
||||
struct passwd *pw;
|
||||
|
||||
r = fgetpwent_sane(f, &pw);
|
||||
if (r < 0)
|
||||
return log_error_errno(r, "Failed to iterate through /etc/passwd of container: %m");
|
||||
if (r == 0) /* EOF */
|
||||
return 0; /* no collision */
|
||||
|
||||
if (name && streq_ptr(pw->pw_name, name))
|
||||
return 1; /* name collision */
|
||||
if (uid_is_valid(uid) && pw->pw_uid == uid)
|
||||
return 1; /* UID collision */
|
||||
}
|
||||
}
|
||||
|
||||
static int check_etc_group_collisions(
|
||||
const char *directory,
|
||||
const char *name,
|
||||
gid_t gid) {
|
||||
|
||||
_cleanup_fclose_ FILE *f = NULL;
|
||||
int r;
|
||||
|
||||
assert(directory);
|
||||
assert(name || gid_is_valid(gid));
|
||||
|
||||
r = chase_symlinks_and_fopen_unlocked("/etc/group", directory, CHASE_PREFIX_ROOT, "re", &f, NULL);
|
||||
if (r == -ENOENT)
|
||||
return 0; /* no group database? then no group, hence no collision */
|
||||
if (r < 0)
|
||||
return log_error_errno(r, "Failed to open /etc/group of container: %m");
|
||||
|
||||
for (;;) {
|
||||
struct group *gr;
|
||||
|
||||
r = fgetgrent_sane(f, &gr);
|
||||
if (r < 0)
|
||||
return log_error_errno(r, "Failed to iterate through /etc/group of container: %m");
|
||||
if (r == 0)
|
||||
return 0; /* no collision */
|
||||
|
||||
if (name && streq_ptr(gr->gr_name, name))
|
||||
return 1; /* name collision */
|
||||
if (gid_is_valid(gid) && gr->gr_gid == gid)
|
||||
return 1; /* gid collision */
|
||||
}
|
||||
}
|
||||
|
||||
static int convert_user(
|
||||
const char *directory,
|
||||
UserRecord *u,
|
||||
GroupRecord *g,
|
||||
uid_t allocate_uid,
|
||||
UserRecord **ret_converted_user,
|
||||
GroupRecord **ret_converted_group) {
|
||||
|
||||
_cleanup_(group_record_unrefp) GroupRecord *converted_group = NULL;
|
||||
_cleanup_(user_record_unrefp) UserRecord *converted_user = NULL;
|
||||
_cleanup_free_ char *h = NULL;
|
||||
JsonVariant *p, *hp = NULL;
|
||||
int r;
|
||||
|
||||
assert(u);
|
||||
assert(g);
|
||||
assert(u->gid == g->gid);
|
||||
|
||||
r = check_etc_passwd_collisions(directory, u->user_name, UID_INVALID);
|
||||
if (r < 0)
|
||||
return r;
|
||||
if (r > 0)
|
||||
return log_error_errno(SYNTHETIC_ERRNO(EBUSY),
|
||||
"Sorry, the user '%s' already exists in the container.", u->user_name);
|
||||
|
||||
r = check_etc_group_collisions(directory, g->group_name, GID_INVALID);
|
||||
if (r < 0)
|
||||
return r;
|
||||
if (r > 0)
|
||||
return log_error_errno(SYNTHETIC_ERRNO(EBUSY),
|
||||
"Sorry, the group '%s' already exists in the container.", g->group_name);
|
||||
|
||||
h = path_join("/run/host/home/", u->user_name);
|
||||
if (!h)
|
||||
return log_oom();
|
||||
|
||||
/* Acquire the source hashed password array as-is, so that it retains the JSON_VARIANT_SENSITIVE flag */
|
||||
p = json_variant_by_key(u->json, "privileged");
|
||||
if (p)
|
||||
hp = json_variant_by_key(p, "hashedPassword");
|
||||
|
||||
r = user_record_build(
|
||||
&converted_user,
|
||||
JSON_BUILD_OBJECT(
|
||||
JSON_BUILD_PAIR("userName", JSON_BUILD_STRING(u->user_name)),
|
||||
JSON_BUILD_PAIR("uid", JSON_BUILD_UNSIGNED(allocate_uid)),
|
||||
JSON_BUILD_PAIR("gid", JSON_BUILD_UNSIGNED(allocate_uid)),
|
||||
JSON_BUILD_PAIR_CONDITION(u->disposition >= 0, "disposition", JSON_BUILD_STRING(user_disposition_to_string(u->disposition))),
|
||||
JSON_BUILD_PAIR("homeDirectory", JSON_BUILD_STRING(h)),
|
||||
JSON_BUILD_PAIR("service", JSON_BUILD_STRING("io.systemd.NSpawn")),
|
||||
JSON_BUILD_PAIR_CONDITION(!strv_isempty(u->hashed_password), "privileged", JSON_BUILD_OBJECT(
|
||||
JSON_BUILD_PAIR("hashedPassword", JSON_BUILD_VARIANT(hp))))));
|
||||
if (r < 0)
|
||||
return log_error_errno(r, "Failed to build container user record: %m");
|
||||
|
||||
r = group_record_build(
|
||||
&converted_group,
|
||||
JSON_BUILD_OBJECT(
|
||||
JSON_BUILD_PAIR("groupName", JSON_BUILD_STRING(g->group_name)),
|
||||
JSON_BUILD_PAIR("gid", JSON_BUILD_UNSIGNED(allocate_uid)),
|
||||
JSON_BUILD_PAIR_CONDITION(g->disposition >= 0, "disposition", JSON_BUILD_STRING(user_disposition_to_string(g->disposition))),
|
||||
JSON_BUILD_PAIR("service", JSON_BUILD_STRING("io.systemd.NSpawn"))));
|
||||
if (r < 0)
|
||||
return log_error_errno(r, "Failed to build container group record: %m");
|
||||
|
||||
*ret_converted_user = TAKE_PTR(converted_user);
|
||||
*ret_converted_group = TAKE_PTR(converted_group);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int find_free_uid(const char *directory, uid_t max_uid, uid_t *current_uid) {
|
||||
int r;
|
||||
|
||||
assert(directory);
|
||||
assert(current_uid);
|
||||
|
||||
for (;; (*current_uid) ++) {
|
||||
if (*current_uid > MAP_UID_END || *current_uid > max_uid)
|
||||
return log_error_errno(
|
||||
SYNTHETIC_ERRNO(EBUSY),
|
||||
"No suitable available UID in range " UID_FMT "…" UID_FMT " in container detected, can't map user.",
|
||||
MAP_UID_START, MAP_UID_END);
|
||||
|
||||
r = check_etc_passwd_collisions(directory, NULL, *current_uid);
|
||||
if (r < 0)
|
||||
return r;
|
||||
if (r > 0) /* already used */
|
||||
continue;
|
||||
|
||||
/* We want to use the UID also as GID, hence check for it in /etc/group too */
|
||||
r = check_etc_group_collisions(directory, NULL, (gid_t) *current_uid);
|
||||
if (r < 0)
|
||||
return r;
|
||||
if (r == 0) /* free! yay! */
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
/* Releases a BindUserContext: drops the references on all host and payload user/group records it
 * holds, frees the entry array and finally the context object itself.
 *
 * Accepts NULL (in which case nothing happens). Always returns NULL, so that callers can write
 * 'c = bind_user_context_free(c);'. */
BindUserContext* bind_user_context_free(BindUserContext *c) {
        if (!c)
                return NULL;

        assert(c->n_data == 0 || c->data);

        for (size_t i = 0; i < c->n_data; i++) {
                user_record_unref(c->data[i].host_user);
                group_record_unref(c->data[i].host_group);
                user_record_unref(c->data[i].payload_user);
                group_record_unref(c->data[i].payload_group);
        }

        /* The array itself is heap allocated by bind_user_prepare(), release it too so that it
         * doesn't leak. */
        free(c->data);

        return mfree(c);
}
|
||||
|
||||
int bind_user_prepare(
|
||||
const char *directory,
|
||||
char **bind_user,
|
||||
uid_t uid_shift,
|
||||
uid_t uid_range,
|
||||
CustomMount **custom_mounts,
|
||||
size_t *n_custom_mounts,
|
||||
BindUserContext **ret) {
|
||||
|
||||
_cleanup_(bind_user_context_freep) BindUserContext *c = NULL;
|
||||
uid_t current_uid = MAP_UID_START;
|
||||
size_t n_allocated = 0;
|
||||
char **n;
|
||||
int r;
|
||||
|
||||
assert(custom_mounts);
|
||||
assert(n_custom_mounts);
|
||||
assert(ret);
|
||||
|
||||
/* This resolves the users specified in 'bind_user', generates a minimalized JSON user + group record
|
||||
* for it to stick in the container, allocates a UID/GID for it, and updates the custom mount table,
|
||||
* to include an appropriate bind mount mapping.
|
||||
*
|
||||
* This extends the passed custom_mounts/n_custom_mounts with the home directories, and allocates a
|
||||
* new BindUserContext for the user records */
|
||||
|
||||
if (strv_isempty(bind_user)) {
|
||||
*ret = NULL;
|
||||
return 0;
|
||||
}
|
||||
|
||||
c = new0(BindUserContext, 1);
|
||||
if (!c)
|
||||
return log_oom();
|
||||
|
||||
STRV_FOREACH(n, bind_user) {
|
||||
_cleanup_(user_record_unrefp) UserRecord *u = NULL, *cu = NULL;
|
||||
_cleanup_(group_record_unrefp) GroupRecord *g = NULL, *cg = NULL;
|
||||
_cleanup_free_ char *sm = NULL, *sd = NULL;
|
||||
CustomMount *cm;
|
||||
|
||||
r = userdb_by_name(*n, USERDB_DONT_SYNTHESIZE, &u);
|
||||
if (r < 0)
|
||||
return log_error_errno(r, "Failed to resolve user '%s': %m", *n);
|
||||
|
||||
/* For now, let's refuse mapping the root/nobody users explicitly. The records we generate
|
||||
* are strictly additive, nss-systemd is typically placed last in /etc/nsswitch.conf. Thus
|
||||
* even if we wanted, we couldn't override the root or nobody user records. Note we also
|
||||
* check for name conflicts in /etc/passwd + /etc/group later on, which would usually filter
|
||||
* out root/nobody too, hence these checks might appear redundant — but they actually are
|
||||
* not, as we want to support environments where /etc/passwd and /etc/group are non-existent,
|
||||
* and the user/group databases fully synthesized at runtime. Moreover, the name of the
|
||||
* user/group name of the "nobody" account differs between distros, hence a check by numeric
|
||||
* UID is safer. */
|
||||
if (u->uid == 0 || streq(u->user_name, "root"))
|
||||
return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Mapping 'root' user not supported, sorry.");
|
||||
if (u->uid == UID_NOBODY || STR_IN_SET(u->user_name, NOBODY_USER_NAME, "nobody"))
|
||||
return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Mapping 'nobody' user not supported, sorry.");
|
||||
|
||||
if (u->uid >= uid_shift && u->uid < uid_shift + uid_range)
|
||||
return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "UID of user '%s' to map is already in container UID range, refusing.", u->user_name);
|
||||
|
||||
r = groupdb_by_gid(u->gid, USERDB_DONT_SYNTHESIZE, &g);
|
||||
if (r < 0)
|
||||
return log_error_errno(r, "Failed to resolve group of user '%s': %m", u->user_name);
|
||||
|
||||
if (g->gid >= uid_shift && g->gid < uid_shift + uid_range)
|
||||
return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "GID of group '%s' to map is already in container GID range, refusing.", g->group_name);
|
||||
|
||||
/* We want to synthesize exactly one user + group from the host into the container. This only
|
||||
* makes sense if the user on the host has its own private group. We can't reasonably check
|
||||
* this, so we just check of the name of user and group match.
|
||||
*
|
||||
* One of these days we might want to support users in a shared/common group too, but it's
|
||||
* not clear to me how this would have to be mapped, precisely given that the common group
|
||||
* probably already exists in the container. */
|
||||
if (!streq(u->user_name, g->group_name))
|
||||
return log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP),
|
||||
"Sorry, mapping users without private groups is currently not supported.");
|
||||
|
||||
r = find_free_uid(directory, uid_range, ¤t_uid);
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
||||
r = convert_user(directory, u, g, current_uid, &cu, &cg);
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
||||
if (!GREEDY_REALLOC(c->data, n_allocated, c->n_data + 1))
|
||||
return log_oom();
|
||||
|
||||
sm = strdup(u->home_directory);
|
||||
if (!sm)
|
||||
return log_oom();
|
||||
|
||||
sd = strdup(cu->home_directory);
|
||||
if (!sd)
|
||||
return log_oom();
|
||||
|
||||
cm = reallocarray(*custom_mounts, sizeof(CustomMount), *n_custom_mounts + 1);
|
||||
if (!cm)
|
||||
return log_oom();
|
||||
|
||||
*custom_mounts = cm;
|
||||
|
||||
(*custom_mounts)[(*n_custom_mounts)++] = (CustomMount) {
|
||||
.type = CUSTOM_MOUNT_BIND,
|
||||
.source = TAKE_PTR(sm),
|
||||
.destination = TAKE_PTR(sd),
|
||||
};
|
||||
|
||||
c->data[c->n_data++] = (BindUserData) {
|
||||
.host_user = TAKE_PTR(u),
|
||||
.host_group = TAKE_PTR(g),
|
||||
.payload_user = TAKE_PTR(cu),
|
||||
.payload_group = TAKE_PTR(cg),
|
||||
};
|
||||
|
||||
current_uid++;
|
||||
}
|
||||
|
||||
*ret = TAKE_PTR(c);
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int write_and_symlink(
|
||||
const char *root,
|
||||
JsonVariant *v,
|
||||
const char *name,
|
||||
uid_t uid,
|
||||
const char *suffix,
|
||||
WriteStringFileFlags extra_flags) {
|
||||
|
||||
_cleanup_free_ char *j = NULL, *f = NULL, *p = NULL, *q = NULL;
|
||||
int r;
|
||||
|
||||
assert(root);
|
||||
assert(v);
|
||||
assert(name);
|
||||
assert(uid_is_valid(uid));
|
||||
assert(suffix);
|
||||
|
||||
r = json_variant_format(v, JSON_FORMAT_NEWLINE, &j);
|
||||
if (r < 0)
|
||||
return log_error_errno(r, "Failed to format user record JSON: %m");
|
||||
|
||||
f = strjoin(name, suffix);
|
||||
if (!f)
|
||||
return log_oom();
|
||||
|
||||
p = path_join(root, "/run/host/userdb/", f);
|
||||
if (!p)
|
||||
return log_oom();
|
||||
|
||||
if (asprintf(&q, "%s/run/host/userdb/" UID_FMT "%s", root, uid, suffix) < 0)
|
||||
return log_oom();
|
||||
|
||||
if (symlink(f, q) < 0)
|
||||
return log_error_errno(errno, "Failed to create symlink '%s': %m", q);
|
||||
|
||||
r = userns_lchown(q, 0, 0);
|
||||
if (r < 0)
|
||||
return log_error_errno(r, "Failed to adjust access mode of '%s': %m", q);
|
||||
|
||||
r = write_string_file(p, j, WRITE_STRING_FILE_CREATE|extra_flags);
|
||||
if (r < 0)
|
||||
return log_error_errno(r, "Failed to write %s: %m", p);
|
||||
|
||||
r = userns_lchown(p, 0, 0);
|
||||
if (r < 0)
|
||||
return log_error_errno(r, "Failed to adjust access mode of '%s': %m", p);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int bind_user_setup(
|
||||
const BindUserContext *c,
|
||||
const char *root) {
|
||||
|
||||
static const UserRecordLoadFlags strip_flags = /* Removes privileged info */
|
||||
USER_RECORD_REQUIRE_REGULAR|
|
||||
USER_RECORD_STRIP_PRIVILEGED|
|
||||
USER_RECORD_ALLOW_PER_MACHINE|
|
||||
USER_RECORD_ALLOW_BINDING|
|
||||
USER_RECORD_ALLOW_SIGNATURE;
|
||||
static const UserRecordLoadFlags shadow_flags = /* Extracts privileged info */
|
||||
USER_RECORD_STRIP_REGULAR|
|
||||
USER_RECORD_ALLOW_PRIVILEGED|
|
||||
USER_RECORD_STRIP_PER_MACHINE|
|
||||
USER_RECORD_STRIP_BINDING|
|
||||
USER_RECORD_STRIP_SIGNATURE|
|
||||
USER_RECORD_EMPTY_OK;
|
||||
int r;
|
||||
|
||||
assert(root);
|
||||
|
||||
if (!c || c->n_data == 0)
|
||||
return 0;
|
||||
|
||||
r = userns_mkdir(root, "/run/host", 0755, 0, 0);
|
||||
if (r < 0)
|
||||
return log_error_errno(r, "Failed to create /run/host: %m");
|
||||
|
||||
r = userns_mkdir(root, "/run/host/home", 0755, 0, 0);
|
||||
if (r < 0)
|
||||
return log_error_errno(r, "Failed to create /run/host/userdb: %m");
|
||||
|
||||
r = userns_mkdir(root, "/run/host/userdb", 0755, 0, 0);
|
||||
if (r < 0)
|
||||
return log_error_errno(r, "Failed to create /run/host/userdb: %m");
|
||||
|
||||
for (size_t i = 0; i < c->n_data; i++) {
|
||||
_cleanup_(group_record_unrefp) GroupRecord *stripped_group = NULL, *shadow_group = NULL;
|
||||
_cleanup_(user_record_unrefp) UserRecord *stripped_user = NULL, *shadow_user = NULL;
|
||||
const BindUserData *d = c->data + i;
|
||||
|
||||
/* First, write shadow (i.e. privileged) data for group record */
|
||||
r = group_record_clone(d->payload_group, shadow_flags, &shadow_group);
|
||||
if (r < 0)
|
||||
return log_error_errno(r, "Failed to extract privileged information from group record: %m");
|
||||
|
||||
if (!json_variant_is_blank_object(shadow_group->json)) {
|
||||
r = write_and_symlink(
|
||||
root,
|
||||
shadow_group->json,
|
||||
d->payload_group->group_name,
|
||||
d->payload_group->gid,
|
||||
".group-privileged",
|
||||
WRITE_STRING_FILE_MODE_0600);
|
||||
if (r < 0)
|
||||
return r;
|
||||
}
|
||||
|
||||
/* Second, write main part of group record. */
|
||||
r = group_record_clone(d->payload_group, strip_flags, &stripped_group);
|
||||
if (r < 0)
|
||||
return log_error_errno(r, "Failed to strip privileged information from group record: %m");
|
||||
|
||||
r = write_and_symlink(
|
||||
root,
|
||||
stripped_group->json,
|
||||
d->payload_group->group_name,
|
||||
d->payload_group->gid,
|
||||
".group",
|
||||
0);
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
||||
/* Third, write out user shadow data. i.e. extract privileged info from user record */
|
||||
r = user_record_clone(d->payload_user, shadow_flags, &shadow_user);
|
||||
if (r < 0)
|
||||
return log_error_errno(r, "Failed to extract privileged information from user record: %m");
|
||||
|
||||
if (!json_variant_is_blank_object(shadow_user->json)) {
|
||||
r = write_and_symlink(
|
||||
root,
|
||||
shadow_user->json,
|
||||
d->payload_user->user_name,
|
||||
d->payload_user->uid,
|
||||
".user-privileged",
|
||||
WRITE_STRING_FILE_MODE_0600);
|
||||
if (r < 0)
|
||||
return r;
|
||||
}
|
||||
|
||||
/* Finally write out the main part of the user record */
|
||||
r = user_record_clone(d->payload_user, strip_flags, &stripped_user);
|
||||
if (r < 0)
|
||||
return log_error_errno(r, "Failed to strip privileged information from user record: %m");
|
||||
|
||||
r = write_and_symlink(
|
||||
root,
|
||||
stripped_user->json,
|
||||
d->payload_user->user_name,
|
||||
d->payload_user->uid,
|
||||
".user",
|
||||
0);
|
||||
if (r < 0)
|
||||
return r;
|
||||
}
|
||||
|
||||
return 1;
|
||||
}
|
29
src/nspawn/nspawn-bind-user.h
Normal file
29
src/nspawn/nspawn-bind-user.h
Normal file
@ -0,0 +1,29 @@
|
||||
/* SPDX-License-Identifier: LGPL-2.1-or-later */
|
||||
#pragma once
|
||||
|
||||
#include "user-record.h"
|
||||
#include "group-record.h"
|
||||
#include "nspawn-mount.h"
|
||||
|
||||
/* One bound user: references to the records resolved on the host, plus the records generated for
 * use inside the container. */
typedef struct BindUserData {
|
||||
/* The host's user/group records */
|
||||
UserRecord *host_user;
|
||||
GroupRecord *host_group;
|
||||
|
||||
/* The mapped records to place into the container */
|
||||
UserRecord *payload_user;
|
||||
GroupRecord *payload_group;
|
||||
} BindUserData;
|
||||
|
||||
/* Set of all bound users: 'data' is an array of 'n_data' BindUserData entries. Release with
 * bind_user_context_free(). */
typedef struct BindUserContext {
|
||||
BindUserData *data;
|
||||
size_t n_data;
|
||||
} BindUserContext;
|
||||
|
||||
BindUserContext* bind_user_context_free(BindUserContext *c);
|
||||
|
||||
DEFINE_TRIVIAL_CLEANUP_FUNC(BindUserContext*, bind_user_context_free);
|
||||
|
||||
int bind_user_prepare(const char *directory, char **bind_user, uid_t uid_shift, uid_t uid_range, CustomMount **custom_mounts, size_t *n_custom_mounts, BindUserContext **ret);
|
||||
|
||||
int bind_user_setup(const BindUserContext *c, const char *root);
|
@ -69,6 +69,7 @@ Files.Overlay, config_parse_overlay, 0, 0
|
||||
Files.OverlayReadOnly, config_parse_overlay, 1, 0
|
||||
Files.PrivateUsersChown, config_parse_userns_chown, 0, offsetof(Settings, userns_ownership)
|
||||
Files.PrivateUsersOwnership, config_parse_userns_ownership, 0, offsetof(Settings, userns_ownership)
|
||||
Files.BindUser, config_parse_bind_user, 0, offsetof(Settings, bind_user)
|
||||
Network.Private, config_parse_tristate, 0, offsetof(Settings, private_network)
|
||||
Network.Interface, config_parse_strv, 0, offsetof(Settings, network_interfaces)
|
||||
Network.MACVLAN, config_parse_strv, 0, offsetof(Settings, network_macvlan)
|
||||
|
@ -132,6 +132,7 @@ Settings* settings_free(Settings *s) {
|
||||
rlimit_free_all(s->rlimit);
|
||||
free(s->hostname);
|
||||
cpu_set_reset(&s->cpu_set);
|
||||
strv_free(s->bind_user);
|
||||
|
||||
strv_free(s->network_interfaces);
|
||||
strv_free(s->network_macvlan);
|
||||
@ -907,3 +908,51 @@ int config_parse_userns_chown(
|
||||
*ownership = r ? USER_NAMESPACE_OWNERSHIP_CHOWN : USER_NAMESPACE_OWNERSHIP_OFF;
|
||||
return 0;
|
||||
}
|
||||
|
||||
int config_parse_bind_user(
|
||||
const char *unit,
|
||||
const char *filename,
|
||||
unsigned line,
|
||||
const char *section,
|
||||
unsigned section_line,
|
||||
const char *lvalue,
|
||||
int ltype,
|
||||
const char *rvalue,
|
||||
void *data,
|
||||
void *userdata) {
|
||||
|
||||
char ***bind_user = data;
|
||||
int r;
|
||||
|
||||
assert(rvalue);
|
||||
assert(bind_user);
|
||||
|
||||
if (isempty(rvalue)) {
|
||||
*bind_user = strv_free(*bind_user);
|
||||
return 0;
|
||||
}
|
||||
|
||||
for (const char* p = rvalue;;) {
|
||||
_cleanup_free_ char *word = NULL;
|
||||
|
||||
r = extract_first_word(&p, &word, NULL, 0);
|
||||
if (r == -ENOMEM)
|
||||
return log_oom();
|
||||
if (r < 0) {
|
||||
log_syntax(unit, LOG_WARNING, filename, line, r, "Failed to parse BindUser= list, ignoring: %s", rvalue);
|
||||
return 0;
|
||||
}
|
||||
if (r == 0)
|
||||
break;
|
||||
|
||||
if (!valid_user_group_name(word, 0)) {
|
||||
log_syntax(unit, LOG_WARNING, filename, line, 0, "User name '%s' not valid, ignoring.", word);
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (strv_consume(bind_user, TAKE_PTR(word)) < 0)
|
||||
return log_oom();
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
@ -126,9 +126,10 @@ typedef enum SettingsMask {
|
||||
SETTING_CLONE_NS_FLAGS = UINT64_C(1) << 28,
|
||||
SETTING_CONSOLE_MODE = UINT64_C(1) << 29,
|
||||
SETTING_CREDENTIALS = UINT64_C(1) << 30,
|
||||
SETTING_RLIMIT_FIRST = UINT64_C(1) << 31, /* we define one bit per resource limit here */
|
||||
SETTING_RLIMIT_LAST = UINT64_C(1) << (31 + _RLIMIT_MAX - 1),
|
||||
_SETTINGS_MASK_ALL = (UINT64_C(1) << (31 + _RLIMIT_MAX)) -1,
|
||||
SETTING_BIND_USER = UINT64_C(1) << 31,
|
||||
SETTING_RLIMIT_FIRST = UINT64_C(1) << 32, /* we define one bit per resource limit here */
|
||||
SETTING_RLIMIT_LAST = UINT64_C(1) << (32 + _RLIMIT_MAX - 1),
|
||||
_SETTINGS_MASK_ALL = (UINT64_C(1) << (32 + _RLIMIT_MAX)) -1,
|
||||
_SETTING_FORCE_ENUM_WIDTH = UINT64_MAX
|
||||
} SettingsMask;
|
||||
|
||||
@ -195,6 +196,7 @@ typedef struct Settings {
|
||||
CustomMount *custom_mounts;
|
||||
size_t n_custom_mounts;
|
||||
UserNamespaceOwnership userns_ownership;
|
||||
char **bind_user;
|
||||
|
||||
/* [Network] */
|
||||
int private_network;
|
||||
@ -266,6 +268,7 @@ CONFIG_PARSER_PROTOTYPE(config_parse_link_journal);
|
||||
CONFIG_PARSER_PROTOTYPE(config_parse_timezone);
|
||||
CONFIG_PARSER_PROTOTYPE(config_parse_userns_chown);
|
||||
CONFIG_PARSER_PROTOTYPE(config_parse_userns_ownership);
|
||||
CONFIG_PARSER_PROTOTYPE(config_parse_bind_user);
|
||||
|
||||
const char *resolv_conf_mode_to_string(ResolvConfMode a) _const_;
|
||||
ResolvConfMode resolv_conf_mode_from_string(const char *s) _pure_;
|
||||
|
@ -63,6 +63,7 @@
|
||||
#include "mountpoint-util.h"
|
||||
#include "namespace-util.h"
|
||||
#include "netlink-util.h"
|
||||
#include "nspawn-bind-user.h"
|
||||
#include "nspawn-cgroup.h"
|
||||
#include "nspawn-creds.h"
|
||||
#include "nspawn-def.h"
|
||||
@ -226,6 +227,7 @@ static char **arg_sysctl = NULL;
|
||||
static ConsoleMode arg_console_mode = _CONSOLE_MODE_INVALID;
|
||||
static Credential *arg_credentials = NULL;
|
||||
static size_t arg_n_credentials = 0;
|
||||
static char **arg_bind_user = NULL;
|
||||
|
||||
STATIC_DESTRUCTOR_REGISTER(arg_directory, freep);
|
||||
STATIC_DESTRUCTOR_REGISTER(arg_template, freep);
|
||||
@ -258,6 +260,7 @@ STATIC_DESTRUCTOR_REGISTER(arg_seccomp, seccomp_releasep);
|
||||
#endif
|
||||
STATIC_DESTRUCTOR_REGISTER(arg_cpu_set, cpu_set_reset);
|
||||
STATIC_DESTRUCTOR_REGISTER(arg_sysctl, strv_freep);
|
||||
STATIC_DESTRUCTOR_REGISTER(arg_bind_user, strv_freep);
|
||||
|
||||
static int handle_arg_console(const char *arg) {
|
||||
if (streq(arg, "help")) {
|
||||
@ -423,7 +426,8 @@ static int help(void) {
|
||||
" Create an overlay mount from the host to \n"
|
||||
" the container\n"
|
||||
" --overlay-ro=PATH[:PATH...]:PATH\n"
|
||||
" Similar, but creates a read-only overlay mount\n\n"
|
||||
" Similar, but creates a read-only overlay mount\n"
|
||||
" --bind-user=NAME Bind user from host to container\n\n"
|
||||
"%3$sInput/Output:%4$s\n"
|
||||
" --console=MODE Select how stdin/stdout/stderr and /dev/console are\n"
|
||||
" set up for the container.\n"
|
||||
@ -707,6 +711,7 @@ static int parse_argv(int argc, char *argv[]) {
|
||||
ARG_NO_PAGER,
|
||||
ARG_SET_CREDENTIAL,
|
||||
ARG_LOAD_CREDENTIAL,
|
||||
ARG_BIND_USER,
|
||||
};
|
||||
|
||||
static const struct option options[] = {
|
||||
@ -778,6 +783,7 @@ static int parse_argv(int argc, char *argv[]) {
|
||||
{ "no-pager", no_argument, NULL, ARG_NO_PAGER },
|
||||
{ "set-credential", required_argument, NULL, ARG_SET_CREDENTIAL },
|
||||
{ "load-credential", required_argument, NULL, ARG_LOAD_CREDENTIAL },
|
||||
{ "bind-user", required_argument, NULL, ARG_BIND_USER },
|
||||
{}
|
||||
};
|
||||
|
||||
@ -1656,6 +1662,16 @@ static int parse_argv(int argc, char *argv[]) {
|
||||
break;
|
||||
}
|
||||
|
||||
case ARG_BIND_USER:
|
||||
if (!valid_user_group_name(optarg, 0))
|
||||
return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Invalid user name to bind: %s", optarg);
|
||||
|
||||
if (strv_extend(&arg_bind_user, optarg) < 0)
|
||||
return log_oom();
|
||||
|
||||
arg_settings_mask |= SETTING_BIND_USER;
|
||||
break;
|
||||
|
||||
case '?':
|
||||
return -EINVAL;
|
||||
|
||||
@ -1812,6 +1828,12 @@ static int verify_arguments(void) {
|
||||
return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "AmbientCapability= setting is not useful for boot mode.");
|
||||
}
|
||||
|
||||
if (arg_userns_mode == USER_NAMESPACE_NO && !strv_isempty(arg_bind_user))
|
||||
return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "--bind-user= requires --private-users");
|
||||
|
||||
/* Drop duplicate --bind-user= entries */
|
||||
strv_uniq(arg_bind_user);
|
||||
|
||||
r = custom_mount_check_all();
|
||||
if (r < 0)
|
||||
return r;
|
||||
@ -3569,6 +3591,7 @@ static int outer_child(
|
||||
FDSet *fds,
|
||||
int netns_fd) {
|
||||
|
||||
_cleanup_(bind_user_context_freep) BindUserContext *bind_user_context = NULL;
|
||||
_cleanup_strv_free_ char **os_release_pairs = NULL;
|
||||
_cleanup_close_ int fd = -1;
|
||||
bool idmap = false;
|
||||
@ -3716,6 +3739,36 @@ static int outer_child(
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
||||
r = bind_user_prepare(
|
||||
directory,
|
||||
arg_bind_user,
|
||||
arg_uid_shift,
|
||||
arg_uid_range,
|
||||
&arg_custom_mounts, &arg_n_custom_mounts,
|
||||
&bind_user_context);
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
||||
if (arg_userns_mode != USER_NAMESPACE_NO && bind_user_context) {
|
||||
/* Send the user maps we determined to the parent, so that it installs it in our user namespace UID map table */
|
||||
|
||||
for (size_t i = 0; i < bind_user_context->n_data; i++) {
|
||||
uid_t map[] = {
|
||||
bind_user_context->data[i].payload_user->uid,
|
||||
bind_user_context->data[i].host_user->uid,
|
||||
(uid_t) bind_user_context->data[i].payload_group->gid,
|
||||
(uid_t) bind_user_context->data[i].host_group->gid,
|
||||
};
|
||||
|
||||
l = send(uid_shift_socket, map, sizeof(map), MSG_NOSIGNAL);
|
||||
if (l < 0)
|
||||
return log_error_errno(errno, "Failed to send user UID map: %m");
|
||||
if (l != sizeof(map))
|
||||
return log_error_errno(SYNTHETIC_ERRNO(EIO),
|
||||
"Short write while sending user UID map.");
|
||||
}
|
||||
}
|
||||
|
||||
r = mount_custom(
|
||||
directory,
|
||||
arg_custom_mounts,
|
||||
@ -3832,6 +3885,10 @@ static int outer_child(
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
||||
r = bind_user_setup(bind_user_context, directory);
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
||||
r = mount_custom(
|
||||
directory,
|
||||
arg_custom_mounts,
|
||||
@ -4012,21 +4069,96 @@ static int uid_shift_pick(uid_t *shift, LockFile *ret_lock_file) {
|
||||
}
|
||||
}
|
||||
|
||||
static int setup_uid_map(pid_t pid) {
|
||||
char uid_map[STRLEN("/proc//uid_map") + DECIMAL_STR_MAX(uid_t) + 1], line[DECIMAL_STR_MAX(uid_t)*3+3+1];
|
||||
static int add_one_uid_map(
|
||||
char **p,
|
||||
uid_t container_uid,
|
||||
uid_t host_uid,
|
||||
uid_t range) {
|
||||
|
||||
return strextendf(p,
|
||||
UID_FMT " " UID_FMT " " UID_FMT "\n",
|
||||
container_uid, host_uid, range);
|
||||
}
|
||||
|
||||
static int make_uid_map_string(
|
||||
const uid_t bind_user_uid[],
|
||||
size_t n_bind_user_uid,
|
||||
size_t offset,
|
||||
char **ret) {
|
||||
|
||||
_cleanup_free_ char *s = NULL;
|
||||
uid_t previous_uid = 0;
|
||||
int r;
|
||||
|
||||
assert(n_bind_user_uid == 0 || bind_user_uid);
|
||||
assert(offset == 0 || offset == 2); /* used to switch between UID and GID map */
|
||||
assert(ret);
|
||||
|
||||
/* The bind_user_uid[] array is a series of 4 uid_t values, for each --bind-user= entry one
|
||||
* quadruplet, consisting of host and container UID + GID. */
|
||||
|
||||
for (size_t i = 0; i < n_bind_user_uid; i++) {
|
||||
uid_t payload_uid = bind_user_uid[i*2+offset],
|
||||
host_uid = bind_user_uid[i*2+offset+1];
|
||||
|
||||
assert(previous_uid <= payload_uid);
|
||||
assert(payload_uid < arg_uid_range);
|
||||
|
||||
/* Add a range to close the gap to previous entry */
|
||||
if (payload_uid > previous_uid) {
|
||||
r = add_one_uid_map(&s, previous_uid, arg_uid_shift + previous_uid, payload_uid - previous_uid);
|
||||
if (r < 0)
|
||||
return r;
|
||||
}
|
||||
|
||||
/* Map this specific user */
|
||||
r = add_one_uid_map(&s, payload_uid, host_uid, 1);
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
||||
previous_uid = payload_uid + 1;
|
||||
}
|
||||
|
||||
/* And add a range to close the gap to finish the range */
|
||||
if (arg_uid_range > previous_uid) {
|
||||
r = add_one_uid_map(&s, previous_uid, arg_uid_shift + previous_uid, arg_uid_range - previous_uid);
|
||||
if (r < 0)
|
||||
return r;
|
||||
}
|
||||
|
||||
assert(s);
|
||||
|
||||
*ret = TAKE_PTR(s);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int setup_uid_map(
|
||||
pid_t pid,
|
||||
const uid_t bind_user_uid[],
|
||||
size_t n_bind_user_uid) {
|
||||
|
||||
char uid_map[STRLEN("/proc//uid_map") + DECIMAL_STR_MAX(uid_t) + 1];
|
||||
_cleanup_free_ char *s = NULL;
|
||||
int r;
|
||||
|
||||
assert(pid > 1);
|
||||
|
||||
/* Build the UID map string */
|
||||
if (make_uid_map_string(bind_user_uid, n_bind_user_uid, 0, &s) < 0) /* offset=0 contains the UID pair */
|
||||
return log_oom();
|
||||
|
||||
xsprintf(uid_map, "/proc/" PID_FMT "/uid_map", pid);
|
||||
xsprintf(line, UID_FMT " " UID_FMT " " UID_FMT "\n", 0, arg_uid_shift, arg_uid_range);
|
||||
r = write_string_file(uid_map, line, WRITE_STRING_FILE_DISABLE_BUFFER);
|
||||
r = write_string_file(uid_map, s, WRITE_STRING_FILE_DISABLE_BUFFER);
|
||||
if (r < 0)
|
||||
return log_error_errno(r, "Failed to write UID map: %m");
|
||||
|
||||
/* We always assign the same UID and GID ranges */
|
||||
/* And now build the GID map string */
|
||||
s = mfree(s);
|
||||
if (make_uid_map_string(bind_user_uid, n_bind_user_uid, 2, &s) < 0) /* offset=2 contains the GID pair */
|
||||
return log_oom();
|
||||
|
||||
xsprintf(uid_map, "/proc/" PID_FMT "/gid_map", pid);
|
||||
r = write_string_file(uid_map, line, WRITE_STRING_FILE_DISABLE_BUFFER);
|
||||
r = write_string_file(uid_map, s, WRITE_STRING_FILE_DISABLE_BUFFER);
|
||||
if (r < 0)
|
||||
return log_error_errno(r, "Failed to write GID map: %m");
|
||||
|
||||
@ -4302,6 +4434,9 @@ static int merge_settings(Settings *settings, const char *path) {
|
||||
}
|
||||
}
|
||||
|
||||
if ((arg_settings_mask & SETTING_BIND_USER) == 0)
|
||||
strv_free_and_replace(arg_bind_user, settings->bind_user);
|
||||
|
||||
if ((arg_settings_mask & SETTING_NOTIFY_READY) == 0)
|
||||
arg_notify_ready = settings->notify_ready;
|
||||
|
||||
@ -4568,6 +4703,8 @@ static int run_container(
|
||||
_cleanup_(pty_forward_freep) PTYForward *forward = NULL;
|
||||
_cleanup_(sd_netlink_unrefp) sd_netlink *rtnl = NULL;
|
||||
_cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
|
||||
_cleanup_free_ uid_t *bind_user_uid = NULL;
|
||||
size_t n_bind_user_uid = 0;
|
||||
ContainerStatus container_status = 0;
|
||||
int ifi = 0, r;
|
||||
ssize_t l;
|
||||
@ -4723,6 +4860,26 @@ static int run_container(
|
||||
if (l != sizeof arg_uid_shift)
|
||||
return log_error_errno(SYNTHETIC_ERRNO(EIO), "Short write while writing UID shift.");
|
||||
}
|
||||
|
||||
n_bind_user_uid = strv_length(arg_bind_user);
|
||||
if (n_bind_user_uid > 0) {
|
||||
/* Right after the UID shift, we'll receive the list of UID mappings for the
|
||||
* --bind-user= logic. Always a quadruplet of payload and host UID + GID. */
|
||||
|
||||
bind_user_uid = new(uid_t, n_bind_user_uid*4);
|
||||
if (!bind_user_uid)
|
||||
return log_oom();
|
||||
|
||||
for (size_t i = 0; i < n_bind_user_uid; i++) {
|
||||
l = recv(uid_shift_socket_pair[0], bind_user_uid + i*4, sizeof(uid_t)*4, 0);
|
||||
if (l < 0)
|
||||
return log_error_errno(errno, "Failed to read user UID map pair: %m");
|
||||
if (l != sizeof(uid_t)*4)
|
||||
return log_full_errno(l == 0 ? LOG_DEBUG : LOG_WARNING,
|
||||
SYNTHETIC_ERRNO(EIO),
|
||||
"Short read while reading bind user UID pairs.");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (arg_unified_cgroup_hierarchy == CGROUP_UNIFIED_UNKNOWN) {
|
||||
@ -4768,7 +4925,7 @@ static int run_container(
|
||||
if (!barrier_place_and_sync(&barrier)) /* #1 */
|
||||
return log_error_errno(SYNTHETIC_ERRNO(ESRCH), "Child died too early.");
|
||||
|
||||
r = setup_uid_map(*pid);
|
||||
r = setup_uid_map(*pid, bind_user_uid, n_bind_user_uid);
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user