1
0
mirror of https://github.com/systemd/systemd.git synced 2024-10-30 14:55:37 +03:00

Merge pull request #19555 from poettering/nspawn-bind-user

nspawn: add --bind-user= feature for binding  host user+homedir into a container
This commit is contained in:
Lennart Poettering 2021-05-20 07:33:51 +02:00 committed by GitHub
commit d99c2df2df
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
13 changed files with 811 additions and 16 deletions

View File

@ -241,8 +241,9 @@ the artifacts the container manager persistently leaves in the system.
| 5 | `tty` group | `systemd` | `/etc/passwd` |
| 6…999 | System users | Distributions | `/etc/passwd` |
| 1000…60000 | Regular users | Distributions | `/etc/passwd` + LDAP/NIS/… |
| 60001…60513 | Human Users (homed) | `systemd` | `nss-systemd` |
| 60514…61183 | Unused | | |
| 60001…60513 | Human users (homed) | `systemd` | `nss-systemd` |
| 60514…60577 | Host users mapped into containers | `systemd` | `systemd-nspawn` |
| 60578…61183 | Unused | | |
| 61184…65519 | Dynamic service users | `systemd` | `nss-systemd` |
| 65520…65533 | Unused | | |
| 65534 | `nobody` user | Linux | `/etc/passwd` + `nss-systemd` |

View File

@ -1352,6 +1352,58 @@ After=sys-subsystem-net-devices-ens1.device</programlisting>
make them read-only, using <option>--bind-ro=</option>.</para></listitem>
</varlistentry>
<varlistentry>
<term><option>--bind-user=</option></term>
<listitem><para>Binds the home directory of the specified user on the host into the container. Takes
the name of an existing user on the host as argument. May be used multiple times to bind multiple
users into the container. This does three things:</para>
<orderedlist>
<listitem><para>The user's home directory is bind mounted from the host into
<filename>/run/hosts/home/</filename>.</para></listitem>
<listitem><para>An additional UID/GID mapping is added that maps the host user's UID/GID to a
container UID/GID, allocated from the 60514…60577 range.</para></listitem>
<listitem><para>A JSON user and group record is generated in <filename>/run/userdb/</filename> that
describes the mapped user. It contains a minimized representation of the host's user record,
adjusted to the UID/GID and home directory path assigned to the user in the container. The
<citerefentry><refentrytitle>nss-systemd</refentrytitle><manvolnum>8</manvolnum></citerefentry>
glibc NSS module will pick up these records from there and make them available in the container's
user/group databases.</para></listitem>
</orderedlist>
<para>The combination of the three operations above ensures that it is possible to log into the
host's user account inside the container as if it was local to the container. The user is only mapped
transiently, while the container is running and the mapping itself does not result in persistent
changes to the container (except maybe for generated log messages at login time, and similar). Note
that in particular the UID/GID assignment in the container is not made persistently. If the user is
mapped transiently, it is best to not allow the user to make persistent changes to the container. If
the user leaves files or directories owned by the user, and those UIDs/GIDs are recycled during later
container invocations (possibly with a different <option>--bind-user=</option> mapping), those files
and directories will be accessible to the "new" user.</para>
<para>The user/group record mapping only works if the container contains systemd 249 or newer, with
<command>nss-systemd</command> properly configured in <filename>nsswitch.conf</filename>. See
<citerefentry><refentrytitle>nss-systemd</refentrytitle><manvolnum>8</manvolnum></citerefentry> for
details.</para>
<para>Note that the user record propagated from the host into the container will contain the UNIX
password hash of the user, so that seamless logins in the container are possible. If the container is
less trusted than the host it's hence important to use a strong UNIX password hash function
(e.g. yescrypt or similar, with the <literal>$y$</literal> hash prefix).</para>
<para>When binding a user from the host into the container checks are executed to ensure that the
username is not yet known in the container. Moreover, it is checked that the UID/GID allocated for it
is not currently defined in the user/group databases of the container. Both checks directly access
the container's <filename>/etc/passwd</filename> and <filename>/etc/group</filename>, and thus might
not detect existing accounts in other databases.</para>
<para>This operation is only supported in combination with
<option>--private-users=</option>/<option>-U</option>.</para></listitem>
</varlistentry>
<varlistentry>
<term><option>--inaccessible=</option></term>

View File

@ -415,6 +415,16 @@
is privileged (see above).</para></listitem>
</varlistentry>
<varlistentry>
<term><varname>BindUser=</varname></term>
<listitem><para>Binds a user from the host into the container. This option is equivalent to the
command line switch <option>--bind-user=</option>, see
<citerefentry><refentrytitle>systemd-nspawn</refentrytitle><manvolnum>1</manvolnum></citerefentry>
for details about the specific options supported. This setting is privileged (see
above).</para></listitem>
</varlistentry>
<varlistentry>
<term><varname>TemporaryFileSystem=</varname></term>

View File

@ -1,6 +1,8 @@
# SPDX-License-Identifier: LGPL-2.1-or-later
libnspawn_core_sources = files('''
nspawn-bind-user.c
nspawn-bind-user.h
nspawn-cgroup.c
nspawn-cgroup.h
nspawn-creds.c
@ -26,6 +28,7 @@ libnspawn_core_sources = files('''
nspawn-setuid.h
nspawn-stub-pid1.c
nspawn-stub-pid1.h
nspawn.h
'''.split())
nspawn_gperf_c = custom_target(

View File

@ -0,0 +1,479 @@
/* SPDX-License-Identifier: LGPL-2.1-or-later */
#include "fd-util.h"
#include "fileio.h"
#include "format-util.h"
#include "fs-util.h"
#include "nspawn-bind-user.h"
#include "nspawn.h"
#include "path-util.h"
#include "user-util.h"
#include "userdb.h"
#define MAP_UID_START 60514
#define MAP_UID_END 60577
static int check_etc_passwd_collisions(
const char *directory,
const char *name,
uid_t uid) {
_cleanup_fclose_ FILE *f = NULL;
int r;
assert(directory);
assert(name || uid_is_valid(uid));
r = chase_symlinks_and_fopen_unlocked("/etc/passwd", directory, CHASE_PREFIX_ROOT, "re", &f, NULL);
if (r == -ENOENT)
return 0; /* no user database? then no user, hence no collision */
if (r < 0)
return log_error_errno(r, "Failed to open /etc/passwd of container: %m");
for (;;) {
struct passwd *pw;
r = fgetpwent_sane(f, &pw);
if (r < 0)
return log_error_errno(r, "Failed to iterate through /etc/passwd of container: %m");
if (r == 0) /* EOF */
return 0; /* no collision */
if (name && streq_ptr(pw->pw_name, name))
return 1; /* name collision */
if (uid_is_valid(uid) && pw->pw_uid == uid)
return 1; /* UID collision */
}
}
static int check_etc_group_collisions(
const char *directory,
const char *name,
gid_t gid) {
_cleanup_fclose_ FILE *f = NULL;
int r;
assert(directory);
assert(name || gid_is_valid(gid));
r = chase_symlinks_and_fopen_unlocked("/etc/group", directory, CHASE_PREFIX_ROOT, "re", &f, NULL);
if (r == -ENOENT)
return 0; /* no group database? then no group, hence no collision */
if (r < 0)
return log_error_errno(r, "Failed to open /etc/group of container: %m");
for (;;) {
struct group *gr;
r = fgetgrent_sane(f, &gr);
if (r < 0)
return log_error_errno(r, "Failed to iterate through /etc/group of container: %m");
if (r == 0)
return 0; /* no collision */
if (name && streq_ptr(gr->gr_name, name))
return 1; /* name collision */
if (gid_is_valid(gid) && gr->gr_gid == gid)
return 1; /* gid collision */
}
}
static int convert_user(
const char *directory,
UserRecord *u,
GroupRecord *g,
uid_t allocate_uid,
UserRecord **ret_converted_user,
GroupRecord **ret_converted_group) {
_cleanup_(group_record_unrefp) GroupRecord *converted_group = NULL;
_cleanup_(user_record_unrefp) UserRecord *converted_user = NULL;
_cleanup_free_ char *h = NULL;
JsonVariant *p, *hp = NULL;
int r;
assert(u);
assert(g);
assert(u->gid == g->gid);
r = check_etc_passwd_collisions(directory, u->user_name, UID_INVALID);
if (r < 0)
return r;
if (r > 0)
return log_error_errno(SYNTHETIC_ERRNO(EBUSY),
"Sorry, the user '%s' already exists in the container.", u->user_name);
r = check_etc_group_collisions(directory, g->group_name, GID_INVALID);
if (r < 0)
return r;
if (r > 0)
return log_error_errno(SYNTHETIC_ERRNO(EBUSY),
"Sorry, the group '%s' already exists in the container.", g->group_name);
h = path_join("/run/host/home/", u->user_name);
if (!h)
return log_oom();
/* Acquire the source hashed password array as-is, so that it retains the JSON_VARIANT_SENSITIVE flag */
p = json_variant_by_key(u->json, "privileged");
if (p)
hp = json_variant_by_key(p, "hashedPassword");
r = user_record_build(
&converted_user,
JSON_BUILD_OBJECT(
JSON_BUILD_PAIR("userName", JSON_BUILD_STRING(u->user_name)),
JSON_BUILD_PAIR("uid", JSON_BUILD_UNSIGNED(allocate_uid)),
JSON_BUILD_PAIR("gid", JSON_BUILD_UNSIGNED(allocate_uid)),
JSON_BUILD_PAIR_CONDITION(u->disposition >= 0, "disposition", JSON_BUILD_STRING(user_disposition_to_string(u->disposition))),
JSON_BUILD_PAIR("homeDirectory", JSON_BUILD_STRING(h)),
JSON_BUILD_PAIR("service", JSON_BUILD_STRING("io.systemd.NSpawn")),
JSON_BUILD_PAIR_CONDITION(!strv_isempty(u->hashed_password), "privileged", JSON_BUILD_OBJECT(
JSON_BUILD_PAIR("hashedPassword", JSON_BUILD_VARIANT(hp))))));
if (r < 0)
return log_error_errno(r, "Failed to build container user record: %m");
r = group_record_build(
&converted_group,
JSON_BUILD_OBJECT(
JSON_BUILD_PAIR("groupName", JSON_BUILD_STRING(g->group_name)),
JSON_BUILD_PAIR("gid", JSON_BUILD_UNSIGNED(allocate_uid)),
JSON_BUILD_PAIR_CONDITION(g->disposition >= 0, "disposition", JSON_BUILD_STRING(user_disposition_to_string(g->disposition))),
JSON_BUILD_PAIR("service", JSON_BUILD_STRING("io.systemd.NSpawn"))));
if (r < 0)
return log_error_errno(r, "Failed to build container group record: %m");
*ret_converted_user = TAKE_PTR(converted_user);
*ret_converted_group = TAKE_PTR(converted_group);
return 0;
}
static int find_free_uid(const char *directory, uid_t max_uid, uid_t *current_uid) {
int r;
assert(directory);
assert(current_uid);
for (;; (*current_uid) ++) {
if (*current_uid > MAP_UID_END || *current_uid > max_uid)
return log_error_errno(
SYNTHETIC_ERRNO(EBUSY),
"No suitable available UID in range " UID_FMT "" UID_FMT " in container detected, can't map user.",
MAP_UID_START, MAP_UID_END);
r = check_etc_passwd_collisions(directory, NULL, *current_uid);
if (r < 0)
return r;
if (r > 0) /* already used */
continue;
/* We want to use the UID also as GID, hence check for it in /etc/group too */
r = check_etc_group_collisions(directory, NULL, (gid_t) *current_uid);
if (r < 0)
return r;
if (r == 0) /* free! yay! */
return 0;
}
}
BindUserContext* bind_user_context_free(BindUserContext *c) {
if (!c)
return NULL;
assert(c->n_data == 0 || c->data);
for (size_t i = 0; i < c->n_data; i++) {
user_record_unref(c->data[i].host_user);
group_record_unref(c->data[i].host_group);
user_record_unref(c->data[i].payload_user);
group_record_unref(c->data[i].payload_group);
}
return mfree(c);
}
int bind_user_prepare(
const char *directory,
char **bind_user,
uid_t uid_shift,
uid_t uid_range,
CustomMount **custom_mounts,
size_t *n_custom_mounts,
BindUserContext **ret) {
_cleanup_(bind_user_context_freep) BindUserContext *c = NULL;
uid_t current_uid = MAP_UID_START;
size_t n_allocated = 0;
char **n;
int r;
assert(custom_mounts);
assert(n_custom_mounts);
assert(ret);
/* This resolves the users specified in 'bind_user', generates a minimalized JSON user + group record
* for it to stick in the container, allocates a UID/GID for it, and updates the custom mount table,
* to include an appropriate bind mount mapping.
*
* This extends the passed custom_mounts/n_custom_mounts with the home directories, and allocates a
* new BindUserContext for the user records */
if (strv_isempty(bind_user)) {
*ret = NULL;
return 0;
}
c = new0(BindUserContext, 1);
if (!c)
return log_oom();
STRV_FOREACH(n, bind_user) {
_cleanup_(user_record_unrefp) UserRecord *u = NULL, *cu = NULL;
_cleanup_(group_record_unrefp) GroupRecord *g = NULL, *cg = NULL;
_cleanup_free_ char *sm = NULL, *sd = NULL;
CustomMount *cm;
r = userdb_by_name(*n, USERDB_DONT_SYNTHESIZE, &u);
if (r < 0)
return log_error_errno(r, "Failed to resolve user '%s': %m", *n);
/* For now, let's refuse mapping the root/nobody users explicitly. The records we generate
* are strictly additive, nss-systemd is typically placed last in /etc/nsswitch.conf. Thus
* even if we wanted, we couldn't override the root or nobody user records. Note we also
* check for name conflicts in /etc/passwd + /etc/group later on, which would usually filter
* out root/nobody too, hence these checks might appear redundant but they actually are
* not, as we want to support environments where /etc/passwd and /etc/group are non-existent,
* and the user/group databases fully synthesized at runtime. Moreover, the name of the
* user/group name of the "nobody" account differs between distros, hence a check by numeric
* UID is safer. */
if (u->uid == 0 || streq(u->user_name, "root"))
return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Mapping 'root' user not supported, sorry.");
if (u->uid == UID_NOBODY || STR_IN_SET(u->user_name, NOBODY_USER_NAME, "nobody"))
return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Mapping 'nobody' user not supported, sorry.");
if (u->uid >= uid_shift && u->uid < uid_shift + uid_range)
return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "UID of user '%s' to map is already in container UID range, refusing.", u->user_name);
r = groupdb_by_gid(u->gid, USERDB_DONT_SYNTHESIZE, &g);
if (r < 0)
return log_error_errno(r, "Failed to resolve group of user '%s': %m", u->user_name);
if (g->gid >= uid_shift && g->gid < uid_shift + uid_range)
return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "GID of group '%s' to map is already in container GID range, refusing.", g->group_name);
/* We want to synthesize exactly one user + group from the host into the container. This only
* makes sense if the user on the host has its own private group. We can't reasonably check
* this, so we just check of the name of user and group match.
*
* One of these days we might want to support users in a shared/common group too, but it's
* not clear to me how this would have to be mapped, precisely given that the common group
* probably already exists in the container. */
if (!streq(u->user_name, g->group_name))
return log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP),
"Sorry, mapping users without private groups is currently not supported.");
r = find_free_uid(directory, uid_range, &current_uid);
if (r < 0)
return r;
r = convert_user(directory, u, g, current_uid, &cu, &cg);
if (r < 0)
return r;
if (!GREEDY_REALLOC(c->data, n_allocated, c->n_data + 1))
return log_oom();
sm = strdup(u->home_directory);
if (!sm)
return log_oom();
sd = strdup(cu->home_directory);
if (!sd)
return log_oom();
cm = reallocarray(*custom_mounts, sizeof(CustomMount), *n_custom_mounts + 1);
if (!cm)
return log_oom();
*custom_mounts = cm;
(*custom_mounts)[(*n_custom_mounts)++] = (CustomMount) {
.type = CUSTOM_MOUNT_BIND,
.source = TAKE_PTR(sm),
.destination = TAKE_PTR(sd),
};
c->data[c->n_data++] = (BindUserData) {
.host_user = TAKE_PTR(u),
.host_group = TAKE_PTR(g),
.payload_user = TAKE_PTR(cu),
.payload_group = TAKE_PTR(cg),
};
current_uid++;
}
*ret = TAKE_PTR(c);
return 1;
}
static int write_and_symlink(
const char *root,
JsonVariant *v,
const char *name,
uid_t uid,
const char *suffix,
WriteStringFileFlags extra_flags) {
_cleanup_free_ char *j = NULL, *f = NULL, *p = NULL, *q = NULL;
int r;
assert(root);
assert(v);
assert(name);
assert(uid_is_valid(uid));
assert(suffix);
r = json_variant_format(v, JSON_FORMAT_NEWLINE, &j);
if (r < 0)
return log_error_errno(r, "Failed to format user record JSON: %m");
f = strjoin(name, suffix);
if (!f)
return log_oom();
p = path_join(root, "/run/host/userdb/", f);
if (!p)
return log_oom();
if (asprintf(&q, "%s/run/host/userdb/" UID_FMT "%s", root, uid, suffix) < 0)
return log_oom();
if (symlink(f, q) < 0)
return log_error_errno(errno, "Failed to create symlink '%s': %m", q);
r = userns_lchown(q, 0, 0);
if (r < 0)
return log_error_errno(r, "Failed to adjust access mode of '%s': %m", q);
r = write_string_file(p, j, WRITE_STRING_FILE_CREATE|extra_flags);
if (r < 0)
return log_error_errno(r, "Failed to write %s: %m", p);
r = userns_lchown(p, 0, 0);
if (r < 0)
return log_error_errno(r, "Failed to adjust access mode of '%s': %m", p);
return 0;
}
int bind_user_setup(
const BindUserContext *c,
const char *root) {
static const UserRecordLoadFlags strip_flags = /* Removes privileged info */
USER_RECORD_REQUIRE_REGULAR|
USER_RECORD_STRIP_PRIVILEGED|
USER_RECORD_ALLOW_PER_MACHINE|
USER_RECORD_ALLOW_BINDING|
USER_RECORD_ALLOW_SIGNATURE;
static const UserRecordLoadFlags shadow_flags = /* Extracts privileged info */
USER_RECORD_STRIP_REGULAR|
USER_RECORD_ALLOW_PRIVILEGED|
USER_RECORD_STRIP_PER_MACHINE|
USER_RECORD_STRIP_BINDING|
USER_RECORD_STRIP_SIGNATURE|
USER_RECORD_EMPTY_OK;
int r;
assert(root);
if (!c || c->n_data == 0)
return 0;
r = userns_mkdir(root, "/run/host", 0755, 0, 0);
if (r < 0)
return log_error_errno(r, "Failed to create /run/host: %m");
r = userns_mkdir(root, "/run/host/home", 0755, 0, 0);
if (r < 0)
return log_error_errno(r, "Failed to create /run/host/userdb: %m");
r = userns_mkdir(root, "/run/host/userdb", 0755, 0, 0);
if (r < 0)
return log_error_errno(r, "Failed to create /run/host/userdb: %m");
for (size_t i = 0; i < c->n_data; i++) {
_cleanup_(group_record_unrefp) GroupRecord *stripped_group = NULL, *shadow_group = NULL;
_cleanup_(user_record_unrefp) UserRecord *stripped_user = NULL, *shadow_user = NULL;
const BindUserData *d = c->data + i;
/* First, write shadow (i.e. privileged) data for group record */
r = group_record_clone(d->payload_group, shadow_flags, &shadow_group);
if (r < 0)
return log_error_errno(r, "Failed to extract privileged information from group record: %m");
if (!json_variant_is_blank_object(shadow_group->json)) {
r = write_and_symlink(
root,
shadow_group->json,
d->payload_group->group_name,
d->payload_group->gid,
".group-privileged",
WRITE_STRING_FILE_MODE_0600);
if (r < 0)
return r;
}
/* Second, write main part of group record. */
r = group_record_clone(d->payload_group, strip_flags, &stripped_group);
if (r < 0)
return log_error_errno(r, "Failed to strip privileged information from group record: %m");
r = write_and_symlink(
root,
stripped_group->json,
d->payload_group->group_name,
d->payload_group->gid,
".group",
0);
if (r < 0)
return r;
/* Third, write out user shadow data. i.e. extract privileged info from user record */
r = user_record_clone(d->payload_user, shadow_flags, &shadow_user);
if (r < 0)
return log_error_errno(r, "Failed to extract privileged information from user record: %m");
if (!json_variant_is_blank_object(shadow_user->json)) {
r = write_and_symlink(
root,
shadow_user->json,
d->payload_user->user_name,
d->payload_user->uid,
".user-privileged",
WRITE_STRING_FILE_MODE_0600);
if (r < 0)
return r;
}
/* Finally write out the main part of the user record */
r = user_record_clone(d->payload_user, strip_flags, &stripped_user);
if (r < 0)
return log_error_errno(r, "Failed to strip privileged information from user record: %m");
r = write_and_symlink(
root,
stripped_user->json,
d->payload_user->user_name,
d->payload_user->uid,
".user",
0);
if (r < 0)
return r;
}
return 1;
}

View File

@ -0,0 +1,29 @@
/* SPDX-License-Identifier: LGPL-2.1-or-later */
#pragma once
#include "user-record.h"
#include "group-record.h"
#include "nspawn-mount.h"
typedef struct BindUserData {
/* The host's user/group records */
UserRecord *host_user;
GroupRecord *host_group;
/* The mapped records to place into the container */
UserRecord *payload_user;
GroupRecord *payload_group;
} BindUserData;
typedef struct BindUserContext {
BindUserData *data;
size_t n_data;
} BindUserContext;
BindUserContext* bind_user_context_free(BindUserContext *c);
DEFINE_TRIVIAL_CLEANUP_FUNC(BindUserContext*, bind_user_context_free);
int bind_user_prepare(const char *directory, char **bind_user, uid_t uid_shift, uid_t uid_range, CustomMount **custom_mounts, size_t *n_custom_mounts, BindUserContext **ret);
int bind_user_setup(const BindUserContext *c, const char *root);

View File

@ -69,6 +69,7 @@ Files.Overlay, config_parse_overlay, 0, 0
Files.OverlayReadOnly, config_parse_overlay, 1, 0
Files.PrivateUsersChown, config_parse_userns_chown, 0, offsetof(Settings, userns_ownership)
Files.PrivateUsersOwnership, config_parse_userns_ownership, 0, offsetof(Settings, userns_ownership)
Files.BindUser, config_parse_bind_user, 0, offsetof(Settings, bind_user)
Network.Private, config_parse_tristate, 0, offsetof(Settings, private_network)
Network.Interface, config_parse_strv, 0, offsetof(Settings, network_interfaces)
Network.MACVLAN, config_parse_strv, 0, offsetof(Settings, network_macvlan)

View File

@ -132,6 +132,7 @@ Settings* settings_free(Settings *s) {
rlimit_free_all(s->rlimit);
free(s->hostname);
cpu_set_reset(&s->cpu_set);
strv_free(s->bind_user);
strv_free(s->network_interfaces);
strv_free(s->network_macvlan);
@ -907,3 +908,51 @@ int config_parse_userns_chown(
*ownership = r ? USER_NAMESPACE_OWNERSHIP_CHOWN : USER_NAMESPACE_OWNERSHIP_OFF;
return 0;
}
int config_parse_bind_user(
const char *unit,
const char *filename,
unsigned line,
const char *section,
unsigned section_line,
const char *lvalue,
int ltype,
const char *rvalue,
void *data,
void *userdata) {
char ***bind_user = data;
int r;
assert(rvalue);
assert(bind_user);
if (isempty(rvalue)) {
*bind_user = strv_free(*bind_user);
return 0;
}
for (const char* p = rvalue;;) {
_cleanup_free_ char *word = NULL;
r = extract_first_word(&p, &word, NULL, 0);
if (r == -ENOMEM)
return log_oom();
if (r < 0) {
log_syntax(unit, LOG_WARNING, filename, line, r, "Failed to parse BindUser= list, ignoring: %s", rvalue);
return 0;
}
if (r == 0)
break;
if (!valid_user_group_name(word, 0)) {
log_syntax(unit, LOG_WARNING, filename, line, 0, "User name '%s' not valid, ignoring.", word);
return 0;
}
if (strv_consume(bind_user, TAKE_PTR(word)) < 0)
return log_oom();
}
return 0;
}

View File

@ -126,9 +126,10 @@ typedef enum SettingsMask {
SETTING_CLONE_NS_FLAGS = UINT64_C(1) << 28,
SETTING_CONSOLE_MODE = UINT64_C(1) << 29,
SETTING_CREDENTIALS = UINT64_C(1) << 30,
SETTING_RLIMIT_FIRST = UINT64_C(1) << 31, /* we define one bit per resource limit here */
SETTING_RLIMIT_LAST = UINT64_C(1) << (31 + _RLIMIT_MAX - 1),
_SETTINGS_MASK_ALL = (UINT64_C(1) << (31 + _RLIMIT_MAX)) -1,
SETTING_BIND_USER = UINT64_C(1) << 31,
SETTING_RLIMIT_FIRST = UINT64_C(1) << 32, /* we define one bit per resource limit here */
SETTING_RLIMIT_LAST = UINT64_C(1) << (32 + _RLIMIT_MAX - 1),
_SETTINGS_MASK_ALL = (UINT64_C(1) << (32 + _RLIMIT_MAX)) -1,
_SETTING_FORCE_ENUM_WIDTH = UINT64_MAX
} SettingsMask;
@ -195,6 +196,7 @@ typedef struct Settings {
CustomMount *custom_mounts;
size_t n_custom_mounts;
UserNamespaceOwnership userns_ownership;
char **bind_user;
/* [Network] */
int private_network;
@ -266,6 +268,7 @@ CONFIG_PARSER_PROTOTYPE(config_parse_link_journal);
CONFIG_PARSER_PROTOTYPE(config_parse_timezone);
CONFIG_PARSER_PROTOTYPE(config_parse_userns_chown);
CONFIG_PARSER_PROTOTYPE(config_parse_userns_ownership);
CONFIG_PARSER_PROTOTYPE(config_parse_bind_user);
const char *resolv_conf_mode_to_string(ResolvConfMode a) _const_;
ResolvConfMode resolv_conf_mode_from_string(const char *s) _pure_;

View File

@ -63,6 +63,7 @@
#include "mountpoint-util.h"
#include "namespace-util.h"
#include "netlink-util.h"
#include "nspawn-bind-user.h"
#include "nspawn-cgroup.h"
#include "nspawn-creds.h"
#include "nspawn-def.h"
@ -76,6 +77,7 @@
#include "nspawn-settings.h"
#include "nspawn-setuid.h"
#include "nspawn-stub-pid1.h"
#include "nspawn.h"
#include "nulstr-util.h"
#include "os-util.h"
#include "pager.h"
@ -225,6 +227,7 @@ static char **arg_sysctl = NULL;
static ConsoleMode arg_console_mode = _CONSOLE_MODE_INVALID;
static Credential *arg_credentials = NULL;
static size_t arg_n_credentials = 0;
static char **arg_bind_user = NULL;
STATIC_DESTRUCTOR_REGISTER(arg_directory, freep);
STATIC_DESTRUCTOR_REGISTER(arg_template, freep);
@ -257,6 +260,7 @@ STATIC_DESTRUCTOR_REGISTER(arg_seccomp, seccomp_releasep);
#endif
STATIC_DESTRUCTOR_REGISTER(arg_cpu_set, cpu_set_reset);
STATIC_DESTRUCTOR_REGISTER(arg_sysctl, strv_freep);
STATIC_DESTRUCTOR_REGISTER(arg_bind_user, strv_freep);
static int handle_arg_console(const char *arg) {
if (streq(arg, "help")) {
@ -422,7 +426,8 @@ static int help(void) {
" Create an overlay mount from the host to \n"
" the container\n"
" --overlay-ro=PATH[:PATH...]:PATH\n"
" Similar, but creates a read-only overlay mount\n\n"
" Similar, but creates a read-only overlay mount\n"
" --bind-user=NAME Bind user from host to container\n\n"
"%3$sInput/Output:%4$s\n"
" --console=MODE Select how stdin/stdout/stderr and /dev/console are\n"
" set up for the container.\n"
@ -706,6 +711,7 @@ static int parse_argv(int argc, char *argv[]) {
ARG_NO_PAGER,
ARG_SET_CREDENTIAL,
ARG_LOAD_CREDENTIAL,
ARG_BIND_USER,
};
static const struct option options[] = {
@ -777,6 +783,7 @@ static int parse_argv(int argc, char *argv[]) {
{ "no-pager", no_argument, NULL, ARG_NO_PAGER },
{ "set-credential", required_argument, NULL, ARG_SET_CREDENTIAL },
{ "load-credential", required_argument, NULL, ARG_LOAD_CREDENTIAL },
{ "bind-user", required_argument, NULL, ARG_BIND_USER },
{}
};
@ -1655,6 +1662,16 @@ static int parse_argv(int argc, char *argv[]) {
break;
}
case ARG_BIND_USER:
if (!valid_user_group_name(optarg, 0))
return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Invalid user name to bind: %s", optarg);
if (strv_extend(&arg_bind_user, optarg) < 0)
return log_oom();
arg_settings_mask |= SETTING_BIND_USER;
break;
case '?':
return -EINVAL;
@ -1811,6 +1828,12 @@ static int verify_arguments(void) {
return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "AmbientCapability= setting is not useful for boot mode.");
}
if (arg_userns_mode == USER_NAMESPACE_NO && !strv_isempty(arg_bind_user))
return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "--bind-user= requires --private-users");
/* Drop duplicate --bind-user= entries */
strv_uniq(arg_bind_user);
r = custom_mount_check_all();
if (r < 0)
return r;
@ -1818,7 +1841,7 @@ static int verify_arguments(void) {
return 0;
}
static int userns_lchown(const char *p, uid_t uid, gid_t gid) {
int userns_lchown(const char *p, uid_t uid, gid_t gid) {
assert(p);
if (arg_userns_mode == USER_NAMESPACE_NO)
@ -1847,7 +1870,7 @@ static int userns_lchown(const char *p, uid_t uid, gid_t gid) {
return 0;
}
static int userns_mkdir(const char *root, const char *path, mode_t mode, uid_t uid, gid_t gid) {
int userns_mkdir(const char *root, const char *path, mode_t mode, uid_t uid, gid_t gid) {
const char *q;
int r;
@ -3568,6 +3591,7 @@ static int outer_child(
FDSet *fds,
int netns_fd) {
_cleanup_(bind_user_context_freep) BindUserContext *bind_user_context = NULL;
_cleanup_strv_free_ char **os_release_pairs = NULL;
_cleanup_close_ int fd = -1;
bool idmap = false;
@ -3715,6 +3739,36 @@ static int outer_child(
if (r < 0)
return r;
r = bind_user_prepare(
directory,
arg_bind_user,
arg_uid_shift,
arg_uid_range,
&arg_custom_mounts, &arg_n_custom_mounts,
&bind_user_context);
if (r < 0)
return r;
if (arg_userns_mode != USER_NAMESPACE_NO && bind_user_context) {
/* Send the user maps we determined to the parent, so that it installs it in our user namespace UID map table */
for (size_t i = 0; i < bind_user_context->n_data; i++) {
uid_t map[] = {
bind_user_context->data[i].payload_user->uid,
bind_user_context->data[i].host_user->uid,
(uid_t) bind_user_context->data[i].payload_group->gid,
(uid_t) bind_user_context->data[i].host_group->gid,
};
l = send(uid_shift_socket, map, sizeof(map), MSG_NOSIGNAL);
if (l < 0)
return log_error_errno(errno, "Failed to send user UID map: %m");
if (l != sizeof(map))
return log_error_errno(SYNTHETIC_ERRNO(EIO),
"Short write while sending user UID map.");
}
}
r = mount_custom(
directory,
arg_custom_mounts,
@ -3831,6 +3885,10 @@ static int outer_child(
if (r < 0)
return r;
r = bind_user_setup(bind_user_context, directory);
if (r < 0)
return r;
r = mount_custom(
directory,
arg_custom_mounts,
@ -4011,21 +4069,96 @@ static int uid_shift_pick(uid_t *shift, LockFile *ret_lock_file) {
}
}
static int setup_uid_map(pid_t pid) {
char uid_map[STRLEN("/proc//uid_map") + DECIMAL_STR_MAX(uid_t) + 1], line[DECIMAL_STR_MAX(uid_t)*3+3+1];
static int add_one_uid_map(
char **p,
uid_t container_uid,
uid_t host_uid,
uid_t range) {
return strextendf(p,
UID_FMT " " UID_FMT " " UID_FMT "\n",
container_uid, host_uid, range);
}
static int make_uid_map_string(
const uid_t bind_user_uid[],
size_t n_bind_user_uid,
size_t offset,
char **ret) {
_cleanup_free_ char *s = NULL;
uid_t previous_uid = 0;
int r;
assert(n_bind_user_uid == 0 || bind_user_uid);
assert(offset == 0 || offset == 2); /* used to switch between UID and GID map */
assert(ret);
/* The bind_user_uid[] array is a series of 4 uid_t values, for each --bind-user= entry one
* quadruplet, consisting of host and container UID + GID. */
for (size_t i = 0; i < n_bind_user_uid; i++) {
uid_t payload_uid = bind_user_uid[i*2+offset],
host_uid = bind_user_uid[i*2+offset+1];
assert(previous_uid <= payload_uid);
assert(payload_uid < arg_uid_range);
/* Add a range to close the gap to previous entry */
if (payload_uid > previous_uid) {
r = add_one_uid_map(&s, previous_uid, arg_uid_shift + previous_uid, payload_uid - previous_uid);
if (r < 0)
return r;
}
/* Map this specific user */
r = add_one_uid_map(&s, payload_uid, host_uid, 1);
if (r < 0)
return r;
previous_uid = payload_uid + 1;
}
/* And add a range to close the gap to finish the range */
if (arg_uid_range > previous_uid) {
r = add_one_uid_map(&s, previous_uid, arg_uid_shift + previous_uid, arg_uid_range - previous_uid);
if (r < 0)
return r;
}
assert(s);
*ret = TAKE_PTR(s);
return 0;
}
static int setup_uid_map(
pid_t pid,
const uid_t bind_user_uid[],
size_t n_bind_user_uid) {
char uid_map[STRLEN("/proc//uid_map") + DECIMAL_STR_MAX(uid_t) + 1];
_cleanup_free_ char *s = NULL;
int r;
assert(pid > 1);
/* Build the UID map string */
if (make_uid_map_string(bind_user_uid, n_bind_user_uid, 0, &s) < 0) /* offset=0 contains the UID pair */
return log_oom();
xsprintf(uid_map, "/proc/" PID_FMT "/uid_map", pid);
xsprintf(line, UID_FMT " " UID_FMT " " UID_FMT "\n", 0, arg_uid_shift, arg_uid_range);
r = write_string_file(uid_map, line, WRITE_STRING_FILE_DISABLE_BUFFER);
r = write_string_file(uid_map, s, WRITE_STRING_FILE_DISABLE_BUFFER);
if (r < 0)
return log_error_errno(r, "Failed to write UID map: %m");
/* We always assign the same UID and GID ranges */
/* And now build the GID map string */
s = mfree(s);
if (make_uid_map_string(bind_user_uid, n_bind_user_uid, 2, &s) < 0) /* offset=2 contains the GID pair */
return log_oom();
xsprintf(uid_map, "/proc/" PID_FMT "/gid_map", pid);
r = write_string_file(uid_map, line, WRITE_STRING_FILE_DISABLE_BUFFER);
r = write_string_file(uid_map, s, WRITE_STRING_FILE_DISABLE_BUFFER);
if (r < 0)
return log_error_errno(r, "Failed to write GID map: %m");
@ -4301,6 +4434,9 @@ static int merge_settings(Settings *settings, const char *path) {
}
}
if ((arg_settings_mask & SETTING_BIND_USER) == 0)
strv_free_and_replace(arg_bind_user, settings->bind_user);
if ((arg_settings_mask & SETTING_NOTIFY_READY) == 0)
arg_notify_ready = settings->notify_ready;
@ -4567,6 +4703,8 @@ static int run_container(
_cleanup_(pty_forward_freep) PTYForward *forward = NULL;
_cleanup_(sd_netlink_unrefp) sd_netlink *rtnl = NULL;
_cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
_cleanup_free_ uid_t *bind_user_uid = NULL;
size_t n_bind_user_uid = 0;
ContainerStatus container_status = 0;
int ifi = 0, r;
ssize_t l;
@ -4722,6 +4860,26 @@ static int run_container(
if (l != sizeof arg_uid_shift)
return log_error_errno(SYNTHETIC_ERRNO(EIO), "Short write while writing UID shift.");
}
n_bind_user_uid = strv_length(arg_bind_user);
if (n_bind_user_uid > 0) {
/* Right after the UID shift, we'll receive the list of UID mappings for the
* --bind-user= logic. Always a quadruplet of payload and host UID + GID. */
bind_user_uid = new(uid_t, n_bind_user_uid*4);
if (!bind_user_uid)
return log_oom();
for (size_t i = 0; i < n_bind_user_uid; i++) {
l = recv(uid_shift_socket_pair[0], bind_user_uid + i*4, sizeof(uid_t)*4, 0);
if (l < 0)
return log_error_errno(errno, "Failed to read user UID map pair: %m");
if (l != sizeof(uid_t)*4)
return log_full_errno(l == 0 ? LOG_DEBUG : LOG_WARNING,
SYNTHETIC_ERRNO(EIO),
"Short read while reading bind user UID pairs.");
}
}
}
if (arg_unified_cgroup_hierarchy == CGROUP_UNIFIED_UNKNOWN) {
@ -4767,7 +4925,7 @@ static int run_container(
if (!barrier_place_and_sync(&barrier)) /* #1 */
return log_error_errno(SYNTHETIC_ERRNO(ESRCH), "Child died too early.");
r = setup_uid_map(*pid);
r = setup_uid_map(*pid, bind_user_uid, n_bind_user_uid);
if (r < 0)
return r;

7
src/nspawn/nspawn.h Normal file
View File

@ -0,0 +1,7 @@
/* SPDX-License-Identifier: LGPL-2.1-or-later */
#pragma once
#include <sys/types.h>
int userns_lchown(const char *p, uid_t uid, gid_t gid);
int userns_mkdir(const char *root, const char *path, mode_t mode, uid_t uid, gid_t gid);

View File

@ -1552,7 +1552,7 @@ int user_group_record_mangle(
if (FLAGS_SET(load_flags, USER_RECORD_REQUIRE_REGULAR) && !FLAGS_SET(m, USER_RECORD_REGULAR))
return json_log(v, json_flags, SYNTHETIC_ERRNO(EBADMSG), "Record lacks basic identity fields, which are required.");
if (m == 0)
if (!FLAGS_SET(load_flags, USER_RECORD_EMPTY_OK) && m == 0)
return json_log(v, json_flags, SYNTHETIC_ERRNO(EBADMSG), "Record is empty.");
if (w)

View File

@ -169,6 +169,9 @@ typedef enum UserRecordLoadFlags {
/* Whether to ignore errors and load what we can */
USER_RECORD_PERMISSIVE = 1U << 29,
/* Whether an empty record is OK */
USER_RECORD_EMPTY_OK = 1U << 30,
} UserRecordLoadFlags;
static inline UserRecordLoadFlags USER_RECORD_REQUIRE(UserRecordMask m) {