From 1a298a206c5dfe03c6cc9e690e1a81719c25c20c Mon Sep 17 00:00:00 2001
From: Lennart Poettering <lennart@poettering.net>
Date: Wed, 5 May 2021 10:45:48 +0200
Subject: [PATCH 1/4] user-record: optionally, allow parsing empty user record
 JSON objects

---
 src/shared/user-record.c | 2 +-
 src/shared/user-record.h | 3 +++
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/src/shared/user-record.c b/src/shared/user-record.c
index bc35edd729..d82b4d3636 100644
--- a/src/shared/user-record.c
+++ b/src/shared/user-record.c
@@ -1552,7 +1552,7 @@ int user_group_record_mangle(
         if (FLAGS_SET(load_flags, USER_RECORD_REQUIRE_REGULAR) && !FLAGS_SET(m, USER_RECORD_REGULAR))
                 return json_log(v, json_flags, SYNTHETIC_ERRNO(EBADMSG), "Record lacks basic identity fields, which are required.");
 
-        if (m == 0)
+        if (!FLAGS_SET(load_flags, USER_RECORD_EMPTY_OK) && m == 0)
                 return json_log(v, json_flags, SYNTHETIC_ERRNO(EBADMSG), "Record is empty.");
 
         if (w)
diff --git a/src/shared/user-record.h b/src/shared/user-record.h
index 623f7bc9e4..66dceecfdd 100644
--- a/src/shared/user-record.h
+++ b/src/shared/user-record.h
@@ -169,6 +169,9 @@ typedef enum UserRecordLoadFlags {
 
         /* Whether to ignore errors and load what we can */
         USER_RECORD_PERMISSIVE          = 1U << 29,
+
+        /* Whether an empty record is OK */
+        USER_RECORD_EMPTY_OK            = 1U << 30,
 } UserRecordLoadFlags;
 
 static inline UserRecordLoadFlags USER_RECORD_REQUIRE(UserRecordMask m) {

From 91181e075be46e9c919315f2e8f903a963754cb2 Mon Sep 17 00:00:00 2001
From: Lennart Poettering <lennart@poettering.net>
Date: Wed, 5 May 2021 12:29:01 +0200
Subject: [PATCH 2/4] nspawn: export userns_mkdir() + userns_lchown() so that
 it can be used elsewhere in nspawn

---
 src/nspawn/meson.build | 1 +
 src/nspawn/nspawn.c    | 5 +++--
 src/nspawn/nspawn.h    | 7 +++++++
 3 files changed, 11 insertions(+), 2 deletions(-)
 create mode 100644 src/nspawn/nspawn.h

diff --git a/src/nspawn/meson.build b/src/nspawn/meson.build
index 172ded43c1..a0e051ed32 100644
--- a/src/nspawn/meson.build
+++ b/src/nspawn/meson.build
@@ -26,6 +26,7 @@ libnspawn_core_sources = files('''
         nspawn-setuid.h
         nspawn-stub-pid1.c
         nspawn-stub-pid1.h
+        nspawn.h
 '''.split())
 
 nspawn_gperf_c = custom_target(
diff --git a/src/nspawn/nspawn.c b/src/nspawn/nspawn.c
index a8441bf8e0..9dbe2af5d9 100644
--- a/src/nspawn/nspawn.c
+++ b/src/nspawn/nspawn.c
@@ -76,6 +76,7 @@
 #include "nspawn-settings.h"
 #include "nspawn-setuid.h"
 #include "nspawn-stub-pid1.h"
+#include "nspawn.h"
 #include "nulstr-util.h"
 #include "os-util.h"
 #include "pager.h"
@@ -1818,7 +1819,7 @@ static int verify_arguments(void) {
         return 0;
 }
 
-static int userns_lchown(const char *p, uid_t uid, gid_t gid) {
+int userns_lchown(const char *p, uid_t uid, gid_t gid) {
         assert(p);
 
         if (arg_userns_mode == USER_NAMESPACE_NO)
@@ -1847,7 +1848,7 @@ static int userns_lchown(const char *p, uid_t uid, gid_t gid) {
         return 0;
 }
 
-static int userns_mkdir(const char *root, const char *path, mode_t mode, uid_t uid, gid_t gid) {
+int userns_mkdir(const char *root, const char *path, mode_t mode, uid_t uid, gid_t gid) {
         const char *q;
         int r;
 
diff --git a/src/nspawn/nspawn.h b/src/nspawn/nspawn.h
new file mode 100644
index 0000000000..27fb0b44eb
--- /dev/null
+++ b/src/nspawn/nspawn.h
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <sys/types.h>
+
+int userns_lchown(const char *p, uid_t uid, gid_t gid);
+int userns_mkdir(const char *root, const char *path, mode_t mode, uid_t uid, gid_t gid);

From 2f8930449079403b26c9164b8eeac78d5af2c8df Mon Sep 17 00:00:00 2001
From: Lennart Poettering <lennart@poettering.net>
Date: Wed, 5 May 2021 12:45:22 +0200
Subject: [PATCH 3/4] nspawn: add new --bind-user= option for binding a host
 user into the container
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This new option does three things for a host user specified via
--bind-user=:

1. Bind mount the home directory from the host directory into
   /run/host/home/<username>

2. Install an additional user namespace UID/GID mapping, mapping the host
   UID/GID of the host user to an unused one from the container in the range
   60514…60577.

3. Synthesize a user/group record for the user/group under the same name
   as on the host, with minimized information, and the UID/GID set to
   the mapped UID/GID. This data is written to /run/host/userdb/ where
   nss-systemd will pick it up.

This should make sharing users and home directories from host into the
container pretty seamless, under some conditions:

1. User namespacing must be used.

2. The host UID/GID of the user/group cannot be in the range assigned to
   the container (kernel already refuses this, as this would mean two
   host UIDs/GIDs might end up being mapped to the same container
   UID/GID).

3. There's a free UID/GID in the aforementioned range in the container,
   and the name of the user/group is not used in the container.

4. Container payload is new enough to include an nss-systemd version
   that picks up records from /run/host/userdb/
---
 src/nspawn/meson.build        |   2 +
 src/nspawn/nspawn-bind-user.c | 479 ++++++++++++++++++++++++++++++++++
 src/nspawn/nspawn-bind-user.h |  29 ++
 src/nspawn/nspawn-gperf.gperf |   1 +
 src/nspawn/nspawn-settings.c  |  49 ++++
 src/nspawn/nspawn-settings.h  |   9 +-
 src/nspawn/nspawn.c           | 173 +++++++++++-
 7 files changed, 731 insertions(+), 11 deletions(-)
 create mode 100644 src/nspawn/nspawn-bind-user.c
 create mode 100644 src/nspawn/nspawn-bind-user.h

diff --git a/src/nspawn/meson.build b/src/nspawn/meson.build
index a0e051ed32..d465b3d804 100644
--- a/src/nspawn/meson.build
+++ b/src/nspawn/meson.build
@@ -1,6 +1,8 @@
 # SPDX-License-Identifier: LGPL-2.1-or-later
 
 libnspawn_core_sources = files('''
+        nspawn-bind-user.c
+        nspawn-bind-user.h
         nspawn-cgroup.c
         nspawn-cgroup.h
         nspawn-creds.c
diff --git a/src/nspawn/nspawn-bind-user.c b/src/nspawn/nspawn-bind-user.c
new file mode 100644
index 0000000000..ebf7d4d917
--- /dev/null
+++ b/src/nspawn/nspawn-bind-user.c
@@ -0,0 +1,479 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "fd-util.h"
+#include "fileio.h"
+#include "format-util.h"
+#include "fs-util.h"
+#include "nspawn-bind-user.h"
+#include "nspawn.h"
+#include "path-util.h"
+#include "user-util.h"
+#include "userdb.h"
+
+#define MAP_UID_START 60514
+#define MAP_UID_END 60577
+
+static int check_etc_passwd_collisions(
+                const char *directory,
+                const char *name,
+                uid_t uid) {
+
+        _cleanup_fclose_ FILE *f = NULL;
+        int r;
+
+        assert(directory);
+        assert(name || uid_is_valid(uid));
+        /* Looks for an /etc/passwd entry of the container matching 'name' and/or 'uid' (a NULL name or an invalid UID turns the respective comparison off). Returns 1 on a match, 0 if there is none or the file is missing, and the (logged) error otherwise. */
+        r = chase_symlinks_and_fopen_unlocked("/etc/passwd", directory, CHASE_PREFIX_ROOT, "re", &f, NULL);
+        if (r == -ENOENT)
+                return 0; /* no user database? then no user, hence no collision */
+        if (r < 0)
+                return log_error_errno(r, "Failed to open /etc/passwd of container: %m");
+
+        for (;;) {
+                struct passwd *pw;
+
+                r = fgetpwent_sane(f, &pw);
+                if (r < 0)
+                        return log_error_errno(r, "Failed to iterate through /etc/passwd of container: %m");
+                if (r == 0) /* EOF */
+                        return 0; /* no collision */
+
+                if (name && streq_ptr(pw->pw_name, name))
+                        return 1; /* name collision */
+                if (uid_is_valid(uid) && pw->pw_uid == uid)
+                        return 1; /* UID collision */
+        }
+}
+
+static int check_etc_group_collisions(
+                const char *directory,
+                const char *name,
+                gid_t gid) {
+
+        _cleanup_fclose_ FILE *f = NULL;
+        int r;
+
+        assert(directory);
+        assert(name || gid_is_valid(gid));
+        /* Looks for an /etc/group entry of the container matching 'name' and/or 'gid' (a NULL name or an invalid GID turns the respective comparison off). Returns 1 on a match, 0 if there is none or the file is missing, and the (logged) error otherwise. */
+        r = chase_symlinks_and_fopen_unlocked("/etc/group", directory, CHASE_PREFIX_ROOT, "re", &f, NULL);
+        if (r == -ENOENT)
+                return 0; /* no group database? then no group, hence no collision */
+        if (r < 0)
+                return log_error_errno(r, "Failed to open /etc/group of container: %m");
+
+        for (;;) {
+                struct group *gr;
+
+                r = fgetgrent_sane(f, &gr);
+                if (r < 0)
+                        return log_error_errno(r, "Failed to iterate through /etc/group of container: %m");
+                if (r == 0) /* EOF */
+                        return 0; /* no collision */
+
+                if (name && streq_ptr(gr->gr_name, name))
+                        return 1; /* name collision */
+                if (gid_is_valid(gid) && gr->gr_gid == gid)
+                        return 1; /* gid collision */
+        }
+}
+
+static int convert_user(
+                const char *directory,
+                UserRecord *u,
+                GroupRecord *g,
+                uid_t allocate_uid,
+                UserRecord **ret_converted_user,
+                GroupRecord **ret_converted_group) {
+
+        _cleanup_(group_record_unrefp) GroupRecord *converted_group = NULL;
+        _cleanup_(user_record_unrefp) UserRecord *converted_user = NULL;
+        _cleanup_free_ char *h = NULL;
+        JsonVariant *p, *hp = NULL;
+        int r;
+
+        assert(u);
+        assert(g);
+        assert(u->gid == g->gid);
+        /* Builds reduced user + group records for the container from the host records 'u' and 'g': names are kept, 'allocate_uid' becomes both UID and GID, and the home directory points below /run/host/home/. Fails with EBUSY if either name is already listed in the container's /etc/passwd or /etc/group. On success the new records are stored in the two ret_ parameters and 0 is returned. */
+        r = check_etc_passwd_collisions(directory, u->user_name, UID_INVALID);
+        if (r < 0)
+                return r;
+        if (r > 0)
+                return log_error_errno(SYNTHETIC_ERRNO(EBUSY),
+                                       "Sorry, the user '%s' already exists in the container.", u->user_name);
+
+        r = check_etc_group_collisions(directory, g->group_name, GID_INVALID);
+        if (r < 0)
+                return r;
+        if (r > 0)
+                return log_error_errno(SYNTHETIC_ERRNO(EBUSY),
+                                       "Sorry, the group '%s' already exists in the container.", g->group_name);
+
+        h = path_join("/run/host/home/", u->user_name);
+        if (!h)
+                return log_oom();
+
+        /* Acquire the source hashed password array as-is, so that it retains the JSON_VARIANT_SENSITIVE flag */
+        p = json_variant_by_key(u->json, "privileged");
+        if (p)
+                hp = json_variant_by_key(p, "hashedPassword");
+
+        r = user_record_build(
+                        &converted_user,
+                        JSON_BUILD_OBJECT(
+                                        JSON_BUILD_PAIR("userName", JSON_BUILD_STRING(u->user_name)),
+                                        JSON_BUILD_PAIR("uid", JSON_BUILD_UNSIGNED(allocate_uid)),
+                                        JSON_BUILD_PAIR("gid", JSON_BUILD_UNSIGNED(allocate_uid)), /* the GID deliberately equals the UID */
+                                        JSON_BUILD_PAIR_CONDITION(u->disposition >= 0, "disposition", JSON_BUILD_STRING(user_disposition_to_string(u->disposition))),
+                                        JSON_BUILD_PAIR("homeDirectory", JSON_BUILD_STRING(h)),
+                                        JSON_BUILD_PAIR("service", JSON_BUILD_STRING("io.systemd.NSpawn")),
+                                        JSON_BUILD_PAIR_CONDITION(!strv_isempty(u->hashed_password), "privileged", JSON_BUILD_OBJECT(
+                                                                                  JSON_BUILD_PAIR("hashedPassword", JSON_BUILD_VARIANT(hp))))));
+        if (r < 0)
+                return log_error_errno(r, "Failed to build container user record: %m");
+
+        r = group_record_build(
+                        &converted_group,
+                        JSON_BUILD_OBJECT(
+                                        JSON_BUILD_PAIR("groupName", JSON_BUILD_STRING(g->group_name)),
+                                        JSON_BUILD_PAIR("gid", JSON_BUILD_UNSIGNED(allocate_uid)),
+                                        JSON_BUILD_PAIR_CONDITION(g->disposition >= 0, "disposition", JSON_BUILD_STRING(user_disposition_to_string(g->disposition))),
+                                        JSON_BUILD_PAIR("service", JSON_BUILD_STRING("io.systemd.NSpawn"))));
+        if (r < 0)
+                return log_error_errno(r, "Failed to build container group record: %m");
+
+        *ret_converted_user = TAKE_PTR(converted_user); /* hand ownership to the caller only after both records were built */
+        *ret_converted_group = TAKE_PTR(converted_group);
+
+        return 0;
+}
+
+static int find_free_uid(const char *directory, uid_t max_uid, uid_t *current_uid) {
+        int r;
+
+        assert(directory);
+        assert(current_uid);
+        /* Starting at *current_uid, counts upwards until a number is found that is used neither as UID in the container's /etc/passwd nor as GID in its /etc/group. Returns 0 with *current_uid set to that number, a logged EBUSY error once the candidate exceeds MAP_UID_END or 'max_uid', or the error of the collision checks. */
+        for (;; (*current_uid) ++) {
+                if (*current_uid > MAP_UID_END || *current_uid > max_uid) /* NOTE(review): bind_user_prepare() passes uid_range (a count) as max_uid, so '>=' might be intended for the second comparison — confirm */
+                        return log_error_errno(
+                                        SYNTHETIC_ERRNO(EBUSY),
+                                        "No suitable available UID in range " UID_FMT "…" UID_FMT " in container detected, can't map user.",
+                                        MAP_UID_START, MAP_UID_END);
+
+                r = check_etc_passwd_collisions(directory, NULL, *current_uid);
+                if (r < 0)
+                        return r;
+                if (r > 0) /* already used */
+                        continue;
+
+                /* We want to use the UID also as GID, hence check for it in /etc/group too */
+                r = check_etc_group_collisions(directory, NULL, (gid_t) *current_uid);
+                if (r < 0)
+                        return r;
+                if (r == 0) /* free! yay! */
+                        return 0;
+        }
+}
+
+BindUserContext* bind_user_context_free(BindUserContext *c) {
+        if (!c) /* tolerate NULL, so that this can be used as cleanup handler on unset pointers */
+                return NULL;
+
+        assert(c->n_data == 0 || c->data);
+        /* Drop the references each entry holds on its host and payload records */
+        for (size_t i = 0; i < c->n_data; i++) {
+                user_record_unref(c->data[i].host_user);
+                group_record_unref(c->data[i].host_group);
+                user_record_unref(c->data[i].payload_user);
+                group_record_unref(c->data[i].payload_group);
+        }
+        free(c->data); /* the entry array is a separate heap allocation (GREEDY_REALLOC() in bind_user_prepare()), release it as well */
+        return mfree(c); /* frees the context itself, always returns NULL */
+}
+
+int bind_user_prepare(
+                const char *directory,
+                char **bind_user,
+                uid_t uid_shift,
+                uid_t uid_range,
+                CustomMount **custom_mounts,
+                size_t *n_custom_mounts,
+                BindUserContext **ret) {
+
+        _cleanup_(bind_user_context_freep) BindUserContext *c = NULL;
+        uid_t current_uid = MAP_UID_START;
+        size_t n_allocated = 0;
+        char **n;
+        int r;
+
+        assert(custom_mounts);
+        assert(n_custom_mounts);
+        assert(ret);
+
+        /* This resolves the users specified in 'bind_user', generates a minimalized JSON user + group record
+         * for it to stick in the container, allocates a UID/GID for it, and updates the custom mount table,
+         * to include an appropriate bind mount mapping.
+         *
+         * This extends the passed custom_mounts/n_custom_mounts with the home directories, and allocates a
+         * new BindUserContext for the user records */
+
+        if (strv_isempty(bind_user)) { /* nothing to bind: report that with a NULL context and 0 */
+                *ret = NULL;
+                return 0;
+        }
+
+        c = new0(BindUserContext, 1);
+        if (!c)
+                return log_oom();
+
+        STRV_FOREACH(n, bind_user) {
+                _cleanup_(user_record_unrefp) UserRecord *u = NULL, *cu = NULL;
+                _cleanup_(group_record_unrefp) GroupRecord *g = NULL, *cg = NULL;
+                _cleanup_free_ char *sm = NULL, *sd = NULL;
+                CustomMount *cm;
+
+                r = userdb_by_name(*n, USERDB_DONT_SYNTHESIZE, &u);
+                if (r < 0)
+                        return log_error_errno(r, "Failed to resolve user '%s': %m", *n);
+
+                /* For now, let's refuse mapping the root/nobody users explicitly. The records we generate
+                 * are strictly additive, nss-systemd is typically placed last in /etc/nsswitch.conf. Thus
+                 * even if we wanted, we couldn't override the root or nobody user records. Note we also
+                 * check for name conflicts in /etc/passwd + /etc/group later on, which would usually filter
+                 * out root/nobody too, hence these checks might appear redundant — but they actually are
+                 * not, as we want to support environments where /etc/passwd and /etc/group are non-existent,
+                 * and the user/group databases fully synthesized at runtime. Moreover, the name of the
+                 * user/group name of the "nobody" account differs between distros, hence a check by numeric
+                 * UID is safer. */
+                if (u->uid == 0 || streq(u->user_name, "root"))
+                        return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Mapping 'root' user not supported, sorry.");
+                if (u->uid == UID_NOBODY || STR_IN_SET(u->user_name, NOBODY_USER_NAME, "nobody"))
+                        return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Mapping 'nobody' user not supported, sorry.");
+
+                if (u->uid >= uid_shift && u->uid < uid_shift + uid_range)
+                        return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "UID of user '%s' to map is already in container UID range, refusing.", u->user_name);
+
+                r = groupdb_by_gid(u->gid, USERDB_DONT_SYNTHESIZE, &g);
+                if (r < 0)
+                        return log_error_errno(r, "Failed to resolve group of user '%s': %m", u->user_name);
+
+                if (g->gid >= uid_shift && g->gid < uid_shift + uid_range)
+                        return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "GID of group '%s' to map is already in container GID range, refusing.", g->group_name);
+
+                /* We want to synthesize exactly one user + group from the host into the container. This only
+                 * makes sense if the user on the host has its own private group. We can't reasonably check
+                 * this, so we just check of the name of user and group match.
+                 *
+                 * One of these days we might want to support users in a shared/common group too, but it's
+                 * not clear to me how this would have to be mapped, precisely given that the common group
+                 * probably already exists in the container. */
+                if (!streq(u->user_name, g->group_name))
+                        return log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP),
+                                               "Sorry, mapping users without private groups is currently not supported.");
+
+                r = find_free_uid(directory, uid_range, &current_uid);
+                if (r < 0)
+                        return r;
+
+                r = convert_user(directory, u, g, current_uid, &cu, &cg);
+                if (r < 0)
+                        return r;
+                /* Make room for one more entry; n_data is only bumped further down, once nothing can fail anymore */
+                if (!GREEDY_REALLOC(c->data, n_allocated, c->n_data + 1))
+                        return log_oom();
+
+                sm = strdup(u->home_directory); /* bind mount source: home directory from the host's record */
+                if (!sm)
+                        return log_oom();
+
+                sd = strdup(cu->home_directory); /* bind mount destination: home directory from the converted record */
+                if (!sd)
+                        return log_oom();
+
+                cm = reallocarray(*custom_mounts, sizeof(CustomMount), *n_custom_mounts + 1);
+                if (!cm)
+                        return log_oom();
+
+                *custom_mounts = cm;
+
+                (*custom_mounts)[(*n_custom_mounts)++] = (CustomMount) {
+                        .type = CUSTOM_MOUNT_BIND,
+                        .source = TAKE_PTR(sm),
+                        .destination = TAKE_PTR(sd),
+                };
+
+                c->data[c->n_data++] = (BindUserData) {
+                        .host_user = TAKE_PTR(u),
+                        .host_group = TAKE_PTR(g),
+                        .payload_user = TAKE_PTR(cu),
+                        .payload_group = TAKE_PTR(cg),
+                };
+                /* The number just handed out is taken now, continue the search behind it for the next user */
+                current_uid++;
+        }
+
+        *ret = TAKE_PTR(c);
+        return 1; /* > 0: a context was allocated (0 is returned above if there was nothing to do) */
+}
+
+static int write_and_symlink(
+                const char *root,
+                JsonVariant *v,
+                const char *name,
+                uid_t uid,
+                const char *suffix,
+                WriteStringFileFlags extra_flags) {
+
+        _cleanup_free_ char *j = NULL, *f = NULL, *p = NULL, *q = NULL;
+        int r;
+
+        assert(root);
+        assert(v);
+        assert(name);
+        assert(uid_is_valid(uid));
+        assert(suffix);
+        /* Writes the JSON object 'v' to <root>/run/host/userdb/<name><suffix> and creates a symlink <uid><suffix> next to it that points to that file name. Both are handed to userns_lchown() with UID/GID 0. Returns 0 on success, a logged error otherwise. */
+        r = json_variant_format(v, JSON_FORMAT_NEWLINE, &j);
+        if (r < 0)
+                return log_error_errno(r, "Failed to format user record JSON: %m");
+
+        f = strjoin(name, suffix); /* plain file name, doubles as the (relative) symlink target */
+        if (!f)
+                return log_oom();
+
+        p = path_join(root, "/run/host/userdb/", f);
+        if (!p)
+                return log_oom();
+
+        if (asprintf(&q, "%s/run/host/userdb/" UID_FMT "%s", root, uid, suffix) < 0)
+                return log_oom();
+
+        if (symlink(f, q) < 0)
+                return log_error_errno(errno, "Failed to create symlink '%s': %m", q);
+
+        r = userns_lchown(q, 0, 0);
+        if (r < 0) /* this call changes the owner, not the mode, hence say so in the message */
+                return log_error_errno(r, "Failed to adjust ownership of '%s': %m", q);
+
+        r = write_string_file(p, j, WRITE_STRING_FILE_CREATE|extra_flags);
+        if (r < 0)
+                return log_error_errno(r, "Failed to write %s: %m", p);
+
+        r = userns_lchown(p, 0, 0);
+        if (r < 0)
+                return log_error_errno(r, "Failed to adjust ownership of '%s': %m", p);
+
+        return 0;
+}
+
+int bind_user_setup(
+                const BindUserContext *c,
+                const char *root) {
+
+        static const UserRecordLoadFlags strip_flags = /* Removes privileged info */
+                USER_RECORD_REQUIRE_REGULAR|
+                USER_RECORD_STRIP_PRIVILEGED|
+                USER_RECORD_ALLOW_PER_MACHINE|
+                USER_RECORD_ALLOW_BINDING|
+                USER_RECORD_ALLOW_SIGNATURE;
+        static const UserRecordLoadFlags shadow_flags = /* Extracts privileged info */
+                USER_RECORD_STRIP_REGULAR|
+                USER_RECORD_ALLOW_PRIVILEGED|
+                USER_RECORD_STRIP_PER_MACHINE|
+                USER_RECORD_STRIP_BINDING|
+                USER_RECORD_STRIP_SIGNATURE|
+                USER_RECORD_EMPTY_OK;
+        int r;
+
+        assert(root);
+        /* Creates /run/host/home and /run/host/userdb below 'root' and writes, for each entry of 'c', the payload group and user records there (privileged parts into separate 0600 files). Returns 0 if there is nothing to do, 1 after writing, a logged error otherwise. */
+        if (!c || c->n_data == 0)
+                return 0;
+
+        r = userns_mkdir(root, "/run/host", 0755, 0, 0);
+        if (r < 0)
+                return log_error_errno(r, "Failed to create /run/host: %m");
+
+        r = userns_mkdir(root, "/run/host/home", 0755, 0, 0);
+        if (r < 0)
+                return log_error_errno(r, "Failed to create /run/host/home: %m");
+
+        r = userns_mkdir(root, "/run/host/userdb", 0755, 0, 0);
+        if (r < 0)
+                return log_error_errno(r, "Failed to create /run/host/userdb: %m");
+
+        for (size_t i = 0; i < c->n_data; i++) {
+                _cleanup_(group_record_unrefp) GroupRecord *stripped_group = NULL, *shadow_group = NULL;
+                _cleanup_(user_record_unrefp) UserRecord *stripped_user = NULL, *shadow_user = NULL;
+                const BindUserData *d = c->data + i;
+
+                /* First, write shadow (i.e. privileged) data for group record */
+                r = group_record_clone(d->payload_group, shadow_flags, &shadow_group);
+                if (r < 0)
+                        return log_error_errno(r, "Failed to extract privileged information from group record: %m");
+
+                if (!json_variant_is_blank_object(shadow_group->json)) { /* skip the file if no privileged data is left */
+                        r = write_and_symlink(
+                                        root,
+                                        shadow_group->json,
+                                        d->payload_group->group_name,
+                                        d->payload_group->gid,
+                                        ".group-privileged",
+                                        WRITE_STRING_FILE_MODE_0600);
+                        if (r < 0)
+                                return r;
+                }
+
+                /* Second, write main part of group record. */
+                r = group_record_clone(d->payload_group, strip_flags, &stripped_group);
+                if (r < 0)
+                        return log_error_errno(r, "Failed to strip privileged information from group record: %m");
+
+                r = write_and_symlink(
+                                root,
+                                stripped_group->json,
+                                d->payload_group->group_name,
+                                d->payload_group->gid,
+                                ".group",
+                                0);
+                if (r < 0)
+                        return r;
+
+                /* Third, write out user shadow data. i.e. extract privileged info from user record */
+                r = user_record_clone(d->payload_user, shadow_flags, &shadow_user);
+                if (r < 0)
+                        return log_error_errno(r, "Failed to extract privileged information from user record: %m");
+
+                if (!json_variant_is_blank_object(shadow_user->json)) { /* skip the file if no privileged data is left */
+                        r = write_and_symlink(
+                                        root,
+                                        shadow_user->json,
+                                        d->payload_user->user_name,
+                                        d->payload_user->uid,
+                                        ".user-privileged",
+                                        WRITE_STRING_FILE_MODE_0600);
+                        if (r < 0)
+                                return r;
+                }
+
+                /* Finally write out the main part of the user record */
+                r = user_record_clone(d->payload_user, strip_flags, &stripped_user);
+                if (r < 0)
+                        return log_error_errno(r, "Failed to strip privileged information from user record: %m");
+
+                r = write_and_symlink(
+                                root,
+                                stripped_user->json,
+                                d->payload_user->user_name,
+                                d->payload_user->uid,
+                                ".user",
+                                0);
+                if (r < 0)
+                        return r;
+        }
+
+        return 1; /* > 0: records were written */
+}
diff --git a/src/nspawn/nspawn-bind-user.h b/src/nspawn/nspawn-bind-user.h
new file mode 100644
index 0000000000..4352ce0ab2
--- /dev/null
+++ b/src/nspawn/nspawn-bind-user.h
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "user-record.h"
+#include "group-record.h"
+#include "nspawn-mount.h"
+
+typedef struct BindUserData {
+        /* The host's user/group records */
+        UserRecord *host_user;
+        GroupRecord *host_group;
+
+        /* The mapped records to place into the container */
+        UserRecord *payload_user;
+        GroupRecord *payload_group;
+} BindUserData;
+
+typedef struct BindUserContext {
+        BindUserData *data; /* array with one entry per bound user */
+        size_t n_data; /* number of initialized entries in 'data' */
+} BindUserContext;
+
+BindUserContext* bind_user_context_free(BindUserContext *c);
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(BindUserContext*, bind_user_context_free);
+
+int bind_user_prepare(const char *directory, char **bind_user, uid_t uid_shift, uid_t uid_range, CustomMount **custom_mounts, size_t *n_custom_mounts, BindUserContext **ret);
+
+int bind_user_setup(const BindUserContext *c, const char *root);
diff --git a/src/nspawn/nspawn-gperf.gperf b/src/nspawn/nspawn-gperf.gperf
index 67a3682689..ea15e27148 100644
--- a/src/nspawn/nspawn-gperf.gperf
+++ b/src/nspawn/nspawn-gperf.gperf
@@ -69,6 +69,7 @@ Files.Overlay,                config_parse_overlay,        0,                 0
 Files.OverlayReadOnly,        config_parse_overlay,        1,                 0
 Files.PrivateUsersChown,      config_parse_userns_chown,   0,                 offsetof(Settings, userns_ownership)
 Files.PrivateUsersOwnership,  config_parse_userns_ownership, 0,               offsetof(Settings, userns_ownership)
+Files.BindUser,               config_parse_bind_user,      0,                 offsetof(Settings, bind_user)
 Network.Private,              config_parse_tristate,       0,                 offsetof(Settings, private_network)
 Network.Interface,            config_parse_strv,           0,                 offsetof(Settings, network_interfaces)
 Network.MACVLAN,              config_parse_strv,           0,                 offsetof(Settings, network_macvlan)
diff --git a/src/nspawn/nspawn-settings.c b/src/nspawn/nspawn-settings.c
index 55b8c4375f..3847fe4ec4 100644
--- a/src/nspawn/nspawn-settings.c
+++ b/src/nspawn/nspawn-settings.c
@@ -132,6 +132,7 @@ Settings* settings_free(Settings *s) {
         rlimit_free_all(s->rlimit);
         free(s->hostname);
         cpu_set_reset(&s->cpu_set);
+        strv_free(s->bind_user);
 
         strv_free(s->network_interfaces);
         strv_free(s->network_macvlan);
@@ -907,3 +908,51 @@ int config_parse_userns_chown(
         *ownership = r ? USER_NAMESPACE_OWNERSHIP_CHOWN : USER_NAMESPACE_OWNERSHIP_OFF;
         return 0;
 }
+
+int config_parse_bind_user(
+                const char *unit,
+                const char *filename,
+                unsigned line,
+                const char *section,
+                unsigned section_line,
+                const char *lvalue,
+                int ltype,
+                const char *rvalue,
+                void *data,
+                void *userdata) {
+
+        char ***bind_user = data;
+        int r;
+
+        assert(rvalue);
+        assert(bind_user);
+        /* Splits 'rvalue' into words via extract_first_word() and appends each valid user name to the string list 'data' points to. An empty value resets the list. Syntax problems are logged and ignored (0 is returned), only out-of-memory is reported as failure. */
+        if (isempty(rvalue)) {
+                *bind_user = strv_free(*bind_user);
+                return 0;
+        }
+
+        for (const char* p = rvalue;;) {
+                _cleanup_free_ char *word = NULL;
+
+                r = extract_first_word(&p, &word, NULL, 0);
+                if (r == -ENOMEM)
+                        return log_oom();
+                if (r < 0) {
+                        log_syntax(unit, LOG_WARNING, filename, line, r, "Failed to parse BindUser= list, ignoring: %s", rvalue);
+                        return 0;
+                }
+                if (r == 0) /* no more words */
+                        break;
+
+                if (!valid_user_group_name(word, 0)) { /* note: names appended earlier from the same line stay in the list */
+                        log_syntax(unit, LOG_WARNING, filename, line, 0, "User name '%s' not valid, ignoring.", word);
+                        return 0;
+                }
+
+                if (strv_consume(bind_user, TAKE_PTR(word)) < 0)
+                        return log_oom();
+        }
+
+        return 0;
+}
diff --git a/src/nspawn/nspawn-settings.h b/src/nspawn/nspawn-settings.h
index c0ad0741ab..939e1c757b 100644
--- a/src/nspawn/nspawn-settings.h
+++ b/src/nspawn/nspawn-settings.h
@@ -126,9 +126,10 @@ typedef enum SettingsMask {
         SETTING_CLONE_NS_FLAGS    = UINT64_C(1) << 28,
         SETTING_CONSOLE_MODE      = UINT64_C(1) << 29,
         SETTING_CREDENTIALS       = UINT64_C(1) << 30,
-        SETTING_RLIMIT_FIRST      = UINT64_C(1) << 31, /* we define one bit per resource limit here */
-        SETTING_RLIMIT_LAST       = UINT64_C(1) << (31 + _RLIMIT_MAX - 1),
-        _SETTINGS_MASK_ALL        = (UINT64_C(1) << (31 + _RLIMIT_MAX)) -1,
+        SETTING_BIND_USER         = UINT64_C(1) << 31,
+        SETTING_RLIMIT_FIRST      = UINT64_C(1) << 32, /* we define one bit per resource limit here */
+        SETTING_RLIMIT_LAST       = UINT64_C(1) << (32 + _RLIMIT_MAX - 1),
+        _SETTINGS_MASK_ALL        = (UINT64_C(1) << (32 + _RLIMIT_MAX)) -1,
         _SETTING_FORCE_ENUM_WIDTH = UINT64_MAX
 } SettingsMask;
 
@@ -195,6 +196,7 @@ typedef struct Settings {
         CustomMount *custom_mounts;
         size_t n_custom_mounts;
         UserNamespaceOwnership userns_ownership;
+        char **bind_user;
 
         /* [Network] */
         int private_network;
@@ -266,6 +268,7 @@ CONFIG_PARSER_PROTOTYPE(config_parse_link_journal);
 CONFIG_PARSER_PROTOTYPE(config_parse_timezone);
 CONFIG_PARSER_PROTOTYPE(config_parse_userns_chown);
 CONFIG_PARSER_PROTOTYPE(config_parse_userns_ownership);
+CONFIG_PARSER_PROTOTYPE(config_parse_bind_user);
 
 const char *resolv_conf_mode_to_string(ResolvConfMode a) _const_;
 ResolvConfMode resolv_conf_mode_from_string(const char *s) _pure_;
diff --git a/src/nspawn/nspawn.c b/src/nspawn/nspawn.c
index 9dbe2af5d9..21aa4f246f 100644
--- a/src/nspawn/nspawn.c
+++ b/src/nspawn/nspawn.c
@@ -63,6 +63,7 @@
 #include "mountpoint-util.h"
 #include "namespace-util.h"
 #include "netlink-util.h"
+#include "nspawn-bind-user.h"
 #include "nspawn-cgroup.h"
 #include "nspawn-creds.h"
 #include "nspawn-def.h"
@@ -226,6 +227,7 @@ static char **arg_sysctl = NULL;
 static ConsoleMode arg_console_mode = _CONSOLE_MODE_INVALID;
 static Credential *arg_credentials = NULL;
 static size_t arg_n_credentials = 0;
+static char **arg_bind_user = NULL;
 
 STATIC_DESTRUCTOR_REGISTER(arg_directory, freep);
 STATIC_DESTRUCTOR_REGISTER(arg_template, freep);
@@ -258,6 +260,7 @@ STATIC_DESTRUCTOR_REGISTER(arg_seccomp, seccomp_releasep);
 #endif
 STATIC_DESTRUCTOR_REGISTER(arg_cpu_set, cpu_set_reset);
 STATIC_DESTRUCTOR_REGISTER(arg_sysctl, strv_freep);
+STATIC_DESTRUCTOR_REGISTER(arg_bind_user, strv_freep);
 
 static int handle_arg_console(const char *arg) {
         if (streq(arg, "help")) {
@@ -423,7 +426,8 @@ static int help(void) {
                "                            Create an overlay mount from the host to \n"
                "                            the container\n"
                "     --overlay-ro=PATH[:PATH...]:PATH\n"
-               "                            Similar, but creates a read-only overlay mount\n\n"
+               "                            Similar, but creates a read-only overlay mount\n"
+               "     --bind-user=NAME       Bind user from host to container\n\n"
                "%3$sInput/Output:%4$s\n"
                "     --console=MODE         Select how stdin/stdout/stderr and /dev/console are\n"
                "                            set up for the container.\n"
@@ -707,6 +711,7 @@ static int parse_argv(int argc, char *argv[]) {
                 ARG_NO_PAGER,
                 ARG_SET_CREDENTIAL,
                 ARG_LOAD_CREDENTIAL,
+                ARG_BIND_USER,
         };
 
         static const struct option options[] = {
@@ -778,6 +783,7 @@ static int parse_argv(int argc, char *argv[]) {
                 { "no-pager",               no_argument,       NULL, ARG_NO_PAGER               },
                 { "set-credential",         required_argument, NULL, ARG_SET_CREDENTIAL         },
                 { "load-credential",        required_argument, NULL, ARG_LOAD_CREDENTIAL        },
+                { "bind-user",              required_argument, NULL, ARG_BIND_USER              },
                 {}
         };
 
@@ -1656,6 +1662,16 @@ static int parse_argv(int argc, char *argv[]) {
                         break;
                 }
 
+                case ARG_BIND_USER:
+                        if (!valid_user_group_name(optarg, 0))
+                                return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Invalid user name to bind: %s", optarg);
+
+                        if (strv_extend(&arg_bind_user, optarg) < 0)
+                                return log_oom();
+
+                        arg_settings_mask |= SETTING_BIND_USER;
+                        break;
+
                 case '?':
                         return -EINVAL;
 
@@ -1812,6 +1828,12 @@ static int verify_arguments(void) {
                         return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "AmbientCapability= setting is not useful for boot mode.");
         }
 
+        if (arg_userns_mode == USER_NAMESPACE_NO && !strv_isempty(arg_bind_user))
+                return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "--bind-user= requires --private-users");
+
+        /* Drop duplicate --bind-user= entries */
+        strv_uniq(arg_bind_user);
+
         r = custom_mount_check_all();
         if (r < 0)
                 return r;
@@ -3569,6 +3591,7 @@ static int outer_child(
                 FDSet *fds,
                 int netns_fd) {
 
+        _cleanup_(bind_user_context_freep) BindUserContext *bind_user_context = NULL;
         _cleanup_strv_free_ char **os_release_pairs = NULL;
         _cleanup_close_ int fd = -1;
         bool idmap = false;
@@ -3716,6 +3739,36 @@ static int outer_child(
         if (r < 0)
                 return r;
 
+        r = bind_user_prepare(
+                        directory,
+                        arg_bind_user,
+                        arg_uid_shift,
+                        arg_uid_range,
+                        &arg_custom_mounts, &arg_n_custom_mounts,
+                        &bind_user_context);
+        if (r < 0)
+                return r;
+
+        if (arg_userns_mode != USER_NAMESPACE_NO && bind_user_context) {
+                /* Send the user maps we determined to the parent, so that it installs it in our user namespace UID map table */
+
+                for (size_t i = 0; i < bind_user_context->n_data; i++)  {
+                        uid_t map[] = {
+                                bind_user_context->data[i].payload_user->uid,
+                                bind_user_context->data[i].host_user->uid,
+                                (uid_t) bind_user_context->data[i].payload_group->gid,
+                                (uid_t) bind_user_context->data[i].host_group->gid,
+                        };
+
+                        l = send(uid_shift_socket, map, sizeof(map), MSG_NOSIGNAL);
+                        if (l < 0)
+                                return log_error_errno(errno, "Failed to send user UID map: %m");
+                        if (l != sizeof(map))
+                                return log_error_errno(SYNTHETIC_ERRNO(EIO),
+                                                       "Short write while sending user UID map.");
+                }
+        }
+
         r = mount_custom(
                         directory,
                         arg_custom_mounts,
@@ -3832,6 +3885,10 @@ static int outer_child(
         if (r < 0)
                 return r;
 
+        r = bind_user_setup(bind_user_context, directory);
+        if (r < 0)
+                return r;
+
         r = mount_custom(
                         directory,
                         arg_custom_mounts,
@@ -4012,21 +4069,96 @@ static int uid_shift_pick(uid_t *shift, LockFile *ret_lock_file) {
         }
 }
 
-static int setup_uid_map(pid_t pid) {
-        char uid_map[STRLEN("/proc//uid_map") + DECIMAL_STR_MAX(uid_t) + 1], line[DECIMAL_STR_MAX(uid_t)*3+3+1];
+/* Adds one "<container ID> <host ID> <count>" line to the ID map string at *p; passes on strextendf()'s result. */
+static int add_one_uid_map(
+                char **p,
+                uid_t container_uid,
+                uid_t host_uid,
+                uid_t range) {
+
+        int r = strextendf(p, UID_FMT " " UID_FMT " " UID_FMT "\n", container_uid, host_uid, range);
+        return r;
+}
+
+static int make_uid_map_string(
+                const uid_t bind_user_uid[],
+                size_t n_bind_user_uid,
+                size_t offset,
+                char **ret) {
+
+        _cleanup_free_ char *s = NULL;
+        uid_t previous_uid = 0;
+        int r;
+
+        assert(n_bind_user_uid == 0 || bind_user_uid);
+        assert(offset == 0 || offset == 2); /* used to switch between UID and GID map */
+        assert(ret);
+
+        /* Generates the contents of a uid_map/gid_map file in *ret. bind_user_uid[] holds one quadruplet
+         * per --bind-user= entry (payload UID, host UID, payload GID, host GID), hence a stride of 4;
+         * offset 0 selects the UID pair, offset 2 the GID pair. Returns 0 or a negative errno. */
+        for (size_t i = 0; i < n_bind_user_uid; i++) {
+                uid_t payload_uid = bind_user_uid[i*4+offset],
+                        host_uid = bind_user_uid[i*4+offset+1];
+
+                assert(previous_uid <= payload_uid);
+                assert(payload_uid < arg_uid_range);
+
+                /* Add a range to close the gap to previous entry */
+                if (payload_uid > previous_uid) {
+                        r = add_one_uid_map(&s, previous_uid, arg_uid_shift + previous_uid, payload_uid - previous_uid);
+                        if (r < 0)
+                                return r;
+                }
+
+                /* Map this specific user */
+                r = add_one_uid_map(&s, payload_uid, host_uid, 1);
+                if (r < 0)
+                        return r;
+
+                previous_uid = payload_uid + 1;
+        }
+
+        /* And add a range to close the gap to finish the range */
+        if (arg_uid_range > previous_uid) {
+                r = add_one_uid_map(&s, previous_uid, arg_uid_shift + previous_uid, arg_uid_range - previous_uid);
+                if (r < 0)
+                        return r;
+        }
+
+        assert(s);
+
+        *ret = TAKE_PTR(s);
+        return 0;
+}
+
+static int setup_uid_map(
+                pid_t pid,
+                const uid_t bind_user_uid[],
+                size_t n_bind_user_uid) {
+
+        char uid_map[STRLEN("/proc//uid_map") + DECIMAL_STR_MAX(uid_t) + 1];
+        _cleanup_free_ char *s = NULL;
         int r;
 
         assert(pid > 1);
 
+        /* Build the UID map string */
+        if (make_uid_map_string(bind_user_uid, n_bind_user_uid, 0, &s) < 0) /* offset=0 contains the UID pair */
+                return log_oom();
+
         xsprintf(uid_map, "/proc/" PID_FMT "/uid_map", pid);
-        xsprintf(line, UID_FMT " " UID_FMT " " UID_FMT "\n", 0, arg_uid_shift, arg_uid_range);
-        r = write_string_file(uid_map, line, WRITE_STRING_FILE_DISABLE_BUFFER);
+        r = write_string_file(uid_map, s, WRITE_STRING_FILE_DISABLE_BUFFER);
         if (r < 0)
                 return log_error_errno(r, "Failed to write UID map: %m");
 
-        /* We always assign the same UID and GID ranges */
+        /* And now build the GID map string */
+        s = mfree(s);
+        if (make_uid_map_string(bind_user_uid, n_bind_user_uid, 2, &s) < 0) /* offset=2 contains the GID pair */
+                return log_oom();
+
         xsprintf(uid_map, "/proc/" PID_FMT "/gid_map", pid);
-        r = write_string_file(uid_map, line, WRITE_STRING_FILE_DISABLE_BUFFER);
+        r = write_string_file(uid_map, s, WRITE_STRING_FILE_DISABLE_BUFFER);
         if (r < 0)
                 return log_error_errno(r, "Failed to write GID map: %m");
 
@@ -4302,6 +4434,9 @@ static int merge_settings(Settings *settings, const char *path) {
                 }
         }
 
+        if ((arg_settings_mask & SETTING_BIND_USER) == 0)
+                strv_free_and_replace(arg_bind_user, settings->bind_user);
+
         if ((arg_settings_mask & SETTING_NOTIFY_READY) == 0)
                 arg_notify_ready = settings->notify_ready;
 
@@ -4568,6 +4703,8 @@ static int run_container(
         _cleanup_(pty_forward_freep) PTYForward *forward = NULL;
         _cleanup_(sd_netlink_unrefp) sd_netlink *rtnl = NULL;
         _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+        _cleanup_free_ uid_t *bind_user_uid = NULL;
+        size_t n_bind_user_uid = 0;
         ContainerStatus container_status = 0;
         int ifi = 0, r;
         ssize_t l;
@@ -4723,6 +4860,26 @@ static int run_container(
                         if (l != sizeof arg_uid_shift)
                                 return log_error_errno(SYNTHETIC_ERRNO(EIO), "Short write while writing UID shift.");
                 }
+
+                n_bind_user_uid = strv_length(arg_bind_user);
+                if (n_bind_user_uid > 0) {
+                        /* Right after the UID shift, we'll receive the list of UID mappings for the
+                         * --bind-user= logic. Always a quadruplet of payload and host UID + GID. */
+
+                        bind_user_uid = new(uid_t, n_bind_user_uid*4);
+                        if (!bind_user_uid)
+                                return log_oom();
+
+                        for (size_t i = 0; i < n_bind_user_uid; i++) {
+                                l = recv(uid_shift_socket_pair[0], bind_user_uid + i*4, sizeof(uid_t)*4, 0);
+                                if (l < 0)
+                                        return log_error_errno(errno, "Failed to read user UID map pair: %m");
+                                if (l != sizeof(uid_t)*4)
+                                        return log_full_errno(l == 0 ? LOG_DEBUG : LOG_WARNING,
+                                                              SYNTHETIC_ERRNO(EIO),
+                                                              "Short read while reading bind user UID pairs.");
+                        }
+                }
         }
 
         if (arg_unified_cgroup_hierarchy == CGROUP_UNIFIED_UNKNOWN) {
@@ -4768,7 +4925,7 @@ static int run_container(
                 if (!barrier_place_and_sync(&barrier)) /* #1 */
                         return log_error_errno(SYNTHETIC_ERRNO(ESRCH), "Child died too early.");
 
-                r = setup_uid_map(*pid);
+                r = setup_uid_map(*pid, bind_user_uid, n_bind_user_uid);
                 if (r < 0)
                         return r;
 

From a06c9ac27782f21cd0eaf4078f4588b4f8cd2585 Mon Sep 17 00:00:00 2001
From: Lennart Poettering <lennart@poettering.net>
Date: Fri, 7 May 2021 11:44:26 +0200
Subject: [PATCH 4/4] man: document new nspawn --bind-user= feature

---
 docs/UIDS-GIDS.md      |  5 ++--
 man/systemd-nspawn.xml | 52 ++++++++++++++++++++++++++++++++++++++++++
 man/systemd.nspawn.xml | 10 ++++++++
 3 files changed, 65 insertions(+), 2 deletions(-)

diff --git a/docs/UIDS-GIDS.md b/docs/UIDS-GIDS.md
index e289a9b68e..5342ccd166 100644
--- a/docs/UIDS-GIDS.md
+++ b/docs/UIDS-GIDS.md
@@ -241,8 +241,9 @@ the artifacts the container manager persistently leaves in the system.
 |                     5 | `tty` group           | `systemd`     | `/etc/passwd`                 |
 |                 6…999 | System users          | Distributions | `/etc/passwd`                 |
 |            1000…60000 | Regular users         | Distributions | `/etc/passwd` + LDAP/NIS/…    |
-|           60001…60513 | Human Users (homed)   | `systemd`     | `nss-systemd`                 |
-|           60514…61183 | Unused                |               |                               |
+|           60001…60513 | Human users (homed)   | `systemd`     | `nss-systemd`                 |
+|           60514…60577 | Host users mapped into containers | `systemd` | `systemd-nspawn`           |
+|           60578…61183 | Unused                |               |                               |
 |           61184…65519 | Dynamic service users | `systemd`     | `nss-systemd`                 |
 |           65520…65533 | Unused                |               |                               |
 |                 65534 | `nobody` user         | Linux         | `/etc/passwd` + `nss-systemd` |
diff --git a/man/systemd-nspawn.xml b/man/systemd-nspawn.xml
index 403636545a..e929d32f62 100644
--- a/man/systemd-nspawn.xml
+++ b/man/systemd-nspawn.xml
@@ -1352,6 +1352,58 @@ After=sys-subsystem-net-devices-ens1.device</programlisting>
         make them read-only, using <option>--bind-ro=</option>.</para></listitem>
       </varlistentry>
 
+      <varlistentry>
+        <term><option>--bind-user=</option></term>
+
+        <listitem><para>Binds the home directory of the specified user on the host into the container. Takes
+        the name of an existing user on the host as argument. May be used multiple times to bind multiple
+        users into the container. This does three things:</para>
+
+        <orderedlist>
+          <listitem><para>The user's home directory is bind mounted from the host into
+          <filename>/run/host/home/</filename>.</para></listitem>
+
+          <listitem><para>An additional UID/GID mapping is added that maps the host user's UID/GID to a
+          container UID/GID, allocated from the 60514…60577 range.</para></listitem>
+
+          <listitem><para>A JSON user and group record is generated in <filename>/run/userdb/</filename> that
+          describes the mapped user. It contains a minimized representation of the host's user record,
+          adjusted to the UID/GID and home directory path assigned to the user in the container. The
+          <citerefentry><refentrytitle>nss-systemd</refentrytitle><manvolnum>8</manvolnum></citerefentry>
+          glibc NSS module will pick up these records from there and make them available in the container's
+          user/group databases.</para></listitem>
+        </orderedlist>
+
+        <para>The combination of the three operations above ensures that it is possible to log into the
+        host's user account inside the container as if it was local to the container. The user is only mapped
+        transiently, while the container is running and the mapping itself does not result in persistent
+        changes to the container (except maybe for generated log messages at login time, and similar). Note
+        that in particular the UID/GID assignment in the container is not made persistently. If the user is
+        mapped transiently, it is best to not allow the user to make persistent changes to the container. If
+        the user leaves files or directories owned by the user, and those UIDs/GIDs are recycled during later
+        container invocations (possibly with a different <option>--bind-user=</option> mapping), those files
+        and directories will be accessible to the "new" user.</para>
+
+        <para>The user/group record mapping only works if the container contains systemd 249 or newer, with
+        <command>nss-systemd</command> properly configured in <filename>nsswitch.conf</filename>. See
+        <citerefentry><refentrytitle>nss-systemd</refentrytitle><manvolnum>8</manvolnum></citerefentry> for
+        details.</para>
+
+        <para>Note that the user record propagated from the host into the container will contain the UNIX
+        password hash of the user, so that seamless logins in the container are possible. If the container is
+        less trusted than the host it's hence important to use a strong UNIX password hash function
+        (e.g. yescrypt or similar, with the <literal>$y$</literal> hash prefix).</para>
+
+        <para>When binding a user from the host into the container, checks are executed to ensure that the
+        username is not yet known in the container. Moreover, it is checked that the UID/GID allocated for it
+        is not currently defined in the user/group databases of the container. Both checks directly access
+        the container's <filename>/etc/passwd</filename> and <filename>/etc/group</filename>, and thus might
+        not detect existing accounts in other databases.</para>
+
+        <para>This operation is only supported in combination with
+        <option>--private-users=</option>/<option>-U</option>.</para></listitem>
+      </varlistentry>
+
       <varlistentry>
         <term><option>--inaccessible=</option></term>
 
diff --git a/man/systemd.nspawn.xml b/man/systemd.nspawn.xml
index 186616b6ad..7ba8e361b4 100644
--- a/man/systemd.nspawn.xml
+++ b/man/systemd.nspawn.xml
@@ -415,6 +415,16 @@
         is privileged (see above).</para></listitem>
       </varlistentry>
 
+      <varlistentry>
+        <term><varname>BindUser=</varname></term>
+
+        <listitem><para>Binds a user from the host into the container. This option is equivalent to the
+        command line switch <option>--bind-user=</option>, see
+        <citerefentry><refentrytitle>systemd-nspawn</refentrytitle><manvolnum>1</manvolnum></citerefentry>
+        for details about the specific options supported. This setting is privileged (see
+        above).</para></listitem>
+      </varlistentry>
+
       <varlistentry>
         <term><varname>TemporaryFileSystem=</varname></term>