1
0
mirror of https://github.com/systemd/systemd.git synced 2025-02-28 05:57:33 +03:00

Add support for id-mapped mounts to Exec directories (#34078)

Currently, files in bind-mounted directories within a user/mount namespace
are shown with the uid/gid that is stored on disk. If the host creates a
file as root in the source directory, it still shows up as owned by root
inside the namespace.
Id-mapping is a filesystem feature that allows a mount namespace to show
a different uid than what is actually stored on a file. Add support for
id-mappings to exec directories, so that the files within the mount
namespace are owned by the unprivileged uid/gid.

Example:

Using unit:
```
[Unit]
Description=Sample service

[Service]
MountAPIVFS=yes
DynamicUser=yes
PrivateUsers=yes
TemporaryFileSystem=/run /var/opt /var/lib /vol
UMask=0000
ExecStart=/bin/bash -c 'while true; do echo "ping"; sleep 5; done'
StateDirectory=andresstatedir:sampleservice

[Install]
WantedBy=multi-user.target
```

In the host namespace, creating a file "test":
```
root@abeltran-test:/var/lib/andresstatedir# ls -lah
total 8.0K
drwxr-xr-x 2 root root 4.0K Aug 21 23:48 .
drwx------ 3 root root 4.0K Aug 21 23:47 ..
-rw-r--r-- 1 root root    0 Aug 21 23:48 test
```

Within the unit namespace:
```
root@abeltran-test:/var/lib/sampleservice# ls -lah
total 4.0K
drwxr-xr-x 2 63750 63750 4.0K Aug 21 23:48 .
drwxr-xr-x 3 root  root    60 Aug 21 23:47 ..
-rw-r--r-- 1 63750 63750    0 Aug 21 23:48 test
```
```
root@abeltran-test:/# mount | grep and
/dev/sda1 on /var/lib/private/andresstatedir type ext4 (rw,nosuid,noexec,relatime,idmapped,discard,errors=remount-ro,commit=30)
```
This commit is contained in:
Luca Boccassi 2024-11-02 12:04:49 +00:00 committed by GitHub
commit c7e818fc1a
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
12 changed files with 253 additions and 13 deletions

View File

@ -1476,6 +1476,13 @@ CapabilityBoundingSet=~CAP_B CAP_C</programlisting>
below the locations defined in the following table. Also, the corresponding environment variable will
be defined with the full paths of the directories. If multiple directories are set, then in the
environment variable the paths are concatenated with colon (<literal>:</literal>).</para>
<para>If <varname>DynamicUser=</varname> is used, and if the kernel supports
<ulink url="https://lwn.net/Articles/896255/">id-mapped mounts</ulink>, the specified directories will
be owned by <literal>nobody</literal> in the host namespace and will be mapped to (and will be owned by)
the service's UID/GID in its own namespace. For backward compatibility, existing directories created
without id-mapped mounts will be kept untouched.</para>
<table>
<title>Automatic directory creation and environment variables</title>
<tgroup cols='4'>

View File

@ -3,6 +3,9 @@
#include <fcntl.h>
#include <sys/ioctl.h>
#include <sys/mount.h>
#if WANT_LINUX_FS_H
#include <linux/fs.h>
#endif
#include "errno-util.h"
#include "fd-util.h"
@ -10,6 +13,8 @@
#include "missing_fs.h"
#include "missing_magic.h"
#include "missing_sched.h"
#include "missing_syscall.h"
#include "mountpoint-util.h"
#include "namespace-util.h"
#include "parse-util.h"
#include "process-util.h"
@ -502,3 +507,52 @@ int is_our_namespace(int fd, NamespaceType request_type) {
return stat_inode_same(&st_ours, &st_fd);
}
/* Probes whether an ID-mapped mount can be set up for the mount that 'path' lives on.
 *
 * The probe acquires a user namespace that maps only "nobody" onto itself, opens 'path', clones its mount
 * with open_tree(OPEN_TREE_CLONE) and then tries to attach the ID mapping to that clone with
 * mount_setattr(). The attributes are applied to the clone's fd, not to 'path' itself, and all fds are
 * declared _cleanup_close_.
 *
 * Returns:
 *   > 0  the probe succeeded, ID-mapped mounts can be used for 'path'
 *   0    ID-mapped mounts are not usable here: new mount API missing, operation not supported or rejected
 *        as invalid, caller lacks the privileges for any of the probe steps, or no further user
 *        namespaces can be allocated
 *   < 0  negative errno-style error for any other failure (including OOM while formatting the maps)
 *
 * Lack of privileges and user namespace exhaustion are deliberately reported as "not supported" rather
 * than as errors, so that callers can fall back to a non-ID-mapped code path instead of failing. */
int is_idmapping_supported(const char *path) {
        _cleanup_close_ int mount_fd = -EBADF, userns_fd = -EBADF, dir_fd = -EBADF;
        _cleanup_free_ char *uid_map = NULL, *gid_map = NULL;
        int r;

        assert(path);

        if (!mount_new_api_supported())
                return false;

        /* Identity mapping of a single ID (nobody -> nobody); enough to exercise the feature. */
        r = strextendf(&uid_map, UID_FMT " " UID_FMT " " UID_FMT "\n", UID_NOBODY, UID_NOBODY, 1u);
        if (r < 0)
                return r;

        r = strextendf(&gid_map, GID_FMT " " GID_FMT " " GID_FMT "\n", GID_NOBODY, GID_NOBODY, 1u);
        if (r < 0)
                return r;

        userns_fd = userns_acquire(uid_map, gid_map);
        if (ERRNO_IS_NEG_NOT_SUPPORTED(userns_fd) || userns_fd == -EPERM || userns_fd == -EACCES)
                return false;
        if (userns_fd == -ENOSPC) {
                /* The user namespace limit (user.max_user_namespaces) is exhausted or zero. */
                log_debug_errno(userns_fd, "Failed to acquire new user namespace for '%s', assuming ID-mapping is not supported: %m", path);
                return false;
        }
        if (userns_fd < 0)
                return log_debug_errno(userns_fd, "ID-mapping supported namespace acquire failed for '%s' : %m", path);

        dir_fd = RET_NERRNO(open(path, O_RDONLY | O_CLOEXEC | O_NOFOLLOW));
        if (ERRNO_IS_NEG_NOT_SUPPORTED(dir_fd) || dir_fd == -EINVAL)
                return false;
        if (dir_fd < 0)
                return log_debug_errno(dir_fd, "ID-mapping supported open failed for '%s' : %m", path);

        mount_fd = RET_NERRNO(open_tree(dir_fd, "", AT_EMPTY_PATH | OPEN_TREE_CLONE | OPEN_TREE_CLOEXEC));
        if (ERRNO_IS_NEG_NOT_SUPPORTED(mount_fd) || mount_fd == -EINVAL || mount_fd == -EPERM || mount_fd == -EACCES)
                return false;
        if (mount_fd < 0)
                return log_debug_errno(mount_fd, "ID-mapping supported open_tree failed for '%s' : %m", path);

        r = RET_NERRNO(mount_setattr(mount_fd, "", AT_EMPTY_PATH,
                                     &(struct mount_attr) {
                                             .attr_set = MOUNT_ATTR_IDMAP | MOUNT_ATTR_NOSUID | MOUNT_ATTR_NOEXEC | MOUNT_ATTR_RDONLY | MOUNT_ATTR_NODEV,
                                             .userns_fd = userns_fd,
                                     }, sizeof(struct mount_attr)));
        if (ERRNO_IS_NEG_NOT_SUPPORTED(r) || r == -EINVAL || r == -EPERM || r == -EACCES)
                return false;
        if (r < 0)
                return log_debug_errno(r, "ID-mapping supported setattr failed for '%s' : %m", path);

        return true;
}

View File

@ -75,3 +75,5 @@ int parse_userns_uid_range(const char *s, uid_t *ret_uid_shift, uid_t *ret_uid_r
int namespace_open_by_type(NamespaceType type);
int is_our_namespace(int fd, NamespaceType type);
int is_idmapping_supported(const char *path);

View File

@ -2554,7 +2554,8 @@ static int setup_exec_directory(
/* Lock down the access mode (we use chmod_and_chown() to make this idempotent. We don't
* specify UID/GID here, so that path_chown_recursive() can optimize things depending on the
* current UID/GID ownership.) */
r = chmod_and_chown(pp ?: p, context->directories[type].mode, UID_INVALID, GID_INVALID);
const char *target_dir = pp ?: p;
r = chmod_and_chown(target_dir, context->directories[type].mode, UID_INVALID, GID_INVALID);
if (r < 0)
goto fail;
@ -2563,12 +2564,51 @@ static int setup_exec_directory(
if (params->runtime_scope != RUNTIME_SCOPE_SYSTEM)
continue;
/* Then, change the ownership of the whole tree, if necessary. When dynamic users are used we
int idmapping_supported = is_idmapping_supported(target_dir);
if (idmapping_supported < 0) {
r = log_debug_errno(idmapping_supported, "Unable to determine if ID mapping is supported on mount '%s': %m", target_dir);
goto fail;
}
log_debug("ID-mapping is%ssupported for exec directory %s", idmapping_supported ? " " : " not ", target_dir);
/* Change the ownership of the whole tree, if necessary. When dynamic users are used we
* drop the suid/sgid bits, since we really don't want SUID/SGID files for dynamic UID/GID
* assignments to exist. */
r = path_chown_recursive(pp ?: p, uid, gid, context->dynamic_user ? 01777 : 07777, AT_SYMLINK_FOLLOW);
if (r < 0)
goto fail;
uid_t chown_uid = uid;
gid_t chown_gid = gid;
bool do_chown = false;
if (uid == 0 || gid == 0 || !idmapping_supported) {
do_chown = true;
i->idmapped = false;
} else {
/* Use 'nobody' uid/gid for exec directories if ID-mapping is supported. For backward compatibility,
* continue doing chmod/chown if the directory was chmod/chowned before (if uid/gid is not 'nobody') */
struct stat st;
r = RET_NERRNO(stat(target_dir, &st));
if (r < 0)
goto fail;
if (st.st_uid == UID_NOBODY && st.st_gid == GID_NOBODY) {
do_chown = false;
i->idmapped = true;
} else if (exec_directory_is_private(context, type) && st.st_uid == 0 && st.st_gid == 0) {
chown_uid = UID_NOBODY;
chown_gid = GID_NOBODY;
do_chown = true;
i->idmapped = true;
} else {
do_chown = true;
i->idmapped = false;
}
}
if (do_chown) {
r = path_chown_recursive(target_dir, chown_uid, chown_gid, context->dynamic_user ? 01777 : 07777, AT_SYMLINK_FOLLOW);
if (r < 0)
goto fail;
}
}
/* If we are not going to run in a namespace, set up the symlinks - otherwise
@ -2620,6 +2660,8 @@ static int setup_smack(
static int compile_bind_mounts(
const ExecContext *context,
const ExecParameters *params,
uid_t exec_directory_uid, /* only used for id-mapped mounts Exec directories */
gid_t exec_directory_gid, /* only used for id-mapped mounts Exec directories */
BindMount **ret_bind_mounts,
size_t *ret_n_bind_mounts,
char ***ret_empty_directories) {
@ -2721,6 +2763,9 @@ static int compile_bind_mounts(
.nosuid = context->dynamic_user, /* don't allow suid/sgid when DynamicUser= is on */
.recursive = true,
.read_only = FLAGS_SET(i->flags, EXEC_DIRECTORY_READ_ONLY),
.idmapped = i->idmapped,
.uid = exec_directory_uid,
.gid = exec_directory_gid,
};
}
}
@ -3057,7 +3102,9 @@ static int apply_mount_namespace(
ExecRuntime *runtime,
const char *memory_pressure_path,
bool needs_sandboxing,
char **reterr_path) {
char **reterr_path,
uid_t exec_directory_uid,
gid_t exec_directory_gid) {
_cleanup_(verity_settings_done) VeritySettings verity = VERITY_SETTINGS_DEFAULT;
_cleanup_strv_free_ char **empty_directories = NULL, **symlinks = NULL,
@ -3095,7 +3142,7 @@ static int apply_mount_namespace(
return r;
}
r = compile_bind_mounts(context, params, &bind_mounts, &n_bind_mounts, &empty_directories);
r = compile_bind_mounts(context, params, exec_directory_uid, exec_directory_gid, &bind_mounts, &n_bind_mounts, &empty_directories);
if (r < 0)
return r;
@ -4886,7 +4933,9 @@ int exec_invoke(
runtime,
memory_pressure_path,
needs_sandboxing,
&error_path);
&error_path,
uid,
gid);
if (r < 0) {
*exit_status = EXIT_NAMESPACE;
return log_exec_error_errno(context, params, r, "Failed to set up mount namespacing%s%s: %m",

View File

@ -163,6 +163,7 @@ typedef struct ExecDirectoryItem {
char *path;
char **symlinks;
ExecDirectoryFlags flags;
bool idmapped;
} ExecDirectoryItem;
typedef struct ExecDirectory {

View File

@ -113,6 +113,9 @@ typedef struct MountEntry {
LIST_HEAD(MountOptions, image_options_const);
char **overlay_layers;
VeritySettings verity;
bool idmapped;
uid_t idmap_uid;
gid_t idmap_gid;
} MountEntry;
typedef struct MountList {
@ -467,6 +470,9 @@ static int append_bind_mounts(MountList *ml, const BindMount *binds, size_t n) {
.flags = b->nodev ? MS_NODEV : 0,
.source_const = b->source,
.ignore = b->ignore_enoent,
.idmapped = b->idmapped,
.idmap_uid = b->uid,
.idmap_gid = b->gid,
};
}
@ -1892,6 +1898,45 @@ static int apply_one_mount(
}
log_debug("Successfully mounted %s to %s", what, mount_entry_path(m));
/* Take care of id-mapped mounts */
if (m->idmapped && uid_is_valid(m->idmap_uid) && gid_is_valid(m->idmap_gid)) {
_cleanup_close_ int userns_fd = -EBADF;
_cleanup_free_ char *uid_map = NULL, *gid_map = NULL;
log_debug("Setting an id-mapped mount on %s", mount_entry_path(m));
/* Do mapping from nobody (in setup_exec_directory()) -> this uid */
if (strextendf(&uid_map, UID_FMT " " UID_FMT " " UID_FMT "\n", UID_NOBODY, (uid_t)m->idmap_uid, (uid_t)1u) < 0)
return log_oom();
/* Consider StateDirectory=xxx aaa xxx:aaa/222
* To allow for later symlink creation (by root) in create_symlinks_from_tuples(), map root as well. */
if (m->idmap_uid != (uid_t)0) {
if (strextendf(&uid_map, UID_FMT " " UID_FMT " " UID_FMT "\n", (uid_t)0, (uid_t)0, (uid_t)1u) < 0)
return log_oom();
}
if (strextendf(&gid_map, GID_FMT " " GID_FMT " " GID_FMT "\n", GID_NOBODY, (gid_t)m->idmap_gid, (gid_t)1u) < 0)
return log_oom();
if (m->idmap_gid != (gid_t)0) {
if (strextendf(&gid_map, GID_FMT " " GID_FMT " " GID_FMT "\n", (gid_t)0, (gid_t)0, (gid_t)1u) < 0)
return log_oom();
}
userns_fd = userns_acquire(uid_map, gid_map);
if (userns_fd < 0)
return log_error_errno(userns_fd, "Failed to allocate user namespace: %m");
/* Drop SUID, add NOEXEC and NODEV for the mount to avoid root exploits */
r = remount_idmap_fd(STRV_MAKE(mount_entry_path(m)), userns_fd, MOUNT_ATTR_NOSUID | MOUNT_ATTR_NOEXEC | MOUNT_ATTR_NODEV);
if (r < 0)
return log_error_errno(r, "Failed to create an id-mapped mount: %m");
log_debug("ID-mapped mount created successfully for %s from " UID_FMT " to " UID_FMT "", mount_entry_path(m), UID_NOBODY, m->idmap_uid);
}
return 1;
}

View File

@ -87,6 +87,9 @@ struct BindMount {
bool noexec;
bool recursive;
bool ignore_enoent;
bool idmapped;
uid_t uid;
gid_t gid;
};
struct TemporaryFileSystem {

View File

@ -2089,7 +2089,7 @@ static int mount_partition(
(void) fs_grow(node, -EBADF, p);
if (userns_fd >= 0) {
r = remount_idmap_fd(STRV_MAKE(p), userns_fd);
r = remount_idmap_fd(STRV_MAKE(p), userns_fd, /* extra_mount_attr_set= */ 0);
if (r < 0)
return r;
}

View File

@ -1386,7 +1386,8 @@ int make_userns(uid_t uid_shift, uid_t uid_range, uid_t source_owner, uid_t dest
int remount_idmap_fd(
char **paths,
int userns_fd) {
int userns_fd,
uint64_t extra_mount_attr_set) {
int r;
@ -1423,7 +1424,7 @@ int remount_idmap_fd(
/* Set the user namespace mapping attribute on the cloned mount point */
if (mount_setattr(mntfd, "", AT_EMPTY_PATH,
&(struct mount_attr) {
.attr_set = MOUNT_ATTR_IDMAP,
.attr_set = MOUNT_ATTR_IDMAP | extra_mount_attr_set,
.userns_fd = userns_fd,
}, sizeof(struct mount_attr)) < 0)
return log_debug_errno(errno, "Failed to change bind mount attributes for clone of '%s': %m", paths[i]);
@ -1460,7 +1461,7 @@ int remount_idmap(
if (userns_fd < 0)
return userns_fd;
return remount_idmap_fd(p, userns_fd);
return remount_idmap_fd(p, userns_fd, /* extra_mount_attr_set= */ 0);
}
static void sub_mount_clear(SubMount *s) {

View File

@ -163,7 +163,7 @@ typedef enum RemountIdmapping {
} RemountIdmapping;
int make_userns(uid_t uid_shift, uid_t uid_range, uid_t host_owner, uid_t dest_owner, RemountIdmapping idmapping);
int remount_idmap_fd(char **p, int userns_fd);
int remount_idmap_fd(char **p, int userns_fd, uint64_t extra_mount_attr_set);
int remount_idmap(char **p, uid_t uid_shift, uid_t uid_range, uid_t host_owner, uid_t dest_owner, RemountIdmapping idmapping);
int bind_mount_submounts(

View File

@ -205,6 +205,14 @@ TEST(protect_kernel_logs) {
assert_se(wait_for_terminate_and_check("ns-kernellogs", pid, WAIT_LOG) == EXIT_SUCCESS);
}
/* Smoke test for is_idmapping_supported(): for each of the listed directories the probe must return a
 * non-negative value. Both "supported" (> 0) and "not supported" (0) are accepted; only a negative
 * (error) return fails the test. */
TEST(idmapping_supported) {
assert_se(is_idmapping_supported("/run") >= 0);
assert_se(is_idmapping_supported("/var/lib") >= 0);
assert_se(is_idmapping_supported("/var/cache") >= 0);
assert_se(is_idmapping_supported("/var/log") >= 0);
assert_se(is_idmapping_supported("/etc") >= 0);
}
static int intro(void) {
if (!have_namespaces())
return log_tests_skipped("Don't have namespace support");

View File

@ -162,6 +162,71 @@ EOF
systemctl start testservice-34-check-writable.service
}
# Checks StateDirectory= ownership for a DynamicUser=yes/PrivateUsers=yes service:
#  - inside the service, a file created through the /var/lib/sampleservice alias must be owned by the
#    UID found in the first column of the second line of /proc/self/uid_map (presumably the service's
#    own UID -- confirm against the PrivateUsers= mapping layout);
#  - on the host, the same file must be owned by UID 65534.
test_check_idmapped_mounts() {
# Start from a clean state so ownership left over from an earlier run cannot influence the result.
rm -rf /var/lib/testidmapped /var/lib/private/testidmapped
# The heredoc delimiter is quoted (<<\EOF), so the unit text below is written out verbatim.
cat >/run/systemd/system/testservice-34-check-idmapped.service <<\EOF
[Unit]
Description=Check id-mapped directories when DynamicUser=yes with StateDirectory
[Service]
# Relevant only for sanitizer runs
EnvironmentFile=-/usr/lib/systemd/systemd-asan-env
Type=oneshot
MountAPIVFS=yes
DynamicUser=yes
PrivateUsers=yes
TemporaryFileSystem=/run /var/opt /var/lib /vol
UMask=0000
StateDirectory=testidmapped:sampleservice
ExecStart=/bin/bash -c ' \
set -eux; \
set -o pipefail; \
touch /var/lib/sampleservice/testfile; \
[[ $(awk "NR==2 {print \$1}" /proc/self/uid_map) == $(stat -c "%%u" /var/lib/private/testidmapped/testfile) ]]; \
'
EOF
systemctl daemon-reload
# Type=oneshot: 'start' returns only after the ExecStart= script (and its in-namespace check) finished.
systemctl start testservice-34-check-idmapped.service
# Host-side view of the file the service just created.
[[ $(stat -c "%u" /var/lib/private/testidmapped/testfile) == 65534 ]]
}
# Counterpart of the DynamicUser=yes check for a service running as root (User=root, DynamicUser=no,
# PrivateUsers=no): a file created through the /var/lib/sampleservice alias must be owned by UID 0,
# both as seen from inside the service and as seen from the host under /var/lib/testidmapped.
test_check_idmapped_mounts_root() {
# Start from a clean state so ownership left over from an earlier run cannot influence the result.
rm -rf /var/lib/testidmapped /var/lib/private/testidmapped
# Reuses (overwrites) the unit file name of the DynamicUser=yes variant; daemon-reload below picks up
# the new contents. The quoted delimiter (<<\EOF) writes the unit text out verbatim.
cat >/run/systemd/system/testservice-34-check-idmapped.service <<\EOF
[Unit]
Description=Check id-mapped directories when DynamicUser=no with StateDirectory
[Service]
# Relevant only for sanitizer runs
EnvironmentFile=-/usr/lib/systemd/systemd-asan-env
Type=oneshot
MountAPIVFS=yes
User=root
DynamicUser=no
PrivateUsers=no
TemporaryFileSystem=/run /var/opt /var/lib /vol
UMask=0000
StateDirectory=testidmapped:sampleservice
ExecStart=/bin/bash -c ' \
set -eux; \
set -o pipefail; \
touch /var/lib/sampleservice/testfile; \
[[ 0 == $(stat -c "%%u" /var/lib/testidmapped/testfile) ]]; \
'
EOF
systemctl daemon-reload
systemctl start testservice-34-check-idmapped.service
# Host-side view of the file the service just created.
[[ $(stat -c "%u" /var/lib/testidmapped/testfile) == 0 ]]
}
test_directory "StateDirectory" "/var/lib"
test_directory "RuntimeDirectory" "/run"
test_directory "CacheDirectory" "/var/cache"
@ -169,6 +234,11 @@ test_directory "LogsDirectory" "/var/log"
test_check_writable
# Run the id-mapped mount checks only when the running kernel reports version 5.12 or newer; on older
# kernels they are skipped silently.
if systemd-analyze compare-versions "$(uname -r)" ge 5.12; then
test_check_idmapped_mounts
test_check_idmapped_mounts_root
fi
systemd-analyze log-level info
touch /testok