mirror of
https://github.com/systemd/systemd.git
synced 2025-01-10 05:18:17 +03:00
nspawn: add filesystem id mapping support to --bind and --bind-ro
This commit is contained in:
parent
5433d425b4
commit
c0c8f71800
@ -1357,17 +1357,21 @@ After=sys-subsystem-net-devices-ens1.device</programlisting>
|
||||
source path is taken relative to the image's root directory. This permits setting up bind mounts within the
|
||||
container image. The source path may be specified as empty string, in which case a temporary directory below
|
||||
the host's <filename>/var/tmp/</filename> directory is used. It is automatically removed when the container is
|
||||
shut down. Mount options are comma-separated and currently, only <option>rbind</option> and
|
||||
<option>norbind</option> are allowed, controlling whether to create a recursive or a regular bind
|
||||
mount. Defaults to "rbind". Backslash escapes are interpreted, so <literal>\:</literal> may be used to embed
|
||||
colons in either path. This option may be specified multiple times for creating multiple independent bind
|
||||
mount points. The <option>--bind-ro=</option> option creates read-only bind mounts.</para>
|
||||
shut down. The <option>--bind-ro=</option> option creates read-only bind mounts. Backslash escapes are interpreted,
|
||||
so <literal>\:</literal> may be used to embed colons in either path. This option may be specified
|
||||
multiple times for creating multiple independent bind mount points.</para>
|
||||
|
||||
<para>Mount options are comma-separated. <option>rbind</option> and <option>norbind</option> control whether
|
||||
to create a recursive or a regular bind mount. Defaults to "rbind". <option>idmap</option> and <option>noidmap</option>
|
||||
control whether the bind mount should use filesystem id mappings. Using this option requires support by the source filesystem
|
||||
for id mappings. Defaults to "noidmap".</para>
|
||||
|
||||
<para>Note that when this option is used in combination with <option>--private-users</option>, the resulting
|
||||
mount points will be owned by the <constant>nobody</constant> user. That's because the mount and its files and
|
||||
directories continue to be owned by the relevant host users and groups, which do not exist in the container,
|
||||
and thus show up under the wildcard UID 65534 (nobody). If such bind mounts are created, it is recommended to
|
||||
make them read-only, using <option>--bind-ro=</option>.</para></listitem>
|
||||
make them read-only, using <option>--bind-ro=</option>. Alternatively, you can use the "idmap" mount option to
|
||||
map the filesystem ids.</para></listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
|
@ -672,9 +672,10 @@ int mount_all(const char *dest,
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int parse_mount_bind_options(const char *options, unsigned long *mount_flags, char **mount_opts) {
|
||||
static int parse_mount_bind_options(const char *options, unsigned long *mount_flags, char **mount_opts, bool *idmapped) {
|
||||
unsigned long flags = *mount_flags;
|
||||
char *opts = NULL;
|
||||
bool flag_idmapped = *idmapped;
|
||||
int r;
|
||||
|
||||
assert(options);
|
||||
@ -692,29 +693,35 @@ static int parse_mount_bind_options(const char *options, unsigned long *mount_fl
|
||||
flags |= MS_REC;
|
||||
else if (streq(word, "norbind"))
|
||||
flags &= ~MS_REC;
|
||||
else if (streq(word, "idmap"))
|
||||
flag_idmapped = true;
|
||||
else if (streq(word, "noidmap"))
|
||||
flag_idmapped = false;
|
||||
else
|
||||
return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
|
||||
"Invalid bind mount option: %s", word);
|
||||
}
|
||||
|
||||
*mount_flags = flags;
|
||||
*idmapped = flag_idmapped;
|
||||
/* in the future mount_opts will hold string options for mount(2) */
|
||||
*mount_opts = opts;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int mount_bind(const char *dest, CustomMount *m) {
|
||||
static int mount_bind(const char *dest, CustomMount *m, uid_t uid_shift, uid_t uid_range) {
|
||||
_cleanup_free_ char *mount_opts = NULL, *where = NULL;
|
||||
unsigned long mount_flags = MS_BIND | MS_REC;
|
||||
struct stat source_st, dest_st;
|
||||
int r;
|
||||
bool idmapped = false;
|
||||
|
||||
assert(dest);
|
||||
assert(m);
|
||||
|
||||
if (m->options) {
|
||||
r = parse_mount_bind_options(m->options, &mount_flags, &mount_opts);
|
||||
r = parse_mount_bind_options(m->options, &mount_flags, &mount_opts, &idmapped);
|
||||
if (r < 0)
|
||||
return r;
|
||||
}
|
||||
@ -767,6 +774,12 @@ static int mount_bind(const char *dest, CustomMount *m) {
|
||||
return log_error_errno(r, "Read-only bind mount failed: %m");
|
||||
}
|
||||
|
||||
if (idmapped) {
|
||||
r = remount_idmap(where, uid_shift, uid_range);
|
||||
if (r < 0)
|
||||
return log_error_errno(r, "Failed to map ids for bind mount %s: %m", where);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -906,6 +919,7 @@ int mount_custom(
|
||||
const char *dest,
|
||||
CustomMount *mounts, size_t n,
|
||||
uid_t uid_shift,
|
||||
uid_t uid_range,
|
||||
const char *selinux_apifs_context,
|
||||
MountSettingsMask mount_settings) {
|
||||
int r;
|
||||
@ -927,7 +941,7 @@ int mount_custom(
|
||||
switch (m->type) {
|
||||
|
||||
case CUSTOM_MOUNT_BIND:
|
||||
r = mount_bind(dest, m);
|
||||
r = mount_bind(dest, m, uid_shift, uid_range);
|
||||
break;
|
||||
|
||||
case CUSTOM_MOUNT_TMPFS:
|
||||
|
@ -58,7 +58,7 @@ int inaccessible_mount_parse(CustomMount **l, size_t *n, const char *s);
|
||||
int mount_all(const char *dest, MountSettingsMask mount_settings, uid_t uid_shift, const char *selinux_apifs_context);
|
||||
int mount_sysfs(const char *dest, MountSettingsMask mount_settings);
|
||||
|
||||
int mount_custom(const char *dest, CustomMount *mounts, size_t n, uid_t uid_shift, const char *selinux_apifs_context, MountSettingsMask mount_settings);
|
||||
int mount_custom(const char *dest, CustomMount *mounts, size_t n, uid_t uid_shift, uid_t uid_range, const char *selinux_apifs_context, MountSettingsMask mount_settings);
|
||||
bool has_custom_root_mount(const CustomMount *mounts, size_t n);
|
||||
|
||||
int setup_volatile_mode(const char *directory, VolatileMode mode, uid_t uid_shift, const char *selinux_apifs_context);
|
||||
|
@ -3314,6 +3314,7 @@ static int inner_child(
|
||||
arg_custom_mounts,
|
||||
arg_n_custom_mounts,
|
||||
0,
|
||||
0,
|
||||
arg_selinux_apifs_context,
|
||||
MOUNT_NON_ROOT_ONLY | MOUNT_IN_USERNS);
|
||||
if (r < 0)
|
||||
@ -3719,32 +3720,6 @@ static int outer_child(
|
||||
directory = "/run/systemd/nspawn-root";
|
||||
}
|
||||
|
||||
if (arg_userns_mode != USER_NAMESPACE_NO &&
|
||||
IN_SET(arg_userns_ownership, USER_NAMESPACE_OWNERSHIP_MAP, USER_NAMESPACE_OWNERSHIP_AUTO) &&
|
||||
arg_uid_shift != 0) {
|
||||
r = make_mount_point(directory);
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
||||
r = remount_idmap(directory, arg_uid_shift, arg_uid_range);
|
||||
if (r == -EINVAL || ERRNO_IS_NOT_SUPPORTED(r)) {
|
||||
/* This might fail because the kernel or file system doesn't support idmapping. We
|
||||
* can't really distinguish this nicely, nor do we have any guarantees about the
|
||||
* error codes we see, could be EOPNOTSUPP or EINVAL. */
|
||||
if (arg_userns_ownership != USER_NAMESPACE_OWNERSHIP_AUTO)
|
||||
return log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP),
|
||||
"ID mapped mounts are apparently not available, sorry.");
|
||||
|
||||
log_debug("ID mapped mounts are apparently not available on this kernel or for the selected file system, reverting to recursive chown()ing.");
|
||||
arg_userns_ownership = USER_NAMESPACE_OWNERSHIP_CHOWN;
|
||||
} else if (r < 0)
|
||||
return log_error_errno(r, "Failed to set up ID mapped mounts: %m");
|
||||
else {
|
||||
log_debug("ID mapped mounts available, making use of them.");
|
||||
idmap = true;
|
||||
}
|
||||
}
|
||||
|
||||
r = setup_pivot_root(
|
||||
directory,
|
||||
arg_pivot_root_new,
|
||||
@ -3795,6 +3770,7 @@ static int outer_child(
|
||||
arg_custom_mounts,
|
||||
arg_n_custom_mounts,
|
||||
arg_uid_shift,
|
||||
arg_uid_range,
|
||||
arg_selinux_apifs_context,
|
||||
MOUNT_ROOT_ONLY);
|
||||
if (r < 0)
|
||||
@ -3805,6 +3781,29 @@ static int outer_child(
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
||||
if (arg_userns_mode != USER_NAMESPACE_NO &&
|
||||
IN_SET(arg_userns_ownership, USER_NAMESPACE_OWNERSHIP_MAP, USER_NAMESPACE_OWNERSHIP_AUTO) &&
|
||||
arg_uid_shift != 0) {
|
||||
|
||||
r = remount_idmap(directory, arg_uid_shift, arg_uid_range);
|
||||
if (r == -EINVAL || ERRNO_IS_NOT_SUPPORTED(r)) {
|
||||
/* This might fail because the kernel or file system doesn't support idmapping. We
|
||||
* can't really distinguish this nicely, nor do we have any guarantees about the
|
||||
* error codes we see, could be EOPNOTSUPP or EINVAL. */
|
||||
if (arg_userns_ownership != USER_NAMESPACE_OWNERSHIP_AUTO)
|
||||
return log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP),
|
||||
"ID mapped mounts are apparently not available, sorry.");
|
||||
|
||||
log_debug("ID mapped mounts are apparently not available on this kernel or for the selected file system, reverting to recursive chown()ing.");
|
||||
arg_userns_ownership = USER_NAMESPACE_OWNERSHIP_CHOWN;
|
||||
} else if (r < 0)
|
||||
return log_error_errno(r, "Failed to set up ID mapped mounts: %m");
|
||||
else {
|
||||
log_debug("ID mapped mounts available, making use of them.");
|
||||
idmap = true;
|
||||
}
|
||||
}
|
||||
|
||||
if (dissected_image) {
|
||||
/* Now we know the uid shift, let's now mount everything else that might be in the image. */
|
||||
r = dissected_image_mount(
|
||||
@ -3915,6 +3914,7 @@ static int outer_child(
|
||||
arg_custom_mounts,
|
||||
arg_n_custom_mounts,
|
||||
arg_uid_shift,
|
||||
arg_uid_range,
|
||||
arg_selinux_apifs_context,
|
||||
MOUNT_NON_ROOT_ONLY);
|
||||
if (r < 0)
|
||||
|
Loading…
Reference in New Issue
Block a user