mirror of
https://github.com/systemd/systemd.git
synced 2025-03-31 14:50:15 +03:00
Merge pull request #19438 from poettering/nspawn-uidmap
nspawn: add support for kernel 5.12 ID mapping mounts
This commit is contained in:
commit
ebf940e1e9
9
TODO
9
TODO
@ -36,12 +36,15 @@ Features:
|
||||
* journald: support RFC3164 fully for the incoming syslog transport, see
|
||||
https://github.com/systemd/systemd/issues/19251#issuecomment-816601955
|
||||
|
||||
* nspawn: support uid mapping bind mounts, as available in kernel 5.12,
|
||||
for all our disk image needs
|
||||
|
||||
* homed: if kernel 5.12 uid mapping mounts exist, use that instead of recursive
|
||||
chowns.
|
||||
|
||||
* DynamicUser= + StateDirectory= → use uid mapping mounts, too, in order to
|
||||
make dirs appear under right UID.
|
||||
|
||||
* nspawn: make --bind= work sanely with --private-users when uid mapping mounts
|
||||
are used.
|
||||
|
||||
* cryptsetup: tweak tpm2-device=auto logic, abort quickly if firmware tells us
|
||||
there isn't any TPM2 device anyway. that way, we'll wait for the TPM2 device
|
||||
to show up only if registered in LUKS header + the firmware suggests there is
|
||||
|
@ -696,32 +696,41 @@
|
||||
number of host UIDs/GIDs to assign to the container. If the second parameter is omitted, 65536 UIDs/GIDs are
|
||||
assigned.</para></listitem>
|
||||
|
||||
<listitem><para>If the parameter is omitted, or true, user namespacing is turned on. The UID/GID range to
|
||||
use is determined automatically from the file ownership of the root directory of the container's directory
|
||||
tree. To use this option, make sure to prepare the directory tree in advance, and ensure that all files and
|
||||
directories in it are owned by UIDs/GIDs in the range you'd like to use. Also, make sure that used file ACLs
|
||||
exclusively reference UIDs/GIDs in the appropriate range. If this mode is used the number of UIDs/GIDs
|
||||
assigned to the container for use is 65536, and the UID/GID of the root directory must be a multiple of
|
||||
65536.</para></listitem>
|
||||
<listitem><para>If the parameter is <literal>yes</literal>, user namespacing is turned on. The
|
||||
UID/GID range to use is determined automatically from the file ownership of the root directory of
|
||||
the container's directory tree. To use this option, make sure to prepare the directory tree in
|
||||
advance, and ensure that all files and directories in it are owned by UIDs/GIDs in the range you'd
|
||||
like to use. Also, make sure that used file ACLs exclusively reference UIDs/GIDs in the appropriate
|
||||
range. In this mode, the number of UIDs/GIDs assigned to the container is 65536, and the owner
|
||||
UID/GID of the root directory must be a multiple of 65536.</para></listitem>
|
||||
|
||||
<listitem><para>If the parameter is false, user namespacing is turned off. This is the default.</para>
|
||||
<listitem><para>If the parameter is <literal>no</literal>, user namespacing is turned off. This is
|
||||
the default.</para>
|
||||
</listitem>
|
||||
|
||||
<listitem><para>The special value <literal>pick</literal> turns on user namespacing. In this case the UID/GID
|
||||
range is automatically chosen. As first step, the file owner of the root directory of the container's
|
||||
directory tree is read, and it is checked that it is currently not used by the system otherwise (in
|
||||
particular, that no other container is using it). If this check is successful, the UID/GID range determined
|
||||
this way is used, similar to the behavior if "yes" is specified. If the check is not successful (and thus
|
||||
the UID/GID range indicated in the root directory's file owner is already used elsewhere) a new – currently
|
||||
unused – UID/GID range of 65536 UIDs/GIDs is randomly chosen between the host UID/GIDs of 524288 and
|
||||
1878982656, always starting at a multiple of 65536, and, if possible, consistently hashed from the machine
|
||||
name. This setting implies
|
||||
<option>--private-users-chown</option> (see below), which has the effect that the files and directories in
|
||||
the container's directory tree will be owned by the appropriate users of the range picked. Using this option
|
||||
makes user namespace behavior fully automatic. Note that the first invocation of a previously unused
|
||||
container image might result in picking a new UID/GID range for it, and thus in the (possibly expensive) file
|
||||
ownership adjustment operation. However, subsequent invocations of the container will be cheap (unless of
|
||||
course the picked UID/GID range is assigned to a different use by then).</para></listitem>
|
||||
<listitem><para>If the parameter is <literal>identity</literal>, user namespacing is employed with
|
||||
an identity mapping for the first 65536 UIDs/GIDs. This is mostly equivalent to
|
||||
<option>--private-users=0:65536</option>. While it does not provide UID/GID isolation, since all
|
||||
host and container UIDs/GIDs are chosen identically it does provide process capability isolation,
|
||||
and hence is often a good choice if proper user namespacing with distinct UID maps is not
|
||||
appropriate.</para></listitem>
|
||||
|
||||
<listitem><para>The special value <literal>pick</literal> turns on user namespacing. In this case
|
||||
the UID/GID range is automatically chosen. As first step, the file owner UID/GID of the root
|
||||
directory of the container's directory tree is read, and it is checked that no other container is
|
||||
currently using it. If this check is successful, the UID/GID range determined this way is used,
|
||||
similar to the behavior if <literal>yes</literal> is specified. If the check is not successful (and
|
||||
thus the UID/GID range indicated in the root directory's file owner is already used elsewhere) a
|
||||
new – currently unused – UID/GID range of 65536 UIDs/GIDs is randomly chosen between the host
|
||||
UID/GIDs of 524288 and 1878982656, always starting at a multiple of 65536, and, if possible,
|
||||
consistently hashed from the machine name. This setting implies
|
||||
<option>--private-users-ownership=auto</option> (see below), which possibly has the effect that the
|
||||
files and directories in the container's directory tree will be owned by the appropriate users of
|
||||
the range picked. Using this option makes user namespace behavior fully automatic. Note that the
|
||||
first invocation of a previously unused container image might result in picking a new UID/GID range
|
||||
for it, and thus in the (possibly expensive) file ownership adjustment operation. However,
|
||||
subsequent invocations of the container will be cheap (unless of course the picked UID/GID range is
|
||||
assigned to a different use by then).</para></listitem>
|
||||
</orderedlist>
|
||||
|
||||
<para>It is recommended to assign at least 65536 UIDs/GIDs to each container, so that the usable UID/GID range in the
|
||||
@ -747,31 +756,44 @@
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
<term><option>--private-users-chown</option></term>
|
||||
<term><option>--private-users-ownership=</option></term>
|
||||
|
||||
<listitem><para>If specified, all files and directories in the container's directory tree will be
|
||||
adjusted so that they are owned by the appropriate UIDs/GIDs selected for the container (see above).
|
||||
This operation is potentially expensive, as it involves iterating through the full directory tree of
|
||||
the container. Besides actual file ownership, file ACLs are adjusted as well.</para>
|
||||
<listitem><para>Controls how to adjust the container image's UIDs and GIDs to match the UID/GID range
|
||||
chosen with <option>--private-users=</option>, see above. Takes one of <literal>off</literal> (to
|
||||
leave the image as is), <literal>chown</literal> (to recursively <function>chown()</function> the
|
||||
container's directory tree as needed), <literal>map</literal> (in order to use transparent ID mapping
|
||||
mounts) or <literal>auto</literal> for automatically using <literal>map</literal> where available and
|
||||
<literal>chown</literal> where not.</para>
|
||||
|
||||
<para>This option is implied if <option>--private-users=pick</option> is used. This option has no effect if
|
||||
user namespacing is not used.</para></listitem>
|
||||
<para>If <literal>chown</literal> is selected, all files and directories in the container's directory
|
||||
tree will be adjusted so that they are owned by the appropriate UIDs/GIDs selected for the container
|
||||
(see above). This operation is potentially expensive, as it involves iterating through the full
|
||||
directory tree of the container. Besides actual file ownership, file ACLs are adjusted as
|
||||
well.</para>
|
||||
|
||||
<para>Typically <literal>map</literal> is the best choice, since it transparently maps UIDs/GIDs in
|
||||
memory as needed without modifying the image, and without requiring an expensive recursive adjustment
|
||||
operation. However, it is not available for all file systems, currently.</para>
|
||||
|
||||
<para>The <option>--private-users-ownership=auto</option> option is implied if
|
||||
<option>--private-users=pick</option> is used. This option has no effect if user namespacing is not
|
||||
used.</para></listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
<term><option>-U</option></term>
|
||||
|
||||
<listitem><para>If the kernel supports the user namespaces feature, equivalent to
|
||||
<option>--private-users=pick --private-users-chown</option>, otherwise equivalent to
|
||||
<option>--private-users=pick --private-users-ownership=auto</option>, otherwise equivalent to
|
||||
<option>--private-users=no</option>.</para>
|
||||
|
||||
<para>Note that <option>-U</option> is the default if the
|
||||
<filename>systemd-nspawn@.service</filename> template unit file is used.</para>
|
||||
|
||||
<para>Note: it is possible to undo the effect of <option>--private-users-chown</option> (or
|
||||
<para>Note: it is possible to undo the effect of <option>--private-users-ownership=chown</option> (or
|
||||
<option>-U</option>) on the file system by redoing the operation with the first UID of 0:</para>
|
||||
|
||||
<programlisting>systemd-nspawn … --private-users=0 --private-users-chown</programlisting>
|
||||
<programlisting>systemd-nspawn … --private-users=0 --private-users-ownership=chown</programlisting>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
|
@ -452,12 +452,12 @@
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
<term><varname>PrivateUsersChown=</varname></term>
|
||||
<term><varname>PrivateUsersOwnership=</varname></term>
|
||||
|
||||
<listitem><para>Configures whether the ownership of the files and directories in the container tree shall be
|
||||
adjusted to the UID/GID range used, if necessary and user namespacing is enabled. This is equivalent to the
|
||||
<option>--private-users-chown</option> command line switch. This option is privileged (see
|
||||
above). </para></listitem>
|
||||
<listitem><para>Configures whether the ownership of the files and directories in the container tree
|
||||
shall be adjusted to the UID/GID range used, if necessary and user namespacing is enabled. This is
|
||||
equivalent to the <option>--private-users-ownership=</option> command line switch. This option is
|
||||
privileged (see above).</para></listitem>
|
||||
</varlistentry>
|
||||
|
||||
</variablelist>
|
||||
|
@ -63,14 +63,14 @@ _systemd_nspawn() {
|
||||
|
||||
local -A OPTS=(
|
||||
[STANDALONE]='-h --help --version --private-network -b --boot --read-only -q --quiet --share-system
|
||||
--keep-unit -n --network-veth -j -x --ephemeral -a --as-pid2 --private-users-chown -U'
|
||||
--keep-unit -n --network-veth -j -x --ephemeral -a --as-pid2 -U'
|
||||
[ARG]='-D --directory -u --user --uuid --capability --drop-capability --link-journal --bind --bind-ro
|
||||
-M --machine -S --slice -E --setenv -Z --selinux-context -L --selinux-apifs-context
|
||||
--register --network-interface --network-bridge --personality -i --image --tmpfs
|
||||
--volatile --network-macvlan --kill-signal --template --notify-ready --root-hash --chdir
|
||||
--pivot-root --property --private-users --network-namespace-path --network-ipvlan
|
||||
--network-veth-extra --network-zone -p --port --system-call-filter --overlay --overlay-ro
|
||||
--settings --rlimit --hostname --no-new-privileges --oom-score-adjust --cpu-affinity
|
||||
--pivot-root --property --private-users --private-users-ownership --network-namespace-path
|
||||
--network-ipvlan --network-veth-extra --network-zone -p --port --system-call-filter --overlay
|
||||
--overlay-ro --settings --rlimit --hostname --no-new-privileges --oom-score-adjust --cpu-affinity
|
||||
--resolv-conf --timezone --root-hash-sig'
|
||||
)
|
||||
|
||||
|
@ -9,3 +9,18 @@ int namespace_enter(int pidns_fd, int mntns_fd, int netns_fd, int userns_fd, int
|
||||
int fd_is_ns(int fd, unsigned long nsflag);
|
||||
|
||||
int detach_mount_namespace(void);
|
||||
|
||||
static inline bool userns_shift_range_valid(uid_t shift, uid_t range) {
        /* A userns UID range is acceptable only if it holds at least one UID and if shift + range still
         * fits into uid_t, i.e. the end of the range does not wrap around. Returns true if both
         * conditions hold, false otherwise. */

        assert_cc((uid_t) -1 > 0); /* the arithmetic below relies on uid_t being unsigned */

        /* "(uid_t) -1 - range" cannot underflow, hence comparing against it is an overflow-safe way of
         * checking the upper bound. */
        return range > 0 && shift <= (uid_t) -1 - range;
}
|
||||
|
@ -1306,8 +1306,10 @@ int safe_fork_full(
|
||||
saved_ssp = &saved_ss;
|
||||
}
|
||||
|
||||
if (flags & FORK_NEW_MOUNTNS)
|
||||
pid = raw_clone(SIGCHLD|CLONE_NEWNS);
|
||||
if ((flags & (FORK_NEW_MOUNTNS|FORK_NEW_USERNS)) != 0)
|
||||
pid = raw_clone(SIGCHLD|
|
||||
(FLAGS_SET(flags, FORK_NEW_MOUNTNS) ? CLONE_NEWNS : 0) |
|
||||
(FLAGS_SET(flags, FORK_NEW_USERNS) ? CLONE_NEWUSER : 0));
|
||||
else
|
||||
pid = fork();
|
||||
if (pid < 0)
|
||||
|
@ -165,6 +165,7 @@ typedef enum ForkFlags {
|
||||
FORK_RLIMIT_NOFILE_SAFE = 1 << 10, /* Set RLIMIT_NOFILE soft limit to 1K for select() compat */
|
||||
FORK_STDOUT_TO_STDERR = 1 << 11, /* Make stdout a copy of stderr */
|
||||
FORK_FLUSH_STDIO = 1 << 12, /* fflush() stdout (and stderr) before forking */
|
||||
FORK_NEW_USERNS = 1 << 13, /* Run child in its own user namespace */
|
||||
} ForkFlags;
|
||||
|
||||
int safe_fork_full(const char *name, const int except_fds[], size_t n_except_fds, ForkFlags flags, pid_t *ret_pid);
|
||||
|
@ -2170,7 +2170,7 @@ int setup_namespace(
|
||||
|
||||
if (root_image) {
|
||||
/* A root image is specified, mount it to the right place */
|
||||
r = dissected_image_mount(dissected_image, root, UID_INVALID, dissect_image_flags);
|
||||
r = dissected_image_mount(dissected_image, root, UID_INVALID, UID_INVALID, dissect_image_flags);
|
||||
if (r < 0) {
|
||||
log_debug_errno(r, "Failed to mount root image: %m");
|
||||
goto finish;
|
||||
|
@ -582,7 +582,7 @@ static int action_mount(DissectedImage *m, LoopDevice *d) {
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
||||
r = dissected_image_mount_and_warn(m, arg_path, UID_INVALID, arg_flags);
|
||||
r = dissected_image_mount_and_warn(m, arg_path, UID_INVALID, UID_INVALID, arg_flags);
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
||||
@ -628,7 +628,7 @@ static int action_copy(DissectedImage *m, LoopDevice *d) {
|
||||
|
||||
created_dir = TAKE_PTR(temp);
|
||||
|
||||
r = dissected_image_mount_and_warn(m, created_dir, UID_INVALID, arg_flags);
|
||||
r = dissected_image_mount_and_warn(m, created_dir, UID_INVALID, UID_INVALID, arg_flags);
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
||||
|
@ -67,7 +67,8 @@ Files.TemporaryFileSystem, config_parse_tmpfs, 0, 0
|
||||
Files.Inaccessible, config_parse_inaccessible, 0, 0
|
||||
Files.Overlay, config_parse_overlay, 0, 0
|
||||
Files.OverlayReadOnly, config_parse_overlay, 1, 0
|
||||
Files.PrivateUsersChown, config_parse_tristate, 0, offsetof(Settings, userns_chown)
|
||||
Files.PrivateUsersChown, config_parse_userns_chown, 0, offsetof(Settings, userns_ownership)
|
||||
Files.PrivateUsersOwnership, config_parse_userns_ownership, 0, offsetof(Settings, userns_ownership)
|
||||
Network.Private, config_parse_tristate, 0, offsetof(Settings, private_network)
|
||||
Network.Interface, config_parse_strv, 0, offsetof(Settings, network_interfaces)
|
||||
Network.MACVLAN, config_parse_strv, 0, offsetof(Settings, network_macvlan)
|
||||
|
@ -5,6 +5,7 @@
|
||||
#include "conf-parser.h"
|
||||
#include "cpu-set-util.h"
|
||||
#include "hostname-util.h"
|
||||
#include "namespace-util.h"
|
||||
#include "nspawn-network.h"
|
||||
#include "nspawn-settings.h"
|
||||
#include "parse-util.h"
|
||||
@ -33,7 +34,7 @@ Settings *settings_new(void) {
|
||||
.timezone = _TIMEZONE_MODE_INVALID,
|
||||
|
||||
.userns_mode = _USER_NAMESPACE_MODE_INVALID,
|
||||
.userns_chown = -1,
|
||||
.userns_ownership = _USER_NAMESPACE_OWNERSHIP_INVALID,
|
||||
.uid_shift = UID_INVALID,
|
||||
.uid_range = UID_INVALID,
|
||||
|
||||
@ -84,12 +85,9 @@ int settings_load(FILE *f, const char *path, Settings **ret) {
|
||||
|
||||
/* Make sure that if userns_mode is set, userns_ownership is set to something appropriate, and vice versa. Either
|
||||
* both fields shall be initialized or neither. */
|
||||
if (s->userns_mode == USER_NAMESPACE_PICK)
|
||||
s->userns_chown = true;
|
||||
else if (s->userns_mode != _USER_NAMESPACE_MODE_INVALID && s->userns_chown < 0)
|
||||
s->userns_chown = false;
|
||||
|
||||
if (s->userns_chown >= 0 && s->userns_mode == _USER_NAMESPACE_MODE_INVALID)
|
||||
if (s->userns_mode >= 0 && s->userns_ownership < 0)
|
||||
s->userns_ownership = s->userns_mode == USER_NAMESPACE_PICK ? USER_NAMESPACE_OWNERSHIP_CHOWN : USER_NAMESPACE_OWNERSHIP_OFF;
|
||||
if (s->userns_ownership >= 0 && s->userns_mode < 0)
|
||||
s->userns_mode = USER_NAMESPACE_NO;
|
||||
|
||||
*ret = TAKE_PTR(s);
|
||||
@ -614,7 +612,7 @@ int config_parse_private_users(
|
||||
range++;
|
||||
|
||||
r = safe_atou32(range, &rn);
|
||||
if (r < 0 || rn <= 0) {
|
||||
if (r < 0) {
|
||||
log_syntax(unit, LOG_WARNING, filename, line, r, "UID/GID range invalid, ignoring: %s", range);
|
||||
return 0;
|
||||
}
|
||||
@ -629,6 +627,11 @@ int config_parse_private_users(
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (!userns_shift_range_valid(sh, rn)) {
|
||||
log_syntax(unit, LOG_WARNING, filename, line, 0, "UID/GID shift and range combination invalid, ignoring: %s", range);
|
||||
return 0;
|
||||
}
|
||||
|
||||
settings->userns_mode = USER_NAMESPACE_FIXED;
|
||||
settings->uid_shift = sh;
|
||||
settings->uid_range = rn;
|
||||
@ -863,3 +866,44 @@ static const char *const timezone_mode_table[_TIMEZONE_MODE_MAX] = {
|
||||
};
|
||||
|
||||
DEFINE_STRING_TABLE_LOOKUP_WITH_BOOLEAN(timezone_mode, TimezoneMode, TIMEZONE_AUTO);
|
||||
|
||||
DEFINE_CONFIG_PARSE_ENUM(config_parse_userns_ownership, user_namespace_ownership, UserNamespaceOwnership, "Failed to parse user namespace ownership mode");
|
||||
|
||||
static const char *const user_namespace_ownership_table[_USER_NAMESPACE_OWNERSHIP_MAX] = {
|
||||
[USER_NAMESPACE_OWNERSHIP_OFF] = "off",
|
||||
[USER_NAMESPACE_OWNERSHIP_CHOWN] = "chown",
|
||||
[USER_NAMESPACE_OWNERSHIP_MAP] = "map",
|
||||
[USER_NAMESPACE_OWNERSHIP_AUTO] = "auto",
|
||||
};
|
||||
|
||||
DEFINE_STRING_TABLE_LOOKUP(user_namespace_ownership, UserNamespaceOwnership);
|
||||
|
||||
int config_parse_userns_chown(
|
||||
const char *unit,
|
||||
const char *filename,
|
||||
unsigned line,
|
||||
const char *section,
|
||||
unsigned section_line,
|
||||
const char *lvalue,
|
||||
int ltype,
|
||||
const char *rvalue,
|
||||
void *data,
|
||||
void *userdata) {
|
||||
|
||||
UserNamespaceOwnership *ownership = data;
|
||||
int r;
|
||||
|
||||
assert(rvalue);
|
||||
assert(ownership);
|
||||
|
||||
/* Compatibility support for UserNamespaceChown=, whose job has been taken over by UserNamespaceOwnership= */
|
||||
|
||||
r = parse_boolean(rvalue);
|
||||
if (r < 0) {
|
||||
log_syntax(unit, LOG_WARNING, filename, line, r, "Failed to parse user namespace ownership mode, ignoring: %s", rvalue);
|
||||
return 0;
|
||||
}
|
||||
|
||||
*ownership = r ? USER_NAMESPACE_OWNERSHIP_CHOWN : USER_NAMESPACE_OWNERSHIP_OFF;
|
||||
return 0;
|
||||
}
|
||||
|
@ -36,6 +36,15 @@ typedef enum UserNamespaceMode {
|
||||
_USER_NAMESPACE_MODE_INVALID = -EINVAL,
|
||||
} UserNamespaceMode;
|
||||
|
||||
/* How the ownership of the container's directory tree is brought in line with the UID/GID range selected
 * for user namespacing (--private-users-ownership= / PrivateUsersOwnership=). */
typedef enum UserNamespaceOwnership {
        USER_NAMESPACE_OWNERSHIP_OFF,   /* leave the image as is */
        USER_NAMESPACE_OWNERSHIP_CHOWN, /* recursively chown() the tree as needed */
        USER_NAMESPACE_OWNERSHIP_MAP,   /* use transparent ID mapping mounts */
        USER_NAMESPACE_OWNERSHIP_AUTO,  /* "map" where available, "chown" where not */
        _USER_NAMESPACE_OWNERSHIP_MAX,
        /* Negative errno-style sentinel, same convention as _USER_NAMESPACE_MODE_INVALID, so that the
         * value is meaningful when handed to log_error_errno(). All users only test for "< 0". */
        _USER_NAMESPACE_OWNERSHIP_INVALID = -EINVAL,
} UserNamespaceOwnership;
|
||||
|
||||
typedef enum ResolvConfMode {
|
||||
RESOLV_CONF_OFF,
|
||||
RESOLV_CONF_COPY_HOST, /* /etc/resolv.conf */
|
||||
@ -185,7 +194,7 @@ typedef struct Settings {
|
||||
VolatileMode volatile_mode;
|
||||
CustomMount *custom_mounts;
|
||||
size_t n_custom_mounts;
|
||||
int userns_chown;
|
||||
UserNamespaceOwnership userns_ownership;
|
||||
|
||||
/* [Network] */
|
||||
int private_network;
|
||||
@ -255,6 +264,8 @@ CONFIG_PARSER_PROTOTYPE(config_parse_cpu_affinity);
|
||||
CONFIG_PARSER_PROTOTYPE(config_parse_resolv_conf);
|
||||
CONFIG_PARSER_PROTOTYPE(config_parse_link_journal);
|
||||
CONFIG_PARSER_PROTOTYPE(config_parse_timezone);
|
||||
CONFIG_PARSER_PROTOTYPE(config_parse_userns_chown);
|
||||
CONFIG_PARSER_PROTOTYPE(config_parse_userns_ownership);
|
||||
|
||||
const char *resolv_conf_mode_to_string(ResolvConfMode a) _const_;
|
||||
ResolvConfMode resolv_conf_mode_from_string(const char *s) _pure_;
|
||||
@ -262,6 +273,9 @@ ResolvConfMode resolv_conf_mode_from_string(const char *s) _pure_;
|
||||
const char *timezone_mode_to_string(TimezoneMode a) _const_;
|
||||
TimezoneMode timezone_mode_from_string(const char *s) _pure_;
|
||||
|
||||
const char *user_namespace_ownership_to_string(UserNamespaceOwnership a) _const_;
|
||||
UserNamespaceOwnership user_namespace_ownership_from_string(const char *s) _pure_;
|
||||
|
||||
int parse_link_journal(const char *s, LinkJournal *ret_mode, bool *ret_try);
|
||||
|
||||
void device_node_array_free(DeviceNode *node, size_t n);
|
||||
|
@ -194,7 +194,7 @@ static char **arg_property = NULL;
|
||||
static sd_bus_message *arg_property_message = NULL;
|
||||
static UserNamespaceMode arg_userns_mode = USER_NAMESPACE_NO;
|
||||
static uid_t arg_uid_shift = UID_INVALID, arg_uid_range = 0x10000U;
|
||||
static bool arg_userns_chown = false;
|
||||
static UserNamespaceOwnership arg_userns_ownership = _USER_NAMESPACE_OWNERSHIP_INVALID;
|
||||
static int arg_kill_signal = 0;
|
||||
static CGroupUnified arg_unified_cgroup_hierarchy = CGROUP_UNIFIED_UNKNOWN;
|
||||
static SettingsMask arg_settings_mask = 0;
|
||||
@ -352,7 +352,9 @@ static int help(void) {
|
||||
" -U --private-users=pick Run within user namespace, autoselect UID/GID range\n"
|
||||
" --private-users[=UIDBASE[:NUIDS]]\n"
|
||||
" Similar, but with user configured UID/GID range\n"
|
||||
" --private-users-chown Adjust OS tree ownership to private UID/GID range\n\n"
|
||||
" --private-users-ownership=MODE\n"
|
||||
" Adjust ('chown') or map ('map') OS tree ownership\n"
|
||||
" to private UID/GID range\n\n"
|
||||
"%3$sNetworking:%4$s\n"
|
||||
" --private-network Disable network in container\n"
|
||||
" --network-interface=INTERFACE\n"
|
||||
@ -449,10 +451,10 @@ static int custom_mount_check_all(void) {
|
||||
CustomMount *m = &arg_custom_mounts[i];
|
||||
|
||||
if (path_equal(m->destination, "/") && arg_userns_mode != USER_NAMESPACE_NO) {
|
||||
if (arg_userns_chown)
|
||||
if (arg_userns_ownership != USER_NAMESPACE_OWNERSHIP_OFF)
|
||||
return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
|
||||
"--private-users-chown may not be combined with custom root mounts.");
|
||||
else if (arg_uid_shift == UID_INVALID)
|
||||
"--private-users-ownership=own not be combined with custom root mounts.");
|
||||
if (arg_uid_shift == UID_INVALID)
|
||||
return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
|
||||
"--private-users with automatic UID shift may not be combined with custom root mounts.");
|
||||
}
|
||||
@ -685,6 +687,7 @@ static int parse_argv(int argc, char *argv[]) {
|
||||
ARG_CHDIR,
|
||||
ARG_PIVOT_ROOT,
|
||||
ARG_PRIVATE_USERS_CHOWN,
|
||||
ARG_PRIVATE_USERS_OWNERSHIP,
|
||||
ARG_NOTIFY_READY,
|
||||
ARG_ROOT_HASH,
|
||||
ARG_ROOT_HASH_SIG,
|
||||
@ -752,7 +755,8 @@ static int parse_argv(int argc, char *argv[]) {
|
||||
{ "port", required_argument, NULL, 'p' },
|
||||
{ "property", required_argument, NULL, ARG_PROPERTY },
|
||||
{ "private-users", optional_argument, NULL, ARG_PRIVATE_USERS },
|
||||
{ "private-users-chown", optional_argument, NULL, ARG_PRIVATE_USERS_CHOWN },
|
||||
{ "private-users-chown", optional_argument, NULL, ARG_PRIVATE_USERS_CHOWN }, /* obsolete */
|
||||
{ "private-users-ownership",required_argument, NULL, ARG_PRIVATE_USERS_OWNERSHIP},
|
||||
{ "kill-signal", required_argument, NULL, ARG_KILL_SIGNAL },
|
||||
{ "settings", required_argument, NULL, ARG_SETTINGS },
|
||||
{ "chdir", required_argument, NULL, ARG_CHDIR },
|
||||
@ -1195,29 +1199,41 @@ static int parse_argv(int argc, char *argv[]) {
|
||||
break;
|
||||
|
||||
case ARG_PRIVATE_USERS: {
|
||||
int boolean = -1;
|
||||
int boolean;
|
||||
|
||||
if (!optarg)
|
||||
boolean = true;
|
||||
else if (!in_charset(optarg, DIGITS))
|
||||
/* do *not* parse numbers as booleans */
|
||||
boolean = parse_boolean(optarg);
|
||||
else
|
||||
boolean = -1;
|
||||
|
||||
if (boolean == false) {
|
||||
if (boolean == 0) {
|
||||
/* no: User namespacing off */
|
||||
arg_userns_mode = USER_NAMESPACE_NO;
|
||||
arg_uid_shift = UID_INVALID;
|
||||
arg_uid_range = UINT32_C(0x10000);
|
||||
} else if (boolean == true) {
|
||||
} else if (boolean > 0) {
|
||||
/* yes: User namespacing on, UID range is read from root dir */
|
||||
arg_userns_mode = USER_NAMESPACE_FIXED;
|
||||
arg_uid_shift = UID_INVALID;
|
||||
arg_uid_range = UINT32_C(0x10000);
|
||||
} else if (streq(optarg, "pick")) {
|
||||
/* pick: User namespacing on, UID range is picked randomly */
|
||||
arg_userns_mode = USER_NAMESPACE_PICK;
|
||||
arg_userns_mode = USER_NAMESPACE_PICK; /* Note that arg_userns_ownership is
|
||||
* implied by USER_NAMESPACE_PICK
|
||||
* further down. */
|
||||
arg_uid_shift = UID_INVALID;
|
||||
arg_uid_range = UINT32_C(0x10000);
|
||||
|
||||
} else if (streq(optarg, "identity")) {
|
||||
/* identity: User namespaces on, the 0…0xFFFF UID range is mapped to
|
||||
* itself, i.e. we don't actually map anything, but do take benefit of
|
||||
* isolation of capability sets. */
|
||||
arg_userns_mode = USER_NAMESPACE_FIXED;
|
||||
arg_uid_shift = 0;
|
||||
arg_uid_range = UINT32_C(0x10000);
|
||||
} else {
|
||||
_cleanup_free_ char *buffer = NULL;
|
||||
const char *range, *shift;
|
||||
@ -1243,11 +1259,10 @@ static int parse_argv(int argc, char *argv[]) {
|
||||
return log_error_errno(r, "Failed to parse UID \"%s\": %m", optarg);
|
||||
|
||||
arg_userns_mode = USER_NAMESPACE_FIXED;
|
||||
}
|
||||
|
||||
if (arg_uid_range <= 0)
|
||||
return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
|
||||
"UID range cannot be 0.");
|
||||
if (!userns_shift_range_valid(arg_uid_shift, arg_uid_range))
|
||||
return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "UID range cannot be empty or go beyond " UID_FMT ".", UID_INVALID);
|
||||
}
|
||||
|
||||
arg_settings_mask |= SETTING_USERNS;
|
||||
break;
|
||||
@ -1255,7 +1270,9 @@ static int parse_argv(int argc, char *argv[]) {
|
||||
|
||||
case 'U':
|
||||
if (userns_supported()) {
|
||||
arg_userns_mode = USER_NAMESPACE_PICK;
|
||||
arg_userns_mode = USER_NAMESPACE_PICK; /* Note that arg_userns_ownership is
|
||||
* implied by USER_NAMESPACE_PICK
|
||||
* further down. */
|
||||
arg_uid_shift = UID_INVALID;
|
||||
arg_uid_range = UINT32_C(0x10000);
|
||||
|
||||
@ -1265,7 +1282,20 @@ static int parse_argv(int argc, char *argv[]) {
|
||||
break;
|
||||
|
||||
case ARG_PRIVATE_USERS_CHOWN:
|
||||
arg_userns_chown = true;
|
||||
arg_userns_ownership = USER_NAMESPACE_OWNERSHIP_CHOWN;
|
||||
|
||||
arg_settings_mask |= SETTING_USERNS;
|
||||
break;
|
||||
|
||||
case ARG_PRIVATE_USERS_OWNERSHIP:
|
||||
if (streq(optarg, "help")) {
|
||||
DUMP_STRING_TABLE(user_namespace_ownership, UserNamespaceOwnership, _USER_NAMESPACE_OWNERSHIP_MAX);
|
||||
return 0;
|
||||
}
|
||||
|
||||
arg_userns_ownership = user_namespace_ownership_from_string(optarg);
|
||||
if (arg_userns_ownership < 0)
|
||||
return log_error_errno(arg_userns_ownership, "Cannot parse --user-namespace-ownership= value: %s", optarg);
|
||||
|
||||
arg_settings_mask |= SETTING_USERNS;
|
||||
break;
|
||||
@ -1701,8 +1731,10 @@ static int verify_arguments(void) {
|
||||
return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "--boot cannot be used without namespacing.");
|
||||
}
|
||||
|
||||
if (arg_userns_mode == USER_NAMESPACE_PICK)
|
||||
arg_userns_chown = true;
|
||||
if (arg_userns_ownership < 0)
|
||||
arg_userns_ownership =
|
||||
arg_userns_mode == USER_NAMESPACE_PICK ? USER_NAMESPACE_OWNERSHIP_AUTO :
|
||||
USER_NAMESPACE_OWNERSHIP_OFF;
|
||||
|
||||
if (arg_start_mode == START_BOOT && arg_kill_signal <= 0)
|
||||
arg_kill_signal = SIGRTMIN+3;
|
||||
@ -1736,15 +1768,15 @@ static int verify_arguments(void) {
|
||||
if (arg_userns_mode != USER_NAMESPACE_NO && !userns_supported())
|
||||
return log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP), "--private-users= is not supported, kernel compiled without user namespace support.");
|
||||
|
||||
if (arg_userns_chown && arg_read_only)
|
||||
if (arg_userns_ownership == USER_NAMESPACE_OWNERSHIP_CHOWN && arg_read_only)
|
||||
return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
|
||||
"--read-only and --private-users-chown may not be combined.");
|
||||
"--read-only and --private-users-ownership=chown may not be combined.");
|
||||
|
||||
/* We don't support --private-users-chown together with any of the volatile modes since we couldn't
|
||||
* change the read-only part of the tree (i.e. /usr) anyway, or because it would trigger a massive
|
||||
* copy-up (in case of overlay) making the entire exercise pointless. */
|
||||
if (arg_userns_chown && arg_volatile_mode != VOLATILE_NO)
|
||||
return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "--volatile= and --private-users-chown may not be combined.");
|
||||
/* We don't support --private-users-ownership=chown together with any of the volatile modes since we
|
||||
* couldn't change the read-only part of the tree (i.e. /usr) anyway, or because it would trigger a
|
||||
* massive copy-up (in case of overlay) making the entire exercise pointless. */
|
||||
if (arg_userns_ownership == USER_NAMESPACE_OWNERSHIP_CHOWN && arg_volatile_mode != VOLATILE_NO)
|
||||
return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "--volatile= and --private-users-ownership=chown may not be combined.");
|
||||
|
||||
/* If --network-namespace-path is given with any other network-related option (except --private-network),
|
||||
* we need to error out, to avoid conflicts between different network options. */
|
||||
@ -2781,7 +2813,7 @@ static int recursive_chown(const char *directory, uid_t shift, uid_t range) {
|
||||
|
||||
assert(directory);
|
||||
|
||||
if (arg_userns_mode == USER_NAMESPACE_NO || !arg_userns_chown)
|
||||
if (arg_userns_mode == USER_NAMESPACE_NO || arg_userns_ownership != USER_NAMESPACE_OWNERSHIP_CHOWN)
|
||||
return 0;
|
||||
|
||||
r = path_patch_uid(directory, arg_uid_shift, arg_uid_range);
|
||||
@ -3012,7 +3044,6 @@ static int chase_symlinks_and_update(char **p, unsigned flags) {
|
||||
}
|
||||
|
||||
static int determine_uid_shift(const char *directory) {
|
||||
int r;
|
||||
|
||||
if (arg_userns_mode == USER_NAMESPACE_NO) {
|
||||
arg_uid_shift = 0;
|
||||
@ -3022,8 +3053,9 @@ static int determine_uid_shift(const char *directory) {
|
||||
if (arg_uid_shift == UID_INVALID) {
|
||||
struct stat st;
|
||||
|
||||
r = stat(directory, &st);
|
||||
if (r < 0)
|
||||
/* Read the UID shift off the image. Maybe we can reuse this to avoid chowning. */
|
||||
|
||||
if (stat(directory, &st) < 0)
|
||||
return log_error_errno(errno, "Failed to determine UID base of %s: %m", directory);
|
||||
|
||||
arg_uid_shift = st.st_uid & UINT32_C(0xffff0000);
|
||||
@ -3033,11 +3065,22 @@ static int determine_uid_shift(const char *directory) {
|
||||
"UID and GID base of %s don't match.", directory);
|
||||
|
||||
arg_uid_range = UINT32_C(0x10000);
|
||||
|
||||
if (arg_uid_shift != 0) {
|
||||
/* If the image is shifted already, then we'll fall back to classic chowning, for
|
||||
* compatibility (and simplicity), or refuse if mapping is explicitly requested. */
|
||||
|
||||
if (arg_userns_ownership == USER_NAMESPACE_OWNERSHIP_AUTO) {
|
||||
log_debug("UID base of %s is non-zero, not using UID mapping.", directory);
|
||||
arg_userns_ownership = USER_NAMESPACE_OWNERSHIP_CHOWN;
|
||||
} else if (arg_userns_ownership == USER_NAMESPACE_OWNERSHIP_MAP)
|
||||
return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
|
||||
"UID base of %s is not zero, UID mapping not supported.", directory);
|
||||
}
|
||||
}
|
||||
|
||||
if (arg_uid_shift > UID_INVALID - arg_uid_range)
|
||||
return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
|
||||
"UID base too high for UID range.");
|
||||
if (!userns_shift_range_valid(arg_uid_shift, arg_uid_range))
|
||||
return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "UID base too high for UID range.");
|
||||
|
||||
return 0;
|
||||
}
|
||||
@ -3527,6 +3570,7 @@ static int outer_child(
|
||||
|
||||
_cleanup_strv_free_ char **os_release_pairs = NULL;
|
||||
_cleanup_close_ int fd = -1;
|
||||
bool idmap = false;
|
||||
const char *p;
|
||||
pid_t pid;
|
||||
ssize_t l;
|
||||
@ -3574,6 +3618,7 @@ static int outer_child(
|
||||
dissected_image,
|
||||
directory,
|
||||
arg_uid_shift,
|
||||
arg_uid_range,
|
||||
DISSECT_IMAGE_MOUNT_ROOT_ONLY|
|
||||
DISSECT_IMAGE_DISCARD_ON_LOOP|
|
||||
DISSECT_IMAGE_USR_NO_ROOT|
|
||||
@ -3629,6 +3674,32 @@ static int outer_child(
|
||||
directory = "/run/systemd/nspawn-root";
|
||||
}
|
||||
|
||||
if (arg_userns_mode != USER_NAMESPACE_NO &&
|
||||
IN_SET(arg_userns_ownership, USER_NAMESPACE_OWNERSHIP_MAP, USER_NAMESPACE_OWNERSHIP_AUTO) &&
|
||||
arg_uid_shift != 0) {
|
||||
r = make_mount_point(directory);
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
||||
r = remount_idmap(directory, arg_uid_shift, arg_uid_range);
|
||||
if (r == -EINVAL || ERRNO_IS_NOT_SUPPORTED(r)) {
|
||||
/* This might fail because the kernel or file system doesn't support idmapping. We
|
||||
* can't really distinguish this nicely, nor do we have any guarantees about the
|
||||
* error codes we see, could be EOPNOTSUPP or EINVAL. */
|
||||
if (arg_userns_ownership != USER_NAMESPACE_OWNERSHIP_AUTO)
|
||||
return log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP),
|
||||
"ID mapped mounts are apparently not available, sorry.");
|
||||
|
||||
log_debug("ID mapped mounts are apparently not available on this kernel or for the selected file system, reverting to recursive chown()ing.");
|
||||
arg_userns_ownership = USER_NAMESPACE_OWNERSHIP_CHOWN;
|
||||
} else if (r < 0)
|
||||
return log_error_errno(r, "Failed to set up ID mapped mounts: %m");
|
||||
else {
|
||||
log_debug("ID mapped mounts available, making use of them.");
|
||||
idmap = true;
|
||||
}
|
||||
}
|
||||
|
||||
r = setup_pivot_root(
|
||||
directory,
|
||||
arg_pivot_root_new,
|
||||
@ -3655,11 +3726,9 @@ static int outer_child(
|
||||
return r;
|
||||
|
||||
/* Make sure we always have a mount that we can move to root later on. */
|
||||
if (!path_is_mount_point(directory, NULL, 0)) {
|
||||
r = mount_nofollow_verbose(LOG_ERR, directory, directory, NULL, MS_BIND|MS_REC, NULL);
|
||||
if (r < 0)
|
||||
return r;
|
||||
}
|
||||
r = make_mount_point(directory);
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
||||
if (dissected_image) {
|
||||
/* Now we know the uid shift, let's now mount everything else that might be in the image. */
|
||||
@ -3667,10 +3736,12 @@ static int outer_child(
|
||||
dissected_image,
|
||||
directory,
|
||||
arg_uid_shift,
|
||||
arg_uid_range,
|
||||
DISSECT_IMAGE_MOUNT_NON_ROOT_ONLY|
|
||||
DISSECT_IMAGE_DISCARD_ON_LOOP|
|
||||
DISSECT_IMAGE_USR_NO_ROOT|
|
||||
(arg_read_only ? DISSECT_IMAGE_READ_ONLY : DISSECT_IMAGE_FSCK|DISSECT_IMAGE_GROWFS));
|
||||
(arg_read_only ? DISSECT_IMAGE_READ_ONLY : DISSECT_IMAGE_FSCK|DISSECT_IMAGE_GROWFS)|
|
||||
(idmap ? DISSECT_IMAGE_MOUNT_IDMAPPED : 0));
|
||||
if (r == -EUCLEAN)
|
||||
return log_error_errno(r, "File system check for image failed: %m");
|
||||
if (r < 0)
|
||||
@ -4226,7 +4297,7 @@ static int merge_settings(Settings *settings, const char *path) {
|
||||
arg_userns_mode = settings->userns_mode;
|
||||
arg_uid_shift = settings->uid_shift;
|
||||
arg_uid_range = settings->uid_range;
|
||||
arg_userns_chown = settings->userns_chown;
|
||||
arg_userns_ownership = settings->userns_ownership;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -426,7 +426,7 @@ static int portable_extract_by_path(
|
||||
if (r == 0) {
|
||||
seq[0] = safe_close(seq[0]);
|
||||
|
||||
r = dissected_image_mount(m, tmpdir, UID_INVALID, DISSECT_IMAGE_READ_ONLY);
|
||||
r = dissected_image_mount(m, tmpdir, UID_INVALID, UID_INVALID, DISSECT_IMAGE_READ_ONLY);
|
||||
if (r < 0) {
|
||||
log_debug_errno(r, "Failed to mount dissected image: %m");
|
||||
goto child_finish;
|
||||
|
@ -1472,11 +1472,12 @@ static int mount_partition(
|
||||
const char *where,
|
||||
const char *directory,
|
||||
uid_t uid_shift,
|
||||
uid_t uid_range,
|
||||
DissectImageFlags flags) {
|
||||
|
||||
_cleanup_free_ char *chased = NULL, *options = NULL;
|
||||
const char *p, *node, *fstype;
|
||||
bool rw;
|
||||
bool rw, remap_uid_gid = false;
|
||||
int r;
|
||||
|
||||
assert(m);
|
||||
@ -1536,14 +1537,18 @@ static int mount_partition(
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
if (uid_is_valid(uid_shift) && uid_shift != 0 && fstype_can_uid_gid(fstype)) {
|
||||
_cleanup_free_ char *uid_option = NULL;
|
||||
if (uid_is_valid(uid_shift) && uid_shift != 0) {
|
||||
|
||||
if (asprintf(&uid_option, "uid=" UID_FMT ",gid=" GID_FMT, uid_shift, (gid_t) uid_shift) < 0)
|
||||
return -ENOMEM;
|
||||
if (fstype_can_uid_gid(fstype)) {
|
||||
_cleanup_free_ char *uid_option = NULL;
|
||||
|
||||
if (!strextend_with_separator(&options, ",", uid_option))
|
||||
return -ENOMEM;
|
||||
if (asprintf(&uid_option, "uid=" UID_FMT ",gid=" GID_FMT, uid_shift, (gid_t) uid_shift) < 0)
|
||||
return -ENOMEM;
|
||||
|
||||
if (!strextend_with_separator(&options, ",", uid_option))
|
||||
return -ENOMEM;
|
||||
} else if (FLAGS_SET(flags, DISSECT_IMAGE_MOUNT_IDMAPPED))
|
||||
remap_uid_gid = true;
|
||||
}
|
||||
|
||||
if (!isempty(m->mount_options))
|
||||
@ -1578,6 +1583,12 @@ static int mount_partition(
|
||||
if (rw && m->growfs && FLAGS_SET(flags, DISSECT_IMAGE_GROWFS))
|
||||
(void) fs_grow(node, p);
|
||||
|
||||
if (remap_uid_gid) {
|
||||
r = remount_idmap(p, uid_shift, uid_range);
|
||||
if (r < 0)
|
||||
return r;
|
||||
}
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
@ -1607,7 +1618,13 @@ static int mount_root_tmpfs(const char *where, uid_t uid_shift, DissectImageFlag
|
||||
return 1;
|
||||
}
|
||||
|
||||
int dissected_image_mount(DissectedImage *m, const char *where, uid_t uid_shift, DissectImageFlags flags) {
|
||||
int dissected_image_mount(
|
||||
DissectedImage *m,
|
||||
const char *where,
|
||||
uid_t uid_shift,
|
||||
uid_t uid_range,
|
||||
DissectImageFlags flags) {
|
||||
|
||||
int r, xbootldr_mounted;
|
||||
|
||||
assert(m);
|
||||
@ -1631,14 +1648,14 @@ int dissected_image_mount(DissectedImage *m, const char *where, uid_t uid_shift,
|
||||
|
||||
/* First mount the root fs. If there's none we use a tmpfs. */
|
||||
if (m->partitions[PARTITION_ROOT].found)
|
||||
r = mount_partition(m->partitions + PARTITION_ROOT, where, NULL, uid_shift, flags);
|
||||
r = mount_partition(m->partitions + PARTITION_ROOT, where, NULL, uid_shift, uid_range, flags);
|
||||
else
|
||||
r = mount_root_tmpfs(where, uid_shift, flags);
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
||||
/* For us mounting root always means mounting /usr as well */
|
||||
r = mount_partition(m->partitions + PARTITION_USR, where, "/usr", uid_shift, flags);
|
||||
r = mount_partition(m->partitions + PARTITION_USR, where, "/usr", uid_shift, uid_range, flags);
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
||||
@ -1659,23 +1676,23 @@ int dissected_image_mount(DissectedImage *m, const char *where, uid_t uid_shift,
|
||||
if (flags & DISSECT_IMAGE_MOUNT_ROOT_ONLY)
|
||||
return 0;
|
||||
|
||||
r = mount_partition(m->partitions + PARTITION_HOME, where, "/home", uid_shift, flags);
|
||||
r = mount_partition(m->partitions + PARTITION_HOME, where, "/home", uid_shift, uid_range, flags);
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
||||
r = mount_partition(m->partitions + PARTITION_SRV, where, "/srv", uid_shift, flags);
|
||||
r = mount_partition(m->partitions + PARTITION_SRV, where, "/srv", uid_shift, uid_range, flags);
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
||||
r = mount_partition(m->partitions + PARTITION_VAR, where, "/var", uid_shift, flags);
|
||||
r = mount_partition(m->partitions + PARTITION_VAR, where, "/var", uid_shift, uid_range, flags);
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
||||
r = mount_partition(m->partitions + PARTITION_TMP, where, "/var/tmp", uid_shift, flags);
|
||||
r = mount_partition(m->partitions + PARTITION_TMP, where, "/var/tmp", uid_shift, uid_range, flags);
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
||||
xbootldr_mounted = mount_partition(m->partitions + PARTITION_XBOOTLDR, where, "/boot", uid_shift, flags);
|
||||
xbootldr_mounted = mount_partition(m->partitions + PARTITION_XBOOTLDR, where, "/boot", uid_shift, uid_range, flags);
|
||||
if (xbootldr_mounted < 0)
|
||||
return xbootldr_mounted;
|
||||
|
||||
@ -1701,7 +1718,7 @@ int dissected_image_mount(DissectedImage *m, const char *where, uid_t uid_shift,
|
||||
return r;
|
||||
} else if (dir_is_empty(p) > 0) {
|
||||
/* It exists and is an empty directory. Let's mount the ESP there. */
|
||||
r = mount_partition(m->partitions + PARTITION_ESP, where, "/boot", uid_shift, flags);
|
||||
r = mount_partition(m->partitions + PARTITION_ESP, where, "/boot", uid_shift, uid_range, flags);
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
||||
@ -1713,7 +1730,7 @@ int dissected_image_mount(DissectedImage *m, const char *where, uid_t uid_shift,
|
||||
if (!esp_done) {
|
||||
/* OK, let's mount the ESP now to /efi (possibly creating the dir if missing) */
|
||||
|
||||
r = mount_partition(m->partitions + PARTITION_ESP, where, "/efi", uid_shift, flags);
|
||||
r = mount_partition(m->partitions + PARTITION_ESP, where, "/efi", uid_shift, uid_range, flags);
|
||||
if (r < 0)
|
||||
return r;
|
||||
}
|
||||
@ -1722,13 +1739,19 @@ int dissected_image_mount(DissectedImage *m, const char *where, uid_t uid_shift,
|
||||
return 0;
|
||||
}
|
||||
|
||||
int dissected_image_mount_and_warn(DissectedImage *m, const char *where, uid_t uid_shift, DissectImageFlags flags) {
|
||||
int dissected_image_mount_and_warn(
|
||||
DissectedImage *m,
|
||||
const char *where,
|
||||
uid_t uid_shift,
|
||||
uid_t uid_range,
|
||||
DissectImageFlags flags) {
|
||||
|
||||
int r;
|
||||
|
||||
assert(m);
|
||||
assert(where);
|
||||
|
||||
r = dissected_image_mount(m, where, uid_shift, flags);
|
||||
r = dissected_image_mount(m, where, uid_shift, uid_range, flags);
|
||||
if (r == -ENXIO)
|
||||
return log_error_errno(r, "Not root file system found in image.");
|
||||
if (r == -EMEDIUMTYPE)
|
||||
@ -2523,6 +2546,7 @@ int dissected_image_acquire_metadata(DissectedImage *m) {
|
||||
m,
|
||||
t,
|
||||
UID_INVALID,
|
||||
UID_INVALID,
|
||||
DISSECT_IMAGE_READ_ONLY|
|
||||
DISSECT_IMAGE_MOUNT_ROOT_ONLY|
|
||||
DISSECT_IMAGE_VALIDATE_OS|
|
||||
@ -2811,7 +2835,7 @@ int mount_image_privately_interactively(
|
||||
|
||||
created_dir = TAKE_PTR(temp);
|
||||
|
||||
r = dissected_image_mount_and_warn(dissected_image, created_dir, UID_INVALID, flags);
|
||||
r = dissected_image_mount_and_warn(dissected_image, created_dir, UID_INVALID, UID_INVALID, flags);
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
||||
@ -2917,7 +2941,7 @@ int verity_dissect_and_mount(
|
||||
if (r < 0)
|
||||
return log_debug_errno(r, "Failed to umount under destination directory %s: %m", dest);
|
||||
|
||||
r = dissected_image_mount(dissected_image, dest, UID_INVALID, dissect_image_flags);
|
||||
r = dissected_image_mount(dissected_image, dest, UID_INVALID, UID_INVALID, dissect_image_flags);
|
||||
if (r < 0)
|
||||
return log_debug_errno(r, "Failed to mount image: %m");
|
||||
|
||||
|
@ -112,6 +112,7 @@ typedef enum DissectImageFlags {
|
||||
DISSECT_IMAGE_READ_ONLY = DISSECT_IMAGE_DEVICE_READ_ONLY |
|
||||
DISSECT_IMAGE_MOUNT_READ_ONLY,
|
||||
DISSECT_IMAGE_GROWFS = 1 << 18, /* Grow file systems in partitions marked for that to the size of the partitions after mount */
|
||||
DISSECT_IMAGE_MOUNT_IDMAPPED = 1 << 19, /* Mount mounts with kernel 5.12-style userns ID mapping, if file system type doesn't support uid=/gid= */
|
||||
} DissectImageFlags;
|
||||
|
||||
struct DissectedImage {
|
||||
@ -169,8 +170,8 @@ DEFINE_TRIVIAL_CLEANUP_FUNC(DissectedImage*, dissected_image_unref);
|
||||
|
||||
int dissected_image_decrypt(DissectedImage *m, const char *passphrase, const VeritySettings *verity, DissectImageFlags flags, DecryptedImage **ret);
|
||||
int dissected_image_decrypt_interactively(DissectedImage *m, const char *passphrase, const VeritySettings *verity, DissectImageFlags flags, DecryptedImage **ret);
|
||||
int dissected_image_mount(DissectedImage *m, const char *dest, uid_t uid_shift, DissectImageFlags flags);
|
||||
int dissected_image_mount_and_warn(DissectedImage *m, const char *where, uid_t uid_shift, DissectImageFlags flags);
|
||||
int dissected_image_mount(DissectedImage *m, const char *dest, uid_t uid_shift, uid_t uid_range, DissectImageFlags flags);
|
||||
int dissected_image_mount_and_warn(DissectedImage *m, const char *where, uid_t uid_shift, uid_t uid_range, DissectImageFlags flags);
|
||||
|
||||
int dissected_image_acquire_metadata(DissectedImage *m);
|
||||
|
||||
|
@ -1,12 +1,13 @@
|
||||
/* SPDX-License-Identifier: LGPL-2.1-or-later */
|
||||
|
||||
#include <errno.h>
|
||||
#include <linux/loop.h>
|
||||
#include <stdlib.h>
|
||||
#include <sys/mount.h>
|
||||
#include <sys/stat.h>
|
||||
#include <sys/statvfs.h>
|
||||
#include <unistd.h>
|
||||
#include <linux/loop.h>
|
||||
#include <linux/fs.h>
|
||||
|
||||
#include "alloc-util.h"
|
||||
#include "dissect-image.h"
|
||||
@ -16,6 +17,7 @@
|
||||
#include "fs-util.h"
|
||||
#include "hashmap.h"
|
||||
#include "libmount-util.h"
|
||||
#include "missing_syscall.h"
|
||||
#include "mkdir.h"
|
||||
#include "mount-util.h"
|
||||
#include "mountpoint-util.h"
|
||||
@ -986,3 +988,104 @@ int mount_image_in_namespace(
|
||||
|
||||
return mount_in_namespace(target, propagate_path, incoming_path, src, dest, read_only, make_file_or_directory, options, true);
|
||||
}
|
||||
|
||||
int make_mount_point(const char *path) {
|
||||
int r;
|
||||
|
||||
assert(path);
|
||||
|
||||
/* If 'path' is already a mount point, does nothing and returns 0. If it is not it makes it one, and returns 1. */
|
||||
|
||||
r = path_is_mount_point(path, NULL, 0);
|
||||
if (r < 0)
|
||||
return log_debug_errno(r, "Failed to determine whether '%s' is a mount point: %m", path);
|
||||
if (r > 0)
|
||||
return 0;
|
||||
|
||||
r = mount_nofollow_verbose(LOG_DEBUG, path, path, NULL, MS_BIND|MS_REC, NULL);
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int make_userns(uid_t uid_shift, uid_t uid_range) {
|
||||
char uid_map[STRLEN("/proc//uid_map") + DECIMAL_STR_MAX(uid_t) + 1], line[DECIMAL_STR_MAX(uid_t)*3+3+1];
|
||||
_cleanup_(sigkill_waitp) pid_t pid = 0;
|
||||
_cleanup_close_ int userns_fd = -1;
|
||||
int r;
|
||||
|
||||
/* Allocates a userns file descriptor with the mapping we need. For this we'll fork off a child
|
||||
* process whose only purpose is to give us a new user namespace. It's killed when we got it. */
|
||||
|
||||
r = safe_fork("(sd-mkuserns)", FORK_CLOSE_ALL_FDS|FORK_DEATHSIG|FORK_NEW_USERNS, &pid);
|
||||
if (r < 0)
|
||||
return r;
|
||||
if (r == 0) {
|
||||
/* Child. We do nothing here, just freeze until somebody kills us. */
|
||||
freeze();
|
||||
_exit(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
xsprintf(line, UID_FMT " " UID_FMT " " UID_FMT "\n", 0, uid_shift, uid_range);
|
||||
|
||||
xsprintf(uid_map, "/proc/" PID_FMT "/uid_map", pid);
|
||||
r = write_string_file(uid_map, line, WRITE_STRING_FILE_DISABLE_BUFFER);
|
||||
if (r < 0)
|
||||
return log_error_errno(r, "Failed to write UID map: %m");
|
||||
|
||||
/* We always assign the same UID and GID ranges */
|
||||
xsprintf(uid_map, "/proc/" PID_FMT "/gid_map", pid);
|
||||
r = write_string_file(uid_map, line, WRITE_STRING_FILE_DISABLE_BUFFER);
|
||||
if (r < 0)
|
||||
return log_error_errno(r, "Failed to write GID map: %m");
|
||||
|
||||
r = namespace_open(pid, NULL, NULL, NULL, &userns_fd, NULL);
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
||||
return TAKE_FD(userns_fd);
|
||||
}
|
||||
|
||||
int remount_idmap(
|
||||
const char *p,
|
||||
uid_t uid_shift,
|
||||
uid_t uid_range) {
|
||||
|
||||
_cleanup_close_ int mount_fd = -1, userns_fd = -1;
|
||||
int r;
|
||||
|
||||
assert(p);
|
||||
|
||||
if (!userns_shift_range_valid(uid_shift, uid_range))
|
||||
return -EINVAL;
|
||||
|
||||
/* Clone the mount point */
|
||||
mount_fd = open_tree(-1, p, OPEN_TREE_CLONE | OPEN_TREE_CLOEXEC);
|
||||
if (mount_fd < 0)
|
||||
return log_debug_errno(errno, "Failed to open tree of mounted filesystem '%s': %m", p);
|
||||
|
||||
/* Create a user namespace mapping */
|
||||
userns_fd = make_userns(uid_shift, uid_range);
|
||||
if (userns_fd < 0)
|
||||
return userns_fd;
|
||||
|
||||
/* Set the user namespace mapping attribute on the cloned mount point */
|
||||
if (mount_setattr(mount_fd, "", AT_EMPTY_PATH | AT_RECURSIVE,
|
||||
&(struct mount_attr) {
|
||||
.attr_set = MOUNT_ATTR_IDMAP,
|
||||
.userns_fd = userns_fd,
|
||||
}, sizeof(struct mount_attr)) < 0)
|
||||
return log_debug_errno(errno, "Failed to change bind mount attributes for '%s': %m", p);
|
||||
|
||||
/* Remove the old mount point */
|
||||
r = umount_verbose(LOG_DEBUG, p, UMOUNT_NOFOLLOW);
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
||||
/* And place the cloned version in its place */
|
||||
if (move_mount(mount_fd, "", -1, p, MOVE_MOUNT_F_EMPTY_PATH) < 0)
|
||||
return log_debug_errno(errno, "Failed to attach UID mapped mount to '%s': %m", p);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
@ -103,3 +103,7 @@ DEFINE_TRIVIAL_CLEANUP_FUNC(char*, umount_and_rmdir_and_free);
|
||||
|
||||
int bind_mount_in_namespace(pid_t target, const char *propagate_path, const char *incoming_path, const char *src, const char *dest, bool read_only, bool make_file_or_directory);
|
||||
int mount_image_in_namespace(pid_t target, const char *propagate_path, const char *incoming_path, const char *src, const char *dest, bool read_only, bool make_file_or_directory, const MountOptions *options);
|
||||
|
||||
int make_mount_point(const char *path);
|
||||
|
||||
int remount_idmap(const char *p, uid_t uid_shift, uid_t uid_range);
|
||||
|
@ -551,6 +551,7 @@ static int merge_subprocess(Hashmap *images, const char *workspace) {
|
||||
m,
|
||||
p,
|
||||
UID_INVALID,
|
||||
UID_INVALID,
|
||||
flags);
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
@ -77,7 +77,7 @@ static void* thread_func(void *ptr) {
|
||||
assert_se(dissected->partitions[PARTITION_HOME].found);
|
||||
assert_se(dissected->partitions[PARTITION_HOME].node);
|
||||
|
||||
r = dissected_image_mount(dissected, mounted, UID_INVALID, DISSECT_IMAGE_READ_ONLY);
|
||||
r = dissected_image_mount(dissected, mounted, UID_INVALID, UID_INVALID, DISSECT_IMAGE_READ_ONLY);
|
||||
log_notice_errno(r, "Mounted %s → %s: %m", loop->node, mounted);
|
||||
assert_se(r >= 0);
|
||||
|
||||
@ -217,7 +217,7 @@ int main(int argc, char *argv[]) {
|
||||
assert_se(mkdtemp_malloc(NULL, &mounted) >= 0);
|
||||
|
||||
/* This first (writable) mount will initialize the mount point dirs, so that the subsequent read-only ones can work */
|
||||
assert_se(dissected_image_mount(dissected, mounted, UID_INVALID, 0) >= 0);
|
||||
assert_se(dissected_image_mount(dissected, mounted, UID_INVALID, UID_INVALID, 0) >= 0);
|
||||
|
||||
assert_se(umount_recursive(mounted, 0) >= 0);
|
||||
loop = loop_device_unref(loop);
|
||||
|
Loading…
x
Reference in New Issue
Block a user