mirror of
https://github.com/systemd/systemd.git
synced 2025-01-11 09:18:07 +03:00
Merge pull request #3093 from poettering/nspawn-userns-magic
nspawn automatic user namespaces
This commit is contained in:
commit
d7fe83bbc2
1
.gitignore
vendored
1
.gitignore
vendored
@ -240,6 +240,7 @@
|
|||||||
/test-ns
|
/test-ns
|
||||||
/test-nss
|
/test-nss
|
||||||
/test-parse-util
|
/test-parse-util
|
||||||
|
/test-patch-uid
|
||||||
/test-path
|
/test-path
|
||||||
/test-path-lookup
|
/test-path-lookup
|
||||||
/test-path-util
|
/test-path-util
|
||||||
|
13
Makefile.am
13
Makefile.am
@ -3021,6 +3021,8 @@ systemd_nspawn_SOURCES = \
|
|||||||
src/nspawn/nspawn-setuid.h \
|
src/nspawn/nspawn-setuid.h \
|
||||||
src/nspawn/nspawn-stub-pid1.c \
|
src/nspawn/nspawn-stub-pid1.c \
|
||||||
src/nspawn/nspawn-stub-pid1.h \
|
src/nspawn/nspawn-stub-pid1.h \
|
||||||
|
src/nspawn/nspawn-patch-uid.c \
|
||||||
|
src/nspawn/nspawn-patch-uid.h \
|
||||||
src/core/mount-setup.c \
|
src/core/mount-setup.c \
|
||||||
src/core/mount-setup.h \
|
src/core/mount-setup.h \
|
||||||
src/core/loopback-setup.c \
|
src/core/loopback-setup.c \
|
||||||
@ -3048,6 +3050,17 @@ systemd_nspawn_LDADD += \
|
|||||||
libfirewall.la
|
libfirewall.la
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
test_patch_uid_SOURCES = \
|
||||||
|
src/nspawn/nspawn-patch-uid.c \
|
||||||
|
src/nspawn/nspawn-patch-uid.h \
|
||||||
|
src/nspawn/test-patch-uid.c
|
||||||
|
|
||||||
|
test_patch_uid_LDADD = \
|
||||||
|
libshared.la
|
||||||
|
|
||||||
|
manual_tests += \
|
||||||
|
test-patch-uid
|
||||||
|
|
||||||
# ------------------------------------------------------------------------------
|
# ------------------------------------------------------------------------------
|
||||||
systemd_run_SOURCES = \
|
systemd_run_SOURCES = \
|
||||||
src/run/run.c
|
src/run/run.c
|
||||||
|
@ -387,38 +387,79 @@
|
|||||||
<varlistentry>
|
<varlistentry>
|
||||||
<term><option>--private-users=</option></term>
|
<term><option>--private-users=</option></term>
|
||||||
|
|
||||||
<listitem><para>Enables user namespacing. If enabled, the
|
<listitem><para>Controls user namespacing. If enabled, the container will run with its own private set of UNIX
|
||||||
container will run with its own private set of Unix user and
|
user and group ids (UIDs and GIDs). This involves mapping the private UIDs/GIDs used in the container (starting
|
||||||
group ids (UIDs and GIDs). Takes none, one or two
|
with the container's root user 0 and up) to a range of UIDs/GIDs on the host that are not used for other
|
||||||
colon-separated parameters: the first parameter specifies the
|
purposes (usually in the range beyond the host's UID/GID 65536). The parameter may be specified as follows:</para>
|
||||||
first host UID to assign to the container, the second
|
|
||||||
parameter specifies the number of host UIDs to assign to the
|
|
||||||
container. If the second parameter is omitted, 65536 UIDs are
|
|
||||||
assigned. If the first parameter is also omitted (and hence
|
|
||||||
no parameter passed at all), the first UID assigned to the
|
|
||||||
container is read from the owner of the root directory of the
|
|
||||||
container's directory tree. By default, no user namespacing is
|
|
||||||
applied.</para>
|
|
||||||
|
|
||||||
<para>Note that user namespacing currently requires OS trees
|
<orderedlist>
|
||||||
that are prepared for the UID shift that is being applied:
|
<listitem><para>The value <literal>no</literal> turns off user namespacing. This is the default.</para></listitem>
|
||||||
UIDs and GIDs used for file ownership or in file ACL entries
|
|
||||||
must be shifted to the container UID base that is
|
|
||||||
used during container runtime.</para>
|
|
||||||
|
|
||||||
<para>It is recommended to assign at least 65536 UIDs to each
|
<listitem><para>The value <literal>yes</literal> (or the omission of a parameter) turns on user
|
||||||
container, so that the usable UID range in the container
|
namespacing. The UID/GID range to use is determined automatically from the file ownership of the root
|
||||||
covers 16 bit. For best security, do not assign overlapping UID
|
directory of the container's directory tree. To use this option, make sure to prepare the directory tree in
|
||||||
ranges to multiple containers. It is hence a good idea to use
|
advance, and ensure that all files and directories in it are owned by UIDs/GIDs in the range you'd like to
|
||||||
the upper 16 bit of the host 32-bit UIDs as container
|
use. Also, make sure that used file ACLs exclusively reference UIDs/GIDs in the appropriate range. If this
|
||||||
identifier, while the lower 16 bit encode the container UID
|
mode is used the number of UIDs/GIDs assigned to the container for use is 65536, and the UID/GID of the
|
||||||
used.</para>
|
root directory must be a multiple of 65536.</para></listitem>
|
||||||
|
|
||||||
<para>When user namespaces are used, the GID range assigned to
|
<listitem><para>The value <literal>pick</literal> turns on user namespacing. In this case the UID/GID range is automatically
|
||||||
each container is always chosen identical to the UID
|
chosen. As a first step, the file owner of the root directory of the container's directory tree is read, and it
|
||||||
range.</para></listitem>
|
is checked that it is currently not used by the system otherwise (in particular, that no other container is
|
||||||
|
using it). If this check is successful, the UID/GID range determined this way is used, similar to the
|
||||||
|
behaviour if <literal>yes</literal> is specified. If the check is not successful (and thus the UID/GID range indicated in the
|
||||||
|
root directory's file owner is already used elsewhere) a new – currently unused – UID/GID range of 65536
|
||||||
|
UIDs/GIDs is randomly chosen between the host UID/GIDs of 524288 and 1878982656, always starting at a
|
||||||
|
multiple of 65536. This setting implies <option>--private-users-chown</option> (see below), which has the
|
||||||
|
effect that the files and directories in the container's directory tree will be owned by the appropriate
|
||||||
|
users of the range picked. Using this option makes user namespace behaviour fully automatic. Note that the
|
||||||
|
first invocation of a previously unused container image might result in picking a new UID/GID range for it,
|
||||||
|
and thus in the (possibly expensive) file ownership adjustment operation. However, subsequent invocations of
|
||||||
|
the container will be cheap (unless of course the picked UID/GID range is assigned to a different use by
|
||||||
|
then).</para></listitem>
|
||||||
|
|
||||||
|
<listitem><para>Finally if one or two colon-separated numeric parameters are specified, user namespacing is
|
||||||
|
turned on, too. The first parameter specifies the first host UID/GID to assign to the container, the second
|
||||||
|
parameter specifies the number of host UIDs/GIDs to assign to the container. If the second parameter is
|
||||||
|
omitted, 65536 UIDs/GIDs are assigned.</para></listitem>
|
||||||
|
</orderedlist>
|
||||||
|
|
||||||
|
<para>It is recommended to assign at least 65536 UIDs/GIDs to each container, so that the usable UID/GID range in the
|
||||||
|
container covers 16 bit. For best security, do not assign overlapping UID/GID ranges to multiple containers. It is
|
||||||
|
hence a good idea to use the upper 16 bit of the host 32-bit UIDs/GIDs as container identifier, while the lower 16
|
||||||
|
bit encode the container UID/GID used. This is in fact the behaviour enforced by the
|
||||||
|
<option>--private-users=pick</option> option.</para>
|
||||||
|
|
||||||
|
<para>When user namespaces are used, the GID range assigned to each container is always chosen identical to the
|
||||||
|
UID range.</para>
|
||||||
|
|
||||||
|
<para>In most cases, using <option>--private-users=pick</option> is the recommended option as it enhances
|
||||||
|
container security massively and operates fully automatically in most cases.</para>
|
||||||
|
|
||||||
|
<para>Note that the picked UID/GID range is not written to <filename>/etc/passwd</filename> or
|
||||||
|
<filename>/etc/group</filename>. In fact, the allocation of the range is not stored persistently anywhere,
|
||||||
|
except in the file ownership of the files and directories of the container.</para></listitem>
|
||||||
</varlistentry>
|
</varlistentry>
|
||||||
|
|
||||||
|
<varlistentry>
|
||||||
|
<term><option>-U</option></term>
|
||||||
|
|
||||||
|
<listitem><para>If the kernel supports the user namespaces feature, equivalent to
|
||||||
|
<option>--private-users=pick</option>, otherwise equivalent to
|
||||||
|
<option>--private-users=no</option>.</para></listitem>
|
||||||
|
</varlistentry>
|
||||||
|
|
||||||
|
<varlistentry>
|
||||||
|
<term><option>--private-users-chown</option></term>
|
||||||
|
|
||||||
|
<listitem><para>If specified, all files and directories in the container's directory tree will be adjusted so that
|
||||||
|
they are owned by the appropriate UIDs/GIDs selected for the container (see above). This operation is
|
||||||
|
potentially expensive, as it involves descending and iterating through the full directory tree of the
|
||||||
|
container. Besides actual file ownership, file ACLs are adjusted as well.</para>
|
||||||
|
|
||||||
|
<para>This option is implied if <option>--private-users=pick</option> is used. This option has no effect if
|
||||||
|
user namespacing is not used.</para></listitem>
|
||||||
|
</varlistentry>
|
||||||
|
|
||||||
<varlistentry>
|
<varlistentry>
|
||||||
<term><option>--private-network</option></term>
|
<term><option>--private-network</option></term>
|
||||||
|
@ -251,6 +251,14 @@
|
|||||||
<option>--uuid=</option> command line switch. This option is
|
<option>--uuid=</option> command line switch. This option is
|
||||||
privileged (see above). </para></listitem>
|
privileged (see above). </para></listitem>
|
||||||
</varlistentry>
|
</varlistentry>
|
||||||
|
|
||||||
|
<varlistentry>
|
||||||
|
<term><varname>PrivateUsers=</varname></term>
|
||||||
|
|
||||||
|
<listitem><para>Configures support for user namespacing. This is equivalent to the
|
||||||
|
<option>--private-users=</option> command line switch, and takes the same options. This option is privileged
|
||||||
|
(see above). </para></listitem>
|
||||||
|
</varlistentry>
|
||||||
</variablelist>
|
</variablelist>
|
||||||
</refsect1>
|
</refsect1>
|
||||||
|
|
||||||
@ -314,6 +322,16 @@
|
|||||||
for details about the specific options supported. This setting
|
for details about the specific options supported. This setting
|
||||||
is privileged (see above).</para></listitem>
|
is privileged (see above).</para></listitem>
|
||||||
</varlistentry>
|
</varlistentry>
|
||||||
|
|
||||||
|
<varlistentry>
|
||||||
|
<term><varname>PrivateUsersChown=</varname></term>
|
||||||
|
|
||||||
|
<listitem><para>Configures whether the ownership of the files and directories in the container tree shall be
|
||||||
|
adjusted to the UID/GID range used, if necessary and user namespacing is enabled. This is equivalent to the
|
||||||
|
<option>--private-users-chown</option> command line switch. This option is privileged (see
|
||||||
|
above). </para></listitem>
|
||||||
|
</varlistentry>
|
||||||
|
|
||||||
</variablelist>
|
</variablelist>
|
||||||
</refsect1>
|
</refsect1>
|
||||||
|
|
||||||
|
@ -102,7 +102,7 @@ int copy_bytes(int fdf, int fdt, uint64_t max_bytes, bool try_reflink) {
|
|||||||
if (try_cfr) {
|
if (try_cfr) {
|
||||||
n = try_copy_file_range(fdf, NULL, fdt, NULL, m, 0u);
|
n = try_copy_file_range(fdf, NULL, fdt, NULL, m, 0u);
|
||||||
if (n < 0) {
|
if (n < 0) {
|
||||||
if (!IN_SET(n, -EINVAL, -ENOSYS, -EXDEV))
|
if (!IN_SET(n, -EINVAL, -ENOSYS, -EXDEV, -EBADF))
|
||||||
return n;
|
return n;
|
||||||
|
|
||||||
try_cfr = false;
|
try_cfr = false;
|
||||||
|
@ -25,11 +25,13 @@
|
|||||||
#include <unistd.h>
|
#include <unistd.h>
|
||||||
|
|
||||||
#include "fd-util.h"
|
#include "fd-util.h"
|
||||||
|
#include "fs-util.h"
|
||||||
#include "macro.h"
|
#include "macro.h"
|
||||||
#include "missing.h"
|
#include "missing.h"
|
||||||
#include "parse-util.h"
|
#include "parse-util.h"
|
||||||
#include "path-util.h"
|
#include "path-util.h"
|
||||||
#include "socket-util.h"
|
#include "socket-util.h"
|
||||||
|
#include "stdio-util.h"
|
||||||
#include "util.h"
|
#include "util.h"
|
||||||
|
|
||||||
int close_nointr(int fd) {
|
int close_nointr(int fd) {
|
||||||
@ -356,3 +358,11 @@ bool fdname_is_valid(const char *s) {
|
|||||||
|
|
||||||
return p - s < 256;
|
return p - s < 256;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int fd_get_path(int fd, char **ret) {
|
||||||
|
char procfs_path[strlen("/proc/self/fd/") + DECIMAL_STR_MAX(int)];
|
||||||
|
|
||||||
|
xsprintf(procfs_path, "/proc/self/fd/%i", fd);
|
||||||
|
|
||||||
|
return readlink_malloc(procfs_path, ret);
|
||||||
|
}
|
||||||
|
@ -72,6 +72,8 @@ void cmsg_close_all(struct msghdr *mh);
|
|||||||
|
|
||||||
bool fdname_is_valid(const char *s);
|
bool fdname_is_valid(const char *s);
|
||||||
|
|
||||||
|
int fd_get_path(int fd, char **ret);
|
||||||
|
|
||||||
/* Hint: ENETUNREACH happens if we try to connect to "non-existing" special IP addresses, such as ::5 */
|
/* Hint: ENETUNREACH happens if we try to connect to "non-existing" special IP addresses, such as ::5 */
|
||||||
#define ERRNO_IS_DISCONNECT(r) \
|
#define ERRNO_IS_DISCONNECT(r) \
|
||||||
IN_SET(r, ENOTCONN, ECONNRESET, ECONNREFUSED, ECONNABORTED, EPIPE, ENETUNREACH)
|
IN_SET(r, ENOTCONN, ECONNRESET, ECONNREFUSED, ECONNABORTED, EPIPE, ENETUNREACH)
|
||||||
|
@ -445,6 +445,10 @@ struct btrfs_ioctl_quota_ctl_args {
|
|||||||
#define TMPFS_MAGIC 0x01021994
|
#define TMPFS_MAGIC 0x01021994
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#ifndef MQUEUE_MAGIC
|
||||||
|
#define MQUEUE_MAGIC 0x19800202
|
||||||
|
#endif
|
||||||
|
|
||||||
#ifndef MS_MOVE
|
#ifndef MS_MOVE
|
||||||
#define MS_MOVE 8192
|
#define MS_MOVE 8192
|
||||||
#endif
|
#endif
|
||||||
|
@ -21,6 +21,7 @@
|
|||||||
|
|
||||||
#include <stdbool.h>
|
#include <stdbool.h>
|
||||||
#include <sys/types.h>
|
#include <sys/types.h>
|
||||||
|
#include <unistd.h>
|
||||||
|
|
||||||
bool uid_is_valid(uid_t uid);
|
bool uid_is_valid(uid_t uid);
|
||||||
|
|
||||||
@ -63,3 +64,7 @@ int take_etc_passwd_lock(const char *root);
|
|||||||
|
|
||||||
#define PTR_TO_GID(p) ((gid_t) (((uintptr_t) (p))-1))
|
#define PTR_TO_GID(p) ((gid_t) (((uintptr_t) (p))-1))
|
||||||
#define GID_TO_PTR(u) ((void*) (((uintptr_t) (u))+1))
|
#define GID_TO_PTR(u) ((void*) (((uintptr_t) (u))+1))
|
||||||
|
|
||||||
|
/* Reports whether user namespaces are available, judged by whether /proc/self/uid_map exists. */
static inline bool userns_supported(void) {
        if (access("/proc/self/uid_map", F_OK) < 0)
                return false;

        return true;
}
|
||||||
|
@ -16,7 +16,7 @@ struct ConfigPerfItem;
|
|||||||
%includes
|
%includes
|
||||||
%%
|
%%
|
||||||
Exec.Boot, config_parse_boot, 0, 0
|
Exec.Boot, config_parse_boot, 0, 0
|
||||||
Exec.ProcessTwo, config_parse_pid2, 0, 0,
|
Exec.ProcessTwo, config_parse_pid2, 0, 0
|
||||||
Exec.Parameters, config_parse_strv, 0, offsetof(Settings, parameters)
|
Exec.Parameters, config_parse_strv, 0, offsetof(Settings, parameters)
|
||||||
Exec.Environment, config_parse_strv, 0, offsetof(Settings, environment)
|
Exec.Environment, config_parse_strv, 0, offsetof(Settings, environment)
|
||||||
Exec.User, config_parse_string, 0, offsetof(Settings, user)
|
Exec.User, config_parse_string, 0, offsetof(Settings, user)
|
||||||
@ -26,11 +26,13 @@ Exec.KillSignal, config_parse_signal, 0, offsetof(Settings,
|
|||||||
Exec.Personality, config_parse_personality, 0, offsetof(Settings, personality)
|
Exec.Personality, config_parse_personality, 0, offsetof(Settings, personality)
|
||||||
Exec.MachineID, config_parse_id128, 0, offsetof(Settings, machine_id)
|
Exec.MachineID, config_parse_id128, 0, offsetof(Settings, machine_id)
|
||||||
Exec.WorkingDirectory, config_parse_path, 0, offsetof(Settings, working_directory)
|
Exec.WorkingDirectory, config_parse_path, 0, offsetof(Settings, working_directory)
|
||||||
|
Exec.PrivateUsers, config_parse_private_users, 0, 0
|
||||||
Files.ReadOnly, config_parse_tristate, 0, offsetof(Settings, read_only)
|
Files.ReadOnly, config_parse_tristate, 0, offsetof(Settings, read_only)
|
||||||
Files.Volatile, config_parse_volatile_mode, 0, offsetof(Settings, volatile_mode)
|
Files.Volatile, config_parse_volatile_mode, 0, offsetof(Settings, volatile_mode)
|
||||||
Files.Bind, config_parse_bind, 0, 0
|
Files.Bind, config_parse_bind, 0, 0
|
||||||
Files.BindReadOnly, config_parse_bind, 1, 0
|
Files.BindReadOnly, config_parse_bind, 1, 0
|
||||||
Files.TemporaryFileSystem, config_parse_tmpfs, 0, 0
|
Files.TemporaryFileSystem, config_parse_tmpfs, 0, 0
|
||||||
|
Files.PrivateUsersChown, config_parse_tristate, 0, offsetof(Settings, userns_chown)
|
||||||
Network.Private, config_parse_tristate, 0, offsetof(Settings, private_network)
|
Network.Private, config_parse_tristate, 0, offsetof(Settings, private_network)
|
||||||
Network.Interface, config_parse_strv, 0, offsetof(Settings, network_interfaces)
|
Network.Interface, config_parse_strv, 0, offsetof(Settings, network_interfaces)
|
||||||
Network.MACVLAN, config_parse_strv, 0, offsetof(Settings, network_macvlan)
|
Network.MACVLAN, config_parse_strv, 0, offsetof(Settings, network_macvlan)
|
||||||
|
469
src/nspawn/nspawn-patch-uid.c
Normal file
469
src/nspawn/nspawn-patch-uid.c
Normal file
@ -0,0 +1,469 @@
|
|||||||
|
/***
|
||||||
|
This file is part of systemd.
|
||||||
|
|
||||||
|
Copyright 2016 Lennart Poettering
|
||||||
|
|
||||||
|
systemd is free software; you can redistribute it and/or modify it
|
||||||
|
under the terms of the GNU Lesser General Public License as published by
|
||||||
|
the Free Software Foundation; either version 2.1 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
systemd is distributed in the hope that it will be useful, but
|
||||||
|
WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
Lesser General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU Lesser General Public License
|
||||||
|
along with systemd; If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
***/
|
||||||
|
|
||||||
|
#include <fcntl.h>
|
||||||
|
#include <linux/magic.h>
|
||||||
|
#ifdef HAVE_ACL
|
||||||
|
#include <sys/acl.h>
|
||||||
|
#endif
|
||||||
|
#include <sys/stat.h>
|
||||||
|
#include <sys/vfs.h>
|
||||||
|
#include <unistd.h>
|
||||||
|
|
||||||
|
#include "acl-util.h"
|
||||||
|
#include "dirent-util.h"
|
||||||
|
#include "fd-util.h"
|
||||||
|
#include "missing.h"
|
||||||
|
#include "nspawn-patch-uid.h"
|
||||||
|
#include "stat-util.h"
|
||||||
|
#include "stdio-util.h"
|
||||||
|
#include "string-util.h"
|
||||||
|
#include "strv.h"
|
||||||
|
#include "user-util.h"
|
||||||
|
|
||||||
|
#ifdef HAVE_ACL
|
||||||
|
|
||||||
|
/* Reads the ACL of the specified type from a file.
 *
 * If name is NULL the ACL of the inode fd refers to is read. Otherwise name is opened relative to fd (without
 * following symlinks) and the ACL of that inode is read.
 *
 * Returns 0 and stores the ACL in *ret on success, a negative errno-style error on failure. */
static int get_acl(int fd, const char *name, acl_type_t type, acl_t *ret) {
        char procfs_path[strlen("/proc/self/fd/") + DECIMAL_STR_MAX(int) + 1];
        acl_t result;

        assert(fd >= 0);
        assert(ret);

        if (!name) {
                if (type == ACL_TYPE_ACCESS) {
                        /* Access ACLs can be queried through the fd directly */
                        result = acl_get_fd(fd);
                } else {
                        /* Other ACL types are looked up by path, hence go via the fd's /proc/self/fd/ entry */
                        xsprintf(procfs_path, "/proc/self/fd/%i", fd);
                        result = acl_get_file(procfs_path, type);
                }
        } else {
                _cleanup_close_ int child_fd = -1;

                /* Pin the child with O_PATH and address it through its /proc/self/fd/ entry */
                child_fd = openat(fd, name, O_PATH|O_CLOEXEC|O_NOFOLLOW);
                if (child_fd < 0)
                        return -errno;

                xsprintf(procfs_path, "/proc/self/fd/%i", child_fd);
                result = acl_get_file(procfs_path, type);
        }

        if (!result)
                return -errno;

        *ret = result;
        return 0;
}
|
||||||
|
|
||||||
|
/* Writes an ACL of the specified type to a file.
 *
 * If name is NULL the ACL is applied to the inode fd refers to. Otherwise name is opened relative to fd (without
 * following symlinks) and the ACL is applied to that inode.
 *
 * Returns 0 on success, a negative errno-style error on failure. */
static int set_acl(int fd, const char *name, acl_type_t type, acl_t acl) {
        char procfs_path[strlen("/proc/self/fd/") + DECIMAL_STR_MAX(int) + 1];
        int k;

        assert(fd >= 0);
        assert(acl);

        if (!name) {
                if (type == ACL_TYPE_ACCESS) {
                        /* Access ACLs can be set through the fd directly */
                        k = acl_set_fd(fd, acl);
                } else {
                        /* Other ACL types are set by path, hence go via the fd's /proc/self/fd/ entry */
                        xsprintf(procfs_path, "/proc/self/fd/%i", fd);
                        k = acl_set_file(procfs_path, type, acl);
                }
        } else {
                _cleanup_close_ int child_fd = -1;

                /* Pin the child with O_PATH and address it through its /proc/self/fd/ entry */
                child_fd = openat(fd, name, O_PATH|O_CLOEXEC|O_NOFOLLOW);
                if (child_fd < 0)
                        return -errno;

                xsprintf(procfs_path, "/proc/self/fd/%i", child_fd);
                k = acl_set_file(procfs_path, type, acl);
        }

        if (k < 0)
                return -errno;

        return 0;
}
|
||||||
|
|
||||||
|
/* Builds a copy of an ACL in which the qualifier of every ACL_USER and ACL_GROUP entry is rewritten to
 * shift | (id & 0xFFFF), i.e. the lower 16 bit of the ID are kept and combined with the shift value.
 *
 * Returns > 0 and stores the rewritten ACL in *ret if at least one qualifier had to change. Returns 0 and stores
 * NULL in *ret if no entry needs changing. Returns a negative errno-style error on failure (-EINVAL if a rewritten
 * ID is not valid). */
static int shift_acl(acl_t acl, uid_t shift, acl_t *ret) {
        _cleanup_(acl_freep) acl_t copy = NULL;
        acl_entry_t i;
        int r;

        assert(acl);
        assert(ret);

        r = acl_get_entry(acl, ACL_FIRST_ENTRY, &i);
        if (r < 0)
                return -errno;
        while (r > 0) {
                uid_t old_uid, new_uid = 0;
                bool modify = false;
                acl_tag_t tag;

                if (acl_get_tag_type(i, &tag) < 0)
                        return -errno;

                if (IN_SET(tag, ACL_USER, ACL_GROUP)) {
                        uid_t *qualifier;

                        /* We don't distinguish here between uid_t and gid_t, let's make sure the compiler checks that
                         * this is actually OK */
                        assert_cc(sizeof(uid_t) == sizeof(gid_t));

                        qualifier = acl_get_qualifier(i);
                        if (!qualifier)
                                return -errno;

                        /* acl_get_qualifier() hands out an allocated copy of the qualifier. Read the value and
                         * release the copy right away, so that it is not leaked on any of the paths below. */
                        old_uid = *qualifier;
                        acl_free(qualifier);

                        new_uid = shift | (old_uid & UINT32_C(0xFFFF));
                        if (!uid_is_valid(new_uid))
                                return -EINVAL;

                        modify = new_uid != old_uid;
                        if (modify && !copy) {
                                int n;

                                /* There's no copy of the ACL yet? if so, let's create one, and start the loop from the
                                 * beginning, so that we copy all entries, starting from the first, this time. */

                                n = acl_entries(acl);
                                if (n < 0)
                                        return -errno;

                                copy = acl_init(n);
                                if (!copy)
                                        return -errno;

                                /* Seek back to the beginning */
                                r = acl_get_entry(acl, ACL_FIRST_ENTRY, &i);
                                if (r < 0)
                                        return -errno;
                                continue;
                        }
                }

                if (copy) {
                        acl_entry_t new_entry;

                        if (acl_create_entry(&copy, &new_entry) < 0)
                                return -errno;

                        if (acl_copy_entry(new_entry, i) < 0)
                                return -errno;

                        if (modify)
                                if (acl_set_qualifier(new_entry, &new_uid) < 0)
                                        return -errno;
                }

                r = acl_get_entry(acl, ACL_NEXT_ENTRY, &i);
                if (r < 0)
                        return -errno;
        }

        /* Pass ownership of the copy (if any) to the caller */
        *ret = copy;
        copy = NULL;

        return !!*ret;
}
|
||||||
|
|
||||||
|
static int patch_acls(int fd, const char *name, const struct stat *st, uid_t shift) {
|
||||||
|
_cleanup_(acl_freep) acl_t acl = NULL, shifted = NULL;
|
||||||
|
bool changed = false;
|
||||||
|
int r;
|
||||||
|
|
||||||
|
assert(fd >= 0);
|
||||||
|
assert(st);
|
||||||
|
|
||||||
|
/* ACLs are not supported on symlinks, there's no point in trying */
|
||||||
|
if (S_ISLNK(st->st_mode))
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
r = get_acl(fd, name, ACL_TYPE_ACCESS, &acl);
|
||||||
|
if (r == -EOPNOTSUPP)
|
||||||
|
return 0;
|
||||||
|
if (r < 0)
|
||||||
|
return r;
|
||||||
|
|
||||||
|
r = shift_acl(acl, shift, &shifted);
|
||||||
|
if (r < 0)
|
||||||
|
return r;
|
||||||
|
if (r > 0) {
|
||||||
|
r = set_acl(fd, name, ACL_TYPE_ACCESS, shifted);
|
||||||
|
if (r < 0)
|
||||||
|
return r;
|
||||||
|
|
||||||
|
changed = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (S_ISDIR(st->st_mode)) {
|
||||||
|
acl_free(acl);
|
||||||
|
acl_free(shifted);
|
||||||
|
|
||||||
|
acl = shifted = NULL;
|
||||||
|
|
||||||
|
r = get_acl(fd, name, ACL_TYPE_DEFAULT, &acl);
|
||||||
|
if (r < 0)
|
||||||
|
return r;
|
||||||
|
|
||||||
|
r = shift_acl(acl, shift, &shifted);
|
||||||
|
if (r < 0)
|
||||||
|
return r;
|
||||||
|
if (r > 0) {
|
||||||
|
r = set_acl(fd, name, ACL_TYPE_DEFAULT, shifted);
|
||||||
|
if (r < 0)
|
||||||
|
return r;
|
||||||
|
|
||||||
|
changed = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return changed;
|
||||||
|
}
|
||||||
|
|
||||||
|
#else
|
||||||
|
|
||||||
|
/* Variant used when built without ACL support: there is nothing to patch, hence always report "unchanged". */
static int patch_acls(int fd, const char *name, const struct stat *st, uid_t shift) {
        const int unchanged = 0;

        return unchanged;
}
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
static int patch_fd(int fd, const char *name, const struct stat *st, uid_t shift) {
|
||||||
|
uid_t new_uid;
|
||||||
|
gid_t new_gid;
|
||||||
|
bool changed = false;
|
||||||
|
int r;
|
||||||
|
|
||||||
|
assert(fd >= 0);
|
||||||
|
assert(st);
|
||||||
|
|
||||||
|
new_uid = shift | (st->st_uid & UINT32_C(0xFFFF));
|
||||||
|
new_gid = (gid_t) shift | (st->st_gid & UINT32_C(0xFFFF));
|
||||||
|
|
||||||
|
if (!uid_is_valid(new_uid) || !gid_is_valid(new_gid))
|
||||||
|
return -EINVAL;
|
||||||
|
|
||||||
|
if (st->st_uid != new_uid || st->st_gid != new_gid) {
|
||||||
|
if (name)
|
||||||
|
r = fchownat(fd, name, new_uid, new_gid, AT_SYMLINK_NOFOLLOW);
|
||||||
|
else
|
||||||
|
r = fchown(fd, new_uid, new_gid);
|
||||||
|
if (r < 0)
|
||||||
|
return -errno;
|
||||||
|
|
||||||
|
/* The Linux kernel alters the mode in some cases of chown(). Let's undo this. */
|
||||||
|
if (name && !S_ISLNK(st->st_mode))
|
||||||
|
r = fchmodat(fd, name, st->st_mode, 0);
|
||||||
|
else
|
||||||
|
r = fchmod(fd, st->st_mode);
|
||||||
|
if (r < 0)
|
||||||
|
return -errno;
|
||||||
|
|
||||||
|
changed = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
r = patch_acls(fd, name, st, shift);
|
||||||
|
if (r < 0)
|
||||||
|
return r;
|
||||||
|
|
||||||
|
return r > 0 || changed;
|
||||||
|
}
|
||||||
|
|
||||||
|
static int is_procfs_sysfs_or_suchlike(int fd) {
|
||||||
|
struct statfs sfs;
|
||||||
|
|
||||||
|
assert(fd >= 0);
|
||||||
|
|
||||||
|
if (fstatfs(fd, &sfs) < 0)
|
||||||
|
return -errno;
|
||||||
|
|
||||||
|
return F_TYPE_EQUAL(sfs.f_type, BINFMTFS_MAGIC) ||
|
||||||
|
F_TYPE_EQUAL(sfs.f_type, CGROUP_SUPER_MAGIC) ||
|
||||||
|
F_TYPE_EQUAL(sfs.f_type, CGROUP2_SUPER_MAGIC) ||
|
||||||
|
F_TYPE_EQUAL(sfs.f_type, DEBUGFS_MAGIC) ||
|
||||||
|
F_TYPE_EQUAL(sfs.f_type, DEVPTS_SUPER_MAGIC) ||
|
||||||
|
F_TYPE_EQUAL(sfs.f_type, EFIVARFS_MAGIC) ||
|
||||||
|
F_TYPE_EQUAL(sfs.f_type, HUGETLBFS_MAGIC) ||
|
||||||
|
F_TYPE_EQUAL(sfs.f_type, MQUEUE_MAGIC) ||
|
||||||
|
F_TYPE_EQUAL(sfs.f_type, PROC_SUPER_MAGIC) ||
|
||||||
|
F_TYPE_EQUAL(sfs.f_type, PSTOREFS_MAGIC) ||
|
||||||
|
F_TYPE_EQUAL(sfs.f_type, SELINUX_MAGIC) ||
|
||||||
|
F_TYPE_EQUAL(sfs.f_type, SMACK_MAGIC) ||
|
||||||
|
F_TYPE_EQUAL(sfs.f_type, SYSFS_MAGIC);
|
||||||
|
}
|
||||||
|
|
||||||
|
static int recurse_fd(int fd, bool donate_fd, const struct stat *st, uid_t shift, bool is_toplevel) {
|
||||||
|
bool changed = false;
|
||||||
|
int r;
|
||||||
|
|
||||||
|
assert(fd >= 0);
|
||||||
|
|
||||||
|
/* We generally want to permit crossing of mount boundaries when patching the UIDs/GIDs. However, we
|
||||||
|
* probably shouldn't do this for /proc and /sys if that is already mounted into place. Hence, let's
|
||||||
|
* stop the recursion when we hit a procfs or sysfs file system. */
|
||||||
|
r = is_procfs_sysfs_or_suchlike(fd);
|
||||||
|
if (r < 0)
|
||||||
|
goto finish;
|
||||||
|
if (r > 0) {
|
||||||
|
r = 0; /* don't recurse */
|
||||||
|
goto finish;
|
||||||
|
}
|
||||||
|
|
||||||
|
r = patch_fd(fd, NULL, st, shift);
|
||||||
|
if (r == -EROFS) {
|
||||||
|
_cleanup_free_ char *name = NULL;
|
||||||
|
|
||||||
|
if (!is_toplevel) {
|
||||||
|
/* When we hit a ready-only subtree we simply skip it, but log about it. */
|
||||||
|
(void) fd_get_path(fd, &name);
|
||||||
|
log_debug("Skippping read-only file or directory %s.", strna(name));
|
||||||
|
r = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
goto finish;
|
||||||
|
}
|
||||||
|
if (r < 0)
|
||||||
|
goto finish;
|
||||||
|
|
||||||
|
if (S_ISDIR(st->st_mode)) {
|
||||||
|
_cleanup_closedir_ DIR *d = NULL;
|
||||||
|
struct dirent *de;
|
||||||
|
|
||||||
|
if (!donate_fd) {
|
||||||
|
int copy;
|
||||||
|
|
||||||
|
copy = fcntl(fd, F_DUPFD_CLOEXEC, 3);
|
||||||
|
if (copy < 0) {
|
||||||
|
r = -errno;
|
||||||
|
goto finish;
|
||||||
|
}
|
||||||
|
|
||||||
|
fd = copy;
|
||||||
|
donate_fd = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
d = fdopendir(fd);
|
||||||
|
if (!d) {
|
||||||
|
r = -errno;
|
||||||
|
goto finish;
|
||||||
|
}
|
||||||
|
fd = -1;
|
||||||
|
|
||||||
|
FOREACH_DIRENT_ALL(de, d, r = -errno; goto finish) {
|
||||||
|
struct stat fst;
|
||||||
|
|
||||||
|
if (STR_IN_SET(de->d_name, ".", ".."))
|
||||||
|
continue;
|
||||||
|
|
||||||
|
if (fstatat(dirfd(d), de->d_name, &fst, AT_SYMLINK_NOFOLLOW) < 0) {
|
||||||
|
r = -errno;
|
||||||
|
goto finish;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (S_ISDIR(fst.st_mode)) {
|
||||||
|
int subdir_fd;
|
||||||
|
|
||||||
|
subdir_fd = openat(dirfd(d), de->d_name, O_RDONLY|O_NONBLOCK|O_DIRECTORY|O_CLOEXEC|O_NOFOLLOW|O_NOATIME);
|
||||||
|
if (subdir_fd < 0) {
|
||||||
|
r = -errno;
|
||||||
|
goto finish;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
r = recurse_fd(subdir_fd, true, &fst, shift, false);
|
||||||
|
if (r < 0)
|
||||||
|
goto finish;
|
||||||
|
if (r > 0)
|
||||||
|
changed = true;
|
||||||
|
|
||||||
|
} else {
|
||||||
|
r = patch_fd(dirfd(d), de->d_name, &fst, shift);
|
||||||
|
if (r < 0)
|
||||||
|
goto finish;
|
||||||
|
if (r > 0)
|
||||||
|
changed = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
r = changed;
|
||||||
|
|
||||||
|
finish:
|
||||||
|
if (donate_fd)
|
||||||
|
safe_close(fd);
|
||||||
|
|
||||||
|
return r;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Validates the parameters and then patches the directory tree behind fd to the UID/GID range starting at shift.
 *
 * fd:        the file descriptor of the root of the tree
 * donate_fd: if true this function takes ownership of fd and makes sure it is closed on every return path
 * shift:     first UID/GID of the target range; must be a multiple of 65536
 * range:     size of the target range; must be exactly 65536
 *
 * Returns > 0 if something was changed, 0 if nothing was changed (including when the top-level directory already
 * is owned by the right range), -EOPNOTSUPP for unsupported shift/range values, -EBADE if the upper 16 bit of the
 * UID and GID of the top-level directory differ, or another negative errno-style error on failure. */
static int fd_patch_uid_internal(int fd, bool donate_fd, uid_t shift, uid_t range) {
        struct stat st;
        int r;

        assert(fd >= 0);

        /* Recursively adjusts the UID/GIDs of all files of a directory tree. This is used to automatically fix up an
         * OS tree to the used user namespace UID range. Note that this automatic adjustment only works for UID ranges
         * following the concept that the upper 16bit of a UID identify the container, and the lower 16bit are the actual
         * UID within the container. */

        if ((shift & 0xFFFF) != 0) {
                /* We only support containers where the shift starts at a 2^16 boundary */
                r = -EOPNOTSUPP;
                goto finish;
        }

        if (range != 0x10000) {
                /* We only support containers with 16bit UID ranges for the patching logic */
                r = -EOPNOTSUPP;
                goto finish;
        }

        if (fstat(fd, &st) < 0) {
                r = -errno;
                goto finish;
        }

        if ((uint32_t) st.st_uid >> 16 != (uint32_t) st.st_gid >> 16) {
                /* We only support containers where the uid/gid container ID match */
                r = -EBADE;
                goto finish;
        }

        /* Try to detect if the range is already right. Of course, this is a pretty drastic optimization, as we assume
         * that if the top-level dir has the right upper 16bit assigned, then everything below will have too... */
        if (((uint32_t) (st.st_uid ^ shift) >> 16) == 0) {
                /* Leave via the common exit path, so that a donated fd is closed rather than leaked */
                r = 0;
                goto finish;
        }

        /* From here on recurse_fd() is in charge of closing fd if it was donated */
        return recurse_fd(fd, donate_fd, &st, shift, true);

finish:
        if (donate_fd)
                safe_close(fd);

        return r;
}
|
||||||
|
|
||||||
|
/* Patches the UID/GID ownership of the directory tree referenced by an fd the caller already holds. The fd is
 * not donated (donate_fd is passed as false), i.e. the caller remains responsible for closing it. The result
 * of fd_patch_uid_internal() is returned unmodified. */
int fd_patch_uid(int fd, uid_t shift, uid_t range) {
        const bool donate_fd = false;

        return fd_patch_uid_internal(fd, donate_fd, shift, range);
}
|
||||||
|
|
||||||
|
int path_patch_uid(const char *path, uid_t shift, uid_t range) {
|
||||||
|
int fd;
|
||||||
|
|
||||||
|
fd = open(path, O_RDONLY|O_NONBLOCK|O_DIRECTORY|O_CLOEXEC|O_NOFOLLOW|O_NOATIME);
|
||||||
|
if (fd < 0)
|
||||||
|
return -errno;
|
||||||
|
|
||||||
|
return fd_patch_uid_internal(fd, true, shift, range);
|
||||||
|
}
|
23
src/nspawn/nspawn-patch-uid.h
Normal file
23
src/nspawn/nspawn-patch-uid.h
Normal file
@ -0,0 +1,23 @@
|
|||||||
|
/***
|
||||||
|
This file is part of systemd.
|
||||||
|
|
||||||
|
Copyright 2016 Lennart Poettering
|
||||||
|
|
||||||
|
systemd is free software; you can redistribute it and/or modify it
|
||||||
|
under the terms of the GNU Lesser General Public License as published by
|
||||||
|
the Free Software Foundation; either version 2.1 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
systemd is distributed in the hope that it will be useful, but
|
||||||
|
WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
Lesser General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU Lesser General Public License
|
||||||
|
along with systemd; If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
***/
|
||||||
|
|
||||||
|
#pragma once

#include <sys/types.h>

/* Recursively adjusts the UID/GID ownership of a directory tree so that it lies in the range starting at
 * "shift". Only shifts that are multiples of 2^16 combined with a range of exactly 2^16 are supported, anything
 * else is refused with -EOPNOTSUPP. Both calls return a negative errno-style value on failure. */

/* Operates on an already open directory fd; the fd remains owned by the caller. */
int fd_patch_uid(int fd, uid_t shift, uid_t range);

/* Opens the directory at "path" itself and operates on that. */
int path_patch_uid(const char *path, uid_t shift, uid_t range);
|
@ -25,7 +25,9 @@
|
|||||||
#include "parse-util.h"
|
#include "parse-util.h"
|
||||||
#include "process-util.h"
|
#include "process-util.h"
|
||||||
#include "strv.h"
|
#include "strv.h"
|
||||||
|
#include "user-util.h"
|
||||||
#include "util.h"
|
#include "util.h"
|
||||||
|
#include "string-util.h"
|
||||||
|
|
||||||
int settings_load(FILE *f, const char *path, Settings **ret) {
|
int settings_load(FILE *f, const char *path, Settings **ret) {
|
||||||
_cleanup_(settings_freep) Settings *s = NULL;
|
_cleanup_(settings_freep) Settings *s = NULL;
|
||||||
@ -40,9 +42,13 @@ int settings_load(FILE *f, const char *path, Settings **ret) {
|
|||||||
|
|
||||||
s->start_mode = _START_MODE_INVALID;
|
s->start_mode = _START_MODE_INVALID;
|
||||||
s->personality = PERSONALITY_INVALID;
|
s->personality = PERSONALITY_INVALID;
|
||||||
|
s->userns_mode = _USER_NAMESPACE_MODE_INVALID;
|
||||||
|
s->uid_shift = UID_INVALID;
|
||||||
|
s->uid_range = UID_INVALID;
|
||||||
|
|
||||||
s->read_only = -1;
|
s->read_only = -1;
|
||||||
s->volatile_mode = _VOLATILE_MODE_INVALID;
|
s->volatile_mode = _VOLATILE_MODE_INVALID;
|
||||||
|
s->userns_chown = -1;
|
||||||
|
|
||||||
s->private_network = -1;
|
s->private_network = -1;
|
||||||
s->network_veth = -1;
|
s->network_veth = -1;
|
||||||
@ -59,6 +65,16 @@ int settings_load(FILE *f, const char *path, Settings **ret) {
|
|||||||
if (r < 0)
|
if (r < 0)
|
||||||
return r;
|
return r;
|
||||||
|
|
||||||
|
/* Make sure that if userns_mode is set, userns_chown is set to something appropriate, and vice versa. Either
|
||||||
|
* both fields shall be initialized or neither. */
|
||||||
|
if (s->userns_mode == USER_NAMESPACE_PICK)
|
||||||
|
s->userns_chown = true;
|
||||||
|
else if (s->userns_mode != _USER_NAMESPACE_MODE_INVALID && s->userns_chown < 0)
|
||||||
|
s->userns_chown = false;
|
||||||
|
|
||||||
|
if (s->userns_chown >= 0 && s->userns_mode == _USER_NAMESPACE_MODE_INVALID)
|
||||||
|
s->userns_mode = USER_NAMESPACE_NO;
|
||||||
|
|
||||||
*ret = s;
|
*ret = s;
|
||||||
s = NULL;
|
s = NULL;
|
||||||
|
|
||||||
@ -392,3 +408,73 @@ conflict:
|
|||||||
log_syntax(unit, LOG_ERR, filename, line, r, "Conflicting Boot= or ProcessTwo= setting found. Ignoring.");
|
log_syntax(unit, LOG_ERR, filename, line, r, "Conflicting Boot= or ProcessTwo= setting found. Ignoring.");
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int config_parse_private_users(
|
||||||
|
const char *unit,
|
||||||
|
const char *filename,
|
||||||
|
unsigned line,
|
||||||
|
const char *section,
|
||||||
|
unsigned section_line,
|
||||||
|
const char *lvalue,
|
||||||
|
int ltype,
|
||||||
|
const char *rvalue,
|
||||||
|
void *data,
|
||||||
|
void *userdata) {
|
||||||
|
|
||||||
|
Settings *settings = data;
|
||||||
|
int r;
|
||||||
|
|
||||||
|
assert(filename);
|
||||||
|
assert(lvalue);
|
||||||
|
assert(rvalue);
|
||||||
|
|
||||||
|
r = parse_boolean(rvalue);
|
||||||
|
if (r == 0) {
|
||||||
|
/* no: User namespacing off */
|
||||||
|
settings->userns_mode = USER_NAMESPACE_NO;
|
||||||
|
settings->uid_shift = UID_INVALID;
|
||||||
|
settings->uid_range = UINT32_C(0x10000);
|
||||||
|
} else if (r > 0) {
|
||||||
|
/* yes: User namespacing on, UID range is read from root dir */
|
||||||
|
settings->userns_mode = USER_NAMESPACE_FIXED;
|
||||||
|
settings->uid_shift = UID_INVALID;
|
||||||
|
settings->uid_range = UINT32_C(0x10000);
|
||||||
|
} else if (streq(rvalue, "pick")) {
|
||||||
|
/* pick: User namespacing on, UID range is picked randomly */
|
||||||
|
settings->userns_mode = USER_NAMESPACE_PICK;
|
||||||
|
settings->uid_shift = UID_INVALID;
|
||||||
|
settings->uid_range = UINT32_C(0x10000);
|
||||||
|
} else {
|
||||||
|
const char *range, *shift;
|
||||||
|
uid_t sh, rn;
|
||||||
|
|
||||||
|
/* anything else: User namespacing on, UID range is explicitly configured */
|
||||||
|
|
||||||
|
range = strchr(rvalue, ':');
|
||||||
|
if (range) {
|
||||||
|
shift = strndupa(rvalue, range - rvalue);
|
||||||
|
range++;
|
||||||
|
|
||||||
|
r = safe_atou32(range, &rn);
|
||||||
|
if (r < 0 || rn <= 0) {
|
||||||
|
log_syntax(unit, LOG_ERR, filename, line, r, "UID/GID range invalid, ignoring: %s", range);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
shift = rvalue;
|
||||||
|
rn = UINT32_C(0x10000);
|
||||||
|
}
|
||||||
|
|
||||||
|
r = parse_uid(shift, &sh);
|
||||||
|
if (r < 0) {
|
||||||
|
log_syntax(unit, LOG_ERR, filename, line, r, "UID/GID shift invalid, ignoring: %s", range);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
settings->userns_mode = USER_NAMESPACE_FIXED;
|
||||||
|
settings->uid_shift = sh;
|
||||||
|
settings->uid_range = rn;
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
@ -33,6 +33,14 @@ typedef enum StartMode {
|
|||||||
_START_MODE_INVALID = -1
|
_START_MODE_INVALID = -1
|
||||||
} StartMode;
|
} StartMode;
|
||||||
|
|
||||||
|
/* Selects whether and how the container is run in a user namespace. */
typedef enum UserNamespaceMode {
        _USER_NAMESPACE_MODE_INVALID = -1, /* not set */
        USER_NAMESPACE_NO,                 /* user namespacing off */
        USER_NAMESPACE_FIXED,              /* user namespacing on, UID range configured or read from the root dir */
        USER_NAMESPACE_PICK,               /* user namespacing on, UID range is picked randomly */
        _USER_NAMESPACE_MODE_MAX,          /* number of valid modes */
} UserNamespaceMode;
|
||||||
|
|
||||||
typedef enum SettingsMask {
|
typedef enum SettingsMask {
|
||||||
SETTING_START_MODE = 1 << 0,
|
SETTING_START_MODE = 1 << 0,
|
||||||
SETTING_ENVIRONMENT = 1 << 1,
|
SETTING_ENVIRONMENT = 1 << 1,
|
||||||
@ -47,7 +55,8 @@ typedef enum SettingsMask {
|
|||||||
SETTING_VOLATILE_MODE = 1 << 10,
|
SETTING_VOLATILE_MODE = 1 << 10,
|
||||||
SETTING_CUSTOM_MOUNTS = 1 << 11,
|
SETTING_CUSTOM_MOUNTS = 1 << 11,
|
||||||
SETTING_WORKING_DIRECTORY = 1 << 12,
|
SETTING_WORKING_DIRECTORY = 1 << 12,
|
||||||
_SETTINGS_MASK_ALL = (1 << 13) -1
|
SETTING_USERNS = 1 << 13,
|
||||||
|
_SETTINGS_MASK_ALL = (1 << 14) -1
|
||||||
} SettingsMask;
|
} SettingsMask;
|
||||||
|
|
||||||
typedef struct Settings {
|
typedef struct Settings {
|
||||||
@ -62,12 +71,15 @@ typedef struct Settings {
|
|||||||
unsigned long personality;
|
unsigned long personality;
|
||||||
sd_id128_t machine_id;
|
sd_id128_t machine_id;
|
||||||
char *working_directory;
|
char *working_directory;
|
||||||
|
UserNamespaceMode userns_mode;
|
||||||
|
uid_t uid_shift, uid_range;
|
||||||
|
|
||||||
/* [Image] */
|
/* [Image] */
|
||||||
int read_only;
|
int read_only;
|
||||||
VolatileMode volatile_mode;
|
VolatileMode volatile_mode;
|
||||||
CustomMount *custom_mounts;
|
CustomMount *custom_mounts;
|
||||||
unsigned n_custom_mounts;
|
unsigned n_custom_mounts;
|
||||||
|
int userns_chown;
|
||||||
|
|
||||||
/* [Network] */
|
/* [Network] */
|
||||||
int private_network;
|
int private_network;
|
||||||
@ -99,3 +111,4 @@ int config_parse_tmpfs(const char *unit, const char *filename, unsigned line, co
|
|||||||
int config_parse_veth_extra(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
|
int config_parse_veth_extra(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
|
||||||
int config_parse_boot(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
|
int config_parse_boot(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
|
||||||
int config_parse_pid2(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
|
int config_parse_pid2(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
|
||||||
|
int config_parse_private_users(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
|
||||||
|
@ -22,7 +22,9 @@
|
|||||||
#endif
|
#endif
|
||||||
#include <errno.h>
|
#include <errno.h>
|
||||||
#include <getopt.h>
|
#include <getopt.h>
|
||||||
|
#include <grp.h>
|
||||||
#include <linux/loop.h>
|
#include <linux/loop.h>
|
||||||
|
#include <pwd.h>
|
||||||
#include <sched.h>
|
#include <sched.h>
|
||||||
#ifdef HAVE_SECCOMP
|
#ifdef HAVE_SECCOMP
|
||||||
#include <seccomp.h>
|
#include <seccomp.h>
|
||||||
@ -75,6 +77,7 @@
|
|||||||
#include "nspawn-expose-ports.h"
|
#include "nspawn-expose-ports.h"
|
||||||
#include "nspawn-mount.h"
|
#include "nspawn-mount.h"
|
||||||
#include "nspawn-network.h"
|
#include "nspawn-network.h"
|
||||||
|
#include "nspawn-patch-uid.h"
|
||||||
#include "nspawn-register.h"
|
#include "nspawn-register.h"
|
||||||
#include "nspawn-settings.h"
|
#include "nspawn-settings.h"
|
||||||
#include "nspawn-setuid.h"
|
#include "nspawn-setuid.h"
|
||||||
@ -101,6 +104,11 @@
|
|||||||
#include "user-util.h"
|
#include "user-util.h"
|
||||||
#include "util.h"
|
#include "util.h"
|
||||||
|
|
||||||
|
/* Note that devpts's gid= parameter parses GIDs as signed values, hence we stay away from the upper half of the 32bit
|
||||||
|
* UID range here */
|
||||||
|
#define UID_SHIFT_PICK_MIN ((uid_t) UINT32_C(0x00080000))
|
||||||
|
#define UID_SHIFT_PICK_MAX ((uid_t) UINT32_C(0x6FFF0000))
|
||||||
|
|
||||||
typedef enum ContainerStatus {
|
typedef enum ContainerStatus {
|
||||||
CONTAINER_TERMINATED,
|
CONTAINER_TERMINATED,
|
||||||
CONTAINER_REBOOTED
|
CONTAINER_REBOOTED
|
||||||
@ -173,8 +181,9 @@ static char *arg_image = NULL;
|
|||||||
static VolatileMode arg_volatile_mode = VOLATILE_NO;
|
static VolatileMode arg_volatile_mode = VOLATILE_NO;
|
||||||
static ExposePort *arg_expose_ports = NULL;
|
static ExposePort *arg_expose_ports = NULL;
|
||||||
static char **arg_property = NULL;
|
static char **arg_property = NULL;
|
||||||
|
static UserNamespaceMode arg_userns_mode = USER_NAMESPACE_NO;
|
||||||
static uid_t arg_uid_shift = UID_INVALID, arg_uid_range = 0x10000U;
|
static uid_t arg_uid_shift = UID_INVALID, arg_uid_range = 0x10000U;
|
||||||
static bool arg_userns = false;
|
static bool arg_userns_chown = false;
|
||||||
static int arg_kill_signal = 0;
|
static int arg_kill_signal = 0;
|
||||||
static bool arg_unified_cgroup_hierarchy = false;
|
static bool arg_unified_cgroup_hierarchy = false;
|
||||||
static SettingsMask arg_settings_mask = 0;
|
static SettingsMask arg_settings_mask = 0;
|
||||||
@ -202,8 +211,10 @@ static void help(void) {
|
|||||||
" --uuid=UUID Set a specific machine UUID for the container\n"
|
" --uuid=UUID Set a specific machine UUID for the container\n"
|
||||||
" -S --slice=SLICE Place the container in the specified slice\n"
|
" -S --slice=SLICE Place the container in the specified slice\n"
|
||||||
" --property=NAME=VALUE Set scope unit property\n"
|
" --property=NAME=VALUE Set scope unit property\n"
|
||||||
|
" -U --private-users=pick Run within user namespace, pick UID/GID range automatically\n"
|
||||||
" --private-users[=UIDBASE[:NUIDS]]\n"
|
" --private-users[=UIDBASE[:NUIDS]]\n"
|
||||||
" Run within user namespace\n"
|
" Run within user namespace, user configured UID/GID range\n"
|
||||||
|
" --private-user-chown Adjust OS tree file ownership for private UID/GID range\n"
|
||||||
" --private-network Disable network in container\n"
|
" --private-network Disable network in container\n"
|
||||||
" --network-interface=INTERFACE\n"
|
" --network-interface=INTERFACE\n"
|
||||||
" Assign an existing network interface to the\n"
|
" Assign an existing network interface to the\n"
|
||||||
@ -272,10 +283,16 @@ static int custom_mounts_prepare(void) {
|
|||||||
for (i = 0; i < arg_n_custom_mounts; i++) {
|
for (i = 0; i < arg_n_custom_mounts; i++) {
|
||||||
CustomMount *m = &arg_custom_mounts[i];
|
CustomMount *m = &arg_custom_mounts[i];
|
||||||
|
|
||||||
if (arg_userns && arg_uid_shift == UID_INVALID && path_equal(m->destination, "/")) {
|
if (path_equal(m->destination, "/") && arg_userns_mode != USER_NAMESPACE_NO) {
|
||||||
|
|
||||||
|
if (arg_userns_chown) {
|
||||||
|
log_error("--private-users-chown may not be combined with custom root mounts.");
|
||||||
|
return -EINVAL;
|
||||||
|
} else if (arg_uid_shift == UID_INVALID) {
|
||||||
log_error("--private-users with automatic UID shift may not be combined with custom root mounts.");
|
log_error("--private-users with automatic UID shift may not be combined with custom root mounts.");
|
||||||
return -EINVAL;
|
return -EINVAL;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if (m->type != CUSTOM_MOUNT_OVERLAY)
|
if (m->type != CUSTOM_MOUNT_OVERLAY)
|
||||||
continue;
|
continue;
|
||||||
@ -349,6 +366,7 @@ static int parse_argv(int argc, char *argv[]) {
|
|||||||
ARG_KILL_SIGNAL,
|
ARG_KILL_SIGNAL,
|
||||||
ARG_SETTINGS,
|
ARG_SETTINGS,
|
||||||
ARG_CHDIR,
|
ARG_CHDIR,
|
||||||
|
ARG_PRIVATE_USERS_CHOWN,
|
||||||
};
|
};
|
||||||
|
|
||||||
static const struct option options[] = {
|
static const struct option options[] = {
|
||||||
@ -392,6 +410,7 @@ static int parse_argv(int argc, char *argv[]) {
|
|||||||
{ "port", required_argument, NULL, 'p' },
|
{ "port", required_argument, NULL, 'p' },
|
||||||
{ "property", required_argument, NULL, ARG_PROPERTY },
|
{ "property", required_argument, NULL, ARG_PROPERTY },
|
||||||
{ "private-users", optional_argument, NULL, ARG_PRIVATE_USERS },
|
{ "private-users", optional_argument, NULL, ARG_PRIVATE_USERS },
|
||||||
|
{ "private-users-chown", optional_argument, NULL, ARG_PRIVATE_USERS_CHOWN},
|
||||||
{ "kill-signal", required_argument, NULL, ARG_KILL_SIGNAL },
|
{ "kill-signal", required_argument, NULL, ARG_KILL_SIGNAL },
|
||||||
{ "settings", required_argument, NULL, ARG_SETTINGS },
|
{ "settings", required_argument, NULL, ARG_SETTINGS },
|
||||||
{ "chdir", required_argument, NULL, ARG_CHDIR },
|
{ "chdir", required_argument, NULL, ARG_CHDIR },
|
||||||
@ -406,7 +425,7 @@ static int parse_argv(int argc, char *argv[]) {
|
|||||||
assert(argc >= 0);
|
assert(argc >= 0);
|
||||||
assert(argv);
|
assert(argv);
|
||||||
|
|
||||||
while ((c = getopt_long(argc, argv, "+hD:u:abL:M:jS:Z:qi:xp:n", options, NULL)) >= 0)
|
while ((c = getopt_long(argc, argv, "+hD:u:abL:M:jS:Z:qi:xp:nU", options, NULL)) >= 0)
|
||||||
|
|
||||||
switch (c) {
|
switch (c) {
|
||||||
|
|
||||||
@ -797,10 +816,29 @@ static int parse_argv(int argc, char *argv[]) {
|
|||||||
break;
|
break;
|
||||||
|
|
||||||
case ARG_PRIVATE_USERS:
|
case ARG_PRIVATE_USERS:
|
||||||
if (optarg) {
|
|
||||||
|
r = optarg ? parse_boolean(optarg) : 1;
|
||||||
|
if (r == 0) {
|
||||||
|
/* no: User namespacing off */
|
||||||
|
arg_userns_mode = USER_NAMESPACE_NO;
|
||||||
|
arg_uid_shift = UID_INVALID;
|
||||||
|
arg_uid_range = UINT32_C(0x10000);
|
||||||
|
} else if (r > 0) {
|
||||||
|
/* yes: User namespacing on, UID range is read from root dir */
|
||||||
|
arg_userns_mode = USER_NAMESPACE_FIXED;
|
||||||
|
arg_uid_shift = UID_INVALID;
|
||||||
|
arg_uid_range = UINT32_C(0x10000);
|
||||||
|
} else if (streq(optarg, "pick")) {
|
||||||
|
/* pick: User namespacing on, UID range is picked randomly */
|
||||||
|
arg_userns_mode = USER_NAMESPACE_PICK;
|
||||||
|
arg_uid_shift = UID_INVALID;
|
||||||
|
arg_uid_range = UINT32_C(0x10000);
|
||||||
|
} else {
|
||||||
_cleanup_free_ char *buffer = NULL;
|
_cleanup_free_ char *buffer = NULL;
|
||||||
const char *range, *shift;
|
const char *range, *shift;
|
||||||
|
|
||||||
|
/* anything else: User namespacing on, UID range is explicitly configured */
|
||||||
|
|
||||||
range = strchr(optarg, ':');
|
range = strchr(optarg, ':');
|
||||||
if (range) {
|
if (range) {
|
||||||
buffer = strndup(optarg, range - optarg);
|
buffer = strndup(optarg, range - optarg);
|
||||||
@ -820,9 +858,28 @@ static int parse_argv(int argc, char *argv[]) {
|
|||||||
log_error("Failed to parse UID: %s", optarg);
|
log_error("Failed to parse UID: %s", optarg);
|
||||||
return -EINVAL;
|
return -EINVAL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
arg_userns_mode = USER_NAMESPACE_FIXED;
|
||||||
}
|
}
|
||||||
|
|
||||||
arg_userns = true;
|
arg_settings_mask |= SETTING_USERNS;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case 'U':
|
||||||
|
if (userns_supported()) {
|
||||||
|
arg_userns_mode = USER_NAMESPACE_PICK;
|
||||||
|
arg_uid_shift = UID_INVALID;
|
||||||
|
arg_uid_range = UINT32_C(0x10000);
|
||||||
|
|
||||||
|
arg_settings_mask |= SETTING_USERNS;
|
||||||
|
}
|
||||||
|
|
||||||
|
break;
|
||||||
|
|
||||||
|
case ARG_PRIVATE_USERS_CHOWN:
|
||||||
|
arg_userns_chown = true;
|
||||||
|
|
||||||
|
arg_settings_mask |= SETTING_USERNS;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case ARG_KILL_SIGNAL:
|
case ARG_KILL_SIGNAL:
|
||||||
@ -893,6 +950,9 @@ static int parse_argv(int argc, char *argv[]) {
|
|||||||
if (arg_share_system)
|
if (arg_share_system)
|
||||||
arg_register = false;
|
arg_register = false;
|
||||||
|
|
||||||
|
if (arg_userns_mode == USER_NAMESPACE_PICK)
|
||||||
|
arg_userns_chown = true;
|
||||||
|
|
||||||
if (arg_start_mode != START_PID1 && arg_share_system) {
|
if (arg_start_mode != START_PID1 && arg_share_system) {
|
||||||
log_error("--boot and --share-system may not be combined.");
|
log_error("--boot and --share-system may not be combined.");
|
||||||
return -EINVAL;
|
return -EINVAL;
|
||||||
@ -933,8 +993,15 @@ static int parse_argv(int argc, char *argv[]) {
|
|||||||
return -EINVAL;
|
return -EINVAL;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (arg_userns && access("/proc/self/uid_map", F_OK) < 0)
|
if (arg_userns_mode != USER_NAMESPACE_NO && !userns_supported()) {
|
||||||
return log_error_errno(EOPNOTSUPP, "--private-users= is not supported, kernel compiled without user namespace support.");
|
log_error("--private-users= is not supported, kernel compiled without user namespace support.");
|
||||||
|
return -EOPNOTSUPP;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (arg_userns_chown && arg_read_only) {
|
||||||
|
log_error("--read-only and --private-users-chown may not be combined.");
|
||||||
|
return -EINVAL;
|
||||||
|
}
|
||||||
|
|
||||||
if (argc > optind) {
|
if (argc > optind) {
|
||||||
arg_parameters = strv_copy(argv + optind);
|
arg_parameters = strv_copy(argv + optind);
|
||||||
@ -993,7 +1060,7 @@ static int verify_arguments(void) {
|
|||||||
static int userns_lchown(const char *p, uid_t uid, gid_t gid) {
|
static int userns_lchown(const char *p, uid_t uid, gid_t gid) {
|
||||||
assert(p);
|
assert(p);
|
||||||
|
|
||||||
if (!arg_userns)
|
if (arg_userns_mode == USER_NAMESPACE_NO)
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
if (uid == UID_INVALID && gid == GID_INVALID)
|
if (uid == UID_INVALID && gid == GID_INVALID)
|
||||||
@ -2218,6 +2285,29 @@ static int setup_machine_id(const char *directory) {
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static int recursive_chown(const char *directory, uid_t shift, uid_t range) {
|
||||||
|
int r;
|
||||||
|
|
||||||
|
assert(directory);
|
||||||
|
|
||||||
|
if (arg_userns_mode == USER_NAMESPACE_NO || !arg_userns_chown)
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
r = path_patch_uid(directory, arg_uid_shift, arg_uid_range);
|
||||||
|
if (r == -EOPNOTSUPP)
|
||||||
|
return log_error_errno(r, "Automatic UID/GID adjusting is only supported for UID/GID ranges starting at multiples of 2^16 with a range of 2^16.");
|
||||||
|
if (r == -EBADE)
|
||||||
|
return log_error_errno(r, "Upper 16 bits of root directory UID and GID do not match.");
|
||||||
|
if (r < 0)
|
||||||
|
return log_error_errno(r, "Failed to adjust UID/GID shift of OS tree: %m");
|
||||||
|
if (r == 0)
|
||||||
|
log_debug("Root directory of image is already owned by the right UID/GID range, skipping recursive chown operation.");
|
||||||
|
else
|
||||||
|
log_debug("Patched directory tree to match UID/GID range.");
|
||||||
|
|
||||||
|
return r;
|
||||||
|
}
|
||||||
|
|
||||||
static int mount_devices(
|
static int mount_devices(
|
||||||
const char *where,
|
const char *where,
|
||||||
const char *root_device, bool root_device_rw,
|
const char *root_device, bool root_device_rw,
|
||||||
@ -2435,7 +2525,7 @@ static int determine_names(void) {
|
|||||||
static int determine_uid_shift(const char *directory) {
|
static int determine_uid_shift(const char *directory) {
|
||||||
int r;
|
int r;
|
||||||
|
|
||||||
if (!arg_userns) {
|
if (arg_userns_mode == USER_NAMESPACE_NO) {
|
||||||
arg_uid_shift = 0;
|
arg_uid_shift = 0;
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
@ -2462,7 +2552,6 @@ static int determine_uid_shift(const char *directory) {
|
|||||||
return -EINVAL;
|
return -EINVAL;
|
||||||
}
|
}
|
||||||
|
|
||||||
log_info("Using user namespaces with base " UID_FMT " and range " UID_FMT ".", arg_uid_shift, arg_uid_range);
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -2499,7 +2588,7 @@ static int inner_child(
|
|||||||
|
|
||||||
cg_unified_flush();
|
cg_unified_flush();
|
||||||
|
|
||||||
if (arg_userns) {
|
if (arg_userns_mode != USER_NAMESPACE_NO) {
|
||||||
/* Tell the parent, that it now can write the UID map. */
|
/* Tell the parent, that it now can write the UID map. */
|
||||||
(void) barrier_place(barrier); /* #1 */
|
(void) barrier_place(barrier); /* #1 */
|
||||||
|
|
||||||
@ -2510,7 +2599,14 @@ static int inner_child(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
r = mount_all(NULL, arg_userns, true, arg_uid_shift, arg_private_network, arg_uid_range, arg_selinux_apifs_context);
|
r = mount_all(NULL,
|
||||||
|
arg_userns_mode != USER_NAMESPACE_NO,
|
||||||
|
true,
|
||||||
|
arg_private_network,
|
||||||
|
arg_uid_shift,
|
||||||
|
arg_uid_range,
|
||||||
|
arg_selinux_apifs_context);
|
||||||
|
|
||||||
if (r < 0)
|
if (r < 0)
|
||||||
return r;
|
return r;
|
||||||
|
|
||||||
@ -2749,7 +2845,8 @@ static int outer_child(
|
|||||||
if (r < 0)
|
if (r < 0)
|
||||||
return r;
|
return r;
|
||||||
|
|
||||||
if (arg_userns) {
|
if (arg_userns_mode != USER_NAMESPACE_NO) {
|
||||||
|
/* Let the parent know which UID shift we read from the image */
|
||||||
l = send(uid_shift_socket, &arg_uid_shift, sizeof(arg_uid_shift), MSG_NOSIGNAL);
|
l = send(uid_shift_socket, &arg_uid_shift, sizeof(arg_uid_shift), MSG_NOSIGNAL);
|
||||||
if (l < 0)
|
if (l < 0)
|
||||||
return log_error_errno(errno, "Failed to send UID shift: %m");
|
return log_error_errno(errno, "Failed to send UID shift: %m");
|
||||||
@ -2757,17 +2854,49 @@ static int outer_child(
|
|||||||
log_error("Short write while sending UID shift.");
|
log_error("Short write while sending UID shift.");
|
||||||
return -EIO;
|
return -EIO;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (arg_userns_mode == USER_NAMESPACE_PICK) {
|
||||||
|
/* When we are supposed to pick the UID shift, the parent will check now whether the UID shift
|
||||||
|
* we just read from the image is available. If yes, it will send the UID shift back to us, if
|
||||||
|
* not it will pick a different one, and send it back to us. */
|
||||||
|
|
||||||
|
l = recv(uid_shift_socket, &arg_uid_shift, sizeof(arg_uid_shift), 0);
|
||||||
|
if (l < 0)
|
||||||
|
return log_error_errno(errno, "Failed to recv UID shift: %m");
|
||||||
|
if (l != sizeof(arg_uid_shift)) {
|
||||||
|
log_error("Short read while recieving UID shift.");
|
||||||
|
return -EIO;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
log_info("Selected user namespace base " UID_FMT " and range " UID_FMT ".", arg_uid_shift, arg_uid_range);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Turn directory into bind mount */
|
/* Turn directory into bind mount */
|
||||||
if (mount(directory, directory, NULL, MS_BIND|MS_REC, NULL) < 0)
|
if (mount(directory, directory, NULL, MS_BIND|MS_REC, NULL) < 0)
|
||||||
return log_error_errno(errno, "Failed to make bind mount: %m");
|
return log_error_errno(errno, "Failed to make bind mount: %m");
|
||||||
|
|
||||||
r = setup_volatile(directory, arg_volatile_mode, arg_userns, arg_uid_shift, arg_uid_range, arg_selinux_context);
|
r = recursive_chown(directory, arg_uid_shift, arg_uid_range);
|
||||||
if (r < 0)
|
if (r < 0)
|
||||||
return r;
|
return r;
|
||||||
|
|
||||||
r = setup_volatile_state(directory, arg_volatile_mode, arg_userns, arg_uid_shift, arg_uid_range, arg_selinux_context);
|
r = setup_volatile(
|
||||||
|
directory,
|
||||||
|
arg_volatile_mode,
|
||||||
|
arg_userns_mode != USER_NAMESPACE_NO,
|
||||||
|
arg_uid_shift,
|
||||||
|
arg_uid_range,
|
||||||
|
arg_selinux_context);
|
||||||
|
if (r < 0)
|
||||||
|
return r;
|
||||||
|
|
||||||
|
r = setup_volatile_state(
|
||||||
|
directory,
|
||||||
|
arg_volatile_mode,
|
||||||
|
arg_userns_mode != USER_NAMESPACE_NO,
|
||||||
|
arg_uid_shift,
|
||||||
|
arg_uid_range,
|
||||||
|
arg_selinux_context);
|
||||||
if (r < 0)
|
if (r < 0)
|
||||||
return r;
|
return r;
|
||||||
|
|
||||||
@ -2781,7 +2910,13 @@ static int outer_child(
|
|||||||
return log_error_errno(r, "Failed to make tree read-only: %m");
|
return log_error_errno(r, "Failed to make tree read-only: %m");
|
||||||
}
|
}
|
||||||
|
|
||||||
r = mount_all(directory, arg_userns, false, arg_private_network, arg_uid_shift, arg_uid_range, arg_selinux_apifs_context);
|
r = mount_all(directory,
|
||||||
|
arg_userns_mode != USER_NAMESPACE_NO,
|
||||||
|
false,
|
||||||
|
arg_private_network,
|
||||||
|
arg_uid_shift,
|
||||||
|
arg_uid_range,
|
||||||
|
arg_selinux_apifs_context);
|
||||||
if (r < 0)
|
if (r < 0)
|
||||||
return r;
|
return r;
|
||||||
|
|
||||||
@ -2823,11 +2958,24 @@ static int outer_child(
|
|||||||
if (r < 0)
|
if (r < 0)
|
||||||
return r;
|
return r;
|
||||||
|
|
||||||
r = mount_custom(directory, arg_custom_mounts, arg_n_custom_mounts, arg_userns, arg_uid_shift, arg_uid_range, arg_selinux_apifs_context);
|
r = mount_custom(
|
||||||
|
directory,
|
||||||
|
arg_custom_mounts,
|
||||||
|
arg_n_custom_mounts,
|
||||||
|
arg_userns_mode != USER_NAMESPACE_NO,
|
||||||
|
arg_uid_shift,
|
||||||
|
arg_uid_range,
|
||||||
|
arg_selinux_apifs_context);
|
||||||
if (r < 0)
|
if (r < 0)
|
||||||
return r;
|
return r;
|
||||||
|
|
||||||
r = mount_cgroups(directory, arg_unified_cgroup_hierarchy, arg_userns, arg_uid_shift, arg_uid_range, arg_selinux_apifs_context);
|
r = mount_cgroups(
|
||||||
|
directory,
|
||||||
|
arg_unified_cgroup_hierarchy,
|
||||||
|
arg_userns_mode != USER_NAMESPACE_NO,
|
||||||
|
arg_uid_shift,
|
||||||
|
arg_uid_range,
|
||||||
|
arg_selinux_apifs_context);
|
||||||
if (r < 0)
|
if (r < 0)
|
||||||
return r;
|
return r;
|
||||||
|
|
||||||
@ -2838,7 +2986,7 @@ static int outer_child(
|
|||||||
pid = raw_clone(SIGCHLD|CLONE_NEWNS|
|
pid = raw_clone(SIGCHLD|CLONE_NEWNS|
|
||||||
(arg_share_system ? 0 : CLONE_NEWIPC|CLONE_NEWPID|CLONE_NEWUTS) |
|
(arg_share_system ? 0 : CLONE_NEWIPC|CLONE_NEWPID|CLONE_NEWUTS) |
|
||||||
(arg_private_network ? CLONE_NEWNET : 0) |
|
(arg_private_network ? CLONE_NEWNET : 0) |
|
||||||
(arg_userns ? CLONE_NEWUSER : 0),
|
(arg_userns_mode != USER_NAMESPACE_NO ? CLONE_NEWUSER : 0),
|
||||||
NULL);
|
NULL);
|
||||||
if (pid < 0)
|
if (pid < 0)
|
||||||
return log_error_errno(errno, "Failed to fork inner child: %m");
|
return log_error_errno(errno, "Failed to fork inner child: %m");
|
||||||
@ -2882,6 +3030,61 @@ static int outer_child(
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Picks a UID/GID base for the container and takes a lock on it.
 *
 * The value passed in via *shift is tried first; whenever a candidate turns out to be unusable a new one is
 * derived from random bytes. A candidate is accepted if it lies within UID_SHIFT_PICK_MIN..UID_SHIFT_PICK_MAX,
 * is a multiple of 2^16, its lock file below /run/systemd/nspawn-uid/ could be taken, and neither its first
 * nor its 0xFFFE-th UID/GID is known to getpwuid()/getgrgid().
 *
 * On success returns 0, stores the accepted base in *shift and hands the lock over in *ret_lock_file.
 * Returns -EBUSY when the attempts are used up, or the error of make_lock_file() if locking fails for any
 * reason other than the lock being held already. */
static int uid_shift_pick(uid_t *shift, LockFile *ret_lock_file) {
        unsigned attempts_left;
        uid_t pick;
        int r;

        assert(shift);
        assert(ret_lock_file);
        assert(arg_userns_mode == USER_NAMESPACE_PICK);
        assert(arg_uid_range == 0x10000U);

        pick = *shift;

        /* Best effort, a failure here is deliberately ignored */
        (void) mkdir("/run/systemd/nspawn-uid", 0755);

        for (attempts_left = 99; attempts_left > 0; attempts_left--) {
                char lock_path[strlen("/run/systemd/nspawn-uid/") + DECIMAL_STR_MAX(uid_t) + 1];
                _cleanup_release_lock_file_ LockFile lock = LOCK_FILE_INIT;
                bool eligible;

                eligible =
                        pick >= UID_SHIFT_PICK_MIN &&
                        pick <= UID_SHIFT_PICK_MAX &&
                        (pick & UINT32_C(0xFFFF)) == 0;

                if (eligible) {
                        xsprintf(lock_path, "/run/systemd/nspawn-uid/" UID_FMT, pick);

                        r = make_lock_file(lock_path, LOCK_EX|LOCK_NB, &lock);
                        if (r < 0 && r != -EBUSY) /* -EBUSY: range already taken by another nspawn instance */
                                return r;

                        /* Make some superficial checks whether the range is currently known in the user
                         * database */
                        if (r >= 0 &&
                            !getpwuid(pick) &&
                            !getpwuid(pick + UINT32_C(0xFFFE)) &&
                            !getgrgid(pick) &&
                            !getgrgid(pick + UINT32_C(0xFFFE))) {

                                /* Pass the lock to the caller, and reset our copy so that the cleanup
                                 * handler does not release it again */
                                *ret_lock_file = lock;
                                lock = (struct LockFile) LOCK_FILE_INIT;

                                *shift = pick;
                                return 0;
                        }
                }

                /* Candidate is not usable, roll a new one and round it down to a 2^16 boundary */
                random_bytes(&pick, sizeof(pick));
                pick = (pick % (UID_SHIFT_PICK_MAX - UID_SHIFT_PICK_MIN)) + UID_SHIFT_PICK_MIN;
                pick &= (uid_t) UINT32_C(0xFFFF0000);
        }

        return -EBUSY;
}
|
||||||
|
|
||||||
static int setup_uid_map(pid_t pid) {
|
static int setup_uid_map(pid_t pid) {
|
||||||
char uid_map[strlen("/proc//uid_map") + DECIMAL_STR_MAX(uid_t) + 1], line[DECIMAL_STR_MAX(uid_t)*3+3+1];
|
char uid_map[strlen("/proc//uid_map") + DECIMAL_STR_MAX(uid_t) + 1], line[DECIMAL_STR_MAX(uid_t)*3+3+1];
|
||||||
int r;
|
int r;
|
||||||
@ -3113,6 +3316,19 @@ static int load_settings(void) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if ((arg_settings_mask & SETTING_USERNS) == 0 &&
|
||||||
|
settings->userns_mode != _USER_NAMESPACE_MODE_INVALID) {
|
||||||
|
|
||||||
|
if (!arg_settings_trusted)
|
||||||
|
log_warning("Ignoring PrivateUsers= and PrivateUsersChown= settings, file %s is not trusted.", p);
|
||||||
|
else {
|
||||||
|
arg_userns_mode = settings->userns_mode;
|
||||||
|
arg_uid_shift = settings->uid_shift;
|
||||||
|
arg_uid_range = settings->uid_range;
|
||||||
|
arg_userns_chown = settings->userns_chown;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -3351,20 +3567,42 @@ int main(int argc, char *argv[]) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
for (;;) {
|
for (;;) {
|
||||||
_cleanup_close_pair_ int kmsg_socket_pair[2] = { -1, -1 }, rtnl_socket_pair[2] = { -1, -1 },
|
|
||||||
pid_socket_pair[2] = { -1, -1 }, uuid_socket_pair[2] = { -1, -1 }, uid_shift_socket_pair[2] = { -1, -1 };
|
|
||||||
ContainerStatus container_status;
|
|
||||||
_cleanup_(barrier_destroy) Barrier barrier = BARRIER_NULL;
|
|
||||||
static const struct sigaction sa = {
|
static const struct sigaction sa = {
|
||||||
.sa_handler = nop_signal_handler,
|
.sa_handler = nop_signal_handler,
|
||||||
.sa_flags = SA_NOCLDSTOP,
|
.sa_flags = SA_NOCLDSTOP,
|
||||||
};
|
};
|
||||||
int ifi = 0;
|
|
||||||
ssize_t l;
|
_cleanup_release_lock_file_ LockFile uid_shift_lock = LOCK_FILE_INIT;
|
||||||
|
_cleanup_close_ int etc_passwd_lock = -1;
|
||||||
|
_cleanup_close_pair_ int
|
||||||
|
kmsg_socket_pair[2] = { -1, -1 },
|
||||||
|
rtnl_socket_pair[2] = { -1, -1 },
|
||||||
|
pid_socket_pair[2] = { -1, -1 },
|
||||||
|
uuid_socket_pair[2] = { -1, -1 },
|
||||||
|
uid_shift_socket_pair[2] = { -1, -1 };
|
||||||
|
_cleanup_(barrier_destroy) Barrier barrier = BARRIER_NULL;
|
||||||
_cleanup_(sd_event_unrefp) sd_event *event = NULL;
|
_cleanup_(sd_event_unrefp) sd_event *event = NULL;
|
||||||
_cleanup_(pty_forward_freep) PTYForward *forward = NULL;
|
_cleanup_(pty_forward_freep) PTYForward *forward = NULL;
|
||||||
_cleanup_(sd_netlink_unrefp) sd_netlink *rtnl = NULL;
|
_cleanup_(sd_netlink_unrefp) sd_netlink *rtnl = NULL;
|
||||||
|
ContainerStatus container_status;
|
||||||
char last_char = 0;
|
char last_char = 0;
|
||||||
|
int ifi = 0;
|
||||||
|
ssize_t l;
|
||||||
|
|
||||||
|
if (arg_userns_mode == USER_NAMESPACE_PICK) {
        /* When we shall pick the UID/GID range, let's first lock /etc/passwd, so that we can safely
         * check with getpwuid() if the specific user already exists. Note that /etc might be
         * read-only, in which case this will fail with EROFS. But that's really OK, as in that case we
         * can be reasonably sure that no users are going to be added. Note that getpwuid() checks are
         * really just an extra safety net. We kinda assume that the UID range we allocate from is
         * really ours. */

        etc_passwd_lock = take_etc_passwd_lock(NULL);
        if (etc_passwd_lock < 0 && etc_passwd_lock != -EROFS) {
                /* The error code lives in etc_passwd_lock, not in r: log that one, and propagate it
                 * through r so that the "finish" path sees a failure instead of whatever value r
                 * happened to hold from an earlier call. */
                r = log_error_errno(etc_passwd_lock, "Failed to take /etc/passwd lock: %m");
                goto finish;
        }
}
|
||||||
|
|
||||||
r = barrier_create(&barrier);
|
r = barrier_create(&barrier);
|
||||||
if (r < 0) {
|
if (r < 0) {
|
||||||
@ -3392,7 +3630,7 @@ int main(int argc, char *argv[]) {
|
|||||||
goto finish;
|
goto finish;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (arg_userns)
|
if (arg_userns_mode != USER_NAMESPACE_NO)
|
||||||
if (socketpair(AF_UNIX, SOCK_SEQPACKET|SOCK_CLOEXEC, 0, uid_shift_socket_pair) < 0) {
|
if (socketpair(AF_UNIX, SOCK_SEQPACKET|SOCK_CLOEXEC, 0, uid_shift_socket_pair) < 0) {
|
||||||
r = log_error_errno(errno, "Failed to create uid shift socket pair: %m");
|
r = log_error_errno(errno, "Failed to create uid shift socket pair: %m");
|
||||||
goto finish;
|
goto finish;
|
||||||
@ -3468,6 +3706,43 @@ int main(int argc, char *argv[]) {
|
|||||||
uuid_socket_pair[1] = safe_close(uuid_socket_pair[1]);
|
uuid_socket_pair[1] = safe_close(uuid_socket_pair[1]);
|
||||||
uid_shift_socket_pair[1] = safe_close(uid_shift_socket_pair[1]);
|
uid_shift_socket_pair[1] = safe_close(uid_shift_socket_pair[1]);
|
||||||
|
|
||||||
|
if (arg_userns_mode != USER_NAMESPACE_NO) {
        /* The child just let us know the UID shift it might have read from the image. */
        l = recv(uid_shift_socket_pair[0], &arg_uid_shift, sizeof(arg_uid_shift), 0);
        if (l < 0) {
                r = log_error_errno(errno, "Failed to read UID shift: %m");
                goto finish;
        }
        if (l != sizeof(arg_uid_shift)) {
                log_error("Short read while reading UID shift.");
                /* Negative errno, matching the short-write path below and the other error exits
                 * of this function; a positive EIO would not look like an error to r < 0 checks. */
                r = -EIO;
                goto finish;
        }

        if (arg_userns_mode == USER_NAMESPACE_PICK) {
                /* If we are supposed to pick the UID shift, let's try to use the shift read from the
                 * image, but if that's already in use, pick a new one, and report back to the child,
                 * which one we now picked. */

                r = uid_shift_pick(&arg_uid_shift, &uid_shift_lock);
                if (r < 0) {
                        log_error_errno(r, "Failed to pick suitable UID/GID range: %m");
                        goto finish;
                }

                /* Tell the child which shift was finally chosen. */
                l = send(uid_shift_socket_pair[0], &arg_uid_shift, sizeof(arg_uid_shift), MSG_NOSIGNAL);
                if (l < 0) {
                        r = log_error_errno(errno, "Failed to send UID shift: %m");
                        goto finish;
                }
                if (l != sizeof(arg_uid_shift)) {
                        log_error("Short write while writing UID shift.");
                        r = -EIO;
                        goto finish;
                }
        }
}
|
||||||
|
|
||||||
/* Wait for the outer child. */
|
/* Wait for the outer child. */
|
||||||
r = wait_for_terminate_and_warn("namespace helper", pid, NULL);
|
r = wait_for_terminate_and_warn("namespace helper", pid, NULL);
|
||||||
if (r < 0)
|
if (r < 0)
|
||||||
@ -3504,24 +3779,13 @@ int main(int argc, char *argv[]) {
|
|||||||
|
|
||||||
log_debug("Init process invoked as PID " PID_FMT, pid);
|
log_debug("Init process invoked as PID " PID_FMT, pid);
|
||||||
|
|
||||||
if (arg_userns) {
|
if (arg_userns_mode != USER_NAMESPACE_NO) {
|
||||||
if (!barrier_place_and_sync(&barrier)) { /* #1 */
|
if (!barrier_place_and_sync(&barrier)) { /* #1 */
|
||||||
log_error("Child died too early.");
|
log_error("Child died too early.");
|
||||||
r = -ESRCH;
|
r = -ESRCH;
|
||||||
goto finish;
|
goto finish;
|
||||||
}
|
}
|
||||||
|
|
||||||
l = recv(uid_shift_socket_pair[0], &arg_uid_shift, sizeof(arg_uid_shift), 0);
|
|
||||||
if (l < 0) {
|
|
||||||
r = log_error_errno(errno, "Failed to read UID shift: %m");
|
|
||||||
goto finish;
|
|
||||||
}
|
|
||||||
if (l != sizeof(arg_uid_shift)) {
|
|
||||||
log_error("Short read while reading UID shift.");
|
|
||||||
r = EIO;
|
|
||||||
goto finish;
|
|
||||||
}
|
|
||||||
|
|
||||||
r = setup_uid_map(pid);
|
r = setup_uid_map(pid);
|
||||||
if (r < 0)
|
if (r < 0)
|
||||||
goto finish;
|
goto finish;
|
||||||
@ -3619,6 +3883,10 @@ int main(int argc, char *argv[]) {
|
|||||||
goto finish;
|
goto finish;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* At this point we have made use of the UID we picked, and thus nss-mymachines will make them appear
|
||||||
|
* in getpwuid(), thus we can release the /etc/passwd lock. */
|
||||||
|
etc_passwd_lock = safe_close(etc_passwd_lock);
|
||||||
|
|
||||||
sd_notifyf(false,
|
sd_notifyf(false,
|
||||||
"READY=1\n"
|
"READY=1\n"
|
||||||
"STATUS=Container running.\n"
|
"STATUS=Container running.\n"
|
||||||
|
61
src/nspawn/test-patch-uid.c
Normal file
61
src/nspawn/test-patch-uid.c
Normal file
@ -0,0 +1,61 @@
|
|||||||
|
/***
|
||||||
|
This file is part of systemd.
|
||||||
|
|
||||||
|
Copyright 2016 Lennart Poettering
|
||||||
|
|
||||||
|
systemd is free software; you can redistribute it and/or modify it
|
||||||
|
under the terms of the GNU Lesser General Public License as published by
|
||||||
|
the Free Software Foundation; either version 2.1 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
systemd is distributed in the hope that it will be useful, but
|
||||||
|
WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
Lesser General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU Lesser General Public License
|
||||||
|
along with systemd; If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
***/
|
||||||
|
|
||||||
|
#include <stdlib.h>
|
||||||
|
|
||||||
|
#include "log.h"
|
||||||
|
#include "nspawn-patch-uid.h"
|
||||||
|
#include "user-util.h"
|
||||||
|
#include "util.h"
|
||||||
|
|
||||||
|
int main(int argc, char *argv[]) {
|
||||||
|
uid_t shift, range;
|
||||||
|
int r;
|
||||||
|
|
||||||
|
log_set_max_level(LOG_DEBUG);
|
||||||
|
log_parse_environment();
|
||||||
|
log_open();
|
||||||
|
|
||||||
|
if (argc != 4) {
|
||||||
|
log_error("Expected PATH SHIFT RANGE parameters.");
|
||||||
|
return EXIT_FAILURE;
|
||||||
|
}
|
||||||
|
|
||||||
|
r = parse_uid(argv[2], &shift);
|
||||||
|
if (r < 0) {
|
||||||
|
log_error_errno(r, "Failed to parse UID shift %s.", argv[2]);
|
||||||
|
return EXIT_FAILURE;
|
||||||
|
}
|
||||||
|
|
||||||
|
r = parse_gid(argv[3], &range);
|
||||||
|
if (r < 0) {
|
||||||
|
log_error_errno(r, "Failed to parse UID range %s.", argv[3]);
|
||||||
|
return EXIT_FAILURE;
|
||||||
|
}
|
||||||
|
|
||||||
|
r = path_patch_uid(argv[1], shift, range);
|
||||||
|
if (r < 0) {
|
||||||
|
log_error_errno(r, "Failed to patch directory tree: %m");
|
||||||
|
return EXIT_FAILURE;
|
||||||
|
}
|
||||||
|
|
||||||
|
log_info("Changed: %s", yes_no(r));
|
||||||
|
|
||||||
|
return EXIT_SUCCESS;
|
||||||
|
}
|
@ -13,7 +13,7 @@ Before=machines.target
|
|||||||
After=network.target
|
After=network.target
|
||||||
|
|
||||||
[Service]
|
[Service]
|
||||||
ExecStart=@bindir@/systemd-nspawn --quiet --keep-unit --boot --link-journal=try-guest --network-veth --settings=override --machine=%i
|
ExecStart=@bindir@/systemd-nspawn --quiet --keep-unit --boot --link-journal=try-guest --network-veth -U --settings=override --machine=%i
|
||||||
KillMode=mixed
|
KillMode=mixed
|
||||||
Type=notify
|
Type=notify
|
||||||
RestartForceExitStatus=133
|
RestartForceExitStatus=133
|
||||||
|
Loading…
Reference in New Issue
Block a user