mirror of
https://github.com/systemd/systemd.git
synced 2025-03-08 08:58:27 +03:00
core: do not imply PrivateTmp with DynamicUser, create a private tmpfs instead
DynamicUser= enables PrivateTmp= implicitly to avoid files owned by reusable uids leaking into the host. Change it to instead create a fully private tmpfs instance instead, which also ensures the same result, since it has less impactful semantics with respect to PrivateTmp=yes, which links the mount namespace to the host's /tmp instead. If a user specifies PrivateTmp manually, let the existing behaviour unchanged to ensure backward compatibility is not broken.
This commit is contained in:
parent
a2979bb842
commit
0e551b04ef
@ -671,12 +671,13 @@
|
||||
part of a unit for which dynamic users/groups are enabled do not leave files or directories owned by
|
||||
these users/groups around, as a different unit might get the same UID/GID assigned later on, and thus
|
||||
gain access to these files or directories. If <varname>DynamicUser=</varname> is enabled,
|
||||
<varname>RemoveIPC=</varname> and <varname>PrivateTmp=</varname> are implied (and cannot be turned
|
||||
off). This ensures that the lifetime of IPC objects and temporary files created by the executed
|
||||
processes is bound to the runtime of the service, and hence the lifetime of the dynamic
|
||||
user/group. Since <filename>/tmp/</filename> and <filename>/var/tmp/</filename> are usually the only
|
||||
world-writable directories on a system this ensures that a unit making use of dynamic user/group
|
||||
allocation cannot leave files around after unit termination. Furthermore
|
||||
<varname>RemoveIPC=</varname> is implied (and cannot be turned off). This ensures that the lifetime
|
||||
of IPC objects and temporary files created by the executed processes is bound to the runtime of the
|
||||
service, and hence the lifetime of the dynamic user/group. Since <filename>/tmp/</filename> and
|
||||
<filename>/var/tmp/</filename> are usually the only world-writable directories on a system, unless
|
||||
<varname>PrivateTmp=</varname> is manually enabled, those directories will be placed on a private
|
||||
tmpfs filesystem, as this ensures that a unit making use of dynamic user/group allocation cannot
|
||||
leave files around after unit termination. Furthermore
|
||||
<varname>NoNewPrivileges=</varname> and <varname>RestrictSUIDSGID=</varname> are implicitly enabled
|
||||
(and cannot be disabled), to ensure that processes invoked cannot take benefit or create SUID/SGID
|
||||
files or directories. Moreover <varname>ProtectSystem=strict</varname> and
|
||||
|
@ -1055,7 +1055,7 @@ const sd_bus_vtable bus_exec_vtable[] = {
|
||||
SD_BUS_PROPERTY("NoExecPaths", "as", NULL, offsetof(ExecContext, no_exec_paths), SD_BUS_VTABLE_PROPERTY_CONST),
|
||||
SD_BUS_PROPERTY("ExecSearchPath", "as", NULL, offsetof(ExecContext, exec_search_path), SD_BUS_VTABLE_PROPERTY_CONST),
|
||||
SD_BUS_PROPERTY("MountFlags", "t", bus_property_get_ulong, offsetof(ExecContext, mount_propagation_flag), SD_BUS_VTABLE_PROPERTY_CONST),
|
||||
SD_BUS_PROPERTY("PrivateTmp", "b", bus_property_get_bool, offsetof(ExecContext, private_tmp), SD_BUS_VTABLE_PROPERTY_CONST),
|
||||
SD_BUS_PROPERTY("PrivateTmp", "b", bus_property_get_private_tmp, offsetof(ExecContext, private_tmp), SD_BUS_VTABLE_PROPERTY_CONST),
|
||||
SD_BUS_PROPERTY("PrivateDevices", "b", bus_property_get_bool, offsetof(ExecContext, private_devices), SD_BUS_VTABLE_PROPERTY_CONST),
|
||||
SD_BUS_PROPERTY("ProtectClock", "b", bus_property_get_bool, offsetof(ExecContext, protect_clock), SD_BUS_VTABLE_PROPERTY_CONST),
|
||||
SD_BUS_PROPERTY("ProtectKernelTunables", "b", bus_property_get_bool, offsetof(ExecContext, protect_kernel_tunables), SD_BUS_VTABLE_PROPERTY_CONST),
|
||||
@ -1736,7 +1736,7 @@ int bus_exec_context_set_transient_property(
|
||||
return bus_set_transient_unsigned(u, name, &c->tty_cols, message, flags, error);
|
||||
|
||||
if (streq(name, "PrivateTmp"))
|
||||
return bus_set_transient_bool(u, name, &c->private_tmp, message, flags, error);
|
||||
return bus_set_transient_private_tmp(u, name, &c->private_tmp, message, flags, error);
|
||||
|
||||
if (streq(name, "PrivateDevices"))
|
||||
return bus_set_transient_bool(u, name, &c->private_devices, message, flags, error);
|
||||
|
@ -150,6 +150,45 @@ int bus_set_transient_usec_internal(
|
||||
return 1;
|
||||
}
|
||||
|
||||
int bus_set_transient_private_tmp(
|
||||
Unit *u,
|
||||
const char *name,
|
||||
PrivateTmp *p,
|
||||
sd_bus_message *message,
|
||||
UnitWriteFlags flags,
|
||||
sd_bus_error *error) {
|
||||
|
||||
int v, r;
|
||||
|
||||
assert(p);
|
||||
|
||||
r = sd_bus_message_read(message, "b", &v);
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
||||
if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
|
||||
*p = v ? PRIVATE_TMP_CONNECTED : PRIVATE_TMP_OFF;
|
||||
unit_write_settingf(u, flags, name, "%s=%s", name, yes_no(v));
|
||||
}
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
int bus_property_get_private_tmp(
|
||||
sd_bus *bus,
|
||||
const char *path,
|
||||
const char *interface,
|
||||
const char *property,
|
||||
sd_bus_message *reply,
|
||||
void *userdata,
|
||||
sd_bus_error *error) {
|
||||
|
||||
PrivateTmp *p = ASSERT_PTR(userdata);
|
||||
int b = *p != PRIVATE_TMP_OFF;
|
||||
|
||||
return sd_bus_message_append_basic(reply, 'b', &b);
|
||||
}
|
||||
|
||||
int bus_verify_manage_units_async_full(
|
||||
Unit *u,
|
||||
const char *verb,
|
||||
|
@ -4,6 +4,7 @@
|
||||
#include "sd-bus.h"
|
||||
|
||||
#include "dissect-image.h"
|
||||
#include "execute.h"
|
||||
#include "unit.h"
|
||||
|
||||
int bus_property_get_triggered_unit(sd_bus *bus, const char *path, const char *interface, const char *property, sd_bus_message *reply, void *userdata, sd_bus_error *error);
|
||||
@ -244,6 +245,7 @@ int bus_set_transient_string(Unit *u, const char *name, char **p, sd_bus_message
|
||||
int bus_set_transient_bool(Unit *u, const char *name, bool *p, sd_bus_message *message, UnitWriteFlags flags, sd_bus_error *error);
|
||||
int bus_set_transient_tristate(Unit *u, const char *name, int *p, sd_bus_message *message, UnitWriteFlags flags, sd_bus_error *error);
|
||||
int bus_set_transient_usec_internal(Unit *u, const char *name, usec_t *p, bool fix_0, sd_bus_message *message, UnitWriteFlags flags, sd_bus_error *error);
|
||||
int bus_set_transient_private_tmp(Unit *u, const char *name, PrivateTmp *p, sd_bus_message *message, UnitWriteFlags flags, sd_bus_error *error);
|
||||
static inline int bus_set_transient_usec(Unit *u, const char *name, usec_t *p, sd_bus_message *message, UnitWriteFlags flags, sd_bus_error *error) {
|
||||
return bus_set_transient_usec_internal(u, name, p, false, message, flags, error);
|
||||
}
|
||||
@ -255,3 +257,4 @@ int bus_verify_manage_units_async_full(Unit *u, const char *verb, const char *po
|
||||
int bus_read_mount_options(sd_bus_message *message, sd_bus_error *error, MountOptions **ret_options, char **ret_format_str, const char *separator);
|
||||
|
||||
int bus_property_get_activation_details(sd_bus *bus, const char *path, const char *interface, const char *property, sd_bus_message *reply, void *userdata, sd_bus_error *error);
|
||||
int bus_property_get_private_tmp(sd_bus *bus, const char *path, const char *interface, const char *property, sd_bus_message *reply, void *userdata, sd_bus_error *error);
|
||||
|
@ -3054,7 +3054,7 @@ static int apply_mount_namespace(
|
||||
_cleanup_strv_free_ char **empty_directories = NULL, **symlinks = NULL,
|
||||
**read_write_paths_cleanup = NULL;
|
||||
_cleanup_free_ char *creds_path = NULL, *incoming_dir = NULL, *propagate_dir = NULL,
|
||||
*extension_dir = NULL, *host_os_release_stage = NULL, *root_image = NULL, *root_dir = NULL;
|
||||
*private_namespace_dir = NULL, *host_os_release_stage = NULL, *root_image = NULL, *root_dir = NULL;
|
||||
const char *tmp_dir = NULL, *var_tmp_dir = NULL;
|
||||
char **read_write_paths;
|
||||
bool setup_os_release_symlink;
|
||||
@ -3108,7 +3108,7 @@ static int apply_mount_namespace(
|
||||
* to world users. Inside of it there's a /tmp that is sticky, and that's the one we want to
|
||||
* use here. This does not apply when we are using /run/systemd/empty as fallback. */
|
||||
|
||||
if (context->private_tmp && runtime && runtime->shared) {
|
||||
if (context->private_tmp == PRIVATE_TMP_CONNECTED && runtime && runtime->shared) {
|
||||
if (streq_ptr(runtime->shared->tmp_dir, RUN_SYSTEMD_EMPTY))
|
||||
tmp_dir = runtime->shared->tmp_dir;
|
||||
else if (runtime->shared->tmp_dir)
|
||||
@ -3145,8 +3145,8 @@ static int apply_mount_namespace(
|
||||
if (!incoming_dir)
|
||||
return -ENOMEM;
|
||||
|
||||
extension_dir = strdup("/run/systemd/unit-extensions");
|
||||
if (!extension_dir)
|
||||
private_namespace_dir = strdup("/run/systemd");
|
||||
if (!private_namespace_dir)
|
||||
return -ENOMEM;
|
||||
|
||||
/* If running under a different root filesystem, propagate the host's os-release. We make a
|
||||
@ -3159,7 +3159,7 @@ static int apply_mount_namespace(
|
||||
} else {
|
||||
assert(params->runtime_scope == RUNTIME_SCOPE_USER);
|
||||
|
||||
if (asprintf(&extension_dir, "/run/user/" UID_FMT "/systemd/unit-extensions", geteuid()) < 0)
|
||||
if (asprintf(&private_namespace_dir, "/run/user/" UID_FMT "/systemd", geteuid()) < 0)
|
||||
return -ENOMEM;
|
||||
|
||||
if (setup_os_release_symlink) {
|
||||
@ -3205,6 +3205,8 @@ static int apply_mount_namespace(
|
||||
.temporary_filesystems = context->temporary_filesystems,
|
||||
.n_temporary_filesystems = context->n_temporary_filesystems,
|
||||
|
||||
.private_tmp = context->private_tmp,
|
||||
|
||||
.mount_images = context->mount_images,
|
||||
.n_mount_images = context->n_mount_images,
|
||||
.mount_image_policy = context->mount_image_policy ?: &image_policy_service,
|
||||
@ -3225,7 +3227,7 @@ static int apply_mount_namespace(
|
||||
|
||||
.propagate_dir = propagate_dir,
|
||||
.incoming_dir = incoming_dir,
|
||||
.extension_dir = extension_dir,
|
||||
.private_namespace_dir = private_namespace_dir,
|
||||
.notify_socket = root_dir || root_image ? params->notify_socket : NULL,
|
||||
.host_os_release_stage = host_os_release_stage,
|
||||
|
||||
@ -3857,7 +3859,7 @@ static bool exec_context_need_unprivileged_private_users(
|
||||
return false;
|
||||
|
||||
return context->private_users ||
|
||||
context->private_tmp ||
|
||||
context->private_tmp != PRIVATE_TMP_OFF ||
|
||||
context->private_devices ||
|
||||
context->private_network ||
|
||||
context->network_namespace_path ||
|
||||
|
@ -1840,7 +1840,7 @@ static int exec_context_serialize(const ExecContext *c, FILE *f) {
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
||||
r = serialize_bool_elide(f, "exec-context-private-tmp", c->private_tmp);
|
||||
r = serialize_item(f, "exec-context-private-tmp", private_tmp_to_string(c->private_tmp));
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
||||
@ -2720,10 +2720,9 @@ static int exec_context_deserialize(ExecContext *c, FILE *f) {
|
||||
if (r < 0)
|
||||
return r;
|
||||
} else if ((val = startswith(l, "exec-context-private-tmp="))) {
|
||||
r = parse_boolean(val);
|
||||
if (r < 0)
|
||||
return r;
|
||||
c->private_tmp = r;
|
||||
c->private_tmp = private_tmp_from_string(val);
|
||||
if (c->private_tmp < 0)
|
||||
return c->private_tmp;
|
||||
} else if ((val = startswith(l, "exec-context-private-devices="))) {
|
||||
r = parse_boolean(val);
|
||||
if (r < 0)
|
||||
|
@ -231,7 +231,10 @@ bool exec_needs_mount_namespace(
|
||||
if (!IN_SET(context->mount_propagation_flag, 0, MS_SHARED))
|
||||
return true;
|
||||
|
||||
if (context->private_tmp && runtime && runtime->shared && (runtime->shared->tmp_dir || runtime->shared->var_tmp_dir))
|
||||
if (context->private_tmp == PRIVATE_TMP_DISCONNECTED)
|
||||
return true;
|
||||
|
||||
if (context->private_tmp == PRIVATE_TMP_CONNECTED && runtime && runtime->shared && (runtime->shared->tmp_dir || runtime->shared->var_tmp_dir))
|
||||
return true;
|
||||
|
||||
if (context->private_devices ||
|
||||
@ -964,7 +967,7 @@ void exec_context_dump(const ExecContext *c, FILE* f, const char *prefix) {
|
||||
prefix, empty_to_root(c->root_directory),
|
||||
prefix, yes_no(c->root_ephemeral),
|
||||
prefix, yes_no(c->non_blocking),
|
||||
prefix, yes_no(c->private_tmp),
|
||||
prefix, private_tmp_to_string(c->private_tmp),
|
||||
prefix, yes_no(c->private_devices),
|
||||
prefix, yes_no(c->protect_kernel_tunables),
|
||||
prefix, yes_no(c->protect_kernel_modules),
|
||||
@ -2155,12 +2158,12 @@ static int exec_shared_runtime_make(
|
||||
assert(id);
|
||||
|
||||
/* It is not necessary to create ExecSharedRuntime object. */
|
||||
if (!exec_needs_network_namespace(c) && !exec_needs_ipc_namespace(c) && !c->private_tmp) {
|
||||
if (!exec_needs_network_namespace(c) && !exec_needs_ipc_namespace(c) && c->private_tmp != PRIVATE_TMP_CONNECTED) {
|
||||
*ret = NULL;
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (c->private_tmp &&
|
||||
if (c->private_tmp == PRIVATE_TMP_CONNECTED &&
|
||||
!(prefixed_path_strv_contains(c->inaccessible_paths, "/tmp") &&
|
||||
(prefixed_path_strv_contains(c->inaccessible_paths, "/var/tmp") ||
|
||||
prefixed_path_strv_contains(c->inaccessible_paths, "/var")))) {
|
||||
|
@ -314,7 +314,7 @@ struct ExecContext {
|
||||
int private_mounts;
|
||||
int mount_apivfs;
|
||||
int memory_ksm;
|
||||
bool private_tmp;
|
||||
PrivateTmp private_tmp;
|
||||
bool private_network;
|
||||
bool private_devices;
|
||||
bool private_users;
|
||||
|
@ -119,7 +119,7 @@
|
||||
{{type}}.BindPaths, config_parse_bind_paths, 0, offsetof({{type}}, exec_context)
|
||||
{{type}}.BindReadOnlyPaths, config_parse_bind_paths, 0, offsetof({{type}}, exec_context)
|
||||
{{type}}.TemporaryFileSystem, config_parse_temporary_filesystems, 0, offsetof({{type}}, exec_context)
|
||||
{{type}}.PrivateTmp, config_parse_bool, 0, offsetof({{type}}, exec_context.private_tmp)
|
||||
{{type}}.PrivateTmp, config_parse_private_tmp, 0, offsetof({{type}}, exec_context)
|
||||
{{type}}.PrivateDevices, config_parse_bool, 0, offsetof({{type}}, exec_context.private_devices)
|
||||
{{type}}.ProtectKernelTunables, config_parse_bool, 0, offsetof({{type}}, exec_context.protect_kernel_tunables)
|
||||
{{type}}.ProtectKernelModules, config_parse_bool, 0, offsetof({{type}}, exec_context.protect_kernel_modules)
|
||||
|
@ -5199,6 +5199,34 @@ int config_parse_temporary_filesystems(
|
||||
}
|
||||
}
|
||||
|
||||
int config_parse_private_tmp(
|
||||
const char* unit,
|
||||
const char *filename,
|
||||
unsigned line,
|
||||
const char *section,
|
||||
unsigned section_line,
|
||||
const char *lvalue,
|
||||
int ltype,
|
||||
const char *rvalue,
|
||||
void *data,
|
||||
void *userdata) {
|
||||
|
||||
ExecContext *c = ASSERT_PTR(data);
|
||||
int r;
|
||||
|
||||
assert(filename);
|
||||
assert(rvalue);
|
||||
|
||||
r = parse_boolean(rvalue);
|
||||
if (r < 0) {
|
||||
log_syntax(unit, LOG_WARNING, filename, line, r, "Failed to parse boolean value: %s ignoring", rvalue);
|
||||
return 0;
|
||||
}
|
||||
|
||||
c->private_tmp = r ? PRIVATE_TMP_CONNECTED : PRIVATE_TMP_OFF;
|
||||
return 0;
|
||||
}
|
||||
|
||||
int config_parse_bind_paths(
|
||||
const char *unit,
|
||||
const char *filename,
|
||||
|
@ -113,6 +113,7 @@ CONFIG_PARSER_PROTOTYPE(config_parse_import_credential);
|
||||
CONFIG_PARSER_PROTOTYPE(config_parse_set_status);
|
||||
CONFIG_PARSER_PROTOTYPE(config_parse_namespace_path_strv);
|
||||
CONFIG_PARSER_PROTOTYPE(config_parse_temporary_filesystems);
|
||||
CONFIG_PARSER_PROTOTYPE(config_parse_private_tmp);
|
||||
CONFIG_PARSER_PROTOTYPE(config_parse_cpu_quota);
|
||||
CONFIG_PARSER_PROTOTYPE(config_parse_allowed_cpuset);
|
||||
CONFIG_PARSER_PROTOTYPE(config_parse_protect_home);
|
||||
|
@ -72,6 +72,7 @@ typedef enum MountMode {
|
||||
MOUNT_EXEC,
|
||||
MOUNT_TMPFS,
|
||||
MOUNT_RUN,
|
||||
MOUNT_PRIVATE_TMPFS, /* Mounted outside the root directory, and used by subsequent mounts */
|
||||
MOUNT_EXTENSION_DIRECTORY, /* Bind-mounted outside the root directory, and used by subsequent mounts */
|
||||
MOUNT_EXTENSION_IMAGE, /* Mounted outside the root directory, and used by subsequent mounts */
|
||||
MOUNT_MQUEUEFS,
|
||||
@ -97,6 +98,8 @@ typedef struct MountEntry {
|
||||
bool nosuid:1; /* Shall set MS_NOSUID on the mount itself */
|
||||
bool noexec:1; /* Shall set MS_NOEXEC on the mount itself */
|
||||
bool exec:1; /* Shall clear MS_NOEXEC on the mount itself */
|
||||
bool create_source_dir:1; /* Create the source directory if it doesn't exist - for implicit bind mounts */
|
||||
mode_t source_dir_mode; /* Mode for the source directory, if it is to be created */
|
||||
MountEntryState state; /* Whether it was already processed or skipped */
|
||||
char *path_malloc; /* Use this instead of 'path_const' if we had to allocate memory */
|
||||
const char *unprefixed_path_const; /* If the path was amended with a prefix, these will save the original */
|
||||
@ -246,6 +249,7 @@ static const char * const mount_mode_table[_MOUNT_MODE_MAX] = {
|
||||
[MOUNT_EXEC] = "exec",
|
||||
[MOUNT_TMPFS] = "tmpfs",
|
||||
[MOUNT_RUN] = "run",
|
||||
[MOUNT_PRIVATE_TMPFS] = "private-tmpfs",
|
||||
[MOUNT_EXTENSION_DIRECTORY] = "extension-directory",
|
||||
[MOUNT_EXTENSION_IMAGE] = "extension-image",
|
||||
[MOUNT_MQUEUEFS] = "mqueuefs",
|
||||
@ -467,7 +471,7 @@ static int append_mount_images(MountList *ml, const MountImage *mount_images, si
|
||||
static int append_extensions(
|
||||
MountList *ml,
|
||||
const char *root,
|
||||
const char *extension_dir,
|
||||
const char *private_namespace_dir,
|
||||
char **hierarchies,
|
||||
const MountImage *mount_images,
|
||||
size_t n_mount_images,
|
||||
@ -482,7 +486,7 @@ static int append_extensions(
|
||||
if (n_mount_images == 0 && strv_isempty(extension_directories))
|
||||
return 0;
|
||||
|
||||
assert(extension_dir);
|
||||
assert(private_namespace_dir);
|
||||
|
||||
n_overlays = strv_length(hierarchies);
|
||||
if (n_overlays == 0)
|
||||
@ -519,7 +523,7 @@ static int append_extensions(
|
||||
"No matching entry in .v/ directory %s found.",
|
||||
m->source);
|
||||
|
||||
if (asprintf(&mount_point, "%s/%zu", extension_dir, i) < 0)
|
||||
if (asprintf(&mount_point, "%s/unit-extensions/%zu", private_namespace_dir, i) < 0)
|
||||
return -ENOMEM;
|
||||
|
||||
for (size_t j = 0; hierarchies && hierarchies[j]; ++j) {
|
||||
@ -556,7 +560,7 @@ static int append_extensions(
|
||||
bool ignore_enoent = false;
|
||||
|
||||
/* Pick up the counter where the ExtensionImages left it. */
|
||||
if (asprintf(&mount_point, "%s/%zu", extension_dir, n_mount_images++) < 0)
|
||||
if (asprintf(&mount_point, "%s/unit-extensions/%zu", private_namespace_dir, n_mount_images++) < 0)
|
||||
return -ENOMEM;
|
||||
|
||||
/* Look for any prefixes */
|
||||
@ -888,8 +892,12 @@ static void drop_outside_root(MountList *ml, const char *root_directory) {
|
||||
|
||||
for (f = ml->mounts, t = ml->mounts; f < ml->mounts + ml->n_mounts; f++) {
|
||||
|
||||
/* ExtensionImages/Directories bases are opened in /run/systemd/unit-extensions on the host */
|
||||
if (!IN_SET(f->mode, MOUNT_EXTENSION_IMAGE, MOUNT_EXTENSION_DIRECTORY) && !path_startswith(mount_entry_path(f), root_directory)) {
|
||||
/* ExtensionImages/Directories bases are opened in /run/[user/xyz/]systemd/unit-extensions
|
||||
* on the host, and a private (invisible to the guest) tmpfs instance is mounted on
|
||||
* /run/[user/xyz/]systemd/unit-private-tmp as the storage backend of private /tmp and
|
||||
* /var/tmp. */
|
||||
if (!IN_SET(f->mode, MOUNT_EXTENSION_IMAGE, MOUNT_EXTENSION_DIRECTORY, MOUNT_PRIVATE_TMPFS) &&
|
||||
!path_startswith(mount_entry_path(f), root_directory)) {
|
||||
log_debug("%s is outside of root directory.", mount_entry_path(f));
|
||||
mount_entry_done(f);
|
||||
continue;
|
||||
@ -1618,6 +1626,14 @@ static int apply_one_mount(
|
||||
* that bind mount source paths are always relative to the host root, hence we pass NULL as
|
||||
* root directory to chase() here. */
|
||||
|
||||
/* When we create implicit mounts, we might need to create the path ourselves as it is on a
|
||||
* just-created tmpfs, for example. */
|
||||
if (m->create_source_dir) {
|
||||
r = mkdir_p(mount_entry_source(m), m->source_dir_mode);
|
||||
if (r < 0)
|
||||
return log_debug_errno(r, "Failed to create source directory %s: %m", mount_entry_source(m));
|
||||
}
|
||||
|
||||
r = chase(mount_entry_source(m), NULL, CHASE_TRAIL_SLASH, &chased, NULL);
|
||||
if (r == -ENOENT && m->ignore) {
|
||||
log_debug_errno(r, "Path %s does not exist, ignoring.", mount_entry_source(m));
|
||||
@ -1637,6 +1653,7 @@ static int apply_one_mount(
|
||||
}
|
||||
|
||||
case MOUNT_EMPTY_DIR:
|
||||
case MOUNT_PRIVATE_TMPFS:
|
||||
case MOUNT_TMPFS:
|
||||
return mount_tmpfs(m);
|
||||
|
||||
@ -1743,7 +1760,7 @@ static int make_read_only(const MountEntry *m, char **deny_list, FILE *proc_self
|
||||
* and running Linux <= 4.17. */
|
||||
submounts =
|
||||
mount_entry_read_only(m) &&
|
||||
!IN_SET(m->mode, MOUNT_EMPTY_DIR, MOUNT_TMPFS);
|
||||
!IN_SET(m->mode, MOUNT_EMPTY_DIR, MOUNT_TMPFS, MOUNT_PRIVATE_TMPFS);
|
||||
if (submounts)
|
||||
r = bind_remount_recursive_with_mountinfo(mount_entry_path(m), new_flags, flags_mask, deny_list, proc_self_mountinfo);
|
||||
else
|
||||
@ -1783,7 +1800,7 @@ static int make_noexec(const MountEntry *m, char **deny_list, FILE *proc_self_mo
|
||||
if (flags_mask == 0) /* No Change? */
|
||||
return 0;
|
||||
|
||||
submounts = !IN_SET(m->mode, MOUNT_EMPTY_DIR, MOUNT_TMPFS);
|
||||
submounts = !IN_SET(m->mode, MOUNT_EMPTY_DIR, MOUNT_TMPFS, MOUNT_PRIVATE_TMPFS);
|
||||
|
||||
if (submounts)
|
||||
r = bind_remount_recursive_with_mountinfo(mount_entry_path(m), new_flags, flags_mask, deny_list, proc_self_mountinfo);
|
||||
@ -1808,7 +1825,7 @@ static int make_nosuid(const MountEntry *m, FILE *proc_self_mountinfo) {
|
||||
if (m->state != MOUNT_APPLIED)
|
||||
return 0;
|
||||
|
||||
submounts = !IN_SET(m->mode, MOUNT_EMPTY_DIR, MOUNT_TMPFS);
|
||||
submounts = !IN_SET(m->mode, MOUNT_EMPTY_DIR, MOUNT_TMPFS, MOUNT_PRIVATE_TMPFS);
|
||||
if (submounts)
|
||||
r = bind_remount_recursive_with_mountinfo(mount_entry_path(m), MS_NOSUID, MS_NOSUID, NULL, proc_self_mountinfo);
|
||||
else
|
||||
@ -1959,8 +1976,11 @@ static int apply_mounts(
|
||||
if (m->state != MOUNT_PENDING)
|
||||
continue;
|
||||
|
||||
/* ExtensionImages/Directories are first opened in the propagate directory, not in the root_directory */
|
||||
r = follow_symlink(!IN_SET(m->mode, MOUNT_EXTENSION_IMAGE, MOUNT_EXTENSION_DIRECTORY) ? root : NULL, m);
|
||||
/* ExtensionImages/Directories are first opened in the propagate directory, not in
|
||||
* the root_directory. A private (invisible to the guest) tmpfs instance is mounted
|
||||
* on /run/[user/xyz/]systemd/unit-private-tmp as the storage backend of private
|
||||
* /tmp and /var/tmp. */
|
||||
r = follow_symlink(!IN_SET(m->mode, MOUNT_EXTENSION_IMAGE, MOUNT_EXTENSION_DIRECTORY, MOUNT_PRIVATE_TMPFS) ? root : NULL, m);
|
||||
if (r < 0) {
|
||||
mount_entry_path_debug_string(root, m, error_path);
|
||||
return r;
|
||||
@ -2245,39 +2265,88 @@ int setup_namespace(const NamespaceParameters *p, char **error_path) {
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
||||
if (p->tmp_dir) {
|
||||
bool ro = streq(p->tmp_dir, RUN_SYSTEMD_EMPTY);
|
||||
/* When DynamicUser=yes enforce that /tmp/ and /var/tmp/ are disconnected from the host's
|
||||
* directories, as they are world writable and ephemeral uid/gid will be used. */
|
||||
if (p->private_tmp == PRIVATE_TMP_DISCONNECTED) {
|
||||
_cleanup_free_ char *tmpfs_dir = NULL, *tmp_dir = NULL, *var_tmp_dir = NULL;
|
||||
MountEntry *tmpfs_entry, *tmp_entry, *var_tmp_entry;
|
||||
|
||||
MountEntry *me = mount_list_extend(&ml);
|
||||
if (!me)
|
||||
tmpfs_dir = path_join(p->private_namespace_dir, "unit-private-tmp");
|
||||
tmp_dir = path_join(tmpfs_dir, "tmp");
|
||||
var_tmp_dir = path_join(tmpfs_dir, "var-tmp");
|
||||
if (!tmpfs_dir || !tmp_dir || !var_tmp_dir)
|
||||
return log_oom_debug();
|
||||
|
||||
*me = (MountEntry) {
|
||||
tmpfs_entry = mount_list_extend(&ml);
|
||||
if (!tmpfs_entry)
|
||||
return log_oom_debug();
|
||||
*tmpfs_entry = (MountEntry) {
|
||||
.path_malloc = TAKE_PTR(tmpfs_dir),
|
||||
.mode = MOUNT_PRIVATE_TMPFS,
|
||||
.options_const = "mode=0700" NESTED_TMPFS_LIMITS,
|
||||
.flags = MS_NODEV|MS_STRICTATIME,
|
||||
.has_prefix = true,
|
||||
};
|
||||
|
||||
tmp_entry = mount_list_extend(&ml);
|
||||
if (!tmp_entry)
|
||||
return log_oom_debug();
|
||||
*tmp_entry = (MountEntry) {
|
||||
.source_malloc = TAKE_PTR(tmp_dir),
|
||||
.path_const = "/tmp",
|
||||
.mode = ro ? MOUNT_PRIVATE_TMP_READ_ONLY : MOUNT_PRIVATE_TMP,
|
||||
.source_const = p->tmp_dir,
|
||||
.mode = MOUNT_BIND,
|
||||
.source_dir_mode = 01777,
|
||||
.create_source_dir = true,
|
||||
};
|
||||
}
|
||||
|
||||
if (p->var_tmp_dir) {
|
||||
bool ro = streq(p->var_tmp_dir, RUN_SYSTEMD_EMPTY);
|
||||
|
||||
MountEntry *me = mount_list_extend(&ml);
|
||||
if (!me)
|
||||
var_tmp_entry = mount_list_extend(&ml);
|
||||
if (!var_tmp_entry)
|
||||
return log_oom_debug();
|
||||
|
||||
*me = (MountEntry) {
|
||||
*var_tmp_entry = (MountEntry) {
|
||||
.source_malloc = TAKE_PTR(var_tmp_dir),
|
||||
.path_const = "/var/tmp",
|
||||
.mode = ro ? MOUNT_PRIVATE_TMP_READ_ONLY : MOUNT_PRIVATE_TMP,
|
||||
.source_const = p->var_tmp_dir,
|
||||
.mode = MOUNT_BIND,
|
||||
.source_dir_mode = 01777,
|
||||
.create_source_dir = true,
|
||||
};
|
||||
|
||||
/* Ensure that the tmpfs is mounted first, and bind mounts are added later. */
|
||||
assert_cc(MOUNT_BIND < MOUNT_PRIVATE_TMPFS);
|
||||
} else {
|
||||
if (p->tmp_dir) {
|
||||
bool ro = streq(p->tmp_dir, RUN_SYSTEMD_EMPTY);
|
||||
|
||||
MountEntry *me = mount_list_extend(&ml);
|
||||
if (!me)
|
||||
return log_oom_debug();
|
||||
|
||||
*me = (MountEntry) {
|
||||
.path_const = "/tmp",
|
||||
.mode = ro ? MOUNT_PRIVATE_TMP_READ_ONLY : MOUNT_PRIVATE_TMP,
|
||||
.source_const = p->tmp_dir,
|
||||
};
|
||||
}
|
||||
|
||||
if (p->var_tmp_dir) {
|
||||
bool ro = streq(p->var_tmp_dir, RUN_SYSTEMD_EMPTY);
|
||||
|
||||
MountEntry *me = mount_list_extend(&ml);
|
||||
if (!me)
|
||||
return log_oom_debug();
|
||||
|
||||
*me = (MountEntry) {
|
||||
.path_const = "/var/tmp",
|
||||
.mode = ro ? MOUNT_PRIVATE_TMP_READ_ONLY : MOUNT_PRIVATE_TMP,
|
||||
.source_const = p->var_tmp_dir,
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
r = append_mount_images(&ml, p->mount_images, p->n_mount_images);
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
||||
r = append_extensions(&ml, root, p->extension_dir, hierarchies, p->extension_images, p->n_extension_images, p->extension_directories);
|
||||
r = append_extensions(&ml, root, p->private_namespace_dir, hierarchies, p->extension_images, p->n_extension_images, p->extension_directories);
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
||||
@ -2530,10 +2599,12 @@ int setup_namespace(const NamespaceParameters *p, char **error_path) {
|
||||
if (setup_propagate)
|
||||
(void) mkdir_p(p->propagate_dir, 0600);
|
||||
|
||||
if (p->n_extension_images > 0 || !strv_isempty(p->extension_directories))
|
||||
if (p->n_extension_images > 0 || !strv_isempty(p->extension_directories)) {
|
||||
/* ExtensionImages/Directories mountpoint directories will be created while parsing the
|
||||
* mounts to create, so have the parent ready */
|
||||
(void) mkdir_p(p->extension_dir, 0600);
|
||||
char *extension_dir = strjoina(p->private_namespace_dir, "/unit-extensions");
|
||||
(void) mkdir_p(extension_dir, 0600);
|
||||
}
|
||||
|
||||
/* Remount / as SLAVE so that nothing now mounted in the namespace
|
||||
* shows up in the parent */
|
||||
@ -3074,3 +3145,11 @@ static const char* const proc_subset_table[_PROC_SUBSET_MAX] = {
|
||||
};
|
||||
|
||||
DEFINE_STRING_TABLE_LOOKUP(proc_subset, ProcSubset);
|
||||
|
||||
static const char* const private_tmp_table[_PRIVATE_TMP_MAX] = {
|
||||
[PRIVATE_TMP_OFF] = "off",
|
||||
[PRIVATE_TMP_CONNECTED] = "connected",
|
||||
[PRIVATE_TMP_DISCONNECTED] = "disconnected",
|
||||
};
|
||||
|
||||
DEFINE_STRING_TABLE_LOOKUP(private_tmp, PrivateTmp);
|
||||
|
@ -53,6 +53,14 @@ typedef enum ProcSubset {
|
||||
_PROC_SUBSET_INVALID = -EINVAL,
|
||||
} ProcSubset;
|
||||
|
||||
typedef enum PrivateTmp {
|
||||
PRIVATE_TMP_OFF,
|
||||
PRIVATE_TMP_CONNECTED, /* Bind mounted from the host's filesystem */
|
||||
PRIVATE_TMP_DISCONNECTED, /* A completely private tmpfs, invisible from the host */
|
||||
_PRIVATE_TMP_MAX,
|
||||
_PRIVATE_TMP_INVALID = -EINVAL,
|
||||
} PrivateTmp;
|
||||
|
||||
struct BindMount {
|
||||
char *source;
|
||||
char *destination;
|
||||
@ -127,7 +135,7 @@ struct NamespaceParameters {
|
||||
const char *propagate_dir;
|
||||
const char *incoming_dir;
|
||||
|
||||
const char *extension_dir;
|
||||
const char *private_namespace_dir;
|
||||
const char *notify_socket;
|
||||
const char *host_os_release_stage;
|
||||
|
||||
@ -150,6 +158,7 @@ struct NamespaceParameters {
|
||||
ProtectSystem protect_system;
|
||||
ProtectProc protect_proc;
|
||||
ProcSubset proc_subset;
|
||||
PrivateTmp private_tmp;
|
||||
};
|
||||
|
||||
int setup_namespace(const NamespaceParameters *p, char **error_path);
|
||||
@ -184,6 +193,9 @@ ProtectProc protect_proc_from_string(const char *s) _pure_;
|
||||
const char* proc_subset_to_string(ProcSubset i) _const_;
|
||||
ProcSubset proc_subset_from_string(const char *s) _pure_;
|
||||
|
||||
const char* private_tmp_to_string(PrivateTmp i) _const_;
|
||||
PrivateTmp private_tmp_from_string(const char *s) _pure_;
|
||||
|
||||
void bind_mount_free_many(BindMount *b, size_t n);
|
||||
int bind_mount_add(BindMount **b, size_t *n, const BindMount *item);
|
||||
|
||||
|
@ -1275,7 +1275,7 @@ int unit_add_exec_dependencies(Unit *u, ExecContext *c) {
|
||||
return r;
|
||||
}
|
||||
|
||||
if (c->private_tmp) {
|
||||
if (c->private_tmp == PRIVATE_TMP_CONNECTED) {
|
||||
r = unit_add_mounts_for(u, "/tmp", UNIT_DEPENDENCY_FILE, UNIT_MOUNT_WANTS);
|
||||
if (r < 0)
|
||||
return r;
|
||||
@ -4287,7 +4287,11 @@ int unit_patch_contexts(Unit *u) {
|
||||
* UID/GID around in the file system or on IPC objects. Hence enforce a strict
|
||||
* sandbox. */
|
||||
|
||||
ec->private_tmp = true;
|
||||
/* With DynamicUser= we want private directories, so if the user hasn't manually
|
||||
* selected PrivateTmp=, enable it, but to a fully private (disconnected) tmpfs
|
||||
* instance. */
|
||||
if (ec->private_tmp == PRIVATE_TMP_OFF)
|
||||
ec->private_tmp = PRIVATE_TMP_DISCONNECTED;
|
||||
ec->remove_ipc = true;
|
||||
ec->protect_system = PROTECT_SYSTEM_STRICT;
|
||||
if (ec->protect_home == PROTECT_HOME_NO)
|
||||
|
@ -340,6 +340,19 @@ if [[ ! -v ASAN_OPTIONS ]] && systemctl --version | grep "+BPF_FRAMEWORK" && ker
|
||||
(! systemd-run --wait --pipe -p RestrictFileSystems="~proc devtmpfs sysfs" ls /sys)
|
||||
fi
|
||||
|
||||
if [[ ! -v ASAN_OPTIONS ]]; then
|
||||
# Ensure DynamicUser=yes does not imply PrivateTmp=yes if TemporaryFileSystem=/tmp /var/tmp is set
|
||||
systemd-run --unit test-07-dynamic-user-tmp.service \
|
||||
--service-type=notify \
|
||||
-p DynamicUser=yes \
|
||||
-p NotifyAccess=all \
|
||||
sh -c 'touch /tmp/a && touch /var/tmp/b && ! test -f /tmp/b && ! test -f /var/tmp/a && systemd-notify --ready && sleep infinity'
|
||||
(! ls /tmp/systemd-private-"$(tr -d '-' < /proc/sys/kernel/random/boot_id)"-test-07-dynamic-user-tmp.service-* &>/dev/null)
|
||||
(! ls /var/tmp/systemd-private-"$(tr -d '-' < /proc/sys/kernel/random/boot_id)"-test-07-dynamic-user-tmp.service-* &>/dev/null)
|
||||
systemctl is-active test-07-dynamic-user-tmp.service
|
||||
systemctl stop test-07-dynamic-user-tmp.service
|
||||
fi
|
||||
|
||||
# Make sure we properly (de)serialize various string arrays, including whitespaces
|
||||
# See: https://github.com/systemd/systemd/issues/31214
|
||||
systemd-run --wait --pipe -p Environment="FOO='bar4 '" \
|
||||
|
Loading…
x
Reference in New Issue
Block a user