1
0
mirror of https://github.com/systemd/systemd.git synced 2025-01-10 05:18:17 +03:00

core: Bind mount notify socket to /run/host/notify in sandboxed units (#35573)

To be able to run systemd in a Type=notify transient unit, the notify
socket can't be bind mounted to /run/systemd/notify, because systemd in
the transient unit wants to use that path as its own notify socket,
which conflicts with systemd on the host.

Instead, for sandboxed units, let's bind mount the notify socket to
/run/host/notify as documented in the container interface. Since we
don't guarantee a stable location for the notify socket and insist users
use $NOTIFY_SOCKET to get its path, this is safe to do.
This commit is contained in:
Daan De Meyer 2024-12-13 13:48:07 +00:00 committed by GitHub
commit 18bb30c3b2
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 48 additions and 11 deletions

View File

@ -1796,6 +1796,7 @@ static int build_environment(
dev_t journal_stream_dev,
ino_t journal_stream_ino,
const char *memory_pressure_path,
bool needs_sandboxing,
char ***ret) {
_cleanup_strv_free_ char **our_env = NULL;
@ -1807,7 +1808,7 @@ static int build_environment(
assert(p);
assert(ret);
#define N_ENV_VARS 19
#define N_ENV_VARS 20
our_env = new0(char*, N_ENV_VARS + _EXEC_DIRECTORY_TYPE_MAX);
if (!our_env)
return -ENOMEM;
@ -2044,6 +2045,14 @@ static int build_environment(
}
}
if (p->notify_socket) {
x = strjoin("NOTIFY_SOCKET=", exec_get_private_notify_socket_path(c, p, needs_sandboxing) ?: p->notify_socket);
if (!x)
return -ENOMEM;
our_env[n_env++] = x;
}
assert(n_env < N_ENV_VARS + _EXEC_DIRECTORY_TYPE_MAX);
#undef N_ENV_VARS
@ -3437,7 +3446,8 @@ static int apply_mount_namespace(
.propagate_dir = propagate_dir,
.incoming_dir = incoming_dir,
.private_namespace_dir = private_namespace_dir,
.notify_socket = root_dir || root_image ? params->notify_socket : NULL,
.host_notify_socket = params->notify_socket,
.notify_socket_path = exec_get_private_notify_socket_path(context, params, needs_sandboxing),
.host_os_release_stage = host_os_release_stage,
/* If DynamicUser=no and RootDirectory= is set then let's pass a relaxed sandbox info,
@ -4874,6 +4884,7 @@ int exec_invoke(
journal_stream_dev,
journal_stream_ino,
memory_pressure_path,
needs_sandboxing,
&our_env);
if (r < 0) {
*exit_status = EXIT_MEMORY;

View File

@ -346,6 +346,28 @@ bool exec_needs_mount_namespace(
return false;
}
/* Returns the constant string "/run/host/notify", or NULL if any of the following does not hold:
 *
 *   - params->notify_socket is set,
 *   - needs_sandboxing is true,
 *   - the context has a root directory or a root image configured,
 *   - exec_context_get_effective_mount_apivfs() returns true for the context,
 *   - EXEC_APPLY_CHROOT is set in params->flags.
 *
 * The conditions are evaluated in the order listed, stopping at the first one that fails. The returned
 * pointer refers to a string literal and must not be freed. */
const char* exec_get_private_notify_socket_path(const ExecContext *context, const ExecParameters *params, bool needs_sandboxing) {
        bool eligible;

        assert(context);
        assert(params);

        /* Short-circuit evaluation keeps the checks in their original order, so later conditions are
         * not evaluated once an earlier one has failed. */
        eligible =
                params->notify_socket &&
                needs_sandboxing &&
                (context->root_directory || context->root_image) &&
                exec_context_get_effective_mount_apivfs(context) &&
                FLAGS_SET(params->flags, EXEC_APPLY_CHROOT);

        return eligible ? "/run/host/notify" : NULL;
}
bool exec_directory_is_private(const ExecContext *context, ExecDirectoryType type) {
assert(context);

View File

@ -632,6 +632,7 @@ ProtectControlGroups exec_get_protect_control_groups(const ExecContext *context,
bool exec_needs_cgroup_namespace(const ExecContext *context, const ExecParameters *params);
bool exec_needs_cgroup_mount(const ExecContext *context, const ExecParameters *params);
bool exec_is_cgroup_mount_read_only(const ExecContext *context, const ExecParameters *params);
/* Returns "/run/host/notify" if the notify socket shall be bind mounted to that path for the unit, or
 * NULL otherwise. The result is a constant string that must not be freed. */
const char* exec_get_private_notify_socket_path(const ExecContext *context, const ExecParameters *params, bool needs_sandboxing);
/* These logging macros do the same logging as those in unit.h, but using ExecContext and ExecParameters
* instead of the unit object, so that it can be used in the sd-executor context (where the unit object is

View File

@ -2762,14 +2762,14 @@ int setup_namespace(const NamespaceParameters *p, char **reterr_path) {
};
}
if (p->notify_socket) {
if (p->notify_socket_path) {
MountEntry *me = mount_list_extend(&ml);
if (!me)
return log_oom_debug();
*me = (MountEntry) {
.path_const = p->notify_socket,
.source_const = p->notify_socket,
.path_const = p->notify_socket_path,
.source_const = p->host_notify_socket,
.mode = MOUNT_BIND,
.read_only = true,
};

View File

@ -174,7 +174,8 @@ struct NamespaceParameters {
const char *incoming_dir;
const char *private_namespace_dir;
const char *notify_socket;
const char *host_notify_socket;
const char *notify_socket_path;
const char *host_os_release_stage;
bool ignore_protect_paths;

View File

@ -1769,14 +1769,11 @@ static int service_spawn_internal(
if (r < 0)
return r;
our_env = new0(char*, 14);
our_env = new0(char*, 13);
if (!our_env)
return -ENOMEM;
if (service_exec_needs_notify_socket(s, exec_params.flags)) {
if (asprintf(our_env + n_env++, "NOTIFY_SOCKET=%s", UNIT(s)->manager->notify_socket) < 0)
return -ENOMEM;
exec_params.notify_socket = UNIT(s)->manager->notify_socket;
if (s->n_fd_store_max > 0)

View File

@ -2,7 +2,7 @@
# SPDX-License-Identifier: LGPL-2.1-or-later
# -*- mode: shell-script; indent-tabs-mode: nil; sh-basic-offset: 4; -*-
# ex: ts=8 sw=4 sts=4 et filetype=sh
# shellcheck disable=SC2233,SC2235
# shellcheck disable=SC2233,SC2235,SC2016
set -eux
set -o pipefail
@ -79,6 +79,11 @@ systemd-run --wait -p RootImage="$MINIMAL_IMAGE.raw" mountpoint /run/systemd/jou
(! systemd-run --wait -p RootImage="$MINIMAL_IMAGE.raw" -p BindLogSockets=no ls /run/systemd/journal/socket)
(! systemd-run --wait -p RootImage="$MINIMAL_IMAGE.raw" -p MountAPIVFS=no ls /run/systemd/journal/socket)
# Test that the notify socket is bind mounted to /run/host/notify in sandboxed environments and
# $NOTIFY_SOCKET is set correctly.
systemd-run --wait -p RootImage="$MINIMAL_IMAGE.raw" -p NotifyAccess=all --service-type=notify --pipe sh -c 'echo READY=1 | ncat --unixsock --udp $NOTIFY_SOCKET --source /run/notify && ls /run/host/notify'
systemd-run --wait -p RootImage="$MINIMAL_IMAGE.raw" -p NotifyAccess=all --service-type=notify --pipe sh -c 'echo READY=1 | ncat --unixsock --udp $NOTIFY_SOCKET --source /run/notify && env' | grep NOTIFY_SOCKET=/run/host/notify
systemd-run -P -p RootImage="$MINIMAL_IMAGE.raw" cat /usr/lib/os-release | grep -q -F "MARKER=1"
mv "$MINIMAL_IMAGE.verity" "$MINIMAL_IMAGE.fooverity"
mv "$MINIMAL_IMAGE.roothash" "$MINIMAL_IMAGE.foohash"