1
0
mirror of https://github.com/systemd/systemd.git synced 2025-01-09 01:18:19 +03:00

core: add systemd-executor binary

Currently we spawn services by forking a child process, doing a bunch
of work, and then exec'ing the service executable.

There are some advantages to this approach:

- quick: we immediately have access to the enormous amount of
  state simply by virtue of sharing the memory with the parent
- easy to refactor and add features
- part of the same binary, will never be out of sync

There are however significant drawbacks:

- doing work after fork and before exec is against glibc's supported
  case for several APIs we call
- copy-on-write trap: anytime any memory is touched in either parent
  or child, a copy of that page will be triggered
- memory footprint of the child process will be memory footprint of
  PID1, but using the cgroup memory limits of the unit

The last issue is especially problematic on resource constrained
systems where hard memory caps are enforced and swap is not allowed.
As soon as PID1 is under load, with no page out due to no swap, and a
service with a low MemoryMax= tries to start, hilarity ensues.

Add a new systemd-executor binary, that is able to receive all the
required state via memfd, deserialize it, prepare the appropriate
data structures and call exec_child.

Use posix_spawn which uses CLONE_VM + CLONE_VFORK, to ensure there is
no copy-on-write (same address space will be used, and parent process
will be frozen, until exec).
The sd-executor binary is pinned by FD on startup, so that we can
guarantee there will be no incompatibilities during upgrades.
This commit is contained in:
Luca Boccassi 2023-06-01 19:51:42 +01:00
parent 56df7a461f
commit bb5232b6a3
15 changed files with 497 additions and 35 deletions

View File

@ -201,3 +201,25 @@ can be found under various directories such as `factory/`, `modprobe.d/`, `netwo
`tools/`, `coccinelle/`, `.github/`, `.semaphore/`, `.mkosi/` host various
utilities and scripts that are used by maintainers and developers. They are not
shipped or installed.
# Service Manager Overview
The Service Manager takes configuration in the form of unit files, credentials,
kernel command line options and D-Bus commands, and based on those manages the
system and spawns other processes. It runs in system mode as PID1, and in user
mode with one instance per user session.
When starting a unit requires forking a new process, configuration for the new
process will be serialized and passed over to the new process, created via a
posix_spawn() call. This is done in order to avoid excessive processing after
a fork() but before an exec(), which is against glibc's best practices and can
also result in a copy-on-write trap. The new process will start as the
`systemd-executor` binary, which will deserialize the configuration and apply
all the options (sandboxing, namespacing, cgroup, etc.) before exec'ing the
configured executable.
```
┌──────┐posix_spawn() ┌───────────┐execve() ┌────────┐
│ PID1 ├─────────────►│sd-executor├────────►│program │
└──────┘ (memfd) └───────────┘ └────────┘
```

View File

@ -225,6 +225,7 @@ conf.set_quoted('SYSCONF_DIR', sysconfdir)
conf.set_quoted('SYSCTL_DIR', sysctldir)
conf.set_quoted('SYSTEMCTL_BINARY_PATH', bindir / 'systemctl')
conf.set_quoted('SYSTEMD_BINARY_PATH', libexecdir / 'systemd')
conf.set_quoted('SYSTEMD_EXECUTOR_BINARY_PATH', libexecdir / 'systemd-executor')
conf.set_quoted('SYSTEMD_CATALOG_DIR', catalogdir)
conf.set_quoted('SYSTEMD_CGROUPS_AGENT_PATH', libexecdir / 'systemd-cgroups-agent')
conf.set_quoted('SYSTEMD_CRYPTSETUP_PATH', bindir / 'systemd-cryptsetup')

View File

@ -36,7 +36,7 @@ typedef enum CGroupController {
CGROUP_CONTROLLER_BPF_SOCKET_BIND,
CGROUP_CONTROLLER_BPF_RESTRICT_NETWORK_INTERFACES,
/* The BPF hook implementing RestrictFileSystems= is not defined here.
* It's applied as late as possible in exec_child() so we don't block
* It's applied as late as possible in exec_invoke() so we don't block
* our own unit setup code. */
_CGROUP_CONTROLLER_MAX,

View File

@ -28,7 +28,7 @@
DEFINE_TRIVIAL_REF_FUNC(DynamicUser, dynamic_user);
static DynamicUser* dynamic_user_free(DynamicUser *d) {
DynamicUser* dynamic_user_free(DynamicUser *d) {
if (!d)
return NULL;
@ -850,3 +850,12 @@ DynamicCreds* dynamic_creds_destroy(DynamicCreds *creds) {
return mfree(creds);
}
/* Drops the DynamicUser objects referenced by 'creds' and stores the value returned by
 * dynamic_user_free() back into both fields. The DynamicCreds structure itself is not freed.
 * Passing NULL is a no-op. */
void dynamic_creds_done(DynamicCreds *creds) {
        DynamicUser *released;

        if (creds == NULL)
                return;

        /* 'user' and 'group' may point to the same object; only free the group separately when it is
         * a distinct one, so that nothing is freed twice. */
        if (creds->user != creds->group)
                dynamic_user_free(creds->group);

        released = dynamic_user_free(creds->user);
        creds->user = released;
        creds->group = released;
}

View File

@ -28,6 +28,7 @@ struct DynamicUser {
int dynamic_user_serialize(Manager *m, FILE *f, FDSet *fds);
int dynamic_user_serialize_one(DynamicUser *d, const char *key, FILE *f, FDSet *fds);
void dynamic_user_deserialize_one(Manager *m, const char *value, FDSet *fds, DynamicUser **ret);
DynamicUser* dynamic_user_free(DynamicUser *d);
void dynamic_user_vacuum(Manager *m, bool close_user);
int dynamic_user_current(DynamicUser *d, uid_t *ret);
@ -39,6 +40,7 @@ int dynamic_creds_realize(DynamicCreds *creds, char **suggested_paths, uid_t *ui
DynamicCreds *dynamic_creds_unref(DynamicCreds *creds);
DynamicCreds *dynamic_creds_destroy(DynamicCreds *creds);
void dynamic_creds_done(DynamicCreds *creds);
DEFINE_TRIVIAL_CLEANUP_FUNC(DynamicCreds*, dynamic_creds_unref);
DEFINE_TRIVIAL_CLEANUP_FUNC(DynamicCreds*, dynamic_creds_destroy);

View File

@ -39,6 +39,7 @@
#include "argv-util.h"
#include "async.h"
#include "barrier.h"
#include "bpf-dlopen.h"
#include "bpf-lsm.h"
#include "btrfs-util.h"
#include "cap-list.h"
@ -56,6 +57,7 @@
#include "escape.h"
#include "exec-credential.h"
#include "execute.h"
#include "execute-serialize.h"
#include "exit-status.h"
#include "fd-util.h"
#include "fileio.h"
@ -85,6 +87,7 @@
#include "seccomp-util.h"
#include "securebits-util.h"
#include "selinux-util.h"
#include "serialize.h"
#include "signal-util.h"
#include "smack-util.h"
#include "socket-util.h"
@ -1789,6 +1792,8 @@ static int apply_lock_personality(const ExecContext *c, const ExecParameters *p)
#if HAVE_LIBBPF
static int apply_restrict_filesystems(const ExecContext *c, const ExecParameters *p) {
int r;
assert(c);
assert(p);
@ -1801,6 +1806,11 @@ static int apply_restrict_filesystems(const ExecContext *c, const ExecParameters
return 0;
}
/* We are in a new binary, so dl-open again */
r = dlopen_bpf();
if (r < 0)
return r;
return lsm_bpf_restrict_filesystems(c->restrict_filesystems, p->cgroup_id, p->bpf_outer_map_fd, c->restrict_filesystems_allow_list);
}
#endif
@ -4062,7 +4072,7 @@ static bool exec_context_shall_confirm_spawn(const ExecContext *context) {
static int exec_context_load_environment(const Unit *unit, const ExecContext *c, char ***l);
static int exec_context_named_iofds(const ExecContext *c, const ExecParameters *p, int named_iofds[static 3]);
static int exec_child(
int exec_invoke(
const ExecCommand *command,
const ExecContext *context,
ExecParameters *params,
@ -4117,6 +4127,8 @@ static int exec_child(
assert(command->path);
assert(!strv_isempty(command->argv));
LOG_CONTEXT_PUSH_EXEC(context, params);
if (context->std_input == EXEC_INPUT_SOCKET ||
context->std_output == EXEC_OUTPUT_SOCKET ||
context->std_error == EXEC_OUTPUT_SOCKET) {
@ -5283,7 +5295,6 @@ static int exec_child(
return log_exec_error_errno(context, params, r, "Failed to execute %s: %m", executable);
}
int exec_spawn(Unit *unit,
ExecCommand *command,
const ExecContext *context,
@ -5292,12 +5303,16 @@ int exec_spawn(Unit *unit,
const CGroupContext *cgroup_context,
pid_t *ret) {
_cleanup_free_ char *subcgroup_path = NULL;
char serialization_fd_number[DECIMAL_STR_MAX(int) + 1];
_cleanup_free_ char *subcgroup_path = NULL, *log_level = NULL, *executor_path = NULL;
_cleanup_fdset_free_ FDSet *fdset = NULL;
_cleanup_fclose_ FILE *f = NULL;
pid_t pid;
int r;
assert(unit);
assert(unit->manager);
assert(unit->manager->executor_fd >= 0);
assert(command);
assert(context);
assert(ret);
@ -5333,35 +5348,56 @@ int exec_spawn(Unit *unit,
}
}
pid = fork();
if (pid < 0)
return log_unit_error_errno(unit, errno, "Failed to fork: %m");
/* In order to avoid copy-on-write traps and OOM-kills when pid1's memory.current is above the
* child's memory.max, serialize all the state needed to start the unit, and pass it to the
* systemd-executor binary. clone() with CLONE_VM + CLONE_VFORK will pause the parent until the exec
* and ensure all memory is shared. The child immediately execs the new binary so the delay should
* be minimal. Once glibc provides a clone3 wrapper we can switch to that, and clone directly in the
* target cgroup. */
if (pid == 0) {
int exit_status;
r = open_serialization_file("sd-executor-state", &f);
if (r < 0)
return log_unit_error_errno(unit, r, "Failed to open serialization stream: %m");
r = exec_child(command,
context,
params,
runtime,
cgroup_context,
&exit_status);
fdset = fdset_new();
if (!fdset)
return log_oom();
if (r < 0) {
const char *status = ASSERT_PTR(
exit_status_to_string(exit_status, EXIT_STATUS_LIBC | EXIT_STATUS_SYSTEMD));
r = exec_serialize_invocation(f, fdset, context, command, params, runtime, cgroup_context);
if (r < 0)
return log_unit_error_errno(unit, r, "Failed to serialize parameters: %m");
log_unit_struct_errno(unit, LOG_ERR, r,
"MESSAGE_ID=" SD_MESSAGE_SPAWN_FAILED_STR,
LOG_UNIT_INVOCATION_ID(unit),
LOG_UNIT_MESSAGE(unit, "Failed at step %s spawning %s: %m",
status, command->path),
"EXECUTABLE=%s", command->path);
} else
assert(exit_status == EXIT_SUCCESS);
if (fseeko(f, 0, SEEK_SET) == (off_t) -1)
return log_unit_error_errno(unit, errno, "Failed to reseek on serialization stream: %m");
_exit(exit_status);
}
r = fd_cloexec(fileno(f), false);
if (r < 0)
return log_unit_error_errno(unit, r, "Failed to set O_CLOEXEC on serialization fd: %m");
r = fdset_cloexec(fdset, false);
if (r < 0)
return log_unit_error_errno(unit, r, "Failed to set O_CLOEXEC on serialized fds: %m");
r = log_level_to_string_alloc(log_get_max_level(), &log_level);
if (r < 0)
return log_unit_error_errno(unit, r, "Failed to convert log level to string: %m");
r = fd_get_path(unit->manager->executor_fd, &executor_path);
if (r < 0)
return log_unit_error_errno(unit, r, "Failed to get executor path from fd: %m");
xsprintf(serialization_fd_number, "%i", fileno(f));
/* The executor binary is pinned, to avoid compatibility problems during upgrades. */
r = posix_spawn_wrapper(FORMAT_PROC_FD_PATH(unit->manager->executor_fd),
STRV_MAKE(executor_path,
"--deserialize", serialization_fd_number,
"--log-level", log_level,
"--log-target", log_target_to_string(manager_get_executor_log_target(unit->manager))),
environ,
&pid);
if (r < 0)
return log_unit_error_errno(unit, r, "Failed to spawn executor: %m");
log_unit_debug(unit, "Forked %s as "PID_FMT, command->path, pid);
@ -5563,7 +5599,7 @@ int exec_context_destroy_mount_ns_dir(Unit *u) {
return 0;
}
static void exec_command_done(ExecCommand *c) {
void exec_command_done(ExecCommand *c) {
assert(c);
c->path = mfree(c->path);
@ -6679,9 +6715,9 @@ static char *destroy_tree(char *path) {
return mfree(path);
}
static ExecSharedRuntime* exec_shared_runtime_free(ExecSharedRuntime *rt) {
void exec_shared_runtime_done(ExecSharedRuntime *rt) {
if (!rt)
return NULL;
return;
if (rt->manager)
(void) hashmap_remove(rt->manager->exec_shared_runtime_by_id, rt->id);
@ -6691,6 +6727,11 @@ static ExecSharedRuntime* exec_shared_runtime_free(ExecSharedRuntime *rt) {
rt->var_tmp_dir = mfree(rt->var_tmp_dir);
safe_close_pair(rt->netns_storage_socket);
safe_close_pair(rt->ipcns_storage_socket);
}
/* Releases everything held by 'rt' via exec_shared_runtime_done() and then frees the object itself.
 * Returns the result of mfree(), so callers can assign it back to the pointer they just released. */
static ExecSharedRuntime* exec_shared_runtime_free(ExecSharedRuntime *rt) {
exec_shared_runtime_done(rt);
return mfree(rt);
}
@ -7216,6 +7257,14 @@ ExecRuntime* exec_runtime_destroy(ExecRuntime *rt) {
return exec_runtime_free(rt);
}
/* Releases the resources owned directly by 'rt': closes the ephemeral storage socket pair and frees
 * the ephemeral copy string. The 'rt' object itself is left allocated. Passing NULL is a no-op. */
void exec_runtime_clear(ExecRuntime *rt) {
        if (rt) {
                safe_close_pair(rt->ephemeral_storage_socket);
                rt->ephemeral_copy = mfree(rt->ephemeral_copy);
        }
}
void exec_params_clear(ExecParameters *p) {
if (!p)
return;
@ -7230,6 +7279,37 @@ void exec_params_clear(ExecParameters *p) {
p->unit_id = mfree(p->unit_id);
p->invocation_id = SD_ID128_NULL;
p->invocation_id_string[0] = '\0';
p->confirm_spawn = mfree(p->confirm_spawn);
}
/* Tears down an ExecParameters object: closes the file descriptors it references, frees the strings
 * and lists it holds, and finally hands over to exec_params_clear() for the remaining fields.
 * Passing NULL is a no-op. */
void exec_params_serialized_done(ExecParameters *p) {
        if (!p)
                return;

        /* Close every socket and storage fd. This has to happen before exec_params_clear() is called
         * at the end of this function. */
        if (p->fds)
                for (size_t k = 0; k < p->n_socket_fds + p->n_storage_fds; k++)
                        p->fds[k] = safe_close(p->fds[k]);

        p->cgroup_path = mfree(p->cgroup_path);
        p->prefix = strv_free(p->prefix);
        p->received_credentials_directory = mfree(p->received_credentials_directory);
        p->received_encrypted_credentials_directory = mfree(p->received_encrypted_credentials_directory);

        /* The idle pipe, when present, is an array of four fds. */
        if (p->idle_pipe)
                for (size_t k = 0; k < 4; k++)
                        p->idle_pipe[k] = safe_close(p->idle_pipe[k]);
        p->idle_pipe = mfree(p->idle_pipe);

        p->stdin_fd = safe_close(p->stdin_fd);
        p->stdout_fd = safe_close(p->stdout_fd);
        p->stderr_fd = safe_close(p->stderr_fd);

        p->notify_socket = mfree(p->notify_socket);
        open_file_free_many(&p->open_files);
        p->fallback_smack_process_label = mfree(p->fallback_smack_process_label);

        exec_params_clear(p);
}
void exec_directory_done(ExecDirectory *d) {

View File

@ -471,6 +471,13 @@ struct ExecParameters {
#include "unit.h"
#include "dynamic-user.h"
int exec_invoke(const ExecCommand *command,
const ExecContext *context,
ExecParameters *params,
ExecRuntime *runtime,
const CGroupContext *cgroup_context,
int *exit_status);
int exec_spawn(Unit *unit,
ExecCommand *command,
const ExecContext *context,
@ -479,6 +486,7 @@ int exec_spawn(Unit *unit,
const CGroupContext *cgroup_context,
pid_t *ret);
void exec_command_done(ExecCommand *c);
void exec_command_done_array(ExecCommand *c, size_t n);
ExecCommand* exec_command_free_list(ExecCommand *c);
void exec_command_free_array(ExecCommand **c, size_t n);
@ -524,15 +532,18 @@ DEFINE_TRIVIAL_CLEANUP_FUNC(ExecSharedRuntime*, exec_shared_runtime_unref);
int exec_shared_runtime_serialize(const Manager *m, FILE *f, FDSet *fds);
int exec_shared_runtime_deserialize_compat(Unit *u, const char *key, const char *value, FDSet *fds);
int exec_shared_runtime_deserialize_one(Manager *m, const char *value, FDSet *fds);
void exec_shared_runtime_done(ExecSharedRuntime *rt);
void exec_shared_runtime_vacuum(Manager *m);
int exec_runtime_make(const Unit *unit, const ExecContext *context, ExecSharedRuntime *shared, DynamicCreds *creds, ExecRuntime **ret);
ExecRuntime* exec_runtime_free(ExecRuntime *rt);
DEFINE_TRIVIAL_CLEANUP_FUNC(ExecRuntime*, exec_runtime_free);
ExecRuntime* exec_runtime_destroy(ExecRuntime *rt);
void exec_runtime_clear(ExecRuntime *rt);
void exec_params_clear(ExecParameters *p);
void exec_params_dump(const ExecParameters *p, FILE* f, const char *prefix);
void exec_params_serialized_done(ExecParameters *p);
bool exec_context_get_cpu_affinity_from_numa(const ExecContext *c);

251
src/core/executor.c Normal file
View File

@ -0,0 +1,251 @@
/* SPDX-License-Identifier: LGPL-2.1-or-later */
#include <getopt.h>
#include <unistd.h>
#include "sd-messages.h"
#include "alloc-util.h"
#include "build.h"
#include "execute-serialize.h"
#include "execute.h"
#include "exit-status.h"
#include "fdset.h"
#include "fd-util.h"
#include "fileio.h"
#include "getopt-defs.h"
#include "parse-util.h"
#include "pretty-print.h"
#include "static-destruct.h"
static FILE* arg_serialization = NULL;
STATIC_DESTRUCTOR_REGISTER(arg_serialization, fclosep);
static int help(void) {
_cleanup_free_ char *link = NULL;
int r;
r = terminal_urlify_man("systemd", "1", &link);
if (r < 0)
return log_oom();
printf("%s [OPTIONS...]\n\n"
"%sSandbox and execute processes.%s\n\n"
" -h --help Show this help and exit\n"
" --version Print version string and exit\n"
" --log-target=TARGET Set log target (console, journal,\n"
" journal-or-kmsg,\n"
" kmsg, null)\n"
" --log-level=LEVEL Set log level (debug, info, notice,\n"
" warning, err, crit,\n"
" alert, emerg)\n"
" --log-color=BOOL Highlight important messages\n"
" --log-location=BOOL Include code location in messages\n"
" --log-time=BOOL Prefix messages with current time\n"
" --deserialize=FD Deserialize process config from FD\n"
"\nSee the %s for details.\n",
program_invocation_short_name,
ansi_highlight(),
ansi_normal(),
link);
return 0;
}
/* Parses the command line of the executor.
 *
 * Logging options are applied immediately. --deserialize=FD marks the given fd close-on-exec, wraps
 * it in a read-only stream and stores it in 'arg_serialization', replacing any stream set by an
 * earlier occurrence of the option.
 *
 * Returns 1 if there is work to do, the result of help()/version() for -h/--help and --version, and
 * a negative value on error (including when no serialization fd was specified). */
static int parse_argv(int argc, char *argv[]) {
        enum {
                COMMON_GETOPT_ARGS,
                ARG_VERSION,
                ARG_DESERIALIZE,
        };

        static const struct option options[] = {
                { "log-level",    required_argument, NULL, ARG_LOG_LEVEL    },
                { "log-target",   required_argument, NULL, ARG_LOG_TARGET   },
                { "log-color",    required_argument, NULL, ARG_LOG_COLOR    },
                { "log-location", required_argument, NULL, ARG_LOG_LOCATION },
                { "log-time",     required_argument, NULL, ARG_LOG_TIME     },
                { "help",         no_argument,       NULL, 'h'              },
                { "version",      no_argument,       NULL, ARG_VERSION      },
                { "deserialize",  required_argument, NULL, ARG_DESERIALIZE  },
                {}
        };

        int opt, r;

        assert(argc >= 0);
        assert(argv);

        for (;;) {
                opt = getopt_long(argc, argv, "h", options, NULL);
                if (opt < 0)
                        break;

                switch (opt) {

                case 'h':
                        return help();

                case ARG_VERSION:
                        return version();

                case ARG_LOG_LEVEL:
                        r = log_set_max_level_from_string(optarg);
                        if (r < 0)
                                return log_error_errno(r, "Failed to parse log level \"%s\": %m", optarg);
                        break;

                case ARG_LOG_TARGET:
                        r = log_set_target_from_string(optarg);
                        if (r < 0)
                                return log_error_errno(r, "Failed to parse log target \"%s\": %m", optarg);
                        break;

                case ARG_LOG_COLOR:
                        r = log_show_color_from_string(optarg);
                        if (r < 0)
                                return log_error_errno(r, "Failed to parse log color setting \"%s\": %m", optarg);
                        break;

                case ARG_LOG_LOCATION:
                        r = log_show_location_from_string(optarg);
                        if (r < 0)
                                return log_error_errno(r, "Failed to parse log location setting \"%s\": %m", optarg);
                        break;

                case ARG_LOG_TIME:
                        r = log_show_time_from_string(optarg);
                        if (r < 0)
                                return log_error_errno(r, "Failed to parse log time setting \"%s\": %m", optarg);
                        break;

                case ARG_DESERIALIZE: {
                        int fd = parse_fd(optarg);
                        if (fd < 0)
                                return log_error_errno(fd, "Failed to parse serialization fd \"%s\": %m", optarg);

                        /* Turn close-on-exec on for the serialization fd. */
                        r = fd_cloexec(fd, /* cloexec= */ true);
                        if (r < 0)
                                return log_error_errno(r, "Failed to set serialization fd \"%s\" to close-on-exec: %m", optarg);

                        FILE *stream = fdopen(fd, "r");
                        if (!stream)
                                return log_error_errno(errno, "Failed to open serialization fd %d: %m", fd);

                        /* If the option was given more than once, the last one wins. */
                        safe_fclose(arg_serialization);
                        arg_serialization = stream;
                        break;
                }

                case '?':
                        return -EINVAL;

                default:
                        assert_not_reached();
                }
        }

        if (!arg_serialization)
                return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "No serialization fd specified.");

        return 1 /* work to do */;
}
/* Entry point of systemd-executor.
 *
 * Collects the file descriptors passed by the caller, parses the command line, deserializes the
 * invocation state from the serialization fd and hands over to exec_invoke(), which sandboxes and
 * executes the configured command.
 *
 * Exit status: EXIT_SUCCESS when only help/version was requested or when exec_invoke() returned
 * without error; EXIT_FAILURE when setup, argument parsing or deserialization fails; otherwise the
 * exit status reported by exec_invoke() for the step that failed. Negative errno-style values are
 * never returned from here, since only the low 8 bits of the return value reach the parent and
 * would show up as a meaningless exit code. */
int main(int argc, char *argv[]) {
        _cleanup_fdset_free_ FDSet *fdset = NULL;
        int exit_status = EXIT_SUCCESS, r;
        _cleanup_(cgroup_context_done) CGroupContext cgroup_context = {};
        _cleanup_(exec_context_done) ExecContext context = {};
        _cleanup_(exec_command_done) ExecCommand command = {};
        _cleanup_(exec_params_serialized_done) ExecParameters params = EXEC_PARAMETERS_INIT(/* flags= */ 0);
        _cleanup_(exec_shared_runtime_done) ExecSharedRuntime shared = {
                .netns_storage_socket = PIPE_EBADF,
                .ipcns_storage_socket = PIPE_EBADF,
        };
        _cleanup_(dynamic_creds_done) DynamicCreds dynamic_creds = {};
        _cleanup_(exec_runtime_clear) ExecRuntime runtime = {
                .ephemeral_storage_socket = PIPE_EBADF,
                .shared = &shared,
                .dynamic_creds = &dynamic_creds,
        };

        exec_context_init(&context);
        cgroup_context_init(&cgroup_context);

        /* We might be starting the journal itself, we'll be told by the caller what to do */
        log_set_always_reopen_console(true);
        log_set_prohibit_ipc(true);
        log_setup();

        r = fdset_new_fill(/* filter_cloexec= */ 0, &fdset);
        if (r < 0) {
                log_error_errno(r, "Failed to create fd set: %m");
                return EXIT_FAILURE;
        }

        r = parse_argv(argc, argv);
        if (r < 0)
                return EXIT_FAILURE; /* parse_argv() already logged the reason */
        if (r == 0)
                return EXIT_SUCCESS; /* --help or --version, nothing else to do */

        /* Now try again if we were told it's fine to use a different target */
        if (log_get_target() != LOG_TARGET_KMSG) {
                log_set_prohibit_ipc(false);
                log_open();
        }

        /* The serialization fd belongs to 'arg_serialization'; take it out of the fd set. */
        r = fdset_remove(fdset, fileno(arg_serialization));
        if (r < 0) {
                log_error_errno(r, "Failed to remove serialization fd from fd set: %m");
                return EXIT_FAILURE;
        }

        r = exec_deserialize_invocation(arg_serialization,
                                        fdset,
                                        &context,
                                        &command,
                                        &params,
                                        &runtime,
                                        &cgroup_context);
        if (r < 0) {
                log_error_errno(r, "Failed to deserialize: %m");
                return EXIT_FAILURE;
        }

        arg_serialization = safe_fclose(arg_serialization);
        fdset = fdset_free(fdset);

        r = exec_invoke(&command,
                        &context,
                        &params,
                        &runtime,
                        &cgroup_context,
                        &exit_status);
        if (r < 0) {
                const char *status = ASSERT_PTR(
                                exit_status_to_string(exit_status, EXIT_STATUS_LIBC | EXIT_STATUS_SYSTEMD));

                log_exec_struct_errno(&context, &params, LOG_ERR, r,
                                      "MESSAGE_ID=" SD_MESSAGE_SPAWN_FAILED_STR,
                                      LOG_EXEC_INVOCATION_ID(&params),
                                      LOG_EXEC_MESSAGE(&params, "Failed at step %s spawning %s: %m",
                                                       status, command.path),
                                      "EXECUTABLE=%s", command.path);
        } else
                assert(exit_status == EXIT_SUCCESS); /* When 'skip' is chosen in the confirm spawn prompt */

        return exit_status;
}

View File

@ -24,7 +24,7 @@ int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
log_set_target(LOG_TARGET_NULL);
}
assert_se(manager_new(RUNTIME_SCOPE_SYSTEM, MANAGER_TEST_RUN_MINIMAL, &m) >= 0);
assert_se(manager_new(RUNTIME_SCOPE_SYSTEM, MANAGER_TEST_RUN_MINIMAL|MANAGER_TEST_DONT_OPEN_EXECUTOR, &m) >= 0);
/* Set log overrides as well to make it harder for a serialization file
* to switch log levels/targets during fuzzing */
manager_override_log_level(m, log_get_max_level());

View File

@ -65,7 +65,7 @@ int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
if (!getenv("SYSTEMD_LOG_LEVEL"))
log_set_max_level(LOG_CRIT);
assert_se(manager_new(RUNTIME_SCOPE_SYSTEM, MANAGER_TEST_RUN_MINIMAL, &m) >= 0);
assert_se(manager_new(RUNTIME_SCOPE_SYSTEM, MANAGER_TEST_RUN_MINIMAL|MANAGER_TEST_DONT_OPEN_EXECUTOR, &m) >= 0);
name = strjoina("a.", unit_type_to_string(t));
assert_se(unit_new_for_name(m, unit_vtable[t]->object_size, name, &u) >= 0);

View File

@ -921,6 +921,8 @@ int manager_new(RuntimeScope runtime_scope, ManagerTestRunFlags test_run_flags,
.interval = 10 * USEC_PER_MINUTE,
.burst = 10,
},
.executor_fd = -EBADF,
};
unit_defaults_init(&m->defaults, runtime_scope);
@ -1039,6 +1041,42 @@ int manager_new(RuntimeScope runtime_scope, ManagerTestRunFlags test_run_flags,
if (r < 0 && r != -EEXIST)
return r;
m->executor_fd = open(SYSTEMD_EXECUTOR_BINARY_PATH, O_CLOEXEC|O_PATH);
if (m->executor_fd < 0)
return log_warning_errno(errno,
"Failed to open executor binary '%s': %m",
SYSTEMD_EXECUTOR_BINARY_PATH);
} else if (!FLAGS_SET(test_run_flags, MANAGER_TEST_DONT_OPEN_EXECUTOR)) {
_cleanup_free_ char *self_exe = NULL, *executor_path = NULL;
_cleanup_close_ int self_dir_fd = -EBADF;
int level = LOG_DEBUG;
/* Prefer sd-executor from the same directory as the test, e.g.: when running unit tests from the
 * build directory. Fall back to the working directory and then the installation path. */
r = readlink_and_make_absolute("/proc/self/exe", &self_exe);
if (r < 0)
return r;
self_dir_fd = open_parent(self_exe, O_CLOEXEC|O_DIRECTORY, 0);
if (self_dir_fd < 0)
return -errno;
m->executor_fd = openat(self_dir_fd, "systemd-executor", O_CLOEXEC|O_PATH);
if (m->executor_fd < 0 && errno == ENOENT)
m->executor_fd = openat(AT_FDCWD, "systemd-executor", O_CLOEXEC|O_PATH);
if (m->executor_fd < 0 && errno == ENOENT) {
m->executor_fd = open(SYSTEMD_EXECUTOR_BINARY_PATH, O_CLOEXEC|O_PATH);
level = LOG_WARNING; /* Tests should normally use local builds */
}
if (m->executor_fd < 0)
return -errno;
r = fd_get_path(m->executor_fd, &executor_path);
if (r < 0)
return r;
log_full(level, "Using systemd-executor binary from '%s'", executor_path);
}
/* Note that we do not set up the notify fd here. We do that after deserialization,
@ -1701,6 +1739,8 @@ Manager* manager_free(Manager *m) {
lsm_bpf_destroy(m->restrict_fs);
#endif
safe_close(m->executor_fd);
return mfree(m);
}
@ -4956,6 +4996,17 @@ void unit_defaults_done(UnitDefaults *defaults) {
rlimit_free_all(defaults->rlimit);
}
/* Picks the log target that sd-executor should be told to use when it is spawned by this manager. */
LogTarget manager_get_executor_log_target(Manager *m) {
        assert(m);

        /* If journald is not available tell sd-executor to go to kmsg, as it might be starting journald */
        return manager_journal_is_running(m) ? log_get_target() : LOG_TARGET_KMSG;
}
static const char *const manager_state_table[_MANAGER_STATE_MAX] = {
[MANAGER_INITIALIZING] = "initializing",
[MANAGER_STARTING] = "starting",

View File

@ -145,6 +145,7 @@ typedef enum ManagerTestRunFlags {
MANAGER_TEST_RUN_ENV_GENERATORS = 1 << 2, /* also run env generators */
MANAGER_TEST_RUN_GENERATORS = 1 << 3, /* also run unit generators */
MANAGER_TEST_RUN_IGNORE_DEPENDENCIES = 1 << 4, /* run while ignoring dependencies */
MANAGER_TEST_DONT_OPEN_EXECUTOR = 1 << 5, /* avoid trying to load sd-executor */
MANAGER_TEST_FULL = MANAGER_TEST_RUN_BASIC | MANAGER_TEST_RUN_ENV_GENERATORS | MANAGER_TEST_RUN_GENERATORS,
} ManagerTestRunFlags;
@ -496,6 +497,10 @@ struct Manager {
/* For NFTSet= */
FirewallContext *fw_ctx;
/* Pin the systemd-executor binary, so that it never changes until re-exec, ensuring we don't have
* serialization/deserialization compatibility issues during upgrades. */
int executor_fd;
};
static inline usec_t manager_default_timeout_abort_usec(Manager *m) {
@ -628,6 +633,8 @@ void manager_override_watchdog(Manager *m, WatchdogType t, usec_t timeout);
int manager_set_watchdog_pretimeout_governor(Manager *m, const char *governor);
int manager_override_watchdog_pretimeout_governor(Manager *m, const char *governor);
LogTarget manager_get_executor_log_target(Manager *m);
const char* oom_policy_to_string(OOMPolicy i) _const_;
OOMPolicy oom_policy_from_string(const char *s) _pure_;

View File

@ -144,6 +144,10 @@ systemd_sources = files(
'crash-handler.c',
)
systemd_executor_sources = files(
'executor.c',
)
executables += [
libexec_template + {
'name' : 'systemd',
@ -156,6 +160,17 @@ executables += [
],
'dependencies' : libseccomp,
},
libexec_template + {
'name' : 'systemd-executor',
'public' : true,
'sources' : systemd_executor_sources,
'include_directories' : core_includes,
'link_with' : [
libcore,
libshared,
],
'dependencies' : libseccomp,
},
fuzz_template + {
'sources' : files('fuzz-unit-file.c'),
'link_with' : [

View File

@ -5357,6 +5357,7 @@ int unit_acquire_invocation_id(Unit *u) {
}
int unit_set_exec_params(Unit *u, ExecParameters *p) {
const char *confirm_spawn;
int r;
assert(u);
@ -5369,7 +5370,13 @@ int unit_set_exec_params(Unit *u, ExecParameters *p) {
p->runtime_scope = u->manager->runtime_scope;
p->confirm_spawn = (char *)manager_get_confirm_spawn(u->manager);
confirm_spawn = manager_get_confirm_spawn(u->manager);
if (confirm_spawn) {
p->confirm_spawn = strdup(confirm_spawn);
if (!p->confirm_spawn)
return -ENOMEM;
}
p->cgroup_supported = u->manager->cgroup_supported;
p->prefix = u->manager->prefix;
SET_FLAG(p->flags, EXEC_PASS_LOG_UNIT|EXEC_CHOWN_DIRECTORIES, MANAGER_IS_SYSTEM(u->manager));

View File

@ -68,6 +68,12 @@ if systemctl is-active systemd-oomd.service; then
systemctl restart systemd-oomd.service
fi
# Ensure that we can start services even with a very low hard memory cap without oom-kills, but skip under
# sanitizers as they balloon memory usage.
if ! [[ -v ASAN_OPTIONS || -v UBSAN_OPTIONS ]]; then
systemd-run -t -p MemoryMax=10M -p MemorySwapMax=0 -p MemoryZSwapMax=0 /bin/true
fi
systemctl start testsuite-55-testchill.service
systemctl start testsuite-55-testbloat.service