mirror of
https://github.com/systemd/systemd.git
synced 2025-01-09 01:18:19 +03:00
core: add systemd-executor binary
Currently we spawn services by forking a child process, doing a bunch of work, and then exec'ing the service executable. There are some advantages to this approach: - quick: we immediately have access to the enormous amount of state simply by virtue of sharing the memory with the parent - easy to refactor and add features - part of the same binary, will never be out of sync. There are, however, significant drawbacks: - doing work after fork and before exec is against glibc's supported case for several APIs we call - copy-on-write trap: anytime any memory is touched in either parent or child, a copy of that page will be triggered - memory footprint of the child process will be memory footprint of PID1, but using the cgroup memory limits of the unit. The last issue is especially problematic on resource-constrained systems where hard memory caps are enforced and swap is not allowed. As soon as PID1 is under load, with no page out due to no swap, and a service with a low MemoryMax= tries to start, hilarity ensues. Add a new systemd-executor binary, that is able to receive all the required state via memfd, deserialize it, prepare the appropriate data structures and call exec_child. Use posix_spawn which uses CLONE_VM + CLONE_VFORK, to ensure there is no copy-on-write (same address space will be used, and parent process will be frozen, until exec). The sd-executor binary is pinned by FD on startup, so that we can guarantee there will be no incompatibilities during upgrades.
This commit is contained in:
parent
56df7a461f
commit
bb5232b6a3
@ -201,3 +201,25 @@ can be found under various directories such as `factory/`, `modprobe.d/`, `netwo
|
||||
`tools/`, `coccinelle/`, `.github/`, `.semaphore/`, `.mkosi/` host various
|
||||
utilities and scripts that are used by maintainers and developers. They are not
|
||||
shipped or installed.
|
||||
|
||||
# Service Manager Overview
|
||||
|
||||
The Service Manager takes configuration in the form of unit files, credentials,
|
||||
kernel command line options and D-Bus commands, and based on those manages the
|
||||
system and spawns other processes. It runs in system mode as PID1, and in user
|
||||
mode with one instance per user session.
|
||||
|
||||
When starting a unit requires forking a new process, configuration for the new
|
||||
process will be serialized and passed over to the new process, created via a
|
||||
posix_spawn() call. This is done in order to avoid excessive processing after
|
||||
a fork() but before an exec(), which is against glibc's best practices and can
|
||||
also result in a copy-on-write trap. The new process will start as the
|
||||
`systemd-executor` binary, which will deserialize the configuration and apply
|
||||
all the options (sandboxing, namespacing, cgroup, etc.) before exec'ing the
|
||||
configured executable.
|
||||
|
||||
```
|
||||
┌──────┐posix_spawn() ┌───────────┐execve() ┌────────┐
|
||||
│ PID1 ├─────────────►│sd-executor├────────►│program │
|
||||
└──────┘ (memfd) └───────────┘ └────────┘
|
||||
```
|
||||
|
@ -225,6 +225,7 @@ conf.set_quoted('SYSCONF_DIR', sysconfdir)
|
||||
conf.set_quoted('SYSCTL_DIR', sysctldir)
|
||||
conf.set_quoted('SYSTEMCTL_BINARY_PATH', bindir / 'systemctl')
|
||||
conf.set_quoted('SYSTEMD_BINARY_PATH', libexecdir / 'systemd')
|
||||
conf.set_quoted('SYSTEMD_EXECUTOR_BINARY_PATH', libexecdir / 'systemd-executor')
|
||||
conf.set_quoted('SYSTEMD_CATALOG_DIR', catalogdir)
|
||||
conf.set_quoted('SYSTEMD_CGROUPS_AGENT_PATH', libexecdir / 'systemd-cgroups-agent')
|
||||
conf.set_quoted('SYSTEMD_CRYPTSETUP_PATH', bindir / 'systemd-cryptsetup')
|
||||
|
@ -36,7 +36,7 @@ typedef enum CGroupController {
|
||||
CGROUP_CONTROLLER_BPF_SOCKET_BIND,
|
||||
CGROUP_CONTROLLER_BPF_RESTRICT_NETWORK_INTERFACES,
|
||||
/* The BPF hook implementing RestrictFileSystems= is not defined here.
|
||||
* It's applied as late as possible in exec_child() so we don't block
|
||||
* It's applied as late as possible in exec_invoke() so we don't block
|
||||
* our own unit setup code. */
|
||||
|
||||
_CGROUP_CONTROLLER_MAX,
|
||||
|
@ -28,7 +28,7 @@
|
||||
|
||||
DEFINE_TRIVIAL_REF_FUNC(DynamicUser, dynamic_user);
|
||||
|
||||
static DynamicUser* dynamic_user_free(DynamicUser *d) {
|
||||
DynamicUser* dynamic_user_free(DynamicUser *d) {
|
||||
if (!d)
|
||||
return NULL;
|
||||
|
||||
@ -850,3 +850,12 @@ DynamicCreds* dynamic_creds_destroy(DynamicCreds *creds) {
|
||||
|
||||
return mfree(creds);
|
||||
}
|
||||
|
||||
/* Frees the DynamicUser objects referenced by 'creds' and resets both pointers to whatever
 * dynamic_user_free() returns. The DynamicCreds structure itself is not freed. NULL is accepted. */
void dynamic_creds_done(DynamicCreds *creds) {
        if (!creds)
                return;

        /* user and group may reference the very same object; make sure it is freed only once */
        if (creds->user != creds->group)
                (void) dynamic_user_free(creds->group);

        creds->user = dynamic_user_free(creds->user);
        creds->group = creds->user;
}
|
||||
|
@ -28,6 +28,7 @@ struct DynamicUser {
|
||||
int dynamic_user_serialize(Manager *m, FILE *f, FDSet *fds);
|
||||
int dynamic_user_serialize_one(DynamicUser *d, const char *key, FILE *f, FDSet *fds);
|
||||
void dynamic_user_deserialize_one(Manager *m, const char *value, FDSet *fds, DynamicUser **ret);
|
||||
DynamicUser* dynamic_user_free(DynamicUser *d);
|
||||
void dynamic_user_vacuum(Manager *m, bool close_user);
|
||||
|
||||
int dynamic_user_current(DynamicUser *d, uid_t *ret);
|
||||
@ -39,6 +40,7 @@ int dynamic_creds_realize(DynamicCreds *creds, char **suggested_paths, uid_t *ui
|
||||
|
||||
DynamicCreds *dynamic_creds_unref(DynamicCreds *creds);
|
||||
DynamicCreds *dynamic_creds_destroy(DynamicCreds *creds);
|
||||
void dynamic_creds_done(DynamicCreds *creds);
|
||||
|
||||
DEFINE_TRIVIAL_CLEANUP_FUNC(DynamicCreds*, dynamic_creds_unref);
|
||||
DEFINE_TRIVIAL_CLEANUP_FUNC(DynamicCreds*, dynamic_creds_destroy);
|
||||
|
@ -39,6 +39,7 @@
|
||||
#include "argv-util.h"
|
||||
#include "async.h"
|
||||
#include "barrier.h"
|
||||
#include "bpf-dlopen.h"
|
||||
#include "bpf-lsm.h"
|
||||
#include "btrfs-util.h"
|
||||
#include "cap-list.h"
|
||||
@ -56,6 +57,7 @@
|
||||
#include "escape.h"
|
||||
#include "exec-credential.h"
|
||||
#include "execute.h"
|
||||
#include "execute-serialize.h"
|
||||
#include "exit-status.h"
|
||||
#include "fd-util.h"
|
||||
#include "fileio.h"
|
||||
@ -85,6 +87,7 @@
|
||||
#include "seccomp-util.h"
|
||||
#include "securebits-util.h"
|
||||
#include "selinux-util.h"
|
||||
#include "serialize.h"
|
||||
#include "signal-util.h"
|
||||
#include "smack-util.h"
|
||||
#include "socket-util.h"
|
||||
@ -1789,6 +1792,8 @@ static int apply_lock_personality(const ExecContext *c, const ExecParameters *p)
|
||||
|
||||
#if HAVE_LIBBPF
|
||||
static int apply_restrict_filesystems(const ExecContext *c, const ExecParameters *p) {
|
||||
int r;
|
||||
|
||||
assert(c);
|
||||
assert(p);
|
||||
|
||||
@ -1801,6 +1806,11 @@ static int apply_restrict_filesystems(const ExecContext *c, const ExecParameters
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* We are in a new binary, so dl-open again */
|
||||
r = dlopen_bpf();
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
||||
return lsm_bpf_restrict_filesystems(c->restrict_filesystems, p->cgroup_id, p->bpf_outer_map_fd, c->restrict_filesystems_allow_list);
|
||||
}
|
||||
#endif
|
||||
@ -4062,7 +4072,7 @@ static bool exec_context_shall_confirm_spawn(const ExecContext *context) {
|
||||
static int exec_context_load_environment(const Unit *unit, const ExecContext *c, char ***l);
|
||||
static int exec_context_named_iofds(const ExecContext *c, const ExecParameters *p, int named_iofds[static 3]);
|
||||
|
||||
static int exec_child(
|
||||
int exec_invoke(
|
||||
const ExecCommand *command,
|
||||
const ExecContext *context,
|
||||
ExecParameters *params,
|
||||
@ -4117,6 +4127,8 @@ static int exec_child(
|
||||
assert(command->path);
|
||||
assert(!strv_isempty(command->argv));
|
||||
|
||||
LOG_CONTEXT_PUSH_EXEC(context, params);
|
||||
|
||||
if (context->std_input == EXEC_INPUT_SOCKET ||
|
||||
context->std_output == EXEC_OUTPUT_SOCKET ||
|
||||
context->std_error == EXEC_OUTPUT_SOCKET) {
|
||||
@ -5283,7 +5295,6 @@ static int exec_child(
|
||||
return log_exec_error_errno(context, params, r, "Failed to execute %s: %m", executable);
|
||||
}
|
||||
|
||||
|
||||
int exec_spawn(Unit *unit,
|
||||
ExecCommand *command,
|
||||
const ExecContext *context,
|
||||
@ -5292,12 +5303,16 @@ int exec_spawn(Unit *unit,
|
||||
const CGroupContext *cgroup_context,
|
||||
pid_t *ret) {
|
||||
|
||||
_cleanup_free_ char *subcgroup_path = NULL;
|
||||
char serialization_fd_number[DECIMAL_STR_MAX(int) + 1];
|
||||
_cleanup_free_ char *subcgroup_path = NULL, *log_level = NULL, *executor_path = NULL;
|
||||
_cleanup_fdset_free_ FDSet *fdset = NULL;
|
||||
_cleanup_fclose_ FILE *f = NULL;
|
||||
pid_t pid;
|
||||
int r;
|
||||
|
||||
assert(unit);
|
||||
assert(unit->manager);
|
||||
assert(unit->manager->executor_fd >= 0);
|
||||
assert(command);
|
||||
assert(context);
|
||||
assert(ret);
|
||||
@ -5333,35 +5348,56 @@ int exec_spawn(Unit *unit,
|
||||
}
|
||||
}
|
||||
|
||||
pid = fork();
|
||||
if (pid < 0)
|
||||
return log_unit_error_errno(unit, errno, "Failed to fork: %m");
|
||||
/* In order to avoid copy-on-write traps and OOM-kills when pid1's memory.current is above the
|
||||
* child's memory.max, serialize all the state needed to start the unit, and pass it to the
|
||||
* systemd-executor binary. clone() with CLONE_VM + CLONE_VFORK will pause the parent until the exec
|
||||
* and ensure all memory is shared. The child immediately execs the new binary so the delay should
|
||||
* be minimal. Once glibc provides a clone3 wrapper we can switch to that, and clone directly in the
|
||||
* target cgroup. */
|
||||
|
||||
if (pid == 0) {
|
||||
int exit_status;
|
||||
r = open_serialization_file("sd-executor-state", &f);
|
||||
if (r < 0)
|
||||
return log_unit_error_errno(unit, r, "Failed to open serialization stream: %m");
|
||||
|
||||
r = exec_child(command,
|
||||
context,
|
||||
params,
|
||||
runtime,
|
||||
cgroup_context,
|
||||
&exit_status);
|
||||
fdset = fdset_new();
|
||||
if (!fdset)
|
||||
return log_oom();
|
||||
|
||||
if (r < 0) {
|
||||
const char *status = ASSERT_PTR(
|
||||
exit_status_to_string(exit_status, EXIT_STATUS_LIBC | EXIT_STATUS_SYSTEMD));
|
||||
r = exec_serialize_invocation(f, fdset, context, command, params, runtime, cgroup_context);
|
||||
if (r < 0)
|
||||
return log_unit_error_errno(unit, r, "Failed to serialize parameters: %m");
|
||||
|
||||
log_unit_struct_errno(unit, LOG_ERR, r,
|
||||
"MESSAGE_ID=" SD_MESSAGE_SPAWN_FAILED_STR,
|
||||
LOG_UNIT_INVOCATION_ID(unit),
|
||||
LOG_UNIT_MESSAGE(unit, "Failed at step %s spawning %s: %m",
|
||||
status, command->path),
|
||||
"EXECUTABLE=%s", command->path);
|
||||
} else
|
||||
assert(exit_status == EXIT_SUCCESS);
|
||||
if (fseeko(f, 0, SEEK_SET) == (off_t) -1)
|
||||
return log_unit_error_errno(unit, errno, "Failed to reseek on serialization stream: %m");
|
||||
|
||||
_exit(exit_status);
|
||||
}
|
||||
r = fd_cloexec(fileno(f), false);
|
||||
if (r < 0)
|
||||
return log_unit_error_errno(unit, r, "Failed to set O_CLOEXEC on serialization fd: %m");
|
||||
|
||||
r = fdset_cloexec(fdset, false);
|
||||
if (r < 0)
|
||||
return log_unit_error_errno(unit, r, "Failed to set O_CLOEXEC on serialized fds: %m");
|
||||
|
||||
r = log_level_to_string_alloc(log_get_max_level(), &log_level);
|
||||
if (r < 0)
|
||||
return log_unit_error_errno(unit, r, "Failed to convert log level to string: %m");
|
||||
|
||||
r = fd_get_path(unit->manager->executor_fd, &executor_path);
|
||||
if (r < 0)
|
||||
return log_unit_error_errno(unit, r, "Failed to get executor path from fd: %m");
|
||||
|
||||
xsprintf(serialization_fd_number, "%i", fileno(f));
|
||||
|
||||
/* The executor binary is pinned, to avoid compatibility problems during upgrades. */
|
||||
r = posix_spawn_wrapper(FORMAT_PROC_FD_PATH(unit->manager->executor_fd),
|
||||
STRV_MAKE(executor_path,
|
||||
"--deserialize", serialization_fd_number,
|
||||
"--log-level", log_level,
|
||||
"--log-target", log_target_to_string(manager_get_executor_log_target(unit->manager))),
|
||||
environ,
|
||||
&pid);
|
||||
if (r < 0)
|
||||
return log_unit_error_errno(unit, r, "Failed to spawn executor: %m");
|
||||
|
||||
log_unit_debug(unit, "Forked %s as "PID_FMT, command->path, pid);
|
||||
|
||||
@ -5563,7 +5599,7 @@ int exec_context_destroy_mount_ns_dir(Unit *u) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void exec_command_done(ExecCommand *c) {
|
||||
void exec_command_done(ExecCommand *c) {
|
||||
assert(c);
|
||||
|
||||
c->path = mfree(c->path);
|
||||
@ -6679,9 +6715,9 @@ static char *destroy_tree(char *path) {
|
||||
return mfree(path);
|
||||
}
|
||||
|
||||
static ExecSharedRuntime* exec_shared_runtime_free(ExecSharedRuntime *rt) {
|
||||
void exec_shared_runtime_done(ExecSharedRuntime *rt) {
|
||||
if (!rt)
|
||||
return NULL;
|
||||
return;
|
||||
|
||||
if (rt->manager)
|
||||
(void) hashmap_remove(rt->manager->exec_shared_runtime_by_id, rt->id);
|
||||
@ -6691,6 +6727,11 @@ static ExecSharedRuntime* exec_shared_runtime_free(ExecSharedRuntime *rt) {
|
||||
rt->var_tmp_dir = mfree(rt->var_tmp_dir);
|
||||
safe_close_pair(rt->netns_storage_socket);
|
||||
safe_close_pair(rt->ipcns_storage_socket);
|
||||
}
|
||||
|
||||
/* Tears down the contents of 'rt' via exec_shared_runtime_done(), then hands the allocation itself to
 * mfree() and returns its result. */
static ExecSharedRuntime* exec_shared_runtime_free(ExecSharedRuntime *rt) {
        exec_shared_runtime_done(rt);
        rt = mfree(rt);

        return rt;
}
|
||||
|
||||
@ -7216,6 +7257,14 @@ ExecRuntime* exec_runtime_destroy(ExecRuntime *rt) {
|
||||
return exec_runtime_free(rt);
|
||||
}
|
||||
|
||||
/* Releases what an ExecRuntime holds directly: closes the ephemeral storage socket pair and frees the
 * ephemeral copy string. The structure itself is left alone. NULL is accepted. */
void exec_runtime_clear(ExecRuntime *rt) {
        if (rt) {
                safe_close_pair(rt->ephemeral_storage_socket);
                rt->ephemeral_copy = mfree(rt->ephemeral_copy);
        }
}
|
||||
|
||||
void exec_params_clear(ExecParameters *p) {
|
||||
if (!p)
|
||||
return;
|
||||
@ -7230,6 +7279,37 @@ void exec_params_clear(ExecParameters *p) {
|
||||
p->unit_id = mfree(p->unit_id);
|
||||
p->invocation_id = SD_ID128_NULL;
|
||||
p->invocation_id_string[0] = '\0';
|
||||
p->confirm_spawn = mfree(p->confirm_spawn);
|
||||
}
|
||||
|
||||
/* Releases the resources of an ExecParameters object: closes the fds it carries, frees the strings and
 * lists it holds and finally calls exec_params_clear() on it. The structure itself is not freed.
 * NULL is accepted. */
void exec_params_serialized_done(ExecParameters *p) {
        if (!p)
                return;

        /* Socket fds and storage fds live in the same array, close all of them */
        if (p->fds) {
                size_t n_fds = p->n_socket_fds + p->n_storage_fds;

                for (size_t k = 0; k < n_fds; k++)
                        p->fds[k] = safe_close(p->fds[k]);
        }

        p->cgroup_path = mfree(p->cgroup_path);

        p->prefix = strv_free(p->prefix);
        p->received_credentials_directory = mfree(p->received_credentials_directory);
        p->received_encrypted_credentials_directory = mfree(p->received_encrypted_credentials_directory);

        /* The idle pipe consists of four fds */
        if (p->idle_pipe)
                for (size_t k = 0; k < 4; k++)
                        p->idle_pipe[k] = safe_close(p->idle_pipe[k]);
        p->idle_pipe = mfree(p->idle_pipe);

        p->stdin_fd = safe_close(p->stdin_fd);
        p->stdout_fd = safe_close(p->stdout_fd);
        p->stderr_fd = safe_close(p->stderr_fd);

        p->notify_socket = mfree(p->notify_socket);

        open_file_free_many(&p->open_files);

        p->fallback_smack_process_label = mfree(p->fallback_smack_process_label);

        /* Let exec_params_clear() deal with the remaining fields */
        exec_params_clear(p);
}
|
||||
|
||||
void exec_directory_done(ExecDirectory *d) {
|
||||
|
@ -471,6 +471,13 @@ struct ExecParameters {
|
||||
#include "unit.h"
|
||||
#include "dynamic-user.h"
|
||||
|
||||
int exec_invoke(const ExecCommand *command,
|
||||
const ExecContext *context,
|
||||
ExecParameters *params,
|
||||
ExecRuntime *runtime,
|
||||
const CGroupContext *cgroup_context,
|
||||
int *exit_status);
|
||||
|
||||
int exec_spawn(Unit *unit,
|
||||
ExecCommand *command,
|
||||
const ExecContext *context,
|
||||
@ -479,6 +486,7 @@ int exec_spawn(Unit *unit,
|
||||
const CGroupContext *cgroup_context,
|
||||
pid_t *ret);
|
||||
|
||||
void exec_command_done(ExecCommand *c);
|
||||
void exec_command_done_array(ExecCommand *c, size_t n);
|
||||
ExecCommand* exec_command_free_list(ExecCommand *c);
|
||||
void exec_command_free_array(ExecCommand **c, size_t n);
|
||||
@ -524,15 +532,18 @@ DEFINE_TRIVIAL_CLEANUP_FUNC(ExecSharedRuntime*, exec_shared_runtime_unref);
|
||||
int exec_shared_runtime_serialize(const Manager *m, FILE *f, FDSet *fds);
|
||||
int exec_shared_runtime_deserialize_compat(Unit *u, const char *key, const char *value, FDSet *fds);
|
||||
int exec_shared_runtime_deserialize_one(Manager *m, const char *value, FDSet *fds);
|
||||
void exec_shared_runtime_done(ExecSharedRuntime *rt);
|
||||
void exec_shared_runtime_vacuum(Manager *m);
|
||||
|
||||
int exec_runtime_make(const Unit *unit, const ExecContext *context, ExecSharedRuntime *shared, DynamicCreds *creds, ExecRuntime **ret);
|
||||
ExecRuntime* exec_runtime_free(ExecRuntime *rt);
|
||||
DEFINE_TRIVIAL_CLEANUP_FUNC(ExecRuntime*, exec_runtime_free);
|
||||
ExecRuntime* exec_runtime_destroy(ExecRuntime *rt);
|
||||
void exec_runtime_clear(ExecRuntime *rt);
|
||||
|
||||
void exec_params_clear(ExecParameters *p);
|
||||
void exec_params_dump(const ExecParameters *p, FILE* f, const char *prefix);
|
||||
void exec_params_serialized_done(ExecParameters *p);
|
||||
|
||||
bool exec_context_get_cpu_affinity_from_numa(const ExecContext *c);
|
||||
|
||||
|
251
src/core/executor.c
Normal file
251
src/core/executor.c
Normal file
@ -0,0 +1,251 @@
|
||||
/* SPDX-License-Identifier: LGPL-2.1-or-later */
|
||||
|
||||
#include <getopt.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include "sd-messages.h"
|
||||
|
||||
#include "alloc-util.h"
|
||||
#include "build.h"
|
||||
#include "execute-serialize.h"
|
||||
#include "execute.h"
|
||||
#include "exit-status.h"
|
||||
#include "fdset.h"
|
||||
#include "fd-util.h"
|
||||
#include "fileio.h"
|
||||
#include "getopt-defs.h"
|
||||
#include "parse-util.h"
|
||||
#include "pretty-print.h"
|
||||
#include "static-destruct.h"
|
||||
|
||||
static FILE* arg_serialization = NULL;
|
||||
|
||||
STATIC_DESTRUCTOR_REGISTER(arg_serialization, fclosep);
|
||||
|
||||
static int help(void) {
|
||||
_cleanup_free_ char *link = NULL;
|
||||
int r;
|
||||
|
||||
r = terminal_urlify_man("systemd", "1", &link);
|
||||
if (r < 0)
|
||||
return log_oom();
|
||||
|
||||
printf("%s [OPTIONS...]\n\n"
|
||||
"%sSandbox and execute processes.%s\n\n"
|
||||
" -h --help Show this help and exit\n"
|
||||
" --version Print version string and exit\n"
|
||||
" --log-target=TARGET Set log target (console, journal,\n"
|
||||
" journal-or-kmsg,\n"
|
||||
" kmsg, null)\n"
|
||||
" --log-level=LEVEL Set log level (debug, info, notice,\n"
|
||||
" warning, err, crit,\n"
|
||||
" alert, emerg)\n"
|
||||
" --log-color=BOOL Highlight important messages\n"
|
||||
" --log-location=BOOL Include code location in messages\n"
|
||||
" --log-time=BOOL Prefix messages with current time\n"
|
||||
" --deserialize=FD Deserialize process config from FD\n"
|
||||
"\nSee the %s for details.\n",
|
||||
program_invocation_short_name,
|
||||
ansi_highlight(),
|
||||
ansi_normal(),
|
||||
link);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int parse_argv(int argc, char *argv[]) {
|
||||
enum {
|
||||
COMMON_GETOPT_ARGS,
|
||||
ARG_VERSION,
|
||||
ARG_DESERIALIZE,
|
||||
};
|
||||
|
||||
static const struct option options[] = {
|
||||
{ "log-level", required_argument, NULL, ARG_LOG_LEVEL },
|
||||
{ "log-target", required_argument, NULL, ARG_LOG_TARGET },
|
||||
{ "log-color", required_argument, NULL, ARG_LOG_COLOR },
|
||||
{ "log-location", required_argument, NULL, ARG_LOG_LOCATION },
|
||||
{ "log-time", required_argument, NULL, ARG_LOG_TIME },
|
||||
{ "help", no_argument, NULL, 'h' },
|
||||
{ "version", no_argument, NULL, ARG_VERSION },
|
||||
{ "deserialize", required_argument, NULL, ARG_DESERIALIZE },
|
||||
{}
|
||||
};
|
||||
|
||||
int c, r;
|
||||
|
||||
assert(argc >= 0);
|
||||
assert(argv);
|
||||
|
||||
while ((c = getopt_long(argc, argv, "h", options, NULL)) >= 0)
|
||||
switch (c) {
|
||||
case 'h':
|
||||
return help();
|
||||
|
||||
case ARG_VERSION:
|
||||
return version();
|
||||
|
||||
case ARG_LOG_LEVEL:
|
||||
r = log_set_max_level_from_string(optarg);
|
||||
if (r < 0)
|
||||
return log_error_errno(r, "Failed to parse log level \"%s\": %m", optarg);
|
||||
|
||||
break;
|
||||
|
||||
case ARG_LOG_TARGET:
|
||||
r = log_set_target_from_string(optarg);
|
||||
if (r < 0)
|
||||
return log_error_errno(r, "Failed to parse log target \"%s\": %m", optarg);
|
||||
|
||||
break;
|
||||
|
||||
case ARG_LOG_COLOR:
|
||||
r = log_show_color_from_string(optarg);
|
||||
if (r < 0)
|
||||
return log_error_errno(
|
||||
r,
|
||||
"Failed to parse log color setting \"%s\": %m",
|
||||
optarg);
|
||||
|
||||
break;
|
||||
|
||||
case ARG_LOG_LOCATION:
|
||||
r = log_show_location_from_string(optarg);
|
||||
if (r < 0)
|
||||
return log_error_errno(
|
||||
r,
|
||||
"Failed to parse log location setting \"%s\": %m",
|
||||
optarg);
|
||||
|
||||
break;
|
||||
|
||||
case ARG_LOG_TIME:
|
||||
r = log_show_time_from_string(optarg);
|
||||
if (r < 0)
|
||||
return log_error_errno(
|
||||
r,
|
||||
"Failed to parse log time setting \"%s\": %m",
|
||||
optarg);
|
||||
|
||||
break;
|
||||
|
||||
case ARG_DESERIALIZE: {
|
||||
FILE *f;
|
||||
int fd;
|
||||
|
||||
fd = parse_fd(optarg);
|
||||
if (fd < 0)
|
||||
return log_error_errno(
|
||||
fd,
|
||||
"Failed to parse serialization fd \"%s\": %m",
|
||||
optarg);
|
||||
|
||||
r = fd_cloexec(fd, /* cloexec= */ true);
|
||||
if (r < 0)
|
||||
return log_error_errno(
|
||||
r,
|
||||
"Failed to set serialization fd \"%s\" to close-on-exec: %m",
|
||||
optarg);
|
||||
|
||||
f = fdopen(fd, "r");
|
||||
if (!f)
|
||||
return log_error_errno(errno, "Failed to open serialization fd %d: %m", fd);
|
||||
|
||||
safe_fclose(arg_serialization);
|
||||
arg_serialization = f;
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
case '?':
|
||||
return -EINVAL;
|
||||
|
||||
default:
|
||||
assert_not_reached();
|
||||
}
|
||||
|
||||
if (!arg_serialization)
|
||||
return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
|
||||
"No serialization fd specified.");
|
||||
|
||||
return 1 /* work to do */;
|
||||
}
|
||||
|
||||
int main(int argc, char *argv[]) {
|
||||
_cleanup_fdset_free_ FDSet *fdset = NULL;
|
||||
int exit_status = EXIT_SUCCESS, r;
|
||||
_cleanup_(cgroup_context_done) CGroupContext cgroup_context = {};
|
||||
_cleanup_(exec_context_done) ExecContext context = {};
|
||||
_cleanup_(exec_command_done) ExecCommand command = {};
|
||||
_cleanup_(exec_params_serialized_done) ExecParameters params = EXEC_PARAMETERS_INIT(/* flags= */ 0);
|
||||
_cleanup_(exec_shared_runtime_done) ExecSharedRuntime shared = {
|
||||
.netns_storage_socket = PIPE_EBADF,
|
||||
.ipcns_storage_socket = PIPE_EBADF,
|
||||
};
|
||||
_cleanup_(dynamic_creds_done) DynamicCreds dynamic_creds = {};
|
||||
_cleanup_(exec_runtime_clear) ExecRuntime runtime = {
|
||||
.ephemeral_storage_socket = PIPE_EBADF,
|
||||
.shared = &shared,
|
||||
.dynamic_creds = &dynamic_creds,
|
||||
};
|
||||
|
||||
exec_context_init(&context);
|
||||
cgroup_context_init(&cgroup_context);
|
||||
|
||||
/* We might be starting the journal itself, we'll be told by the caller what to do */
|
||||
log_set_always_reopen_console(true);
|
||||
log_set_prohibit_ipc(true);
|
||||
log_setup();
|
||||
|
||||
r = fdset_new_fill(/* filter_cloexec= */ 0, &fdset);
|
||||
if (r < 0)
|
||||
return log_error_errno(r, "Failed to create fd set: %m");
|
||||
|
||||
r = parse_argv(argc, argv);
|
||||
if (r <= 0)
|
||||
return r;
|
||||
|
||||
/* Now try again if we were told it's fine to use a different target */
|
||||
if (log_get_target() != LOG_TARGET_KMSG) {
|
||||
log_set_prohibit_ipc(false);
|
||||
log_open();
|
||||
}
|
||||
|
||||
r = fdset_remove(fdset, fileno(arg_serialization));
|
||||
if (r < 0)
|
||||
return log_error_errno(r, "Failed to remove serialization fd from fd set: %m");
|
||||
|
||||
r = exec_deserialize_invocation(arg_serialization,
|
||||
fdset,
|
||||
&context,
|
||||
&command,
|
||||
¶ms,
|
||||
&runtime,
|
||||
&cgroup_context);
|
||||
if (r < 0)
|
||||
return log_error_errno(r, "Failed to deserialize: %m");
|
||||
|
||||
arg_serialization = safe_fclose(arg_serialization);
|
||||
fdset = fdset_free(fdset);
|
||||
|
||||
r = exec_invoke(&command,
|
||||
&context,
|
||||
¶ms,
|
||||
&runtime,
|
||||
&cgroup_context,
|
||||
&exit_status);
|
||||
if (r < 0) {
|
||||
const char *status = ASSERT_PTR(
|
||||
exit_status_to_string(exit_status, EXIT_STATUS_LIBC | EXIT_STATUS_SYSTEMD));
|
||||
|
||||
log_exec_struct_errno(&context, ¶ms, LOG_ERR, r,
|
||||
"MESSAGE_ID=" SD_MESSAGE_SPAWN_FAILED_STR,
|
||||
LOG_EXEC_INVOCATION_ID(¶ms),
|
||||
LOG_EXEC_MESSAGE(¶ms, "Failed at step %s spawning %s: %m",
|
||||
status, command.path),
|
||||
"EXECUTABLE=%s", command.path);
|
||||
} else
|
||||
assert(exit_status == EXIT_SUCCESS); /* When 'skip' is chosen in the confirm spawn prompt */
|
||||
|
||||
return exit_status;
|
||||
}
|
@ -24,7 +24,7 @@ int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
|
||||
log_set_target(LOG_TARGET_NULL);
|
||||
}
|
||||
|
||||
assert_se(manager_new(RUNTIME_SCOPE_SYSTEM, MANAGER_TEST_RUN_MINIMAL, &m) >= 0);
|
||||
assert_se(manager_new(RUNTIME_SCOPE_SYSTEM, MANAGER_TEST_RUN_MINIMAL|MANAGER_TEST_DONT_OPEN_EXECUTOR, &m) >= 0);
|
||||
/* Set log overrides as well to make it harder for a serialization file
|
||||
* to switch log levels/targets during fuzzing */
|
||||
manager_override_log_level(m, log_get_max_level());
|
||||
|
@ -65,7 +65,7 @@ int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
|
||||
if (!getenv("SYSTEMD_LOG_LEVEL"))
|
||||
log_set_max_level(LOG_CRIT);
|
||||
|
||||
assert_se(manager_new(RUNTIME_SCOPE_SYSTEM, MANAGER_TEST_RUN_MINIMAL, &m) >= 0);
|
||||
assert_se(manager_new(RUNTIME_SCOPE_SYSTEM, MANAGER_TEST_RUN_MINIMAL|MANAGER_TEST_DONT_OPEN_EXECUTOR, &m) >= 0);
|
||||
|
||||
name = strjoina("a.", unit_type_to_string(t));
|
||||
assert_se(unit_new_for_name(m, unit_vtable[t]->object_size, name, &u) >= 0);
|
||||
|
@ -921,6 +921,8 @@ int manager_new(RuntimeScope runtime_scope, ManagerTestRunFlags test_run_flags,
|
||||
.interval = 10 * USEC_PER_MINUTE,
|
||||
.burst = 10,
|
||||
},
|
||||
|
||||
.executor_fd = -EBADF,
|
||||
};
|
||||
|
||||
unit_defaults_init(&m->defaults, runtime_scope);
|
||||
@ -1039,6 +1041,42 @@ int manager_new(RuntimeScope runtime_scope, ManagerTestRunFlags test_run_flags,
|
||||
|
||||
if (r < 0 && r != -EEXIST)
|
||||
return r;
|
||||
|
||||
m->executor_fd = open(SYSTEMD_EXECUTOR_BINARY_PATH, O_CLOEXEC|O_PATH);
|
||||
if (m->executor_fd < 0)
|
||||
return log_warning_errno(errno,
|
||||
"Failed to open executor binary '%s': %m",
|
||||
SYSTEMD_EXECUTOR_BINARY_PATH);
|
||||
} else if (!FLAGS_SET(test_run_flags, MANAGER_TEST_DONT_OPEN_EXECUTOR)) {
|
||||
_cleanup_free_ char *self_exe = NULL, *executor_path = NULL;
|
||||
_cleanup_close_ int self_dir_fd = -EBADF;
|
||||
int level = LOG_DEBUG;
|
||||
|
||||
/* Prefer sd-executor from the same directory as the test, e.g.: when running unit tests from the
|
||||
* build directory. Fallback to working directory and then the installation path. */
|
||||
r = readlink_and_make_absolute("/proc/self/exe", &self_exe);
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
||||
self_dir_fd = open_parent(self_exe, O_CLOEXEC|O_DIRECTORY, 0);
|
||||
if (self_dir_fd < 0)
|
||||
return -errno;
|
||||
|
||||
m->executor_fd = openat(self_dir_fd, "systemd-executor", O_CLOEXEC|O_PATH);
|
||||
if (m->executor_fd < 0 && errno == ENOENT)
|
||||
m->executor_fd = openat(AT_FDCWD, "systemd-executor", O_CLOEXEC|O_PATH);
|
||||
if (m->executor_fd < 0 && errno == ENOENT) {
|
||||
m->executor_fd = open(SYSTEMD_EXECUTOR_BINARY_PATH, O_CLOEXEC|O_PATH);
|
||||
level = LOG_WARNING; /* Tests should normally use local builds */
|
||||
}
|
||||
if (m->executor_fd < 0)
|
||||
return -errno;
|
||||
|
||||
r = fd_get_path(m->executor_fd, &executor_path);
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
||||
log_full(level, "Using systemd-executor binary from '%s'", executor_path);
|
||||
}
|
||||
|
||||
/* Note that we do not set up the notify fd here. We do that after deserialization,
|
||||
@ -1701,6 +1739,8 @@ Manager* manager_free(Manager *m) {
|
||||
lsm_bpf_destroy(m->restrict_fs);
|
||||
#endif
|
||||
|
||||
safe_close(m->executor_fd);
|
||||
|
||||
return mfree(m);
|
||||
}
|
||||
|
||||
@ -4956,6 +4996,17 @@ void unit_defaults_done(UnitDefaults *defaults) {
|
||||
rlimit_free_all(defaults->rlimit);
|
||||
}
|
||||
|
||||
/* Picks the log target to pass to sd-executor: the manager's current log target while the journal is
 * running, kmsg otherwise. */
LogTarget manager_get_executor_log_target(Manager *m) {
        assert(m);

        /* If journald is not available tell sd-executor to go to kmsg, as it might be starting journald */
        if (!manager_journal_is_running(m))
                return LOG_TARGET_KMSG;

        return log_get_target();
}
|
||||
|
||||
static const char *const manager_state_table[_MANAGER_STATE_MAX] = {
|
||||
[MANAGER_INITIALIZING] = "initializing",
|
||||
[MANAGER_STARTING] = "starting",
|
||||
|
@ -145,6 +145,7 @@ typedef enum ManagerTestRunFlags {
|
||||
MANAGER_TEST_RUN_ENV_GENERATORS = 1 << 2, /* also run env generators */
|
||||
MANAGER_TEST_RUN_GENERATORS = 1 << 3, /* also run unit generators */
|
||||
MANAGER_TEST_RUN_IGNORE_DEPENDENCIES = 1 << 4, /* run while ignoring dependencies */
|
||||
MANAGER_TEST_DONT_OPEN_EXECUTOR = 1 << 5, /* avoid trying to load sd-executor */
|
||||
MANAGER_TEST_FULL = MANAGER_TEST_RUN_BASIC | MANAGER_TEST_RUN_ENV_GENERATORS | MANAGER_TEST_RUN_GENERATORS,
|
||||
} ManagerTestRunFlags;
|
||||
|
||||
@ -496,6 +497,10 @@ struct Manager {
|
||||
|
||||
/* For NFTSet= */
|
||||
FirewallContext *fw_ctx;
|
||||
|
||||
/* Pin the systemd-executor binary, so that it never changes until re-exec, ensuring we don't have
|
||||
* serialization/deserialization compatibility issues during upgrades. */
|
||||
int executor_fd;
|
||||
};
|
||||
|
||||
static inline usec_t manager_default_timeout_abort_usec(Manager *m) {
|
||||
@ -628,6 +633,8 @@ void manager_override_watchdog(Manager *m, WatchdogType t, usec_t timeout);
|
||||
int manager_set_watchdog_pretimeout_governor(Manager *m, const char *governor);
|
||||
int manager_override_watchdog_pretimeout_governor(Manager *m, const char *governor);
|
||||
|
||||
LogTarget manager_get_executor_log_target(Manager *m);
|
||||
|
||||
const char* oom_policy_to_string(OOMPolicy i) _const_;
|
||||
OOMPolicy oom_policy_from_string(const char *s) _pure_;
|
||||
|
||||
|
@ -144,6 +144,10 @@ systemd_sources = files(
|
||||
'crash-handler.c',
|
||||
)
|
||||
|
||||
systemd_executor_sources = files(
|
||||
'executor.c',
|
||||
)
|
||||
|
||||
executables += [
|
||||
libexec_template + {
|
||||
'name' : 'systemd',
|
||||
@ -156,6 +160,17 @@ executables += [
|
||||
],
|
||||
'dependencies' : libseccomp,
|
||||
},
|
||||
libexec_template + {
|
||||
'name' : 'systemd-executor',
|
||||
'public' : true,
|
||||
'sources' : systemd_executor_sources,
|
||||
'include_directories' : core_includes,
|
||||
'link_with' : [
|
||||
libcore,
|
||||
libshared,
|
||||
],
|
||||
'dependencies' : libseccomp,
|
||||
},
|
||||
fuzz_template + {
|
||||
'sources' : files('fuzz-unit-file.c'),
|
||||
'link_with' : [
|
||||
|
@ -5357,6 +5357,7 @@ int unit_acquire_invocation_id(Unit *u) {
|
||||
}
|
||||
|
||||
int unit_set_exec_params(Unit *u, ExecParameters *p) {
|
||||
const char *confirm_spawn;
|
||||
int r;
|
||||
|
||||
assert(u);
|
||||
@ -5369,7 +5370,13 @@ int unit_set_exec_params(Unit *u, ExecParameters *p) {
|
||||
|
||||
p->runtime_scope = u->manager->runtime_scope;
|
||||
|
||||
p->confirm_spawn = (char *)manager_get_confirm_spawn(u->manager);
|
||||
confirm_spawn = manager_get_confirm_spawn(u->manager);
|
||||
if (confirm_spawn) {
|
||||
p->confirm_spawn = strdup(confirm_spawn);
|
||||
if (!p->confirm_spawn)
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
p->cgroup_supported = u->manager->cgroup_supported;
|
||||
p->prefix = u->manager->prefix;
|
||||
SET_FLAG(p->flags, EXEC_PASS_LOG_UNIT|EXEC_CHOWN_DIRECTORIES, MANAGER_IS_SYSTEM(u->manager));
|
||||
|
@ -68,6 +68,12 @@ if systemctl is-active systemd-oomd.service; then
|
||||
systemctl restart systemd-oomd.service
|
||||
fi
|
||||
|
||||
# Ensure that we can start services even with a very low hard memory cap without oom-kills, but skip under
|
||||
# sanitizers as they balloon memory usage.
|
||||
if ! [[ -v ASAN_OPTIONS || -v UBSAN_OPTIONS ]]; then
|
||||
systemd-run -t -p MemoryMax=10M -p MemorySwapMax=0 -p MemoryZSwapMax=0 /bin/true
|
||||
fi
|
||||
|
||||
systemctl start testsuite-55-testchill.service
|
||||
systemctl start testsuite-55-testbloat.service
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user