1
0
mirror of https://github.com/systemd/systemd.git synced 2024-12-22 17:35:35 +03:00

coredump: generate properly symbolized stacktrace for containerized processes

This commit is contained in:
Michal Sekletar 2024-07-09 15:21:34 +02:00 committed by Luca Boccassi
parent 18a8f03e51
commit 68511cebe5
8 changed files with 226 additions and 37 deletions

View File

@ -109,6 +109,21 @@
</listitem>
</varlistentry>
<varlistentry>
<term><varname>AccessContainer=</varname></term>
<listitem><para>Controls whether <command>systemd-coredump</command> will attempt to use the mount tree of
a process that crashed within a container. Access to the container's filesystem might be necessary to generate
a fully symbolized backtrace. If set to <literal>yes</literal>, then <command>systemd-coredump</command> will
obtain the mount tree from the corresponding mount namespace and will try to generate the stack trace using the
binary and libraries from that mount namespace. Note that the coredump of the containerized process might
still be saved in <filename>/var/lib/systemd/coredump/</filename> even if <varname>AccessContainer=</varname>
is set to <literal>no</literal>. Defaults to <literal>no</literal>.</para>
<xi:include href="version-info.xml" xpointer="v257"/>
</listitem>
</varlistentry>
<varlistentry>
<term><varname>ExternalSizeMax=</varname></term>
<term><varname>JournalSizeMax=</varname></term>

View File

@ -1389,6 +1389,10 @@ conf.set10('HAVE_ELFUTILS', libdw.found())
conf.set10('HAVE_DWELF_ELF_E_MACHINE_STRING',
libdw.found() and cc.has_function('dwelf_elf_e_machine_string', dependencies : libdw))
# New in elfutils 0.192
conf.set10('HAVE_DWFL_SET_SYSROOT',
libdw.found() and cc.has_function('dwfl_set_sysroot', dependencies : libdw))
libz = dependency('zlib',
required : get_option('zlib'))
conf.set10('HAVE_ZLIB', libz.found())

View File

@ -19,20 +19,25 @@ static int analyze_elf(char **filenames, sd_json_format_flags_t json_flags) {
STRV_FOREACH(filename, filenames) {
_cleanup_(sd_json_variant_unrefp) sd_json_variant *package_metadata = NULL;
_cleanup_(table_unrefp) Table *t = NULL;
_cleanup_free_ char *abspath = NULL;
_cleanup_free_ char *abspath = NULL, *path = NULL, *stacktrace = NULL;
_cleanup_close_ int fd = -EBADF;
bool coredump = false;
r = path_make_absolute_cwd(*filename, &abspath);
if (r < 0)
return log_error_errno(r, "Could not make an absolute path out of \"%s\": %m", *filename);
path_simplify(abspath);
path = path_join(empty_to_root(arg_root), abspath);
if (!path)
return log_oom();
fd = RET_NERRNO(open(abspath, O_RDONLY|O_CLOEXEC));
path_simplify(path);
fd = RET_NERRNO(open(path, O_RDONLY|O_CLOEXEC));
if (fd < 0)
return log_error_errno(fd, "Could not open \"%s\": %m", abspath);
return log_error_errno(fd, "Could not open \"%s\": %m", path);
r = parse_elf_object(fd, abspath, /* fork_disable_dump= */false, NULL, &package_metadata);
r = parse_elf_object(fd, abspath, arg_root, /* fork_disable_dump= */false, &stacktrace, &package_metadata);
if (r < 0)
return log_error_errno(r, "Parsing \"%s\" as ELF object failed: %m", abspath);
@ -60,6 +65,9 @@ static int analyze_elf(char **filenames, sd_json_format_flags_t json_flags) {
* metadata is parsed recursively in core files, so there might be
* multiple modules. */
if (STR_IN_SET(module_name, "elfType", "elfArchitecture")) {
if (streq(module_name, "elfType") && streq("coredump", sd_json_variant_string(module_json)))
coredump = true;
r = table_add_many(
t,
TABLE_FIELD, module_name,
@ -100,6 +108,16 @@ static int analyze_elf(char **filenames, sd_json_format_flags_t json_flags) {
}
}
}
if (coredump) {
r = table_add_many(t,
TABLE_EMPTY, TABLE_EMPTY,
TABLE_FIELD, "stacktrace",
TABLE_STRING, stacktrace);
if (r < 0)
return table_log_add_error(r);
}
if (json_flags & SD_JSON_FORMAT_OFF) {
r = table_print(t, NULL);
if (r < 0)

View File

@ -609,7 +609,7 @@ static int parse_argv(int argc, char *argv[]) {
return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
"Option --security-policy= is only supported for security.");
if ((arg_root || arg_image) && (!STRPTR_IN_SET(argv[optind], "cat-config", "verify", "condition")) &&
if ((arg_root || arg_image) && (!STRPTR_IN_SET(argv[optind], "cat-config", "verify", "condition", "inspect-elf")) &&
(!(streq_ptr(argv[optind], "security") && arg_offline)))
return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
"Options --root= and --image= are only supported for cat-config, verify, condition and security when used with --offline= right now.");

View File

@ -39,6 +39,8 @@
#include "main-func.h"
#include "memory-util.h"
#include "memstream-util.h"
#include "missing_mount.h"
#include "missing_syscall.h"
#include "mkdir-label.h"
#include "namespace-util.h"
#include "parse-util.h"
@ -165,16 +167,22 @@ static uint64_t arg_external_size_max = EXTERNAL_SIZE_MAX;
static uint64_t arg_journal_size_max = JOURNAL_SIZE_MAX;
static uint64_t arg_keep_free = UINT64_MAX;
static uint64_t arg_max_use = UINT64_MAX;
static bool arg_access_container = false;
static int parse_config(void) {
static const ConfigTableItem items[] = {
{ "Coredump", "Storage", config_parse_coredump_storage, 0, &arg_storage },
{ "Coredump", "Compress", config_parse_bool, 0, &arg_compress },
{ "Coredump", "ProcessSizeMax", config_parse_iec_uint64, 0, &arg_process_size_max },
{ "Coredump", "ExternalSizeMax", config_parse_iec_uint64_infinity, 0, &arg_external_size_max },
{ "Coredump", "JournalSizeMax", config_parse_iec_size, 0, &arg_journal_size_max },
{ "Coredump", "KeepFree", config_parse_iec_uint64, 0, &arg_keep_free },
{ "Coredump", "MaxUse", config_parse_iec_uint64, 0, &arg_max_use },
{ "Coredump", "Storage", config_parse_coredump_storage, 0, &arg_storage },
{ "Coredump", "Compress", config_parse_bool, 0, &arg_compress },
{ "Coredump", "ProcessSizeMax", config_parse_iec_uint64, 0, &arg_process_size_max },
{ "Coredump", "ExternalSizeMax", config_parse_iec_uint64_infinity, 0, &arg_external_size_max },
{ "Coredump", "JournalSizeMax", config_parse_iec_size, 0, &arg_journal_size_max },
{ "Coredump", "KeepFree", config_parse_iec_uint64, 0, &arg_keep_free },
{ "Coredump", "MaxUse", config_parse_iec_uint64, 0, &arg_max_use },
#if HAVE_DWFL_SET_SYSROOT
{ "Coredump", "AccessContainer", config_parse_bool, 0, &arg_access_container },
#else
{ "Coredump", "AccessContainer", config_parse_warn_compat, DISABLED_CONFIGURATION, 0 },
#endif
{}
};
@ -774,15 +782,44 @@ static int change_uid_gid(const Context *context) {
return drop_privileges(uid, gid, 0);
}
/* Makes the mount tree referenced by mount_tree_fd available below a freshly created temporary
 * directory, after moving the calling process into its own mount namespace with mount propagation
 * turned off.
 *
 * mount_tree_fd:  file descriptor referring to the mount tree to attach (must be >= 0).
 * container_root: on success receives the newly allocated path of the directory the tree was
 *                 attached to; ownership of the string passes to the caller.
 *
 * Returns 0 on success. On failure a warning is logged, the value produced by the logging call is
 * returned (callers treat a negative value as failure) and *container_root is left untouched. */
static int setup_container_mount_tree(int mount_tree_fd, char **container_root) {
        _cleanup_free_ char *root = NULL;
        int r;

        assert(mount_tree_fd >= 0);
        assert(container_root);

        /* Switch to a mount namespace of our own before touching any mounts. */
        r = unshare(CLONE_NEWNS);
        if (r < 0)
                return log_warning_errno(errno, "Failed to unshare mount namespace: %m");

        /* Mark everything private, so that what we mount below does not propagate anywhere else. */
        r = mount(NULL, "/", NULL, MS_REC|MS_PRIVATE, NULL);
        if (r < 0)
                return log_warning_errno(errno, "Failed to disable mount propagation: %m");

        r = mkdtemp_malloc("/tmp/systemd-coredump-root-XXXXXX", &root);
        if (r < 0)
                return log_warning_errno(r, "Failed to create temporary directory: %m");

        r = move_mount(mount_tree_fd, "", -EBADF, root, MOVE_MOUNT_F_EMPTY_PATH);
        if (r < 0) {
                /* Log first, so that rmdir() cannot clobber errno before it is reported. */
                r = log_warning_errno(errno, "Failed to move mount tree: %m");

                /* Nothing got attached, hence do not leave the empty directory behind in /tmp. */
                (void) rmdir(root);
                return r;
        }

        *container_root = TAKE_PTR(root);
        return 0;
}
static int submit_coredump(
const Context *context,
struct iovec_wrapper *iovw,
int input_fd) {
int input_fd,
int mount_tree_fd) {
_cleanup_(sd_json_variant_unrefp) sd_json_variant *json_metadata = NULL;
_cleanup_close_ int coredump_fd = -EBADF, coredump_node_fd = -EBADF;
_cleanup_free_ char *filename = NULL, *coredump_data = NULL;
_cleanup_free_ char *stacktrace = NULL;
_cleanup_free_ char *root = NULL;
const char *module_name;
uint64_t coredump_size = UINT64_MAX, coredump_compressed_size = UINT64_MAX;
bool truncated = false, written = false;
@ -819,6 +856,12 @@ static int submit_coredump(
(void) coredump_vacuum(coredump_node_fd >= 0 ? coredump_node_fd : coredump_fd, arg_keep_free, arg_max_use);
}
if (mount_tree_fd >= 0 && arg_access_container) {
r = setup_container_mount_tree(mount_tree_fd, &root);
if (r < 0)
log_warning_errno(r, "Failed to setup container mount tree, ignoring: %m");
}
/* Now, let's drop privileges to become the user who owns the segfaulted process and allocate the
* coredump memory under the user's uid. This also ensures that the credentials journald will see are
* the ones of the coredumping user, thus making sure the user gets access to the core dump. Let's
@ -826,7 +869,6 @@ static int submit_coredump(
r = change_uid_gid(context);
if (r < 0)
return log_error_errno(r, "Failed to drop privileges: %m");
if (written) {
/* Try to get a stack trace if we can */
if (coredump_size > arg_process_size_max)
@ -838,6 +880,7 @@ static int submit_coredump(
(void) parse_elf_object(coredump_fd,
context->meta[META_EXE],
root,
/* fork_disable_dump= */ skip, /* avoid loops */
&stacktrace,
&json_metadata);
@ -1000,10 +1043,11 @@ static int save_context(Context *context, const struct iovec_wrapper *iovw) {
}
static int process_socket(int fd) {
_cleanup_close_ int input_fd = -EBADF;
_cleanup_close_ int input_fd = -EBADF, mount_tree_fd = -EBADF;
Context context = {};
struct iovec_wrapper iovw = {};
struct iovec iovec;
bool first = true;
int r;
assert(fd >= 0);
@ -1051,16 +1095,34 @@ static int process_socket(int fd) {
free(iovec.iov_base);
found = cmsg_find(&mh, SOL_SOCKET, SCM_RIGHTS, CMSG_LEN(sizeof(int)));
if (!found) {
cmsg_close_all(&mh);
r = log_error_errno(SYNTHETIC_ERRNO(EBADMSG),
"Coredump file descriptor missing.");
goto finish;
found = cmsg_find(&mh, SOL_SOCKET, SCM_RIGHTS, CMSG_LEN(sizeof(int) * 2));
if (found) {
int fds[2] = EBADF_PAIR;
memcpy(fds, CMSG_TYPED_DATA(found, int), sizeof(int) * 2);
assert(mount_tree_fd < 0);
/* Maybe we already got coredump FD in previous iteration? */
safe_close(input_fd);
input_fd = fds[0];
mount_tree_fd = fds[1];
/* We have all the FDs we need, so let's take a shortcut here. */
break;
} else {
found = cmsg_find(&mh, SOL_SOCKET, SCM_RIGHTS, CMSG_LEN(sizeof(int)));
if (found)
input_fd = *CMSG_TYPED_DATA(found, int);
}
/* This is the first message that carries file descriptors; maybe there will be one more that actually contains an array of descriptors. */
if (first) {
first = false;
continue;
}
assert(input_fd < 0);
input_fd = *CMSG_TYPED_DATA(found, int);
break;
} else
cmsg_close_all(&mh);
@ -1090,14 +1152,14 @@ static int process_socket(int fd) {
goto finish;
}
r = submit_coredump(&context, &iovw, input_fd);
r = submit_coredump(&context, &iovw, input_fd, mount_tree_fd);
finish:
iovw_free_contents(&iovw, true);
return r;
}
static int send_iovec(const struct iovec_wrapper *iovw, int input_fd) {
static int send_iovec(const struct iovec_wrapper *iovw, int input_fd, int mounts_fd) {
_cleanup_close_ int fd = -EBADF;
int r;
@ -1154,6 +1216,12 @@ static int send_iovec(const struct iovec_wrapper *iovw, int input_fd) {
if (r < 0)
return log_error_errno(r, "Failed to send coredump fd: %m");
if (mounts_fd >= 0) {
r = send_many_fds(fd, (int[]) { input_fd, mounts_fd }, 2, 0);
if (r < 0)
return log_error_errno(r, "Failed to send coredump fds: %m");
}
return 0;
}
@ -1532,7 +1600,7 @@ static int forward_coredump_to_container(Context *context) {
_exit(EXIT_FAILURE);
}
r = send_iovec(iovw, STDIN_FILENO);
r = send_iovec(iovw, STDIN_FILENO, -EBADF);
if (r < 0) {
log_debug_errno(r, "Failed to send iovec to coredump socket: %m");
_exit(EXIT_FAILURE);
@ -1560,8 +1628,68 @@ static int forward_coredump_to_container(Context *context) {
return 0;
}
static int gather_pid_mount_tree_fd(const Context *context) {
_cleanup_close_ int mntns_fd = -EBADF, root_fd = -EBADF;
_cleanup_close_pair_ int pair[2] = EBADF_PAIR;
int fd = -EBADF, r;
pid_t child;
assert(context);
/* Don't bother preparing environment if we can't pass it to libdwfl. */
#if !HAVE_DWFL_SET_SYSROOT
return -EBADF;
#endif
if (!arg_access_container)
return -EBADF;
if (socketpair(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC, 0, pair) < 0)
return log_error_errno(errno, "Failed to create socket pair: %m");
r = namespace_open(context->pid, NULL, &mntns_fd, NULL, NULL, &root_fd);
if (r < 0)
return log_error_errno(r, "Failed to open mount namespace of crashing process: %m");
r = namespace_fork("(sd-mount-tree-ns)", "(sd-mount-tree)", NULL, 0, FORK_RESET_SIGNALS|FORK_DEATHSIG_SIGKILL, -1, mntns_fd, -1, -1, root_fd, &child);
if (r < 0)
return log_error_errno(r, "Failed to fork(): %m");
if (r == 0) {
pair[0] = safe_close(pair[0]);
r = open_tree(-EBADF, "/", AT_NO_AUTOMOUNT | AT_RECURSIVE | AT_SYMLINK_NOFOLLOW | OPEN_TREE_CLOEXEC | OPEN_TREE_CLONE);
if (r < 0) {
log_error_errno(errno, "Failed to clone mount tree: %m");
_exit(EXIT_FAILURE);
}
r = send_one_fd(pair[1], r, 0);
if (r < 0) {
log_error_errno(r, "Failed to send mount tree to parent: %m");
_exit(EXIT_FAILURE);
}
_exit(EXIT_SUCCESS);
}
pair[1] = safe_close(pair[1]);
r = wait_for_terminate_and_check("(sd-mount-tree-ns)", child, 0);
if (r < 0)
return log_error_errno(r, "Failed to wait for child: %m");
if (r != EXIT_SUCCESS)
return log_error_errno(SYNTHETIC_ERRNO(ECHILD), "Child died abnormally.");
fd = receive_one_fd(pair[0], MSG_DONTWAIT);
if (fd < 0)
return log_error_errno(fd, "Failed to receive mount tree: %m");
return fd;
}
static int process_kernel(int argc, char* argv[]) {
_cleanup_(iovw_free_freep) struct iovec_wrapper *iovw = NULL;
_cleanup_close_ int mount_tree_fd = -EBADF;
Context context = {};
int r, signo;
@ -1607,6 +1735,12 @@ static int process_kernel(int argc, char* argv[]) {
r = forward_coredump_to_container(&context);
if (r >= 0)
return 0;
r = gather_pid_mount_tree_fd(&context);
if (r < 0 && r != -EBADF)
log_warning_errno(r, "Failed to access the mount tree of a container, ignoring: %m");
else
mount_tree_fd = r;
}
/* If this is PID 1 disable coredump collection, we'll unlikely be able to process
@ -1624,9 +1758,9 @@ static int process_kernel(int argc, char* argv[]) {
(void) iovw_put_string_field(iovw, "PRIORITY=", STRINGIFY(LOG_CRIT));
if (context.is_journald || context.is_pid1)
return submit_coredump(&context, iovw, STDIN_FILENO);
return submit_coredump(&context, iovw, STDIN_FILENO, mount_tree_fd);
return send_iovec(iovw, STDIN_FILENO);
return send_iovec(iovw, STDIN_FILENO, mount_tree_fd);
}
static int process_backtrace(int argc, char *argv[]) {

View File

@ -25,3 +25,4 @@
#JournalSizeMax=767M
#MaxUse=
#KeepFree=
#AccessContainer=no

View File

@ -23,6 +23,7 @@
#include "io-util.h"
#include "macro.h"
#include "memstream-util.h"
#include "path-util.h"
#include "process-util.h"
#include "rlimit-util.h"
#include "string-util.h"
@ -54,6 +55,9 @@ static DLSYM_PROTOTYPE(dwfl_begin) = NULL;
static DLSYM_PROTOTYPE(dwfl_build_id_find_elf) = NULL;
static DLSYM_PROTOTYPE(dwfl_core_file_attach) = NULL;
static DLSYM_PROTOTYPE(dwfl_core_file_report) = NULL;
#if HAVE_DWFL_SET_SYSROOT
static DLSYM_PROTOTYPE(dwfl_set_sysroot) = NULL;
#endif
static DLSYM_PROTOTYPE(dwfl_end) = NULL;
static DLSYM_PROTOTYPE(dwfl_errmsg) = NULL;
static DLSYM_PROTOTYPE(dwfl_errno) = NULL;
@ -114,6 +118,9 @@ int dlopen_dw(void) {
DLSYM_ARG(dwfl_module_getelf),
DLSYM_ARG(dwfl_begin),
DLSYM_ARG(dwfl_core_file_report),
#if HAVE_DWFL_SET_SYSROOT
DLSYM_ARG(dwfl_set_sysroot),
#endif
DLSYM_ARG(dwfl_report_end),
DLSYM_ARG(dwfl_getmodules),
DLSYM_ARG(dwfl_core_file_attach),
@ -580,7 +587,7 @@ static int module_callback(Dwfl_Module *mod, void **userdata, const char *name,
return DWARF_CB_OK;
}
static int parse_core(int fd, const char *executable, char **ret, sd_json_variant **ret_package_metadata) {
static int parse_core(int fd, const char *root, char **ret, sd_json_variant **ret_package_metadata) {
const Dwfl_Callbacks callbacks = {
.find_elf = sym_dwfl_build_id_find_elf,
@ -614,7 +621,17 @@ static int parse_core(int fd, const char *executable, char **ret, sd_json_varian
if (!c.dwfl)
return log_warning_errno(SYNTHETIC_ERRNO(EINVAL), "Could not parse core file, dwfl_begin() failed: %s", sym_dwfl_errmsg(sym_dwfl_errno()));
if (sym_dwfl_core_file_report(c.dwfl, c.elf, executable) < 0)
if (empty_or_root(root))
root = NULL;
#if HAVE_DWFL_SET_SYSROOT
if (root && sym_dwfl_set_sysroot(c.dwfl, root) < 0)
return log_warning_errno(SYNTHETIC_ERRNO(EINVAL), "Could not set root directory, dwfl_set_sysroot() failed: %s", sym_dwfl_errmsg(sym_dwfl_errno()));
#else
if (root)
log_warning("Compiled without dwfl_set_sysroot() support, ignoring provided root directory.");
#endif
if (sym_dwfl_core_file_report(c.dwfl, c.elf, NULL) < 0)
return log_warning_errno(SYNTHETIC_ERRNO(EINVAL), "Could not parse core file, dwfl_core_file_report() failed: %s", sym_dwfl_errmsg(sym_dwfl_errno()));
if (sym_dwfl_report_end(c.dwfl, NULL, NULL) != 0)
@ -641,7 +658,7 @@ static int parse_core(int fd, const char *executable, char **ret, sd_json_varian
return 0;
}
static int parse_elf(int fd, const char *executable, char **ret, sd_json_variant **ret_package_metadata) {
static int parse_elf(int fd, const char *executable, const char *root, char **ret, sd_json_variant **ret_package_metadata) {
_cleanup_(sd_json_variant_unrefp) sd_json_variant *package_metadata = NULL, *elf_metadata = NULL;
_cleanup_set_free_ Set *modules = NULL;
_cleanup_(stack_context_done) StackContext c = {
@ -672,7 +689,7 @@ static int parse_elf(int fd, const char *executable, char **ret, sd_json_variant
if (elf_header.e_type == ET_CORE) {
_cleanup_free_ char *out = NULL;
r = parse_core(fd, executable, ret ? &out : NULL, &package_metadata);
r = parse_core(fd, root, ret ? &out : NULL, &package_metadata);
if (r < 0)
return log_warning_errno(r, "Failed to inspect core file: %m");
@ -743,7 +760,7 @@ static int parse_elf(int fd, const char *executable, char **ret, sd_json_variant
return 0;
}
int parse_elf_object(int fd, const char *executable, bool fork_disable_dump, char **ret, sd_json_variant **ret_package_metadata) {
int parse_elf_object(int fd, const char *executable, const char *root, bool fork_disable_dump, char **ret, sd_json_variant **ret_package_metadata) {
_cleanup_close_pair_ int error_pipe[2] = EBADF_PAIR,
return_pipe[2] = EBADF_PAIR,
json_pipe[2] = EBADF_PAIR;
@ -813,7 +830,7 @@ int parse_elf_object(int fd, const char *executable, bool fork_disable_dump, cha
goto child_fail;
}
r = parse_elf(fd, executable, ret ? &buf : NULL, ret_package_metadata ? &package_metadata : NULL);
r = parse_elf(fd, executable, root, ret ? &buf : NULL, ret_package_metadata ? &package_metadata : NULL);
if (r < 0)
goto child_fail;

View File

@ -10,9 +10,9 @@ int dlopen_elf(void);
/* Parse an ELF object in a forked process, so that errors while iterating over
* untrusted and potentially malicious data do not propagate to the main caller's process.
* If fork_disable_dump, the child process will not dump core if it crashes. */
int parse_elf_object(int fd, const char *executable, bool fork_disable_dump, char **ret, sd_json_variant **ret_package_metadata);
int parse_elf_object(int fd, const char *executable, const char *root, bool fork_disable_dump, char **ret, sd_json_variant **ret_package_metadata);
#else
static inline int parse_elf_object(int fd, const char *executable, bool fork_disable_dump, char **ret, sd_json_variant **ret_package_metadata) {
static inline int parse_elf_object(int fd, const char *executable, const char *root, bool fork_disable_dump, char **ret, sd_json_variant **ret_package_metadata) {
return log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP), "elfutils disabled, parsing ELF objects not supported");
}
#endif