1
0
mirror of https://github.com/systemd/systemd.git synced 2025-03-19 22:50:17 +03:00

Merge pull request #20288 from keszybz/freeze-no-malloc

Don't call malloc from freeze which is called in a signal handler
This commit is contained in:
Luca Boccassi 2021-07-23 15:35:34 +01:00 committed by GitHub
commit 80362ec56f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
11 changed files with 182 additions and 207 deletions

View File

@ -208,10 +208,9 @@ static int get_max_fd(void) {
return (int) (m - 1);
}
int close_all_fds(const int except[], size_t n_except) {
int close_all_fds_full(int except[], size_t n_except, bool allow_alloc) {
static bool have_close_range = true; /* Assume we live in the future */
_cleanup_closedir_ DIR *d = NULL;
struct dirent *de;
int r = 0;
assert(n_except == 0 || except);
@ -227,129 +226,104 @@ int close_all_fds(const int except[], size_t n_except) {
/* Close everything. Yay! */
if (close_range(3, -1, 0) >= 0)
return 1;
return 0;
if (!ERRNO_IS_NOT_SUPPORTED(errno) && !ERRNO_IS_PRIVILEGE(errno))
if (ERRNO_IS_NOT_SUPPORTED(errno) || ERRNO_IS_PRIVILEGE(errno))
have_close_range = false;
else
return -errno;
have_close_range = false;
} else {
_cleanup_free_ int *sorted_malloc = NULL;
size_t n_sorted;
int *sorted;
typesafe_qsort(except, n_except, cmp_int);
assert(n_except < SIZE_MAX);
n_sorted = n_except + 1;
for (size_t i = 0; i < n_except; i++) {
int start = i == 0 ? 2 : MAX(except[i-1], 2); /* The first three fds shall always remain open */
int end = MAX(except[i], 2);
if (n_sorted > 64) /* Use heap for large numbers of fds, stack otherwise */
sorted = sorted_malloc = new(int, n_sorted);
else
sorted = newa(int, n_sorted);
assert(end >= start);
if (sorted) {
int c = 0;
memcpy(sorted, except, n_except * sizeof(int));
/* Let's add fd 2 to the list of fds, to simplify the loop below, as this
* allows us to cover the head of the array the same way as the body */
sorted[n_sorted-1] = 2;
typesafe_qsort(sorted, n_sorted, cmp_int);
for (size_t i = 0; i < n_sorted-1; i++) {
int start, end;
start = MAX(sorted[i], 2); /* The first three fds shall always remain open */
end = MAX(sorted[i+1], 2);
assert(end >= start);
if (end - start <= 1)
continue;
/* Close everything between the start and end fds (both of which shall stay open) */
if (close_range(start + 1, end - 1, 0) < 0) {
if (!ERRNO_IS_NOT_SUPPORTED(errno) && !ERRNO_IS_PRIVILEGE(errno))
return -errno;
if (end - start <= 1)
continue;
/* Close everything between the start and end fds (both of which shall stay open) */
if (close_range(start + 1, end - 1, 0) < 0) {
if (ERRNO_IS_NOT_SUPPORTED(errno) || ERRNO_IS_PRIVILEGE(errno))
have_close_range = false;
break;
}
c += end - start - 1;
}
if (have_close_range) {
/* The loop succeeded. Let's now close everything beyond the end */
if (sorted[n_sorted-1] >= INT_MAX) /* Dont let the addition below overflow */
return c;
if (close_range(sorted[n_sorted-1] + 1, -1, 0) >= 0)
return c + 1;
if (!ERRNO_IS_NOT_SUPPORTED(errno) && !ERRNO_IS_PRIVILEGE(errno))
else
return -errno;
have_close_range = false;
goto opendir_fallback;
}
}
}
/* Fallback on OOM or if close_range() is not supported */
/* The loop succeeded. Let's now close everything beyond the end */
if (except[n_except-1] >= INT_MAX) /* Don't let the addition below overflow */
return 0;
int start = MAX(except[n_except-1], 2);
if (close_range(start + 1, -1, 0) >= 0)
return 0;
if (ERRNO_IS_NOT_SUPPORTED(errno) || ERRNO_IS_PRIVILEGE(errno))
have_close_range = false;
else
return -errno;
}
}
d = opendir("/proc/self/fd");
if (!d) {
int fd, max_fd;
/* Fallback for when close_range() is not supported */
opendir_fallback:
d = allow_alloc ? opendir("/proc/self/fd") : NULL;
if (d) {
struct dirent *de;
/* When /proc isn't available (for example in chroots) the fallback is brute forcing through
* the fd table */
FOREACH_DIRENT(de, d, return -errno) {
int fd = -1, q;
max_fd = get_max_fd();
if (max_fd < 0)
return max_fd;
if (safe_atoi(de->d_name, &fd) < 0)
/* Let's better ignore this, just in case */
continue;
/* Refuse to do the loop over more too many elements. It's better to fail immediately than to
* spin the CPU for a long time. */
if (max_fd > MAX_FD_LOOP_LIMIT)
return log_debug_errno(SYNTHETIC_ERRNO(EPERM),
"/proc/self/fd is inaccessible. Refusing to loop over %d potential fds.",
max_fd);
if (fd < 3)
continue;
for (fd = 3; fd >= 0; fd = fd < max_fd ? fd + 1 : -1) {
int q;
if (fd == dirfd(d))
continue;
if (fd_in_set(fd, except, n_except))
continue;
q = close_nointr(fd);
if (q < 0 && q != -EBADF && r >= 0)
if (q < 0 && q != -EBADF && r >= 0) /* Valgrind has its own FD and doesn't want to have it closed */
r = q;
}
return r;
}
FOREACH_DIRENT(de, d, return -errno) {
int fd = -1, q;
/* Fallback for when /proc isn't available (for example in chroots) or when we cannot allocate by
* brute-forcing through the file descriptor table. */
if (safe_atoi(de->d_name, &fd) < 0)
/* Let's better ignore this, just in case */
continue;
int max_fd = get_max_fd();
if (max_fd < 0)
return max_fd;
if (fd < 3)
continue;
/* Refuse to do the loop over more too many elements. It's better to fail immediately than to
* spin the CPU for a long time. */
if (max_fd > MAX_FD_LOOP_LIMIT)
return log_debug_errno(SYNTHETIC_ERRNO(EPERM),
"/proc/self/fd is inaccessible. Refusing to loop over %d potential fds.",
max_fd);
if (fd == dirfd(d))
continue;
for (int fd = 3; fd >= 0; fd = fd < max_fd ? fd + 1 : -1) {
int q;
if (fd_in_set(fd, except, n_except))
continue;
q = close_nointr(fd);
if (q < 0 && q != -EBADF && r >= 0) /* Valgrind has its own FD and doesn't want to have it closed */
if (q < 0 && q != -EBADF && r >= 0)
r = q;
}

View File

@ -56,7 +56,10 @@ DEFINE_TRIVIAL_CLEANUP_FUNC_FULL(DIR*, closedir, NULL);
int fd_nonblock(int fd, bool nonblock);
int fd_cloexec(int fd, bool cloexec);
int close_all_fds(const int except[], size_t n_except);
int close_all_fds_full(int except[], size_t n_except, bool allow_alloc);
static inline int close_all_fds(int except[], size_t n_except) {
return close_all_fds_full(except, n_except, true);
}
int same_fd(int a, int b);

View File

@ -1037,34 +1037,6 @@ bool is_main_thread(void) {
return cached > 0;
}
_noreturn_ void freeze(void) {
log_close();
/* Make sure nobody waits for us on a socket anymore */
(void) close_all_fds(NULL, 0);
sync();
/* Let's not freeze right away, but keep reaping zombies. */
for (;;) {
int r;
siginfo_t si = {};
r = waitid(P_ALL, 0, &si, WEXITED);
if (r < 0 && errno != EINTR)
break;
}
/* waitid() failed with an unexpected error, things are really borked. Freeze now! */
for (;;)
pause();
}
bool oom_score_adjust_is_valid(int oa) {
return oa >= OOM_SCORE_ADJ_MIN && oa <= OOM_SCORE_ADJ_MAX;
}
unsigned long personality_from_string(const char *p) {
int architecture;
@ -1271,7 +1243,7 @@ static void restore_sigsetp(sigset_t **ssp) {
int safe_fork_full(
const char *name,
const int except_fds[],
int except_fds[],
size_t n_except_fds,
ForkFlags flags,
pid_t *ret_pid) {
@ -1466,7 +1438,7 @@ int safe_fork_full(
int namespace_fork(
const char *outer_name,
const char *inner_name,
const int except_fds[],
int except_fds[],
size_t n_except_fds,
ForkFlags flags,
int pidns_fd,
@ -1482,7 +1454,8 @@ int namespace_fork(
* process. This ensures that we are fully a member of the destination namespace, with pidns an all, so that
* /proc/self/fd works correctly. */
r = safe_fork_full(outer_name, except_fds, n_except_fds, (flags|FORK_DEATHSIG) & ~(FORK_REOPEN_LOG|FORK_NEW_MOUNTNS|FORK_MOUNTNS_SLAVE), ret_pid);
r = safe_fork_full(outer_name, except_fds, n_except_fds,
(flags|FORK_DEATHSIG) & ~(FORK_REOPEN_LOG|FORK_NEW_MOUNTNS|FORK_MOUNTNS_SLAVE), ret_pid);
if (r < 0)
return r;
if (r == 0) {
@ -1517,86 +1490,10 @@ int namespace_fork(
return 1;
}
int fork_agent(const char *name, const int except[], size_t n_except, pid_t *ret_pid, const char *path, ...) {
bool stdout_is_tty, stderr_is_tty;
size_t n, i;
va_list ap;
char **l;
int r;
assert(path);
/* Spawns a temporary TTY agent, making sure it goes away when we go away */
r = safe_fork_full(name,
except,
n_except,
FORK_RESET_SIGNALS|FORK_DEATHSIG|FORK_CLOSE_ALL_FDS|FORK_REOPEN_LOG,
ret_pid);
if (r < 0)
return r;
if (r > 0)
return 0;
/* In the child: */
stdout_is_tty = isatty(STDOUT_FILENO);
stderr_is_tty = isatty(STDERR_FILENO);
if (!stdout_is_tty || !stderr_is_tty) {
int fd;
/* Detach from stdout/stderr. and reopen
* /dev/tty for them. This is important to
* ensure that when systemctl is started via
* popen() or a similar call that expects to
* read EOF we actually do generate EOF and
* not delay this indefinitely by because we
* keep an unused copy of stdin around. */
fd = open("/dev/tty", O_WRONLY);
if (fd < 0) {
log_error_errno(errno, "Failed to open /dev/tty: %m");
_exit(EXIT_FAILURE);
}
if (!stdout_is_tty && dup2(fd, STDOUT_FILENO) < 0) {
log_error_errno(errno, "Failed to dup2 /dev/tty: %m");
_exit(EXIT_FAILURE);
}
if (!stderr_is_tty && dup2(fd, STDERR_FILENO) < 0) {
log_error_errno(errno, "Failed to dup2 /dev/tty: %m");
_exit(EXIT_FAILURE);
}
safe_close_above_stdio(fd);
}
(void) rlimit_nofile_safe();
/* Count arguments */
va_start(ap, path);
for (n = 0; va_arg(ap, char*); n++)
;
va_end(ap);
/* Allocate strv */
l = newa(char*, n + 1);
/* Fill in arguments */
va_start(ap, path);
for (i = 0; i <= n; i++)
l[i] = va_arg(ap, char*);
va_end(ap);
execv(path, l);
_exit(EXIT_FAILURE);
}
int set_oom_score_adjust(int value) {
char t[DECIMAL_STR_MAX(int)];
sprintf(t, "%i", value);
xsprintf(t, "%i", value);
return write_string_file("/proc/self/oom_score_adj", t,
WRITE_STRING_FILE_VERIFY_ON_FAILURE|WRITE_STRING_FILE_DISABLE_BUFFER);

View File

@ -82,10 +82,6 @@ int pid_from_same_root_fs(pid_t pid);
bool is_main_thread(void);
_noreturn_ void freeze(void);
bool oom_score_adjust_is_valid(int oa);
#ifndef PERSONALITY_INVALID
/* personality(7) documents that 0xffffffffUL is used for querying the
* current personality, hence let's use that here as error
@ -168,15 +164,13 @@ typedef enum ForkFlags {
FORK_NEW_USERNS = 1 << 13, /* Run child in its own user namespace */
} ForkFlags;
int safe_fork_full(const char *name, const int except_fds[], size_t n_except_fds, ForkFlags flags, pid_t *ret_pid);
int safe_fork_full(const char *name, int except_fds[], size_t n_except_fds, ForkFlags flags, pid_t *ret_pid);
static inline int safe_fork(const char *name, ForkFlags flags, pid_t *ret_pid) {
return safe_fork_full(name, NULL, 0, flags, ret_pid);
}
int namespace_fork(const char *outer_name, const char *inner_name, const int except_fds[], size_t n_except_fds, ForkFlags flags, int pidns_fd, int mntns_fd, int netns_fd, int userns_fd, int root_fd, pid_t *ret_pid);
int fork_agent(const char *name, const int except[], size_t n_except, pid_t *pid, const char *path, ...) _sentinel_;
int namespace_fork(const char *outer_name, const char *inner_name, int except_fds[], size_t n_except_fds, ForkFlags flags, int pidns_fd, int mntns_fd, int netns_fd, int userns_fd, int root_fd, pid_t *ret_pid);
int set_oom_score_adjust(int value);

View File

@ -1,5 +1,6 @@
/* SPDX-License-Identifier: LGPL-2.1-or-later */
#include <linux/oom.h>
#include <sys/mount.h>
#include <sys/prctl.h>
@ -94,6 +95,10 @@ static int property_get_environment_files(
return sd_bus_message_close_container(reply);
}
static bool oom_score_adjust_is_valid(int oa) {
return oa >= OOM_SCORE_ADJ_MIN && oa <= OOM_SCORE_ADJ_MAX;
}
static int property_get_oom_score_adjust(
sd_bus *bus,
const char *path,

View File

@ -5,6 +5,7 @@
#include "sd-event.h"
#include "alloc-util.h"
#include "exec-util.h"
#include "fd-util.h"
#include "fs-util.h"
#include "log.h"

View File

@ -448,6 +448,29 @@ ExecCommandFlags exec_command_flags_from_string(const char *s) {
return 1 << idx;
}
_noreturn_ void freeze(void) {
log_close();
/* Make sure nobody waits for us on a socket anymore */
(void) close_all_fds_full(NULL, 0, false);
sync();
/* Let's not freeze right away, but keep reaping zombies. */
for (;;) {
int r;
siginfo_t si = {};
r = waitid(P_ALL, 0, &si, WEXITED);
if (r < 0 && errno != EINTR)
break;
}
/* waitid() failed with an unexpected error, things are really borked. Freeze now! */
for (;;)
pause();
}
int fexecve_or_execve(int executable_fd, const char *executable, char *const argv[], char *const envp[]) {
#if ENABLE_FEXECVE
execveat(executable_fd, "", argv, envp, AT_EMPTY_PATH);
@ -470,3 +493,76 @@ int fexecve_or_execve(int executable_fd, const char *executable, char *const arg
execve(executable, argv, envp);
return -errno;
}
int fork_agent(const char *name, int except[], size_t n_except, pid_t *ret_pid, const char *path, ...) {
bool stdout_is_tty, stderr_is_tty;
size_t n, i;
va_list ap;
char **l;
int r;
assert(path);
/* Spawns a temporary TTY agent, making sure it goes away when we go away */
r = safe_fork_full(name,
except,
n_except,
FORK_RESET_SIGNALS|FORK_DEATHSIG|FORK_CLOSE_ALL_FDS|FORK_REOPEN_LOG,
ret_pid);
if (r < 0)
return r;
if (r > 0)
return 0;
/* In the child: */
stdout_is_tty = isatty(STDOUT_FILENO);
stderr_is_tty = isatty(STDERR_FILENO);
if (!stdout_is_tty || !stderr_is_tty) {
int fd;
/* Detach from stdout/stderr and reopen /dev/tty for them. This is important to ensure that
* when systemctl is started via popen() or a similar call that expects to read EOF we
* actually do generate EOF and not delay this indefinitely by keeping an unused copy of
* stdin around. */
fd = open("/dev/tty", O_WRONLY);
if (fd < 0) {
log_error_errno(errno, "Failed to open /dev/tty: %m");
_exit(EXIT_FAILURE);
}
if (!stdout_is_tty && dup2(fd, STDOUT_FILENO) < 0) {
log_error_errno(errno, "Failed to dup2 /dev/tty: %m");
_exit(EXIT_FAILURE);
}
if (!stderr_is_tty && dup2(fd, STDERR_FILENO) < 0) {
log_error_errno(errno, "Failed to dup2 /dev/tty: %m");
_exit(EXIT_FAILURE);
}
safe_close_above_stdio(fd);
}
(void) rlimit_nofile_safe();
/* Count arguments */
va_start(ap, path);
for (n = 0; va_arg(ap, char*); n++)
;
va_end(ap);
/* Allocate strv */
l = newa(char*, n + 1);
/* Fill in arguments */
va_start(ap, path);
for (i = 0; i <= n; i++)
l[i] = va_arg(ap, char*);
va_end(ap);
execv(path, l);
_exit(EXIT_FAILURE);
}

View File

@ -47,4 +47,8 @@ extern const gather_stdout_callback_t gather_environment[_STDOUT_CONSUME_MAX];
const char* exec_command_flags_to_string(ExecCommandFlags i);
ExecCommandFlags exec_command_flags_from_string(const char *s);
_noreturn_ void freeze(void);
int fexecve_or_execve(int executable_fd, const char *executable, char *const argv[], char *const envp[]);
int fork_agent(const char *name, int except[], size_t n_except, pid_t *ret_pid, const char *path, ...) _sentinel_;

View File

@ -11,6 +11,7 @@
#include "alloc-util.h"
#include "dissect-image.h"
#include "exec-util.h"
#include "extract-word.h"
#include "fd-util.h"
#include "fileio.h"
@ -1010,11 +1011,9 @@ static int make_userns(uid_t uid_shift, uid_t uid_range) {
r = safe_fork("(sd-mkuserns)", FORK_CLOSE_ALL_FDS|FORK_DEATHSIG|FORK_NEW_USERNS, &pid);
if (r < 0)
return r;
if (r == 0) {
if (r == 0)
/* Child. We do nothing here, just freeze until somebody kills us. */
freeze();
_exit(EXIT_FAILURE);
}
xsprintf(line, UID_FMT " " UID_FMT " " UID_FMT "\n", 0, uid_shift, uid_range);

View File

@ -4,6 +4,7 @@
#include <stdlib.h>
#include <unistd.h>
#include "exec-util.h"
#include "log.h"
#include "process-util.h"
#include "spawn-ask-password-agent.h"

View File

@ -6,6 +6,7 @@
#include <stdlib.h>
#include <unistd.h>
#include "exec-util.h"
#include "fd-util.h"
#include "io-util.h"
#include "log.h"