1
0
mirror of https://github.com/systemd/systemd.git synced 2024-12-22 17:35:35 +03:00

remove fallbacks for memfd-less kernels (#35605)

Let the culling continue: let's remove support for kernels that lack
memfd, i.e. pre-3.17 kernels.

This is a nice simplification of things.
This commit is contained in:
Lennart Poettering 2024-12-17 23:01:29 +01:00 committed by GitHub
commit be883be6d3
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
28 changed files with 170 additions and 530 deletions

2
README
View File

@ -43,7 +43,7 @@ REQUIREMENTS:
⛔ Kernel versions below 4.3 ("minimum baseline") are not supported at
all, and are missing required functionality (e.g. CLOCK_BOOTTIME
support for timerfd_create() or ambient capabilities).
support for timerfd_create(), ambient capabilities, or memfd_create()).
⚠️ Kernel versions below 5.4 ("recommended baseline") have significant
gaps in functionality and are not recommended for use with this version

3
TODO
View File

@ -101,9 +101,6 @@ Deprecations and removals:
and then rework cgroupsv2 support around fds, i.e. keep one fd per active
unit around, and always operate on that, instead of cgroup fs paths.
* drop support for kernels lacking memfd_create() (i.e. make 3.17 new
baseline), then drop all pipe() based fallbacks.
* drop support for getrandom()-less kernels. (GRND_INSECURE means once kernel
5.6 becomes our baseline). See
https://github.com/systemd/systemd/pull/24101#issuecomment-1193966468 for

View File

@ -24,6 +24,11 @@ int memfd_create_wrapper(const char *name, unsigned mode) {
unsigned mode_compat;
int mfd;
assert(name);
/* Wrapper around memfd_create() which adds compat with older kernels where memfd_create() didn't
* support MFD_EXEC/MFD_NOEXEC_SEAL. (kernel 6.3+) */
mfd = RET_NERRNO(memfd_create(name, mode));
if (mfd != -EINVAL)
return mfd;
@ -36,11 +41,11 @@ int memfd_create_wrapper(const char *name, unsigned mode) {
return RET_NERRNO(memfd_create(name, mode_compat));
}
int memfd_new(const char *name) {
int memfd_new_full(const char *name, unsigned extra_flags) {
_cleanup_free_ char *g = NULL;
if (!name) {
char pr[17] = {};
char pr[TASK_COMM_LEN] = {};
/* If no name is specified we generate one. We include
* a hint indicating our library implementation, and
@ -65,16 +70,18 @@ int memfd_new(const char *name) {
}
}
return memfd_create_wrapper(name, MFD_ALLOW_SEALING | MFD_CLOEXEC | MFD_NOEXEC_SEAL);
return memfd_create_wrapper(
name,
MFD_CLOEXEC | MFD_NOEXEC_SEAL | extra_flags);
}
int memfd_add_seals(int fd, unsigned int seals) {
int memfd_add_seals(int fd, unsigned seals) {
assert(fd >= 0);
return RET_NERRNO(fcntl(fd, F_ADD_SEALS, seals));
}
int memfd_get_seals(int fd, unsigned int *ret_seals) {
int memfd_get_seals(int fd, unsigned *ret_seals) {
int r;
assert(fd >= 0);
@ -88,30 +95,6 @@ int memfd_get_seals(int fd, unsigned int *ret_seals) {
return 0;
}
int memfd_map(int fd, uint64_t offset, size_t size, void **p) {
unsigned int seals;
void *q;
int r;
assert(fd >= 0);
assert(size > 0);
assert(p);
r = memfd_get_seals(fd, &seals);
if (r < 0)
return r;
if (seals & F_SEAL_WRITE)
q = mmap(NULL, size, PROT_READ, MAP_PRIVATE, fd, offset);
else
q = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, offset);
if (q == MAP_FAILED)
return -errno;
*p = q;
return 0;
}
int memfd_set_sealed(int fd) {
return memfd_add_seals(fd, F_SEAL_SEAL | F_SEAL_SHRINK | F_SEAL_GROW | F_SEAL_WRITE);
}
@ -128,16 +111,16 @@ int memfd_get_sealed(int fd) {
return FLAGS_SET(seals, F_SEAL_SHRINK | F_SEAL_GROW | F_SEAL_WRITE);
}
int memfd_get_size(int fd, uint64_t *sz) {
int memfd_get_size(int fd, uint64_t *ret) {
struct stat stat;
assert(fd >= 0);
assert(sz);
assert(ret);
if (fstat(fd, &stat) < 0)
return -errno;
*sz = stat.st_size;
*ret = stat.st_size;
return 0;
}
@ -147,50 +130,25 @@ int memfd_set_size(int fd, uint64_t sz) {
return RET_NERRNO(ftruncate(fd, sz));
}
int memfd_new_and_map(const char *name, size_t sz, void **p) {
_cleanup_close_ int fd = -EBADF;
int r;
assert(sz > 0);
assert(p);
fd = memfd_new(name);
if (fd < 0)
return fd;
r = memfd_set_size(fd, sz);
if (r < 0)
return r;
r = memfd_map(fd, 0, sz, p);
if (r < 0)
return r;
return TAKE_FD(fd);
}
int memfd_new_and_seal(const char *name, const void *data, size_t sz) {
_cleanup_close_ int fd = -EBADF;
ssize_t n;
off_t f;
int r;
assert(data || sz == 0);
fd = memfd_new(name);
if (sz == SIZE_MAX)
sz = strlen(data);
fd = memfd_new_full(name, MFD_ALLOW_SEALING);
if (fd < 0)
return fd;
if (sz > 0) {
n = write(fd, data, sz);
ssize_t n = pwrite(fd, data, sz, 0);
if (n < 0)
return -errno;
if ((size_t) n != sz)
return -EIO;
f = lseek(fd, 0, SEEK_SET);
if (f != 0)
return -errno;
}
r = memfd_set_sealed(fd);

View File

@ -8,16 +8,21 @@
int memfd_create_wrapper(const char *name, unsigned mode);
int memfd_new(const char *name);
int memfd_new_and_map(const char *name, size_t sz, void **p);
int memfd_new_and_seal(const char *name, const void *data, size_t sz);
int memfd_new_full(const char *name, unsigned extra_flags);
static inline int memfd_new(const char *name) {
return memfd_new_full(name, 0);
}
int memfd_add_seals(int fd, unsigned int seals);
int memfd_get_seals(int fd, unsigned int *ret_seals);
int memfd_map(int fd, uint64_t offset, size_t size, void **p);
int memfd_new_and_seal(const char *name, const void *data, size_t sz);
static inline int memfd_new_and_seal_string(const char *name, const char *s) {
return memfd_new_and_seal(name, s, SIZE_MAX);
}
int memfd_add_seals(int fd, unsigned seals);
int memfd_get_seals(int fd, unsigned *ret_seals);
int memfd_set_sealed(int fd);
int memfd_get_sealed(int fd);
int memfd_get_size(int fd, uint64_t *sz);
int memfd_get_size(int fd, uint64_t *ret);
int memfd_set_size(int fd, uint64_t sz);

View File

@ -14,7 +14,6 @@
#include "bus-util.h"
#include "chase.h"
#include "confidential-virt.h"
#include "data-fd-util.h"
#include "dbus-cgroup.h"
#include "dbus-execute.h"
#include "dbus-job.h"
@ -33,6 +32,7 @@
#include "locale-util.h"
#include "log.h"
#include "manager-dump.h"
#include "memfd-util.h"
#include "os-util.h"
#include "parse-util.h"
#include "path-util.h"
@ -48,10 +48,6 @@
#include "virt.h"
#include "watchdog.h"
/* Require 16MiB free in /run/systemd for reloading/reexecing. After all we need to serialize our state
* there, and if we can't we'll fail badly. */
#define RELOAD_DISK_SPACE_MIN (UINT64_C(16) * UINT64_C(1024) * UINT64_C(1024))
static UnitFileFlags unit_file_bools_to_flags(bool runtime, bool force) {
return (runtime ? UNIT_FILE_RUNTIME : 0) |
(force ? UNIT_FILE_FORCE : 0);
@ -1447,7 +1443,7 @@ static int method_dump(sd_bus_message *message, void *userdata, sd_bus_error *er
static int reply_dump_by_fd(sd_bus_message *message, char *dump) {
_cleanup_close_ int fd = -EBADF;
fd = acquire_data_fd(dump);
fd = memfd_new_and_seal_string("dump", dump);
if (fd < 0)
return fd;
@ -1485,73 +1481,6 @@ static int method_refuse_snapshot(sd_bus_message *message, void *userdata, sd_bu
return sd_bus_error_set(error, SD_BUS_ERROR_NOT_SUPPORTED, "Support for snapshots has been removed.");
}
static int get_run_space(uint64_t *ret, sd_bus_error *error) {
struct statvfs svfs;
assert(ret);
if (statvfs("/run/systemd", &svfs) < 0)
return sd_bus_error_set_errnof(error, errno, "Failed to statvfs(/run/systemd): %m");
*ret = (uint64_t) svfs.f_bfree * (uint64_t) svfs.f_bsize;
return 0;
}
static int verify_run_space(const char *message, sd_bus_error *error) {
uint64_t available = 0; /* unnecessary, but used to trick out gcc's incorrect maybe-uninitialized warning */
int r;
assert(message);
r = get_run_space(&available, error);
if (r < 0)
return r;
if (available < RELOAD_DISK_SPACE_MIN)
return sd_bus_error_setf(error,
BUS_ERROR_DISK_FULL,
"%s, not enough space available on /run/systemd/. "
"Currently, %s are free, but a safety buffer of %s is enforced.",
message,
FORMAT_BYTES(available),
FORMAT_BYTES(RELOAD_DISK_SPACE_MIN));
return 0;
}
int verify_run_space_and_log(const char *message) {
_cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
int r;
assert(message);
r = verify_run_space(message, &error);
if (r < 0)
return log_error_errno(r, "%s", bus_error_message(&error, r));
return 0;
}
static int verify_run_space_permissive(const char *message, sd_bus_error *error) {
uint64_t available = 0; /* unnecessary, but used to trick out gcc's incorrect maybe-uninitialized warning */
int r;
assert(message);
r = get_run_space(&available, error);
if (r < 0)
return r;
if (available < RELOAD_DISK_SPACE_MIN)
log_warning("Dangerously low amount of free space on /run/systemd/, %s.\n"
"Currently, %s are free, but %s are suggested. Proceeding anyway.",
message,
FORMAT_BYTES(available),
FORMAT_BYTES(RELOAD_DISK_SPACE_MIN));
return 0;
}
static void log_caller(sd_bus_message *message, Manager *manager, const char *method) {
_cleanup_(sd_bus_creds_unrefp) sd_bus_creds *creds = NULL;
_cleanup_(pidref_done) PidRef pidref = PIDREF_NULL;
@ -1585,10 +1514,6 @@ static int method_reload(sd_bus_message *message, void *userdata, sd_bus_error *
assert(message);
r = verify_run_space("Refusing to reload", error);
if (r < 0)
return r;
r = mac_selinux_access_check(message, "reload", error);
if (r < 0)
return r;
@ -1631,10 +1556,6 @@ static int method_reexecute(sd_bus_message *message, void *userdata, sd_bus_erro
assert(message);
r = verify_run_space("Refusing to reexecute", error);
if (r < 0)
return r;
r = mac_selinux_access_check(message, "reload", error);
if (r < 0)
return r;
@ -1718,10 +1639,6 @@ static int method_soft_reboot(sd_bus_message *message, void *userdata, sd_bus_er
return sd_bus_error_set(error, SD_BUS_ERROR_NOT_SUPPORTED,
"Soft reboot is only supported by system manager.");
r = verify_run_space_permissive("soft reboot may fail", error);
if (r < 0)
return r;
r = mac_selinux_access_check(message, "reboot", error);
if (r < 0)
return r;
@ -1826,10 +1743,6 @@ static int method_switch_root(sd_bus_message *message, void *userdata, sd_bus_er
return sd_bus_error_set(error, SD_BUS_ERROR_NOT_SUPPORTED,
"Root switching is only supported by system manager.");
r = verify_run_space_permissive("root switching may fail", error);
if (r < 0)
return r;
r = mac_selinux_access_check(message, "reboot", error);
if (r < 0)
return r;

View File

@ -31,7 +31,6 @@
#include "chattr-util.h"
#include "chown-recursive.h"
#include "copy.h"
#include "data-fd-util.h"
#include "env-util.h"
#include "escape.h"
#include "exec-credential.h"
@ -44,6 +43,7 @@
#include "io-util.h"
#include "iovec-util.h"
#include "journal-send.h"
#include "memfd-util.h"
#include "missing_ioprio.h"
#include "missing_prctl.h"
#include "missing_sched.h"
@ -406,7 +406,7 @@ static int setup_input(
case EXEC_INPUT_DATA: {
int fd;
fd = acquire_data_fd_full(context->stdin_data, context->stdin_data_size, /* flags = */ 0);
fd = memfd_new_and_seal("exec-input", context->stdin_data, context->stdin_data_size);
if (fd < 0)
return fd;

View File

@ -516,8 +516,9 @@ int exec_spawn(
if (r < 0)
return log_unit_error_errno(unit, r, "Failed to serialize parameters: %m");
if (fseeko(f, 0, SEEK_SET) < 0)
return log_unit_error_errno(unit, errno, "Failed to reseek on serialization stream: %m");
r = finish_serialization_file(f);
if (r < 0)
return log_unit_error_errno(unit, r, "Failed to finish serialization stream: %m");
r = fd_cloexec(fileno(f), false);
if (r < 0)

View File

@ -34,12 +34,12 @@
#include "clock-warp.h"
#include "conf-parser.h"
#include "confidential-virt.h"
#include "constants.h"
#include "copy.h"
#include "cpu-set-util.h"
#include "crash-handler.h"
#include "dbus-manager.h"
#include "dbus.h"
#include "constants.h"
#include "dev-setup.h"
#include "efi-random.h"
#include "efivars.h"
@ -87,6 +87,7 @@
#include "seccomp-util.h"
#include "selinux-setup.h"
#include "selinux-util.h"
#include "serialize.h"
#include "signal-util.h"
#include "smack-setup.h"
#include "special.h"
@ -1233,14 +1234,14 @@ static int prepare_reexecute(
assert(ret_f);
assert(ret_fds);
r = manager_open_serialization(m, &f);
if (r < 0)
return log_error_errno(r, "Failed to create serialization file: %m");
/* Make sure nothing is really destructed when we shut down */
m->n_reloading++;
bus_manager_send_reloading(m, true);
r = manager_open_serialization(m, &f);
if (r < 0)
return log_error_errno(r, "Failed to create serialization file: %m");
fds = fdset_new();
if (!fds)
return log_oom();
@ -1249,8 +1250,9 @@ static int prepare_reexecute(
if (r < 0)
return r;
if (fseeko(f, 0, SEEK_SET) < 0)
return log_error_errno(errno, "Failed to rewind serialization fd: %m");
r = finish_serialization_file(f);
if (r < 0)
return log_error_errno(r, "Failed to finish serialization file: %m");
r = fd_cloexec(fileno(f), false);
if (r < 0)

View File

@ -186,10 +186,6 @@ int manager_serialize(
return r;
}
r = fflush_and_check(f);
if (r < 0)
return log_error_errno(r, "Failed to flush serialization: %m");
r = bus_fdset_add_all(m, fds);
if (r < 0)
return log_error_errno(r, "Failed to add bus sockets to serialization: %m");

View File

@ -80,6 +80,7 @@
#include "rlimit-util.h"
#include "rm-rf.h"
#include "selinux-util.h"
#include "serialize.h"
#include "signal-util.h"
#include "socket-util.h"
#include "special.h"
@ -3123,9 +3124,6 @@ static int manager_dispatch_signal_fd(sd_event_source *source, int fd, uint32_t
case SIGTERM:
if (MANAGER_IS_SYSTEM(m)) {
/* This is for compatibility with the original sysvinit */
if (verify_run_space_and_log("Refusing to reexecute") < 0)
break;
m->objective = MANAGER_REEXECUTE;
break;
}
@ -3179,9 +3177,6 @@ static int manager_dispatch_signal_fd(sd_event_source *source, int fd, uint32_t
}
case SIGHUP:
if (verify_run_space_and_log("Refusing to reload") < 0)
break;
m->objective = MANAGER_RELOAD;
break;
@ -3762,8 +3757,9 @@ int manager_reload(Manager *m) {
if (r < 0)
return r;
if (fseeko(f, 0, SEEK_SET) < 0)
return log_error_errno(errno, "Failed to seek to beginning of serialization: %m");
r = finish_serialization_file(f);
if (r < 0)
return log_error_errno(r, "Failed to finish serialization: %m");
/* 💀 This is the point of no return, from here on there is no way back. 💀 */
reloading = NULL;

View File

@ -13,7 +13,6 @@
#include "build-path.h"
#include "bus-common-errors.h"
#include "bus-locator.h"
#include "data-fd-util.h"
#include "env-util.h"
#include "errno-list.h"
#include "errno-util.h"
@ -1266,13 +1265,13 @@ static int home_start_work(
if (r < 0)
return r;
stdin_fd = acquire_data_fd(formatted);
stdin_fd = memfd_new_and_seal_string("request", formatted);
if (stdin_fd < 0)
return stdin_fd;
log_debug("Sending to worker: %s", formatted);
stdout_fd = memfd_create_wrapper("homework-stdout", MFD_CLOEXEC | MFD_NOEXEC_SEAL);
stdout_fd = memfd_new("homework-stdout");
if (stdout_fd < 0)
return stdout_fd;

View File

@ -5,7 +5,6 @@
#include <linux/fs.h>
#endif
#include "data-fd-util.h"
#include "dirent-util.h"
#include "fd-util.h"
#include "fileio.h"
@ -13,6 +12,7 @@
#include "fs-util.h"
#include "homework-cifs.h"
#include "homework-mount.h"
#include "memfd-util.h"
#include "mkdir.h"
#include "mount-util.h"
#include "process-util.h"
@ -76,7 +76,7 @@ int home_setup_cifs(
pid_t mount_pid;
int exit_status;
passwd_fd = acquire_data_fd(*pw);
passwd_fd = memfd_new_and_seal_string("cifspw", *pw);
if (passwd_fd < 0)
return log_error_errno(passwd_fd, "Failed to create data FD for password: %m");

View File

@ -24,7 +24,6 @@ int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
_cleanup_(unlink_and_freep) char *name = NULL;
_cleanup_(sd_journal_closep) sd_journal *j = NULL;
_cleanup_(journal_remote_server_destroy) RemoteServer s = {};
void *mem;
int fdin, r;
if (outside_size_range(size, 3, 65536))
@ -35,13 +34,10 @@ int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
assert_se(mkdtemp_malloc("/tmp/fuzz-journal-remote-XXXXXX", &tmp) >= 0);
assert_se(name = path_join(tmp, "fuzz-journal-remote.XXXXXX.journal"));
fdin = fdin_close = memfd_new_and_map("fuzz-journal-remote", size, &mem);
fdin = fdin_close = memfd_new_and_seal("fuzz-journal-remote", data, size);
if (fdin < 0)
return log_error_errno(fdin, "memfd_new_and_map() failed: %m");
memcpy(mem, data, size);
assert_se(munmap(mem, size) == 0);
fdout = mkostemps(name, STRLEN(".journal"), O_CLOEXEC);
if (fdout < 0)
return log_error_errno(errno, "mkostemps() failed: %m");

View File

@ -32,7 +32,6 @@ BUS_ERROR_MAP_ELF_REGISTER const sd_bus_error_map bus_common_errors[] = {
SD_BUS_ERROR_MAP(BUS_ERROR_SCOPE_NOT_RUNNING, EHOSTDOWN),
SD_BUS_ERROR_MAP(BUS_ERROR_NO_SUCH_DYNAMIC_USER, ESRCH),
SD_BUS_ERROR_MAP(BUS_ERROR_NOT_REFERENCED, EUNATCH),
SD_BUS_ERROR_MAP(BUS_ERROR_DISK_FULL, ENOSPC),
SD_BUS_ERROR_MAP(BUS_ERROR_FILE_DESCRIPTOR_STORE_DISABLED,
EHOSTDOWN),
SD_BUS_ERROR_MAP(BUS_ERROR_FROZEN_BY_PARENT, EDEADLK),

View File

@ -28,7 +28,6 @@
#define BUS_ERROR_SCOPE_NOT_RUNNING "org.freedesktop.systemd1.ScopeNotRunning"
#define BUS_ERROR_NO_SUCH_DYNAMIC_USER "org.freedesktop.systemd1.NoSuchDynamicUser"
#define BUS_ERROR_NOT_REFERENCED "org.freedesktop.systemd1.NotReferenced"
#define BUS_ERROR_DISK_FULL "org.freedesktop.systemd1.DiskFull"
#define BUS_ERROR_NOTHING_TO_CLEAN "org.freedesktop.systemd1.NothingToClean"
#define BUS_ERROR_UNIT_BUSY "org.freedesktop.systemd1.UnitBusy"
#define BUS_ERROR_UNIT_INACTIVE "org.freedesktop.systemd1.UnitInactive"

View File

@ -22,6 +22,7 @@
#include "iovec-util.h"
#include "journal-send.h"
#include "memfd-util.h"
#include "missing_mman.h"
#include "missing_syscall.h"
#include "process-util.h"
#include "socket-util.h"
@ -233,7 +234,6 @@ _public_ int sd_journal_sendv(const struct iovec *iov, int n) {
};
ssize_t k;
bool have_syslog_identifier = false;
bool seal = true;
assert_return(iov, -EINVAL);
assert_return(n > 0, -EINVAL);
@ -312,35 +312,19 @@ _public_ int sd_journal_sendv(const struct iovec *iov, int n) {
if (!IN_SET(errno, EMSGSIZE, ENOBUFS, EAGAIN))
return -errno;
/* Message doesn't fit... Let's dump the data in a memfd or
* temporary file and just pass a file descriptor of it to the
* other side.
*
* For the temporary files we use /dev/shm instead of /tmp
* here, since we want this to be a tmpfs, and one that is
* available from early boot on and where unprivileged users
* can create files. */
buffer_fd = memfd_new(NULL);
if (buffer_fd < 0) {
if (buffer_fd == -ENOSYS) {
buffer_fd = open_tmpfile_unlinkable("/dev/shm", O_RDWR | O_CLOEXEC);
if (buffer_fd < 0)
return buffer_fd;
seal = false;
} else
return buffer_fd;
}
/* Message doesn't fit... Let's dump the data in a memfd or temporary file and just pass a file
* descriptor of it to the other side. */
buffer_fd = memfd_new_full("journal-data", MFD_ALLOW_SEALING);
if (buffer_fd < 0)
return buffer_fd;
n = writev(buffer_fd, w, j);
if (n < 0)
return -errno;
if (seal) {
r = memfd_set_sealed(buffer_fd);
if (r < 0)
return r;
}
r = memfd_set_sealed(buffer_fd);
if (r < 0)
return r;
r = send_one_fd_sa(fd, buffer_fd, mh.msg_name, mh.msg_namelen, 0);
if (r == -ENOENT)

View File

@ -2,11 +2,11 @@
#include "af-list.h"
#include "daemon-util.h"
#include "data-fd-util.h"
#include "fd-util.h"
#include "fileio.h"
#include "iovec-util.h"
#include "json-util.h"
#include "memfd-util.h"
#include "networkd-address.h"
#include "networkd-json.h"
#include "networkd-link.h"
@ -69,7 +69,7 @@ int manager_serialize(Manager *manager) {
return r;
_cleanup_close_ int fd = -EBADF;
fd = acquire_data_fd(dump);
fd = memfd_new_and_seal_string("serialization", dump);
if (fd < 0)
return fd;

View File

@ -4,8 +4,8 @@
#include "bus-common-errors.h"
#include "bus-polkit.h"
#include "data-fd-util.h"
#include "fd-util.h"
#include "memfd-util.h"
#include "oomd-manager-bus.h"
#include "oomd-manager.h"
#include "user-util.h"
@ -22,7 +22,7 @@ static int bus_method_dump_by_fd(sd_bus_message *message, void *userdata, sd_bus
if (r < 0)
return r;
fd = acquire_data_fd(dump);
fd = memfd_new_and_seal_string("oomd-dump", dump);
if (fd < 0)
return fd;

View File

@ -21,10 +21,10 @@
#include "capsule-util.h"
#include "chase.h"
#include "daemon-util.h"
#include "data-fd-util.h"
#include "env-util.h"
#include "fd-util.h"
#include "format-util.h"
#include "memfd-util.h"
#include "memstream-util.h"
#include "path-util.h"
#include "socket-util.h"
@ -803,7 +803,7 @@ static int method_dump_memory_state_by_fd(sd_bus_message *message, void *userdat
if (r < 0)
return r;
fd = acquire_data_fd_full(dump, dump_size, /* flags = */ 0);
fd = memfd_new_and_seal("malloc-info", dump, dump_size);
if (fd < 0)
return fd;

View File

@ -19,137 +19,12 @@
#include "missing_syscall.h"
#include "tmpfile-util.h"
/* When the data is smaller or equal to 64K, try to place the copy in a memfd/pipe */
/* When the data is smaller or equal to 64K, try to place the copy in a memfd */
#define DATA_FD_MEMORY_LIMIT (64U * U64_KB)
/* If memfd/pipe didn't work out, then let's use a file in /tmp up to a size of 1M. If it's large than that use /var/tmp instead. */
/* If memfd didn't work out, then let's use a file in /tmp up to a size of 1M. If it's large than that use /var/tmp/ instead. */
#define DATA_FD_TMP_LIMIT (1U * U64_MB)
int acquire_data_fd_full(const void *data, size_t size, DataFDFlags flags) {
_cleanup_close_ int fd = -EBADF;
ssize_t n;
int r;
assert(data || size == 0);
/* Acquire a read-only file descriptor that when read from returns the specified data. This is much more
* complex than I wish it was. But here's why:
*
* a) First we try to use memfds. They are the best option, as we can seal them nicely to make them
* read-only. Unfortunately they require kernel 3.17, and at the time of writing we still support 3.14.
*
* b) Then, we try classic pipes. They are the second best options, as we can close the writing side, retaining
* a nicely read-only fd in the reading side. However, they are by default quite small, and unprivileged
* clients can only bump their size to a system-wide limit, which might be quite low.
*
* c) Then, we try an O_TMPFILE file in /dev/shm (that dir is the only suitable one known to exist from
* earliest boot on). To make it read-only we open the fd a second time with O_RDONLY via
* /proc/self/<fd>. Unfortunately O_TMPFILE is not available on older kernels on tmpfs.
*
* d) Finally, we try creating a regular file in /dev/shm, which we then delete.
*
* It sucks a bit that depending on the situation we return very different objects here, but that's Linux I
* figure. */
if (size == SIZE_MAX)
size = strlen(data);
if (size == 0 && !FLAGS_SET(flags, ACQUIRE_NO_DEV_NULL))
/* As a special case, return /dev/null if we have been called for an empty data block */
return RET_NERRNO(open("/dev/null", O_RDONLY|O_CLOEXEC|O_NOCTTY));
if (!FLAGS_SET(flags, ACQUIRE_NO_MEMFD)) {
fd = memfd_new_and_seal("data-fd", data, size);
if (fd < 0 && !ERRNO_IS_NOT_SUPPORTED(fd))
return fd;
if (fd >= 0)
return TAKE_FD(fd);
}
if (!FLAGS_SET(flags, ACQUIRE_NO_PIPE)) {
_cleanup_close_pair_ int pipefds[2] = EBADF_PAIR;
int isz;
if (pipe2(pipefds, O_CLOEXEC|O_NONBLOCK) < 0)
return -errno;
isz = fcntl(pipefds[1], F_GETPIPE_SZ, 0);
if (isz < 0)
return -errno;
if ((size_t) isz < size) {
isz = (int) size;
if (isz < 0 || (size_t) isz != size)
return -E2BIG;
/* Try to bump the pipe size */
(void) fcntl(pipefds[1], F_SETPIPE_SZ, isz);
/* See if that worked */
isz = fcntl(pipefds[1], F_GETPIPE_SZ, 0);
if (isz < 0)
return -errno;
if ((size_t) isz < size)
goto try_dev_shm;
}
n = write(pipefds[1], data, size);
if (n < 0)
return -errno;
if ((size_t) n != size)
return -EIO;
(void) fd_nonblock(pipefds[0], false);
return TAKE_FD(pipefds[0]);
}
try_dev_shm:
if (!FLAGS_SET(flags, ACQUIRE_NO_TMPFILE)) {
fd = open("/dev/shm", O_RDWR|O_TMPFILE|O_CLOEXEC, 0500);
if (fd < 0)
goto try_dev_shm_without_o_tmpfile;
n = write(fd, data, size);
if (n < 0)
return -errno;
if ((size_t) n != size)
return -EIO;
/* Let's reopen the thing, in order to get an O_RDONLY fd for the original O_RDWR one */
return fd_reopen(fd, O_RDONLY|O_CLOEXEC);
}
try_dev_shm_without_o_tmpfile:
if (!FLAGS_SET(flags, ACQUIRE_NO_REGULAR)) {
char pattern[] = "/dev/shm/data-fd-XXXXXX";
fd = mkostemp_safe(pattern);
if (fd < 0)
return fd;
n = write(fd, data, size);
if (n < 0) {
r = -errno;
goto unlink_and_return;
}
if ((size_t) n != size) {
r = -EIO;
goto unlink_and_return;
}
/* Let's reopen the thing, in order to get an O_RDONLY fd for the original O_RDWR one */
r = fd_reopen(fd, O_RDONLY|O_CLOEXEC);
unlink_and_return:
(void) unlink(pattern);
return r;
}
return -EOPNOTSUPP;
}
int copy_data_fd(int fd) {
_cleanup_close_ int copy_fd = -EBADF, tmp_fd = -EBADF;
_cleanup_free_ void *remains = NULL;
@ -158,11 +33,11 @@ int copy_data_fd(int fd) {
struct stat st;
int r;
/* Creates a 'data' fd from the specified source fd, containing all the same data in a read-only fashion, but
* independent of it (i.e. the source fd can be closed and unmounted after this call succeeded). Tries to be
* somewhat smart about where to place the data. In the best case uses a memfd(). If memfd() are not supported
* uses a pipe instead. For larger data will use an unlinked file in /tmp, and for even larger data one in
* /var/tmp. */
/* Creates a 'data' fd from the specified source fd, containing all the same data in a read-only
* fashion, but independent of it (i.e. the source fd can be closed and unmounted after this call
* succeeded). Tries to be somewhat smart about where to place the data. In the best case uses a
* memfd(). For larger data will use an unlinked file in /tmp/, and for even larger data one in
* /var/tmp/. */
if (fstat(fd, &st) < 0)
return -errno;
@ -175,96 +50,49 @@ int copy_data_fd(int fd) {
if (!S_ISREG(st.st_mode) && !S_ISSOCK(st.st_mode) && !S_ISFIFO(st.st_mode) && !S_ISCHR(st.st_mode))
return -EBADFD;
/* If we have reason to believe the data is bounded in size, then let's use memfds or pipes as backing fd. Note
* that we use the reported regular file size only as a hint, given that there are plenty special files in
* /proc and /sys which report a zero file size but can be read from. */
/* If we have reason to believe the data is bounded in size, then let's use memfds as backing
* fd. Note that we use the reported regular file size only as a hint, given that there are plenty
* special files in /proc/ and /sys/ which report a zero file size but can be read from. */
if (!S_ISREG(st.st_mode) || (uint64_t) st.st_size < DATA_FD_MEMORY_LIMIT) {
/* Try a memfd first */
copy_fd = memfd_new("data-fd");
if (copy_fd >= 0) {
off_t f;
copy_fd = memfd_new_full("data-fd", MFD_ALLOW_SEALING);
if (copy_fd < 0)
return copy_fd;
r = copy_bytes(fd, copy_fd, DATA_FD_MEMORY_LIMIT, 0);
r = copy_bytes(fd, copy_fd, DATA_FD_MEMORY_LIMIT, COPY_REFLINK);
if (r < 0)
return r;
off_t f = lseek(copy_fd, 0, SEEK_SET);
if (f < 0)
return -errno;
if (f != 0)
return -EIO;
if (r == 0) {
/* Did it fit into the limit? If so, we are done. */
r = memfd_set_sealed(copy_fd);
if (r < 0)
return r;
f = lseek(copy_fd, 0, SEEK_SET);
if (f != 0)
return -errno;
if (r == 0) {
/* Did it fit into the limit? If so, we are done. */
r = memfd_set_sealed(copy_fd);
if (r < 0)
return r;
return TAKE_FD(copy_fd);
}
/* Hmm, pity, this didn't fit. Let's fall back to /tmp then, see below */
} else {
_cleanup_close_pair_ int pipefds[2] = EBADF_PAIR;
int isz;
/* If memfds aren't available, use a pipe. Set O_NONBLOCK so that we will get EAGAIN rather
* then block indefinitely when we hit the pipe size limit */
if (pipe2(pipefds, O_CLOEXEC|O_NONBLOCK) < 0)
return -errno;
isz = fcntl(pipefds[1], F_GETPIPE_SZ, 0);
if (isz < 0)
return -errno;
/* Try to enlarge the pipe size if necessary */
if ((size_t) isz < DATA_FD_MEMORY_LIMIT) {
(void) fcntl(pipefds[1], F_SETPIPE_SZ, DATA_FD_MEMORY_LIMIT);
isz = fcntl(pipefds[1], F_GETPIPE_SZ, 0);
if (isz < 0)
return -errno;
}
if ((size_t) isz >= DATA_FD_MEMORY_LIMIT) {
r = copy_bytes_full(fd, pipefds[1], DATA_FD_MEMORY_LIMIT, 0, &remains, &remains_size, NULL, NULL);
if (r < 0 && r != -EAGAIN)
return r; /* If we get EAGAIN it could be because of the source or because of
* the destination fd, we can't know, as sendfile() and friends won't
* tell us. Hence, treat this as reason to fall back, just to be
* sure. */
if (r == 0) {
/* Everything fit in, yay! */
(void) fd_nonblock(pipefds[0], false);
return TAKE_FD(pipefds[0]);
}
/* Things didn't fit in. But we read data into the pipe, let's remember that, so that
* when writing the new file we incorporate this first. */
copy_fd = TAKE_FD(pipefds[0]);
}
return TAKE_FD(copy_fd);
}
}
/* If we have reason to believe this will fit fine in /tmp, then use that as first fallback. */
if ((!S_ISREG(st.st_mode) || (uint64_t) st.st_size < DATA_FD_TMP_LIMIT) &&
(DATA_FD_MEMORY_LIMIT + remains_size) < DATA_FD_TMP_LIMIT) {
off_t f;
tmp_fd = open_tmpfile_unlinkable(NULL /* NULL as directory means /tmp */, O_RDWR|O_CLOEXEC);
if (tmp_fd < 0)
return tmp_fd;
if (copy_fd >= 0) {
/* If we tried a memfd/pipe first and it ended up being too large, then copy this into the
/* If we tried a memfd first and it ended up being too large, then copy this into the
* temporary file first. */
r = copy_bytes(copy_fd, tmp_fd, UINT64_MAX, 0);
r = copy_bytes(copy_fd, tmp_fd, UINT64_MAX, COPY_REFLINK);
if (r < 0)
return r;
@ -287,9 +115,11 @@ int copy_data_fd(int fd) {
goto finish; /* Yay, it fit in */
/* It didn't fit in. Let's not forget to use what we already used */
f = lseek(tmp_fd, 0, SEEK_SET);
if (f != 0)
off_t f = lseek(tmp_fd, 0, SEEK_SET);
if (f < 0)
return -errno;
if (f != 0)
return -EIO;
close_and_replace(copy_fd, tmp_fd);
@ -297,7 +127,7 @@ int copy_data_fd(int fd) {
remains_size = 0;
}
/* As last fallback use /var/tmp */
/* As last fallback use /var/tmp/ */
r = var_tmp_dir(&td);
if (r < 0)
return r;
@ -307,7 +137,7 @@ int copy_data_fd(int fd) {
return tmp_fd;
if (copy_fd >= 0) {
/* If we tried a memfd/pipe first, or a file in /tmp, and it ended up being too large, than copy this
/* If we tried a memfd first, or a file in /tmp/, and it ended up being too large, than copy this
* into the temporary file first. */
r = copy_bytes(copy_fd, tmp_fd, UINT64_MAX, COPY_REFLINK);
if (r < 0)
@ -370,17 +200,11 @@ int memfd_clone_fd(int fd, const char *name, int mode) {
return r;
if (ro) {
_cleanup_close_ int rfd = -EBADF;
r = memfd_set_sealed(mfd);
if (r < 0)
return r;
rfd = fd_reopen(mfd, mode);
if (rfd < 0)
return rfd;
return TAKE_FD(rfd);
return fd_reopen(mfd, mode);
}
off_t f = lseek(mfd, 0, SEEK_SET);

View File

@ -4,18 +4,5 @@
#include <stddef.h>
#include <stdint.h>
typedef enum DataFDFlags {
ACQUIRE_NO_DEV_NULL = 1 << 0,
ACQUIRE_NO_MEMFD = 1 << 1,
ACQUIRE_NO_PIPE = 1 << 2,
ACQUIRE_NO_TMPFILE = 1 << 3,
ACQUIRE_NO_REGULAR = 1 << 4,
} DataFDFlags;
int acquire_data_fd_full(const void *data, size_t size, DataFDFlags flags);
static inline int acquire_data_fd(const void *data) {
return acquire_data_fd_full(data, SIZE_MAX, 0);
}
int copy_data_fd(int fd);
int memfd_clone_fd(int fd, const char *name, int mode);

View File

@ -199,8 +199,9 @@ static int do_execute(
}
if (callbacks) {
if (lseek(fd, 0, SEEK_SET) < 0)
return log_error_errno(errno, "Failed to seek on serialization fd: %m");
r = finish_serialization_fd(fd);
if (r < 0)
return log_error_errno(r, "Failed to finish serialization fd: %m");
r = callbacks[STDOUT_GENERATE](TAKE_FD(fd), callback_args[STDOUT_GENERATE]);
if (r < 0)
@ -290,12 +291,14 @@ int execute_strv(
if (!callbacks)
return 0;
if (lseek(fd, 0, SEEK_SET) < 0)
return log_error_errno(errno, "Failed to rewind serialization fd: %m");
r = finish_serialization_fd(fd);
if (r < 0)
return log_error_errno(r, "Failed to finish serialization fd: %m");
r = callbacks[STDOUT_CONSUME](TAKE_FD(fd), callback_args[STDOUT_CONSUME]);
if (r < 0)
return log_error_errno(r, "Failed to parse returned data: %m");
return 0;
}

View File

@ -547,21 +547,13 @@ void deserialize_ratelimit(RateLimit *rl, const char *name, const char *value) {
}
int open_serialization_fd(const char *ident) {
int fd;
assert(ident);
fd = memfd_create_wrapper(ident, MFD_CLOEXEC | MFD_NOEXEC_SEAL);
if (fd < 0) {
const char *path;
path = getpid_cached() == 1 ? "/run/systemd" : "/tmp";
fd = open_tmpfile_unlinkable(path, O_RDWR|O_CLOEXEC);
if (fd < 0)
return fd;
log_debug("Serializing %s to %s.", ident, path);
} else
log_debug("Serializing %s to memfd.", ident);
int fd = memfd_new_full(ident, MFD_ALLOW_SEALING);
if (fd < 0)
return fd;
log_debug("Serializing %s to memfd.", ident);
return fd;
}
@ -580,6 +572,33 @@ int open_serialization_file(const char *ident, FILE **ret) {
return -errno;
*ret = TAKE_PTR(f);
return 0;
}
int finish_serialization_fd(int fd) {
assert(fd >= 0);
if (lseek(fd, 0, SEEK_SET) < 0)
return -errno;
return memfd_set_sealed(fd);
}
int finish_serialization_file(FILE *f) {
int r;
assert(f);
r = fflush_and_check(f);
if (r < 0)
return r;
if (fseeko(f, 0, SEEK_SET) < 0)
return -errno;
int fd = fileno(f);
if (fd < 0)
return -EBADF;
return memfd_set_sealed(fd);
}

View File

@ -51,3 +51,6 @@ void deserialize_ratelimit(RateLimit *rl, const char *name, const char *value);
int open_serialization_fd(const char *ident);
int open_serialization_file(const char *ident, FILE **ret);
int finish_serialization_fd(int fd);
int finish_serialization_file(FILE *f);

View File

@ -7,57 +7,11 @@
#include "data-fd-util.h"
#include "fd-util.h"
#include "memfd-util.h"
#include "memory-util.h"
#include "process-util.h"
#include "tests.h"
#include "random-util.h"
static void test_acquire_data_fd_one(unsigned flags) {
char wbuffer[196*1024 - 7];
char rbuffer[sizeof(wbuffer)];
int fd;
fd = acquire_data_fd_full("foo", 3, flags);
assert_se(fd >= 0);
zero(rbuffer);
assert_se(read(fd, rbuffer, sizeof(rbuffer)) == 3);
ASSERT_STREQ(rbuffer, "foo");
fd = safe_close(fd);
fd = acquire_data_fd_full("", SIZE_MAX, flags);
assert_se(fd >= 0);
zero(rbuffer);
assert_se(read(fd, rbuffer, sizeof(rbuffer)) == 0);
ASSERT_STREQ(rbuffer, "");
fd = safe_close(fd);
random_bytes(wbuffer, sizeof(wbuffer));
fd = acquire_data_fd_full(wbuffer, sizeof(wbuffer), flags);
assert_se(fd >= 0);
zero(rbuffer);
assert_se(read(fd, rbuffer, sizeof(rbuffer)) == sizeof(rbuffer));
assert_se(memcmp(rbuffer, wbuffer, sizeof(rbuffer)) == 0);
fd = safe_close(fd);
}
TEST(acquire_data_fd) {
test_acquire_data_fd_one(0);
test_acquire_data_fd_one(ACQUIRE_NO_DEV_NULL);
test_acquire_data_fd_one(ACQUIRE_NO_MEMFD);
test_acquire_data_fd_one(ACQUIRE_NO_DEV_NULL|ACQUIRE_NO_MEMFD);
test_acquire_data_fd_one(ACQUIRE_NO_PIPE);
test_acquire_data_fd_one(ACQUIRE_NO_DEV_NULL|ACQUIRE_NO_PIPE);
test_acquire_data_fd_one(ACQUIRE_NO_MEMFD|ACQUIRE_NO_PIPE);
test_acquire_data_fd_one(ACQUIRE_NO_DEV_NULL|ACQUIRE_NO_MEMFD|ACQUIRE_NO_PIPE);
test_acquire_data_fd_one(ACQUIRE_NO_DEV_NULL|ACQUIRE_NO_MEMFD|ACQUIRE_NO_PIPE|ACQUIRE_NO_TMPFILE);
}
#include "tests.h"
static void assert_equal_fd(int fd1, int fd2) {
for (;;) {
@ -98,14 +52,14 @@ TEST(copy_data_fd) {
fd1 = safe_close(fd1);
fd2 = safe_close(fd2);
fd1 = acquire_data_fd("hallo");
fd1 = memfd_new_and_seal_string("data", "hallo");
assert_se(fd1 >= 0);
fd2 = copy_data_fd(fd1);
assert_se(fd2 >= 0);
safe_close(fd1);
fd1 = acquire_data_fd("hallo");
fd1 = memfd_new_and_seal_string("data", "hallo");
assert_se(fd1 >= 0);
assert_equal_fd(fd1, fd2);

View File

@ -6,11 +6,11 @@
#include <unistd.h>
#include "alloc-util.h"
#include "data-fd-util.h"
#include "fd-util.h"
#include "fileio.h"
#include "fs-util.h"
#include "macro.h"
#include "memfd-util.h"
#include "memory-util.h"
#include "missing_syscall.h"
#include "mkdir.h"
@ -127,6 +127,8 @@ TEST(open_serialization_fd) {
assert_se(fd >= 0);
assert_se(write(fd, "test\n", 5) == 5);
assert_se(finish_serialization_fd(fd) >= 0);
}
TEST(open_serialization_file) {
@ -138,6 +140,8 @@ TEST(open_serialization_file) {
assert_se(f);
assert_se(fwrite("test\n", 1, 5, f) == 5);
assert_se(finish_serialization_file(f) >= 0);
}
TEST(fd_move_above_stdio) {
@ -203,7 +207,7 @@ TEST(rearrange_stdio) {
assert_se(pipe_read_fd >= 3);
assert_se(open("/dev/full", O_WRONLY|O_CLOEXEC) == 0);
assert_se(acquire_data_fd("foobar") == 2);
assert_se(memfd_new_and_seal_string("data", "foobar") == 2);
assert_se(rearrange_stdio(2, 0, 1) >= 0);

View File

@ -5,6 +5,7 @@
#include "errno-util.h"
#include "fd-util.h"
#include "memfd-util.h"
#include "missing_mman.h"
#include "string-util.h"
#include "tests.h"
@ -12,7 +13,7 @@ TEST(memfd_get_sealed) {
#define TEST_TEXT "this is some random test text we are going to write to a memfd"
_cleanup_close_ int fd = -EBADF;
fd = memfd_new("test-memfd-get-sealed");
fd = memfd_new_full("test-memfd-get-sealed", MFD_ALLOW_SEALING);
if (fd < 0) {
assert_se(ERRNO_IS_NOT_SUPPORTED(fd));
return;

View File

@ -8,9 +8,9 @@
#include "sd-json.h"
#include "sd-varlink.h"
#include "data-fd-util.h"
#include "fd-util.h"
#include "json-util.h"
#include "memfd-util.h"
#include "rm-rf.h"
#include "strv.h"
#include "tests.h"
@ -134,8 +134,8 @@ static int method_passfd(sd_varlink *link, sd_json_variant *parameters, sd_varli
test_fd(yy, "bar", 3);
test_fd(zz, "quux", 4);
_cleanup_close_ int vv = acquire_data_fd("miau");
_cleanup_close_ int ww = acquire_data_fd("wuff");
_cleanup_close_ int vv = memfd_new_and_seal_string("data", "miau");
_cleanup_close_ int ww = memfd_new_and_seal_string("data", "wuff");
assert_se(vv >= 0);
assert_se(ww >= 0);
@ -284,9 +284,9 @@ static void *thread(void *arg) {
assert_se(sd_json_variant_integer(sd_json_variant_by_key(o, "sum")) == 88 + 99);
assert_se(!e);
int fd1 = acquire_data_fd("foo");
int fd2 = acquire_data_fd("bar");
int fd3 = acquire_data_fd("quux");
int fd1 = memfd_new_and_seal_string("data", "foo");
int fd2 = memfd_new_and_seal_string("data", "bar");
int fd3 = memfd_new_and_seal_string("data", "quux");
assert_se(fd1 >= 0);
assert_se(fd2 >= 0);