From 420297c9e1bf13757394f7f4dd0f312ba953466d Mon Sep 17 00:00:00 2001
From: Lennart Poettering
Date: Thu, 25 Feb 2021 16:56:07 +0100
Subject: [PATCH 1/3] missing_syscall: add epoll_pwait2() wrapper

---
 meson.build                     |  2 ++
 src/basic/missing_syscall.h     | 43 ++++++++++++++++++++++++++
 src/basic/missing_syscall_def.h | 55 +++++++++++++++++++++++++++++++++
 src/basic/missing_syscalls.py   |  3 +-
 4 files changed, 102 insertions(+), 1 deletion(-)

diff --git a/meson.build b/meson.build
index 2c5150bfc18..8fbfd5fc494 100644
--- a/meson.build
+++ b/meson.build
@@ -549,6 +549,7 @@ foreach ident : [
         ['mallinfo',          '''#include <malloc.h>'''],
         ['execveat',          '''#include <unistd.h>'''],
         ['close_range',       '''#include <unistd.h>'''],
+        ['epoll_pwait2',      '''#include <sys/epoll.h>'''],
 ]
 
         have = cc.has_function(ident[0], prefix : ident[1], args : '-D_GNU_SOURCE')
@@ -672,6 +673,7 @@ foreach header : ['crypt.h',
                   'sys/auxv.h',
                   'valgrind/memcheck.h',
                   'valgrind/valgrind.h',
+                  'linux/time_types.h',
                  ]
 
         conf.set10('HAVE_' + header.underscorify().to_upper(),
diff --git a/src/basic/missing_syscall.h b/src/basic/missing_syscall.h
index 52e9d5dea48..13843248045 100644
--- a/src/basic/missing_syscall.h
+++ b/src/basic/missing_syscall.h
@@ -5,6 +5,11 @@
 
 #include <errno.h>
 #include <fcntl.h>
+#if HAVE_LINUX_TIME_TYPES_H
+/* This header defines __kernel_timespec for us, but is only available since Linux 5.1, hence conditionally
+ * include this. */
+#include <linux/time_types.h>
+#endif
 #include <sched.h>
 #include <signal.h>
 #include <sys/syscall.h>
@@ -382,3 +387,41 @@ static inline int missing_close_range(int first_fd, int end_fd, unsigned flags)
 
 #  define close_range missing_close_range
 #endif
+
+/* ======================================================================= */
+
+#if !HAVE_EPOLL_PWAIT2
+
+/* Defined to be equivalent to the kernel's _NSIG_WORDS, i.e. the size of the array of longs that is
+ * encapsulated by sigset_t. */
+#define KERNEL_NSIG_WORDS (64 / (sizeof(long) * 8))
+#define KERNEL_NSIG_BYTES (KERNEL_NSIG_WORDS * sizeof(long))
+
+struct epoll_event;
+
+static inline int missing_epoll_pwait2(
+                int fd,
+                struct epoll_event *events,
+                int maxevents,
+                const struct timespec *timeout,
+                const sigset_t *sigset) {
+
+#  if defined(__NR_epoll_pwait2) && HAVE_LINUX_TIME_TYPES_H
+        if (timeout) {
+                /* Convert from userspace timespec to kernel timespec */
+                struct __kernel_timespec ts = {
+                        .tv_sec = timeout->tv_sec,
+                        .tv_nsec = timeout->tv_nsec,
+                };
+
+                return syscall(__NR_epoll_pwait2, fd, events, maxevents, &ts, sigset, sigset ? KERNEL_NSIG_BYTES : 0);
+        } else
+                return syscall(__NR_epoll_pwait2, fd, events, maxevents, NULL, sigset, sigset ? KERNEL_NSIG_BYTES : 0);
+#  else
+        errno = ENOSYS;
+        return -1;
+#  endif
+}
+
+#  define epoll_pwait2 missing_epoll_pwait2
+#endif
diff --git a/src/basic/missing_syscall_def.h b/src/basic/missing_syscall_def.h
index 44df37b37e8..a66977cfec5 100644
--- a/src/basic/missing_syscall_def.h
+++ b/src/basic/missing_syscall_def.h
@@ -673,3 +673,58 @@ assert_cc(__NR_statx == systemd_NR_statx);
 #  endif
 #endif
 
+#ifndef __IGNORE_epoll_pwait2
+#  if defined(__aarch64__)
+#    define systemd_NR_epoll_pwait2 441
+#  elif defined(__alpha__)
+#    define systemd_NR_epoll_pwait2 551
+#  elif defined(__arc__) || defined(__tilegx__)
+#    define systemd_NR_epoll_pwait2 441
+#  elif defined(__arm__)
+#    define systemd_NR_epoll_pwait2 441
+#  elif defined(__i386__)
+#    define systemd_NR_epoll_pwait2 441
+#  elif defined(__ia64__)
+#    define systemd_NR_epoll_pwait2 1465
+#  elif defined(__m68k__)
+#    define systemd_NR_epoll_pwait2 441
+#  elif defined(_MIPS_SIM)
+#    if _MIPS_SIM == _MIPS_SIM_ABI32
+#      define systemd_NR_epoll_pwait2 4441
+#    elif _MIPS_SIM == _MIPS_SIM_NABI32
+#      define systemd_NR_epoll_pwait2 6441
+#    elif _MIPS_SIM == _MIPS_SIM_ABI64
+#      define systemd_NR_epoll_pwait2 5441
+#    else
+#      error "Unknown MIPS ABI"
+#    endif
+#  elif defined(__powerpc__)
+#    define systemd_NR_epoll_pwait2 441
+#  elif defined(__s390__)
+#    define systemd_NR_epoll_pwait2 441
+#  elif defined(__sparc__)
+#    define systemd_NR_epoll_pwait2 441
+#  elif defined(__x86_64__)
+#    if defined(__ILP32__)
+#      define systemd_NR_epoll_pwait2 (441 | /* __X32_SYSCALL_BIT */ 0x40000000)
+#    else
+#      define systemd_NR_epoll_pwait2 441
+#    endif
+#  else
+#    warning "epoll_pwait2() syscall number is unknown for your architecture"
+#  endif
+
+/* may be an (invalid) negative number due to libseccomp, see PR 13319 */
+#  if defined __NR_epoll_pwait2 && __NR_epoll_pwait2 >= 0
+#    if defined systemd_NR_epoll_pwait2
+assert_cc(__NR_epoll_pwait2 == systemd_NR_epoll_pwait2);
+#    endif
+#  else
+#    if defined __NR_epoll_pwait2
+#      undef __NR_epoll_pwait2
+#    endif
+#    if defined systemd_NR_epoll_pwait2 && systemd_NR_epoll_pwait2 >= 0
+#      define __NR_epoll_pwait2 systemd_NR_epoll_pwait2
+#    endif
+#  endif
+#endif
diff --git a/src/basic/missing_syscalls.py b/src/basic/missing_syscalls.py
index 746fbf4a260..650f62d1d4f 100644
--- a/src/basic/missing_syscalls.py
+++ b/src/basic/missing_syscalls.py
@@ -17,7 +17,8 @@ SYSCALLS = [
     'pkey_mprotect',
     'renameat2',
     'setns',
-    'statx']
+    'statx',
+    'epoll_pwait2']
 
 def dictify(f):
     def wrap(*args, **kwargs):

From 798445ab84cff51bde7fcf936f0fb19c37cf858c Mon Sep 17 00:00:00 2001
From: Lennart Poettering
Date: Thu, 25 Feb 2021 16:56:32 +0100
Subject: [PATCH 2/3] sd-event: make use of epoll_pwait2() for greater time accuracy

---
 src/libsystemd/sd-event/sd-event.c | 73 ++++++++++++++++++++++++------
 1 file changed, 59 insertions(+), 14 deletions(-)

diff --git a/src/libsystemd/sd-event/sd-event.c b/src/libsystemd/sd-event/sd-event.c
index d9991cfe19a..35c4cb67eb0 100644
--- a/src/libsystemd/sd-event/sd-event.c
+++ b/src/libsystemd/sd-event/sd-event.c
@@ -3780,9 +3780,59 @@ pending:
         return r;
 }
 
+static int epoll_wait_usec(
+                int fd,
+                struct epoll_event *events,
+                int maxevents,
+                usec_t timeout) {
+
+        static bool epoll_pwait2_absent = false;
+        int r, msec;
+
+        /* A wrapper that uses epoll_pwait2() if available, and falls back to epoll_wait() if not */
+
+        if (!epoll_pwait2_absent && timeout != USEC_INFINITY) {
+                struct timespec ts;
+
+                r = epoll_pwait2(fd,
+                                 events,
+                                 maxevents,
+                                 timespec_store(&ts, timeout),
+                                 NULL);
+                if (r >= 0)
+                        return r;
+                if (!ERRNO_IS_NOT_SUPPORTED(errno) && !ERRNO_IS_PRIVILEGE(errno))
+                        return -errno; /* Check errno, not r (which is just -1 here). Only fall back to old
+                                        * epoll_wait() if the syscall is masked or not supported. */
+
+                epoll_pwait2_absent = true;
+        }
+
+        if (timeout == USEC_INFINITY)
+                msec = -1;
+        else {
+                usec_t k;
+
+                k = DIV_ROUND_UP(timeout, USEC_PER_MSEC);
+                if (k >= INT_MAX)
+                        msec = INT_MAX; /* Saturate */
+                else
+                        msec = (int) k;
+        }
+
+        r = epoll_wait(fd,
+                       events,
+                       maxevents,
+                       msec);
+        if (r < 0)
+                return -errno;
+
+        return r;
+}
+
 _public_ int sd_event_wait(sd_event *e, uint64_t timeout) {
         size_t n_event_queue, m;
-        int r, msec;
+        int r;
 
         assert_return(e, -EINVAL);
         assert_return(e = event_resolve(e), -ENOPKG);
@@ -3801,21 +3851,16 @@ _public_ int sd_event_wait(sd_event *e, uint64_t timeout) {
 
         /* If we still have inotify data buffered, then query the other fds, but don't wait on it */
         if (e->inotify_data_buffered)
-                msec = 0;
-        else
-                msec = timeout == (uint64_t) -1 ? -1 : (int) DIV_ROUND_UP(timeout, USEC_PER_MSEC);
+                timeout = 0;
 
         for (;;) {
-                r = epoll_wait(e->epoll_fd, e->event_queue, e->event_queue_allocated, msec);
-                if (r < 0) {
-                        if (errno == EINTR) {
-                                e->state = SD_EVENT_PENDING;
-                                return 1;
-                        }
-
-                        r = -errno;
-                        goto finish;
+                r = epoll_wait_usec(e->epoll_fd, e->event_queue, e->event_queue_allocated, timeout);
+                if (r == -EINTR) {
+                        e->state = SD_EVENT_PENDING;
+                        return 1;
                 }
+                if (r < 0)
+                        goto finish;
 
                 m = (size_t) r;
 
@@ -3828,7 +3873,7 @@ _public_ int sd_event_wait(sd_event *e, uint64_t timeout) {
                 if (!GREEDY_REALLOC(e->event_queue, e->event_queue_allocated, e->event_queue_allocated + n_event_queue))
                         return -ENOMEM;
 
-                msec = 0;
+                timeout = 0;
         }
 
         triple_timestamp_get(&e->timestamp);

From 1d5484aa67a4a5cc2201b0fcfaef7446b25948f9 Mon Sep 17 00:00:00 2001
From: Lennart Poettering
Date: Thu, 25 Feb 2021 16:58:24 +0100
Subject: [PATCH 3/3] update TODO

---
 TODO | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/TODO b/TODO
index c97517e1edf..7ff15d3b8fa 100644
--- a/TODO
+++ b/TODO
@@ -39,9 +39,6 @@ Features:
   time-based policy, so that the verification key can remain on host and ve
   validated via TPM.
 
-* sd-event: port to new kernel API epoll_wait2() (new in 5.11), to get more
-  accurate wait timeouts
-
 * sd-boot: define a drop-in dir in the ESP that may contain X.509
   certificates. If the firmware is detected to be in setup mode, automatically
   enroll them as PK/KEK/db, turn off setup mode and proceed. Optionally,