1
1
mirror of https://github.com/systemd/systemd-stable.git synced 2025-01-07 17:17:44 +03:00

Merge pull request #18804 from poettering/epoll-wait2

sd-event: make use of kernel 5.11 epoll_pwait2() if available
This commit is contained in:
Lennart Poettering 2021-02-26 14:27:27 +01:00 committed by GitHub
commit 16e6a7010f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 161 additions and 18 deletions

3
TODO
View File

@ -39,9 +39,6 @@ Features:
time-based policy, so that the verification key can remain on host and be
validated via TPM.
* sd-event: port to new kernel API epoll_pwait2() (new in 5.11), to get more
accurate wait timeouts
* sd-boot: define a drop-in dir in the ESP that may contain X.509
certificates. If the firmware is detected to be in setup mode, automatically
enroll them as PK/KEK/db, turn off setup mode and proceed. Optionally,

View File

@ -549,6 +549,7 @@ foreach ident : [
['mallinfo', '''#include <malloc.h>'''],
['execveat', '''#include <unistd.h>'''],
['close_range', '''#include <unistd.h>'''],
['epoll_pwait2', '''#include <sys/epoll.h>'''],
]
have = cc.has_function(ident[0], prefix : ident[1], args : '-D_GNU_SOURCE')
@ -672,6 +673,7 @@ foreach header : ['crypt.h',
'sys/auxv.h',
'valgrind/memcheck.h',
'valgrind/valgrind.h',
'linux/time_types.h',
]
conf.set10('HAVE_' + header.underscorify().to_upper(),

View File

@ -5,6 +5,11 @@
#include <errno.h>
#include <fcntl.h>
#if HAVE_LINUX_TIME_TYPES_H
/* This header defines __kernel_timespec for us, but is only available since Linux 5.1, hence conditionally
* include this. */
#include <linux/time_types.h>
#endif
#include <signal.h>
#include <sys/syscall.h>
#include <sys/types.h>
@ -382,3 +387,41 @@ static inline int missing_close_range(int first_fd, int end_fd, unsigned flags)
# define close_range missing_close_range
#endif
/* ======================================================================= */
#if !HAVE_EPOLL_PWAIT2

/* The raw syscall wants the size of the signal mask in bytes. These mirror the kernel's _NSIG_WORDS, i.e.
 * the number of longs wrapped by the kernel's sigset_t, and the byte size derived from it. */
#define KERNEL_NSIG_WORDS (64 / (sizeof(long) * 8))
#define KERNEL_NSIG_BYTES (KERNEL_NSIG_WORDS * sizeof(long))

struct epoll_event;

/* Fallback for epoll_pwait2() when the C library does not provide it.
 *
 * If both the syscall number and struct __kernel_timespec are available at build time, the syscall is
 * invoked directly and its result is returned as-is. Otherwise the call fails with -1 and errno set to
 * ENOSYS.
 *
 * A NULL timeout is passed through as NULL; a NULL sigset is passed through as NULL with a mask size
 * of 0. */
static inline int missing_epoll_pwait2(
                int fd,
                struct epoll_event *events,
                int maxevents,
                const struct timespec *timeout,
                const sigset_t *sigset) {

#  if defined(__NR_epoll_pwait2) && HAVE_LINUX_TIME_TYPES_H
        struct __kernel_timespec kernel_ts, *kernel_timeout = NULL;

        if (timeout) {
                /* Userspace and kernel timespec are distinct types, hence copy field by field */
                kernel_ts = (struct __kernel_timespec) {
                        .tv_sec = timeout->tv_sec,
                        .tv_nsec = timeout->tv_nsec,
                };
                kernel_timeout = &kernel_ts;
        }

        return syscall(__NR_epoll_pwait2,
                       fd,
                       events,
                       maxevents,
                       kernel_timeout,
                       sigset,
                       sigset ? KERNEL_NSIG_BYTES : 0);
#  else
        /* Nothing we can call on this build */
        errno = ENOSYS;
        return -1;
#  endif
}

#  define epoll_pwait2 missing_epoll_pwait2
#endif

View File

@ -673,3 +673,58 @@ assert_cc(__NR_statx == systemd_NR_statx);
# endif
#endif
/* Make sure __NR_epoll_pwait2 is usable: pick a per-architecture syscall number into
 * systemd_NR_epoll_pwait2, then either verify it against the number the system headers provide, or — if
 * the headers provide none (or a negative one) — define __NR_epoll_pwait2 from it.
 *
 * NOTE(review): the whole section is skipped when __IGNORE_epoll_pwait2 is defined; what defines that
 * macro is not visible from here. */
#ifndef __IGNORE_epoll_pwait2
/* Step 1: per-architecture syscall number table */
# if defined(__aarch64__)
# define systemd_NR_epoll_pwait2 441
# elif defined(__alpha__)
# define systemd_NR_epoll_pwait2 551
# elif defined(__arc__) || defined(__tilegx__)
# define systemd_NR_epoll_pwait2 441
# elif defined(__arm__)
# define systemd_NR_epoll_pwait2 441
# elif defined(__i386__)
# define systemd_NR_epoll_pwait2 441
# elif defined(__ia64__)
# define systemd_NR_epoll_pwait2 1465
# elif defined(__m68k__)
# define systemd_NR_epoll_pwait2 441
/* MIPS: the number depends on which of the three ABIs we are built for */
# elif defined(_MIPS_SIM)
# if _MIPS_SIM == _MIPS_SIM_ABI32
# define systemd_NR_epoll_pwait2 4441
# elif _MIPS_SIM == _MIPS_SIM_NABI32
# define systemd_NR_epoll_pwait2 6441
# elif _MIPS_SIM == _MIPS_SIM_ABI64
# define systemd_NR_epoll_pwait2 5441
# else
# error "Unknown MIPS ABI"
# endif
# elif defined(__powerpc__)
# define systemd_NR_epoll_pwait2 441
# elif defined(__s390__)
# define systemd_NR_epoll_pwait2 441
# elif defined(__sparc__)
# define systemd_NR_epoll_pwait2 441
# elif defined(__x86_64__)
/* __ILP32__ on x86-64 gets the same base number with the x32 syscall bit ORed in */
# if defined(__ILP32__)
# define systemd_NR_epoll_pwait2 (441 | /* __X32_SYSCALL_BIT */ 0x40000000)
# else
# define systemd_NR_epoll_pwait2 441
# endif
# else
/* Unknown architecture: only warn. systemd_NR_epoll_pwait2 stays undefined, so step 2 below will not
 * define __NR_epoll_pwait2 from it. */
# warning "epoll_pwait2() syscall number is unknown for your architecture"
# endif
/* Step 2: reconcile our number with whatever the system headers say */
/* may be an (invalid) negative number due to libseccomp, see PR 13319 */
# if defined __NR_epoll_pwait2 && __NR_epoll_pwait2 >= 0
/* The headers provide a valid number: if we know one too, the two must agree, checked at build time */
# if defined systemd_NR_epoll_pwait2
assert_cc(__NR_epoll_pwait2 == systemd_NR_epoll_pwait2);
# endif
# else
/* The headers provide nothing usable: drop a negative definition if there is one ... */
# if defined __NR_epoll_pwait2
# undef __NR_epoll_pwait2
# endif
/* ... and substitute our own number, if we have a valid one */
# if defined systemd_NR_epoll_pwait2 && systemd_NR_epoll_pwait2 >= 0
# define __NR_epoll_pwait2 systemd_NR_epoll_pwait2
# endif
# endif
#endif

View File

@ -17,7 +17,8 @@ SYSCALLS = [
'pkey_mprotect',
'renameat2',
'setns',
'statx']
'statx',
'epoll_pwait2']
def dictify(f):
def wrap(*args, **kwargs):

View File

@ -3780,9 +3780,59 @@ pending:
return r;
}
/* Waits for events on the epoll instance 'fd', for at most 'timeout' microseconds. USEC_INFINITY means
 * wait without timeout.
 *
 * Uses epoll_pwait2() for finite timeouts, since it takes a struct timespec rather than milliseconds.
 * If that call turns out to be unsupported or blocked, this is remembered for the lifetime of the
 * process and epoll_wait() is used instead, with the timeout rounded up to full milliseconds and
 * saturated at INT_MAX.
 *
 * Returns the number of events stored in 'events' (possibly 0) on success, or a negative errno-style
 * error code on failure. In particular -EINTR is returned to the caller and does not disable
 * epoll_pwait2(). */
static int epoll_wait_usec(
                int fd,
                struct epoll_event *events,
                int maxevents,
                usec_t timeout) {

        /* Set once epoll_pwait2() has been found to be unusable, so that we don't retry it each time */
        static bool epoll_pwait2_absent = false;
        int r, msec;

        /* An infinite timeout gains nothing from the finer granularity, hence goes to epoll_wait() */
        if (!epoll_pwait2_absent && timeout != USEC_INFINITY) {
                struct timespec ts;

                r = epoll_pwait2(fd,
                                 events,
                                 maxevents,
                                 timespec_store(&ts, timeout),
                                 NULL);
                if (r >= 0)
                        return r;

                /* On failure r is just -1 and the actual error is in errno, hence classify errno, not
                 * r: -1 is numerically -EPERM, so checking r would treat every failure (including
                 * EINTR) as a privilege error and disable epoll_pwait2() for good. */
                if (!ERRNO_IS_NOT_SUPPORTED(errno) && !ERRNO_IS_PRIVILEGE(errno))
                        return -errno; /* Only fallback to old epoll_wait() if the syscall is masked or not
                                        * supported. */

                epoll_pwait2_absent = true;
        }

        if (timeout == USEC_INFINITY)
                msec = -1;
        else {
                usec_t k;

                /* Round up, so that we never wake up before the requested time has passed */
                k = DIV_ROUND_UP(timeout, USEC_PER_MSEC);
                if (k >= INT_MAX)
                        msec = INT_MAX; /* Saturate */
                else
                        msec = (int) k;
        }

        r = epoll_wait(fd,
                       events,
                       maxevents,
                       msec);
        if (r < 0)
                return -errno;

        return r;
}
_public_ int sd_event_wait(sd_event *e, uint64_t timeout) {
size_t n_event_queue, m;
int r, msec;
int r;
assert_return(e, -EINVAL);
assert_return(e = event_resolve(e), -ENOPKG);
@ -3801,21 +3851,16 @@ _public_ int sd_event_wait(sd_event *e, uint64_t timeout) {
/* If we still have inotify data buffered, then query the other fds, but don't wait on it */
if (e->inotify_data_buffered)
msec = 0;
else
msec = timeout == (uint64_t) -1 ? -1 : (int) DIV_ROUND_UP(timeout, USEC_PER_MSEC);
timeout = 0;
for (;;) {
r = epoll_wait(e->epoll_fd, e->event_queue, e->event_queue_allocated, msec);
if (r < 0) {
if (errno == EINTR) {
e->state = SD_EVENT_PENDING;
return 1;
}
r = -errno;
goto finish;
r = epoll_wait_usec(e->epoll_fd, e->event_queue, e->event_queue_allocated, timeout);
if (r == -EINTR) {
e->state = SD_EVENT_PENDING;
return 1;
}
if (r < 0)
goto finish;
m = (size_t) r;
@ -3828,7 +3873,7 @@ _public_ int sd_event_wait(sd_event *e, uint64_t timeout) {
if (!GREEDY_REALLOC(e->event_queue, e->event_queue_allocated, e->event_queue_allocated + n_event_queue))
return -ENOMEM;
msec = 0;
timeout = 0;
}
triple_timestamp_get(&e->timestamp);