mirror of
https://github.com/systemd/systemd-stable.git
synced 2025-01-10 01:17:44 +03:00
event: hook up sd-event with the service watchdog logic
Adds a new call sd_event_set_watchdog() that can be used to hook up the event loop with the watchdog supervision logic of systemd. If enabled and $WATCHDOG_USEC is set, the event loop will ping the invoking systemd daemon right after coming back from epoll_wait(), but not more often than every $WATCHDOG_USEC/4. The epoll_wait() will sleep no longer than $WATCHDOG_USEC/4*3, to make sure the service manager is notified in time. This means that setting WatchdogSec= in a .service file and calling sd_event_set_watchdog() in your daemon is enough to hook it up with the watchdog logic.
This commit is contained in:
parent
08cd155254
commit
cde93897cd
@ -652,10 +652,12 @@ test_rtnl_SOURCES = \
|
||||
test_rtnl_LDADD = \
|
||||
libsystemd-rtnl.la \
|
||||
libsystemd-bus-internal.la \
|
||||
libsystemd-daemon-internal.la \
|
||||
libsystemd-id128-internal.la \
|
||||
libsystemd-shared.la
|
||||
|
||||
tests += test-rtnl
|
||||
tests += \
|
||||
test-rtnl
|
||||
|
||||
# ------------------------------------------------------------------------------
|
||||
noinst_LTLIBRARIES += \
|
||||
@ -3966,10 +3968,12 @@ test_network_LDADD = \
|
||||
libudev-internal.la \
|
||||
libsystemd-bus-internal.la \
|
||||
libsystemd-id128-internal.la \
|
||||
libsystemd-daemon-internal.la \
|
||||
libsystemd-rtnl.la \
|
||||
libsystemd-shared.la
|
||||
|
||||
tests += test-network
|
||||
tests += \
|
||||
test-network
|
||||
|
||||
EXTRA_DIST += \
|
||||
src/network/networkd-gperf.gperf \
|
||||
|
2
TODO
2
TODO
@ -137,7 +137,6 @@ Features:
|
||||
but do not return anything up to the event loop caller. Instead
|
||||
add parameter to sd_event_request_quit() to take retval. This way
|
||||
errors rippling upwards are the option, not the default
|
||||
- native support for watchdog stuff
|
||||
|
||||
* in the final killing spree, detect processes from the root directory, and
|
||||
complain loudly if they have argv[0][0] == '@' set.
|
||||
@ -311,6 +310,7 @@ Features:
|
||||
boot, and causes the journal to be moved back to /run on shutdown,
|
||||
so that we don't keep /var busy. This needs to happen synchronously,
|
||||
hence doing this via signals is not going to work.
|
||||
- port to sd-event, enable watchdog from event loop
|
||||
|
||||
* document:
|
||||
- document that deps in [Unit] sections ignore Alias= fields in
|
||||
|
@ -627,6 +627,8 @@ int main(int argc, char *argv[]) {
|
||||
goto finish;
|
||||
}
|
||||
|
||||
sd_event_set_watchdog(event, true);
|
||||
|
||||
r = connect_bus(&context, event, &bus);
|
||||
if (r < 0)
|
||||
goto finish;
|
||||
|
@ -238,6 +238,7 @@ global:
|
||||
sd_event_request_quit;
|
||||
sd_event_get_now_realtime;
|
||||
sd_event_get_now_monotonic;
|
||||
sd_event_set_watchdog;
|
||||
|
||||
sd_event_source_ref;
|
||||
sd_event_source_unref;
|
||||
|
@ -24,6 +24,7 @@
|
||||
#include <sys/wait.h>
|
||||
|
||||
#include "sd-id128.h"
|
||||
#include "sd-daemon.h"
|
||||
#include "macro.h"
|
||||
#include "prioq.h"
|
||||
#include "hashmap.h"
|
||||
@ -43,7 +44,8 @@ typedef enum EventSourceType {
|
||||
SOURCE_SIGNAL,
|
||||
SOURCE_CHILD,
|
||||
SOURCE_DEFER,
|
||||
SOURCE_QUIT
|
||||
SOURCE_QUIT,
|
||||
SOURCE_WATCHDOG
|
||||
} EventSourceType;
|
||||
|
||||
struct sd_event_source {
|
||||
@ -105,6 +107,7 @@ struct sd_event {
|
||||
int signal_fd;
|
||||
int realtime_fd;
|
||||
int monotonic_fd;
|
||||
int watchdog_fd;
|
||||
|
||||
Prioq *pending;
|
||||
Prioq *prepare;
|
||||
@ -139,9 +142,12 @@ struct sd_event {
|
||||
|
||||
bool quit_requested:1;
|
||||
bool need_process_child:1;
|
||||
bool watchdog:1;
|
||||
|
||||
pid_t tid;
|
||||
sd_event **default_event_ptr;
|
||||
|
||||
usec_t watchdog_last, watchdog_period;
|
||||
};
|
||||
|
||||
static int pending_prioq_compare(const void *a, const void *b) {
|
||||
@ -323,6 +329,9 @@ static void event_free(sd_event *e) {
|
||||
if (e->monotonic_fd >= 0)
|
||||
close_nointr_nofail(e->monotonic_fd);
|
||||
|
||||
if (e->watchdog_fd >= 0)
|
||||
close_nointr_nofail(e->watchdog_fd);
|
||||
|
||||
prioq_free(e->pending);
|
||||
prioq_free(e->prepare);
|
||||
prioq_free(e->monotonic_earliest);
|
||||
@ -348,7 +357,7 @@ _public_ int sd_event_new(sd_event** ret) {
|
||||
return -ENOMEM;
|
||||
|
||||
e->n_ref = 1;
|
||||
e->signal_fd = e->realtime_fd = e->monotonic_fd = e->epoll_fd = -1;
|
||||
e->signal_fd = e->realtime_fd = e->monotonic_fd = e->watchdog_fd = e->epoll_fd = -1;
|
||||
e->realtime_next = e->monotonic_next = (usec_t) -1;
|
||||
e->original_pid = getpid();
|
||||
|
||||
@ -1422,8 +1431,8 @@ static int event_arm_timer(
|
||||
usec_t t;
|
||||
int r;
|
||||
|
||||
assert_se(e);
|
||||
assert_se(next);
|
||||
assert(e);
|
||||
assert(next);
|
||||
|
||||
a = prioq_peek(earliest);
|
||||
if (!a || a->enabled == SD_EVENT_OFF) {
|
||||
@ -1462,7 +1471,7 @@ static int event_arm_timer(
|
||||
|
||||
r = timerfd_settime(timer_fd, TFD_TIMER_ABSTIME, &its, NULL);
|
||||
if (r < 0)
|
||||
return r;
|
||||
return -errno;
|
||||
|
||||
*next = t;
|
||||
return 0;
|
||||
@ -1484,7 +1493,6 @@ static int flush_timer(sd_event *e, int fd, uint32_t events, usec_t *next) {
|
||||
|
||||
assert(e);
|
||||
assert(fd >= 0);
|
||||
assert(next);
|
||||
|
||||
assert_return(events == EPOLLIN, -EIO);
|
||||
|
||||
@ -1499,7 +1507,8 @@ static int flush_timer(sd_event *e, int fd, uint32_t events, usec_t *next) {
|
||||
if (ss != sizeof(x))
|
||||
return -EIO;
|
||||
|
||||
*next = (usec_t) -1;
|
||||
if (next)
|
||||
*next = (usec_t) -1;
|
||||
|
||||
return 0;
|
||||
}
|
||||
@ -1782,6 +1791,43 @@ static sd_event_source* event_next_pending(sd_event *e) {
|
||||
return p;
|
||||
}
|
||||
|
||||
/* (Re)programs the watchdog timerfd for the next keep-alive wakeup.
 *
 * The expiry is requested from sleep_between() for the window
 * [watchdog_last + watchdog_period/2, watchdog_last + watchdog_period*3/4]
 * and is handed to the timerfd as an absolute time (TFD_TIMER_ABSTIME).
 * NOTE(review): sleep_between() is defined elsewhere in this file; it
 * presumably picks a wakeup inside that window -- confirm there.
 *
 * Returns 0 on success, or a negative errno-style value if
 * timerfd_settime() fails. */
static int arm_watchdog(sd_event *e) {
        struct itimerspec its = {};
        usec_t t;
        int r;

        assert(e);
        assert(e->watchdog_fd >= 0);

        t = sleep_between(e,
                          e->watchdog_last + (e->watchdog_period / 2),
                          e->watchdog_last + (e->watchdog_period * 3 / 4));

        timespec_store(&its.it_value, t);

        /* An all-zero it_value tells timerfd_settime() to disarm the
         * timer instead of arming it. Make sure we never pass that, as
         * the watchdog wakeup would otherwise silently stop firing. */
        if (its.it_value.tv_sec == 0 && its.it_value.tv_nsec == 0)
                its.it_value.tv_nsec = 1;

        r = timerfd_settime(e->watchdog_fd, TFD_TIMER_ABSTIME, &its, NULL);
        if (r < 0)
                return -errno;

        return 0;
}
|
||||
|
||||
/* Sends a watchdog keep-alive if watchdog support is enabled on this
 * event loop and enough time has passed since the previous one.
 *
 * A ping is only sent once at least a quarter of the watchdog period
 * has elapsed since watchdog_last, measured against the loop's cached
 * monotonic timestamp. After a ping the watchdog timer is re-armed.
 *
 * Returns 0 if nothing had to be done, otherwise the result of
 * arm_watchdog(). */
static int process_watchdog(sd_event *e) {
        usec_t earliest_ping;

        assert(e);

        if (e->watchdog) {
                /* Don't notify watchdog too often */
                earliest_ping = e->watchdog_last + e->watchdog_period / 4;

                if (earliest_ping <= e->timestamp.monotonic) {
                        sd_notify(false, "WATCHDOG=1");
                        e->watchdog_last = e->timestamp.monotonic;

                        return arm_watchdog(e);
                }
        }

        return 0;
}
|
||||
|
||||
_public_ int sd_event_run(sd_event *e, uint64_t timeout) {
|
||||
struct epoll_event ev_queue[EPOLL_QUEUE_MAX];
|
||||
sd_event_source *p;
|
||||
@ -1831,6 +1877,8 @@ _public_ int sd_event_run(sd_event *e, uint64_t timeout) {
|
||||
r = flush_timer(e, e->realtime_fd, ev_queue[i].events, &e->realtime_next);
|
||||
else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_SIGNAL))
|
||||
r = process_signal(e, ev_queue[i].events);
|
||||
else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_WATCHDOG))
|
||||
r = flush_timer(e, e->watchdog_fd, ev_queue[i].events, NULL);
|
||||
else
|
||||
r = process_io(e, ev_queue[i].data.ptr, ev_queue[i].events);
|
||||
|
||||
@ -1838,6 +1886,10 @@ _public_ int sd_event_run(sd_event *e, uint64_t timeout) {
|
||||
goto finish;
|
||||
}
|
||||
|
||||
r = process_watchdog(e);
|
||||
if (r < 0)
|
||||
goto finish;
|
||||
|
||||
r = process_timer(e, e->timestamp.monotonic, e->monotonic_earliest, e->monotonic_latest);
|
||||
if (r < 0)
|
||||
goto finish;
|
||||
@ -1970,3 +2022,63 @@ _public_ int sd_event_get_tid(sd_event *e, pid_t *tid) {
|
||||
|
||||
return -ENXIO;
|
||||
}
|
||||
|
||||
/* Enables (b != 0) or disables (b == 0) watchdog support on the event
 * loop.
 *
 * Enabling reads the period from $WATCHDOG_USEC, sends a first
 * "WATCHDOG=1" notification right away, creates a CLOCK_MONOTONIC
 * timerfd, arms it and adds it to the loop's epoll set, tagged as
 * SOURCE_WATCHDOG. Disabling removes that timerfd from the epoll set
 * and closes it.
 *
 * Returns the resulting watchdog state (> 0 enabled, 0 disabled) on
 * success. Returns 0 without enabling anything if $WATCHDOG_USEC is
 * unset. Returns a negative errno-style value on failure: -EINVAL if e
 * is NULL, the parser's error if the variable cannot be parsed, -EIO
 * if the period is zero, or the error of the failing system call. */
_public_ int sd_event_set_watchdog(sd_event *e, int b) {
        bool enable = !!b;
        int r;

        assert_return(e, -EINVAL);

        /* Already in the requested state? Then there's nothing to do. */
        if (e->watchdog == enable)
                return e->watchdog;

        if (!enable) {
                if (e->watchdog_fd >= 0) {
                        epoll_ctl(e->epoll_fd, EPOLL_CTL_DEL, e->watchdog_fd, NULL);
                        close_nointr_nofail(e->watchdog_fd);
                        e->watchdog_fd = -1;
                }
        } else {
                struct epoll_event ev = {
                        .events = EPOLLIN,
                        .data.ptr = INT_TO_PTR(SOURCE_WATCHDOG),
                };
                const char *period;

                period = getenv("WATCHDOG_USEC");
                if (!period)
                        return false;

                r = safe_atou64(period, &e->watchdog_period);
                if (r < 0)
                        return r;
                if (e->watchdog_period == 0)
                        return -EIO;

                /* Issue first ping immediately */
                sd_notify(false, "WATCHDOG=1");
                e->watchdog_last = now(CLOCK_MONOTONIC);

                e->watchdog_fd = timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK|TFD_CLOEXEC);
                if (e->watchdog_fd < 0)
                        return -errno;

                r = arm_watchdog(e);
                if (r < 0)
                        goto fail;

                if (epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, e->watchdog_fd, &ev) < 0) {
                        r = -errno;
                        goto fail;
                }
        }

        e->watchdog = enable;
        return e->watchdog;

fail:
        /* Undo the timerfd creation so that the object stays consistent */
        close_nointr_nofail(e->watchdog_fd);
        e->watchdog_fd = -1;
        return r;
}
|
||||
|
@ -165,6 +165,8 @@ int main(int argc, char *argv[]) {
|
||||
|
||||
assert_se(sd_event_default(&e) >= 0);
|
||||
|
||||
assert_se(sd_event_set_watchdog(e, true) >= 0);
|
||||
|
||||
got_a = false, got_b = false, got_c = false, got_d = 0;
|
||||
|
||||
/* Add a oneshot handler, trigger it, re-enable it, and trigger
|
||||
|
@ -1137,6 +1137,8 @@ int main(int argc, char *argv[]) {
|
||||
goto finish;
|
||||
}
|
||||
|
||||
sd_event_set_watchdog(event, true);
|
||||
|
||||
r = connect_bus(&context, event, &bus);
|
||||
if (r < 0)
|
||||
goto finish;
|
||||
|
@ -96,6 +96,8 @@ Manager *manager_new(void) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
sd_event_set_watchdog(m->event, true);
|
||||
|
||||
return m;
|
||||
}
|
||||
|
||||
|
@ -59,6 +59,8 @@ Manager *manager_new(void) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
sd_event_set_watchdog(m->event, true);
|
||||
|
||||
return m;
|
||||
}
|
||||
|
||||
|
@ -35,6 +35,8 @@ int manager_new(Manager **ret) {
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
||||
sd_event_set_watchdog(m->event, true);
|
||||
|
||||
r = sd_rtnl_open(RTMGRP_LINK | RTMGRP_IPV4_IFADDR, &m->rtnl);
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
@ -632,6 +632,8 @@ int main(int argc, char *argv[]) {
|
||||
goto finish;
|
||||
}
|
||||
|
||||
sd_event_set_watchdog(event, true);
|
||||
|
||||
n = sd_listen_fds(1);
|
||||
if (n < 0) {
|
||||
log_error("Failed to receive sockets from parent.");
|
||||
|
@ -93,6 +93,7 @@ int sd_event_get_quit(sd_event *e);
|
||||
int sd_event_request_quit(sd_event *e);
|
||||
int sd_event_get_now_realtime(sd_event *e, uint64_t *usec);
|
||||
int sd_event_get_now_monotonic(sd_event *e, uint64_t *usec);
|
||||
int sd_event_set_watchdog(sd_event *e, int b);
|
||||
|
||||
sd_event_source* sd_event_source_ref(sd_event_source *s);
|
||||
sd_event_source* sd_event_source_unref(sd_event_source *s);
|
||||
|
@ -836,6 +836,8 @@ int main(int argc, char *argv[]) {
|
||||
goto finish;
|
||||
}
|
||||
|
||||
sd_event_set_watchdog(event, true);
|
||||
|
||||
r = connect_bus(&context, event, &bus);
|
||||
if (r < 0)
|
||||
goto finish;
|
||||
|
@ -14,3 +14,4 @@ Documentation=http://www.freedesktop.org/wiki/Software/systemd/hostnamed
|
||||
ExecStart=@rootlibexecdir@/systemd-hostnamed
|
||||
BusName=org.freedesktop.hostname1
|
||||
CapabilityBoundingSet=CAP_SYS_ADMIN CAP_DAC_OVERRIDE CAP_SYS_PTRACE
|
||||
WatchdogSec=1min
|
||||
|
@ -14,3 +14,4 @@ Documentation=http://www.freedesktop.org/wiki/Software/systemd/localed
|
||||
ExecStart=@rootlibexecdir@/systemd-localed
|
||||
BusName=org.freedesktop.locale1
|
||||
CapabilityBoundingSet=
|
||||
WatchdogSec=1min
|
||||
|
@ -19,6 +19,7 @@ Restart=always
|
||||
RestartSec=0
|
||||
BusName=org.freedesktop.login1
|
||||
CapabilityBoundingSet=CAP_SYS_ADMIN CAP_AUDIT_CONTROL CAP_CHOWN CAP_KILL CAP_DAC_READ_SEARCH CAP_DAC_OVERRIDE CAP_FOWNER CAP_SYS_TTY_CONFIG
|
||||
WatchdogSec=1min
|
||||
|
||||
# Increase the default a bit in order to allow many simultaneous
|
||||
# logins since we keep one fd open per session.
|
||||
|
@ -18,3 +18,4 @@ Restart=always
|
||||
RestartSec=0
|
||||
BusName=org.freedesktop.machine1
|
||||
CapabilityBoundingSet=CAP_KILL
|
||||
WatchdogSec=1min
|
||||
|
@ -17,3 +17,4 @@ Type=notify
|
||||
Restart=always
|
||||
RestartSec=0
|
||||
ExecStart=@rootlibexecdir@/systemd-networkd
|
||||
WatchdogSec=1min
|
||||
|
@ -14,3 +14,4 @@ Documentation=http://www.freedesktop.org/wiki/Software/systemd/timedated
|
||||
ExecStart=@rootlibexecdir@/systemd-timedated
|
||||
BusName=org.freedesktop.timedate1
|
||||
CapabilityBoundingSet=CAP_SYS_TIME
|
||||
WatchdogSec=1min
|
||||
|
Loading…
Reference in New Issue
Block a user