mirror of
https://github.com/systemd/systemd-stable.git
synced 2024-10-27 18:55:09 +03:00
service: introduce WatchdogSec and hook up the watchdog with the existing failure logic
This commit is contained in:
parent
aa704ba8c2
commit
bb242b7b52
@ -459,6 +459,24 @@
|
||||
90s.</para></listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
<term><varname>WatchdogSec=</varname></term>
|
||||
<listitem><para>Configures the watchdog
|
||||
timeout for a service. This is activated
|
||||
when the start-up is completed. The service
|
||||
must call
|
||||
<citerefentry><refentrytitle>sd_notify</refentrytitle><manvolnum>3</manvolnum></citerefentry>
|
||||
regularly with "WATCHDOG=1". If the time
|
||||
between two such calls is larger than
|
||||
the configured time then the service
|
||||
enters a failure state. By setting
|
||||
<varname>Restart=</varname>
|
||||
to <option>on-failure</option> or
|
||||
<option>always</option> the service can
|
||||
be restarted. Defaults to 0s, which
|
||||
disables this feature.</para></listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
<term><varname>Restart=</varname></term>
|
||||
<listitem><para>Configures whether the
|
||||
|
@ -43,6 +43,7 @@
|
||||
" <property name=\"NotifyAccess\" type=\"s\" access=\"read\"/>\n" \
|
||||
" <property name=\"RestartUSec\" type=\"t\" access=\"read\"/>\n" \
|
||||
" <property name=\"TimeoutUSec\" type=\"t\" access=\"read\"/>\n" \
|
||||
" <property name=\"WatchdogUSec\" type=\"t\" access=\"read\"/>\n" \
|
||||
" <property name=\"WatchdogTimestamp\" type=\"t\" access=\"read\"/>\n" \
|
||||
" <property name=\"WatchdogTimestampMonotonic\" type=\"t\" access=\"read\"/>\n" \
|
||||
BUS_EXEC_COMMAND_INTERFACE("ExecStartPre") \
|
||||
@ -119,6 +120,7 @@ static const BusProperty bus_service_properties[] = {
|
||||
{ "NotifyAccess", bus_service_append_notify_access, "s", offsetof(Service, notify_access) },
|
||||
{ "RestartUSec", bus_property_append_usec, "t", offsetof(Service, restart_usec) },
|
||||
{ "TimeoutUSec", bus_property_append_usec, "t", offsetof(Service, timeout_usec) },
|
||||
{ "WatchdogUSec", bus_property_append_usec, "t", offsetof(Service, watchdog_usec) },
|
||||
{ "WatchdogTimestamp", bus_property_append_usec, "t", offsetof(Service, watchdog_timestamp.realtime) },
|
||||
{ "WatchdogTimestampMonotonic",bus_property_append_usec, "t", offsetof(Service, watchdog_timestamp.monotonic) },
|
||||
BUS_EXEC_COMMAND_PROPERTY("ExecStartPre", offsetof(Service, exec_command[SERVICE_EXEC_START_PRE]), true ),
|
||||
|
@ -134,6 +134,7 @@ Service.ExecStop, config_parse_exec, SERVICE_EXE
|
||||
Service.ExecStopPost, config_parse_exec, SERVICE_EXEC_STOP_POST, offsetof(Service, exec_command)
|
||||
Service.RestartSec, config_parse_usec, 0, offsetof(Service, restart_usec)
|
||||
Service.TimeoutSec, config_parse_usec, 0, offsetof(Service, timeout_usec)
|
||||
Service.WatchdogSec, config_parse_usec, 0, offsetof(Service, watchdog_usec)
|
||||
Service.Type, config_parse_service_type, 0, offsetof(Service, type)
|
||||
Service.Restart, config_parse_service_restart, 0, offsetof(Service, restart)
|
||||
Service.PermissionsStartOnly, config_parse_bool, 0, offsetof(Service, permissions_start_only)
|
||||
|
@ -112,6 +112,9 @@ static void service_init(Unit *u) {
|
||||
|
||||
s->timeout_usec = DEFAULT_TIMEOUT_USEC;
|
||||
s->restart_usec = DEFAULT_RESTART_USEC;
|
||||
|
||||
s->watchdog_watch.type = WATCH_INVALID;
|
||||
|
||||
s->timer_watch.type = WATCH_INVALID;
|
||||
#ifdef HAVE_SYSV_COMPAT
|
||||
s->sysv_start_priority = -1;
|
||||
@ -208,14 +211,39 @@ static void service_connection_unref(Service *s) {
|
||||
/* Stops watching the service's watchdog timer and zeroes both clock
 * values of the stored watchdog timestamp. */
static void service_stop_watchdog(Service *s) {
        assert(s);

        /* Drop the timer first, then clear the recorded timestamp. */
        unit_unwatch_timer(UNIT(s), &s->watchdog_watch);

        s->watchdog_timestamp.realtime = s->watchdog_timestamp.monotonic = 0;
}
|
||||
|
||||
static void service_enter_dead(Service *s, ServiceResult f, bool allow_restart);
|
||||
|
||||
/* Evaluates the watchdog deadline of a service and (re-)arms its timer.
 *
 * Nothing happens when no watchdog timeout is configured
 * (s->watchdog_usec == 0). Otherwise the monotonic time elapsed since
 * s->watchdog_timestamp is compared with s->watchdog_usec:
 *
 *   - elapsed >= timeout: an error is logged and the service enters the
 *     dead state with SERVICE_FAILURE_WATCHDOG, restart allowed.
 *   - elapsed <  timeout: the watchdog timer is installed for the
 *     remaining time; a failure to install it is only logged.
 */
static void service_handle_watchdog(Service *s) {
        usec_t ts, offset;
        int r;

        assert(s);

        if (s->watchdog_usec == 0)
                return;

        /* service_stop_watchdog() uses 0 as the "no timestamp" value. If we
         * get here without a timestamp (e.g. from coldplug, which does not
         * reset the watchdog first), measuring from 0 would yield an elapsed
         * time equal to the whole monotonic clock value and fail the service
         * on the spot. Start the interval now instead. */
        if (s->watchdog_timestamp.monotonic == 0)
                dual_timestamp_get(&s->watchdog_timestamp);

        /* usec_t arithmetic is unsigned: never let a timestamp that lies
         * ahead of the current clock reading wrap into a huge offset. */
        ts = now(CLOCK_MONOTONIC);
        offset = ts > s->watchdog_timestamp.monotonic ? ts - s->watchdog_timestamp.monotonic : 0;

        if (offset >= s->watchdog_usec) {
                log_error("%s watchdog timeout!", UNIT(s)->id);
                service_enter_dead(s, SERVICE_FAILURE_WATCHDOG, true);
                return;
        }

        /* Deadline not reached yet: wake up again when it would expire. */
        r = unit_watch_timer(UNIT(s), s->watchdog_usec - offset, &s->watchdog_watch);
        if (r < 0)
                log_warning("%s failed to install watchdog timer: %s", UNIT(s)->id, strerror(-r));
}
|
||||
|
||||
/* Refreshes s->watchdog_timestamp through dual_timestamp_get() and then
 * hands over to service_handle_watchdog(), which re-evaluates the deadline
 * against the new timestamp. */
static void service_reset_watchdog(Service *s) {
        assert(s);

        dual_timestamp_get(&s->watchdog_timestamp);
        service_handle_watchdog(s);
}
|
||||
|
||||
static void service_done(Unit *u) {
|
||||
@ -259,6 +287,8 @@ static void service_done(Unit *u) {
|
||||
|
||||
unit_ref_unset(&s->accept_socket);
|
||||
|
||||
service_stop_watchdog(s);
|
||||
|
||||
unit_unwatch_timer(u, &s->timer_watch);
|
||||
}
|
||||
|
||||
@ -1568,9 +1598,12 @@ static int service_coldplug(Unit *u) {
|
||||
if ((r = unit_watch_pid(UNIT(s), s->control_pid)) < 0)
|
||||
return r;
|
||||
|
||||
if (s->deserialized_state == SERVICE_START_POST ||
|
||||
s->deserialized_state == SERVICE_RUNNING)
|
||||
service_handle_watchdog(s);
|
||||
|
||||
service_set_state(s, s->deserialized_state);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -2002,6 +2035,9 @@ static void service_enter_start_post(Service *s) {
|
||||
|
||||
service_unwatch_control_pid(s);
|
||||
|
||||
if (s->watchdog_usec > 0)
|
||||
service_reset_watchdog(s);
|
||||
|
||||
if ((s->control_command = s->exec_command[SERVICE_EXEC_START_POST])) {
|
||||
s->control_command_id = SERVICE_EXEC_START_POST;
|
||||
|
||||
@ -2922,6 +2958,11 @@ static void service_timer_event(Unit *u, uint64_t elapsed, Watch* w) {
|
||||
assert(s);
|
||||
assert(elapsed == 1);
|
||||
|
||||
if (w == &s->watchdog_watch) {
|
||||
service_handle_watchdog(s);
|
||||
return;
|
||||
}
|
||||
|
||||
assert(w == &s->timer_watch);
|
||||
|
||||
switch (s->state) {
|
||||
@ -3611,7 +3652,8 @@ static const char* const service_result_table[_SERVICE_RESULT_MAX] = {
|
||||
[SERVICE_FAILURE_TIMEOUT] = "timeout",
|
||||
[SERVICE_FAILURE_EXIT_CODE] = "exit-code",
|
||||
[SERVICE_FAILURE_SIGNAL] = "signal",
|
||||
[SERVICE_FAILURE_CORE_DUMP] = "core-dump"
|
||||
[SERVICE_FAILURE_CORE_DUMP] = "core-dump",
|
||||
[SERVICE_FAILURE_WATCHDOG] = "watchdog"
|
||||
};
|
||||
|
||||
DEFINE_STRING_TABLE_LOOKUP(service_result, ServiceResult);
|
||||
|
@ -95,6 +95,7 @@ typedef enum ServiceResult {
|
||||
SERVICE_FAILURE_EXIT_CODE,
|
||||
SERVICE_FAILURE_SIGNAL,
|
||||
SERVICE_FAILURE_CORE_DUMP,
|
||||
SERVICE_FAILURE_WATCHDOG,
|
||||
_SERVICE_RESULT_MAX,
|
||||
_SERVICE_RESULT_INVALID = -1
|
||||
} ServiceResult;
|
||||
@ -112,6 +113,8 @@ struct Service {
|
||||
usec_t timeout_usec;
|
||||
|
||||
dual_timestamp watchdog_timestamp;
|
||||
usec_t watchdog_usec;
|
||||
Watch watchdog_watch;
|
||||
|
||||
ExecCommand* exec_command[_SERVICE_EXEC_COMMAND_MAX];
|
||||
ExecContext exec_context;
|
||||
|
Loading…
Reference in New Issue
Block a user