mirror of
https://github.com/systemd/systemd-stable.git
synced 2025-02-27 13:57:26 +03:00
service: add watchdog timestamp
This patch adds WatchdogTimestamp[Monotonic] to the systemd service D-Bus API. The timestamp is updated to the current time when the service calls 'sd_notify("WATCHDOG=1\n")'. Using a timestamp instead of an 'alive' flag has two advantages: 1. No timeout is needed to define when a service is no longer alive. This simplifies both configuration (no timeout value) and implementation (no timeout event). 2. It is more robust. A 'dead' service might not be detected should systemd 'forget' to reset an 'alive' flag. It is much less likely to get a valid new timestamp if a service died.
This commit is contained in:
parent
c4aa65e714
commit
a6927d7ffc
@ -151,6 +151,18 @@
|
||||
itself. Example:
|
||||
"MAINPID=4711"</para></listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
<term>WATCHDOG=1</term>
|
||||
|
||||
<listitem><para>Tells systemd to
|
||||
update the watchdog timestamp.
|
||||
Services using this feature should do
|
||||
this at regular intervals. A watchdog
|
||||
framework can use the timestamps to
|
||||
detect failed
|
||||
services.</para></listitem>
|
||||
</varlistentry>
|
||||
</variablelist>
|
||||
|
||||
<para>It is recommended to prefix variable names that
|
||||
|
@ -43,6 +43,8 @@
|
||||
" <property name=\"NotifyAccess\" type=\"s\" access=\"read\"/>\n" \
|
||||
" <property name=\"RestartUSec\" type=\"t\" access=\"read\"/>\n" \
|
||||
" <property name=\"TimeoutUSec\" type=\"t\" access=\"read\"/>\n" \
|
||||
" <property name=\"WatchdogTimestamp\" type=\"t\" access=\"read\"/>\n" \
|
||||
" <property name=\"WatchdogTimestampMonotonic\" type=\"t\" access=\"read\"/>\n" \
|
||||
BUS_EXEC_COMMAND_INTERFACE("ExecStartPre") \
|
||||
BUS_EXEC_COMMAND_INTERFACE("ExecStart") \
|
||||
BUS_EXEC_COMMAND_INTERFACE("ExecStartPost") \
|
||||
@ -86,6 +88,8 @@ const char bus_service_invalidating_properties[] =
|
||||
"ExecStop\0"
|
||||
"ExecStopPost\0"
|
||||
"ExecMain\0"
|
||||
"WatchdogTimestamp\0"
|
||||
"WatchdogTimestampMonotonic\0"
|
||||
"MainPID\0"
|
||||
"ControlPID\0"
|
||||
"StatusText\0";
|
||||
@ -106,32 +110,34 @@ static const BusProperty bus_exec_main_status_properties[] = {
|
||||
};
|
||||
|
||||
static const BusProperty bus_service_properties[] = {
|
||||
{ "Type", bus_service_append_type, "s", offsetof(Service, type) },
|
||||
{ "Restart", bus_service_append_restart, "s", offsetof(Service, restart) },
|
||||
{ "PIDFile", bus_property_append_string, "s", offsetof(Service, pid_file), true },
|
||||
{ "NotifyAccess", bus_service_append_notify_access, "s", offsetof(Service, notify_access) },
|
||||
{ "RestartUSec", bus_property_append_usec, "t", offsetof(Service, restart_usec) },
|
||||
{ "TimeoutUSec", bus_property_append_usec, "t", offsetof(Service, timeout_usec) },
|
||||
{ "Type", bus_service_append_type, "s", offsetof(Service, type) },
|
||||
{ "Restart", bus_service_append_restart, "s", offsetof(Service, restart) },
|
||||
{ "PIDFile", bus_property_append_string, "s", offsetof(Service, pid_file), true },
|
||||
{ "NotifyAccess", bus_service_append_notify_access, "s", offsetof(Service, notify_access) },
|
||||
{ "RestartUSec", bus_property_append_usec, "t", offsetof(Service, restart_usec) },
|
||||
{ "TimeoutUSec", bus_property_append_usec, "t", offsetof(Service, timeout_usec) },
|
||||
{ "WatchdogTimestamp", bus_property_append_usec, "t", offsetof(Service, watchdog_timestamp.realtime) },
|
||||
{ "WatchdogTimestampMonotonic",bus_property_append_usec, "t", offsetof(Service, watchdog_timestamp.monotonic) },
|
||||
BUS_EXEC_COMMAND_PROPERTY("ExecStartPre", offsetof(Service, exec_command[SERVICE_EXEC_START_PRE]), true ),
|
||||
BUS_EXEC_COMMAND_PROPERTY("ExecStart", offsetof(Service, exec_command[SERVICE_EXEC_START]), true ),
|
||||
BUS_EXEC_COMMAND_PROPERTY("ExecStartPost", offsetof(Service, exec_command[SERVICE_EXEC_START_POST]), true ),
|
||||
BUS_EXEC_COMMAND_PROPERTY("ExecReload", offsetof(Service, exec_command[SERVICE_EXEC_RELOAD]), true ),
|
||||
BUS_EXEC_COMMAND_PROPERTY("ExecStop", offsetof(Service, exec_command[SERVICE_EXEC_STOP]), true ),
|
||||
BUS_EXEC_COMMAND_PROPERTY("ExecStopPost", offsetof(Service, exec_command[SERVICE_EXEC_STOP_POST]), true ),
|
||||
{ "PermissionsStartOnly", bus_property_append_bool, "b", offsetof(Service, permissions_start_only) },
|
||||
{ "RootDirectoryStartOnly", bus_property_append_bool, "b", offsetof(Service, root_directory_start_only) },
|
||||
{ "RemainAfterExit", bus_property_append_bool, "b", offsetof(Service, remain_after_exit) },
|
||||
{ "GuessMainPID", bus_property_append_bool, "b", offsetof(Service, guess_main_pid) },
|
||||
{ "MainPID", bus_property_append_pid, "u", offsetof(Service, main_pid) },
|
||||
{ "ControlPID", bus_property_append_pid, "u", offsetof(Service, control_pid) },
|
||||
{ "BusName", bus_property_append_string, "s", offsetof(Service, bus_name), true },
|
||||
{ "StatusText", bus_property_append_string, "s", offsetof(Service, status_text), true },
|
||||
{ "PermissionsStartOnly", bus_property_append_bool, "b", offsetof(Service, permissions_start_only) },
|
||||
{ "RootDirectoryStartOnly", bus_property_append_bool, "b", offsetof(Service, root_directory_start_only) },
|
||||
{ "RemainAfterExit", bus_property_append_bool, "b", offsetof(Service, remain_after_exit) },
|
||||
{ "GuessMainPID", bus_property_append_bool, "b", offsetof(Service, guess_main_pid) },
|
||||
{ "MainPID", bus_property_append_pid, "u", offsetof(Service, main_pid) },
|
||||
{ "ControlPID", bus_property_append_pid, "u", offsetof(Service, control_pid) },
|
||||
{ "BusName", bus_property_append_string, "s", offsetof(Service, bus_name), true },
|
||||
{ "StatusText", bus_property_append_string, "s", offsetof(Service, status_text), true },
|
||||
#ifdef HAVE_SYSV_COMPAT
|
||||
{ "SysVRunLevels", bus_property_append_string, "s", offsetof(Service, sysv_runlevels), true },
|
||||
{ "SysVStartPriority", bus_property_append_int, "i", offsetof(Service, sysv_start_priority) },
|
||||
{ "SysVPath", bus_property_append_string, "s", offsetof(Service, sysv_path), true },
|
||||
{ "SysVRunLevels", bus_property_append_string, "s", offsetof(Service, sysv_runlevels), true },
|
||||
{ "SysVStartPriority", bus_property_append_int, "i", offsetof(Service, sysv_start_priority) },
|
||||
{ "SysVPath", bus_property_append_string, "s", offsetof(Service, sysv_path), true },
|
||||
#endif
|
||||
{ "FsckPassNo", bus_property_append_int, "i", offsetof(Service, fsck_passno) },
|
||||
{ "FsckPassNo", bus_property_append_int, "i", offsetof(Service, fsck_passno) },
|
||||
{ NULL, }
|
||||
};
|
||||
|
||||
|
@ -205,6 +205,19 @@ static void service_connection_unref(Service *s) {
|
||||
unit_ref_unset(&s->accept_socket);
|
||||
}
|
||||
|
||||
static void service_stop_watchdog(Service *s) {
|
||||
assert(s);
|
||||
|
||||
s->watchdog_timestamp.realtime = 0;
|
||||
s->watchdog_timestamp.monotonic = 0;
|
||||
}
|
||||
|
||||
static void service_reset_watchdog(Service *s) {
|
||||
assert(s);
|
||||
|
||||
dual_timestamp_get(&s->watchdog_timestamp);
|
||||
}
|
||||
|
||||
static void service_done(Unit *u) {
|
||||
Service *s = SERVICE(u);
|
||||
|
||||
@ -1476,6 +1489,9 @@ static void service_set_state(Service *s, ServiceState state) {
|
||||
service_connection_unref(s);
|
||||
}
|
||||
|
||||
if (state == SERVICE_STOP)
|
||||
service_stop_watchdog(s);
|
||||
|
||||
/* For the inactive states unit_notify() will trim the cgroup,
|
||||
* but for exit we have to do that ourselves... */
|
||||
if (state == SERVICE_EXITED && UNIT(s)->manager->n_reloading <= 0)
|
||||
@ -2411,6 +2427,8 @@ static int service_serialize(Unit *u, FILE *f, FDSet *fds) {
|
||||
unit_serialize_item_format(u, f, "main-exec-status-status", "%i", s->main_exec_status.status);
|
||||
}
|
||||
}
|
||||
if (dual_timestamp_is_set(&s->watchdog_timestamp))
|
||||
dual_timestamp_serialize(f, "watchdog-timestamp", &s->watchdog_timestamp);
|
||||
|
||||
return 0;
|
||||
}
|
||||
@ -2511,6 +2529,8 @@ static int service_deserialize_item(Unit *u, const char *key, const char *value,
|
||||
dual_timestamp_deserialize(value, &s->main_exec_status.start_timestamp);
|
||||
else if (streq(key, "main-exec-status-exit"))
|
||||
dual_timestamp_deserialize(value, &s->main_exec_status.exit_timestamp);
|
||||
else if (streq(key, "watchdog-timestamp"))
|
||||
dual_timestamp_deserialize(value, &s->watchdog_timestamp);
|
||||
else
|
||||
log_debug("Unknown serialization key '%s'", key);
|
||||
|
||||
@ -3069,6 +3089,10 @@ static void service_notify_message(Unit *u, pid_t pid, char **tags) {
|
||||
}
|
||||
|
||||
}
|
||||
if (strv_find(tags, "WATCHDOG=1")) {
|
||||
log_debug("%s: got WATCHDOG=1", u->id);
|
||||
service_reset_watchdog(s);
|
||||
}
|
||||
|
||||
/* Notify clients about changed status or main pid */
|
||||
unit_add_to_dbus_queue(u);
|
||||
|
@ -100,6 +100,8 @@ struct Service {
|
||||
usec_t restart_usec;
|
||||
usec_t timeout_usec;
|
||||
|
||||
dual_timestamp watchdog_timestamp;
|
||||
|
||||
ExecCommand* exec_command[_SERVICE_EXEC_COMMAND_MAX];
|
||||
ExecContext exec_context;
|
||||
|
||||
|
@ -217,6 +217,11 @@ int sd_is_mq(int fd, const char *path);
|
||||
MAINPID=... The main pid of a daemon, in case systemd did not
|
||||
fork off the process itself. Example: "MAINPID=4711"
|
||||
|
||||
WATCHDOG=1 Tells systemd to update the watchdog timestamp.
|
||||
Services using this feature should do this at
|
||||
regular intervals. A watchdog framework can use the
|
||||
timestamps to detect failed services.
|
||||
|
||||
Daemons can choose to send additional variables. However, it is
|
||||
recommended to prefix variable names not listed above with X_.
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user