mirror of
https://github.com/systemd/systemd.git
synced 2025-01-26 14:04:03 +03:00
Merge pull request #19970 from curtistklein/watchdog-pretimeout-merge
watchdog: Add watchdog pretimeout support
This commit is contained in:
commit
6ccc08954e
@ -428,6 +428,28 @@
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
<term><varname>systemd.watchdog_pre_sec=</varname></term>
|
||||
|
||||
<listitem>
|
||||
<para>Overrides the watchdog pre-timeout settings otherwise configured with
|
||||
<varname>RuntimeWatchdogPreSec=</varname>. Takes a time value (if no unit is specified, seconds is the
|
||||
implicitly assumed time unit) or the special strings <literal>off</literal> or
|
||||
<literal>default</literal>. For details, see
|
||||
<citerefentry><refentrytitle>systemd-system.conf</refentrytitle><manvolnum>5</manvolnum></citerefentry>.</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
<term><varname>systemd.watchdog_pretimeout_governor=</varname></term>
|
||||
|
||||
<listitem>
|
||||
<para>Overrides the watchdog pre-timeout governor otherwise configured with
|
||||
<varname>RuntimeWatchdogPreGovernor=</varname>. Takes a string value. For details, see
|
||||
<citerefentry><refentrytitle>systemd-system.conf</refentrytitle><manvolnum>5</manvolnum></citerefentry>.</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
<term><varname>systemd.cpu_affinity=</varname></term>
|
||||
|
||||
|
@ -402,6 +402,12 @@ node /org/freedesktop/systemd1 {
|
||||
readwrite t RuntimeWatchdogUSec = ...;
|
||||
@org.freedesktop.DBus.Property.EmitsChangedSignal("false")
|
||||
@org.freedesktop.systemd1.Privileged("true")
|
||||
readwrite t RuntimeWatchdogPreUSec = ...;
|
||||
@org.freedesktop.DBus.Property.EmitsChangedSignal("false")
|
||||
@org.freedesktop.systemd1.Privileged("true")
|
||||
readwrite s RuntimeWatchdogPreGovernor = '...';
|
||||
@org.freedesktop.DBus.Property.EmitsChangedSignal("false")
|
||||
@org.freedesktop.systemd1.Privileged("true")
|
||||
readwrite t RebootWatchdogUSec = ...;
|
||||
@org.freedesktop.DBus.Property.EmitsChangedSignal("false")
|
||||
@org.freedesktop.systemd1.Privileged("true")
|
||||
@ -650,6 +656,10 @@ node /org/freedesktop/systemd1 {
|
||||
|
||||
<!--property RuntimeWatchdogUSec is not documented!-->
|
||||
|
||||
<!--property RuntimeWatchdogPreUSec is not documented!-->
|
||||
|
||||
<!--property RuntimeWatchdogPreGovernor is not documented!-->
|
||||
|
||||
<!--property RebootWatchdogUSec is not documented!-->
|
||||
|
||||
<!--property KExecWatchdogUSec is not documented!-->
|
||||
@ -1052,6 +1062,10 @@ node /org/freedesktop/systemd1 {
|
||||
|
||||
<variablelist class="dbus-property" generated="True" extra-ref="RuntimeWatchdogUSec"/>
|
||||
|
||||
<variablelist class="dbus-property" generated="True" extra-ref="RuntimeWatchdogPreUSec"/>
|
||||
|
||||
<variablelist class="dbus-property" generated="True" extra-ref="RuntimeWatchdogPreGovernor"/>
|
||||
|
||||
<variablelist class="dbus-property" generated="True" extra-ref="RebootWatchdogUSec"/>
|
||||
|
||||
<variablelist class="dbus-property" generated="True" extra-ref="KExecWatchdogUSec"/>
|
||||
|
@ -177,6 +177,50 @@
|
||||
<para>These settings have no effect if a hardware watchdog is not available.</para></listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
<term><varname>RuntimeWatchdogPreSec=</varname></term>
|
||||
|
||||
<listitem><para>Configure the hardware watchdog device pre-timeout value.
|
||||
Takes a timeout value in seconds (or in other time units similar to
|
||||
<varname>RuntimeWatchdogSec=</varname>). A watchdog pre-timeout is a
|
||||
notification generated by the watchdog before the watchdog reset might
|
||||
occur in the event the watchdog has not been serviced. This notification
|
||||
is handled by the kernel and can be configured to take an action (e.g.
|
||||
generate a kernel panic) using <varname>RuntimeWatchdogPreGovernor=</varname>.
|
||||
Not all watchdog hardware or drivers support generating a pre-timeout and
|
||||
depending on the state of the system, the kernel may be unable to take the
|
||||
configured action before the watchdog reboot. The watchdog will be configured
|
||||
to generate the pre-timeout event at the amount of time specified by
|
||||
<varname>RuntimeWatchdogPreSec=</varname> before the runtime watchdog timeout
|
||||
(set by <varname>RuntimeWatchdogSec=</varname>). For example, if we have
|
||||
<varname>RuntimeWatchdogSec=30</varname> and
|
||||
<varname>RuntimeWatchdogPreSec=10</varname>, then the pre-timeout event
|
||||
will occur if the watchdog has not pinged for 20s (10s before the
|
||||
watchdog would fire). <varname>RuntimeWatchdogPreSec=</varname>
|
||||
defaults to 0 (off). The value set for <varname>RuntimeWatchdogPreSec=</varname>
|
||||
must be smaller than the timeout value for <varname>RuntimeWatchdogSec=</varname>.
|
||||
This setting has no effect if a hardware watchdog is not available or the
|
||||
hardware watchdog does not support a pre-timeout and will be ignored by the
|
||||
kernel if the setting is greater than the actual watchdog timeout.</para></listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
<term><varname>RuntimeWatchdogPreGovernor=</varname></term>
|
||||
|
||||
<listitem><para>Configure the action taken by the hardware watchdog device
|
||||
when the pre-timeout expires. The default action for the pre-timeout event
|
||||
depends on the kernel configuration, but it is usually to log a kernel
|
||||
message. For a list of valid actions available for a given watchdog device,
|
||||
check the content of the
|
||||
<filename>/sys/class/watchdog/watchdog<replaceable>X</replaceable>/pretimeout_available_governors</filename>
|
||||
file. Typically, available governor types are <varname>noop</varname> and <varname>panic</varname>.
|
||||
Availability, names and functionality might vary depending on the specific device driver
|
||||
in use. If the <filename>pretimeout_available_governors</filename> sysfs file is empty,
|
||||
the governor might be built as a kernel module and might need to be manually loaded
|
||||
(e.g. <varname>pretimeout_noop.ko</varname>), or the watchdog device might not support
|
||||
pre-timeouts.</para></listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
<term><varname>WatchdogDevice=</varname></term>
|
||||
|
||||
|
@ -265,6 +265,42 @@ static int property_get_runtime_watchdog(
|
||||
return sd_bus_message_append(reply, "t", manager_get_watchdog(m, WATCHDOG_RUNTIME));
|
||||
}
|
||||
|
||||
static int property_get_pretimeout_watchdog(
|
||||
sd_bus *bus,
|
||||
const char *path,
|
||||
const char *interface,
|
||||
const char *property,
|
||||
sd_bus_message *reply,
|
||||
void *userdata,
|
||||
sd_bus_error *error) {
|
||||
|
||||
Manager *m = userdata;
|
||||
|
||||
assert(m);
|
||||
assert(bus);
|
||||
assert(reply);
|
||||
|
||||
return sd_bus_message_append(reply, "t", manager_get_watchdog(m, WATCHDOG_PRETIMEOUT));
|
||||
}
|
||||
|
||||
static int property_get_pretimeout_watchdog_governor(
|
||||
sd_bus *bus,
|
||||
const char *path,
|
||||
const char *interface,
|
||||
const char *property,
|
||||
sd_bus_message *reply,
|
||||
void *userdata,
|
||||
sd_bus_error *error) {
|
||||
|
||||
Manager *m = userdata;
|
||||
|
||||
assert(m);
|
||||
assert(bus);
|
||||
assert(reply);
|
||||
|
||||
return sd_bus_message_append(reply, "s", m->watchdog_pretimeout_governor);
|
||||
}
|
||||
|
||||
static int property_get_reboot_watchdog(
|
||||
sd_bus *bus,
|
||||
const char *path,
|
||||
@ -330,6 +366,42 @@ static int property_set_runtime_watchdog(
|
||||
return property_set_watchdog(userdata, WATCHDOG_RUNTIME, value);
|
||||
}
|
||||
|
||||
static int property_set_pretimeout_watchdog(
|
||||
sd_bus *bus,
|
||||
const char *path,
|
||||
const char *interface,
|
||||
const char *property,
|
||||
sd_bus_message *value,
|
||||
void *userdata,
|
||||
sd_bus_error *error) {
|
||||
|
||||
return property_set_watchdog(userdata, WATCHDOG_PRETIMEOUT, value);
|
||||
}
|
||||
|
||||
static int property_set_pretimeout_watchdog_governor(
|
||||
sd_bus *bus,
|
||||
const char *path,
|
||||
const char *interface,
|
||||
const char *property,
|
||||
sd_bus_message *value,
|
||||
void *userdata,
|
||||
sd_bus_error *error) {
|
||||
|
||||
Manager *m = userdata;
|
||||
char *governor;
|
||||
int r;
|
||||
|
||||
assert(m);
|
||||
|
||||
r = sd_bus_message_read(value, "s", &governor);
|
||||
if (r < 0)
|
||||
return r;
|
||||
if (!string_is_safe(governor))
|
||||
return -EINVAL;
|
||||
|
||||
return manager_override_watchdog_pretimeout_governor(m, governor);
|
||||
}
|
||||
|
||||
static int property_set_reboot_watchdog(
|
||||
sd_bus *bus,
|
||||
const char *path,
|
||||
@ -2696,6 +2768,8 @@ const sd_bus_vtable bus_manager_vtable[] = {
|
||||
SD_BUS_PROPERTY("DefaultStandardOutput", "s", bus_property_get_exec_output, offsetof(Manager, default_std_output), SD_BUS_VTABLE_PROPERTY_CONST),
|
||||
SD_BUS_PROPERTY("DefaultStandardError", "s", bus_property_get_exec_output, offsetof(Manager, default_std_error), SD_BUS_VTABLE_PROPERTY_CONST),
|
||||
SD_BUS_WRITABLE_PROPERTY("RuntimeWatchdogUSec", "t", property_get_runtime_watchdog, property_set_runtime_watchdog, 0, 0),
|
||||
SD_BUS_WRITABLE_PROPERTY("RuntimeWatchdogPreUSec", "t", property_get_pretimeout_watchdog, property_set_pretimeout_watchdog, 0, 0),
|
||||
SD_BUS_WRITABLE_PROPERTY("RuntimeWatchdogPreGovernor", "s", property_get_pretimeout_watchdog_governor, property_set_pretimeout_watchdog_governor, 0, 0),
|
||||
SD_BUS_WRITABLE_PROPERTY("RebootWatchdogUSec", "t", property_get_reboot_watchdog, property_set_reboot_watchdog, 0, 0),
|
||||
/* The following item is an obsolete alias */
|
||||
SD_BUS_WRITABLE_PROPERTY("ShutdownWatchdogUSec", "t", property_get_reboot_watchdog, property_set_reboot_watchdog, 0, SD_BUS_VTABLE_HIDDEN),
|
||||
|
@ -138,7 +138,9 @@ static unsigned arg_default_start_limit_burst;
|
||||
static usec_t arg_runtime_watchdog;
|
||||
static usec_t arg_reboot_watchdog;
|
||||
static usec_t arg_kexec_watchdog;
|
||||
static usec_t arg_pretimeout_watchdog;
|
||||
static char *arg_early_core_pattern;
|
||||
static char *arg_watchdog_pretimeout_governor;
|
||||
static char *arg_watchdog_device;
|
||||
static char **arg_default_environment;
|
||||
static char **arg_manager_environment;
|
||||
@ -557,6 +559,37 @@ static int parse_proc_cmdline_item(const char *key, const char *value, void *dat
|
||||
|
||||
arg_kexec_watchdog = arg_reboot_watchdog = arg_runtime_watchdog;
|
||||
|
||||
} else if (proc_cmdline_key_streq(key, "systemd.watchdog_pre_sec")) {
|
||||
|
||||
if (proc_cmdline_value_missing(key, value))
|
||||
return 0;
|
||||
|
||||
if (streq(value, "default"))
|
||||
arg_pretimeout_watchdog = USEC_INFINITY;
|
||||
else if (streq(value, "off"))
|
||||
arg_pretimeout_watchdog = 0;
|
||||
else {
|
||||
r = parse_sec(value, &arg_pretimeout_watchdog);
|
||||
if (r < 0) {
|
||||
log_warning_errno(r, "Failed to parse systemd.watchdog_pre_sec= argument '%s', ignoring: %m", value);
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
} else if (proc_cmdline_key_streq(key, "systemd.watchdog_pretimeout_governor")) {
|
||||
|
||||
if (proc_cmdline_value_missing(key, value) || isempty(value)) {
|
||||
arg_watchdog_pretimeout_governor = mfree(arg_watchdog_pretimeout_governor);
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (!string_is_safe(value)) {
|
||||
log_warning("Watchdog pretimeout governor '%s' is not valid, ignoring.", value);
|
||||
return 0;
|
||||
}
|
||||
|
||||
return free_and_strdup_warn(&arg_watchdog_pretimeout_governor, value);
|
||||
|
||||
} else if (proc_cmdline_key_streq(key, "systemd.clock_usec")) {
|
||||
|
||||
if (proc_cmdline_value_missing(key, value))
|
||||
@ -709,10 +742,12 @@ static int parse_config_file(void) {
|
||||
{ "Manager", "NUMAMask", config_parse_numa_mask, 0, &arg_numa_policy },
|
||||
{ "Manager", "JoinControllers", config_parse_warn_compat, DISABLED_CONFIGURATION, NULL },
|
||||
{ "Manager", "RuntimeWatchdogSec", config_parse_watchdog_sec, 0, &arg_runtime_watchdog },
|
||||
{ "Manager", "RuntimeWatchdogPreSec", config_parse_watchdog_sec, 0, &arg_pretimeout_watchdog },
|
||||
{ "Manager", "RebootWatchdogSec", config_parse_watchdog_sec, 0, &arg_reboot_watchdog },
|
||||
{ "Manager", "ShutdownWatchdogSec", config_parse_watchdog_sec, 0, &arg_reboot_watchdog }, /* obsolete alias */
|
||||
{ "Manager", "KExecWatchdogSec", config_parse_watchdog_sec, 0, &arg_kexec_watchdog },
|
||||
{ "Manager", "WatchdogDevice", config_parse_path, 0, &arg_watchdog_device },
|
||||
{ "Manager", "RuntimeWatchdogPreGovernor", config_parse_safe_string, 0, &arg_watchdog_pretimeout_governor },
|
||||
{ "Manager", "CapabilityBoundingSet", config_parse_capability_set, 0, &arg_capability_bounding_set },
|
||||
{ "Manager", "NoNewPrivileges", config_parse_bool, 0, &arg_no_new_privs },
|
||||
#if HAVE_SECCOMP
|
||||
@ -837,6 +872,7 @@ static void set_manager_defaults(Manager *m) {
|
||||
}
|
||||
|
||||
static void set_manager_settings(Manager *m) {
|
||||
int r;
|
||||
|
||||
assert(m);
|
||||
|
||||
@ -851,6 +887,10 @@ static void set_manager_settings(Manager *m) {
|
||||
manager_set_watchdog(m, WATCHDOG_RUNTIME, arg_runtime_watchdog);
|
||||
manager_set_watchdog(m, WATCHDOG_REBOOT, arg_reboot_watchdog);
|
||||
manager_set_watchdog(m, WATCHDOG_KEXEC, arg_kexec_watchdog);
|
||||
manager_set_watchdog(m, WATCHDOG_PRETIMEOUT, arg_pretimeout_watchdog);
|
||||
r = manager_set_watchdog_pretimeout_governor(m, arg_watchdog_pretimeout_governor);
|
||||
if (r < 0)
|
||||
log_warning_errno(r, "Failed to set watchdog pretimeout governor to '%s', ignoring: %m", arg_watchdog_pretimeout_governor);
|
||||
|
||||
manager_set_show_status(m, arg_show_status, "commandline");
|
||||
m->status_unit_format = arg_status_unit_format;
|
||||
@ -1595,7 +1635,10 @@ static int become_shutdown(
|
||||
watchdog_timer = arg_kexec_watchdog;
|
||||
|
||||
/* If we reboot or kexec let's set the shutdown watchdog and tell the
|
||||
* shutdown binary to repeatedly ping it */
|
||||
* shutdown binary to repeatedly ping it.
|
||||
* Disable the pretimeout watchdog, as we do not support it from the shutdown binary. */
|
||||
(void) watchdog_setup_pretimeout(0);
|
||||
(void) watchdog_setup_pretimeout_governor(NULL);
|
||||
r = watchdog_setup(watchdog_timer);
|
||||
watchdog_close(r < 0);
|
||||
|
||||
@ -2448,8 +2491,10 @@ static void reset_arguments(void) {
|
||||
arg_runtime_watchdog = 0;
|
||||
arg_reboot_watchdog = 10 * USEC_PER_MINUTE;
|
||||
arg_kexec_watchdog = 0;
|
||||
arg_pretimeout_watchdog = 0;
|
||||
arg_early_core_pattern = NULL;
|
||||
arg_watchdog_device = NULL;
|
||||
arg_watchdog_pretimeout_governor = mfree(arg_watchdog_pretimeout_governor);
|
||||
|
||||
arg_default_environment = strv_free(arg_default_environment);
|
||||
arg_manager_environment = strv_free(arg_manager_environment);
|
||||
|
@ -118,6 +118,8 @@ int manager_serialize(
|
||||
(void) serialize_usec(f, "runtime-watchdog-overridden", m->watchdog_overridden[WATCHDOG_RUNTIME]);
|
||||
(void) serialize_usec(f, "reboot-watchdog-overridden", m->watchdog_overridden[WATCHDOG_REBOOT]);
|
||||
(void) serialize_usec(f, "kexec-watchdog-overridden", m->watchdog_overridden[WATCHDOG_KEXEC]);
|
||||
(void) serialize_usec(f, "pretimeout-watchdog-overridden", m->watchdog_overridden[WATCHDOG_PRETIMEOUT]);
|
||||
(void) serialize_item(f, "pretimeout-watchdog-governor-overridden", m->watchdog_pretimeout_governor_overridden);
|
||||
|
||||
for (ManagerTimestamp q = 0; q < _MANAGER_TIMESTAMP_MAX; q++) {
|
||||
_cleanup_free_ char *joined = NULL;
|
||||
@ -455,6 +457,19 @@ int manager_deserialize(Manager *m, FILE *f, FDSet *fds) {
|
||||
else
|
||||
manager_override_watchdog(m, WATCHDOG_KEXEC, t);
|
||||
|
||||
} else if ((val = startswith(l, "pretimeout-watchdog-overridden="))) {
|
||||
usec_t t;
|
||||
|
||||
if (deserialize_usec(val, &t) < 0)
|
||||
log_notice("Failed to parse pretimeout-watchdog-overridden value '%s', ignoring.", val);
|
||||
else
|
||||
manager_override_watchdog(m, WATCHDOG_PRETIMEOUT, t);
|
||||
|
||||
} else if ((val = startswith(l, "pretimeout-watchdog-governor-overridden="))) {
|
||||
r = free_and_strdup(&m->watchdog_pretimeout_governor_overridden, val);
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
||||
} else if (startswith(l, "env=")) {
|
||||
r = deserialize_environment(l + 4, &m->client_environment);
|
||||
if (r < 0)
|
||||
|
@ -813,6 +813,7 @@ int manager_new(UnitFileScope scope, ManagerTestRunFlags test_run_flags, Manager
|
||||
.watchdog_overridden[WATCHDOG_RUNTIME] = USEC_INFINITY,
|
||||
.watchdog_overridden[WATCHDOG_REBOOT] = USEC_INFINITY,
|
||||
.watchdog_overridden[WATCHDOG_KEXEC] = USEC_INFINITY,
|
||||
.watchdog_overridden[WATCHDOG_PRETIMEOUT] = USEC_INFINITY,
|
||||
|
||||
.show_status_overridden = _SHOW_STATUS_INVALID,
|
||||
|
||||
@ -1541,6 +1542,9 @@ Manager* manager_free(Manager *m) {
|
||||
m->prefix[dt] = mfree(m->prefix[dt]);
|
||||
free(m->received_credentials);
|
||||
|
||||
free(m->watchdog_pretimeout_governor);
|
||||
free(m->watchdog_pretimeout_governor_overridden);
|
||||
|
||||
#if BPF_FRAMEWORK
|
||||
lsm_bpf_destroy(m->restrict_fs);
|
||||
#endif
|
||||
@ -3232,9 +3236,12 @@ void manager_set_watchdog(Manager *m, WatchdogType t, usec_t timeout) {
|
||||
if (m->watchdog[t] == timeout)
|
||||
return;
|
||||
|
||||
if (t == WATCHDOG_RUNTIME)
|
||||
if (t == WATCHDOG_RUNTIME) {
|
||||
if (!timestamp_is_set(m->watchdog_overridden[WATCHDOG_RUNTIME]))
|
||||
(void) watchdog_setup(timeout);
|
||||
} else if (t == WATCHDOG_PRETIMEOUT)
|
||||
if (m->watchdog_overridden[WATCHDOG_PRETIMEOUT] == USEC_INFINITY)
|
||||
(void) watchdog_setup_pretimeout(timeout);
|
||||
|
||||
m->watchdog[t] = timeout;
|
||||
}
|
||||
@ -3253,11 +3260,58 @@ void manager_override_watchdog(Manager *m, WatchdogType t, usec_t timeout) {
|
||||
usec_t usec = timestamp_is_set(timeout) ? timeout : m->watchdog[t];
|
||||
|
||||
(void) watchdog_setup(usec);
|
||||
}
|
||||
} else if (t == WATCHDOG_PRETIMEOUT)
|
||||
(void) watchdog_setup_pretimeout(timeout);
|
||||
|
||||
m->watchdog_overridden[t] = timeout;
|
||||
}
|
||||
|
||||
/* Sets the configured pretimeout governor of the manager and pushes it to the watchdog
 * logic via watchdog_setup_pretimeout_governor().
 *
 * Does nothing (returns 0) for user managers or when the value is unchanged.
 * governor may be NULL, which clears the stored value.
 *
 * Returns -ENOMEM if the string cannot be duplicated, a negative value propagated from
 * watchdog_setup_pretimeout_governor() on failure, otherwise the result of
 * free_and_replace(). The stored value is only updated when the watchdog call succeeded. */
int manager_set_watchdog_pretimeout_governor(Manager *m, const char *governor) {
        _cleanup_free_ char *p = NULL;
        int r;

        assert(m);

        if (MANAGER_IS_USER(m))
                return 0;

        if (streq_ptr(m->watchdog_pretimeout_governor, governor))
                return 0;

        /* A NULL governor is legitimate (setting removed from the configuration);
         * strdup(NULL) would dereference a NULL pointer, so only copy real strings. */
        if (governor) {
                p = strdup(governor);
                if (!p)
                        return -ENOMEM;
        }

        r = watchdog_setup_pretimeout_governor(governor);
        if (r < 0)
                return r;

        return free_and_replace(m->watchdog_pretimeout_governor, p);
}
|
||||
|
||||
/* Sets the runtime override of the pretimeout governor and pushes it to the watchdog
 * logic via watchdog_setup_pretimeout_governor().
 *
 * Does nothing (returns 0) for user managers or when the override is unchanged.
 * governor may be NULL, which clears the stored override.
 *
 * Returns -ENOMEM if the string cannot be duplicated, a negative value propagated from
 * watchdog_setup_pretimeout_governor() on failure, otherwise the result of
 * free_and_replace(). The stored override is only updated when the watchdog call
 * succeeded. */
int manager_override_watchdog_pretimeout_governor(Manager *m, const char *governor) {
        _cleanup_free_ char *p = NULL;
        int r;

        assert(m);

        if (MANAGER_IS_USER(m))
                return 0;

        if (streq_ptr(m->watchdog_pretimeout_governor_overridden, governor))
                return 0;

        /* Guard against strdup(NULL), which would dereference a NULL pointer. */
        if (governor) {
                p = strdup(governor);
                if (!p)
                        return -ENOMEM;
        }

        r = watchdog_setup_pretimeout_governor(governor);
        if (r < 0)
                return r;

        return free_and_replace(m->watchdog_pretimeout_governor_overridden, p);
}
|
||||
|
||||
int manager_reload(Manager *m) {
|
||||
_unused_ _cleanup_(manager_reloading_stopp) Manager *reloading = NULL;
|
||||
_cleanup_fdset_free_ FDSet *fds = NULL;
|
||||
|
@ -118,6 +118,7 @@ typedef enum WatchdogType {
|
||||
WATCHDOG_RUNTIME,
|
||||
WATCHDOG_REBOOT,
|
||||
WATCHDOG_KEXEC,
|
||||
WATCHDOG_PRETIMEOUT,
|
||||
_WATCHDOG_TYPE_MAX,
|
||||
} WatchdogType;
|
||||
|
||||
@ -247,6 +248,8 @@ struct Manager {
|
||||
|
||||
usec_t watchdog[_WATCHDOG_TYPE_MAX];
|
||||
usec_t watchdog_overridden[_WATCHDOG_TYPE_MAX];
|
||||
char *watchdog_pretimeout_governor;
|
||||
char *watchdog_pretimeout_governor_overridden;
|
||||
|
||||
dual_timestamp timestamps[_MANAGER_TIMESTAMP_MAX];
|
||||
|
||||
@ -574,6 +577,8 @@ ManagerTimestamp manager_timestamp_initrd_mangle(ManagerTimestamp s);
|
||||
usec_t manager_get_watchdog(Manager *m, WatchdogType t);
|
||||
void manager_set_watchdog(Manager *m, WatchdogType t, usec_t timeout);
|
||||
void manager_override_watchdog(Manager *m, WatchdogType t, usec_t timeout);
|
||||
int manager_set_watchdog_pretimeout_governor(Manager *m, const char *governor);
|
||||
int manager_override_watchdog_pretimeout_governor(Manager *m, const char *governor);
|
||||
|
||||
const char* oom_policy_to_string(OOMPolicy i) _const_;
|
||||
OOMPolicy oom_policy_from_string(const char *s) _pure_;
|
||||
|
@ -30,6 +30,8 @@
|
||||
#NUMAPolicy=default
|
||||
#NUMAMask=
|
||||
#RuntimeWatchdogSec=off
|
||||
#RuntimeWatchdogPreSec=off
|
||||
#RuntimeWatchdogPreGovernor=
|
||||
#RebootWatchdogSec=10min
|
||||
#KExecWatchdogSec=off
|
||||
#WatchdogDevice=
|
||||
|
@ -9,6 +9,7 @@
|
||||
|
||||
#include "errno-util.h"
|
||||
#include "fd-util.h"
|
||||
#include "fileio.h"
|
||||
#include "log.h"
|
||||
#include "path-util.h"
|
||||
#include "string-util.h"
|
||||
@ -18,7 +19,10 @@
|
||||
static int watchdog_fd = -1;
|
||||
static char *watchdog_device;
|
||||
static usec_t watchdog_timeout; /* 0 → close device and USEC_INFINITY → don't change timeout */
|
||||
static usec_t watchdog_pretimeout; /* 0 → disable pretimeout and USEC_INFINITY → don't change pretimeout */
|
||||
static usec_t watchdog_last_ping = USEC_INFINITY;
|
||||
static bool watchdog_supports_pretimeout = false; /* Depends on kernel state that might change at runtime */
|
||||
static char *watchdog_pretimeout_governor = NULL;
|
||||
|
||||
/* Starting from kernel version 4.5, the maximum allowable watchdog timeout is
|
||||
* UINT_MAX/1000U seconds (since internal calculations are done in milliseconds
|
||||
@ -29,11 +33,72 @@ static usec_t watchdog_last_ping = USEC_INFINITY;
|
||||
*/
|
||||
#define WATCHDOG_TIMEOUT_MAX_SEC (CONST_MIN(UINT_MAX/1000U, (unsigned)INT_MAX))
|
||||
|
||||
#define WATCHDOG_GOV_NAME_MAXLEN 20 /* From the kernel watchdog driver */
|
||||
|
||||
/* Converts a microsecond value to whole seconds, rounding up, and clamps the result
 * to WATCHDOG_TIMEOUT_MAX_SEC. */
static int saturated_usec_to_sec(usec_t val) {
        const usec_t limit = (usec_t) WATCHDOG_TIMEOUT_MAX_SEC;
        usec_t secs = DIV_ROUND_UP(val, USEC_PER_SEC);

        if (secs > limit)
                secs = limit; /* Saturate to watchdog max */

        return secs;
}
|
||||
|
||||
static int get_watchdog_sysfs_path(const char *filename, char **ret_path) {
|
||||
struct stat st;
|
||||
|
||||
if (watchdog_fd < 0)
|
||||
return -EBADF;
|
||||
|
||||
if (fstat(watchdog_fd, &st))
|
||||
return -errno;
|
||||
|
||||
if (!S_ISCHR(st.st_mode))
|
||||
return -EBADF;
|
||||
|
||||
if (asprintf(ret_path, "/sys/dev/char/%d:%d/%s", major(st.st_rdev), minor(st.st_rdev), filename) < 0)
|
||||
return -ENOMEM;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int get_pretimeout_governor(char **ret_gov) {
|
||||
_cleanup_free_ char *sys_fn = NULL;
|
||||
int r;
|
||||
|
||||
r = get_watchdog_sysfs_path("pretimeout_governor", &sys_fn);
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
||||
log_info("Watchdog: reading from %s", sys_fn);
|
||||
|
||||
r = read_virtual_file(sys_fn, WATCHDOG_GOV_NAME_MAXLEN - 1, ret_gov, NULL);
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
||||
delete_trailing_chars(*ret_gov, WHITESPACE);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int set_pretimeout_governor(const char *governor) {
|
||||
_cleanup_free_ char *sys_fn = NULL;
|
||||
int r;
|
||||
|
||||
if (isempty(governor))
|
||||
return 0; /* Nothing to do */
|
||||
|
||||
r = get_watchdog_sysfs_path("pretimeout_governor", &sys_fn);
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
||||
log_info("Watchdog: setting pretimeout_governor to '%s' via '%s'", governor, sys_fn);
|
||||
|
||||
r = write_string_file(sys_fn,
|
||||
governor,
|
||||
WRITE_STRING_FILE_DISABLE_BUFFER | WRITE_STRING_FILE_VERIFY_ON_FAILURE | WRITE_STRING_FILE_VERIFY_IGNORE_NEWLINE);
|
||||
if (r < 0)
|
||||
return log_error_errno(r, "Failed to set pretimeout_governor to '%s': %m", governor);
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
static int watchdog_set_enable(bool enable) {
|
||||
int flags = enable ? WDIOS_ENABLECARD : WDIOS_DISABLECARD;
|
||||
|
||||
@ -84,6 +149,46 @@ static int watchdog_set_timeout(void) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int watchdog_get_pretimeout(void) {
|
||||
int sec = 0;
|
||||
|
||||
assert(watchdog_fd >= 0);
|
||||
|
||||
if (ioctl(watchdog_fd, WDIOC_GETPRETIMEOUT, &sec) < 0) {
|
||||
watchdog_pretimeout = 0;
|
||||
return log_full_errno(ERRNO_IS_NOT_SUPPORTED(errno) ? LOG_DEBUG : LOG_WARNING, errno, "Failed to get pretimeout value, ignoring: %m");
|
||||
}
|
||||
|
||||
watchdog_pretimeout = sec * USEC_PER_SEC;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int watchdog_set_pretimeout(void) {
|
||||
int sec;
|
||||
|
||||
assert(watchdog_fd >= 0);
|
||||
assert(watchdog_pretimeout != USEC_INFINITY);
|
||||
|
||||
sec = saturated_usec_to_sec(watchdog_pretimeout);
|
||||
|
||||
if (ioctl(watchdog_fd, WDIOC_SETPRETIMEOUT, &sec) < 0) {
|
||||
watchdog_pretimeout = 0;
|
||||
|
||||
if (ERRNO_IS_NOT_SUPPORTED(errno)) {
|
||||
log_info("Watchdog does not support pretimeouts.");
|
||||
return 0;
|
||||
}
|
||||
|
||||
return log_error_errno(errno, "Failed to set pretimeout to %s: %m", FORMAT_TIMESPAN(sec, USEC_PER_SEC));
|
||||
}
|
||||
|
||||
/* The set ioctl does not return the actual value set so get it now. */
|
||||
(void) watchdog_get_pretimeout();
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int watchdog_ping_now(void) {
|
||||
assert(watchdog_fd >= 0);
|
||||
|
||||
@ -95,6 +200,60 @@ static int watchdog_ping_now(void) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int update_pretimeout(void) {
|
||||
_cleanup_free_ char *governor = NULL;
|
||||
int r, t_sec, pt_sec;
|
||||
|
||||
if (watchdog_fd < 0)
|
||||
return 0;
|
||||
|
||||
if (watchdog_timeout == USEC_INFINITY || watchdog_pretimeout == USEC_INFINITY)
|
||||
return 0;
|
||||
|
||||
if (!watchdog_supports_pretimeout && watchdog_pretimeout == 0)
|
||||
return 0; /* Nothing to do */
|
||||
|
||||
/* The configuration changed, do not assume it can still work, as the module(s)
|
||||
* might have been unloaded. */
|
||||
watchdog_supports_pretimeout = false;
|
||||
|
||||
/* Update the pretimeout governor as well */
|
||||
(void) set_pretimeout_governor(watchdog_pretimeout_governor);
|
||||
|
||||
r = get_pretimeout_governor(&governor);
|
||||
if (r < 0)
|
||||
return log_warning_errno(r, "Watchdog: failed to read pretimeout governor: %m");
|
||||
if (isempty(governor))
|
||||
return log_warning_errno(SYNTHETIC_ERRNO(EINVAL),
|
||||
"Watchdog: no pretimeout governor detected - is the required kernel module loaded?");
|
||||
|
||||
/* If we have a pretimeout governor, then pretimeout is supported. Without a governor
|
||||
* pretimeout does not work at all.
|
||||
* Note that this might require a kernel module that is not autoloaded, so we don't
|
||||
* cache this, but we check everytime the configuration changes. */
|
||||
watchdog_supports_pretimeout = true;
|
||||
|
||||
/* Determine if the pretimeout is valid for the current watchdog timeout. */
|
||||
t_sec = saturated_usec_to_sec(watchdog_timeout);
|
||||
pt_sec = saturated_usec_to_sec(watchdog_pretimeout);
|
||||
if (pt_sec >= t_sec) {
|
||||
r = log_error_errno(SYNTHETIC_ERRNO(EINVAL),
|
||||
"Cannot set watchdog pretimeout to %is (%s watchdog timeout of %is)",
|
||||
pt_sec, pt_sec == t_sec ? "same as" : "longer than", t_sec);
|
||||
(void) watchdog_get_pretimeout();
|
||||
} else
|
||||
r = watchdog_set_pretimeout();
|
||||
|
||||
if (watchdog_pretimeout == 0)
|
||||
log_info("Watchdog pretimeout is disabled.");
|
||||
else
|
||||
log_info("Watchdog running with a pretimeout of %s with governor '%s'.",
|
||||
FORMAT_TIMESPAN(watchdog_pretimeout, 0),
|
||||
governor);
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
static int update_timeout(void) {
|
||||
int r;
|
||||
|
||||
@ -121,6 +280,12 @@ static int update_timeout(void) {
|
||||
return log_error_errno(r, "Failed to query watchdog HW timeout: %m");
|
||||
}
|
||||
|
||||
/* If the watchdog timeout was changed, the pretimeout could have been
|
||||
* changed as well by the driver or the kernel so we need to update the
|
||||
* pretimeout now. Or if the watchdog is being configured for the first
|
||||
* time, we want to configure the pretimeout before it is enabled. */
|
||||
(void) update_pretimeout();
|
||||
|
||||
r = watchdog_set_enable(true);
|
||||
if (r < 0)
|
||||
return r;
|
||||
@ -210,9 +375,38 @@ int watchdog_setup(usec_t timeout) {
|
||||
return r;
|
||||
}
|
||||
|
||||
usec_t watchdog_runtime_wait(void) {
|
||||
/* Requests a new watchdog pretimeout.
 *
 * timeout=0 disables the pretimeout whereas timeout=USEC_INFINITY is a nop. A request
 * equal to the current value is also a nop when the watchdog device is open.
 *
 * Returns 0 for the nop cases, otherwise the result of update_pretimeout(). */
int watchdog_setup_pretimeout(usec_t timeout) {
        if ((watchdog_fd >= 0 && timeout == watchdog_pretimeout) || timeout == USEC_INFINITY)
                return 0;

        /* Always record the caller's value. The original only stored it while the
         * watchdog timeout was unset, which dropped every request made once the
         * watchdog was configured (exactly when a pretimeout is meaningful).
         * update_pretimeout() refreshes this with the running value, even if it
         * fails to apply the new one. */
        watchdog_pretimeout = timeout;

        return update_pretimeout();
}
|
||||
|
||||
int watchdog_setup_pretimeout_governor(const char *governor) {
|
||||
if (free_and_strdup(&watchdog_pretimeout_governor, governor) < 0)
|
||||
return -ENOMEM;
|
||||
|
||||
return set_pretimeout_governor(watchdog_pretimeout_governor);
|
||||
}
|
||||
|
||||
static usec_t calc_timeout(void) {
|
||||
/* Calculate the effective timeout which accounts for the watchdog
|
||||
* pretimeout if configured and supported. */
|
||||
if (watchdog_supports_pretimeout && timestamp_is_set(watchdog_pretimeout) && watchdog_timeout >= watchdog_pretimeout)
|
||||
return watchdog_timeout - watchdog_pretimeout;
|
||||
else
|
||||
return watchdog_timeout;
|
||||
}
|
||||
|
||||
usec_t watchdog_runtime_wait(void) {
|
||||
usec_t timeout = calc_timeout();
|
||||
if (!timestamp_is_set(timeout))
|
||||
return USEC_INFINITY;
|
||||
|
||||
/* Sleep half the watchdog timeout since the last successful ping at most */
|
||||
@ -220,14 +414,14 @@ usec_t watchdog_runtime_wait(void) {
|
||||
usec_t ntime = now(clock_boottime_or_monotonic());
|
||||
|
||||
assert(ntime >= watchdog_last_ping);
|
||||
return usec_sub_unsigned(watchdog_last_ping + (watchdog_timeout / 2), ntime);
|
||||
return usec_sub_unsigned(watchdog_last_ping + (timeout / 2), ntime);
|
||||
}
|
||||
|
||||
return watchdog_timeout / 2;
|
||||
return timeout / 2;
|
||||
}
|
||||
|
||||
int watchdog_ping(void) {
|
||||
usec_t ntime;
|
||||
usec_t ntime, timeout;
|
||||
|
||||
if (watchdog_timeout == 0)
|
||||
return 0;
|
||||
@ -237,12 +431,13 @@ int watchdog_ping(void) {
|
||||
return open_watchdog();
|
||||
|
||||
ntime = now(clock_boottime_or_monotonic());
|
||||
timeout = calc_timeout();
|
||||
|
||||
/* Never ping earlier than watchdog_timeout/4 and try to ping
|
||||
* by watchdog_timeout/2 plus scheduling latencies the latest */
|
||||
* by watchdog_timeout/2 plus scheduling latencies at the latest */
|
||||
if (timestamp_is_set(watchdog_last_ping)) {
|
||||
assert(ntime >= watchdog_last_ping);
|
||||
if ((ntime - watchdog_last_ping) < (watchdog_timeout / 4))
|
||||
if ((ntime - watchdog_last_ping) < (timeout / 4))
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -8,6 +8,8 @@
|
||||
|
||||
int watchdog_set_device(const char *path);
|
||||
int watchdog_setup(usec_t timeout);
|
||||
int watchdog_setup_pretimeout(usec_t usec);
|
||||
int watchdog_setup_pretimeout_governor(const char *governor);
|
||||
int watchdog_ping(void);
|
||||
void watchdog_close(bool disarm);
|
||||
usec_t watchdog_runtime_wait(void);
|
||||
|
@ -737,6 +737,8 @@ LogLevel=
|
||||
LogLocation=
|
||||
LogTarget=
|
||||
RuntimeWatchdogSec=
|
||||
RuntimeWatchdogPreSec=
|
||||
RuntimeWatchdogPreGovernor=
|
||||
ShowStatus=
|
||||
RebootWatchdogSec=
|
||||
ShutdownWatchdogSec=
|
||||
|
Loading…
x
Reference in New Issue
Block a user