1
0
mirror of https://github.com/systemd/systemd.git synced 2025-01-26 14:04:03 +03:00

Merge pull request #19970 from curtistklein/watchdog-pretimeout-merge

watchdog: Add watchdog pretimeout support
This commit is contained in:
Luca Boccassi 2022-02-22 22:08:09 +00:00 committed by GitHub
commit 6ccc08954e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
12 changed files with 484 additions and 10 deletions

View File

@ -428,6 +428,28 @@
</listitem>
</varlistentry>
<varlistentry>
<term><varname>systemd.watchdog_pre_sec=</varname></term>
<listitem>
<para>Overrides the watchdog pre-timeout settings otherwise configured with
<varname>RuntimeWatchdogPreSec=</varname>. Takes a time value (if no unit is specified, seconds is the
implicitly assumed time unit) or the special strings <literal>off</literal> or
<literal>default</literal>. For details, see
<citerefentry><refentrytitle>systemd-system.conf</refentrytitle><manvolnum>5</manvolnum></citerefentry>.</para>
</listitem>
</varlistentry>
<varlistentry>
<term><varname>systemd.watchdog_pretimeout_governor=</varname></term>
<listitem>
<para>Overrides the watchdog pre-timeout governor otherwise configured with
<varname>RuntimeWatchdogPreGovernor=</varname>. Takes a string value. For details, see
<citerefentry><refentrytitle>systemd-system.conf</refentrytitle><manvolnum>5</manvolnum></citerefentry>.</para>
</listitem>
</varlistentry>
<varlistentry>
<term><varname>systemd.cpu_affinity=</varname></term>

View File

@ -402,6 +402,12 @@ node /org/freedesktop/systemd1 {
readwrite t RuntimeWatchdogUSec = ...;
@org.freedesktop.DBus.Property.EmitsChangedSignal("false")
@org.freedesktop.systemd1.Privileged("true")
readwrite t RuntimeWatchdogPreUSec = ...;
@org.freedesktop.DBus.Property.EmitsChangedSignal("false")
@org.freedesktop.systemd1.Privileged("true")
readwrite s RuntimeWatchdogPreGovernor = '...';
@org.freedesktop.DBus.Property.EmitsChangedSignal("false")
@org.freedesktop.systemd1.Privileged("true")
readwrite t RebootWatchdogUSec = ...;
@org.freedesktop.DBus.Property.EmitsChangedSignal("false")
@org.freedesktop.systemd1.Privileged("true")
@ -650,6 +656,10 @@ node /org/freedesktop/systemd1 {
<!--property RuntimeWatchdogUSec is not documented!-->
<!--property RuntimeWatchdogPreUSec is not documented!-->
<!--property RuntimeWatchdogPreGovernor is not documented!-->
<!--property RebootWatchdogUSec is not documented!-->
<!--property KExecWatchdogUSec is not documented!-->
@ -1052,6 +1062,10 @@ node /org/freedesktop/systemd1 {
<variablelist class="dbus-property" generated="True" extra-ref="RuntimeWatchdogUSec"/>
<variablelist class="dbus-property" generated="True" extra-ref="RuntimeWatchdogPreUSec"/>
<variablelist class="dbus-property" generated="True" extra-ref="RuntimeWatchdogPreGovernor"/>
<variablelist class="dbus-property" generated="True" extra-ref="RebootWatchdogUSec"/>
<variablelist class="dbus-property" generated="True" extra-ref="KExecWatchdogUSec"/>

View File

@ -177,6 +177,50 @@
<para>These settings have no effect if a hardware watchdog is not available.</para></listitem>
</varlistentry>
<varlistentry>
<term><varname>RuntimeWatchdogPreSec=</varname></term>
<listitem><para>Configure the hardware watchdog device pre-timeout value.
Takes a timeout value in seconds (or in other time units similar to
<varname>RuntimeWatchdogSec=</varname>). A watchdog pre-timeout is a
notification generated by the watchdog before the watchdog reset might
occur in the event the watchdog has not been serviced. This notification
is handled by the kernel and can be configured to take an action (e.g.
generate a kernel panic) using <varname>RuntimeWatchdogPreGovernor=</varname>.
Not all watchdog hardware or drivers support generating a pre-timeout and
depending on the state of the system, the kernel may be unable to take the
configured action before the watchdog reboot. The watchdog will be configured
to generate the pre-timeout event at the amount of time specified by
<varname>RuntimeWatchdogPreSec=</varname> before the runtime watchdog timeout
(set by <varname>RuntimeWatchdogSec=</varname>). For example, if we have
<varname>RuntimeWatchdogSec=30</varname> and
<varname>RuntimeWatchdogPreSec=10</varname>, then the pre-timeout event
will occur if the watchdog has not pinged for 20s (10s before the
watchdog would fire). <varname>RuntimeWatchdogPreSec=</varname>
defaults to 0 (off). The value set for <varname>RuntimeWatchdogPreSec=</varname>
must be smaller than the timeout value for <varname>RuntimeWatchdogSec=</varname>.
This setting has no effect if a hardware watchdog is not available or the
hardware watchdog does not support a pre-timeout and will be ignored by the
kernel if the setting is greater than the actual watchdog timeout.</para></listitem>
</varlistentry>
<varlistentry>
<term><varname>RuntimeWatchdogPreGovernor=</varname></term>
<listitem><para>Configure the action taken by the hardware watchdog device
when the pre-timeout expires. The default action for the pre-timeout event
depends on the kernel configuration, but it is usually to log a kernel
message. For a list of valid actions available for a given watchdog device,
check the content of the
<filename>/sys/class/watchdog/watchdog<replaceable>X</replaceable>/pretimeout_available_governors</filename>
file. Typically, available governor types are <varname>noop</varname> and <varname>panic</varname>.
Availability, names and functionality might vary depending on the specific device driver
in use. If the <filename>pretimeout_available_governors</filename> sysfs file is empty,
the governor might be built as a kernel module and might need to be manually loaded
(e.g. <varname>pretimeout_noop.ko</varname>), or the watchdog device might not support
pre-timeouts.</para></listitem>
</varlistentry>
<varlistentry>
<term><varname>WatchdogDevice=</varname></term>

View File

@ -265,6 +265,42 @@ static int property_get_runtime_watchdog(
return sd_bus_message_append(reply, "t", manager_get_watchdog(m, WATCHDOG_RUNTIME));
}
/* D-Bus property getter: appends the manager's WATCHDOG_PRETIMEOUT value, as reported by
 * manager_get_watchdog(), to the reply as a "t" (uint64) item. */
static int property_get_pretimeout_watchdog(
                sd_bus *bus,
                const char *path,
                const char *interface,
                const char *property,
                sd_bus_message *reply,
                void *userdata,
                sd_bus_error *error) {

        Manager *m = userdata;
        usec_t pretimeout;

        assert(m);
        assert(bus);
        assert(reply);

        pretimeout = manager_get_watchdog(m, WATCHDOG_PRETIMEOUT);

        return sd_bus_message_append(reply, "t", pretimeout);
}
/* D-Bus property getter: appends the manager's configured pretimeout governor
 * (m->watchdog_pretimeout_governor) to the reply as an "s" (string) item. */
static int property_get_pretimeout_watchdog_governor(
                sd_bus *bus,
                const char *path,
                const char *interface,
                const char *property,
                sd_bus_message *reply,
                void *userdata,
                sd_bus_error *error) {

        Manager *m = userdata;
        const char *governor;

        assert(m);
        assert(bus);
        assert(reply);

        governor = m->watchdog_pretimeout_governor;

        return sd_bus_message_append(reply, "s", governor);
}
static int property_get_reboot_watchdog(
sd_bus *bus,
const char *path,
@ -330,6 +366,42 @@ static int property_set_runtime_watchdog(
return property_set_watchdog(userdata, WATCHDOG_RUNTIME, value);
}
/* D-Bus property setter for the pretimeout value: forwards the incoming message to the shared
 * watchdog setter, selecting the WATCHDOG_PRETIMEOUT slot. */
static int property_set_pretimeout_watchdog(
                sd_bus *bus,
                const char *path,
                const char *interface,
                const char *property,
                sd_bus_message *value,
                void *userdata,
                sd_bus_error *error) {

        Manager *m = userdata;

        return property_set_watchdog(m, WATCHDOG_PRETIMEOUT, value);
}
/* D-Bus property setter for the pretimeout governor.
 *
 * Reads one string from the message, rejects it with -EINVAL if string_is_safe() refuses it, and
 * otherwise installs it as the runtime override via manager_override_watchdog_pretimeout_governor().
 * Errors from reading the message or from the override call are returned unchanged. */
static int property_set_pretimeout_watchdog_governor(
                sd_bus *bus,
                const char *path,
                const char *interface,
                const char *property,
                sd_bus_message *value,
                void *userdata,
                sd_bus_error *error) {

        Manager *m = userdata;
        /* The string returned for "s" points into the message and is owned by it, hence const:
         * it must be neither modified nor freed here. */
        const char *governor;
        int r;

        assert(m);

        r = sd_bus_message_read(value, "s", &governor);
        if (r < 0)
                return r;

        if (!string_is_safe(governor))
                return -EINVAL;

        return manager_override_watchdog_pretimeout_governor(m, governor);
}
static int property_set_reboot_watchdog(
sd_bus *bus,
const char *path,
@ -2696,6 +2768,8 @@ const sd_bus_vtable bus_manager_vtable[] = {
SD_BUS_PROPERTY("DefaultStandardOutput", "s", bus_property_get_exec_output, offsetof(Manager, default_std_output), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("DefaultStandardError", "s", bus_property_get_exec_output, offsetof(Manager, default_std_error), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_WRITABLE_PROPERTY("RuntimeWatchdogUSec", "t", property_get_runtime_watchdog, property_set_runtime_watchdog, 0, 0),
SD_BUS_WRITABLE_PROPERTY("RuntimeWatchdogPreUSec", "t", property_get_pretimeout_watchdog, property_set_pretimeout_watchdog, 0, 0),
SD_BUS_WRITABLE_PROPERTY("RuntimeWatchdogPreGovernor", "s", property_get_pretimeout_watchdog_governor, property_set_pretimeout_watchdog_governor, 0, 0),
SD_BUS_WRITABLE_PROPERTY("RebootWatchdogUSec", "t", property_get_reboot_watchdog, property_set_reboot_watchdog, 0, 0),
/* The following item is an obsolete alias */
SD_BUS_WRITABLE_PROPERTY("ShutdownWatchdogUSec", "t", property_get_reboot_watchdog, property_set_reboot_watchdog, 0, SD_BUS_VTABLE_HIDDEN),

View File

@ -138,7 +138,9 @@ static unsigned arg_default_start_limit_burst;
static usec_t arg_runtime_watchdog;
static usec_t arg_reboot_watchdog;
static usec_t arg_kexec_watchdog;
static usec_t arg_pretimeout_watchdog;
static char *arg_early_core_pattern;
static char *arg_watchdog_pretimeout_governor;
static char *arg_watchdog_device;
static char **arg_default_environment;
static char **arg_manager_environment;
@ -557,6 +559,37 @@ static int parse_proc_cmdline_item(const char *key, const char *value, void *dat
arg_kexec_watchdog = arg_reboot_watchdog = arg_runtime_watchdog;
} else if (proc_cmdline_key_streq(key, "systemd.watchdog_pre_sec")) {
if (proc_cmdline_value_missing(key, value))
return 0;
if (streq(value, "default"))
arg_pretimeout_watchdog = USEC_INFINITY;
else if (streq(value, "off"))
arg_pretimeout_watchdog = 0;
else {
r = parse_sec(value, &arg_pretimeout_watchdog);
if (r < 0) {
log_warning_errno(r, "Failed to parse systemd.watchdog_pre_sec= argument '%s', ignoring: %m", value);
return 0;
}
}
} else if (proc_cmdline_key_streq(key, "systemd.watchdog_pretimeout_governor")) {
if (proc_cmdline_value_missing(key, value) || isempty(value)) {
arg_watchdog_pretimeout_governor = mfree(arg_watchdog_pretimeout_governor);
return 0;
}
if (!string_is_safe(value)) {
log_warning("Watchdog pretimeout governor '%s' is not valid, ignoring.", value);
return 0;
}
return free_and_strdup_warn(&arg_watchdog_pretimeout_governor, value);
} else if (proc_cmdline_key_streq(key, "systemd.clock_usec")) {
if (proc_cmdline_value_missing(key, value))
@ -709,10 +742,12 @@ static int parse_config_file(void) {
{ "Manager", "NUMAMask", config_parse_numa_mask, 0, &arg_numa_policy },
{ "Manager", "JoinControllers", config_parse_warn_compat, DISABLED_CONFIGURATION, NULL },
{ "Manager", "RuntimeWatchdogSec", config_parse_watchdog_sec, 0, &arg_runtime_watchdog },
{ "Manager", "RuntimeWatchdogPreSec", config_parse_watchdog_sec, 0, &arg_pretimeout_watchdog },
{ "Manager", "RebootWatchdogSec", config_parse_watchdog_sec, 0, &arg_reboot_watchdog },
{ "Manager", "ShutdownWatchdogSec", config_parse_watchdog_sec, 0, &arg_reboot_watchdog }, /* obsolete alias */
{ "Manager", "KExecWatchdogSec", config_parse_watchdog_sec, 0, &arg_kexec_watchdog },
{ "Manager", "WatchdogDevice", config_parse_path, 0, &arg_watchdog_device },
{ "Manager", "RuntimeWatchdogPreGovernor", config_parse_safe_string, 0, &arg_watchdog_pretimeout_governor },
{ "Manager", "CapabilityBoundingSet", config_parse_capability_set, 0, &arg_capability_bounding_set },
{ "Manager", "NoNewPrivileges", config_parse_bool, 0, &arg_no_new_privs },
#if HAVE_SECCOMP
@ -837,6 +872,7 @@ static void set_manager_defaults(Manager *m) {
}
static void set_manager_settings(Manager *m) {
int r;
assert(m);
@ -851,6 +887,10 @@ static void set_manager_settings(Manager *m) {
manager_set_watchdog(m, WATCHDOG_RUNTIME, arg_runtime_watchdog);
manager_set_watchdog(m, WATCHDOG_REBOOT, arg_reboot_watchdog);
manager_set_watchdog(m, WATCHDOG_KEXEC, arg_kexec_watchdog);
manager_set_watchdog(m, WATCHDOG_PRETIMEOUT, arg_pretimeout_watchdog);
r = manager_set_watchdog_pretimeout_governor(m, arg_watchdog_pretimeout_governor);
if (r < 0)
log_warning_errno(r, "Failed to set watchdog pretimeout governor to '%s', ignoring: %m", arg_watchdog_pretimeout_governor);
manager_set_show_status(m, arg_show_status, "commandline");
m->status_unit_format = arg_status_unit_format;
@ -1595,7 +1635,10 @@ static int become_shutdown(
watchdog_timer = arg_kexec_watchdog;
/* If we reboot or kexec let's set the shutdown watchdog and tell the
* shutdown binary to repeatedly ping it */
* shutdown binary to repeatedly ping it.
* Disable the pretimeout watchdog, as we do not support it from the shutdown binary. */
(void) watchdog_setup_pretimeout(0);
(void) watchdog_setup_pretimeout_governor(NULL);
r = watchdog_setup(watchdog_timer);
watchdog_close(r < 0);
@ -2448,8 +2491,10 @@ static void reset_arguments(void) {
arg_runtime_watchdog = 0;
arg_reboot_watchdog = 10 * USEC_PER_MINUTE;
arg_kexec_watchdog = 0;
arg_pretimeout_watchdog = 0;
arg_early_core_pattern = NULL;
arg_watchdog_device = NULL;
arg_watchdog_pretimeout_governor = mfree(arg_watchdog_pretimeout_governor);
arg_default_environment = strv_free(arg_default_environment);
arg_manager_environment = strv_free(arg_manager_environment);

View File

@ -118,6 +118,8 @@ int manager_serialize(
(void) serialize_usec(f, "runtime-watchdog-overridden", m->watchdog_overridden[WATCHDOG_RUNTIME]);
(void) serialize_usec(f, "reboot-watchdog-overridden", m->watchdog_overridden[WATCHDOG_REBOOT]);
(void) serialize_usec(f, "kexec-watchdog-overridden", m->watchdog_overridden[WATCHDOG_KEXEC]);
(void) serialize_usec(f, "pretimeout-watchdog-overridden", m->watchdog_overridden[WATCHDOG_PRETIMEOUT]);
(void) serialize_item(f, "pretimeout-watchdog-governor-overridden", m->watchdog_pretimeout_governor_overridden);
for (ManagerTimestamp q = 0; q < _MANAGER_TIMESTAMP_MAX; q++) {
_cleanup_free_ char *joined = NULL;
@ -455,6 +457,19 @@ int manager_deserialize(Manager *m, FILE *f, FDSet *fds) {
else
manager_override_watchdog(m, WATCHDOG_KEXEC, t);
} else if ((val = startswith(l, "pretimeout-watchdog-overridden="))) {
usec_t t;
if (deserialize_usec(val, &t) < 0)
log_notice("Failed to parse pretimeout-watchdog-overridden value '%s', ignoring.", val);
else
manager_override_watchdog(m, WATCHDOG_PRETIMEOUT, t);
} else if ((val = startswith(l, "pretimeout-watchdog-governor-overridden="))) {
r = free_and_strdup(&m->watchdog_pretimeout_governor_overridden, val);
if (r < 0)
return r;
} else if (startswith(l, "env=")) {
r = deserialize_environment(l + 4, &m->client_environment);
if (r < 0)

View File

@ -813,6 +813,7 @@ int manager_new(UnitFileScope scope, ManagerTestRunFlags test_run_flags, Manager
.watchdog_overridden[WATCHDOG_RUNTIME] = USEC_INFINITY,
.watchdog_overridden[WATCHDOG_REBOOT] = USEC_INFINITY,
.watchdog_overridden[WATCHDOG_KEXEC] = USEC_INFINITY,
.watchdog_overridden[WATCHDOG_PRETIMEOUT] = USEC_INFINITY,
.show_status_overridden = _SHOW_STATUS_INVALID,
@ -1541,6 +1542,9 @@ Manager* manager_free(Manager *m) {
m->prefix[dt] = mfree(m->prefix[dt]);
free(m->received_credentials);
free(m->watchdog_pretimeout_governor);
free(m->watchdog_pretimeout_governor_overridden);
#if BPF_FRAMEWORK
lsm_bpf_destroy(m->restrict_fs);
#endif
@ -3232,9 +3236,12 @@ void manager_set_watchdog(Manager *m, WatchdogType t, usec_t timeout) {
if (m->watchdog[t] == timeout)
return;
if (t == WATCHDOG_RUNTIME)
if (t == WATCHDOG_RUNTIME) {
if (!timestamp_is_set(m->watchdog_overridden[WATCHDOG_RUNTIME]))
(void) watchdog_setup(timeout);
} else if (t == WATCHDOG_PRETIMEOUT)
if (m->watchdog_overridden[WATCHDOG_PRETIMEOUT] == USEC_INFINITY)
(void) watchdog_setup_pretimeout(timeout);
m->watchdog[t] = timeout;
}
@ -3253,11 +3260,58 @@ void manager_override_watchdog(Manager *m, WatchdogType t, usec_t timeout) {
usec_t usec = timestamp_is_set(timeout) ? timeout : m->watchdog[t];
(void) watchdog_setup(usec);
}
} else if (t == WATCHDOG_PRETIMEOUT)
(void) watchdog_setup_pretimeout(timeout);
m->watchdog_overridden[t] = timeout;
}
/* Applies the configured watchdog pretimeout governor.
 *
 * Returns 0 without doing anything for user managers, or when the requested governor equals the
 * one already stored. Otherwise the governor is passed to watchdog_setup_pretimeout_governor()
 * and, only if that succeeds, stored in m->watchdog_pretimeout_governor.
 *
 * governor may be NULL, which clears the stored setting.
 *
 * Returns -ENOMEM if the string cannot be duplicated, or the error reported by
 * watchdog_setup_pretimeout_governor(). */
int manager_set_watchdog_pretimeout_governor(Manager *m, const char *governor) {
        _cleanup_free_ char *p = NULL;
        int r;

        assert(m);

        if (MANAGER_IS_USER(m))
                return 0;

        if (streq_ptr(m->watchdog_pretimeout_governor, governor))
                return 0;

        /* strdup(NULL) is undefined behaviour, and a NULL result here must not be mistaken for an
         * allocation failure: only copy when there is something to copy. */
        if (governor) {
                p = strdup(governor);
                if (!p)
                        return -ENOMEM;
        }

        r = watchdog_setup_pretimeout_governor(governor);
        if (r < 0)
                return r;

        return free_and_replace(m->watchdog_pretimeout_governor, p);
}
/* Installs a runtime override for the watchdog pretimeout governor.
 *
 * Returns 0 without doing anything for user managers, or when the requested governor equals the
 * override already stored. Otherwise the governor is passed to
 * watchdog_setup_pretimeout_governor() and, only if that succeeds, stored in
 * m->watchdog_pretimeout_governor_overridden.
 *
 * governor may be NULL, which clears the stored override.
 *
 * Returns -ENOMEM if the string cannot be duplicated, or the error reported by
 * watchdog_setup_pretimeout_governor(). */
int manager_override_watchdog_pretimeout_governor(Manager *m, const char *governor) {
        _cleanup_free_ char *p = NULL;
        int r;

        assert(m);

        if (MANAGER_IS_USER(m))
                return 0;

        if (streq_ptr(m->watchdog_pretimeout_governor_overridden, governor))
                return 0;

        /* strdup(NULL) is undefined behaviour; only copy when a governor was actually given. */
        if (governor) {
                p = strdup(governor);
                if (!p)
                        return -ENOMEM;
        }

        r = watchdog_setup_pretimeout_governor(governor);
        if (r < 0)
                return r;

        return free_and_replace(m->watchdog_pretimeout_governor_overridden, p);
}
int manager_reload(Manager *m) {
_unused_ _cleanup_(manager_reloading_stopp) Manager *reloading = NULL;
_cleanup_fdset_free_ FDSet *fds = NULL;

View File

@ -118,6 +118,7 @@ typedef enum WatchdogType {
WATCHDOG_RUNTIME,
WATCHDOG_REBOOT,
WATCHDOG_KEXEC,
WATCHDOG_PRETIMEOUT,
_WATCHDOG_TYPE_MAX,
} WatchdogType;
@ -247,6 +248,8 @@ struct Manager {
usec_t watchdog[_WATCHDOG_TYPE_MAX];
usec_t watchdog_overridden[_WATCHDOG_TYPE_MAX];
char *watchdog_pretimeout_governor;
char *watchdog_pretimeout_governor_overridden;
dual_timestamp timestamps[_MANAGER_TIMESTAMP_MAX];
@ -574,6 +577,8 @@ ManagerTimestamp manager_timestamp_initrd_mangle(ManagerTimestamp s);
usec_t manager_get_watchdog(Manager *m, WatchdogType t);
void manager_set_watchdog(Manager *m, WatchdogType t, usec_t timeout);
void manager_override_watchdog(Manager *m, WatchdogType t, usec_t timeout);
int manager_set_watchdog_pretimeout_governor(Manager *m, const char *governor);
int manager_override_watchdog_pretimeout_governor(Manager *m, const char *governor);
const char* oom_policy_to_string(OOMPolicy i) _const_;
OOMPolicy oom_policy_from_string(const char *s) _pure_;

View File

@ -30,6 +30,8 @@
#NUMAPolicy=default
#NUMAMask=
#RuntimeWatchdogSec=off
#RuntimeWatchdogPreSec=off
#RuntimeWatchdogPreGovernor=
#RebootWatchdogSec=10min
#KExecWatchdogSec=off
#WatchdogDevice=

View File

@ -9,6 +9,7 @@
#include "errno-util.h"
#include "fd-util.h"
#include "fileio.h"
#include "log.h"
#include "path-util.h"
#include "string-util.h"
@ -18,7 +19,10 @@
static int watchdog_fd = -1;
static char *watchdog_device;
static usec_t watchdog_timeout; /* 0 → close device and USEC_INFINITY → don't change timeout */
static usec_t watchdog_pretimeout; /* 0 → disable pretimeout and USEC_INFINITY → don't change pretimeout */
static usec_t watchdog_last_ping = USEC_INFINITY;
static bool watchdog_supports_pretimeout = false; /* Depends on kernel state that might change at runtime */
static char *watchdog_pretimeout_governor = NULL;
/* Starting from kernel version 4.5, the maximum allowable watchdog timeout is
* UINT_MAX/1000U seconds (since internal calculations are done in milliseconds
@ -29,11 +33,72 @@ static usec_t watchdog_last_ping = USEC_INFINITY;
*/
#define WATCHDOG_TIMEOUT_MAX_SEC (CONST_MIN(UINT_MAX/1000U, (unsigned)INT_MAX))
#define WATCHDOG_GOV_NAME_MAXLEN 20 /* From the kernel watchdog driver */
/* Converts a microsecond value to whole seconds, rounding up, and clamps the result to
 * WATCHDOG_TIMEOUT_MAX_SEC so that it fits the int handed to the watchdog ioctls. */
static int saturated_usec_to_sec(usec_t val) {
        usec_t limit = (usec_t) WATCHDOG_TIMEOUT_MAX_SEC;
        usec_t secs = DIV_ROUND_UP(val, USEC_PER_SEC);

        if (secs > limit)
                secs = limit; /* Saturate to watchdog max */

        return secs;
}
/* Builds the sysfs path of an attribute file belonging to the currently open watchdog device.
 *
 * filename: name of the attribute below /sys/dev/char/<major>:<minor>/.
 * ret_path: on success receives a newly allocated string that the caller must free.
 *
 * Returns 0 on success, -EBADF if no watchdog device is open or the open file descriptor is not a
 * character device, -errno if fstat() fails, and -ENOMEM if the path cannot be allocated. */
static int get_watchdog_sysfs_path(const char *filename, char **ret_path) {
        struct stat st;

        if (watchdog_fd < 0)
                return -EBADF;

        if (fstat(watchdog_fd, &st) < 0)
                return -errno;

        if (!S_ISCHR(st.st_mode))
                return -EBADF;

        /* major()/minor() yield unsigned values, so format them as such. */
        if (asprintf(ret_path, "/sys/dev/char/%u:%u/%s", major(st.st_rdev), minor(st.st_rdev), filename) < 0)
                return -ENOMEM;

        return 0;
}
static int get_pretimeout_governor(char **ret_gov) {
_cleanup_free_ char *sys_fn = NULL;
int r;
r = get_watchdog_sysfs_path("pretimeout_governor", &sys_fn);
if (r < 0)
return r;
log_info("Watchdog: reading from %s", sys_fn);
r = read_virtual_file(sys_fn, WATCHDOG_GOV_NAME_MAXLEN - 1, ret_gov, NULL);
if (r < 0)
return r;
delete_trailing_chars(*ret_gov, WHITESPACE);
return 0;
}
static int set_pretimeout_governor(const char *governor) {
_cleanup_free_ char *sys_fn = NULL;
int r;
if (isempty(governor))
return 0; /* Nothing to do */
r = get_watchdog_sysfs_path("pretimeout_governor", &sys_fn);
if (r < 0)
return r;
log_info("Watchdog: setting pretimeout_governor to '%s' via '%s'", governor, sys_fn);
r = write_string_file(sys_fn,
governor,
WRITE_STRING_FILE_DISABLE_BUFFER | WRITE_STRING_FILE_VERIFY_ON_FAILURE | WRITE_STRING_FILE_VERIFY_IGNORE_NEWLINE);
if (r < 0)
return log_error_errno(r, "Failed to set pretimeout_governor to '%s': %m", governor);
return r;
}
static int watchdog_set_enable(bool enable) {
int flags = enable ? WDIOS_ENABLECARD : WDIOS_DISABLECARD;
@ -84,6 +149,46 @@ static int watchdog_set_timeout(void) {
return 0;
}
/* Queries the pretimeout currently programmed into the watchdog device and caches it in
 * watchdog_pretimeout (in microseconds). If the ioctl fails, the cached value is reset to 0 and
 * the failure is logged: at debug level when the errno indicates "not supported", otherwise as
 * a warning. */
static int watchdog_get_pretimeout(void) {
        int sec = 0;

        assert(watchdog_fd >= 0);

        if (ioctl(watchdog_fd, WDIOC_GETPRETIMEOUT, &sec) >= 0) {
                watchdog_pretimeout = sec * USEC_PER_SEC;
                return 0;
        }

        watchdog_pretimeout = 0;
        return log_full_errno(ERRNO_IS_NOT_SUPPORTED(errno) ? LOG_DEBUG : LOG_WARNING, errno, "Failed to get pretimeout value, ignoring: %m");
}
static int watchdog_set_pretimeout(void) {
int sec;
assert(watchdog_fd >= 0);
assert(watchdog_pretimeout != USEC_INFINITY);
sec = saturated_usec_to_sec(watchdog_pretimeout);
if (ioctl(watchdog_fd, WDIOC_SETPRETIMEOUT, &sec) < 0) {
watchdog_pretimeout = 0;
if (ERRNO_IS_NOT_SUPPORTED(errno)) {
log_info("Watchdog does not support pretimeouts.");
return 0;
}
return log_error_errno(errno, "Failed to set pretimeout to %s: %m", FORMAT_TIMESPAN(sec, USEC_PER_SEC));
}
/* The set ioctl does not return the actual value set so get it now. */
(void) watchdog_get_pretimeout();
return 0;
}
static int watchdog_ping_now(void) {
assert(watchdog_fd >= 0);
@ -95,6 +200,60 @@ static int watchdog_ping_now(void) {
return 0;
}
/* Re-applies the pretimeout configuration (governor and pretimeout value) to the open watchdog
 * device and refreshes watchdog_supports_pretimeout.
 *
 * Returns 0 without touching the device when it is not open, when either the timeout or the
 * pretimeout is USEC_INFINITY, or when the pretimeout is 0 and pretimeouts were not known to be
 * supported. Otherwise returns the result of programming the pretimeout, or the error produced
 * while reading the governor or validating the pretimeout against the watchdog timeout. */
static int update_pretimeout(void) {
_cleanup_free_ char *governor = NULL;
int r, t_sec, pt_sec;
if (watchdog_fd < 0)
return 0;
if (watchdog_timeout == USEC_INFINITY || watchdog_pretimeout == USEC_INFINITY)
return 0;
if (!watchdog_supports_pretimeout && watchdog_pretimeout == 0)
return 0; /* Nothing to do */
/* The configuration changed, do not assume it can still work, as the module(s)
 * might have been unloaded. */
watchdog_supports_pretimeout = false;
/* Update the pretimeout governor as well. This is best-effort: the result is ignored and the
 * governor actually in effect is read back below. */
(void) set_pretimeout_governor(watchdog_pretimeout_governor);
r = get_pretimeout_governor(&governor);
if (r < 0)
return log_warning_errno(r, "Watchdog: failed to read pretimeout governor: %m");
if (isempty(governor))
return log_warning_errno(SYNTHETIC_ERRNO(EINVAL),
"Watchdog: no pretimeout governor detected - is the required kernel module loaded?");
/* If we have a pretimeout governor, then pretimeout is supported. Without a governor
 * pretimeout does not work at all.
 * Note that this might require a kernel module that is not autoloaded, so we don't
 * cache this, but we check every time the configuration changes. */
watchdog_supports_pretimeout = true;
/* Determine if the pretimeout is valid for the current watchdog timeout. Both values are
 * compared in whole seconds, i.e. after rounding up and saturation. */
t_sec = saturated_usec_to_sec(watchdog_timeout);
pt_sec = saturated_usec_to_sec(watchdog_pretimeout);
if (pt_sec >= t_sec) {
r = log_error_errno(SYNTHETIC_ERRNO(EINVAL),
"Cannot set watchdog pretimeout to %is (%s watchdog timeout of %is)",
pt_sec, pt_sec == t_sec ? "same as" : "longer than", t_sec);
/* The requested value was rejected: reload watchdog_pretimeout from the device so that
 * the message below reports what is really in effect. */
(void) watchdog_get_pretimeout();
} else
r = watchdog_set_pretimeout();
if (watchdog_pretimeout == 0)
log_info("Watchdog pretimeout is disabled.");
else
log_info("Watchdog running with a pretimeout of %s with governor '%s'.",
FORMAT_TIMESPAN(watchdog_pretimeout, 0),
governor);
return r;
}
static int update_timeout(void) {
int r;
@ -121,6 +280,12 @@ static int update_timeout(void) {
return log_error_errno(r, "Failed to query watchdog HW timeout: %m");
}
/* If the watchdog timeout was changed, the pretimeout could have been
* changed as well by the driver or the kernel so we need to update the
* pretimeout now. Or if the watchdog is being configured for the first
* time, we want to configure the pretimeout before it is enabled. */
(void) update_pretimeout();
r = watchdog_set_enable(true);
if (r < 0)
return r;
@ -210,9 +375,38 @@ int watchdog_setup(usec_t timeout) {
return r;
}
usec_t watchdog_runtime_wait(void) {
/* Requests a new watchdog pretimeout.
 *
 * timeout: pretimeout in microseconds; 0 disables the pretimeout, USEC_INFINITY means "leave
 *          unchanged".
 *
 * Returns 0 if there is nothing to do (USEC_INFINITY, or the device is open and already uses
 * this value), otherwise the result of update_pretimeout(). */
int watchdog_setup_pretimeout(usec_t timeout) {
        /* timeout=0 disables the pretimeout whereas timeout=USEC_INFINITY is a nop. */
        if (timeout == USEC_INFINITY)
                return 0;

        if (watchdog_fd >= 0 && timeout == watchdog_pretimeout)
                return 0;

        /* Initialize the watchdog pretimeout with the caller value, unconditionally: it must not
         * depend on whether a watchdog timeout is currently configured, otherwise the request
         * would be silently dropped. This value is going to be updated by update_pretimeout()
         * with the running value, even if it fails to update the pretimeout. */
        watchdog_pretimeout = timeout;

        return update_pretimeout();
}
/* Remembers the requested pretimeout governor in watchdog_pretimeout_governor and applies it to
 * the watchdog device. Returns -ENOMEM if the name cannot be stored, otherwise the result of
 * set_pretimeout_governor(). */
int watchdog_setup_pretimeout_governor(const char *governor) {
        int r;

        r = free_and_strdup(&watchdog_pretimeout_governor, governor);
        if (r < 0)
                return -ENOMEM;

        return set_pretimeout_governor(watchdog_pretimeout_governor);
}
/* Returns the effective watchdog timeout: the configured timeout minus the pretimeout when a
 * pretimeout is supported, set, and not larger than the timeout; the plain timeout otherwise. */
static usec_t calc_timeout(void) {
        if (!watchdog_supports_pretimeout)
                return watchdog_timeout;

        if (!timestamp_is_set(watchdog_pretimeout))
                return watchdog_timeout;

        /* A pretimeout beyond the timeout cannot be subtracted. */
        if (watchdog_timeout < watchdog_pretimeout)
                return watchdog_timeout;

        return watchdog_timeout - watchdog_pretimeout;
}
usec_t watchdog_runtime_wait(void) {
usec_t timeout = calc_timeout();
if (!timestamp_is_set(timeout))
return USEC_INFINITY;
/* Sleep half the watchdog timeout since the last successful ping at most */
@ -220,14 +414,14 @@ usec_t watchdog_runtime_wait(void) {
usec_t ntime = now(clock_boottime_or_monotonic());
assert(ntime >= watchdog_last_ping);
return usec_sub_unsigned(watchdog_last_ping + (watchdog_timeout / 2), ntime);
return usec_sub_unsigned(watchdog_last_ping + (timeout / 2), ntime);
}
return watchdog_timeout / 2;
return timeout / 2;
}
int watchdog_ping(void) {
usec_t ntime;
usec_t ntime, timeout;
if (watchdog_timeout == 0)
return 0;
@ -237,12 +431,13 @@ int watchdog_ping(void) {
return open_watchdog();
ntime = now(clock_boottime_or_monotonic());
timeout = calc_timeout();
/* Never ping earlier than watchdog_timeout/4 and try to ping
* by watchdog_timeout/2 plus scheduling latencies the latest */
* by watchdog_timeout/2 plus scheduling latencies at the latest */
if (timestamp_is_set(watchdog_last_ping)) {
assert(ntime >= watchdog_last_ping);
if ((ntime - watchdog_last_ping) < (watchdog_timeout / 4))
if ((ntime - watchdog_last_ping) < (timeout / 4))
return 0;
}

View File

@ -8,6 +8,8 @@
int watchdog_set_device(const char *path);
int watchdog_setup(usec_t timeout);
int watchdog_setup_pretimeout(usec_t usec);
int watchdog_setup_pretimeout_governor(const char *governor);
int watchdog_ping(void);
void watchdog_close(bool disarm);
usec_t watchdog_runtime_wait(void);

View File

@ -737,6 +737,8 @@ LogLevel=
LogLocation=
LogTarget=
RuntimeWatchdogSec=
RuntimeWatchdogPreSec=
RuntimeWatchdogPreGovernor=
ShowStatus=
RebootWatchdogSec=
ShutdownWatchdogSec=