From 7a0019d373ee8082129c1e7d66342662bb96c4b4 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Wed, 9 Aug 2017 21:12:55 +0200 Subject: [PATCH] core: introduce a restart counter (#6495) This adds a per-service restart counter. Each time an automatic restart is scheduled (due to Restart=) it is increased by one. Its current value is exposed over the bus as NRestarts=. It is also logged (in a structured, recognizable way) on each restart. Note that this really only counts automatic starts triggered by Restart= (which it nicely complements). Manual restarts will reset the counter, as will explicit calls to "systemctl reset-failed". It's supposed to be a tool for measuring the automatic restart feature, and nothing else. Fixes: #4126 --- src/core/dbus-service.c | 1 + src/core/service.c | 47 +++++++++++++++++++++++++++++++++++++-- src/core/service.h | 3 +++ src/systemd/sd-messages.h | 4 ++++ 4 files changed, 53 insertions(+), 2 deletions(-) diff --git a/src/core/dbus-service.c b/src/core/dbus-service.c index a20d4b3b99..0b81d085fe 100644 --- a/src/core/dbus-service.c +++ b/src/core/dbus-service.c @@ -67,6 +67,7 @@ const sd_bus_vtable bus_service_vtable[] = { SD_BUS_PROPERTY("USBFunctionStrings", "s", NULL, offsetof(Service, usb_function_strings), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE), SD_BUS_PROPERTY("UID", "u", NULL, offsetof(Unit, ref_uid), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE), SD_BUS_PROPERTY("GID", "u", NULL, offsetof(Unit, ref_gid), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE), + SD_BUS_PROPERTY("NRestarts", "u", bus_property_get_unsigned, offsetof(Service, n_restarts), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE), BUS_EXEC_STATUS_VTABLE("ExecMain", offsetof(Service, main_exec_status), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE), BUS_EXEC_COMMAND_LIST_VTABLE("ExecStartPre", offsetof(Service, exec_command[SERVICE_EXEC_START_PRE]), SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION), diff --git a/src/core/service.c b/src/core/service.c index 39fcdcc1a7..e576f4ba83 100644 --- a/src/core/service.c 
+++ b/src/core/service.c @@ -21,6 +21,8 @@ #include #include +#include "sd-messages.h" + #include "alloc-util.h" #include "async.h" #include "bus-error.h" @@ -1514,7 +1516,10 @@ static void service_enter_dead(Service *s, ServiceResult f, bool allow_restart) goto fail; service_set_state(s, SERVICE_AUTO_RESTART); - } + } else + /* If we shan't restart, then flush out the restart counter. But don't do that immediately, so that the + * user can still introspect the counter. Do so on the next start. */ + s->flush_n_restarts = true; /* The next restart might not be a manual stop, hence reset the flag indicating manual stops */ s->forbid_restart = false; @@ -1932,11 +1937,26 @@ static void service_enter_restart(Service *s) { if (r < 0) goto fail; + /* Count the jobs we enqueue for restarting. This counter is maintained as long as the unit isn't fully + * stopped, i.e. as long as it remains up or remains in auto-restart states. The user can reset the counter + * explicitly however via the usual "systemctl reset-failed" logic. */ + s->n_restarts ++; + s->flush_n_restarts = false; + + log_struct(LOG_INFO, + "MESSAGE_ID=" SD_MESSAGE_UNIT_RESTART_SCHEDULED_STR, + LOG_UNIT_ID(UNIT(s)), + LOG_UNIT_MESSAGE(UNIT(s), "Scheduled restart job, restart counter is at %u.", s->n_restarts), + "N_RESTARTS=%u", s->n_restarts, + NULL); + + /* Notify clients about changed restart counter */ + unit_add_to_dbus_queue(UNIT(s)); + /* Note that we stay in the SERVICE_AUTO_RESTART state here, * it will be canceled as part of the service_stop() call that * is executed as part of JOB_RESTART. */ - log_unit_debug(UNIT(s), "Scheduled restart job."); return; fail: @@ -2119,6 +2139,12 @@ static int service_start(Unit *u) { s->watchdog_override_enable = false; s->watchdog_override_usec = 0; + /* This is not an automatic restart? 
Flush the restart counter then */ + if (s->flush_n_restarts) { + s->n_restarts = 0; + s->flush_n_restarts = false; + } + service_enter_start_pre(s); return 1; } @@ -2271,6 +2297,9 @@ static int service_serialize(Unit *u, FILE *f, FDSet *fds) { unit_serialize_item(u, f, "bus-name-good", yes_no(s->bus_name_good)); unit_serialize_item(u, f, "bus-name-owner", s->bus_name_owner); + unit_serialize_item_format(u, f, "n-restarts", "%u", s->n_restarts); + unit_serialize_item(u, f, "n-restarts", yes_no(s->flush_n_restarts)); + r = unit_serialize_item_escaped(u, f, "status-text", s->status_text); if (r < 0) return r; @@ -2636,6 +2665,18 @@ static int service_deserialize_item(Unit *u, const char *key, const char *value, r = service_deserialize_exec_command(u, key, value); if (r < 0) log_unit_debug_errno(u, r, "Failed to parse serialized command \"%s\": %m", value); + + } else if (streq(key, "n-restarts")) { + r = safe_atou(value, &s->n_restarts); + if (r < 0) + log_unit_debug_errno(u, r, "Failed to parse serialized restart counter '%s': %m", value); + + } else if (streq(key, "flush-n-restarts")) { + r = parse_boolean(value); + if (r < 0) + log_unit_debug_errno(u, r, "Failed to parse serialized flush restart counter setting '%s': %m", value); + else + s->flush_n_restarts = r; } else log_unit_debug(u, "Unknown serialization key: %s", key); @@ -3548,6 +3589,8 @@ static void service_reset_failed(Unit *u) { s->result = SERVICE_SUCCESS; s->reload_result = SERVICE_SUCCESS; + s->n_restarts = 0; + s->flush_n_restarts = false; } static int service_kill(Unit *u, KillWho who, int signo, sd_bus_error *error) { diff --git a/src/core/service.h b/src/core/service.h index f4ba604f69..0ac8bc9a67 100644 --- a/src/core/service.h +++ b/src/core/service.h @@ -193,6 +193,9 @@ struct Service { int stdin_fd; int stdout_fd; int stderr_fd; + + unsigned n_restarts; + bool flush_n_restarts; }; extern const UnitVTable service_vtable; diff --git a/src/systemd/sd-messages.h b/src/systemd/sd-messages.h index 
f466d9b062..4bc248a4b1 100644 --- a/src/systemd/sd-messages.h +++ b/src/systemd/sd-messages.h @@ -99,6 +99,10 @@ _SD_BEGIN_DECLARATIONS; #define SD_MESSAGE_UNIT_RELOADED SD_ID128_MAKE(7b,05,eb,c6,68,38,42,22,ba,a8,88,11,79,cf,da,54) #define SD_MESSAGE_UNIT_RELOADED_STR SD_ID128_MAKE_STR(7b,05,eb,c6,68,38,42,22,ba,a8,88,11,79,cf,da,54) +#define SD_MESSAGE_UNIT_RESTART_SCHEDULED SD_ID128_MAKE(5e,b0,34,94,b6,58,48,70,a5,36,b3,37,29,08,09,b3) +#define SD_MESSAGE_UNIT_RESTART_SCHEDULED_STR \ + SD_ID128_MAKE_STR(5e,b0,34,94,b6,58,48,70,a5,36,b3,37,29,08,09,b3) + #define SD_MESSAGE_SPAWN_FAILED SD_ID128_MAKE(64,12,57,65,1c,1b,4e,c9,a8,62,4d,7a,40,a9,e1,e7) #define SD_MESSAGE_SPAWN_FAILED_STR SD_ID128_MAKE_STR(64,12,57,65,1c,1b,4e,c9,a8,62,4d,7a,40,a9,e1,e7)