mirror of
https://github.com/systemd/systemd-stable.git
synced 2025-01-11 05:17:44 +03:00
core: send sigabrt on watchdog timeout to get the stacktrace
if sigabrt doesn't do the job, follow regular shutdown routine, sigterm > sigkill.
This commit is contained in:
parent
f10af76de5
commit
db2cb23b5b
2
TODO
2
TODO
@ -48,8 +48,6 @@ Features:
|
||||
|
||||
* consider showing the unit names during boot up in the status output, not just the unit descriptions
|
||||
|
||||
* send SIGABRT when a service watchdog is triggered, by default, so that we acquire a backtrace of the hang.
|
||||
|
||||
* dhcp: do we allow configuring dhcp routes on interfaces that are not the one we got the dhcp info from?
|
||||
|
||||
* maybe allow timer units with an empty Units= setting, so that they
|
||||
|
@ -593,8 +593,9 @@
|
||||
(i.e. the "keep-alive ping"). If the time
|
||||
between two such calls is larger than
|
||||
the configured time, then the service
|
||||
is placed in a failed state. By
|
||||
setting <varname>Restart=</varname> to
|
||||
is placed in a failed state and it will
|
||||
be terminated with <varname>SIGABRT</varname>.
|
||||
By setting <varname>Restart=</varname> to
|
||||
<option>on-failure</option> or
|
||||
<option>always</option>, the service
|
||||
will be automatically restarted. The
|
||||
|
@ -446,7 +446,7 @@ static void busname_enter_signal(BusName *n, BusNameState state, BusNameResult f
|
||||
|
||||
r = unit_kill_context(UNIT(n),
|
||||
&kill_context,
|
||||
state != BUSNAME_SIGTERM,
|
||||
state != BUSNAME_SIGTERM ? KILL_KILL : KILL_TERMINATE,
|
||||
-1,
|
||||
n->control_pid,
|
||||
false);
|
||||
|
@ -775,7 +775,8 @@ static void mount_enter_signal(Mount *m, MountState state, MountResult f) {
|
||||
r = unit_kill_context(
|
||||
UNIT(m),
|
||||
&m->kill_context,
|
||||
state != MOUNT_MOUNTING_SIGTERM && state != MOUNT_UNMOUNTING_SIGTERM && state != MOUNT_REMOUNTING_SIGTERM,
|
||||
(state != MOUNT_MOUNTING_SIGTERM && state != MOUNT_UNMOUNTING_SIGTERM && state != MOUNT_REMOUNTING_SIGTERM) ?
|
||||
KILL_KILL : KILL_TERMINATE,
|
||||
-1,
|
||||
m->control_pid,
|
||||
false);
|
||||
|
@ -243,7 +243,7 @@ static void scope_enter_signal(Scope *s, ScopeState state, ScopeResult f) {
|
||||
r = unit_kill_context(
|
||||
UNIT(s),
|
||||
&s->kill_context,
|
||||
state != SCOPE_STOP_SIGTERM,
|
||||
state != SCOPE_STOP_SIGTERM ? KILL_KILL : KILL_TERMINATE,
|
||||
-1, -1, false);
|
||||
if (r < 0)
|
||||
goto fail;
|
||||
|
@ -56,6 +56,7 @@ static const UnitActiveState state_translation_table[_SERVICE_STATE_MAX] = {
|
||||
[SERVICE_EXITED] = UNIT_ACTIVE,
|
||||
[SERVICE_RELOAD] = UNIT_RELOADING,
|
||||
[SERVICE_STOP] = UNIT_DEACTIVATING,
|
||||
[SERVICE_STOP_SIGABRT] = UNIT_DEACTIVATING,
|
||||
[SERVICE_STOP_SIGTERM] = UNIT_DEACTIVATING,
|
||||
[SERVICE_STOP_SIGKILL] = UNIT_DEACTIVATING,
|
||||
[SERVICE_STOP_POST] = UNIT_DEACTIVATING,
|
||||
@ -76,6 +77,7 @@ static const UnitActiveState state_translation_table_idle[_SERVICE_STATE_MAX] =
|
||||
[SERVICE_EXITED] = UNIT_ACTIVE,
|
||||
[SERVICE_RELOAD] = UNIT_RELOADING,
|
||||
[SERVICE_STOP] = UNIT_DEACTIVATING,
|
||||
[SERVICE_STOP_SIGABRT] = UNIT_DEACTIVATING,
|
||||
[SERVICE_STOP_SIGTERM] = UNIT_DEACTIVATING,
|
||||
[SERVICE_STOP_SIGKILL] = UNIT_DEACTIVATING,
|
||||
[SERVICE_STOP_POST] = UNIT_DEACTIVATING,
|
||||
@ -663,7 +665,7 @@ static void service_set_state(Service *s, ServiceState state) {
|
||||
SERVICE_START_PRE, SERVICE_START, SERVICE_START_POST,
|
||||
SERVICE_RELOAD,
|
||||
SERVICE_STOP, SERVICE_STOP_SIGTERM, SERVICE_STOP_SIGKILL,
|
||||
SERVICE_STOP_POST,
|
||||
SERVICE_STOP_SIGABRT, SERVICE_STOP_POST,
|
||||
SERVICE_FINAL_SIGTERM, SERVICE_FINAL_SIGKILL,
|
||||
SERVICE_AUTO_RESTART))
|
||||
s->timer_event_source = sd_event_source_unref(s->timer_event_source);
|
||||
@ -672,7 +674,7 @@ static void service_set_state(Service *s, ServiceState state) {
|
||||
SERVICE_START, SERVICE_START_POST,
|
||||
SERVICE_RUNNING, SERVICE_RELOAD,
|
||||
SERVICE_STOP, SERVICE_STOP_SIGTERM, SERVICE_STOP_SIGKILL,
|
||||
SERVICE_STOP_POST,
|
||||
SERVICE_STOP_SIGABRT, SERVICE_STOP_POST,
|
||||
SERVICE_FINAL_SIGTERM, SERVICE_FINAL_SIGKILL)) {
|
||||
service_unwatch_main_pid(s);
|
||||
s->main_command = NULL;
|
||||
@ -682,7 +684,7 @@ static void service_set_state(Service *s, ServiceState state) {
|
||||
SERVICE_START_PRE, SERVICE_START, SERVICE_START_POST,
|
||||
SERVICE_RELOAD,
|
||||
SERVICE_STOP, SERVICE_STOP_SIGTERM, SERVICE_STOP_SIGKILL,
|
||||
SERVICE_STOP_POST,
|
||||
SERVICE_STOP_SIGABRT, SERVICE_STOP_POST,
|
||||
SERVICE_FINAL_SIGTERM, SERVICE_FINAL_SIGKILL)) {
|
||||
service_unwatch_control_pid(s);
|
||||
s->control_command = NULL;
|
||||
@ -696,7 +698,7 @@ static void service_set_state(Service *s, ServiceState state) {
|
||||
SERVICE_START_PRE, SERVICE_START, SERVICE_START_POST,
|
||||
SERVICE_RUNNING, SERVICE_RELOAD,
|
||||
SERVICE_STOP, SERVICE_STOP_SIGTERM, SERVICE_STOP_SIGKILL, SERVICE_STOP_POST,
|
||||
SERVICE_FINAL_SIGTERM, SERVICE_FINAL_SIGKILL) &&
|
||||
SERVICE_STOP_SIGABRT, SERVICE_FINAL_SIGTERM, SERVICE_FINAL_SIGKILL) &&
|
||||
!(state == SERVICE_DEAD && UNIT(s)->job)) {
|
||||
service_close_socket_fd(s);
|
||||
service_connection_unref(s);
|
||||
@ -750,7 +752,7 @@ static int service_coldplug(Unit *u) {
|
||||
SERVICE_START_PRE, SERVICE_START, SERVICE_START_POST,
|
||||
SERVICE_RELOAD,
|
||||
SERVICE_STOP, SERVICE_STOP_SIGTERM, SERVICE_STOP_SIGKILL,
|
||||
SERVICE_STOP_POST,
|
||||
SERVICE_STOP_SIGABRT, SERVICE_STOP_POST,
|
||||
SERVICE_FINAL_SIGTERM, SERVICE_FINAL_SIGKILL)) {
|
||||
|
||||
usec_t k;
|
||||
@ -779,7 +781,7 @@ static int service_coldplug(Unit *u) {
|
||||
SERVICE_START, SERVICE_START_POST,
|
||||
SERVICE_RUNNING, SERVICE_RELOAD,
|
||||
SERVICE_STOP, SERVICE_STOP_SIGTERM, SERVICE_STOP_SIGKILL,
|
||||
SERVICE_STOP_POST,
|
||||
SERVICE_STOP_SIGABRT, SERVICE_STOP_POST,
|
||||
SERVICE_FINAL_SIGTERM, SERVICE_FINAL_SIGKILL))) {
|
||||
r = unit_watch_pid(UNIT(s), s->main_pid);
|
||||
if (r < 0)
|
||||
@ -791,7 +793,7 @@ static int service_coldplug(Unit *u) {
|
||||
SERVICE_START_PRE, SERVICE_START, SERVICE_START_POST,
|
||||
SERVICE_RELOAD,
|
||||
SERVICE_STOP, SERVICE_STOP_SIGTERM, SERVICE_STOP_SIGKILL,
|
||||
SERVICE_STOP_POST,
|
||||
SERVICE_STOP_SIGABRT, SERVICE_STOP_POST,
|
||||
SERVICE_FINAL_SIGTERM, SERVICE_FINAL_SIGKILL)) {
|
||||
r = unit_watch_pid(UNIT(s), s->control_pid);
|
||||
if (r < 0)
|
||||
@ -1181,7 +1183,8 @@ static void service_enter_signal(Service *s, ServiceState state, ServiceResult f
|
||||
r = unit_kill_context(
|
||||
UNIT(s),
|
||||
&s->kill_context,
|
||||
state != SERVICE_STOP_SIGTERM && state != SERVICE_FINAL_SIGTERM,
|
||||
(state != SERVICE_STOP_SIGTERM && state != SERVICE_FINAL_SIGTERM && state != SERVICE_STOP_SIGABRT) ?
|
||||
KILL_KILL : (state == SERVICE_STOP_SIGABRT ? KILL_ABORT : KILL_TERMINATE),
|
||||
s->main_pid,
|
||||
s->control_pid,
|
||||
s->main_pid_alien);
|
||||
@ -1197,7 +1200,7 @@ static void service_enter_signal(Service *s, ServiceState state, ServiceResult f
|
||||
}
|
||||
|
||||
service_set_state(s, state);
|
||||
} else if (state == SERVICE_STOP_SIGTERM)
|
||||
} else if (state == SERVICE_STOP_SIGTERM || state == SERVICE_STOP_SIGABRT)
|
||||
service_enter_signal(s, SERVICE_STOP_SIGKILL, SERVICE_SUCCESS);
|
||||
else if (state == SERVICE_STOP_SIGKILL)
|
||||
service_enter_stop_post(s, SERVICE_SUCCESS);
|
||||
@ -1211,7 +1214,8 @@ static void service_enter_signal(Service *s, ServiceState state, ServiceResult f
|
||||
fail:
|
||||
log_warning_unit(UNIT(s)->id, "%s failed to kill processes: %s", UNIT(s)->id, strerror(-r));
|
||||
|
||||
if (state == SERVICE_STOP_SIGTERM || state == SERVICE_STOP_SIGKILL)
|
||||
if (state == SERVICE_STOP_SIGTERM || state == SERVICE_STOP_SIGKILL ||
|
||||
state == SERVICE_STOP_SIGABRT)
|
||||
service_enter_stop_post(s, SERVICE_FAILURE_RESOURCES);
|
||||
else
|
||||
service_enter_dead(s, SERVICE_FAILURE_RESOURCES, true);
|
||||
@ -1637,6 +1641,7 @@ static int service_start(Unit *u) {
|
||||
/* We cannot fulfill this request right now, try again later
|
||||
* please! */
|
||||
if (s->state == SERVICE_STOP ||
|
||||
s->state == SERVICE_STOP_SIGABRT ||
|
||||
s->state == SERVICE_STOP_SIGTERM ||
|
||||
s->state == SERVICE_STOP_SIGKILL ||
|
||||
s->state == SERVICE_STOP_POST ||
|
||||
@ -1695,6 +1700,7 @@ static int service_stop(Unit *u) {
|
||||
|
||||
/* Already on it */
|
||||
if (s->state == SERVICE_STOP ||
|
||||
s->state == SERVICE_STOP_SIGABRT ||
|
||||
s->state == SERVICE_STOP_SIGTERM ||
|
||||
s->state == SERVICE_STOP_SIGKILL ||
|
||||
s->state == SERVICE_STOP_POST ||
|
||||
@ -2126,6 +2132,7 @@ static void service_notify_cgroup_empty_event(Unit *u) {
|
||||
service_enter_running(s, SERVICE_SUCCESS);
|
||||
break;
|
||||
|
||||
case SERVICE_STOP_SIGABRT:
|
||||
case SERVICE_STOP_SIGTERM:
|
||||
case SERVICE_STOP_SIGKILL:
|
||||
|
||||
@ -2252,6 +2259,7 @@ static void service_sigchld_event(Unit *u, pid_t pid, int code, int status) {
|
||||
service_enter_running(s, f);
|
||||
break;
|
||||
|
||||
case SERVICE_STOP_SIGABRT:
|
||||
case SERVICE_STOP_SIGTERM:
|
||||
case SERVICE_STOP_SIGKILL:
|
||||
|
||||
@ -2392,6 +2400,7 @@ static void service_sigchld_event(Unit *u, pid_t pid, int code, int status) {
|
||||
service_enter_signal(s, SERVICE_STOP_SIGTERM, f);
|
||||
break;
|
||||
|
||||
case SERVICE_STOP_SIGABRT:
|
||||
case SERVICE_STOP_SIGTERM:
|
||||
case SERVICE_STOP_SIGKILL:
|
||||
if (main_pid_good(s) <= 0)
|
||||
@ -2461,6 +2470,12 @@ static int service_dispatch_timer(sd_event_source *source, usec_t usec, void *us
|
||||
service_enter_signal(s, SERVICE_STOP_SIGTERM, SERVICE_FAILURE_TIMEOUT);
|
||||
break;
|
||||
|
||||
case SERVICE_STOP_SIGABRT:
|
||||
log_warning_unit(UNIT(s)->id,
|
||||
"%s stop-sigabrt timed out. Terminating.", UNIT(s)->id);
|
||||
service_enter_signal(s, SERVICE_STOP_SIGTERM, s->result);
|
||||
break;
|
||||
|
||||
case SERVICE_STOP_SIGTERM:
|
||||
if (s->kill_context.send_sigkill) {
|
||||
log_warning_unit(UNIT(s)->id, "%s stop-sigterm timed out. Killing.", UNIT(s)->id);
|
||||
@ -2528,7 +2543,7 @@ static int service_dispatch_watchdog(sd_event_source *source, usec_t usec, void
|
||||
log_error_unit(UNIT(s)->id, "%s watchdog timeout (limit %s)!", UNIT(s)->id,
|
||||
format_timespan(t, sizeof(t), s->watchdog_usec, 1));
|
||||
|
||||
service_enter_signal(s, SERVICE_STOP_SIGTERM, SERVICE_FAILURE_WATCHDOG);
|
||||
service_enter_signal(s, SERVICE_STOP_SIGABRT, SERVICE_FAILURE_WATCHDOG);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
@ -39,6 +39,7 @@ typedef enum ServiceState {
|
||||
SERVICE_EXITED, /* Nothing is running anymore, but RemainAfterExit is true hence this is OK */
|
||||
SERVICE_RELOAD,
|
||||
SERVICE_STOP, /* No STOP_PRE state, instead just register multiple STOP executables */
|
||||
SERVICE_STOP_SIGABRT, /* Watchdog timeout */
|
||||
SERVICE_STOP_SIGTERM,
|
||||
SERVICE_STOP_SIGKILL,
|
||||
SERVICE_STOP_POST,
|
||||
|
@ -1578,7 +1578,8 @@ static void socket_enter_signal(Socket *s, SocketState state, SocketResult f) {
|
||||
r = unit_kill_context(
|
||||
UNIT(s),
|
||||
&s->kill_context,
|
||||
state != SOCKET_STOP_PRE_SIGTERM && state != SOCKET_FINAL_SIGTERM,
|
||||
(state != SOCKET_STOP_PRE_SIGTERM && state != SOCKET_FINAL_SIGTERM) ?
|
||||
KILL_KILL : KILL_TERMINATE,
|
||||
-1,
|
||||
s->control_pid,
|
||||
false);
|
||||
|
@ -687,7 +687,8 @@ static void swap_enter_signal(Swap *s, SwapState state, SwapResult f) {
|
||||
r = unit_kill_context(
|
||||
UNIT(s),
|
||||
&s->kill_context,
|
||||
state != SWAP_ACTIVATING_SIGTERM && state != SWAP_DEACTIVATING_SIGTERM,
|
||||
(state != SWAP_ACTIVATING_SIGTERM && state != SWAP_DEACTIVATING_SIGTERM) ?
|
||||
KILL_KILL : KILL_TERMINATE,
|
||||
-1,
|
||||
s->control_pid,
|
||||
false);
|
||||
|
@ -3313,7 +3313,7 @@ int unit_make_transient(Unit *u) {
|
||||
int unit_kill_context(
|
||||
Unit *u,
|
||||
KillContext *c,
|
||||
bool sigkill,
|
||||
KillOperation k,
|
||||
pid_t main_pid,
|
||||
pid_t control_pid,
|
||||
bool main_pid_alien) {
|
||||
@ -3326,7 +3326,19 @@ int unit_kill_context(
|
||||
if (c->kill_mode == KILL_NONE)
|
||||
return 0;
|
||||
|
||||
sig = sigkill ? SIGKILL : c->kill_signal;
|
||||
switch (k) {
|
||||
case KILL_KILL:
|
||||
sig = SIGKILL;
|
||||
break;
|
||||
case KILL_ABORT:
|
||||
sig = SIGABRT;
|
||||
break;
|
||||
case KILL_TERMINATE:
|
||||
sig = c->kill_signal;
|
||||
break;
|
||||
default:
|
||||
assert_not_reached("KillOperation unknown");
|
||||
}
|
||||
|
||||
if (main_pid > 0) {
|
||||
r = kill_and_sigcont(main_pid, sig);
|
||||
@ -3340,7 +3352,7 @@ int unit_kill_context(
|
||||
if (!main_pid_alien)
|
||||
wait_for_exit = true;
|
||||
|
||||
if (c->send_sighup && !sigkill)
|
||||
if (c->send_sighup && k != KILL_KILL)
|
||||
kill(main_pid, SIGHUP);
|
||||
}
|
||||
}
|
||||
@ -3356,12 +3368,12 @@ int unit_kill_context(
|
||||
} else {
|
||||
wait_for_exit = true;
|
||||
|
||||
if (c->send_sighup && !sigkill)
|
||||
if (c->send_sighup && k != KILL_KILL)
|
||||
kill(control_pid, SIGHUP);
|
||||
}
|
||||
}
|
||||
|
||||
if ((c->kill_mode == KILL_CONTROL_GROUP || (c->kill_mode == KILL_MIXED && sigkill)) && u->cgroup_path) {
|
||||
if ((c->kill_mode == KILL_CONTROL_GROUP || (c->kill_mode == KILL_MIXED && k == KILL_KILL)) && u->cgroup_path) {
|
||||
_cleanup_set_free_ Set *pid_set = NULL;
|
||||
|
||||
/* Exclude the main/control pids from being killed via the cgroup */
|
||||
@ -3385,7 +3397,7 @@ int unit_kill_context(
|
||||
|
||||
/* wait_for_exit = true; */
|
||||
|
||||
if (c->send_sighup && !sigkill) {
|
||||
if (c->send_sighup && k != KILL_KILL) {
|
||||
set_free(pid_set);
|
||||
|
||||
pid_set = unit_pid_set(main_pid, control_pid);
|
||||
|
@ -54,6 +54,12 @@ enum UnitActiveState {
|
||||
_UNIT_ACTIVE_STATE_INVALID = -1
|
||||
};
|
||||
|
||||
typedef enum KillOperation {
|
||||
KILL_TERMINATE,
|
||||
KILL_KILL,
|
||||
KILL_ABORT,
|
||||
} KillOperation;
|
||||
|
||||
static inline bool UNIT_IS_ACTIVE_OR_RELOADING(UnitActiveState t) {
|
||||
return t == UNIT_ACTIVE || t == UNIT_RELOADING;
|
||||
}
|
||||
@ -576,7 +582,7 @@ int unit_write_drop_in_private_format(Unit *u, UnitSetPropertiesMode mode, const
|
||||
|
||||
int unit_remove_drop_in(Unit *u, UnitSetPropertiesMode mode, const char *name);
|
||||
|
||||
int unit_kill_context(Unit *u, KillContext *c, bool sigkill, pid_t main_pid, pid_t control_pid, bool main_pid_alien);
|
||||
int unit_kill_context(Unit *u, KillContext *c, KillOperation k, pid_t main_pid, pid_t control_pid, bool main_pid_alien);
|
||||
|
||||
int unit_make_transient(Unit *u);
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user