mirror of
https://github.com/systemd/systemd.git
synced 2025-01-11 09:18:07 +03:00
Merge pull request #28215 from poettering/start-as-restart
pid1: enqueue auto-restart job for the unit itself as JOB_START and for dependending units as JOB_RESTART
This commit is contained in:
commit
f96eeea71a
@ -204,6 +204,7 @@ static const char* const service_state_table[_SERVICE_STATE_MAX] = {
|
||||
[SERVICE_FAILED_BEFORE_AUTO_RESTART] = "failed-before-auto-restart",
|
||||
[SERVICE_DEAD_RESOURCES_PINNED] = "dead-resources-pinned",
|
||||
[SERVICE_AUTO_RESTART] = "auto-restart",
|
||||
[SERVICE_AUTO_RESTART_QUEUED] = "auto-restart-queued",
|
||||
[SERVICE_CLEANING] = "cleaning",
|
||||
};
|
||||
|
||||
|
@ -149,6 +149,7 @@ typedef enum ServiceState {
|
||||
SERVICE_FAILED_BEFORE_AUTO_RESTART,
|
||||
SERVICE_DEAD_RESOURCES_PINNED, /* Like SERVICE_DEAD, but with pinned resources */
|
||||
SERVICE_AUTO_RESTART,
|
||||
SERVICE_AUTO_RESTART_QUEUED,
|
||||
SERVICE_CLEANING,
|
||||
_SERVICE_STATE_MAX,
|
||||
_SERVICE_STATE_INVALID = -EINVAL,
|
||||
|
@ -1634,6 +1634,7 @@ static const char* const job_mode_table[_JOB_MODE_MAX] = {
|
||||
[JOB_IGNORE_DEPENDENCIES] = "ignore-dependencies",
|
||||
[JOB_IGNORE_REQUIREMENTS] = "ignore-requirements",
|
||||
[JOB_TRIGGERING] = "triggering",
|
||||
[JOB_RESTART_DEPENDENCIES] = "restart-dependencies",
|
||||
};
|
||||
|
||||
DEFINE_STRING_TABLE_LOOKUP(job_mode, JobMode);
|
||||
|
@ -79,6 +79,7 @@ enum JobMode {
|
||||
JOB_IGNORE_DEPENDENCIES, /* Ignore both requirement and ordering dependencies */
|
||||
JOB_IGNORE_REQUIREMENTS, /* Ignore requirement dependencies */
|
||||
JOB_TRIGGERING, /* Adds TRIGGERED_BY dependencies to the same transaction */
|
||||
JOB_RESTART_DEPENDENCIES,/* A "start" job for the specified unit becomes "restart" for depending units */
|
||||
_JOB_MODE_MAX,
|
||||
_JOB_MODE_INVALID = -EINVAL,
|
||||
};
|
||||
|
@ -2044,6 +2044,9 @@ int manager_add_job(
|
||||
if (mode == JOB_TRIGGERING && type != JOB_STOP)
|
||||
return sd_bus_error_set(error, SD_BUS_ERROR_INVALID_ARGS, "--job-mode=triggering is only valid for stop.");
|
||||
|
||||
if (mode == JOB_RESTART_DEPENDENCIES && type != JOB_START)
|
||||
return sd_bus_error_set(error, SD_BUS_ERROR_INVALID_ARGS, "--job-mode=restart-dependencies is only valid for start.");
|
||||
|
||||
log_unit_debug(unit, "Trying to enqueue job %s/%s/%s", unit->id, job_type_to_string(type), job_mode_to_string(mode));
|
||||
|
||||
type = job_type_collapse(type, unit);
|
||||
@ -2059,7 +2062,8 @@ int manager_add_job(
|
||||
/* by= */ NULL,
|
||||
TRANSACTION_MATTERS |
|
||||
(IN_SET(mode, JOB_IGNORE_DEPENDENCIES, JOB_IGNORE_REQUIREMENTS) ? TRANSACTION_IGNORE_REQUIREMENTS : 0) |
|
||||
(mode == JOB_IGNORE_DEPENDENCIES ? TRANSACTION_IGNORE_ORDER : 0),
|
||||
(mode == JOB_IGNORE_DEPENDENCIES ? TRANSACTION_IGNORE_ORDER : 0) |
|
||||
(mode == JOB_RESTART_DEPENDENCIES ? TRANSACTION_PROPAGATE_START_AS_RESTART : 0),
|
||||
error);
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
@ -72,6 +72,7 @@ static const UnitActiveState state_translation_table[_SERVICE_STATE_MAX] = {
|
||||
[SERVICE_FAILED_BEFORE_AUTO_RESTART] = UNIT_FAILED,
|
||||
[SERVICE_DEAD_RESOURCES_PINNED] = UNIT_INACTIVE,
|
||||
[SERVICE_AUTO_RESTART] = UNIT_ACTIVATING,
|
||||
[SERVICE_AUTO_RESTART_QUEUED] = UNIT_ACTIVATING,
|
||||
[SERVICE_CLEANING] = UNIT_MAINTENANCE,
|
||||
};
|
||||
|
||||
@ -101,6 +102,7 @@ static const UnitActiveState state_translation_table_idle[_SERVICE_STATE_MAX] =
|
||||
[SERVICE_FAILED_BEFORE_AUTO_RESTART] = UNIT_FAILED,
|
||||
[SERVICE_DEAD_RESOURCES_PINNED] = UNIT_INACTIVE,
|
||||
[SERVICE_AUTO_RESTART] = UNIT_ACTIVATING,
|
||||
[SERVICE_AUTO_RESTART_QUEUED] = UNIT_ACTIVATING,
|
||||
[SERVICE_CLEANING] = UNIT_MAINTENANCE,
|
||||
};
|
||||
|
||||
@ -285,12 +287,10 @@ usec_t service_restart_usec_next(Service *s) {
|
||||
|
||||
assert(s);
|
||||
|
||||
/* When the service state is in SERVICE_*_BEFORE_AUTO_RESTART or SERVICE_AUTO_RESTART,
|
||||
* we still need to add 1 to s->n_restarts manually because s->n_restarts is not updated
|
||||
* until a restart job is enqueued. Note that for SERVICE_AUTO_RESTART, that might have been
|
||||
* the case, i.e. s->n_restarts is already increased. But we assume it's not since the time
|
||||
* between job enqueuing and running is usually neglectable compared to the time we'll be sleeping. */
|
||||
n_restarts_next = s->n_restarts + 1;
|
||||
/* When the service state is in SERVICE_*_BEFORE_AUTO_RESTART or SERVICE_AUTO_RESTART, we still need
|
||||
* to add 1 to s->n_restarts manually, because s->n_restarts is not updated until a restart job is
|
||||
* enqueued, i.e. state has transitioned to SERVICE_AUTO_RESTART_QUEUED. */
|
||||
n_restarts_next = s->n_restarts + (s->state == SERVICE_AUTO_RESTART_QUEUED ? 0 : 1);
|
||||
|
||||
if (n_restarts_next <= 1 ||
|
||||
s->restart_steps == 0 ||
|
||||
@ -303,7 +303,7 @@ usec_t service_restart_usec_next(Service *s) {
|
||||
/* Enforced in service_verify() and above */
|
||||
assert(s->restart_max_delay_usec > s->restart_usec);
|
||||
|
||||
/* ((restart_usec_max - restart_usec)^(1/restart_steps))^(n_restart_next - 1) */
|
||||
/* ((restart_max_delay_usec - restart_usec)^(1/restart_steps))^(n_restart_next - 1) */
|
||||
value = usec_add(s->restart_usec,
|
||||
(usec_t) powl(s->restart_max_delay_usec - s->restart_usec,
|
||||
(long double) (n_restarts_next - 1) / s->restart_steps));
|
||||
@ -1258,7 +1258,7 @@ static void service_set_state(Service *s, ServiceState state) {
|
||||
|
||||
if (IN_SET(state,
|
||||
SERVICE_DEAD, SERVICE_FAILED,
|
||||
SERVICE_DEAD_BEFORE_AUTO_RESTART, SERVICE_FAILED_BEFORE_AUTO_RESTART, SERVICE_AUTO_RESTART,
|
||||
SERVICE_DEAD_BEFORE_AUTO_RESTART, SERVICE_FAILED_BEFORE_AUTO_RESTART, SERVICE_AUTO_RESTART, SERVICE_AUTO_RESTART_QUEUED,
|
||||
SERVICE_DEAD_RESOURCES_PINNED)) {
|
||||
unit_unwatch_all_pids(UNIT(s));
|
||||
unit_dequeue_rewatch_pids(UNIT(s));
|
||||
@ -1363,7 +1363,7 @@ static int service_coldplug(Unit *u) {
|
||||
|
||||
if (!IN_SET(s->deserialized_state,
|
||||
SERVICE_DEAD, SERVICE_FAILED,
|
||||
SERVICE_DEAD_BEFORE_AUTO_RESTART, SERVICE_FAILED_BEFORE_AUTO_RESTART, SERVICE_AUTO_RESTART,
|
||||
SERVICE_DEAD_BEFORE_AUTO_RESTART, SERVICE_FAILED_BEFORE_AUTO_RESTART, SERVICE_AUTO_RESTART, SERVICE_AUTO_RESTART_QUEUED,
|
||||
SERVICE_CLEANING,
|
||||
SERVICE_DEAD_RESOURCES_PINNED)) {
|
||||
(void) unit_enqueue_rewatch_pids(u);
|
||||
@ -1945,7 +1945,7 @@ static bool service_will_restart(Unit *u) {
|
||||
|
||||
assert(s);
|
||||
|
||||
if (IN_SET(s->state, SERVICE_DEAD_BEFORE_AUTO_RESTART, SERVICE_FAILED_BEFORE_AUTO_RESTART, SERVICE_AUTO_RESTART))
|
||||
if (IN_SET(s->state, SERVICE_DEAD_BEFORE_AUTO_RESTART, SERVICE_FAILED_BEFORE_AUTO_RESTART, SERVICE_AUTO_RESTART, SERVICE_AUTO_RESTART_QUEUED))
|
||||
return true;
|
||||
|
||||
return unit_will_restart_default(u);
|
||||
@ -2511,17 +2511,15 @@ static void service_enter_restart(Service *s) {
|
||||
return;
|
||||
}
|
||||
|
||||
/* Any units that are bound to this service must also be
|
||||
* restarted. We use JOB_RESTART (instead of the more obvious
|
||||
* JOB_START) here so that those dependency jobs will be added
|
||||
* as well. */
|
||||
r = manager_add_job(UNIT(s)->manager, JOB_RESTART, UNIT(s), JOB_REPLACE, NULL, &error, NULL);
|
||||
/* Any units that are bound to this service must also be restarted. We use JOB_START for ourselves
|
||||
* but then set JOB_RESTART_DEPENDENCIES which will enqueue JOB_RESTART for those dependency jobs. */
|
||||
r = manager_add_job(UNIT(s)->manager, JOB_START, UNIT(s), JOB_RESTART_DEPENDENCIES, NULL, &error, NULL);
|
||||
if (r < 0)
|
||||
goto fail;
|
||||
|
||||
/* Count the jobs we enqueue for restarting. This counter is maintained as long as the unit isn't fully
|
||||
* stopped, i.e. as long as it remains up or remains in auto-start states. The user can reset the counter
|
||||
* explicitly however via the usual "systemctl reset-failure" logic. */
|
||||
/* Count the jobs we enqueue for restarting. This counter is maintained as long as the unit isn't
|
||||
* fully stopped, i.e. as long as it remains up or remains in auto-start states. The user can reset
|
||||
* the counter explicitly however via the usual "systemctl reset-failure" logic. */
|
||||
s->n_restarts ++;
|
||||
s->flush_n_restarts = false;
|
||||
|
||||
@ -2534,13 +2532,10 @@ static void service_enter_restart(Service *s) {
|
||||
"Scheduled restart job, restart counter is at %u.", s->n_restarts),
|
||||
"N_RESTARTS=%u", s->n_restarts);
|
||||
|
||||
service_set_state(s, SERVICE_AUTO_RESTART_QUEUED);
|
||||
|
||||
/* Notify clients about changed restart counter */
|
||||
unit_add_to_dbus_queue(UNIT(s));
|
||||
|
||||
/* Note that we stay in the SERVICE_AUTO_RESTART state here,
|
||||
* it will be canceled as part of the service_stop() call that
|
||||
* is executed as part of JOB_RESTART. */
|
||||
|
||||
return;
|
||||
|
||||
fail:
|
||||
@ -2719,7 +2714,7 @@ static int service_start(Unit *u) {
|
||||
if (IN_SET(s->state, SERVICE_AUTO_RESTART, SERVICE_DEAD_BEFORE_AUTO_RESTART, SERVICE_FAILED_BEFORE_AUTO_RESTART))
|
||||
return -EAGAIN;
|
||||
|
||||
assert(IN_SET(s->state, SERVICE_DEAD, SERVICE_FAILED, SERVICE_DEAD_RESOURCES_PINNED));
|
||||
assert(IN_SET(s->state, SERVICE_DEAD, SERVICE_FAILED, SERVICE_DEAD_RESOURCES_PINNED, SERVICE_AUTO_RESTART_QUEUED));
|
||||
|
||||
r = unit_acquire_invocation_id(u);
|
||||
if (r < 0)
|
||||
@ -2777,7 +2772,8 @@ static int service_stop(Unit *u) {
|
||||
return 0;
|
||||
|
||||
case SERVICE_AUTO_RESTART:
|
||||
/* A restart will be scheduled or is in progress. */
|
||||
case SERVICE_AUTO_RESTART_QUEUED:
|
||||
/* Give up on the auto restart */
|
||||
service_set_state(s, service_determine_dead_state(s));
|
||||
return 0;
|
||||
|
||||
@ -3650,6 +3646,7 @@ static void service_notify_cgroup_empty_event(Unit *u) {
|
||||
/* If the cgroup empty notification comes when the unit is not active, we must have failed to clean
|
||||
* up the cgroup earlier and should do it now. */
|
||||
case SERVICE_AUTO_RESTART:
|
||||
case SERVICE_AUTO_RESTART_QUEUED:
|
||||
unit_prune_cgroup(u);
|
||||
break;
|
||||
|
||||
|
@ -1054,34 +1054,38 @@ int transaction_add_job_and_dependencies(
|
||||
}
|
||||
}
|
||||
|
||||
if (IN_SET(type, JOB_STOP, JOB_RESTART)) {
|
||||
_cleanup_set_free_ Set *propagated_restart = NULL;
|
||||
/* We propagate STOP as STOP, but RESTART only as TRY_RESTART, in order not to start
|
||||
* dependencies that are not around. */
|
||||
_cleanup_set_free_ Set *propagated_restart = NULL;
|
||||
|
||||
if (type == JOB_RESTART)
|
||||
UNIT_FOREACH_DEPENDENCY(dep, ret->unit, UNIT_ATOM_PROPAGATE_RESTART) {
|
||||
JobType nt;
|
||||
if (type == JOB_RESTART || (type == JOB_START && FLAGS_SET(flags, TRANSACTION_PROPAGATE_START_AS_RESTART))) {
|
||||
|
||||
r = set_ensure_put(&propagated_restart, NULL, dep);
|
||||
if (r < 0)
|
||||
/* We propagate RESTART only as TRY_RESTART, in order not to start dependencies that
|
||||
* are not around. */
|
||||
|
||||
UNIT_FOREACH_DEPENDENCY(dep, ret->unit, UNIT_ATOM_PROPAGATE_RESTART) {
|
||||
JobType nt;
|
||||
|
||||
r = set_ensure_put(&propagated_restart, NULL, dep);
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
||||
nt = job_type_collapse(JOB_TRY_RESTART, dep);
|
||||
if (nt == JOB_NOP)
|
||||
continue;
|
||||
|
||||
r = transaction_add_job_and_dependencies(tr, nt, dep, ret, TRANSACTION_MATTERS | (flags & TRANSACTION_IGNORE_ORDER), e);
|
||||
if (r < 0) {
|
||||
if (r != -EBADR) /* job type not applicable */
|
||||
return r;
|
||||
|
||||
nt = job_type_collapse(JOB_TRY_RESTART, dep);
|
||||
if (nt == JOB_NOP)
|
||||
continue;
|
||||
|
||||
r = transaction_add_job_and_dependencies(tr, nt, dep, ret, TRANSACTION_MATTERS | (flags & TRANSACTION_IGNORE_ORDER), e);
|
||||
if (r < 0) {
|
||||
if (r != -EBADR) /* job type not applicable */
|
||||
return r;
|
||||
|
||||
sd_bus_error_free(e);
|
||||
}
|
||||
sd_bus_error_free(e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (type == JOB_STOP) {
|
||||
/* The 'stop' part of a restart job is also propagated to units with
|
||||
* UNIT_ATOM_PROPAGATE_STOP */
|
||||
|
||||
UNIT_FOREACH_DEPENDENCY(dep, ret->unit, UNIT_ATOM_PROPAGATE_STOP) {
|
||||
/* Units experienced restart propagation are skipped */
|
||||
if (set_contains(propagated_restart, dep))
|
||||
|
@ -21,10 +21,13 @@ Transaction *transaction_abort_and_free(Transaction *tr);
|
||||
DEFINE_TRIVIAL_CLEANUP_FUNC(Transaction*, transaction_abort_and_free);
|
||||
|
||||
typedef enum TransactionAddFlags {
|
||||
TRANSACTION_MATTERS = 1 << 0,
|
||||
TRANSACTION_CONFLICTS = 1 << 1,
|
||||
TRANSACTION_IGNORE_REQUIREMENTS = 1 << 2,
|
||||
TRANSACTION_IGNORE_ORDER = 1 << 3,
|
||||
TRANSACTION_MATTERS = 1 << 0,
|
||||
TRANSACTION_CONFLICTS = 1 << 1,
|
||||
TRANSACTION_IGNORE_REQUIREMENTS = 1 << 2,
|
||||
TRANSACTION_IGNORE_ORDER = 1 << 3,
|
||||
|
||||
/* Propagate a START job to other units like a RESTART */
|
||||
TRANSACTION_PROPAGATE_START_AS_RESTART = 1 << 4,
|
||||
} TransactionAddFlags;
|
||||
|
||||
void transaction_add_propagate_reload_jobs(
|
||||
|
3
test/units/success-failure-test-failure.service
Normal file
3
test/units/success-failure-test-failure.service
Normal file
@ -0,0 +1,3 @@
|
||||
[Service]
|
||||
Type=notify
|
||||
ExecStart=bash -c "echo failure >> /tmp/success-failure-test-result && systemd-notify --ready && sleep infinity"
|
3
test/units/success-failure-test-success.service
Normal file
3
test/units/success-failure-test-success.service
Normal file
@ -0,0 +1,3 @@
|
||||
[Service]
|
||||
Type=notify
|
||||
ExecStart=bash -c "echo success >> /tmp/success-failure-test-result && systemd-notify --ready && sleep infinity"
|
9
test/units/success-failure-test.service
Normal file
9
test/units/success-failure-test.service
Normal file
@ -0,0 +1,9 @@
|
||||
[Unit]
|
||||
OnSuccess=success-failure-test-success.service
|
||||
OnFailure=success-failure-test-failure.service
|
||||
|
||||
[Service]
|
||||
Type=notify
|
||||
Restart=always
|
||||
ExecStart=bash -c 'test -f /tmp/success-failure-test-ran && touch /tmp/success-failure-test-ran2 && systemd-notify --ready && sleep infinity'
|
||||
ExecStopPost=touch /tmp/success-failure-test-ran
|
49
test/units/testsuite-23.success-failure.sh
Executable file
49
test/units/testsuite-23.success-failure.sh
Executable file
@ -0,0 +1,49 @@
|
||||
#!/usr/bin/env bash
|
||||
# SPDX-License-Identifier: LGPL-2.1-or-later
|
||||
set -eux
|
||||
set -o pipefail
|
||||
|
||||
# Test OnSuccess=/OnFailure= in combination
|
||||
|
||||
systemd-analyze log-level debug
|
||||
|
||||
# Start-up should fail, but the automatic restart should fix it
|
||||
(! systemctl start success-failure-test )
|
||||
|
||||
# Wait until the first invocation finished & failed
|
||||
while test ! -f /tmp/success-failure-test-ran ; do
|
||||
sleep .5
|
||||
done
|
||||
|
||||
# Wait until the second invocation finished & succeeded
|
||||
while test ! -f /tmp/success-failure-test-ran2 ; do
|
||||
sleep .5
|
||||
done
|
||||
|
||||
# Verify it is indeed running
|
||||
systemctl is-active -q success-failure-test
|
||||
|
||||
# The above should have caused the failure service to start (asynchronously)
|
||||
while test "$(systemctl is-active success-failure-test-failure)" != "active" ; do
|
||||
sleep .5
|
||||
done
|
||||
|
||||
# But the success service should not have started
|
||||
test "$(systemctl is-active success-failure-test-success)" = "inactive"
|
||||
|
||||
systemctl stop success-failure-test-failure
|
||||
|
||||
# Do a clean kill of the service now
|
||||
systemctl kill success-failure-test
|
||||
|
||||
# This should result in the success service to start
|
||||
while test "$(systemctl is-active success-failure-test-success)" != "active" ; do
|
||||
sleep .5
|
||||
done
|
||||
|
||||
# But the failure service should not have started again
|
||||
test "$(systemctl is-active success-failure-test-failure)" = "inactive"
|
||||
|
||||
systemctl stop success-failure-test success-failure-test-success
|
||||
|
||||
systemd-analyze log-level info
|
Loading…
Reference in New Issue
Block a user