1
0
mirror of https://github.com/systemd/systemd.git synced 2025-01-11 09:18:07 +03:00

Merge pull request #28215 from poettering/start-as-restart

pid1: enqueue auto-restart job for the unit itself as JOB_START and for dependending units as JOB_RESTART
This commit is contained in:
Mike Yuan 2023-07-04 05:54:56 +08:00 committed by GitHub
commit f96eeea71a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
12 changed files with 126 additions and 50 deletions

View File

@ -204,6 +204,7 @@ static const char* const service_state_table[_SERVICE_STATE_MAX] = {
[SERVICE_FAILED_BEFORE_AUTO_RESTART] = "failed-before-auto-restart",
[SERVICE_DEAD_RESOURCES_PINNED] = "dead-resources-pinned",
[SERVICE_AUTO_RESTART] = "auto-restart",
[SERVICE_AUTO_RESTART_QUEUED] = "auto-restart-queued",
[SERVICE_CLEANING] = "cleaning",
};

View File

@ -149,6 +149,7 @@ typedef enum ServiceState {
SERVICE_FAILED_BEFORE_AUTO_RESTART,
SERVICE_DEAD_RESOURCES_PINNED, /* Like SERVICE_DEAD, but with pinned resources */
SERVICE_AUTO_RESTART,
SERVICE_AUTO_RESTART_QUEUED,
SERVICE_CLEANING,
_SERVICE_STATE_MAX,
_SERVICE_STATE_INVALID = -EINVAL,

View File

@ -1634,6 +1634,7 @@ static const char* const job_mode_table[_JOB_MODE_MAX] = {
[JOB_IGNORE_DEPENDENCIES] = "ignore-dependencies",
[JOB_IGNORE_REQUIREMENTS] = "ignore-requirements",
[JOB_TRIGGERING] = "triggering",
[JOB_RESTART_DEPENDENCIES] = "restart-dependencies",
};
DEFINE_STRING_TABLE_LOOKUP(job_mode, JobMode);

View File

@ -79,6 +79,7 @@ enum JobMode {
JOB_IGNORE_DEPENDENCIES, /* Ignore both requirement and ordering dependencies */
JOB_IGNORE_REQUIREMENTS, /* Ignore requirement dependencies */
JOB_TRIGGERING, /* Adds TRIGGERED_BY dependencies to the same transaction */
JOB_RESTART_DEPENDENCIES,/* A "start" job for the specified unit becomes "restart" for depending units */
_JOB_MODE_MAX,
_JOB_MODE_INVALID = -EINVAL,
};

View File

@ -2044,6 +2044,9 @@ int manager_add_job(
if (mode == JOB_TRIGGERING && type != JOB_STOP)
return sd_bus_error_set(error, SD_BUS_ERROR_INVALID_ARGS, "--job-mode=triggering is only valid for stop.");
if (mode == JOB_RESTART_DEPENDENCIES && type != JOB_START)
return sd_bus_error_set(error, SD_BUS_ERROR_INVALID_ARGS, "--job-mode=restart-dependencies is only valid for start.");
log_unit_debug(unit, "Trying to enqueue job %s/%s/%s", unit->id, job_type_to_string(type), job_mode_to_string(mode));
type = job_type_collapse(type, unit);
@ -2059,7 +2062,8 @@ int manager_add_job(
/* by= */ NULL,
TRANSACTION_MATTERS |
(IN_SET(mode, JOB_IGNORE_DEPENDENCIES, JOB_IGNORE_REQUIREMENTS) ? TRANSACTION_IGNORE_REQUIREMENTS : 0) |
(mode == JOB_IGNORE_DEPENDENCIES ? TRANSACTION_IGNORE_ORDER : 0),
(mode == JOB_IGNORE_DEPENDENCIES ? TRANSACTION_IGNORE_ORDER : 0) |
(mode == JOB_RESTART_DEPENDENCIES ? TRANSACTION_PROPAGATE_START_AS_RESTART : 0),
error);
if (r < 0)
return r;

View File

@ -72,6 +72,7 @@ static const UnitActiveState state_translation_table[_SERVICE_STATE_MAX] = {
[SERVICE_FAILED_BEFORE_AUTO_RESTART] = UNIT_FAILED,
[SERVICE_DEAD_RESOURCES_PINNED] = UNIT_INACTIVE,
[SERVICE_AUTO_RESTART] = UNIT_ACTIVATING,
[SERVICE_AUTO_RESTART_QUEUED] = UNIT_ACTIVATING,
[SERVICE_CLEANING] = UNIT_MAINTENANCE,
};
@ -101,6 +102,7 @@ static const UnitActiveState state_translation_table_idle[_SERVICE_STATE_MAX] =
[SERVICE_FAILED_BEFORE_AUTO_RESTART] = UNIT_FAILED,
[SERVICE_DEAD_RESOURCES_PINNED] = UNIT_INACTIVE,
[SERVICE_AUTO_RESTART] = UNIT_ACTIVATING,
[SERVICE_AUTO_RESTART_QUEUED] = UNIT_ACTIVATING,
[SERVICE_CLEANING] = UNIT_MAINTENANCE,
};
@ -285,12 +287,10 @@ usec_t service_restart_usec_next(Service *s) {
assert(s);
/* When the service state is in SERVICE_*_BEFORE_AUTO_RESTART or SERVICE_AUTO_RESTART,
* we still need to add 1 to s->n_restarts manually because s->n_restarts is not updated
* until a restart job is enqueued. Note that for SERVICE_AUTO_RESTART, that might have been
* the case, i.e. s->n_restarts is already increased. But we assume it's not since the time
* between job enqueuing and running is usually neglectable compared to the time we'll be sleeping. */
n_restarts_next = s->n_restarts + 1;
/* When the service state is in SERVICE_*_BEFORE_AUTO_RESTART or SERVICE_AUTO_RESTART, we still need
* to add 1 to s->n_restarts manually, because s->n_restarts is not updated until a restart job is
* enqueued, i.e. state has transitioned to SERVICE_AUTO_RESTART_QUEUED. */
n_restarts_next = s->n_restarts + (s->state == SERVICE_AUTO_RESTART_QUEUED ? 0 : 1);
if (n_restarts_next <= 1 ||
s->restart_steps == 0 ||
@ -303,7 +303,7 @@ usec_t service_restart_usec_next(Service *s) {
/* Enforced in service_verify() and above */
assert(s->restart_max_delay_usec > s->restart_usec);
/* ((restart_usec_max - restart_usec)^(1/restart_steps))^(n_restart_next - 1) */
/* ((restart_max_delay_usec - restart_usec)^(1/restart_steps))^(n_restart_next - 1) */
value = usec_add(s->restart_usec,
(usec_t) powl(s->restart_max_delay_usec - s->restart_usec,
(long double) (n_restarts_next - 1) / s->restart_steps));
@ -1258,7 +1258,7 @@ static void service_set_state(Service *s, ServiceState state) {
if (IN_SET(state,
SERVICE_DEAD, SERVICE_FAILED,
SERVICE_DEAD_BEFORE_AUTO_RESTART, SERVICE_FAILED_BEFORE_AUTO_RESTART, SERVICE_AUTO_RESTART,
SERVICE_DEAD_BEFORE_AUTO_RESTART, SERVICE_FAILED_BEFORE_AUTO_RESTART, SERVICE_AUTO_RESTART, SERVICE_AUTO_RESTART_QUEUED,
SERVICE_DEAD_RESOURCES_PINNED)) {
unit_unwatch_all_pids(UNIT(s));
unit_dequeue_rewatch_pids(UNIT(s));
@ -1363,7 +1363,7 @@ static int service_coldplug(Unit *u) {
if (!IN_SET(s->deserialized_state,
SERVICE_DEAD, SERVICE_FAILED,
SERVICE_DEAD_BEFORE_AUTO_RESTART, SERVICE_FAILED_BEFORE_AUTO_RESTART, SERVICE_AUTO_RESTART,
SERVICE_DEAD_BEFORE_AUTO_RESTART, SERVICE_FAILED_BEFORE_AUTO_RESTART, SERVICE_AUTO_RESTART, SERVICE_AUTO_RESTART_QUEUED,
SERVICE_CLEANING,
SERVICE_DEAD_RESOURCES_PINNED)) {
(void) unit_enqueue_rewatch_pids(u);
@ -1945,7 +1945,7 @@ static bool service_will_restart(Unit *u) {
assert(s);
if (IN_SET(s->state, SERVICE_DEAD_BEFORE_AUTO_RESTART, SERVICE_FAILED_BEFORE_AUTO_RESTART, SERVICE_AUTO_RESTART))
if (IN_SET(s->state, SERVICE_DEAD_BEFORE_AUTO_RESTART, SERVICE_FAILED_BEFORE_AUTO_RESTART, SERVICE_AUTO_RESTART, SERVICE_AUTO_RESTART_QUEUED))
return true;
return unit_will_restart_default(u);
@ -2511,17 +2511,15 @@ static void service_enter_restart(Service *s) {
return;
}
/* Any units that are bound to this service must also be
* restarted. We use JOB_RESTART (instead of the more obvious
* JOB_START) here so that those dependency jobs will be added
* as well. */
r = manager_add_job(UNIT(s)->manager, JOB_RESTART, UNIT(s), JOB_REPLACE, NULL, &error, NULL);
/* Any units that are bound to this service must also be restarted. We use JOB_START for ourselves
* but then set JOB_RESTART_DEPENDENCIES which will enqueue JOB_RESTART for those dependency jobs. */
r = manager_add_job(UNIT(s)->manager, JOB_START, UNIT(s), JOB_RESTART_DEPENDENCIES, NULL, &error, NULL);
if (r < 0)
goto fail;
/* Count the jobs we enqueue for restarting. This counter is maintained as long as the unit isn't fully
* stopped, i.e. as long as it remains up or remains in auto-start states. The user can reset the counter
* explicitly however via the usual "systemctl reset-failure" logic. */
/* Count the jobs we enqueue for restarting. This counter is maintained as long as the unit isn't
* fully stopped, i.e. as long as it remains up or remains in auto-start states. The user can reset
* the counter explicitly however via the usual "systemctl reset-failure" logic. */
s->n_restarts ++;
s->flush_n_restarts = false;
@ -2534,13 +2532,10 @@ static void service_enter_restart(Service *s) {
"Scheduled restart job, restart counter is at %u.", s->n_restarts),
"N_RESTARTS=%u", s->n_restarts);
service_set_state(s, SERVICE_AUTO_RESTART_QUEUED);
/* Notify clients about changed restart counter */
unit_add_to_dbus_queue(UNIT(s));
/* Note that we stay in the SERVICE_AUTO_RESTART state here,
* it will be canceled as part of the service_stop() call that
* is executed as part of JOB_RESTART. */
return;
fail:
@ -2719,7 +2714,7 @@ static int service_start(Unit *u) {
if (IN_SET(s->state, SERVICE_AUTO_RESTART, SERVICE_DEAD_BEFORE_AUTO_RESTART, SERVICE_FAILED_BEFORE_AUTO_RESTART))
return -EAGAIN;
assert(IN_SET(s->state, SERVICE_DEAD, SERVICE_FAILED, SERVICE_DEAD_RESOURCES_PINNED));
assert(IN_SET(s->state, SERVICE_DEAD, SERVICE_FAILED, SERVICE_DEAD_RESOURCES_PINNED, SERVICE_AUTO_RESTART_QUEUED));
r = unit_acquire_invocation_id(u);
if (r < 0)
@ -2777,7 +2772,8 @@ static int service_stop(Unit *u) {
return 0;
case SERVICE_AUTO_RESTART:
/* A restart will be scheduled or is in progress. */
case SERVICE_AUTO_RESTART_QUEUED:
/* Give up on the auto restart */
service_set_state(s, service_determine_dead_state(s));
return 0;
@ -3650,6 +3646,7 @@ static void service_notify_cgroup_empty_event(Unit *u) {
/* If the cgroup empty notification comes when the unit is not active, we must have failed to clean
* up the cgroup earlier and should do it now. */
case SERVICE_AUTO_RESTART:
case SERVICE_AUTO_RESTART_QUEUED:
unit_prune_cgroup(u);
break;

View File

@ -1054,34 +1054,38 @@ int transaction_add_job_and_dependencies(
}
}
if (IN_SET(type, JOB_STOP, JOB_RESTART)) {
_cleanup_set_free_ Set *propagated_restart = NULL;
/* We propagate STOP as STOP, but RESTART only as TRY_RESTART, in order not to start
* dependencies that are not around. */
_cleanup_set_free_ Set *propagated_restart = NULL;
if (type == JOB_RESTART)
UNIT_FOREACH_DEPENDENCY(dep, ret->unit, UNIT_ATOM_PROPAGATE_RESTART) {
JobType nt;
if (type == JOB_RESTART || (type == JOB_START && FLAGS_SET(flags, TRANSACTION_PROPAGATE_START_AS_RESTART))) {
r = set_ensure_put(&propagated_restart, NULL, dep);
if (r < 0)
/* We propagate RESTART only as TRY_RESTART, in order not to start dependencies that
* are not around. */
UNIT_FOREACH_DEPENDENCY(dep, ret->unit, UNIT_ATOM_PROPAGATE_RESTART) {
JobType nt;
r = set_ensure_put(&propagated_restart, NULL, dep);
if (r < 0)
return r;
nt = job_type_collapse(JOB_TRY_RESTART, dep);
if (nt == JOB_NOP)
continue;
r = transaction_add_job_and_dependencies(tr, nt, dep, ret, TRANSACTION_MATTERS | (flags & TRANSACTION_IGNORE_ORDER), e);
if (r < 0) {
if (r != -EBADR) /* job type not applicable */
return r;
nt = job_type_collapse(JOB_TRY_RESTART, dep);
if (nt == JOB_NOP)
continue;
r = transaction_add_job_and_dependencies(tr, nt, dep, ret, TRANSACTION_MATTERS | (flags & TRANSACTION_IGNORE_ORDER), e);
if (r < 0) {
if (r != -EBADR) /* job type not applicable */
return r;
sd_bus_error_free(e);
}
sd_bus_error_free(e);
}
}
}
if (type == JOB_STOP) {
/* The 'stop' part of a restart job is also propagated to units with
* UNIT_ATOM_PROPAGATE_STOP */
UNIT_FOREACH_DEPENDENCY(dep, ret->unit, UNIT_ATOM_PROPAGATE_STOP) {
/* Units experienced restart propagation are skipped */
if (set_contains(propagated_restart, dep))

View File

@ -21,10 +21,13 @@ Transaction *transaction_abort_and_free(Transaction *tr);
DEFINE_TRIVIAL_CLEANUP_FUNC(Transaction*, transaction_abort_and_free);
typedef enum TransactionAddFlags {
TRANSACTION_MATTERS = 1 << 0,
TRANSACTION_CONFLICTS = 1 << 1,
TRANSACTION_IGNORE_REQUIREMENTS = 1 << 2,
TRANSACTION_IGNORE_ORDER = 1 << 3,
TRANSACTION_MATTERS = 1 << 0,
TRANSACTION_CONFLICTS = 1 << 1,
TRANSACTION_IGNORE_REQUIREMENTS = 1 << 2,
TRANSACTION_IGNORE_ORDER = 1 << 3,
/* Propagate a START job to other units like a RESTART */
TRANSACTION_PROPAGATE_START_AS_RESTART = 1 << 4,
} TransactionAddFlags;
void transaction_add_propagate_reload_jobs(

View File

@ -0,0 +1,3 @@
[Service]
Type=notify
ExecStart=bash -c "echo failure >> /tmp/success-failure-test-result && systemd-notify --ready && sleep infinity"

View File

@ -0,0 +1,3 @@
[Service]
Type=notify
ExecStart=bash -c "echo success >> /tmp/success-failure-test-result && systemd-notify --ready && sleep infinity"

View File

@ -0,0 +1,9 @@
[Unit]
OnSuccess=success-failure-test-success.service
OnFailure=success-failure-test-failure.service
[Service]
Type=notify
Restart=always
ExecStart=bash -c 'test -f /tmp/success-failure-test-ran && touch /tmp/success-failure-test-ran2 && systemd-notify --ready && sleep infinity'
ExecStopPost=touch /tmp/success-failure-test-ran

View File

@ -0,0 +1,49 @@
#!/usr/bin/env bash
# SPDX-License-Identifier: LGPL-2.1-or-later
set -eux
set -o pipefail
# Test OnSuccess=/OnFailure= in combination
systemd-analyze log-level debug
# Start-up should fail, but the automatic restart should fix it
(! systemctl start success-failure-test )
# Wait until the first invocation finished & failed
while test ! -f /tmp/success-failure-test-ran ; do
sleep .5
done
# Wait until the second invocation finished & succeeded
while test ! -f /tmp/success-failure-test-ran2 ; do
sleep .5
done
# Verify it is indeed running
systemctl is-active -q success-failure-test
# The above should have caused the failure service to start (asynchronously)
while test "$(systemctl is-active success-failure-test-failure)" != "active" ; do
sleep .5
done
# But the success service should not have started
test "$(systemctl is-active success-failure-test-success)" = "inactive"
systemctl stop success-failure-test-failure
# Do a clean kill of the service now
systemctl kill success-failure-test
# This should result in the success service to start
while test "$(systemctl is-active success-failure-test-success)" != "active" ; do
sleep .5
done
# But the failure service should not have started again
test "$(systemctl is-active success-failure-test-failure)" = "inactive"
systemctl stop success-failure-test success-failure-test-success
systemd-analyze log-level info