1
0
mirror of https://github.com/systemd/systemd.git synced 2024-10-30 14:55:37 +03:00

core: change BoundBy= dependency handling to be processed by a deferred work queue

So far StopWhenUnneeded= handling and UpheldBy= handling was already
processed by a queue that is dispatched in a deferred mode of operation
instead of instantly. This changes BoundBy= handling to be processed the
same way.

This should ensure that all *event*-to-job propagation is done directly
from unit_notify(), while all *state*-to-job propagation is done from a
deferred work queue, quite systematically. The work queue is submitted
to by unit_notify() too.

Key really is the difference between event and state: some jobs shall be
queued one-time on events (think: OnFailure= + OnSuccess= and similar),
others shall be queued continuously when a specific state is in effect
(think: UpheldBy=).  The latter cases are usually effect of the
combination of states of a few units (e.g. StopWhenUnneeded= checks
wether any of the Wants=/Requires=/… deps are still up before acting),
and hence it makes sense to trigger them to be run after an individual
unit's state changed, but process them on a queue that runs whenever
there's nothing else to do that ensures the decision on them is only
taken after all jobs/queued IO events are dispatched, and things
settled, so that it makes sense to come to a combined conclusion. If
we'd dispatch this work immediately inside of unit_notify() we'd always
act instantly, even though another event from another unit that is
already queued might make the work unnecessary or invalid.

This is mostly a commit to make things philosophically clean. It does
not add features, but it should make corner cases more robust.
This commit is contained in:
Lennart Poettering 2021-04-08 22:05:15 +02:00
parent 116654d2cf
commit 56c5959202
6 changed files with 116 additions and 46 deletions

View File

@ -1347,6 +1347,44 @@ static unsigned manager_dispatch_start_when_upheld_queue(Manager *m) {
return n;
}
static unsigned manager_dispatch_stop_when_bound_queue(Manager *m) {
unsigned n = 0;
Unit *u;
int r;
assert(m);
while ((u = m->stop_when_bound_queue)) {
_cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
Unit *culprit = NULL;
assert(u->in_stop_when_bound_queue);
LIST_REMOVE(stop_when_bound_queue, m->stop_when_bound_queue, u);
u->in_stop_when_bound_queue = false;
n++;
if (!unit_is_bound_by_inactive(u, &culprit))
continue;
log_unit_debug(u, "Unit is stopped because bound to inactive unit %s.", culprit->id);
/* If stopping a unit fails continuously we might enter a stop loop here, hence stop acting on the
* service being unnecessary after a while. */
if (!ratelimit_below(&u->auto_start_stop_ratelimit)) {
log_unit_warning(u, "Unit needs to be stopped because it is bound to inactive unit %s it, but not stopping since we tried this too often recently.", culprit->id);
continue;
}
r = manager_add_job(u->manager, JOB_STOP, u, JOB_REPLACE, NULL, &error, NULL);
if (r < 0)
log_unit_warning_errno(u, r, "Failed to enqueue stop job, ignoring: %s", bus_error_message(&error, r));
}
return n;
}
static void manager_clear_jobs_and_units(Manager *m) {
Unit *u;
@ -1366,6 +1404,7 @@ static void manager_clear_jobs_and_units(Manager *m) {
assert(!m->gc_job_queue);
assert(!m->stop_when_unneeded_queue);
assert(!m->start_when_upheld_queue);
assert(!m->stop_when_bound_queue);
assert(hashmap_isempty(m->jobs));
assert(hashmap_isempty(m->units));
@ -2996,6 +3035,9 @@ int manager_loop(Manager *m) {
if (manager_dispatch_start_when_upheld_queue(m) > 0)
continue;
if (manager_dispatch_stop_when_bound_queue(m) > 0)
continue;
if (manager_dispatch_stop_when_unneeded_queue(m) > 0)
continue;

View File

@ -190,6 +190,9 @@ struct Manager {
/* Units which are upheld by another other which we might need to act on */
LIST_HEAD(Unit, start_when_upheld_queue);
/* Units that have BindsTo= another unit, and might need to be shutdown because the bound unit is not active. */
LIST_HEAD(Unit, stop_when_bound_queue);
sd_event *event;
/* This maps PIDs we care about to units that are interested in. We allow multiple units to he interested in

View File

@ -62,6 +62,7 @@ static const UnitDependencyAtom atom_map[_UNIT_DEPENDENCY_MAX] = {
UNIT_ATOM_PROPAGATE_RESTART |
UNIT_ATOM_PROPAGATE_START_FAILURE |
UNIT_ATOM_PINS_STOP_WHEN_UNNEEDED |
UNIT_ATOM_ADD_CANNOT_BE_ACTIVE_WITHOUT_QUEUE |
UNIT_ATOM_DEFAULT_TARGET_DEPENDENCIES,
[UNIT_UPHELD_BY] = UNIT_ATOM_START_STEADILY |
@ -172,7 +173,9 @@ UnitDependency unit_dependency_from_unique_atom(UnitDependencyAtom atom) {
UNIT_ATOM_PROPAGATE_RESTART |
UNIT_ATOM_PROPAGATE_START_FAILURE |
UNIT_ATOM_PINS_STOP_WHEN_UNNEEDED |
UNIT_ATOM_ADD_CANNOT_BE_ACTIVE_WITHOUT_QUEUE |
UNIT_ATOM_DEFAULT_TARGET_DEPENDENCIES:
case UNIT_ATOM_ADD_CANNOT_BE_ACTIVE_WITHOUT_QUEUE:
return UNIT_BOUND_BY;
case UNIT_ATOM_START_STEADILY |

View File

@ -31,6 +31,8 @@ typedef enum UnitDependencyAtom {
/* Stop our unit if the other unit happens to inactive */
UNIT_ATOM_CANNOT_BE_ACTIVE_WITHOUT = UINT64_C(1) << 7,
/* If our unit enters inactive state, add the other unit to the BoundBy= queue */
UNIT_ATOM_ADD_CANNOT_BE_ACTIVE_WITHOUT_QUEUE = UINT64_C(1) << 8,
/* Start this unit whenever we find it inactive and the other unit active */
UNIT_ATOM_START_STEADILY = UINT64_C(1) << 9,

View File

@ -523,6 +523,22 @@ void unit_submit_to_start_when_upheld_queue(Unit *u) {
u->in_start_when_upheld_queue = true;
}
void unit_submit_to_stop_when_bound_queue(Unit *u) {
assert(u);
if (u->in_stop_when_bound_queue)
return;
if (!UNIT_IS_ACTIVE_OR_RELOADING(unit_active_state(u)))
return;
if (!unit_has_dependency(u, UNIT_ATOM_CANNOT_BE_ACTIVE_WITHOUT, NULL))
return;
LIST_PREPEND(stop_when_bound_queue, u->manager->stop_when_bound_queue, u);
u->in_stop_when_bound_queue = true;
}
static void unit_clear_dependencies(Unit *u) {
assert(u);
@ -738,6 +754,9 @@ Unit* unit_free(Unit *u) {
if (u->in_start_when_upheld_queue)
LIST_REMOVE(start_when_upheld_queue, u->manager->start_when_upheld_queue, u);
if (u->in_stop_when_bound_queue)
LIST_REMOVE(stop_when_bound_queue, u->manager->stop_when_bound_queue, u);
safe_close(u->ip_accounting_ingress_map_fd);
safe_close(u->ip_accounting_egress_map_fd);
@ -2060,6 +2079,38 @@ bool unit_is_upheld_by_active(Unit *u, Unit **ret_culprit) {
return false;
}
bool unit_is_bound_by_inactive(Unit *u, Unit **ret_culprit) {
Unit *other;
assert(u);
/* Checks whether this unit is bound to another unit that is inactive, i.e. whether we should stop
* because the other unit is down.*/
if (unit_active_state(u) != UNIT_ACTIVE || u->job) {
/* Don't clean up while the unit is transitioning or is even inactive. */
if (ret_culprit)
*ret_culprit = NULL;
return false;
}
UNIT_FOREACH_DEPENDENCY(other, u, UNIT_ATOM_CANNOT_BE_ACTIVE_WITHOUT) {
if (other->job)
continue;
if (UNIT_IS_INACTIVE_OR_FAILED(unit_active_state(other))) {
if (*ret_culprit)
*ret_culprit = other;
return true;
}
}
if (ret_culprit)
*ret_culprit = NULL;
return false;
}
static void check_unneeded_dependencies(Unit *u) {
Unit *other;
assert(u);
@ -2080,53 +2131,14 @@ static void check_uphold_dependencies(Unit *u) {
unit_submit_to_start_when_upheld_queue(other);
}
static void unit_check_binds_to(Unit *u) {
_cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
bool stop = false;
static void check_bound_by_dependencies(Unit *u) {
Unit *other;
int r;
assert(u);
if (u->job)
return;
/* Add all units this unit depends on to the queue that processes BindsTo= stop behaviour. */
if (unit_active_state(u) != UNIT_ACTIVE)
return;
UNIT_FOREACH_DEPENDENCY(other, u, UNIT_ATOM_CANNOT_BE_ACTIVE_WITHOUT) {
if (other->job)
continue;
if (!other->coldplugged)
/* We might yet create a job for the other unit… */
continue;
if (!UNIT_IS_INACTIVE_OR_FAILED(unit_active_state(other)))
continue;
stop = true;
break;
}
if (!stop)
return;
/* If stopping a unit fails continuously we might enter a stop
* loop here, hence stop acting on the service being
* unnecessary after a while. */
if (!ratelimit_below(&u->auto_start_stop_ratelimit)) {
log_unit_warning(u, "Unit is bound to inactive unit %s, but not stopping since we tried this too often recently.", other->id);
return;
}
assert(other);
log_unit_info(u, "Unit is bound to inactive unit %s. Stopping, too.", other->id);
/* A unit we need to run is gone. Sniff. Let's stop this. */
r = manager_add_job(u->manager, JOB_STOP, u, JOB_FAIL, NULL, &error, NULL);
if (r < 0)
log_unit_warning_errno(u, r, "Failed to enqueue stop job, ignoring: %s", bus_error_message(&error, r));
UNIT_FOREACH_DEPENDENCY(other, u, UNIT_ATOM_ADD_CANNOT_BE_ACTIVE_WITHOUT_QUEUE)
unit_submit_to_stop_when_bound_queue(other);
}
static void retroactively_start_dependencies(Unit *u) {
@ -2654,9 +2666,11 @@ void unit_notify(Unit *u, UnitActiveState os, UnitActiveState ns, UnitNotifyFlag
retroactively_stop_dependencies(u);
}
/* Stop unneeded units regardless if going down was expected or not */
if (UNIT_IS_INACTIVE_OR_FAILED(ns))
/* Stop unneeded units and bound-by units regardless if going down was expected or not */
if (UNIT_IS_INACTIVE_OR_FAILED(ns)) {
check_unneeded_dependencies(u);
check_bound_by_dependencies(u);
}
/* Start uphold units regardless if going up was expected or not */
if (UNIT_IS_ACTIVE_OR_RELOADING(ns))
@ -2703,7 +2717,7 @@ void unit_notify(Unit *u, UnitActiveState os, UnitActiveState ns, UnitNotifyFlag
/* Maybe we finished startup, but something we needed has vanished? Let's die then. (This happens when
* something BindsTo= to a Type=oneshot unit, as these units go directly from starting to inactive,
* without ever entering started.) */
unit_check_binds_to(u);
unit_submit_to_stop_when_bound_queue(u);
if (os != UNIT_FAILED && ns == UNIT_FAILED) {
reason = strjoina("unit ", u->id, " failed");

View File

@ -235,6 +235,9 @@ typedef struct Unit {
/* Queue of units that have an Uphold= dependency from some other unit, and should be checked for starting */
LIST_FIELDS(Unit, start_when_upheld_queue);
/* Queue of units that have a BindTo= dependency on some other unit, and should possibly be shut down */
LIST_FIELDS(Unit, stop_when_bound_queue);
/* PIDs we keep an eye on. Note that a unit might have many
* more, but these are the ones we care enough about to
* process SIGCHLD for */
@ -387,6 +390,7 @@ typedef struct Unit {
bool in_target_deps_queue:1;
bool in_stop_when_unneeded_queue:1;
bool in_start_when_upheld_queue:1;
bool in_stop_when_bound_queue:1;
bool sent_dbus_new_signal:1;
@ -745,6 +749,7 @@ void unit_add_to_gc_queue(Unit *u);
void unit_add_to_target_deps_queue(Unit *u);
void unit_submit_to_stop_when_unneeded_queue(Unit *u);
void unit_submit_to_start_when_upheld_queue(Unit *u);
void unit_submit_to_stop_when_bound_queue(Unit *u);
int unit_merge(Unit *u, Unit *other);
int unit_merge_by_name(Unit *u, const char *other);
@ -875,6 +880,7 @@ bool unit_is_pristine(Unit *u);
bool unit_is_unneeded(Unit *u);
bool unit_is_upheld_by_active(Unit *u, Unit **ret_culprit);
bool unit_is_bound_by_inactive(Unit *u, Unit **ret_culprit);
pid_t unit_control_pid(Unit *u);
pid_t unit_main_pid(Unit *u);