1
0
mirror of https://github.com/systemd/systemd.git synced 2025-01-12 13:18:14 +03:00

oomd: handle situations when no cgroups are killed

Currently if systemd-oomd doesn't kill anything in a selected cgroup, it
selects a new candidate immediately. But if a selected cgroup wasn't killed,
it is likely due to it disappearing or getting cleaned up between the time
it was selected as a candidate and getting sent SIGKILL(s). We should handle
it as though systemd-oomd did perform a kill so that it will check
swap/pressure again before it tries to select a new candidate.
This commit is contained in:
Anita Zhang 2022-01-19 13:26:01 -08:00
parent 2ee209466b
commit 914d4e99f4
2 changed files with 12 additions and 9 deletions

View File

@ -410,7 +410,7 @@ static int monitor_swap_contexts_handler(sd_event_source *s, uint64_t usec, void
if (r < 0)
log_notice_errno(r, "Failed to kill any cgroup(s) based on swap: %m");
else {
if (selected)
if (selected && r > 0)
log_notice("Killed %s due to memory used (%"PRIu64") / total (%"PRIu64") and "
"swap used (%"PRIu64") / total (%"PRIu64") being more than "
PERMYRIAD_AS_PERCENT_FORMAT_STR,
@ -518,9 +518,13 @@ static int monitor_memory_pressure_contexts_handler(sd_event_source *s, uint64_t
if (r < 0)
log_notice_errno(r, "Failed to kill any cgroup(s) under %s based on pressure: %m", t->path);
else {
/* Don't act on all the high pressure cgroups at once; return as soon as we kill one */
/* Don't act on all the high pressure cgroups at once; return as soon as we kill one.
* If r == 0 then it means there were not eligible candidates, the candidate cgroup
* disappeared, or the candidate cgroup has no processes by the time we tried to kill
* it. In either case, go through the event loop again and select a new candidate if
* pressure is still high. */
m->mem_pressure_post_action_delay_start = usec_now;
if (selected)
if (selected && r > 0)
log_notice("Killed %s due to memory pressure for %s being %lu.%02lu%% > %lu.%02lu%%"
" for > %s with reclaim activity",
selected, t->path,

View File

@ -206,6 +206,9 @@ int oomd_cgroup_kill(const char *path, bool recurse, bool dry_run) {
else if (r < 0)
return r;
if (set_isempty(pids_killed))
log_debug("Nothing killed when attempting to kill %s", path);
r = increment_oomd_xattr(path, "user.oomd_kill", set_size(pids_killed));
if (r < 0)
log_debug_errno(r, "Failed to set user.oomd_kill on kill: %m");
@ -231,8 +234,6 @@ int oomd_kill_by_pgscan_rate(Hashmap *h, const char *prefix, bool dry_run, char
continue;
r = oomd_cgroup_kill(sorted[i]->path, true, dry_run);
if (r == 0)
continue; /* We didn't find anything to kill */
if (r == -ENOMEM)
return r; /* Treat oom as a hard error */
if (r < 0) {
@ -245,7 +246,7 @@ int oomd_kill_by_pgscan_rate(Hashmap *h, const char *prefix, bool dry_run, char
if (!selected)
return -ENOMEM;
*ret_selected = selected;
return 1;
return r;
}
return ret;
@ -271,8 +272,6 @@ int oomd_kill_by_swap_usage(Hashmap *h, uint64_t threshold_usage, bool dry_run,
continue;
r = oomd_cgroup_kill(sorted[i]->path, true, dry_run);
if (r == 0)
continue; /* We didn't find anything to kill */
if (r == -ENOMEM)
return r; /* Treat oom as a hard error */
if (r < 0) {
@ -285,7 +284,7 @@ int oomd_kill_by_swap_usage(Hashmap *h, uint64_t threshold_usage, bool dry_run,
if (!selected)
return -ENOMEM;
*ret_selected = selected;
return 1;
return r;
}
return ret;