1
0
mirror of https://github.com/systemd/systemd.git synced 2024-12-23 21:35:11 +03:00

Merge pull request #19149 from anitazha/oomdlogging

oomd: make it more clear when a kill happens
This commit is contained in:
Luca Boccassi 2021-03-30 19:01:01 +01:00 committed by GitHub
commit ae63987fac
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 84 additions and 31 deletions

View File

@ -378,10 +378,18 @@ static int monitor_cgroup_contexts_handler(sd_event_source *s, uint64_t usec, vo
OomdCGroupContext *t;
SET_FOREACH(t, targets) {
log_notice("Memory pressure for %s is greater than %lu for more than %"PRIu64" seconds and there was reclaim activity",
t->path, LOAD_INT(t->mem_pressure_limit), m->default_mem_pressure_duration_usec / USEC_PER_SEC);
_cleanup_free_ char *selected = NULL;
char ts[FORMAT_TIMESPAN_MAX];
r = oomd_kill_by_pgscan_rate(m->monitored_mem_pressure_cgroup_contexts_candidates, t->path, m->dry_run);
log_debug("Memory pressure for %s is %lu.%02lu%% > %lu.%02lu%% for > %s with reclaim activity",
t->path,
LOAD_INT(t->memory_pressure.avg10), LOAD_FRAC(t->memory_pressure.avg10),
LOAD_INT(t->mem_pressure_limit), LOAD_FRAC(t->mem_pressure_limit),
format_timespan(ts, sizeof ts,
m->default_mem_pressure_duration_usec,
USEC_PER_SEC));
r = oomd_kill_by_pgscan_rate(m->monitored_mem_pressure_cgroup_contexts_candidates, t->path, m->dry_run, &selected);
if (r == -ENOMEM)
return log_oom();
if (r < 0)
@ -389,6 +397,15 @@ static int monitor_cgroup_contexts_handler(sd_event_source *s, uint64_t usec, vo
else {
/* Don't act on all the high pressure cgroups at once; return as soon as we kill one */
m->post_action_delay_start = usec_now;
if (selected)
log_notice("Killed %s due to memory pressure for %s being %lu.%02lu%% > %lu.%02lu%%"
" for > %s with reclaim activity",
selected, t->path,
LOAD_INT(t->memory_pressure.avg10), LOAD_FRAC(t->memory_pressure.avg10),
LOAD_INT(t->mem_pressure_limit), LOAD_FRAC(t->mem_pressure_limit),
format_timespan(ts, sizeof ts,
m->default_mem_pressure_duration_usec,
USEC_PER_SEC));
return 0;
}
}
@ -397,9 +414,11 @@ static int monitor_cgroup_contexts_handler(sd_event_source *s, uint64_t usec, vo
if (oomd_swap_free_below(&m->system_context, 10000 - m->swap_used_limit_permyriad)) {
_cleanup_hashmap_free_ Hashmap *candidates = NULL;
_cleanup_free_ char *selected = NULL;
log_notice("Swap used (%"PRIu64") / total (%"PRIu64") is more than " PERMYRIAD_AS_PERCENT_FORMAT_STR,
m->system_context.swap_used, m->system_context.swap_total, PERMYRIAD_AS_PERCENT_FORMAT_VAL(m->swap_used_limit_permyriad));
log_debug("Swap used (%"PRIu64") / total (%"PRIu64") is more than " PERMYRIAD_AS_PERCENT_FORMAT_STR,
m->system_context.swap_used, m->system_context.swap_total,
PERMYRIAD_AS_PERCENT_FORMAT_VAL(m->swap_used_limit_permyriad));
r = get_monitored_cgroup_contexts_candidates(m->monitored_swap_cgroup_contexts, &candidates);
if (r == -ENOMEM)
@ -407,13 +426,18 @@ static int monitor_cgroup_contexts_handler(sd_event_source *s, uint64_t usec, vo
if (r < 0)
log_debug_errno(r, "Failed to get monitored swap cgroup candidates, ignoring: %m");
r = oomd_kill_by_swap_usage(candidates, m->dry_run);
r = oomd_kill_by_swap_usage(candidates, m->dry_run, &selected);
if (r == -ENOMEM)
return log_oom();
if (r < 0)
log_notice_errno(r, "Failed to kill any cgroup(s) based on swap: %m");
else {
m->post_action_delay_start = usec_now;
if (selected)
log_notice("Killed %s due to swap used (%"PRIu64") / total (%"PRIu64") being more than "
PERMYRIAD_AS_PERCENT_FORMAT_STR,
selected, m->system_context.swap_used, m->system_context.swap_total,
PERMYRIAD_AS_PERCENT_FORMAT_VAL(m->swap_used_limit_permyriad));
return 0;
}
}

View File

@ -208,54 +208,82 @@ int oomd_cgroup_kill(const char *path, bool recurse, bool dry_run) {
return set_size(pids_killed) != 0;
}
int oomd_kill_by_pgscan_rate(Hashmap *h, const char *prefix, bool dry_run) {
int oomd_kill_by_pgscan_rate(Hashmap *h, const char *prefix, bool dry_run, char **ret_selected) {
_cleanup_free_ OomdCGroupContext **sorted = NULL;
int r;
int n, r, ret = 0;
assert(h);
assert(ret_selected);
r = oomd_sort_cgroup_contexts(h, compare_pgscan_rate_and_memory_usage, prefix, &sorted);
if (r < 0)
return r;
n = oomd_sort_cgroup_contexts(h, compare_pgscan_rate_and_memory_usage, prefix, &sorted);
if (n < 0)
return n;
for (int i = 0; i < r; i++) {
/* Skip cgroups with no reclaim and memory usage; it won't alleviate pressure. */
/* Don't break since there might be "avoid" cgroups at the end. */
for (int i = 0; i < n; i++) {
/* Skip cgroups with no reclaim and memory usage; it won't alleviate pressure.
* Continue since there might be "avoid" cgroups at the end. */
if (sorted[i]->pgscan == 0 && sorted[i]->current_memory_usage == 0)
continue;
r = oomd_cgroup_kill(sorted[i]->path, true, dry_run);
if (r > 0 || r == -ENOMEM)
break;
if (r == 0)
continue; /* We didn't find anything to kill */
if (r == -ENOMEM)
return r; /* Treat oom as a hard error */
if (r < 0) {
if (ret == 0)
ret = r;
continue; /* Try to find something else to kill */
}
char *selected = strdup(sorted[i]->path);
if (!selected)
return -ENOMEM;
*ret_selected = selected;
return 1;
}
return r;
return ret;
}
int oomd_kill_by_swap_usage(Hashmap *h, bool dry_run) {
int oomd_kill_by_swap_usage(Hashmap *h, bool dry_run, char **ret_selected) {
_cleanup_free_ OomdCGroupContext **sorted = NULL;
int r;
int n, r, ret = 0;
assert(h);
assert(ret_selected);
r = oomd_sort_cgroup_contexts(h, compare_swap_usage, NULL, &sorted);
if (r < 0)
return r;
n = oomd_sort_cgroup_contexts(h, compare_swap_usage, NULL, &sorted);
if (n < 0)
return n;
/* Try to kill cgroups with non-zero swap usage until we either succeed in
* killing or we get to a cgroup with no swap usage. */
for (int i = 0; i < r; i++) {
/* Skip over cgroups with no resource usage. Don't break since there might be "avoid"
* cgroups at the end. */
for (int i = 0; i < n; i++) {
/* Skip over cgroups with no resource usage.
* Continue since there might be "avoid" cgroups at the end. */
if (sorted[i]->swap_usage == 0)
continue;
r = oomd_cgroup_kill(sorted[i]->path, true, dry_run);
if (r > 0 || r == -ENOMEM)
break;
if (r == 0)
continue; /* We didn't find anything to kill */
if (r == -ENOMEM)
return r; /* Treat oom as a hard error */
if (r < 0) {
if (ret == 0)
ret = r;
continue; /* Try to find something else to kill */
}
char *selected = strdup(sorted[i]->path);
if (!selected)
return -ENOMEM;
*ret_selected = selected;
return 1;
}
return r;
return ret;
}
int oomd_cgroup_context_acquire(const char *path, OomdCGroupContext **ret) {

View File

@ -122,9 +122,10 @@ int oomd_cgroup_kill(const char *path, bool recurse, bool dry_run);
/* The following oomd_kill_by_* functions return 1 if processes were killed, 0 if no candidate had anything to kill, or negative on error. */
/* If `prefix` is supplied, only cgroups whose paths start with `prefix` are eligible candidates. Otherwise,
* everything in `h` is a candidate. */
int oomd_kill_by_pgscan_rate(Hashmap *h, const char *prefix, bool dry_run);
int oomd_kill_by_swap_usage(Hashmap *h, bool dry_run);
* everything in `h` is a candidate.
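* When 1 is returned, ret_selected is set to a newly allocated copy of the path of the
* cgroup that was acted on; the caller must free it. It is left untouched otherwise. */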
int oomd_kill_by_pgscan_rate(Hashmap *h, const char *prefix, bool dry_run, char **ret_selected);
int oomd_kill_by_swap_usage(Hashmap *h, bool dry_run, char **ret_selected);
int oomd_cgroup_context_acquire(const char *path, OomdCGroupContext **ret);
int oomd_system_context_acquire(const char *proc_swaps_path, OomdSystemContext *ret);