mirror of
https://github.com/systemd/systemd.git
synced 2024-12-23 21:35:11 +03:00
oomd: make it more clear when a kill happens
Improve the logging to only print a notice when systemd-oomd actually killed something, and also print which cgroup was targeted. Demote the general "swap above"/"pressure above" messages to debug. [zjs: fix some issuelets found in review]
This commit is contained in:
parent
b240c08d09
commit
37a7e15968
@ -378,10 +378,18 @@ static int monitor_cgroup_contexts_handler(sd_event_source *s, uint64_t usec, vo
|
||||
OomdCGroupContext *t;
|
||||
|
||||
SET_FOREACH(t, targets) {
|
||||
log_notice("Memory pressure for %s is greater than %lu for more than %"PRIu64" seconds and there was reclaim activity",
|
||||
t->path, LOAD_INT(t->mem_pressure_limit), m->default_mem_pressure_duration_usec / USEC_PER_SEC);
|
||||
_cleanup_free_ char *selected = NULL;
|
||||
char ts[FORMAT_TIMESPAN_MAX];
|
||||
|
||||
r = oomd_kill_by_pgscan_rate(m->monitored_mem_pressure_cgroup_contexts_candidates, t->path, m->dry_run);
|
||||
log_debug("Memory pressure for %s is %lu.%02lu%% > %lu.%02lu%% for > %s with reclaim activity",
|
||||
t->path,
|
||||
LOAD_INT(t->memory_pressure.avg10), LOAD_FRAC(t->memory_pressure.avg10),
|
||||
LOAD_INT(t->mem_pressure_limit), LOAD_FRAC(t->mem_pressure_limit),
|
||||
format_timespan(ts, sizeof ts,
|
||||
m->default_mem_pressure_duration_usec,
|
||||
USEC_PER_SEC));
|
||||
|
||||
r = oomd_kill_by_pgscan_rate(m->monitored_mem_pressure_cgroup_contexts_candidates, t->path, m->dry_run, &selected);
|
||||
if (r == -ENOMEM)
|
||||
return log_oom();
|
||||
if (r < 0)
|
||||
@ -389,6 +397,15 @@ static int monitor_cgroup_contexts_handler(sd_event_source *s, uint64_t usec, vo
|
||||
else {
|
||||
/* Don't act on all the high pressure cgroups at once; return as soon as we kill one */
|
||||
m->post_action_delay_start = usec_now;
|
||||
if (selected)
|
||||
log_notice("Killed %s due to memory pressure for %s being %lu.%02lu%% > %lu.%02lu%%"
|
||||
" for > %s with reclaim activity",
|
||||
selected, t->path,
|
||||
LOAD_INT(t->memory_pressure.avg10), LOAD_FRAC(t->memory_pressure.avg10),
|
||||
LOAD_INT(t->mem_pressure_limit), LOAD_FRAC(t->mem_pressure_limit),
|
||||
format_timespan(ts, sizeof ts,
|
||||
m->default_mem_pressure_duration_usec,
|
||||
USEC_PER_SEC));
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
@ -397,9 +414,11 @@ static int monitor_cgroup_contexts_handler(sd_event_source *s, uint64_t usec, vo
|
||||
|
||||
if (oomd_swap_free_below(&m->system_context, 10000 - m->swap_used_limit_permyriad)) {
|
||||
_cleanup_hashmap_free_ Hashmap *candidates = NULL;
|
||||
_cleanup_free_ char *selected = NULL;
|
||||
|
||||
log_notice("Swap used (%"PRIu64") / total (%"PRIu64") is more than " PERMYRIAD_AS_PERCENT_FORMAT_STR,
|
||||
m->system_context.swap_used, m->system_context.swap_total, PERMYRIAD_AS_PERCENT_FORMAT_VAL(m->swap_used_limit_permyriad));
|
||||
log_debug("Swap used (%"PRIu64") / total (%"PRIu64") is more than " PERMYRIAD_AS_PERCENT_FORMAT_STR,
|
||||
m->system_context.swap_used, m->system_context.swap_total,
|
||||
PERMYRIAD_AS_PERCENT_FORMAT_VAL(m->swap_used_limit_permyriad));
|
||||
|
||||
r = get_monitored_cgroup_contexts_candidates(m->monitored_swap_cgroup_contexts, &candidates);
|
||||
if (r == -ENOMEM)
|
||||
@ -407,13 +426,18 @@ static int monitor_cgroup_contexts_handler(sd_event_source *s, uint64_t usec, vo
|
||||
if (r < 0)
|
||||
log_debug_errno(r, "Failed to get monitored swap cgroup candidates, ignoring: %m");
|
||||
|
||||
r = oomd_kill_by_swap_usage(candidates, m->dry_run);
|
||||
r = oomd_kill_by_swap_usage(candidates, m->dry_run, &selected);
|
||||
if (r == -ENOMEM)
|
||||
return log_oom();
|
||||
if (r < 0)
|
||||
log_notice_errno(r, "Failed to kill any cgroup(s) based on swap: %m");
|
||||
else {
|
||||
m->post_action_delay_start = usec_now;
|
||||
if (selected)
|
||||
log_notice("Killed %s due to swap used (%"PRIu64") / total (%"PRIu64") being more than "
|
||||
PERMYRIAD_AS_PERCENT_FORMAT_STR,
|
||||
selected, m->system_context.swap_used, m->system_context.swap_total,
|
||||
PERMYRIAD_AS_PERCENT_FORMAT_VAL(m->swap_used_limit_permyriad));
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
@ -208,35 +208,50 @@ int oomd_cgroup_kill(const char *path, bool recurse, bool dry_run) {
|
||||
return set_size(pids_killed) != 0;
|
||||
}
|
||||
|
||||
int oomd_kill_by_pgscan_rate(Hashmap *h, const char *prefix, bool dry_run) {
|
||||
int oomd_kill_by_pgscan_rate(Hashmap *h, const char *prefix, bool dry_run, char **ret_selected) {
|
||||
_cleanup_free_ OomdCGroupContext **sorted = NULL;
|
||||
int r;
|
||||
int r, ret = 0;
|
||||
|
||||
assert(h);
|
||||
assert(ret_selected);
|
||||
|
||||
r = oomd_sort_cgroup_contexts(h, compare_pgscan_rate_and_memory_usage, prefix, &sorted);
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
||||
for (int i = 0; i < r; i++) {
|
||||
/* Skip cgroups with no reclaim and memory usage; it won't alleviate pressure. */
|
||||
/* Don't break since there might be "avoid" cgroups at the end. */
|
||||
/* Skip cgroups with no reclaim and memory usage; it won't alleviate pressure.
|
||||
* Continue since there might be "avoid" cgroups at the end. */
|
||||
if (sorted[i]->pgscan == 0 && sorted[i]->current_memory_usage == 0)
|
||||
continue;
|
||||
|
||||
r = oomd_cgroup_kill(sorted[i]->path, true, dry_run);
|
||||
if (r > 0 || r == -ENOMEM)
|
||||
break;
|
||||
if (r == 0)
|
||||
continue; /* We didn't find anything to kill */
|
||||
if (r == -ENOMEM)
|
||||
return r; /* Treat oom as a hard error */
|
||||
if (r < 0) {
|
||||
if (ret == 0)
|
||||
ret = r;
|
||||
continue; /* Try to find something else to kill */
|
||||
}
|
||||
|
||||
char *selected = strdup(sorted[i]->path);
|
||||
if (!selected)
|
||||
return -ENOMEM;
|
||||
*ret_selected = selected;
|
||||
return 1;
|
||||
}
|
||||
|
||||
return r;
|
||||
return ret;
|
||||
}
|
||||
|
||||
int oomd_kill_by_swap_usage(Hashmap *h, bool dry_run) {
|
||||
int oomd_kill_by_swap_usage(Hashmap *h, bool dry_run, char **ret_selected) {
|
||||
_cleanup_free_ OomdCGroupContext **sorted = NULL;
|
||||
int r;
|
||||
int r, ret = 0;
|
||||
|
||||
assert(h);
|
||||
assert(ret_selected);
|
||||
|
||||
r = oomd_sort_cgroup_contexts(h, compare_swap_usage, NULL, &sorted);
|
||||
if (r < 0)
|
||||
@ -245,17 +260,30 @@ int oomd_kill_by_swap_usage(Hashmap *h, bool dry_run) {
|
||||
/* Try to kill cgroups with non-zero swap usage until we either succeed in
|
||||
* killing or we get to a cgroup with no swap usage. */
|
||||
for (int i = 0; i < r; i++) {
|
||||
/* Skip over cgroups with no resource usage. Don't break since there might be "avoid"
|
||||
* cgroups at the end. */
|
||||
/* Skip over cgroups with no resource usage.
|
||||
* Continue since there might be "avoid" cgroups at the end. */
|
||||
if (sorted[i]->swap_usage == 0)
|
||||
continue;
|
||||
|
||||
r = oomd_cgroup_kill(sorted[i]->path, true, dry_run);
|
||||
if (r > 0 || r == -ENOMEM)
|
||||
break;
|
||||
if (r == 0)
|
||||
continue; /* We didn't find anything to kill */
|
||||
if (r == -ENOMEM)
|
||||
return r; /* Treat oom as a hard error */
|
||||
if (r < 0) {
|
||||
if (ret == 0)
|
||||
ret = r;
|
||||
continue; /* Try to find something else to kill */
|
||||
}
|
||||
|
||||
char *selected = strdup(sorted[i]->path);
|
||||
if (!selected)
|
||||
return -ENOMEM;
|
||||
*ret_selected = selected;
|
||||
return 1;
|
||||
}
|
||||
|
||||
return r;
|
||||
return ret;
|
||||
}
|
||||
|
||||
int oomd_cgroup_context_acquire(const char *path, OomdCGroupContext **ret) {
|
||||
|
@ -122,9 +122,10 @@ int oomd_cgroup_kill(const char *path, bool recurse, bool dry_run);
|
||||
|
||||
/* The following oomd_kill_by_* functions return 1 if processes were killed, 0 if nothing was killed, or a negative errno-style value on error. */
|
||||
/* If `prefix` is supplied, only cgroups whose paths start with `prefix` are eligible candidates. Otherwise,
|
||||
* everything in `h` is a candidate. */
|
||||
int oomd_kill_by_pgscan_rate(Hashmap *h, const char *prefix, bool dry_run);
|
||||
int oomd_kill_by_swap_usage(Hashmap *h, bool dry_run);
|
||||
* everything in `h` is a candidate.
|
||||
* Returns the killed cgroup in ret_selected. */
|
||||
int oomd_kill_by_pgscan_rate(Hashmap *h, const char *prefix, bool dry_run, char **ret_selected);
|
||||
int oomd_kill_by_swap_usage(Hashmap *h, bool dry_run, char **ret_selected);
|
||||
|
||||
int oomd_cgroup_context_acquire(const char *path, OomdCGroupContext **ret);
|
||||
int oomd_system_context_acquire(const char *proc_swaps_path, OomdSystemContext *ret);
|
||||
|
Loading…
Reference in New Issue
Block a user