1
1
mirror of https://github.com/systemd/systemd-stable.git synced 2025-03-08 20:58:20 +03:00

oomd: sort by pgscan rate not pgscan

For pressure-based killing we want to target the cgroup with the highest
increase in pgscan since the previous interval (vs. the previous logic,
which used the raw pgscan counter). This prevents biasing towards
long-running cgroups, as mentioned in #19007.
This commit is contained in:
Anita Zhang 2021-03-15 17:21:45 -07:00
parent 91cbb4bdd6
commit 88e47952af
4 changed files with 59 additions and 27 deletions

View File

@ -357,9 +357,9 @@ static int monitor_cgroup_contexts_handler(sd_event_source *s, uint64_t usec, vo
log_notice("Memory pressure for %s is greater than %lu for more than %"PRIu64" seconds and there was reclaim activity",
t->path, LOAD_INT(t->mem_pressure_limit), m->default_mem_pressure_duration_usec / USEC_PER_SEC);
r = oomd_kill_by_pgscan(m->monitored_mem_pressure_cgroup_contexts_candidates, t->path, m->dry_run);
r = oomd_kill_by_pgscan_rate(m->monitored_mem_pressure_cgroup_contexts_candidates, t->path, m->dry_run);
if (r == -ENOMEM)
return log_error_errno(r, "Failed to kill cgroup processes by pgscan");
return log_error_errno(r, "Failed to kill cgroup processes by pgscan rate: %m");
if (r < 0)
log_info("Failed to kill any cgroup(s) under %s based on pressure", t->path);
else {

View File

@ -208,13 +208,13 @@ int oomd_cgroup_kill(const char *path, bool recurse, bool dry_run) {
return set_size(pids_killed) != 0;
}
int oomd_kill_by_pgscan(Hashmap *h, const char *prefix, bool dry_run) {
int oomd_kill_by_pgscan_rate(Hashmap *h, const char *prefix, bool dry_run) {
_cleanup_free_ OomdCGroupContext **sorted = NULL;
int r;
assert(h);
r = oomd_sort_cgroup_contexts(h, compare_pgscan_and_memory_usage, prefix, &sorted);
r = oomd_sort_cgroup_contexts(h, compare_pgscan_rate_and_memory_usage, prefix, &sorted);
if (r < 0)
return r;

View File

@ -66,7 +66,8 @@ bool oomd_swap_free_below(const OomdSystemContext *ctx, int threshold_permyriad)
/* The compare functions will sort from largest to smallest, putting all the contexts with "avoid" at the end
* (after the smallest values). */
static inline int compare_pgscan_and_memory_usage(OomdCGroupContext * const *c1, OomdCGroupContext * const *c2) {
static inline int compare_pgscan_rate_and_memory_usage(OomdCGroupContext * const *c1, OomdCGroupContext * const *c2) {
uint64_t last1, last2;
int r;
assert(c1);
@ -76,7 +77,22 @@ static inline int compare_pgscan_and_memory_usage(OomdCGroupContext * const *c1,
if (r != 0)
return r;
r = CMP((*c2)->pgscan, (*c1)->pgscan);
/* If last_pgscan > pgscan, assume the cgroup was recreated and reset last_pgscan to zero. */
last2 = (*c2)->last_pgscan;
if ((*c2)->last_pgscan > (*c2)->pgscan) {
log_info("Last pgscan %" PRIu64 " greater than current pgscan %" PRIu64 " for %s. Using last pgscan of zero.",
(*c2)->last_pgscan, (*c2)->pgscan, (*c2)->path);
last2 = 0;
}
last1 = (*c1)->last_pgscan;
if ((*c1)->last_pgscan > (*c1)->pgscan) {
log_info("Last pgscan %" PRIu64 " greater than current pgscan %" PRIu64 " for %s. Using last pgscan of zero.",
(*c1)->last_pgscan, (*c1)->pgscan, (*c1)->path);
last1 = 0;
}
r = CMP((*c2)->pgscan - last2, (*c1)->pgscan - last1);
if (r != 0)
return r;
@ -107,7 +123,7 @@ int oomd_cgroup_kill(const char *path, bool recurse, bool dry_run);
/* The following oomd_kill_by_* functions return 1 if processes were killed, or negative otherwise. */
/* If `prefix` is supplied, only cgroups whose paths start with `prefix` are eligible candidates. Otherwise,
* everything in `h` is a candidate. */
int oomd_kill_by_pgscan(Hashmap *h, const char *prefix, bool dry_run);
int oomd_kill_by_pgscan_rate(Hashmap *h, const char *prefix, bool dry_run);
int oomd_kill_by_swap_usage(Hashmap *h, bool dry_run);
int oomd_cgroup_context_acquire(const char *path, OomdCGroupContext **ret);

View File

@ -372,33 +372,45 @@ static void test_oomd_sort_cgroups(void) {
"/herp.slice/derp.scope",
"/herp.slice/derp.scope/sheep.service",
"/zupa.slice",
"/boop.slice",
"/omitted.slice",
"/avoid.slice");
OomdCGroupContext ctx[6] = {
OomdCGroupContext ctx[7] = {
{ .path = paths[0],
.swap_usage = 20,
.pgscan = 60,
.last_pgscan = 0,
.pgscan = 33,
.current_memory_usage = 10 },
{ .path = paths[1],
.swap_usage = 60,
.pgscan = 40,
.last_pgscan = 33,
.pgscan = 1,
.current_memory_usage = 20 },
{ .path = paths[2],
.swap_usage = 40,
.pgscan = 40,
.last_pgscan = 1,
.pgscan = 33,
.current_memory_usage = 40 },
{ .path = paths[3],
.swap_usage = 10,
.pgscan = 80,
.last_pgscan = 33,
.pgscan = 2,
.current_memory_usage = 10 },
{ .path = paths[4],
.swap_usage = 90,
.pgscan = 100,
.preference = MANAGED_OOM_PREFERENCE_OMIT },
.swap_usage = 11,
.last_pgscan = 33,
.pgscan = 33,
.current_memory_usage = 10 },
{ .path = paths[5],
.swap_usage = 90,
.last_pgscan = 0,
.pgscan = UINT64_MAX,
.preference = MANAGED_OOM_PREFERENCE_OMIT },
{ .path = paths[6],
.swap_usage = 99,
.pgscan = 200,
.last_pgscan = 0,
.pgscan = UINT64_MAX,
.preference = MANAGED_OOM_PREFERENCE_AVOID },
};
@ -408,32 +420,36 @@ static void test_oomd_sort_cgroups(void) {
assert_se(hashmap_put(h, "/herp.slice/derp.scope", &ctx[1]) >= 0);
assert_se(hashmap_put(h, "/herp.slice/derp.scope/sheep.service", &ctx[2]) >= 0);
assert_se(hashmap_put(h, "/zupa.slice", &ctx[3]) >= 0);
assert_se(hashmap_put(h, "/omitted.slice", &ctx[4]) >= 0);
assert_se(hashmap_put(h, "/avoid.slice", &ctx[5]) >= 0);
assert_se(hashmap_put(h, "/boop.slice", &ctx[4]) >= 0);
assert_se(hashmap_put(h, "/omitted.slice", &ctx[5]) >= 0);
assert_se(hashmap_put(h, "/avoid.slice", &ctx[6]) >= 0);
assert_se(oomd_sort_cgroup_contexts(h, compare_swap_usage, NULL, &sorted_cgroups) == 5);
assert_se(oomd_sort_cgroup_contexts(h, compare_swap_usage, NULL, &sorted_cgroups) == 6);
assert_se(sorted_cgroups[0] == &ctx[1]);
assert_se(sorted_cgroups[1] == &ctx[2]);
assert_se(sorted_cgroups[2] == &ctx[0]);
assert_se(sorted_cgroups[3] == &ctx[3]);
assert_se(sorted_cgroups[4] == &ctx[5]);
assert_se(sorted_cgroups[3] == &ctx[4]);
assert_se(sorted_cgroups[4] == &ctx[3]);
assert_se(sorted_cgroups[5] == &ctx[6]);
sorted_cgroups = mfree(sorted_cgroups);
assert_se(oomd_sort_cgroup_contexts(h, compare_pgscan_and_memory_usage, NULL, &sorted_cgroups) == 5);
assert_se(sorted_cgroups[0] == &ctx[3]);
assert_se(sorted_cgroups[1] == &ctx[0]);
assert_se(sorted_cgroups[2] == &ctx[2]);
assert_se(oomd_sort_cgroup_contexts(h, compare_pgscan_rate_and_memory_usage, NULL, &sorted_cgroups) == 6);
assert_se(sorted_cgroups[0] == &ctx[0]);
assert_se(sorted_cgroups[1] == &ctx[2]);
assert_se(sorted_cgroups[2] == &ctx[3]);
assert_se(sorted_cgroups[3] == &ctx[1]);
assert_se(sorted_cgroups[4] == &ctx[5]);
assert_se(sorted_cgroups[4] == &ctx[4]);
assert_se(sorted_cgroups[5] == &ctx[6]);
sorted_cgroups = mfree(sorted_cgroups);
assert_se(oomd_sort_cgroup_contexts(h, compare_pgscan_and_memory_usage, "/herp.slice/derp.scope", &sorted_cgroups) == 2);
assert_se(oomd_sort_cgroup_contexts(h, compare_pgscan_rate_and_memory_usage, "/herp.slice/derp.scope", &sorted_cgroups) == 2);
assert_se(sorted_cgroups[0] == &ctx[2]);
assert_se(sorted_cgroups[1] == &ctx[1]);
assert_se(sorted_cgroups[2] == 0);
assert_se(sorted_cgroups[3] == 0);
assert_se(sorted_cgroups[4] == 0);
assert_se(sorted_cgroups[5] == 0);
assert_se(sorted_cgroups[6] == 0);
sorted_cgroups = mfree(sorted_cgroups);
}