1
0
mirror of https://github.com/systemd/systemd.git synced 2024-10-31 16:21:26 +03:00

oom: fix reclaim activity detection

This should have been checking for any reclaim activity within a larger interval
of time rather than within the past second. On systems with swap this
doesn't seem to have mattered too much as reclaim would always increase when
memory pressure was elevated. But testing in the no swap case having
this larger interval made a difference between oomd killing or not.
This commit is contained in:
Anita Zhang 2021-01-24 01:22:51 -08:00
parent 408a3bbd76
commit 924c89e9fe
2 changed files with 7 additions and 2 deletions

View File

@ -302,6 +302,9 @@ static int monitor_cgroup_contexts_handler(sd_event_source *s, uint64_t usec, vo
else if (r == -ENOENT)
zero(m->system_context);
if (oomd_memory_reclaim(m->monitored_mem_pressure_cgroup_contexts))
m->last_reclaim_at = usec_now;
/* If we're still recovering from a kill, don't try to kill again yet */
if (m->post_action_delay_start > 0) {
if (m->post_action_delay_start + POST_ACTION_DELAY_USEC > usec_now)
@ -314,12 +317,12 @@ static int monitor_cgroup_contexts_handler(sd_event_source *s, uint64_t usec, vo
if (r == -ENOMEM)
return log_error_errno(r, "Failed to check if memory pressure exceeded limits");
else if (r == 1) {
/* Check if there was reclaim activity in the last interval. The concern is the following case:
/* Check if there was reclaim activity in the given interval. The concern is the following case:
* Pressure climbed, a lot of high-frequency pages were reclaimed, and we killed the offending
* cgroup. Even after this, well-behaved processes will fault in recently resident pages and
* this will cause pressure to remain high. Thus if there isn't any reclaim pressure, no need
* to kill something (it won't help anyways). */
if (oomd_memory_reclaim(m->monitored_mem_pressure_cgroup_contexts)) {
if ((usec_now - m->last_reclaim_at) <= RECLAIM_DURATION_USEC) {
_cleanup_hashmap_free_ Hashmap *candidates = NULL;
OomdCGroupContext *t;

View File

@ -20,6 +20,7 @@
#define DEFAULT_MEM_PRESSURE_LIMIT 60
#define DEFAULT_SWAP_USED_LIMIT 90
#define RECLAIM_DURATION_USEC (30 * USEC_PER_SEC)
#define POST_ACTION_DELAY_USEC (15 * USEC_PER_SEC)
typedef struct Manager Manager;
@ -42,6 +43,7 @@ struct Manager {
OomdSystemContext system_context;
usec_t last_reclaim_at;
usec_t post_action_delay_start;
sd_event_source *cgroup_context_event_source;