mirror of
https://github.com/systemd/systemd-stable.git
synced 2024-12-22 13:33:56 +03:00
oomd: increase accuracy of SwapUsedLimit= to permyriads too
oomd.conf has two parameters with fractionals: SwapUsedLimit= and DefaultMemoryPressureLimit=, but one accepts permyriads, the other only percentages, for no apparent reason. One carries the "Percent" in the name, the other doesn't. Let's clean this up: always accept permyriads, and drop the suffix, given that it is misleading. I figure we should internally try to focus on scaling everything relative to UINT32_MAX, and if that isn't in the cards at least 10000, but never permille nor percent unless there's a really really good reason for it (e.g. interface defined by someone else).
This commit is contained in:
parent
d9d3f05def
commit
d06e7fb532
@ -48,36 +48,38 @@
|
||||
|
||||
<variablelist class='config-directives'>
|
||||
<varlistentry>
|
||||
<term><varname>SwapUsedLimitPercent=</varname></term>
|
||||
<term><varname>SwapUsedLimit=</varname></term>
|
||||
|
||||
<listitem><para>Sets the limit for swap usage on the system before <command>systemd-oomd</command> will
|
||||
take action. If the percentage of swap used on the system is more than what is defined here,
|
||||
<command>systemd-oomd</command> will act on eligible descendant cgroups, starting from the ones with the
|
||||
highest swap usage to the lowest swap usage. Which cgroups are monitored and what
|
||||
action gets taken depends on what the unit has configured for <varname>ManagedOOMSwap=</varname>.
|
||||
Takes a percentage value between 0% and 100%, inclusive. Defaults to 90%.</para></listitem>
|
||||
<listitem><para>Sets the limit for swap usage on the system before <command>systemd-oomd</command>
|
||||
will take action. If the fraction of swap used on the system is more than what is defined here,
|
||||
<command>systemd-oomd</command> will act on eligible descendant control groups, starting from the
|
||||
ones with the highest swap usage to the lowest swap usage. Which control groups are monitored and
|
||||
what action gets taken depends on what the unit has configured for
|
||||
<varname>ManagedOOMSwap=</varname>. Takes a value specified in percent (when suffixed with "%"),
|
||||
permille ("‰") or permyriad ("‱"), between 0% and 100%, inclusive. Defaults to 90%.</para></listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
<term><varname>DefaultMemoryPressureLimit=</varname></term>
|
||||
|
||||
<listitem><para>Sets the limit for memory pressure on the unit's cgroup before <command>systemd-oomd</command>
|
||||
will take action. A unit can override this value with <varname>ManagedOOMMemoryPressureLimit=</varname>.
|
||||
The memory pressure for this property represents the fraction of time in a 10 second window in which all tasks
|
||||
in the cgroup were delayed. For each monitored cgroup, if the memory pressure on that cgroup exceeds the
|
||||
limit set for longer than the duration set by <varname>DefaultMemoryPressureDurationSec=</varname>,
|
||||
<command>systemd-oomd</command> will act on eligible descendant cgroups,
|
||||
starting from the ones with the most reclaim activity to the least reclaim activity. Which cgroups are
|
||||
monitored and what action gets taken depends on what the unit has configured for
|
||||
<varname>ManagedOOMMemoryPressure=</varname>. Takes a percentage value between 0% and 100%, inclusive.
|
||||
Defaults to 60%.</para></listitem>
|
||||
<listitem><para>Sets the limit for memory pressure on the unit's control group before
|
||||
<command>systemd-oomd</command> will take action. A unit can override this value with
|
||||
<varname>ManagedOOMMemoryPressureLimit=</varname>. The memory pressure for this property represents
|
||||
the fraction of time in a 10 second window in which all tasks in the control group were delayed. For
|
||||
each monitored control group, if the memory pressure on that control group exceeds the limit set for
|
||||
longer than the duration set by <varname>DefaultMemoryPressureDurationSec=</varname>,
|
||||
<command>systemd-oomd</command> will act on eligible descendant control groups, starting from the
|
||||
ones with the most reclaim activity to the least reclaim activity. Which control groups are monitored
|
||||
and what action gets taken depends on what the unit has configured for
|
||||
<varname>ManagedOOMMemoryPressure=</varname>. Takes a fraction specified in the same way as
|
||||
<varname>SwapUsedLimit=</varname> above. Defaults to 60%.</para></listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
<term><varname>DefaultMemoryPressureDurationSec=</varname></term>
|
||||
|
||||
<listitem><para>Sets the amount of time a unit's cgroup needs to have exceeded memory pressure limits before
|
||||
<command>systemd-oomd</command> will take action. Memory pressure limits are defined by
|
||||
<listitem><para>Sets the amount of time a unit's control group needs to have exceeded memory pressure
|
||||
limits before <command>systemd-oomd</command> will take action. Memory pressure limits are defined by
|
||||
<varname>DefaultMemoryPressureLimit=</varname> and <varname>ManagedOOMMemoryPressureLimit=</varname>.
|
||||
Defaults to 30 seconds when this property is unset or set to 0.</para></listitem>
|
||||
</varlistentry>
|
||||
|
@ -16,7 +16,7 @@ typedef struct ManagedOOMReply {
|
||||
ManagedOOMMode mode;
|
||||
char *path;
|
||||
char *property;
|
||||
unsigned limit;
|
||||
uint32_t limit;
|
||||
} ManagedOOMReply;
|
||||
|
||||
static void managed_oom_reply_destroy(ManagedOOMReply *reply) {
|
||||
@ -53,10 +53,10 @@ static int process_managed_oom_reply(
|
||||
assert(m);
|
||||
|
||||
static const JsonDispatch dispatch_table[] = {
|
||||
{ "mode", JSON_VARIANT_STRING, managed_oom_mode, offsetof(ManagedOOMReply, mode), JSON_MANDATORY },
|
||||
{ "path", JSON_VARIANT_STRING, json_dispatch_string, offsetof(ManagedOOMReply, path), JSON_MANDATORY },
|
||||
{ "property", JSON_VARIANT_STRING, json_dispatch_string, offsetof(ManagedOOMReply, property), JSON_MANDATORY },
|
||||
{ "limit", JSON_VARIANT_UNSIGNED, json_dispatch_unsigned, offsetof(ManagedOOMReply, limit), 0 },
|
||||
{ "mode", JSON_VARIANT_STRING, managed_oom_mode, offsetof(ManagedOOMReply, mode), JSON_MANDATORY },
|
||||
{ "path", JSON_VARIANT_STRING, json_dispatch_string, offsetof(ManagedOOMReply, path), JSON_MANDATORY },
|
||||
{ "property", JSON_VARIANT_STRING, json_dispatch_string, offsetof(ManagedOOMReply, property), JSON_MANDATORY },
|
||||
{ "limit", JSON_VARIANT_UNSIGNED, json_dispatch_uint32, offsetof(ManagedOOMReply, limit), 0 },
|
||||
{},
|
||||
};
|
||||
|
||||
@ -87,7 +87,8 @@ static int process_managed_oom_reply(
|
||||
if (ret == -ENOMEM) {
|
||||
r = ret;
|
||||
goto finish;
|
||||
} else if (ret < 0)
|
||||
}
|
||||
if (ret < 0)
|
||||
continue;
|
||||
|
||||
monitor_hm = streq(reply.property, "ManagedOOMSwap") ?
|
||||
@ -100,19 +101,15 @@ static int process_managed_oom_reply(
|
||||
|
||||
limit = m->default_mem_pressure_limit;
|
||||
|
||||
if (streq(reply.property, "ManagedOOMMemoryPressure")) {
|
||||
if (reply.limit > UINT32_MAX) /* out of range */
|
||||
continue;
|
||||
if (reply.limit != 0) {
|
||||
int permyriad = UINT32_SCALE_TO_PERMYRIAD(reply.limit);
|
||||
if (streq(reply.property, "ManagedOOMMemoryPressure") && reply.limit > 0) {
|
||||
int permyriad = UINT32_SCALE_TO_PERMYRIAD(reply.limit);
|
||||
|
||||
ret = store_loadavg_fixed_point(
|
||||
(unsigned long) permyriad / 100,
|
||||
(unsigned long) permyriad % 100,
|
||||
&limit);
|
||||
if (ret < 0)
|
||||
continue;
|
||||
}
|
||||
ret = store_loadavg_fixed_point(
|
||||
(unsigned long) permyriad / 100,
|
||||
(unsigned long) permyriad % 100,
|
||||
&limit);
|
||||
if (ret < 0)
|
||||
continue;
|
||||
}
|
||||
|
||||
ret = oomd_insert_cgroup_context(NULL, monitor_hm, empty_to_root(reply.path));
|
||||
@ -354,11 +351,11 @@ static int monitor_cgroup_contexts_handler(sd_event_source *s, uint64_t usec, vo
|
||||
}
|
||||
}
|
||||
|
||||
if (oomd_swap_free_below(&m->system_context, (100 - m->swap_used_limit))) {
|
||||
if (oomd_swap_free_below(&m->system_context, 10000 - m->swap_used_limit_permyriad)) {
|
||||
_cleanup_hashmap_free_ Hashmap *candidates = NULL;
|
||||
|
||||
log_notice("Swap used (%"PRIu64") / total (%"PRIu64") is more than %u%%",
|
||||
m->system_context.swap_used, m->system_context.swap_total, m->swap_used_limit);
|
||||
log_notice("Swap used (%"PRIu64") / total (%"PRIu64") is more than " PERMYRIAD_AS_PERCENT_FORMAT_STR,
|
||||
m->system_context.swap_used, m->system_context.swap_total, PERMYRIAD_AS_PERCENT_FORMAT_VAL(m->swap_used_limit_permyriad));
|
||||
|
||||
r = get_monitored_cgroup_contexts_candidates(m->monitored_swap_cgroup_contexts, &candidates);
|
||||
if (r == -ENOMEM)
|
||||
@ -484,7 +481,13 @@ static int manager_connect_bus(Manager *m) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
int manager_start(Manager *m, bool dry_run, int swap_used_limit, int mem_pressure_limit_permyriad, usec_t mem_pressure_usec) {
|
||||
int manager_start(
|
||||
Manager *m,
|
||||
bool dry_run,
|
||||
int swap_used_limit_permyriad,
|
||||
int mem_pressure_limit_permyriad,
|
||||
usec_t mem_pressure_usec) {
|
||||
|
||||
unsigned long l, f;
|
||||
int r;
|
||||
|
||||
@ -492,10 +495,10 @@ int manager_start(Manager *m, bool dry_run, int swap_used_limit, int mem_pressur
|
||||
|
||||
m->dry_run = dry_run;
|
||||
|
||||
m->swap_used_limit = swap_used_limit != -1 ? swap_used_limit : DEFAULT_SWAP_USED_LIMIT;
|
||||
assert(m->swap_used_limit <= 100);
|
||||
m->swap_used_limit_permyriad = swap_used_limit_permyriad >= 0 ? swap_used_limit_permyriad : DEFAULT_SWAP_USED_LIMIT_PERCENT * 100;
|
||||
assert(m->swap_used_limit_permyriad <= 10000);
|
||||
|
||||
if (mem_pressure_limit_permyriad != -1) {
|
||||
if (mem_pressure_limit_permyriad >= 0) {
|
||||
assert(mem_pressure_limit_permyriad <= 10000);
|
||||
|
||||
l = mem_pressure_limit_permyriad / 100;
|
||||
@ -543,12 +546,12 @@ int manager_get_dump_string(Manager *m, char **ret) {
|
||||
|
||||
fprintf(f,
|
||||
"Dry Run: %s\n"
|
||||
"Swap Used Limit: %u%%\n"
|
||||
"Swap Used Limit: " PERMYRIAD_AS_PERCENT_FORMAT_STR "\n"
|
||||
"Default Memory Pressure Limit: %lu.%02lu%%\n"
|
||||
"Default Memory Pressure Duration: %s\n"
|
||||
"System Context:\n",
|
||||
yes_no(m->dry_run),
|
||||
m->swap_used_limit,
|
||||
PERMYRIAD_AS_PERCENT_FORMAT_VAL(m->swap_used_limit_permyriad),
|
||||
LOAD_INT(m->default_mem_pressure_limit), LOAD_FRAC(m->default_mem_pressure_limit),
|
||||
format_timespan(buf, sizeof(buf), m->default_mem_pressure_duration_usec, USEC_PER_SEC));
|
||||
oomd_dump_system_context(&m->system_context, f, "\t");
|
||||
|
@ -18,7 +18,7 @@
|
||||
* system.slice are assumed to be less latency sensitive. */
|
||||
#define DEFAULT_MEM_PRESSURE_DURATION_USEC (30 * USEC_PER_SEC)
|
||||
#define DEFAULT_MEM_PRESSURE_LIMIT_PERCENT 60
|
||||
#define DEFAULT_SWAP_USED_LIMIT 90
|
||||
#define DEFAULT_SWAP_USED_LIMIT_PERCENT 90
|
||||
|
||||
#define RECLAIM_DURATION_USEC (30 * USEC_PER_SEC)
|
||||
#define POST_ACTION_DELAY_USEC (15 * USEC_PER_SEC)
|
||||
@ -32,7 +32,7 @@ struct Manager {
|
||||
Hashmap *polkit_registry;
|
||||
|
||||
bool dry_run;
|
||||
unsigned swap_used_limit;
|
||||
int swap_used_limit_permyriad;
|
||||
loadavg_t default_mem_pressure_limit;
|
||||
usec_t default_mem_pressure_duration_usec;
|
||||
|
||||
@ -56,7 +56,7 @@ DEFINE_TRIVIAL_CLEANUP_FUNC(Manager*, manager_free);
|
||||
|
||||
int manager_new(Manager **ret);
|
||||
|
||||
int manager_start(Manager *m, bool dry_run, int swap_used_limit, int mem_pressure_limit_permyriad, usec_t mem_pressure_usec);
|
||||
int manager_start(Manager *m, bool dry_run, int swap_used_limit_permyriad, int mem_pressure_limit_permyriad, usec_t mem_pressure_usec);
|
||||
|
||||
int manager_get_dump_string(Manager *m, char **ret);
|
||||
|
||||
|
@ -134,13 +134,13 @@ bool oomd_memory_reclaim(Hashmap *h) {
|
||||
return pgscan_of > last_pgscan_of;
|
||||
}
|
||||
|
||||
bool oomd_swap_free_below(const OomdSystemContext *ctx, uint64_t threshold_percent) {
|
||||
bool oomd_swap_free_below(const OomdSystemContext *ctx, int threshold_permyriad) {
|
||||
uint64_t swap_threshold;
|
||||
|
||||
assert(ctx);
|
||||
assert(threshold_percent <= 100);
|
||||
assert(threshold_permyriad <= 10000);
|
||||
|
||||
swap_threshold = ctx->swap_total * threshold_percent / ((uint64_t) 100);
|
||||
swap_threshold = ctx->swap_total * threshold_permyriad / (uint64_t) 10000;
|
||||
return (ctx->swap_total - ctx->swap_used) < swap_threshold;
|
||||
}
|
||||
|
||||
|
@ -61,8 +61,8 @@ int oomd_pressure_above(Hashmap *h, usec_t duration, Set **ret);
|
||||
* current sum is higher than the last interval's sum (there was some reclaim activity). */
|
||||
bool oomd_memory_reclaim(Hashmap *h);
|
||||
|
||||
/* Returns true if the amount of swap free is below the percentage of swap specified by `threshold_percent`. */
|
||||
bool oomd_swap_free_below(const OomdSystemContext *ctx, uint64_t threshold_percent);
|
||||
/* Returns true if the amount of swap free is below the permyriad of swap specified by `threshold_permyriad`. */
|
||||
bool oomd_swap_free_below(const OomdSystemContext *ctx, int threshold_permyriad);
|
||||
|
||||
/* The compare functions will sort from largest to smallest, putting all the contexts with "avoid" at the end
|
||||
* (after the smallest values). */
|
||||
|
@ -17,13 +17,13 @@
|
||||
#include "signal-util.h"
|
||||
|
||||
static bool arg_dry_run = false;
|
||||
static int arg_swap_used_limit = -1;
|
||||
static int arg_swap_used_limit_permyriad = -1;
|
||||
static int arg_mem_pressure_limit_permyriad = -1;
|
||||
static usec_t arg_mem_pressure_usec = 0;
|
||||
|
||||
static int parse_config(void) {
|
||||
static const ConfigTableItem items[] = {
|
||||
{ "OOM", "SwapUsedLimitPercent", config_parse_percent, 0, &arg_swap_used_limit },
|
||||
{ "OOM", "SwapUsedLimit", config_parse_permyriad, 0, &arg_swap_used_limit_permyriad },
|
||||
{ "OOM", "DefaultMemoryPressureLimit", config_parse_permyriad, 0, &arg_mem_pressure_limit_permyriad },
|
||||
{ "OOM", "DefaultMemoryPressureDurationSec", config_parse_sec, 0, &arg_mem_pressure_usec },
|
||||
{}
|
||||
@ -159,7 +159,12 @@ static int run(int argc, char *argv[]) {
|
||||
if (r < 0)
|
||||
return log_error_errno(r, "Failed to create manager: %m");
|
||||
|
||||
r = manager_start(m, arg_dry_run, arg_swap_used_limit, arg_mem_pressure_limit_permyriad, arg_mem_pressure_usec);
|
||||
r = manager_start(
|
||||
m,
|
||||
arg_dry_run,
|
||||
arg_swap_used_limit_permyriad,
|
||||
arg_mem_pressure_limit_permyriad,
|
||||
arg_mem_pressure_usec);
|
||||
if (r < 0)
|
||||
return log_error_errno(r, "Failed to start up daemon: %m");
|
||||
|
||||
|
@ -12,6 +12,6 @@
|
||||
# See oomd.conf(5) for details
|
||||
|
||||
[OOM]
|
||||
#SwapUsedLimitPercent=90%
|
||||
#SwapUsedLimit=90%
|
||||
#DefaultMemoryPressureLimit=60%
|
||||
#DefaultMemoryPressureDurationSec=30s
|
||||
|
@ -302,19 +302,19 @@ static void test_oomd_swap_free_below(void) {
|
||||
.swap_total = 20971512 * 1024U,
|
||||
.swap_used = 20971440 * 1024U,
|
||||
};
|
||||
assert_se(oomd_swap_free_below(&ctx, 20) == true);
|
||||
assert_se(oomd_swap_free_below(&ctx, 2000) == true);
|
||||
|
||||
ctx = (OomdSystemContext) {
|
||||
.swap_total = 20971512 * 1024U,
|
||||
.swap_used = 3310136 * 1024U,
|
||||
};
|
||||
assert_se(oomd_swap_free_below(&ctx, 20) == false);
|
||||
assert_se(oomd_swap_free_below(&ctx, 2000) == false);
|
||||
|
||||
ctx = (OomdSystemContext) {
|
||||
.swap_total = 0,
|
||||
.swap_used = 0,
|
||||
};
|
||||
assert_se(oomd_swap_free_below(&ctx, 20) == false);
|
||||
assert_se(oomd_swap_free_below(&ctx, 2000) == false);
|
||||
}
|
||||
|
||||
static void test_oomd_sort_cgroups(void) {
|
||||
|
Loading…
Reference in New Issue
Block a user