mirror of
https://github.com/systemd/systemd.git
synced 2024-12-23 21:35:11 +03:00
core: add cgroup memory controller support on the unified hierarchy (#3315)
On the unified hierarchy, the memory controller implements three control knobs - low, high and max - which enable more usable and versatile control over memory usage. This patch implements support for the three control knobs. * MemoryLow, MemoryHigh and MemoryMax are added for memory.low, memory.high and memory.max, respectively. * As all absolute limits on the unified hierarchy use "max" for no limit, make memory limit parse functions accept "max" in addition to "infinity" and document "max" for the new knobs. * Implement compatibility translation between MemoryMax and MemoryLimit. v2: - Fixed missing else's in config_parse_memory_limit(). - Fixed missing newline when writing out drop-ins. - Coding style updates to use "val > 0" instead of "val". - Minor updates to documentation.
This commit is contained in:
parent
ffe54bf4bf
commit
da4d897e75
@ -114,6 +114,13 @@
|
||||
prefixed ones. On unified hierarchy, IO resource control also applies to buffered writes.</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
<varlistentry>
|
||||
<term><option>Memory</option></term>
|
||||
<listitem>
|
||||
<para><varname>MemoryMax</varname> replaces <varname>MemoryLimit</varname>. <varname>MemoryLow</varname>
|
||||
and <varname>MemoryHigh</varname> are effective only on unified hierarchy.</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
</variablelist>
|
||||
</para>
|
||||
|
||||
@ -212,6 +219,67 @@
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
<term><varname>MemoryLow=<replaceable>bytes</replaceable></varname></term>
|
||||
|
||||
<listitem>
|
||||
<para>Specify the best-effort memory usage protection of the executed processes in this unit. If the memory
|
||||
usages of this unit and all its ancestors are below their low boundaries, this unit's memory won't be
|
||||
reclaimed as long as memory can be reclaimed from unprotected units.</para>
|
||||
|
||||
<para>Takes a memory size in bytes. If the value is suffixed with K, M, G or T, the specified memory size is
|
||||
parsed as Kilobytes, Megabytes, Gigabytes, or Terabytes (with the base 1024), respectively. This controls the
|
||||
<literal>memory.low</literal> control group attribute. For details about this control group attribute, see
|
||||
<ulink url="https://www.kernel.org/doc/Documentation/cgroup-v2.txt">cgroup-v2.txt</ulink>.</para>
|
||||
|
||||
<para>Implies <literal>MemoryAccounting=true</literal>.</para>
|
||||
|
||||
<para>This setting is supported only if the unified control group hierarchy is used.</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
<term><varname>MemoryHigh=<replaceable>bytes</replaceable></varname></term>
|
||||
|
||||
<listitem>
|
||||
<para>Specify the high limit on memory usage of the executed processes in this unit. Memory usage may go
|
||||
above the limit if unavoidable, but the processes are heavily slowed down and memory is taken away
|
||||
aggressively in such cases. This is the main mechanism to control memory usage of a unit.</para>
|
||||
|
||||
<para>Takes a memory size in bytes. If the value is suffixed with K, M, G or T, the specified memory size is
|
||||
parsed as Kilobytes, Megabytes, Gigabytes, or Terabytes (with the base 1024), respectively. If assigned the
|
||||
special value <literal>max</literal>, no memory limit is applied. This controls the
|
||||
<literal>memory.high</literal> control group attribute. For details about this control group attribute, see
|
||||
<ulink url="https://www.kernel.org/doc/Documentation/cgroup-v2.txt">cgroup-v2.txt</ulink>.</para>
|
||||
|
||||
<para>Implies <literal>MemoryAccounting=true</literal>.</para>
|
||||
|
||||
<para>This setting is supported only if the unified control group hierarchy is used.</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
<term><varname>MemoryMax=<replaceable>bytes</replaceable></varname></term>
|
||||
|
||||
<listitem>
|
||||
<para>Specify the absolute limit on memory usage of the executed processes in this unit. If memory usage
|
||||
cannot be contained under the limit, the out-of-memory killer is invoked inside the unit. It is recommended to
|
||||
use <varname>MemoryHigh=</varname> as the main control mechanism and use <varname>MemoryMax=</varname> as the
|
||||
last line of defense.</para>
|
||||
|
||||
<para>Takes a memory size in bytes. If the value is suffixed with K, M, G or T, the specified memory size is
|
||||
parsed as Kilobytes, Megabytes, Gigabytes, or Terabytes (with the base 1024), respectively. If assigned the
|
||||
special value <literal>max</literal>, no memory limit is applied. This controls the
|
||||
<literal>memory.max</literal> control group attribute. For details about this control group attribute, see
|
||||
<ulink url="https://www.kernel.org/doc/Documentation/cgroup-v2.txt">cgroup-v2.txt</ulink>.</para>
|
||||
|
||||
<para>Implies <literal>MemoryAccounting=true</literal>.</para>
|
||||
|
||||
<para>This setting is supported only if the unified control group hierarchy is used. Use
|
||||
<varname>MemoryLimit=</varname> on systems using the legacy control group hierarchy.</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
<term><varname>MemoryLimit=<replaceable>bytes</replaceable></varname></term>
|
||||
|
||||
@ -230,6 +298,9 @@
|
||||
url="https://www.kernel.org/doc/Documentation/cgroup-v1/memory.txt">memory.txt</ulink>.</para>
|
||||
|
||||
<para>Implies <literal>MemoryAccounting=true</literal>.</para>
|
||||
|
||||
<para>This setting is supported only if the legacy control group hierarchy is used. Use
|
||||
<varname>MemoryMax=</varname> on systems using the unified control group hierarchy.</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
|
@ -46,7 +46,10 @@ void cgroup_context_init(CGroupContext *c) {
|
||||
c->startup_cpu_shares = CGROUP_CPU_SHARES_INVALID;
|
||||
c->cpu_quota_per_sec_usec = USEC_INFINITY;
|
||||
|
||||
c->memory_limit = (uint64_t) -1;
|
||||
c->memory_high = CGROUP_LIMIT_MAX;
|
||||
c->memory_max = CGROUP_LIMIT_MAX;
|
||||
|
||||
c->memory_limit = CGROUP_LIMIT_MAX;
|
||||
|
||||
c->io_weight = CGROUP_WEIGHT_INVALID;
|
||||
c->startup_io_weight = CGROUP_WEIGHT_INVALID;
|
||||
@ -147,6 +150,9 @@ void cgroup_context_dump(CGroupContext *c, FILE* f, const char *prefix) {
|
||||
"%sStartupIOWeight=%" PRIu64 "\n"
|
||||
"%sBlockIOWeight=%" PRIu64 "\n"
|
||||
"%sStartupBlockIOWeight=%" PRIu64 "\n"
|
||||
"%sMemoryLow=%" PRIu64 "\n"
|
||||
"%sMemoryHigh=%" PRIu64 "\n"
|
||||
"%sMemoryMax=%" PRIu64 "\n"
|
||||
"%sMemoryLimit=%" PRIu64 "\n"
|
||||
"%sTasksMax=%" PRIu64 "\n"
|
||||
"%sDevicePolicy=%s\n"
|
||||
@ -163,6 +169,9 @@ void cgroup_context_dump(CGroupContext *c, FILE* f, const char *prefix) {
|
||||
prefix, c->startup_io_weight,
|
||||
prefix, c->blockio_weight,
|
||||
prefix, c->startup_blockio_weight,
|
||||
prefix, c->memory_low,
|
||||
prefix, c->memory_high,
|
||||
prefix, c->memory_max,
|
||||
prefix, c->memory_limit,
|
||||
prefix, c->tasks_max,
|
||||
prefix, cgroup_device_policy_to_string(c->device_policy),
|
||||
@ -496,6 +505,23 @@ static unsigned cgroup_apply_blkio_device_limit(const char *path, const char *de
|
||||
return n;
|
||||
}
|
||||
|
||||
/* Returns true if any of the three unified-hierarchy memory knobs of the context differs from its
 * "unset" value: memory_low is non-zero, or memory_high/memory_max is not CGROUP_LIMIT_MAX. */
static bool cgroup_context_has_unified_memory_config(CGroupContext *c) {
        if (c->memory_low > 0)
                return true;

        if (c->memory_high != CGROUP_LIMIT_MAX)
                return true;

        return c->memory_max != CGROUP_LIMIT_MAX;
}
|
||||
|
||||
static void cgroup_apply_unified_memory_limit(const char *path, const char *file, uint64_t v) {
|
||||
char buf[DECIMAL_STR_MAX(uint64_t) + 1] = "max";
|
||||
int r;
|
||||
|
||||
if (v != CGROUP_LIMIT_MAX)
|
||||
xsprintf(buf, "%" PRIu64 "\n", v);
|
||||
|
||||
r = cg_set_attribute("memory", path, file, buf);
|
||||
if (r < 0)
|
||||
log_full_errno(IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r,
|
||||
"Failed to set %s on %s: %m", file, path);
|
||||
}
|
||||
|
||||
void cgroup_context_apply(CGroupContext *c, CGroupMask mask, const char *path, ManagerState state) {
|
||||
bool is_root;
|
||||
int r;
|
||||
@ -662,26 +688,30 @@ void cgroup_context_apply(CGroupContext *c, CGroupMask mask, const char *path, M
|
||||
}
|
||||
|
||||
if ((mask & CGROUP_MASK_MEMORY) && !is_root) {
|
||||
if (c->memory_limit != (uint64_t) -1) {
|
||||
if (cg_unified() > 0) {
|
||||
uint64_t max = c->memory_max;
|
||||
|
||||
if (cgroup_context_has_unified_memory_config(c))
|
||||
max = c->memory_max;
|
||||
else
|
||||
max = c->memory_limit;
|
||||
|
||||
cgroup_apply_unified_memory_limit(path, "memory.low", c->memory_low);
|
||||
cgroup_apply_unified_memory_limit(path, "memory.high", c->memory_high);
|
||||
cgroup_apply_unified_memory_limit(path, "memory.max", max);
|
||||
} else {
|
||||
char buf[DECIMAL_STR_MAX(uint64_t) + 1];
|
||||
|
||||
sprintf(buf, "%" PRIu64 "\n", c->memory_limit);
|
||||
|
||||
if (cg_unified() <= 0)
|
||||
r = cg_set_attribute("memory", path, "memory.limit_in_bytes", buf);
|
||||
if (c->memory_limit != CGROUP_LIMIT_MAX)
|
||||
xsprintf(buf, "%" PRIu64 "\n", c->memory_limit);
|
||||
else
|
||||
r = cg_set_attribute("memory", path, "memory.max", buf);
|
||||
xsprintf(buf, "%" PRIu64 "\n", c->memory_max);
|
||||
|
||||
} else {
|
||||
if (cg_unified() <= 0)
|
||||
r = cg_set_attribute("memory", path, "memory.limit_in_bytes", "-1");
|
||||
else
|
||||
r = cg_set_attribute("memory", path, "memory.max", "max");
|
||||
r = cg_set_attribute("memory", path, "memory.limit_in_bytes", buf);
|
||||
if (r < 0)
|
||||
log_full_errno(IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r,
|
||||
"Failed to set memory.limit_in_bytes on %s: %m", path);
|
||||
}
|
||||
|
||||
if (r < 0)
|
||||
log_full_errno(IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r,
|
||||
"Failed to set memory.limit_in_bytes/memory.max on %s: %m", path);
|
||||
}
|
||||
|
||||
if ((mask & CGROUP_MASK_DEVICES) && !is_root) {
|
||||
@ -778,7 +808,8 @@ CGroupMask cgroup_context_get_mask(CGroupContext *c) {
|
||||
mask |= CGROUP_MASK_IO | CGROUP_MASK_BLKIO;
|
||||
|
||||
if (c->memory_accounting ||
|
||||
c->memory_limit != (uint64_t) -1)
|
||||
c->memory_limit != CGROUP_LIMIT_MAX ||
|
||||
cgroup_context_has_unified_memory_config(c))
|
||||
mask |= CGROUP_MASK_MEMORY;
|
||||
|
||||
if (c->device_allow ||
|
||||
|
@ -94,6 +94,10 @@ struct CGroupContext {
|
||||
LIST_HEAD(CGroupIODeviceWeight, io_device_weights);
|
||||
LIST_HEAD(CGroupIODeviceLimit, io_device_limits);
|
||||
|
||||
uint64_t memory_low;
|
||||
uint64_t memory_high;
|
||||
uint64_t memory_max;
|
||||
|
||||
/* For legacy hierarchies */
|
||||
uint64_t cpu_shares;
|
||||
uint64_t startup_cpu_shares;
|
||||
|
@ -228,6 +228,9 @@ const sd_bus_vtable bus_cgroup_vtable[] = {
|
||||
SD_BUS_PROPERTY("BlockIOReadBandwidth", "a(st)", property_get_blockio_device_bandwidths, 0, 0),
|
||||
SD_BUS_PROPERTY("BlockIOWriteBandwidth", "a(st)", property_get_blockio_device_bandwidths, 0, 0),
|
||||
SD_BUS_PROPERTY("MemoryAccounting", "b", bus_property_get_bool, offsetof(CGroupContext, memory_accounting), 0),
|
||||
SD_BUS_PROPERTY("MemoryLow", "t", NULL, offsetof(CGroupContext, memory_low), 0),
|
||||
SD_BUS_PROPERTY("MemoryHigh", "t", NULL, offsetof(CGroupContext, memory_high), 0),
|
||||
SD_BUS_PROPERTY("MemoryMax", "t", NULL, offsetof(CGroupContext, memory_max), 0),
|
||||
SD_BUS_PROPERTY("MemoryLimit", "t", NULL, offsetof(CGroupContext, memory_limit), 0),
|
||||
SD_BUS_PROPERTY("DevicePolicy", "s", property_get_cgroup_device_policy, offsetof(CGroupContext, device_policy), 0),
|
||||
SD_BUS_PROPERTY("DeviceAllow", "a(ss)", property_get_device_allow, 0, 0),
|
||||
@ -826,6 +829,31 @@ int bus_cgroup_set_property(
|
||||
|
||||
return 1;
|
||||
|
||||
} else if (STR_IN_SET(name, "MemoryLow", "MemoryHigh", "MemoryMax")) {
|
||||
uint64_t v;
|
||||
|
||||
r = sd_bus_message_read(message, "t", &v);
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
||||
if (mode != UNIT_CHECK) {
|
||||
if (streq(name, "MemoryLow"))
|
||||
c->memory_low = v;
|
||||
else if (streq(name, "MemoryHigh"))
|
||||
c->memory_high = v;
|
||||
else
|
||||
c->memory_max = v;
|
||||
|
||||
unit_invalidate_cgroup(u, CGROUP_MASK_MEMORY);
|
||||
|
||||
if (v == CGROUP_LIMIT_MAX)
|
||||
unit_write_drop_in_private_format(u, mode, name, "%s=max\n", name);
|
||||
else
|
||||
unit_write_drop_in_private_format(u, mode, name, "%s=%" PRIu64 "\n", name, v);
|
||||
}
|
||||
|
||||
return 1;
|
||||
|
||||
} else if (streq(name, "MemoryLimit")) {
|
||||
uint64_t limit;
|
||||
|
||||
|
@ -117,6 +117,9 @@ $1.CPUShares, config_parse_cpu_shares, 0,
|
||||
$1.StartupCPUShares, config_parse_cpu_shares, 0, offsetof($1, cgroup_context.startup_cpu_shares)
|
||||
$1.CPUQuota, config_parse_cpu_quota, 0, offsetof($1, cgroup_context)
|
||||
$1.MemoryAccounting, config_parse_bool, 0, offsetof($1, cgroup_context.memory_accounting)
|
||||
$1.MemoryLow, config_parse_memory_limit, 0, offsetof($1, cgroup_context)
|
||||
$1.MemoryHigh, config_parse_memory_limit, 0, offsetof($1, cgroup_context)
|
||||
$1.MemoryMax, config_parse_memory_limit, 0, offsetof($1, cgroup_context)
|
||||
$1.MemoryLimit, config_parse_memory_limit, 0, offsetof($1, cgroup_context)
|
||||
$1.DeviceAllow, config_parse_device_allow, 0, offsetof($1, cgroup_context)
|
||||
$1.DevicePolicy, config_parse_device_policy, 0, offsetof($1, cgroup_context.device_policy)
|
||||
|
@ -2793,21 +2793,26 @@ int config_parse_memory_limit(
|
||||
void *userdata) {
|
||||
|
||||
CGroupContext *c = data;
|
||||
uint64_t bytes;
|
||||
uint64_t bytes = CGROUP_LIMIT_MAX;
|
||||
int r;
|
||||
|
||||
if (isempty(rvalue) || streq(rvalue, "infinity")) {
|
||||
c->memory_limit = (uint64_t) -1;
|
||||
return 0;
|
||||
if (!isempty(rvalue) && !streq(rvalue, "infinity") && !streq(rvalue, "max")) {
|
||||
r = parse_size(rvalue, 1024, &bytes);
|
||||
if (r < 0 || bytes < 1) {
|
||||
log_syntax(unit, LOG_ERR, filename, line, r, "Memory limit '%s' invalid. Ignoring.", rvalue);
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
r = parse_size(rvalue, 1024, &bytes);
|
||||
if (r < 0 || bytes < 1) {
|
||||
log_syntax(unit, LOG_ERR, filename, line, r, "Memory limit '%s' invalid. Ignoring.", rvalue);
|
||||
return 0;
|
||||
}
|
||||
if (streq(lvalue, "MemoryLow"))
|
||||
c->memory_low = bytes;
|
||||
else if (streq(lvalue, "MemoryHigh"))
|
||||
c->memory_high = bytes;
|
||||
else if (streq(lvalue, "MemoryMax"))
|
||||
c->memory_max = bytes;
|
||||
else
|
||||
c->memory_limit = bytes;
|
||||
|
||||
c->memory_limit = bytes;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -166,11 +166,11 @@ int bus_append_unit_property_assignment(sd_bus_message *m, const char *assignmen
|
||||
|
||||
r = sd_bus_message_append(m, "v", "b", r);
|
||||
|
||||
} else if (streq(field, "MemoryLimit")) {
|
||||
} else if (STR_IN_SET(field, "MemoryLow", "MemoryHigh", "MemoryMax", "MemoryLimit")) {
|
||||
uint64_t bytes;
|
||||
|
||||
if (isempty(eq) || streq(eq, "infinity"))
|
||||
bytes = (uint64_t) -1;
|
||||
if (isempty(eq) || streq(eq, "max") || streq(eq, "infinity"))
|
||||
bytes = CGROUP_LIMIT_MAX;
|
||||
else {
|
||||
r = parse_size(eq, 1024, &bytes);
|
||||
if (r < 0) {
|
||||
|
@ -3493,6 +3493,9 @@ typedef struct UnitStatusInfo {
|
||||
|
||||
/* CGroup */
|
||||
uint64_t memory_current;
|
||||
uint64_t memory_low;
|
||||
uint64_t memory_high;
|
||||
uint64_t memory_max;
|
||||
uint64_t memory_limit;
|
||||
uint64_t cpu_usage_nsec;
|
||||
uint64_t tasks_current;
|
||||
@ -3775,10 +3778,30 @@ static void print_status_info(
|
||||
|
||||
printf(" Memory: %s", format_bytes(buf, sizeof(buf), i->memory_current));
|
||||
|
||||
if (i->memory_limit != (uint64_t) -1)
|
||||
printf(" (limit: %s)\n", format_bytes(buf, sizeof(buf), i->memory_limit));
|
||||
else
|
||||
printf("\n");
|
||||
if (i->memory_low > 0 || i->memory_high != CGROUP_LIMIT_MAX || i->memory_max != CGROUP_LIMIT_MAX ||
|
||||
i->memory_limit != CGROUP_LIMIT_MAX) {
|
||||
const char *prefix = "";
|
||||
|
||||
printf(" (");
|
||||
if (i->memory_low > 0) {
|
||||
printf("%slow: %s", prefix, format_bytes(buf, sizeof(buf), i->memory_low));
|
||||
prefix = " ";
|
||||
}
|
||||
if (i->memory_high != CGROUP_LIMIT_MAX) {
|
||||
printf("%shigh: %s", prefix, format_bytes(buf, sizeof(buf), i->memory_high));
|
||||
prefix = " ";
|
||||
}
|
||||
if (i->memory_max != CGROUP_LIMIT_MAX) {
|
||||
printf("%smax: %s", prefix, format_bytes(buf, sizeof(buf), i->memory_max));
|
||||
prefix = " ";
|
||||
}
|
||||
if (i->memory_limit != CGROUP_LIMIT_MAX) {
|
||||
printf("%slimit: %s", prefix, format_bytes(buf, sizeof(buf), i->memory_limit));
|
||||
prefix = " ";
|
||||
}
|
||||
printf(")");
|
||||
}
|
||||
printf("\n");
|
||||
}
|
||||
|
||||
if (i->cpu_usage_nsec != (uint64_t) -1) {
|
||||
@ -4007,6 +4030,12 @@ static int status_property(const char *name, sd_bus_message *m, UnitStatusInfo *
|
||||
i->assert_timestamp = (usec_t) u;
|
||||
else if (streq(name, "MemoryCurrent"))
|
||||
i->memory_current = u;
|
||||
else if (streq(name, "MemoryLow"))
|
||||
i->memory_low = u;
|
||||
else if (streq(name, "MemoryHigh"))
|
||||
i->memory_high = u;
|
||||
else if (streq(name, "MemoryMax"))
|
||||
i->memory_max = u;
|
||||
else if (streq(name, "MemoryLimit"))
|
||||
i->memory_limit = u;
|
||||
else if (streq(name, "TasksCurrent"))
|
||||
@ -4500,6 +4529,8 @@ static int show_one(
|
||||
_cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
|
||||
UnitStatusInfo info = {
|
||||
.memory_current = (uint64_t) -1,
|
||||
.memory_high = CGROUP_LIMIT_MAX,
|
||||
.memory_max = CGROUP_LIMIT_MAX,
|
||||
.memory_limit = (uint64_t) -1,
|
||||
.cpu_usage_nsec = (uint64_t) -1,
|
||||
.tasks_current = (uint64_t) -1,
|
||||
|
Loading…
Reference in New Issue
Block a user