diff --git a/man/systemd.resource-control.xml b/man/systemd.resource-control.xml index 066f2cc19bf..570619a743b 100644 --- a/man/systemd.resource-control.xml +++ b/man/systemd.resource-control.xml @@ -114,6 +114,13 @@ prefixed ones. On unified hierarchy, IO resource control also applies to buffered writes. + + + + MemoryMax replaces MemoryLimit. MemoryLow + and MemoryHigh are effective only on unified hierarchy. + + @@ -212,6 +219,67 @@ + + MemoryLow=bytes + + + Specify the best-effort memory usage protection of the executed processes in this unit. If the memory + usages of this unit and all its ancestors are below their low boundaries, this unit's memory won't be + reclaimed as long as memory can be reclaimed from unprotected units. + + Takes a memory size in bytes. If the value is suffixed with K, M, G or T, the specified memory size is + parsed as Kilobytes, Megabytes, Gigabytes, or Terabytes (with the base 1024), respectively. This controls the + memory.low control group attribute. For details about this control group attribute, see + cgroup-v2.txt. + + Implies MemoryAccounting=true. + + This setting is supported only if the unified control group hierarchy is used. + + + + + MemoryHigh=bytes + + + Specify the high limit on memory usage of the executed processes in this unit. Memory usage may go + above the limit if unavoidable, but the processes are heavily slowed down and memory is taken away + aggressively in such cases. This is the main mechanism to control memory usage of a unit. + + Takes a memory size in bytes. If the value is suffixed with K, M, G or T, the specified memory size is + parsed as Kilobytes, Megabytes, Gigabytes, or Terabytes (with the base 1024), respectively. If assigned the + special value max, no memory limit is applied. This controls the + memory.high control group attribute. For details about this control group attribute, see + cgroup-v2.txt. + + Implies MemoryAccounting=true. + + This setting is supported only if the unified control group hierarchy is used. + + + + + MemoryMax=bytes + + + Specify the absolute limit on memory usage of the executed processes in this unit. If memory usage + cannot be contained under the limit, out-of-memory killer is invoked inside the unit. It is recommended to + use MemoryHigh= as the main control mechanism and use MemoryMax= as the + last line of defense. + + Takes a memory size in bytes. If the value is suffixed with K, M, G or T, the specified memory size is + parsed as Kilobytes, Megabytes, Gigabytes, or Terabytes (with the base 1024), respectively. If assigned the + special value max, no memory limit is applied. This controls the + memory.max control group attribute. For details about this control group attribute, see + cgroup-v2.txt. + + Implies MemoryAccounting=true. + + This setting is supported only if the unified control group hierarchy is used. Use + MemoryLimit= on systems using the legacy control group hierarchy. + + + MemoryLimit=bytes @@ -230,6 +298,9 @@ url="https://www.kernel.org/doc/Documentation/cgroup-v1/memory.txt">memory.txt. Implies MemoryAccounting=true. + + This setting is supported only if the legacy control group hierarchy is used. Use + MemoryMax= on systems using the unified control group hierarchy. diff --git a/src/core/cgroup.c b/src/core/cgroup.c index 0fb63b1bd16..fbe69df4e99 100644 --- a/src/core/cgroup.c +++ b/src/core/cgroup.c @@ -46,7 +46,10 @@ void cgroup_context_init(CGroupContext *c) { c->startup_cpu_shares = CGROUP_CPU_SHARES_INVALID; c->cpu_quota_per_sec_usec = USEC_INFINITY; - c->memory_limit = (uint64_t) -1; + c->memory_high = CGROUP_LIMIT_MAX; + c->memory_max = CGROUP_LIMIT_MAX; + + c->memory_limit = CGROUP_LIMIT_MAX; c->io_weight = CGROUP_WEIGHT_INVALID; c->startup_io_weight = CGROUP_WEIGHT_INVALID; @@ -147,6 +150,9 @@ void cgroup_context_dump(CGroupContext *c, FILE* f, const char *prefix) { "%sStartupIOWeight=%" PRIu64 "\n" "%sBlockIOWeight=%" PRIu64 "\n" "%sStartupBlockIOWeight=%" PRIu64 "\n" + "%sMemoryLow=%" PRIu64 "\n" + "%sMemoryHigh=%" PRIu64 "\n" + "%sMemoryMax=%" PRIu64 "\n" "%sMemoryLimit=%" PRIu64 "\n" "%sTasksMax=%" PRIu64 "\n" "%sDevicePolicy=%s\n" @@ -163,6 +169,9 @@ void cgroup_context_dump(CGroupContext *c, FILE* f, const char *prefix) { prefix, c->startup_io_weight, prefix, c->blockio_weight, prefix, c->startup_blockio_weight, + prefix, c->memory_low, + prefix, c->memory_high, + prefix, c->memory_max, prefix, c->memory_limit, prefix, c->tasks_max, prefix, cgroup_device_policy_to_string(c->device_policy), @@ -496,6 +505,23 @@ static unsigned cgroup_apply_blkio_device_limit(const char *path, const char *de return n; } +static bool cgroup_context_has_unified_memory_config(CGroupContext *c) { + return c->memory_low > 0 || c->memory_high != CGROUP_LIMIT_MAX || c->memory_max != CGROUP_LIMIT_MAX; +} + +static void cgroup_apply_unified_memory_limit(const char *path, const char *file, uint64_t v) { + char buf[DECIMAL_STR_MAX(uint64_t) + 1] = "max"; + int r; + + if (v != CGROUP_LIMIT_MAX) + xsprintf(buf, "%" PRIu64 "\n", v); + + r = cg_set_attribute("memory", path, file, buf); + if (r < 0) + log_full_errno(IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r, + "Failed to set %s on %s: %m", file, path); +} + void cgroup_context_apply(CGroupContext *c, CGroupMask mask, const char *path, ManagerState state) { bool is_root; int r; @@ -662,26 +688,30 @@ void cgroup_context_apply(CGroupContext *c, CGroupMask mask, const char *path, M } if ((mask & CGROUP_MASK_MEMORY) && !is_root) { - if (c->memory_limit != (uint64_t) -1) { + if (cg_unified() > 0) { + uint64_t max = c->memory_max; + + if (cgroup_context_has_unified_memory_config(c)) + max = c->memory_max; + else + max = c->memory_limit; + + cgroup_apply_unified_memory_limit(path, "memory.low", c->memory_low); + cgroup_apply_unified_memory_limit(path, "memory.high", c->memory_high); + cgroup_apply_unified_memory_limit(path, "memory.max", max); + } else { char buf[DECIMAL_STR_MAX(uint64_t) + 1]; - sprintf(buf, "%" PRIu64 "\n", c->memory_limit); - - if (cg_unified() <= 0) - r = cg_set_attribute("memory", path, "memory.limit_in_bytes", buf); + if (c->memory_limit != CGROUP_LIMIT_MAX) + xsprintf(buf, "%" PRIu64 "\n", c->memory_limit); else - r = cg_set_attribute("memory", path, "memory.max", buf); + xsprintf(buf, "%" PRIu64 "\n", c->memory_max); - } else { - if (cg_unified() <= 0) - r = cg_set_attribute("memory", path, "memory.limit_in_bytes", "-1"); - else - r = cg_set_attribute("memory", path, "memory.max", "max"); + r = cg_set_attribute("memory", path, "memory.limit_in_bytes", buf); + if (r < 0) + log_full_errno(IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r, + "Failed to set memory.limit_in_bytes on %s: %m", path); } - - if (r < 0) - log_full_errno(IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r, - "Failed to set memory.limit_in_bytes/memory.max on %s: %m", path); } if ((mask & CGROUP_MASK_DEVICES) && !is_root) { @@ -778,7 +808,8 @@ CGroupMask cgroup_context_get_mask(CGroupContext *c) { mask |= CGROUP_MASK_IO | CGROUP_MASK_BLKIO; if (c->memory_accounting || - c->memory_limit != (uint64_t) -1) + c->memory_limit != CGROUP_LIMIT_MAX || + cgroup_context_has_unified_memory_config(c)) mask |= CGROUP_MASK_MEMORY; if (c->device_allow || diff --git a/src/core/cgroup.h b/src/core/cgroup.h index 2b1edbafc45..ff87adfba16 100644 --- a/src/core/cgroup.h +++ b/src/core/cgroup.h @@ -94,6 +94,10 @@ struct CGroupContext { LIST_HEAD(CGroupIODeviceWeight, io_device_weights); LIST_HEAD(CGroupIODeviceLimit, io_device_limits); + uint64_t memory_low; + uint64_t memory_high; + uint64_t memory_max; + /* For legacy hierarchies */ uint64_t cpu_shares; uint64_t startup_cpu_shares; diff --git a/src/core/dbus-cgroup.c b/src/core/dbus-cgroup.c index d6053581f80..27050b4507c 100644 --- a/src/core/dbus-cgroup.c +++ b/src/core/dbus-cgroup.c @@ -228,6 +228,9 @@ const sd_bus_vtable bus_cgroup_vtable[] = { SD_BUS_PROPERTY("BlockIOReadBandwidth", "a(st)", property_get_blockio_device_bandwidths, 0, 0), SD_BUS_PROPERTY("BlockIOWriteBandwidth", "a(st)", property_get_blockio_device_bandwidths, 0, 0), SD_BUS_PROPERTY("MemoryAccounting", "b", bus_property_get_bool, offsetof(CGroupContext, memory_accounting), 0), + SD_BUS_PROPERTY("MemoryLow", "t", NULL, offsetof(CGroupContext, memory_low), 0), + SD_BUS_PROPERTY("MemoryHigh", "t", NULL, offsetof(CGroupContext, memory_high), 0), + SD_BUS_PROPERTY("MemoryMax", "t", NULL, offsetof(CGroupContext, memory_max), 0), SD_BUS_PROPERTY("MemoryLimit", "t", NULL, offsetof(CGroupContext, memory_limit), 0), SD_BUS_PROPERTY("DevicePolicy", "s", property_get_cgroup_device_policy, offsetof(CGroupContext, device_policy), 0), SD_BUS_PROPERTY("DeviceAllow", "a(ss)", property_get_device_allow, 0, 0), @@ -826,6 +829,31 @@ int bus_cgroup_set_property( return 1; + } else if (STR_IN_SET(name, "MemoryLow", "MemoryHigh", "MemoryMax")) { + uint64_t v; + + r = sd_bus_message_read(message, "t", &v); + if (r < 0) + return r; + + if (mode != UNIT_CHECK) { + if (streq(name, "MemoryLow")) + c->memory_low = v; + else if (streq(name, "MemoryHigh")) + c->memory_high = v; + else + c->memory_max = v; + + unit_invalidate_cgroup(u, CGROUP_MASK_MEMORY); + + if (v == CGROUP_LIMIT_MAX) + unit_write_drop_in_private_format(u, mode, name, "%s=max\n", name); + else + unit_write_drop_in_private_format(u, mode, name, "%s=%" PRIu64 "\n", name, v); + } + + return 1; + } else if (streq(name, "MemoryLimit")) { uint64_t limit; diff --git a/src/core/load-fragment-gperf.gperf.m4 b/src/core/load-fragment-gperf.gperf.m4 index 81934189809..00bdc238ceb 100644 --- a/src/core/load-fragment-gperf.gperf.m4 +++ b/src/core/load-fragment-gperf.gperf.m4 @@ -117,6 +117,9 @@ $1.CPUShares, config_parse_cpu_shares, 0, $1.StartupCPUShares, config_parse_cpu_shares, 0, offsetof($1, cgroup_context.startup_cpu_shares) $1.CPUQuota, config_parse_cpu_quota, 0, offsetof($1, cgroup_context) $1.MemoryAccounting, config_parse_bool, 0, offsetof($1, cgroup_context.memory_accounting) +$1.MemoryLow, config_parse_memory_limit, 0, offsetof($1, cgroup_context) +$1.MemoryHigh, config_parse_memory_limit, 0, offsetof($1, cgroup_context) +$1.MemoryMax, config_parse_memory_limit, 0, offsetof($1, cgroup_context) $1.MemoryLimit, config_parse_memory_limit, 0, offsetof($1, cgroup_context) $1.DeviceAllow, config_parse_device_allow, 0, offsetof($1, cgroup_context) $1.DevicePolicy, config_parse_device_policy, 0, offsetof($1, cgroup_context.device_policy) diff --git a/src/core/load-fragment.c b/src/core/load-fragment.c index 86b4fb071b8..09d3f65c77c 100644 --- a/src/core/load-fragment.c +++ b/src/core/load-fragment.c @@ -2793,21 +2793,26 @@ int config_parse_memory_limit( void *userdata) { CGroupContext *c = data; - uint64_t bytes; + uint64_t bytes = CGROUP_LIMIT_MAX; int r; - if (isempty(rvalue) || streq(rvalue, "infinity")) { - c->memory_limit = (uint64_t) -1; - return 0; + if (!isempty(rvalue) && !streq(rvalue, "infinity") && !streq(rvalue, "max")) { + r = parse_size(rvalue, 1024, &bytes); + if (r < 0 || bytes < 1) { + log_syntax(unit, LOG_ERR, filename, line, r, "Memory limit '%s' invalid. Ignoring.", rvalue); + return 0; + } } - r = parse_size(rvalue, 1024, &bytes); - if (r < 0 || bytes < 1) { - log_syntax(unit, LOG_ERR, filename, line, r, "Memory limit '%s' invalid. Ignoring.", rvalue); - return 0; - } + if (streq(lvalue, "MemoryLow")) + c->memory_low = bytes; + else if (streq(lvalue, "MemoryHigh")) + c->memory_high = bytes; + else if (streq(lvalue, "MemoryMax")) + c->memory_max = bytes; + else + c->memory_limit = bytes; - c->memory_limit = bytes; return 0; } diff --git a/src/shared/bus-unit-util.c b/src/shared/bus-unit-util.c index f68c4a41ac5..502e98d9dc2 100644 --- a/src/shared/bus-unit-util.c +++ b/src/shared/bus-unit-util.c @@ -166,11 +166,11 @@ int bus_append_unit_property_assignment(sd_bus_message *m, const char *assignmen r = sd_bus_message_append(m, "v", "b", r); - } else if (streq(field, "MemoryLimit")) { + } else if (STR_IN_SET(field, "MemoryLow", "MemoryHigh", "MemoryMax", "MemoryLimit")) { uint64_t bytes; - if (isempty(eq) || streq(eq, "infinity")) - bytes = (uint64_t) -1; + if (isempty(eq) || streq(eq, "max") || streq(eq, "infinity")) + bytes = CGROUP_LIMIT_MAX; else { r = parse_size(eq, 1024, &bytes); if (r < 0) { diff --git a/src/systemctl/systemctl.c b/src/systemctl/systemctl.c index 0500593d062..b2ee00fab3e 100644 --- a/src/systemctl/systemctl.c +++ b/src/systemctl/systemctl.c @@ -3493,6 +3493,9 @@ typedef struct UnitStatusInfo { /* CGroup */ uint64_t memory_current; + uint64_t memory_low; + uint64_t memory_high; + uint64_t memory_max; uint64_t memory_limit; uint64_t cpu_usage_nsec; uint64_t tasks_current; @@ -3775,10 +3778,30 @@ static void print_status_info( printf(" Memory: %s", format_bytes(buf, sizeof(buf), i->memory_current)); - if (i->memory_limit != (uint64_t) -1) - printf(" (limit: %s)\n", format_bytes(buf, sizeof(buf), i->memory_limit)); - else - printf("\n"); + if (i->memory_low > 0 || i->memory_high != CGROUP_LIMIT_MAX || i->memory_max != CGROUP_LIMIT_MAX || + i->memory_limit != CGROUP_LIMIT_MAX) { + const char *prefix = ""; + + printf(" ("); + if (i->memory_low > 0) { + printf("%slow: %s", prefix, format_bytes(buf, sizeof(buf), i->memory_low)); + prefix = " "; + } + if (i->memory_high != CGROUP_LIMIT_MAX) { + printf("%shigh: %s", prefix, format_bytes(buf, sizeof(buf), i->memory_high)); + prefix = " "; + } + if (i->memory_max != CGROUP_LIMIT_MAX) { + printf("%smax: %s", prefix, format_bytes(buf, sizeof(buf), i->memory_max)); + prefix = " "; + } + if (i->memory_limit != CGROUP_LIMIT_MAX) { + printf("%slimit: %s", prefix, format_bytes(buf, sizeof(buf), i->memory_limit)); + prefix = " "; + } + printf(")"); + } + printf("\n"); } if (i->cpu_usage_nsec != (uint64_t) -1) { @@ -4007,6 +4030,12 @@ static int status_property(const char *name, sd_bus_message *m, UnitStatusInfo * i->assert_timestamp = (usec_t) u; else if (streq(name, "MemoryCurrent")) i->memory_current = u; + else if (streq(name, "MemoryLow")) + i->memory_low = u; + else if (streq(name, "MemoryHigh")) + i->memory_high = u; + else if (streq(name, "MemoryMax")) + i->memory_max = u; else if (streq(name, "MemoryLimit")) i->memory_limit = u; else if (streq(name, "TasksCurrent")) @@ -4500,6 +4529,8 @@ static int show_one( _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL; UnitStatusInfo info = { .memory_current = (uint64_t) -1, + .memory_high = CGROUP_LIMIT_MAX, + .memory_max = CGROUP_LIMIT_MAX, .memory_limit = (uint64_t) -1, .cpu_usage_nsec = (uint64_t) -1, .tasks_current = (uint64_t) -1,