From 6c71db763cb482c30870359dd3d188a6aa23c4da Mon Sep 17 00:00:00 2001 From: Florian Schmaus Date: Mon, 6 Nov 2023 13:15:55 +0100 Subject: [PATCH] cgroup: add support for memory.peak Linux's Control Group v2 interface exposes memory.peak, which contains the "max memory usage recorded for the cgroup and its descendants since the creation of the cgroup." This commit adds a new property "MemoryPeak" for units and makes "systemctl show" display this value if it is available. Fixes #29878. Signed-off-by: Florian Schmaus --- NEWS | 3 ++ man/org.freedesktop.systemd1.xml | 66 ++++++++++++++++++++++++++------ src/core/cgroup.c | 54 ++++++++++++++++++++++++++ src/core/cgroup.h | 1 + src/core/dbus-unit.c | 24 ++++++++++++ src/core/unit.c | 1 + src/core/unit.h | 3 ++ src/systemctl/systemctl-show.c | 10 ++++- 8 files changed, 149 insertions(+), 13 deletions(-) diff --git a/NEWS b/NEWS index dd68a044798..38b94ad42be 100644 --- a/NEWS +++ b/NEWS @@ -130,6 +130,9 @@ CHANGES WITH 255 in spe: machinectl bind and mount-image verbs will now cause the new mount to replace the old mount (if any), instead of overmounting it. + * Units now have a MemoryPeak property, which contains the value of + cgroup v2's memory.peak property. 
+ TPM2 Support + Disk Encryption & Authentication: * systemd-cryptenroll now allows specifying a PCR bank and explicit hash diff --git a/man/org.freedesktop.systemd1.xml b/man/org.freedesktop.systemd1.xml index 199ce4f14ca..466d71c55b1 100644 --- a/man/org.freedesktop.systemd1.xml +++ b/man/org.freedesktop.systemd1.xml @@ -2776,6 +2776,8 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2eservice { @org.freedesktop.DBus.Property.EmitsChangedSignal("false") readonly t MemoryCurrent = ...; @org.freedesktop.DBus.Property.EmitsChangedSignal("false") + readonly t MemoryPeak = ...; + @org.freedesktop.DBus.Property.EmitsChangedSignal("false") readonly t MemoryAvailable = ...; @org.freedesktop.DBus.Property.EmitsChangedSignal("false") readonly t CPUUsageNSec = ...; @@ -3403,6 +3405,8 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2eservice { + + @@ -4035,6 +4039,8 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2eservice { + + @@ -4831,6 +4837,8 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2esocket { @org.freedesktop.DBus.Property.EmitsChangedSignal("false") readonly t MemoryCurrent = ...; @org.freedesktop.DBus.Property.EmitsChangedSignal("false") + readonly t MemoryPeak = ...; + @org.freedesktop.DBus.Property.EmitsChangedSignal("false") readonly t MemoryAvailable = ...; @org.freedesktop.DBus.Property.EmitsChangedSignal("false") readonly t CPUUsageNSec = ...; @@ -5468,6 +5476,8 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2esocket { + + @@ -6082,6 +6092,8 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2esocket { + + @@ -6752,6 +6764,8 @@ node /org/freedesktop/systemd1/unit/home_2emount { @org.freedesktop.DBus.Property.EmitsChangedSignal("false") readonly t MemoryCurrent = ...; @org.freedesktop.DBus.Property.EmitsChangedSignal("false") + readonly t MemoryPeak = ...; + @org.freedesktop.DBus.Property.EmitsChangedSignal("false") readonly t MemoryAvailable = ...; @org.freedesktop.DBus.Property.EmitsChangedSignal("false") readonly t 
CPUUsageNSec = ...; @@ -7317,6 +7331,8 @@ node /org/freedesktop/systemd1/unit/home_2emount { + + @@ -7845,6 +7861,8 @@ node /org/freedesktop/systemd1/unit/home_2emount { + + @@ -8638,6 +8656,8 @@ node /org/freedesktop/systemd1/unit/dev_2dsda3_2eswap { @org.freedesktop.DBus.Property.EmitsChangedSignal("false") readonly t MemoryCurrent = ...; @org.freedesktop.DBus.Property.EmitsChangedSignal("false") + readonly t MemoryPeak = ...; + @org.freedesktop.DBus.Property.EmitsChangedSignal("false") readonly t MemoryAvailable = ...; @org.freedesktop.DBus.Property.EmitsChangedSignal("false") readonly t CPUUsageNSec = ...; @@ -9189,6 +9209,8 @@ node /org/freedesktop/systemd1/unit/dev_2dsda3_2eswap { + + @@ -9703,6 +9725,8 @@ node /org/freedesktop/systemd1/unit/dev_2dsda3_2eswap { + + @@ -10355,6 +10379,8 @@ node /org/freedesktop/systemd1/unit/system_2eslice { @org.freedesktop.DBus.Property.EmitsChangedSignal("false") readonly t MemoryCurrent = ...; @org.freedesktop.DBus.Property.EmitsChangedSignal("false") + readonly t MemoryPeak = ...; + @org.freedesktop.DBus.Property.EmitsChangedSignal("false") readonly t MemoryAvailable = ...; @org.freedesktop.DBus.Property.EmitsChangedSignal("false") readonly t CPUUsageNSec = ...; @@ -10532,6 +10558,8 @@ node /org/freedesktop/systemd1/unit/system_2eslice { + + @@ -10710,6 +10738,8 @@ node /org/freedesktop/systemd1/unit/system_2eslice { + + @@ -10916,6 +10946,8 @@ node /org/freedesktop/systemd1/unit/session_2d1_2escope { @org.freedesktop.DBus.Property.EmitsChangedSignal("false") readonly t MemoryCurrent = ...; @org.freedesktop.DBus.Property.EmitsChangedSignal("false") + readonly t MemoryPeak = ...; + @org.freedesktop.DBus.Property.EmitsChangedSignal("false") readonly t MemoryAvailable = ...; @org.freedesktop.DBus.Property.EmitsChangedSignal("false") readonly t CPUUsageNSec = ...; @@ -11113,6 +11145,8 @@ node /org/freedesktop/systemd1/unit/session_2d1_2escope { + + @@ -11321,6 +11355,8 @@ node 
/org/freedesktop/systemd1/unit/session_2d1_2escope { + + @@ -11716,8 +11752,9 @@ $ gdbus introspect --system --dest org.freedesktop.systemd1 \ MountImagePolicy, and ExtensionImagePolicy were added in version 254. NFTSet, - SetLoginEnvironment and - CoredumpReceive were added in version 255. + SetLoginEnvironment, + CoredumpReceive, and + MemoryPeak were added in version 255. Socket Unit Objects @@ -11743,8 +11780,9 @@ $ gdbus introspect --system --dest org.freedesktop.systemd1 \ PollLimitIntervalUSec, PollLimitBurst, NFTSet, - SetLoginEnvironment and - CoredumpReceive were added in version 255. + SetLoginEnvironment, + CoredumpReceive, and + MemoryPeak were added in version 255. Mount Unit Objects @@ -11768,8 +11806,9 @@ $ gdbus introspect --system --dest org.freedesktop.systemd1 \ MountImagePolicy, and ExtensionImagePolicy were added in version 254. NFTSet, - SetLoginEnvironment and - CoredumpReceive were added in version 255. + SetLoginEnvironment, + CoredumpReceive, and + MemoryPeak were added in version 255. Swap Unit Objects @@ -11793,8 +11832,9 @@ $ gdbus introspect --system --dest org.freedesktop.systemd1 \ MountImagePolicy, and ExtensionImagePolicy were added in version 254. NFTSet, - SetLoginEnvironment and - CoredumpReceive were added in version 255. + SetLoginEnvironment, + CoredumpReceive, and + MemoryPeak were added in version 255. Slice Unit Objects @@ -11809,8 +11849,9 @@ $ gdbus introspect --system --dest org.freedesktop.systemd1 \ StartupMemoryZSwapMax, MemoryPressureWatch, and MemoryPressureThresholdUSec were added in version 254. - NFTSet and - CoredumpReceive were added in version 255. + NFTSet, + CoredumpReceive, and + MemoryPeak were added in version 255. Scope Unit Objects @@ -11826,8 +11867,9 @@ $ gdbus introspect --system --dest org.freedesktop.systemd1 \ StartupMemoryZSwapMax, MemoryPressureWatch, and MemoryPressureThresholdUSec were added in version 254. - NFTSet and - CoredumpReceive were added in version 255. 
+ NFTSet, + CoredumpReceive, and + MemoryPeak were added in version 255. Job Objects diff --git a/src/core/cgroup.c b/src/core/cgroup.c index 535f457fa83..53339f52fe6 100644 --- a/src/core/cgroup.c +++ b/src/core/cgroup.c @@ -4040,6 +4040,60 @@ int unit_get_memory_current(Unit *u, uint64_t *ret) { return cg_get_attribute_as_uint64("memory", u->cgroup_path, r > 0 ? "memory.current" : "memory.usage_in_bytes", ret); } +static int unit_get_memory_peak_raw(Unit *u, uint64_t *ret) { + int r; + + assert(u); + assert(ret); + + if (!u->cgroup_path) + return -ENODATA; + + /* The root cgroup doesn't expose this information. */ + if (unit_has_host_root_cgroup(u)) + return -ENODATA; + + if ((u->cgroup_realized_mask & CGROUP_MASK_MEMORY) == 0) + return -ENODATA; + + r = cg_all_unified(); + if (r < 0) + return r; + if (!r) + return -ENODATA; + + return cg_get_attribute_as_uint64("memory", u->cgroup_path, "memory.peak", ret); +} + +int unit_get_memory_peak(Unit *u, uint64_t *ret) { + uint64_t bytes; + int r; + + assert(u); + assert(ret); + + if (!UNIT_CGROUP_BOOL(u, memory_accounting)) + return -ENODATA; + + r = unit_get_memory_peak_raw(u, &bytes); + if (r == -ENODATA && u->memory_peak_last != UINT64_MAX) { + /* If we can't get the memory peak anymore (because the cgroup was already removed, for example), + * use our cached value. 
*/ + + if (ret) + *ret = u->memory_peak_last; + return 0; + } + if (r < 0) + return r; + + u->memory_peak_last = bytes; + if (ret) + *ret = bytes; + + return 0; +} + int unit_get_tasks_current(Unit *u, uint64_t *ret) { assert(u); assert(ret); diff --git a/src/core/cgroup.h b/src/core/cgroup.h index d7cc842835b..0b073672a3a 100644 --- a/src/core/cgroup.h +++ b/src/core/cgroup.h @@ -353,6 +353,7 @@ int unit_watch_all_pids(Unit *u); int unit_synthesize_cgroup_empty_event(Unit *u); int unit_get_memory_current(Unit *u, uint64_t *ret); +int unit_get_memory_peak(Unit *u, uint64_t *ret); int unit_get_memory_available(Unit *u, uint64_t *ret); int unit_get_tasks_current(Unit *u, uint64_t *ret); int unit_get_cpu_usage(Unit *u, nsec_t *ret); diff --git a/src/core/dbus-unit.c b/src/core/dbus-unit.c index 2fff6e135d3..b3029ec1580 100644 --- a/src/core/dbus-unit.c +++ b/src/core/dbus-unit.c @@ -1080,6 +1080,29 @@ static int property_get_current_memory( return sd_bus_message_append(reply, "t", sz); } +static int property_get_peak_memory( + sd_bus *bus, + const char *path, + const char *interface, + const char *property, + sd_bus_message *reply, + void *userdata, + sd_bus_error *error) { + + uint64_t sz = UINT64_MAX; + Unit *u = ASSERT_PTR(userdata); + int r; + + assert(bus); + assert(reply); + + r = unit_get_memory_peak(u, &sz); + if (r < 0 && r != -ENODATA) + log_unit_warning_errno(u, r, "Failed to get memory.peak attribute: %m"); + + return sd_bus_message_append(reply, "t", sz); +} + static int property_get_available_memory( sd_bus *bus, const char *path, @@ -1536,6 +1559,7 @@ const sd_bus_vtable bus_unit_cgroup_vtable[] = { SD_BUS_PROPERTY("ControlGroup", "s", property_get_cgroup, 0, 0), SD_BUS_PROPERTY("ControlGroupId", "t", NULL, offsetof(Unit, cgroup_id), 0), SD_BUS_PROPERTY("MemoryCurrent", "t", property_get_current_memory, 0, 0), + SD_BUS_PROPERTY("MemoryPeak", "t", property_get_peak_memory, 0, 0), SD_BUS_PROPERTY("MemoryAvailable", "t", property_get_available_memory, 0, 
0), SD_BUS_PROPERTY("CPUUsageNSec", "t", property_get_cpu_usage, 0, 0), SD_BUS_PROPERTY("EffectiveCPUs", "ay", property_get_cpuset_cpus, 0, 0), diff --git a/src/core/unit.c b/src/core/unit.c index c3613ca8fb5..02c5cbd1020 100644 --- a/src/core/unit.c +++ b/src/core/unit.c @@ -114,6 +114,7 @@ Unit* unit_new(Manager *m, size_t size) { u->ref_uid = UID_INVALID; u->ref_gid = GID_INVALID; u->cpu_usage_last = NSEC_INFINITY; + u->memory_peak_last = UINT64_MAX; u->cgroup_invalidated_mask |= CGROUP_MASK_BPF_FIREWALL; u->failure_action_exit_status = u->success_action_exit_status = -1; diff --git a/src/core/unit.h b/src/core/unit.h index 44d0cd2e41b..4070ccc5d46 100644 --- a/src/core/unit.h +++ b/src/core/unit.h @@ -365,6 +365,9 @@ typedef struct Unit { nsec_t cpu_usage_base; nsec_t cpu_usage_last; /* the most recently read value */ + /* Most recently read value of memory.peak */ + uint64_t memory_peak_last; + /* The current counter of OOM kills initiated by systemd-oomd */ uint64_t managed_oom_kill_last; diff --git a/src/systemctl/systemctl-show.c b/src/systemctl/systemctl-show.c index 0cb75d5dfc1..1d061abc6b6 100644 --- a/src/systemctl/systemctl-show.c +++ b/src/systemctl/systemctl-show.c @@ -250,6 +250,7 @@ typedef struct UnitStatusInfo { /* CGroup */ uint64_t memory_current; + uint64_t memory_peak; uint64_t memory_min; uint64_t memory_low; uint64_t startup_memory_low; @@ -702,7 +703,8 @@ static void print_status_info( if (i->memory_current != UINT64_MAX) { printf(" Memory: %s", FORMAT_BYTES(i->memory_current)); - if (i->memory_min > 0 || + if (i->memory_peak != CGROUP_LIMIT_MAX || + i->memory_min > 0 || i->memory_low > 0 || i->startup_memory_low > 0 || i->memory_high != CGROUP_LIMIT_MAX || i->startup_memory_high != CGROUP_LIMIT_MAX || i->memory_max != CGROUP_LIMIT_MAX || i->startup_memory_max != CGROUP_LIMIT_MAX || @@ -765,6 +767,10 @@ static void print_status_info( printf("%savailable: %s", prefix, FORMAT_BYTES(i->memory_available)); prefix = " "; } + if (i->memory_peak 
!= CGROUP_LIMIT_MAX) { + printf("%speak: %s", prefix, FORMAT_BYTES(i->memory_peak)); + prefix = " "; + } printf(")"); } printf("\n"); @@ -2031,6 +2037,7 @@ static int show_one( { "Where", "s", NULL, offsetof(UnitStatusInfo, where) }, { "What", "s", NULL, offsetof(UnitStatusInfo, what) }, { "MemoryCurrent", "t", NULL, offsetof(UnitStatusInfo, memory_current) }, + { "MemoryPeak", "t", NULL, offsetof(UnitStatusInfo, memory_peak) }, { "MemoryAvailable", "t", NULL, offsetof(UnitStatusInfo, memory_available) }, { "DefaultMemoryMin", "t", NULL, offsetof(UnitStatusInfo, default_memory_min) }, { "DefaultMemoryLow", "t", NULL, offsetof(UnitStatusInfo, default_memory_low) }, @@ -2087,6 +2094,7 @@ static int show_one( .memory_zswap_max = CGROUP_LIMIT_MAX, .startup_memory_zswap_max = CGROUP_LIMIT_MAX, .memory_limit = CGROUP_LIMIT_MAX, + .memory_peak = CGROUP_LIMIT_MAX, .memory_available = CGROUP_LIMIT_MAX, .cpu_usage_nsec = UINT64_MAX, .tasks_current = UINT64_MAX,