From aac3384e56fc863c1de0edf40ce793c1fb2eed90 Mon Sep 17 00:00:00 2001 From: Florian Schmaus Date: Wed, 8 Nov 2023 19:22:06 +0100 Subject: [PATCH] cgroup: add support for memory.swap.peak --- NEWS | 5 ++- man/org.freedesktop.systemd1.xml | 66 ++++++++++++++++++++++++++------ src/core/cgroup.c | 54 ++++++++++++++++++++++++++ src/core/cgroup.h | 1 + src/core/dbus-unit.c | 24 ++++++++++++ src/core/unit.c | 25 ++++++++++-- src/core/unit.h | 3 ++ src/run/run.c | 6 +++ src/systemctl/systemctl-show.c | 9 +++++ 9 files changed, 176 insertions(+), 17 deletions(-) diff --git a/NEWS b/NEWS index c9f1599f78c..da8008107a4 100644 --- a/NEWS +++ b/NEWS @@ -130,8 +130,9 @@ CHANGES WITH 255 in spe: machinectl bind and mount-image verbs will now cause the new mount to replace the old mount (if any), instead of overmounting it. - * Units now have a MemoryPeak property, which contains the value of - cgroup v2's memory.peak property. + * Units now have MemoryPeak and MemorySwapPeak properties, which + contain the values of cgroup v2's memory.peak and memory.swap.peak + attributes, respectively. 
TPM2 Support + Disk Encryption & Authentication: diff --git a/man/org.freedesktop.systemd1.xml b/man/org.freedesktop.systemd1.xml index 466d71c55b1..4a110d1e241 100644 --- a/man/org.freedesktop.systemd1.xml +++ b/man/org.freedesktop.systemd1.xml @@ -2778,6 +2778,8 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2eservice { @org.freedesktop.DBus.Property.EmitsChangedSignal("false") readonly t MemoryPeak = ...; @org.freedesktop.DBus.Property.EmitsChangedSignal("false") + readonly t MemorySwapPeak = ...; + @org.freedesktop.DBus.Property.EmitsChangedSignal("false") readonly t MemoryAvailable = ...; @org.freedesktop.DBus.Property.EmitsChangedSignal("false") readonly t CPUUsageNSec = ...; @@ -3407,6 +3409,8 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2eservice { + + @@ -4041,6 +4045,8 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2eservice { + + @@ -4839,6 +4845,8 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2esocket { @org.freedesktop.DBus.Property.EmitsChangedSignal("false") readonly t MemoryPeak = ...; @org.freedesktop.DBus.Property.EmitsChangedSignal("false") + readonly t MemorySwapPeak = ...; + @org.freedesktop.DBus.Property.EmitsChangedSignal("false") readonly t MemoryAvailable = ...; @org.freedesktop.DBus.Property.EmitsChangedSignal("false") readonly t CPUUsageNSec = ...; @@ -5478,6 +5486,8 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2esocket { + + @@ -6094,6 +6104,8 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2esocket { + + @@ -6766,6 +6778,8 @@ node /org/freedesktop/systemd1/unit/home_2emount { @org.freedesktop.DBus.Property.EmitsChangedSignal("false") readonly t MemoryPeak = ...; @org.freedesktop.DBus.Property.EmitsChangedSignal("false") + readonly t MemorySwapPeak = ...; + @org.freedesktop.DBus.Property.EmitsChangedSignal("false") readonly t MemoryAvailable = ...; @org.freedesktop.DBus.Property.EmitsChangedSignal("false") readonly t CPUUsageNSec = ...; @@ -7333,6 +7347,8 @@ node 
/org/freedesktop/systemd1/unit/home_2emount { + + @@ -7863,6 +7879,8 @@ node /org/freedesktop/systemd1/unit/home_2emount { + + @@ -8658,6 +8676,8 @@ node /org/freedesktop/systemd1/unit/dev_2dsda3_2eswap { @org.freedesktop.DBus.Property.EmitsChangedSignal("false") readonly t MemoryPeak = ...; @org.freedesktop.DBus.Property.EmitsChangedSignal("false") + readonly t MemorySwapPeak = ...; + @org.freedesktop.DBus.Property.EmitsChangedSignal("false") readonly t MemoryAvailable = ...; @org.freedesktop.DBus.Property.EmitsChangedSignal("false") readonly t CPUUsageNSec = ...; @@ -9211,6 +9231,8 @@ node /org/freedesktop/systemd1/unit/dev_2dsda3_2eswap { + + @@ -9727,6 +9749,8 @@ node /org/freedesktop/systemd1/unit/dev_2dsda3_2eswap { + + @@ -10381,6 +10405,8 @@ node /org/freedesktop/systemd1/unit/system_2eslice { @org.freedesktop.DBus.Property.EmitsChangedSignal("false") readonly t MemoryPeak = ...; @org.freedesktop.DBus.Property.EmitsChangedSignal("false") + readonly t MemorySwapPeak = ...; + @org.freedesktop.DBus.Property.EmitsChangedSignal("false") readonly t MemoryAvailable = ...; @org.freedesktop.DBus.Property.EmitsChangedSignal("false") readonly t CPUUsageNSec = ...; @@ -10560,6 +10586,8 @@ node /org/freedesktop/systemd1/unit/system_2eslice { + + @@ -10740,6 +10768,8 @@ node /org/freedesktop/systemd1/unit/system_2eslice { + + @@ -10948,6 +10978,8 @@ node /org/freedesktop/systemd1/unit/session_2d1_2escope { @org.freedesktop.DBus.Property.EmitsChangedSignal("false") readonly t MemoryPeak = ...; @org.freedesktop.DBus.Property.EmitsChangedSignal("false") + readonly t MemorySwapPeak = ...; + @org.freedesktop.DBus.Property.EmitsChangedSignal("false") readonly t MemoryAvailable = ...; @org.freedesktop.DBus.Property.EmitsChangedSignal("false") readonly t CPUUsageNSec = ...; @@ -11147,6 +11179,8 @@ node /org/freedesktop/systemd1/unit/session_2d1_2escope { + + @@ -11357,6 +11391,8 @@ node /org/freedesktop/systemd1/unit/session_2d1_2escope { + + @@ -11753,8 +11789,9 @@ $ gdbus 
introspect --system --dest org.freedesktop.systemd1 \ ExtensionImagePolicy were added in version 254. NFTSet, SetLoginEnvironment, - CoredumpReceive, and - MemoryPeak were added in version 255. + CoredumpReceive, + MemoryPeak, and + MemorySwapPeak were added in version 255. Socket Unit Objects @@ -11781,8 +11818,9 @@ $ gdbus introspect --system --dest org.freedesktop.systemd1 \ PollLimitBurst, NFTSet, SetLoginEnvironment, - CoredumpReceive, and - MemoryPeak were added in version 255. + CoredumpReceive, + MemoryPeak, and + MemorySwapPeak were added in version 255. Mount Unit Objects @@ -11807,8 +11845,9 @@ $ gdbus introspect --system --dest org.freedesktop.systemd1 \ ExtensionImagePolicy were added in version 254. NFTSet, SetLoginEnvironment, - CoredumpReceive, and - MemoryPeak were added in version 255. + CoredumpReceive, + MemoryPeak, and + MemorySwapPeak were added in version 255. Swap Unit Objects @@ -11833,8 +11872,9 @@ $ gdbus introspect --system --dest org.freedesktop.systemd1 \ ExtensionImagePolicy were added in version 254. NFTSet, SetLoginEnvironment, - CoredumpReceive, and - MemoryPeak were added in version 255. + CoredumpReceive, + MemoryPeak, and + MemorySwapPeak were added in version 255. Slice Unit Objects @@ -11850,8 +11890,9 @@ $ gdbus introspect --system --dest org.freedesktop.systemd1 \ MemoryPressureWatch, and MemoryPressureThresholdUSec were added in version 254. NFTSet, - CoredumpReceive, and - MemoryPeak were added in version 255. + CoredumpReceive, + MemoryPeak, and + MemorySwapPeak were added in version 255. Scope Unit Objects @@ -11868,8 +11909,9 @@ $ gdbus introspect --system --dest org.freedesktop.systemd1 \ MemoryPressureWatch, and MemoryPressureThresholdUSec were added in version 254. NFTSet, - CoredumpReceive, and - MemoryPeak were added in version 255. + CoredumpReceive, + MemoryPeak, and + MemorySwapPeak were added in version 255. 
Job Objects diff --git a/src/core/cgroup.c b/src/core/cgroup.c index 53339f52fe6..43e38de78de 100644 --- a/src/core/cgroup.c +++ b/src/core/cgroup.c @@ -4094,6 +4094,60 @@ int unit_get_memory_peak(Unit *u, uint64_t *ret) { return 0; } +static int unit_get_memory_swap_peak_raw(Unit *u, uint64_t *ret) { + int r; + + assert(u); + assert(ret); + + if (!u->cgroup_path) + return -ENODATA; + + /* The root cgroup doesn't expose this information. */ + if (unit_has_host_root_cgroup(u)) + return -ENODATA; + + if ((u->cgroup_realized_mask & CGROUP_MASK_MEMORY) == 0) + return -ENODATA; + + r = cg_all_unified(); + if (r < 0) + return r; + if (!r) + return -ENODATA; + + return cg_get_attribute_as_uint64("memory", u->cgroup_path, "memory.swap.peak", ret); +} + +int unit_get_memory_swap_peak(Unit *u, uint64_t *ret) { + uint64_t bytes; + int r; + + assert(u); + assert(ret); + + if (!UNIT_CGROUP_BOOL(u, memory_accounting)) + return -ENODATA; + + r = unit_get_memory_swap_peak_raw(u, &bytes); + if (r == -ENODATA && u->memory_swap_peak_last != UINT64_MAX) { + /* If we can't get the memory swap peak anymore (because the cgroup was already removed, for example), + * use our cached value. 
*/ + + if (ret) + *ret = u->memory_swap_peak_last; + return 0; + } + if (r < 0) + return r; + + u->memory_swap_peak_last = bytes; + if (ret) + *ret = bytes; + + return 0; +} + int unit_get_tasks_current(Unit *u, uint64_t *ret) { assert(u); assert(ret); diff --git a/src/core/cgroup.h b/src/core/cgroup.h index 0b073672a3a..836946192ab 100644 --- a/src/core/cgroup.h +++ b/src/core/cgroup.h @@ -354,6 +354,7 @@ int unit_synthesize_cgroup_empty_event(Unit *u); int unit_get_memory_current(Unit *u, uint64_t *ret); int unit_get_memory_peak(Unit *u, uint64_t *ret); +int unit_get_memory_swap_peak(Unit *u, uint64_t *ret); int unit_get_memory_available(Unit *u, uint64_t *ret); int unit_get_tasks_current(Unit *u, uint64_t *ret); int unit_get_cpu_usage(Unit *u, nsec_t *ret); diff --git a/src/core/dbus-unit.c b/src/core/dbus-unit.c index b3029ec1580..c229972af51 100644 --- a/src/core/dbus-unit.c +++ b/src/core/dbus-unit.c @@ -1103,6 +1103,29 @@ static int property_get_peak_memory( return sd_bus_message_append(reply, "t", sz); } +static int property_get_peak_swap_memory( + sd_bus *bus, + const char *path, + const char *interface, + const char *property, + sd_bus_message *reply, + void *userdata, + sd_bus_error *error) { + + uint64_t sz = UINT64_MAX; + Unit *u = ASSERT_PTR(userdata); + int r; + + assert(bus); + assert(reply); + + r = unit_get_memory_swap_peak(u, &sz); + if (r < 0 && r != -ENODATA) + log_unit_warning_errno(u, r, "Failed to get memory.swap.peak attribute: %m"); + + return sd_bus_message_append(reply, "t", sz); +} + static int property_get_available_memory( sd_bus *bus, const char *path, @@ -1560,6 +1583,7 @@ const sd_bus_vtable bus_unit_cgroup_vtable[] = { SD_BUS_PROPERTY("ControlGroupId", "t", NULL, offsetof(Unit, cgroup_id), 0), SD_BUS_PROPERTY("MemoryCurrent", "t", property_get_current_memory, 0, 0), SD_BUS_PROPERTY("MemoryPeak", "t", property_get_peak_memory, 0, 0), + SD_BUS_PROPERTY("MemorySwapPeak", "t", property_get_peak_swap_memory, 0, 0), 
SD_BUS_PROPERTY("MemoryAvailable", "t", property_get_available_memory, 0, 0), SD_BUS_PROPERTY("CPUUsageNSec", "t", property_get_cpu_usage, 0, 0), SD_BUS_PROPERTY("EffectiveCPUs", "ay", property_get_cpuset_cpus, 0, 0), diff --git a/src/core/unit.c b/src/core/unit.c index 4ac8d0adc03..924828d2d84 100644 --- a/src/core/unit.c +++ b/src/core/unit.c @@ -115,6 +115,7 @@ Unit* unit_new(Manager *m, size_t size) { u->ref_gid = GID_INVALID; u->cpu_usage_last = NSEC_INFINITY; u->memory_peak_last = UINT64_MAX; + u->memory_swap_peak_last = UINT64_MAX; u->cgroup_invalidated_mask |= CGROUP_MASK_BPF_FIREWALL; u->failure_action_exit_status = u->success_action_exit_status = -1; @@ -2319,14 +2320,14 @@ static int raise_level(int log_level, bool condition_info, bool condition_notice } static int unit_log_resources(Unit *u) { - struct iovec iovec[1 + 1 + _CGROUP_IP_ACCOUNTING_METRIC_MAX + _CGROUP_IO_ACCOUNTING_METRIC_MAX + 4]; + struct iovec iovec[1 + 2 + _CGROUP_IP_ACCOUNTING_METRIC_MAX + _CGROUP_IO_ACCOUNTING_METRIC_MAX + 4]; bool any_traffic = false, have_ip_accounting = false, any_io = false, have_io_accounting = false; _cleanup_free_ char *igress = NULL, *egress = NULL, *rr = NULL, *wr = NULL; int log_level = LOG_DEBUG; /* May be raised if resources consumed over a threshold */ size_t n_message_parts = 0, n_iovec = 0; - char* message_parts[1 + 1 + 2 + 2 + 1], *t; + char* message_parts[1 + 2 + 2 + 2 + 1], *t; nsec_t nsec = NSEC_INFINITY; - uint64_t memory_peak = UINT64_MAX; + uint64_t memory_peak = UINT64_MAX, memory_swap_peak = UINT64_MAX; int r; const char* const ip_fields[_CGROUP_IP_ACCOUNTING_METRIC_MAX] = { [CGROUP_IP_INGRESS_BYTES] = "IP_METRIC_INGRESS_BYTES", @@ -2388,6 +2389,24 @@ static int unit_log_resources(Unit *u) { message_parts[n_message_parts++] = t; } + (void) unit_get_memory_swap_peak(u, &memory_swap_peak); + if (memory_swap_peak != UINT64_MAX) { + /* Format peak swap memory for inclusion in the structured log message */ + if (asprintf(&t, "MEMORY_SWAP_PEAK=%" 
PRIu64, memory_swap_peak) < 0) { + r = log_oom(); + goto finish; + } + iovec[n_iovec++] = IOVEC_MAKE_STRING(t); + + /* Format peak swap memory for inclusion in the human language message string */ + t = strjoin(FORMAT_BYTES(memory_swap_peak), " memory swap peak"); + if (!t) { + r = log_oom(); + goto finish; + } + message_parts[n_message_parts++] = t; + } + for (CGroupIOAccountingMetric k = 0; k < _CGROUP_IO_ACCOUNTING_METRIC_MAX; k++) { uint64_t value = UINT64_MAX; diff --git a/src/core/unit.h b/src/core/unit.h index 4070ccc5d46..ded2f9d7d54 100644 --- a/src/core/unit.h +++ b/src/core/unit.h @@ -368,6 +368,9 @@ typedef struct Unit { /* Most recently read value of memory.peak */ uint64_t memory_peak_last; + /* Most recently read value of memory.swap.peak */ + uint64_t memory_swap_peak_last; + /* The current counter of OOM kills initiated by systemd-oomd */ uint64_t managed_oom_kill_last; diff --git a/src/run/run.c b/src/run/run.c index 1b20a8f4591..fdca8fa9811 100644 --- a/src/run/run.c +++ b/src/run/run.c @@ -1050,6 +1050,7 @@ typedef struct RunContext { char *result; uint64_t cpu_usage_nsec; uint64_t memory_peak; + uint64_t memory_swap_peak; uint64_t ip_ingress_bytes; uint64_t ip_egress_bytes; uint64_t io_read_bytes; @@ -1112,6 +1113,7 @@ static int run_context_update(RunContext *c, const char *path) { { "ExecMainStatus", "i", NULL, offsetof(RunContext, exit_status) }, { "CPUUsageNSec", "t", NULL, offsetof(RunContext, cpu_usage_nsec) }, { "MemoryPeak", "t", NULL, offsetof(RunContext, memory_peak) }, + { "MemorySwapPeak", "t", NULL, offsetof(RunContext, memory_swap_peak) }, { "IPIngressBytes", "t", NULL, offsetof(RunContext, ip_ingress_bytes) }, { "IPEgressBytes", "t", NULL, offsetof(RunContext, ip_egress_bytes) }, { "IOReadBytes", "t", NULL, offsetof(RunContext, io_read_bytes) }, @@ -1394,6 +1396,7 @@ static int start_transient_service(sd_bus *bus) { _cleanup_(run_context_free) RunContext c = { .cpu_usage_nsec = NSEC_INFINITY, .memory_peak = UINT64_MAX, + 
.memory_swap_peak = UINT64_MAX, .ip_ingress_bytes = UINT64_MAX, .ip_egress_bytes = UINT64_MAX, .io_read_bytes = UINT64_MAX, @@ -1492,6 +1495,9 @@ static int start_transient_service(sd_bus *bus) { if (c.memory_peak != UINT64_MAX) log_info("Memory peak: %s", FORMAT_BYTES(c.memory_peak)); + if (c.memory_swap_peak != UINT64_MAX) + log_info("Memory swap peak: %s", FORMAT_BYTES(c.memory_swap_peak)); + if (c.ip_ingress_bytes != UINT64_MAX) log_info("IP traffic received: %s", FORMAT_BYTES(c.ip_ingress_bytes)); diff --git a/src/systemctl/systemctl-show.c b/src/systemctl/systemctl-show.c index 1d061abc6b6..a91abfc78e9 100644 --- a/src/systemctl/systemctl-show.c +++ b/src/systemctl/systemctl-show.c @@ -251,6 +251,7 @@ typedef struct UnitStatusInfo { /* CGroup */ uint64_t memory_current; uint64_t memory_peak; + uint64_t memory_swap_peak; uint64_t memory_min; uint64_t memory_low; uint64_t startup_memory_low; @@ -703,7 +704,9 @@ static void print_status_info( if (i->memory_current != UINT64_MAX) { printf(" Memory: %s", FORMAT_BYTES(i->memory_current)); + bool show_memory_swap_peak = !IN_SET(i->memory_swap_peak, 0, CGROUP_LIMIT_MAX); if (i->memory_peak != CGROUP_LIMIT_MAX || + show_memory_swap_peak || i->memory_min > 0 || i->memory_low > 0 || i->startup_memory_low > 0 || i->memory_high != CGROUP_LIMIT_MAX || i->startup_memory_high != CGROUP_LIMIT_MAX || @@ -771,6 +774,10 @@ static void print_status_info( printf("%speak: %s", prefix, FORMAT_BYTES(i->memory_peak)); prefix = " "; } + if (show_memory_swap_peak) { + printf("%sswap peak: %s", prefix, FORMAT_BYTES(i->memory_swap_peak)); + prefix = " "; + } printf(")"); } printf("\n"); @@ -2038,6 +2045,7 @@ static int show_one( { "What", "s", NULL, offsetof(UnitStatusInfo, what) }, { "MemoryCurrent", "t", NULL, offsetof(UnitStatusInfo, memory_current) }, { "MemoryPeak", "t", NULL, offsetof(UnitStatusInfo, memory_peak) }, + { "MemorySwapPeak", "t", NULL, offsetof(UnitStatusInfo, memory_swap_peak) }, { "MemoryAvailable", "t", NULL, 
offsetof(UnitStatusInfo, memory_available) }, { "DefaultMemoryMin", "t", NULL, offsetof(UnitStatusInfo, default_memory_min) }, { "DefaultMemoryLow", "t", NULL, offsetof(UnitStatusInfo, default_memory_low) }, @@ -2095,6 +2103,7 @@ static int show_one( .startup_memory_zswap_max = CGROUP_LIMIT_MAX, .memory_limit = CGROUP_LIMIT_MAX, .memory_peak = CGROUP_LIMIT_MAX, + .memory_swap_peak = CGROUP_LIMIT_MAX, .memory_available = CGROUP_LIMIT_MAX, .cpu_usage_nsec = UINT64_MAX, .tasks_current = UINT64_MAX,