1
0
mirror of https://github.com/systemd/systemd.git synced 2024-12-27 07:22:31 +03:00

Merge pull request #26448 from poettering/sd-event-mempress

sd-event: add memory pressure event source
This commit is contained in:
Lennart Poettering 2023-02-22 16:23:59 +01:00 committed by GitHub
commit 921330af40
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
10 changed files with 1210 additions and 1 deletions

View File

@ -549,3 +549,18 @@ Whenever the system transitions to a new runtime phase, the specified PCR is
extended with a different string, to ensure that security policies for
TPM-bound secrets and other resources are limited to specific phases of the
runtime.
-- f9b0be465ad540d0850ad32172d57c21
Subject: Memory Trimmed
Defined-By: systemd
Support: %SUPPORT_URL%
Memory of process @_PID@ (@_COMM@) has been trimmed.
Either on user request or as result of a memory pressure event, memory of the
process has been trimmed, returning unneeded allocation caches and other
resources back to the OS kernel, making them available for other components of
the OS.
@TRIMMED_BYTES@ of memory were returned to the OS, which took @TRIMMED_USEC@
micro-seconds (µs).

View File

@ -555,6 +555,12 @@ manpages = [
'sd_event_source_set_io_fd',
'sd_event_source_set_io_fd_own'],
''],
['sd_event_add_memory_pressure',
'3',
['sd_event_source_set_memory_pressure_period',
'sd_event_source_set_memory_pressure_type',
'sd_event_trim_memory'],
''],
['sd_event_add_signal',
'3',
['SD_EVENT_SIGNAL_PROCMASK',

View File

@ -0,0 +1,270 @@
<?xml version='1.0'?>
<!DOCTYPE refentry PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN"
"http://www.oasis-open.org/docbook/xml/4.2/docbookx.dtd">
<!-- SPDX-License-Identifier: LGPL-2.1-or-later -->
<refentry id="sd_event_add_memory_pressure" xmlns:xi="http://www.w3.org/2001/XInclude">
<refentryinfo>
<title>sd_event_add_memory_pressure</title>
<productname>systemd</productname>
</refentryinfo>
<refmeta>
<refentrytitle>sd_event_add_memory_pressure</refentrytitle>
<manvolnum>3</manvolnum>
</refmeta>
<refnamediv>
<refname>sd_event_add_memory_pressure</refname>
<refname>sd_event_source_set_memory_pressure_type</refname>
<refname>sd_event_source_set_memory_pressure_period</refname>
<refname>sd_event_trim_memory</refname>
<refpurpose>Add and configure an event source run as result of memory pressure</refpurpose>
</refnamediv>
<refsynopsisdiv>
<funcsynopsis>
<funcsynopsisinfo>#include &lt;systemd/sd-event.h&gt;</funcsynopsisinfo>
<funcsynopsisinfo><token>typedef</token> struct sd_event_source sd_event_source;</funcsynopsisinfo>
<funcprototype>
<funcdef>int <function>sd_event_add_memory_pressure</function></funcdef>
<paramdef>sd_event *<parameter>event</parameter></paramdef>
<paramdef>sd_event_source **<parameter>ret_source</parameter></paramdef>
<paramdef>sd_event_handler_t <parameter>handler</parameter></paramdef>
<paramdef>void *<parameter>userdata</parameter></paramdef>
</funcprototype>
<funcprototype>
<funcdef>int <function>sd_event_source_set_memory_pressure_type</function></funcdef>
<paramdef>sd_event_source *<parameter>source</parameter></paramdef>
<paramdef>const char *<parameter>type</parameter></paramdef>
</funcprototype>
<funcprototype>
<funcdef>int <function>sd_event_source_set_memory_pressure_period</function></funcdef>
<paramdef>sd_event_source *<parameter>source</parameter></paramdef>
<paramdef>uint64_t <parameter>threshold_usec</parameter></paramdef>
<paramdef>uint64_t <parameter>window_usec</parameter></paramdef>
</funcprototype>
<funcprototype>
<funcdef>int <function>sd_event_trim_memory</function></funcdef>
<paramdef>void</paramdef>
</funcprototype>
</funcsynopsis>
</refsynopsisdiv>
<refsect1>
<title>Description</title>
<para><function>sd_event_add_memory_pressure()</function> adds a new event source that is triggered
whenever memory pressure is seen. This functionality is built around the Linux kernel's <ulink
url="https://docs.kernel.org/accounting/psi.html">Pressure Stall Information (PSI)</ulink> logic.</para>
<para>Expects an event loop object as first parameter, and returns the allocated event source object in
the second parameter, on success. The <parameter>handler</parameter> parameter is a function to call when
memory pressure is seen, or <constant>NULL</constant>. The handler function will be passed the
<parameter>userdata</parameter> pointer, which may be chosen freely by the caller. The handler may return
negative to signal an error (see below), other return values are ignored. If
<parameter>handler</parameter> is <constant>NULL</constant>, a default handler that compacts allocation
caches maintained by <filename>libsystemd</filename> as well as glibc (via <citerefentry
project='man-pages'><refentrytitle>malloc_trim</refentrytitle><manvolnum>3</manvolnum></citerefentry>)
will be used.</para>
<para>To destroy an event source object use
<citerefentry><refentrytitle>sd_event_source_unref</refentrytitle><manvolnum>3</manvolnum></citerefentry>,
but note that the event source is only removed from the event loop when all references to the event
source are dropped. To make sure an event source does not fire anymore, even if it is still referenced,
disable the event source using
<citerefentry><refentrytitle>sd_event_source_set_enabled</refentrytitle><manvolnum>3</manvolnum></citerefentry>
with <constant>SD_EVENT_OFF</constant>.</para>
<para>If the second parameter of <function>sd_event_add_memory_pressure()</function> is
<constant>NULL</constant> no reference to the event source object is returned. In this case the event
source is considered "floating", and will be destroyed implicitly when the event loop itself is
destroyed.</para>
<para>The event source will fire according to the following logic:</para>
<orderedlist>
<listitem><para>If the
<varname>$MEMORY_PRESSURE_WATCH</varname>/<varname>$MEMORY_PRESSURE_WRITE</varname> environment
variables are set at the time the event source is established, it will watch the file, FIFO or AF_UNIX
socket specified via <varname>$MEMORY_PRESSURE_WATCH</varname> (which must contain an absolute path
name) for <constant>POLLPRI</constant> (in case it is a regular file) or <constant>POLLIN</constant>
events (otherwise). After opening the inode, it will write the (decoded) Base64 data provided via
<varname>$MEMORY_PRESSURE_WRITE</varname> into it before it starts polling on it (the variable may be
unset in which case this is skipped). Typically, if used, <varname>$MEMORY_PRESSURE_WATCH</varname>
will contain a path such as <filename>/proc/pressure/memory</filename> or a path to a specific
<filename>memory.pressure</filename> file in the control group file system
(cgroupfs).</para></listitem>
<listitem><para>If these environment variables are not set, the local PSI interface file
<filename>memory.pressure</filename> of the control group the invoking process is running in is
used.</para></listitem>
<listitem><para>If that file does not exist, the system-wide PSI interface file
<filename>/proc/pressure/memory</filename> is watched instead.</para></listitem>
</orderedlist>
<para>Or in other words: preferably any explicit configuration passed in by an invoking service manager
(or similar) is used as notification source, before falling back to local notifications of the service,
and finally to global notifications of the system.</para>
<para>Well-behaving services and applications are recommended to react to memory pressure events by
executing one or more of the following operations, in order to ensure optimal behaviour even on loaded
and resource-constrained systems:</para>
<itemizedlist>
<listitem><para>Release allocation caches such as <function>malloc_trim()</function> or similar, both
implemented in the libraries consumed by the program and in private allocation caches of the program
itself.</para></listitem>
<listitem><para>Release any other form of in-memory caches that can easily be recovered if
needed (e.g. browser caches).</para></listitem>
<listitem><para>Terminate idle worker threads or processes, or similar.</para></listitem>
<listitem><para>Even exit entirely from the program if it is idle and can be automatically started when
needed (for example via socket or bus activation).</para></listitem>
</itemizedlist>
<para>Any of the suggested operations should help easing memory pressure situations and allowing the
system to make progress by reclaiming the memory for other purposes.</para>
<para>This event source typically fires on memory pressure stalls, i.e. when operational latency above a
configured threshold already has been seen. This should be taken into consideration when discussing
whether later latency to re-acquire any released resources is acceptable: it's usually more important to
think of the latencies that already happened than those coming up in future.</para>
<para>The <function>sd_event_source_set_memory_pressure_type()</function> and
<function>sd_event_source_set_memory_pressure_period()</function> functions can be used to fine-tune the
PSI parameters for pressure notifications. The former takes either <literal>some</literal> or
<literal>full</literal> as second parameter, the latter takes threshold and window times in microseconds
as parameters. For details about these three parameters see the PSI documentation. Note that these two
calls must be invoked immediately after allocating the event source, as they must be configured before
polling begins. Also note that these calls will fail if memory pressure parameterization has been passed
in via the <varname>$MEMORY_PRESSURE_WATCH</varname>/<varname>$MEMORY_PRESSURE_WRITE</varname>
environment variables (or in other words: configuration supplied by a service manager wins over internal
settings).</para>
<para>The <function>sd_event_trim_memory()</function> function releases various internal allocation
caches maintained by <filename>libsystemd</filename> and then invokes glibc's <citerefentry
project='man-pages'><refentrytitle>malloc_trim</refentrytitle><manvolnum>3</manvolnum></citerefentry>. This
makes the operation executed when the handler function parameter of
<function>sd_event_add_memory_pressure()</function> is passed as <constant>NULL</constant> directly
accessible for invocation at any time (see above). This function will log a structured log message at
<constant>LOG_DEBUG</constant> level (with message ID f9b0be465ad540d0850ad32172d57c21) about the memory
pressure operation.</para>
</refsect1>
<refsect1>
<title>Return Value</title>
<para>On success, these functions return 0 or a positive
integer. On failure, they return a negative errno-style error
code.</para>
<refsect2>
<title>Errors</title>
<para>Returned errors may indicate the following problems:</para>
<variablelist>
<varlistentry>
<term><constant>-ENOMEM</constant></term>
<listitem><para>Not enough memory to allocate an object.</para></listitem>
</varlistentry>
<varlistentry>
<term><constant>-EINVAL</constant></term>
<listitem><para>An invalid argument has been passed.</para></listitem>
</varlistentry>
<varlistentry>
<term><constant>-EHOSTDOWN</constant></term>
<listitem><para>The <varname>$MEMORY_PRESSURE_WATCH</varname> variable has been set to the literal
string <filename>/dev/null</filename>, in order to explicitly disable memory pressure
handling.</para></listitem>
</varlistentry>
<varlistentry>
<term><constant>-EBADMSG</constant></term>
<listitem><para>The <varname>$MEMORY_PRESSURE_WATCH</varname> variable has been set to an invalid
string, for example a relative rather than an absolute path.</para></listitem>
</varlistentry>
<varlistentry>
<term><constant>-ENOTTY</constant></term>
<listitem><para>The <varname>$MEMORY_PRESSURE_WATCH</varname> variable points to a regular file
outside of the procfs or cgroupfs file systems.</para></listitem>
</varlistentry>
<varlistentry>
<term><constant>-EOPNOTSUPP</constant></term>
<listitem><para>No configuration via <varname>$MEMORY_PRESSURE_WATCH</varname> has been specified
and the local kernel does not support the PSI interface.</para></listitem>
</varlistentry>
<varlistentry>
<term><constant>-EBUSY</constant></term>
<listitem><para>This is returned by <function>sd_event_source_set_memory_pressure_type()</function>
and <function>sd_event_source_set_memory_pressure_period()</function> if invoked on event sources
at a time later than immediately after allocating them.</para></listitem>
</varlistentry>
<varlistentry>
<term><constant>-ESTALE</constant></term>
<listitem><para>The event loop is already terminated.</para></listitem>
</varlistentry>
<varlistentry>
<term><constant>-ECHILD</constant></term>
<listitem><para>The event loop has been created in a different process.</para></listitem>
</varlistentry>
<varlistentry>
<term><constant>-EDOM</constant></term>
<listitem><para>The passed event source is not a memory pressure event source.</para></listitem>
</varlistentry>
</variablelist>
</refsect2>
</refsect1>
<xi:include href="libsystemd-pkgconfig.xml" />
<refsect1>
<title>See Also</title>
<para>
<citerefentry><refentrytitle>systemd</refentrytitle><manvolnum>1</manvolnum></citerefentry>,
<citerefentry><refentrytitle>sd-event</refentrytitle><manvolnum>3</manvolnum></citerefentry>,
<citerefentry><refentrytitle>sd_event_new</refentrytitle><manvolnum>3</manvolnum></citerefentry>,
<citerefentry><refentrytitle>sd_event_add_io</refentrytitle><manvolnum>3</manvolnum></citerefentry>,
<citerefentry><refentrytitle>sd_event_add_time</refentrytitle><manvolnum>3</manvolnum></citerefentry>,
<citerefentry><refentrytitle>sd_event_add_child</refentrytitle><manvolnum>3</manvolnum></citerefentry>,
<citerefentry><refentrytitle>sd_event_add_inotify</refentrytitle><manvolnum>3</manvolnum></citerefentry>,
<citerefentry><refentrytitle>sd_event_add_defer</refentrytitle><manvolnum>3</manvolnum></citerefentry>,
<citerefentry><refentrytitle>sd_event_source_set_enabled</refentrytitle><manvolnum>3</manvolnum></citerefentry>,
<citerefentry><refentrytitle>sd_event_source_set_description</refentrytitle><manvolnum>3</manvolnum></citerefentry>,
<citerefentry><refentrytitle>sd_event_source_set_userdata</refentrytitle><manvolnum>3</manvolnum></citerefentry>,
<citerefentry><refentrytitle>sd_event_source_set_floating</refentrytitle><manvolnum>3</manvolnum></citerefentry>
</para>
</refsect1>
</refentry>

View File

@ -28,3 +28,8 @@ int read_resource_pressure(const char *path, PressureType type, ResourcePressure
/* Was the kernel compiled with CONFIG_PSI=y? 1 if yes, 0 if not, negative on error. */
int is_pressure_supported(void);
/* Default parameters for memory pressure watch logic in sd-event and PID 1 */
#define MEMORY_PRESSURE_DEFAULT_TYPE "some"
#define MEMORY_PRESSURE_DEFAULT_THRESHOLD_USEC (100 * USEC_PER_MSEC)
#define MEMORY_PRESSURE_DEFAULT_WINDOW_USEC USEC_PER_SEC

View File

@ -27,6 +27,7 @@ typedef enum EventSourceType {
SOURCE_EXIT,
SOURCE_WATCHDOG,
SOURCE_INOTIFY,
SOURCE_MEMORY_PRESSURE,
_SOURCE_EVENT_SOURCE_TYPE_MAX,
_SOURCE_EVENT_SOURCE_TYPE_INVALID = -EINVAL,
} EventSourceType;
@ -129,6 +130,17 @@ struct sd_event_source {
struct inode_data *inode_data;
LIST_FIELDS(sd_event_source, by_inode_data);
} inotify;
/* State of a SOURCE_MEMORY_PRESSURE event source */
struct {
/* File descriptor that is written to and polled for pressure notifications; closed when the source is freed */
int fd;
/* Handler invoked with the source's userdata when the source is dispatched */
sd_event_handler_t callback;
/* Data that still has to be written to 'fd'; while write_buffer_size > 0 the fd is polled for EPOLLOUT only */
void *write_buffer;
size_t write_buffer_size;
/* events: epoll mask to wait for once nothing is left to write; revents: events reported by epoll, accumulated
 * while the source is pending */
uint32_t events, revents;
/* Linkage for the event loop's memory_pressure_write_list */
LIST_FIELDS(sd_event_source, write_list);
/* Set while 'fd' is added to the event loop's epoll instance */
bool registered:1;
/* Set when the watch parameters came from the environment or once writing has begun; the type/period setters
 * return -EBUSY while this is set */
bool locked:1;
/* Set while the source is linked into the event loop's memory_pressure_write_list */
bool in_write_list:1;
} memory_pressure;
};
};

View File

@ -7,6 +7,7 @@
#include "sd-daemon.h"
#include "sd-event.h"
#include "sd-id128.h"
#include "sd-messages.h"
#include "alloc-util.h"
#include "env-util.h"
@ -15,15 +16,22 @@
#include "fs-util.h"
#include "glyph-util.h"
#include "hashmap.h"
#include "hexdecoct.h"
#include "list.h"
#include "logarithm.h"
#include "macro.h"
#include "mallinfo-util.h"
#include "memory-util.h"
#include "missing_magic.h"
#include "missing_syscall.h"
#include "path-util.h"
#include "prioq.h"
#include "process-util.h"
#include "psi-util.h"
#include "set.h"
#include "signal-util.h"
#include "socket-util.h"
#include "stat-util.h"
#include "string-table.h"
#include "string-util.h"
#include "strxcpyx.h"
@ -63,6 +71,7 @@ static const char* const event_source_type_table[_SOURCE_EVENT_SOURCE_TYPE_MAX]
[SOURCE_EXIT] = "exit",
[SOURCE_WATCHDOG] = "watchdog",
[SOURCE_INOTIFY] = "inotify",
[SOURCE_MEMORY_PRESSURE] = "memory-pressure",
};
DEFINE_PRIVATE_STRING_TABLE_LOOKUP_TO_STRING(event_source_type, int);
@ -85,7 +94,8 @@ DEFINE_PRIVATE_STRING_TABLE_LOOKUP_TO_STRING(event_source_type, int);
SOURCE_TIME_BOOTTIME_ALARM, \
SOURCE_SIGNAL, \
SOURCE_DEFER, \
SOURCE_INOTIFY)
SOURCE_INOTIFY, \
SOURCE_MEMORY_PRESSURE)
/* This is used to assert that we didn't pass an unexpected source type to event_source_time_prioq_put().
* Time sources and ratelimited sources can be passed, so effectively this is the same as the
@ -130,6 +140,9 @@ struct sd_event {
/* A list of inotify objects that already have events buffered which aren't processed yet */
LIST_HEAD(struct inotify_data, buffered_inotify_data_list);
/* A list of memory pressure event sources that still need their subscription string written */
LIST_HEAD(sd_event_source, memory_pressure_write_list);
pid_t original_pid;
uint64_t iteration;
@ -524,6 +537,65 @@ static int source_child_pidfd_register(sd_event_source *s, int enabled) {
return 0;
}
/* Takes the memory pressure fd of the specified event source out of the event loop's epoll instance. This is a
 * NOP if event_pid_changed() reports a PID change for the loop, or if the fd is not currently registered. A
 * failure of the epoll removal is only logged at debug level; the source is marked unregistered regardless. */
static void source_memory_pressure_unregister(sd_event_source *s) {
        int k;

        assert(s);
        assert(s->type == SOURCE_MEMORY_PRESSURE);

        if (event_pid_changed(s->event) || !s->memory_pressure.registered)
                return;

        k = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, s->memory_pressure.fd, NULL);
        if (k < 0)
                log_debug_errno(errno, "Failed to remove source %s (type %s) from epoll, ignoring: %m",
                                strna(s->description), event_source_type_to_string(s->type));

        s->memory_pressure.registered = false;
}
/* Adds the memory pressure fd of the event source to the event loop's epoll instance, or updates the event mask
 * if it is already registered. As long as there is still data queued in the write buffer only EPOLLOUT is
 * watched; afterwards the configured event mask is used, plus EPOLLONESHOT if 'enabled' is SD_EVENT_ONESHOT.
 *
 * Returns 0 on success, or a negative errno-style error if epoll_ctl() fails. */
static int source_memory_pressure_register(sd_event_source *s, int enabled) {
        uint32_t mask;
        int op;

        assert(s);
        assert(s->type == SOURCE_MEMORY_PRESSURE);
        assert(enabled != SD_EVENT_OFF);

        if (s->memory_pressure.write_buffer_size > 0)
                mask = EPOLLOUT;
        else {
                mask = s->memory_pressure.events;
                if (enabled == SD_EVENT_ONESHOT)
                        mask |= EPOLLONESHOT;
        }

        struct epoll_event ev = {
                .events = mask,
                .data.ptr = s,
        };

        /* Modify the existing registration if there is one, otherwise create a new one */
        op = s->memory_pressure.registered ? EPOLL_CTL_MOD : EPOLL_CTL_ADD;

        if (epoll_ctl(s->event->epoll_fd, op, s->memory_pressure.fd, &ev) < 0)
                return -errno;

        s->memory_pressure.registered = true;
        return 0;
}
/* Links the event source into the event loop's list of memory pressure sources with pending writes, unless it is
 * already linked there. */
static void source_memory_pressure_add_to_write_list(sd_event_source *s) {
        assert(s);
        assert(s->type == SOURCE_MEMORY_PRESSURE);

        if (!s->memory_pressure.in_write_list) {
                LIST_PREPEND(memory_pressure.write_list, s->event->memory_pressure_write_list, s);
                s->memory_pressure.in_write_list = true;
        }
}
/* Unlinks the event source from the event loop's list of memory pressure sources with pending writes, if it is
 * currently linked there. */
static void source_memory_pressure_remove_from_write_list(sd_event_source *s) {
        assert(s);
        assert(s->type == SOURCE_MEMORY_PRESSURE);

        if (s->memory_pressure.in_write_list) {
                LIST_REMOVE(memory_pressure.write_list, s->event->memory_pressure_write_list, s);
                s->memory_pressure.in_write_list = false;
        }
}
static clockid_t event_source_type_to_clock(EventSourceType t) {
switch (t) {
@ -947,6 +1019,11 @@ static void source_disconnect(sd_event_source *s) {
break;
}
case SOURCE_MEMORY_PRESSURE:
source_memory_pressure_remove_from_write_list(s);
source_memory_pressure_unregister(s);
break;
default:
assert_not_reached();
}
@ -1017,6 +1094,11 @@ static sd_event_source* source_free(sd_event_source *s) {
s->child.pidfd = safe_close(s->child.pidfd);
}
if (s->type == SOURCE_MEMORY_PRESSURE) {
s->memory_pressure.fd = safe_close(s->memory_pressure.fd);
s->memory_pressure.write_buffer = mfree(s->memory_pressure.write_buffer);
}
if (s->destroy_callback)
s->destroy_callback(s->userdata);
@ -1092,6 +1174,7 @@ static sd_event_source *source_new(sd_event *e, bool floating, EventSourceType t
[SOURCE_POST] = endoffsetof_field(sd_event_source, post),
[SOURCE_EXIT] = endoffsetof_field(sd_event_source, exit),
[SOURCE_INOTIFY] = endoffsetof_field(sd_event_source, inotify),
[SOURCE_MEMORY_PRESSURE] = endoffsetof_field(sd_event_source, memory_pressure),
};
sd_event_source *s;
@ -1771,6 +1854,257 @@ _public_ int sd_event_add_exit(
return 0;
}
int sd_event_trim_memory(void) {
int r;
/* A default implementation of a memory pressure callback. Simply releases our own allocation caches
* and glibc's. This is automatically used when people call sd_event_add_memory_pressure() with a
* NULL callback parameter. */
log_debug("Memory pressure event, trimming malloc() memory.");
#if HAVE_GENERIC_MALLINFO
generic_mallinfo before_mallinfo = generic_mallinfo_get();
#endif
usec_t before_timestamp = now(CLOCK_MONOTONIC);
hashmap_trim_pools();
r = malloc_trim(0);
usec_t after_timestamp = now(CLOCK_MONOTONIC);
if (r > 0)
log_debug("Successfully trimmed some memory.");
else
log_debug("Couldn't trim any memory.");
usec_t period = after_timestamp - before_timestamp;
#if HAVE_GENERIC_MALLINFO
generic_mallinfo after_mallinfo = generic_mallinfo_get();
size_t l = LESS_BY((size_t) before_mallinfo.hblkhd, (size_t) after_mallinfo.hblkhd) +
LESS_BY((size_t) before_mallinfo.arena, (size_t) after_mallinfo.arena);
log_struct(LOG_DEBUG,
LOG_MESSAGE("Memory trimming took %s, returned %s to OS.",
FORMAT_TIMESPAN(period, 0),
FORMAT_BYTES(l)),
"MESSAGE_ID=" SD_MESSAGE_MEMORY_TRIM_STR,
"TRIMMED_BYTES=%zu", l,
"TRIMMED_USEC=" USEC_FMT, period);
#else
log_struct(LOG_DEBUG,
LOG_MESSAGE("Memory trimming took %s.",
FORMAT_TIMESPAN(period, 0)),
"MESSAGE_ID=" SD_MESSAGE_MEMORY_TRIM_STR,
"TRIMMED_USEC=" USEC_FMT, period);
#endif
return 0;
}
/* Handler installed by sd_event_add_memory_pressure() when the caller passes no callback of its own: trims
 * memory via sd_event_trim_memory() and reports success to the event loop. 'userdata' is not used. */
static int memory_pressure_callback(sd_event_source *s, void *userdata) {
        assert(s);

        (void) sd_event_trim_memory();

        return 0;
}
/* Allocates a new memory pressure event source and attaches it to the event loop 'e'.
 *
 * e        — the event loop to attach to
 * ret      — if non-NULL, receives a reference to the new source; if NULL the source is created "floating"
 * callback — handler to invoke on memory pressure; if NULL, a default handler that calls
 *            sd_event_trim_memory() is installed
 * userdata — opaque pointer passed to the handler
 *
 * What to watch is determined as follows: if $MEMORY_PRESSURE_WATCH is set it names the inode to watch (and
 * $MEMORY_PRESSURE_WRITE optionally carries Base64 data to write into it first); the parameters are then locked
 * against later changes. Otherwise the memory.pressure file of our own cgroup is used (unified hierarchy only),
 * with /proc/pressure/memory as fallback, and the default PSI parameters are queued for writing.
 *
 * Returns 0 on success, or a negative errno-style error, among them: -EHOSTDOWN (explicitly disabled via the
 * environment), -EBADMSG (invalid path in the environment), -EOPNOTSUPP (PSI not available), -ENOTTY (regular
 * file that is neither on procfs nor cgroupfs), -EISDIR/-EBADF (unsupported inode type), -ENOMEM. */
_public_ int sd_event_add_memory_pressure(
                sd_event *e,
                sd_event_source **ret,
                sd_event_handler_t callback,
                void *userdata) {

        _cleanup_free_ char *w = NULL;
        _cleanup_(source_freep) sd_event_source *s = NULL;
        _cleanup_close_ int path_fd = -1, fd = -1;
        _cleanup_free_ void *write_buffer = NULL;
        /* watch_fallback must start out as NULL: only the unified cgroup code path below sets it, but it is
         * checked on every path if opening the primary watch path fails with ENOENT. */
        const char *watch, *watch_fallback = NULL, *env;
        size_t write_buffer_size = 0;
        struct stat st;
        uint32_t events;
        bool locked;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (!callback)
                callback = memory_pressure_callback;

        s = source_new(e, !ret, SOURCE_MEMORY_PRESSURE);
        if (!s)
                return -ENOMEM;

        s->wakeup = WAKEUP_EVENT_SOURCE;
        s->memory_pressure.callback = callback;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ON;
        s->memory_pressure.fd = -EBADF;

        env = secure_getenv("MEMORY_PRESSURE_WATCH");
        if (env) {
                if (isempty(env) || path_equal(env, "/dev/null"))
                        return log_debug_errno(SYNTHETIC_ERRNO(EHOSTDOWN),
                                               "Memory pressure logic is explicitly disabled via $MEMORY_PRESSURE_WATCH.");

                if (!path_is_absolute(env) || !path_is_normalized(env))
                        return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG),
                                               "$MEMORY_PRESSURE_WATCH set to invalid path: %s", env);

                watch = env;

                env = secure_getenv("MEMORY_PRESSURE_WRITE");
                if (env) {
                        r = unbase64mem(env, SIZE_MAX, &write_buffer, &write_buffer_size);
                        if (r < 0)
                                return r;
                }

                /* Configuration passed in from the outside wins over internal settings, hence refuse later
                 * parameter changes. */
                locked = true;
        } else {

                r = is_pressure_supported();
                if (r < 0)
                        return r;
                if (r == 0)
                        return -EOPNOTSUPP;

                /* By default we want to watch memory pressure on the local cgroup, but we'll fall back on
                 * the system wide pressure if for some reason we cannot (which could be: memory controller
                 * not delegated to us, or PSI simply not available in the kernel). On legacy cgroupv1 we'll
                 * only use the system-wide logic. */
                r = cg_all_unified();
                if (r < 0)
                        return r;
                if (r == 0)
                        watch = "/proc/pressure/memory";
                else {
                        _cleanup_free_ char *cg = NULL;

                        r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, 0, &cg);
                        if (r < 0)
                                return r;

                        w = path_join("/sys/fs/cgroup", cg, "memory.pressure");
                        if (!w)
                                return -ENOMEM;

                        watch = w;
                        watch_fallback = "/proc/pressure/memory";
                }

                /* Android uses three levels in its userspace low memory killer logic:
                 *     some 70000 1000000
                 *     some 100000 1000000
                 *     full 70000 1000000
                 *
                 * GNOME's low memory monitor uses:
                 *     some 70000 1000000
                 *     some 100000 1000000
                 *     full 100000 1000000
                 *
                 * We'll default to the middle level that both agree on */
                if (asprintf((char**) &write_buffer,
                             "%s " USEC_FMT " " USEC_FMT,
                             MEMORY_PRESSURE_DEFAULT_TYPE,
                             MEMORY_PRESSURE_DEFAULT_THRESHOLD_USEC,
                             MEMORY_PRESSURE_DEFAULT_WINDOW_USEC) < 0)
                        return -ENOMEM;

                /* The trailing NUL byte is written out too */
                write_buffer_size = strlen(write_buffer) + 1;
                locked = false;
        }

        path_fd = open(watch, O_PATH|O_CLOEXEC);
        if (path_fd < 0) {
                if (errno != ENOENT)
                        return -errno;

                /* We got ENOENT. Three options now: try the fallback if we have one, or return the error as
                 * is (if based on user/env config), or return -EOPNOTSUPP (because we picked the path, and
                 * the PSI service apparently is not supported) */
                if (!watch_fallback)
                        return locked ? -ENOENT : -EOPNOTSUPP;

                path_fd = open(watch_fallback, O_PATH|O_CLOEXEC);
                if (path_fd < 0) {
                        /* Only look at errno if open() actually failed: on success errno is left untouched
                         * and would still carry the ENOENT of the first attempt. */
                        if (errno == ENOENT) /* PSI is not available in the kernel even under the fallback path? */
                                return -EOPNOTSUPP;

                        return -errno;
                }
        }

        if (fstat(path_fd, &st) < 0)
                return -errno;

        if (S_ISSOCK(st.st_mode)) {
                fd = socket(AF_UNIX, SOCK_STREAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
                if (fd < 0)
                        return -errno;

                r = connect_unix_path(fd, path_fd, NULL);
                if (r < 0)
                        return r;

                events = EPOLLIN;

        } else if (S_ISREG(st.st_mode) || S_ISFIFO(st.st_mode) || S_ISCHR(st.st_mode)) {
                /* Only open for writing if we actually have something to write */
                fd = fd_reopen(path_fd, (write_buffer_size > 0 ? O_RDWR : O_RDONLY) |O_CLOEXEC|O_NONBLOCK|O_NOCTTY);
                if (fd < 0)
                        return fd;

                if (S_ISREG(st.st_mode)) {
                        struct statfs sfs;

                        /* If this is a regular file validate this is a procfs or cgroupfs file, where we look for EPOLLPRI */

                        if (fstatfs(fd, &sfs) < 0)
                                return -errno;

                        if (!is_fs_type(&sfs, PROC_SUPER_MAGIC) &&
                            !is_fs_type(&sfs, CGROUP2_SUPER_MAGIC))
                                return -ENOTTY;

                        events = EPOLLPRI;
                } else
                        /* For fifos and char devices just watch for EPOLLIN */
                        events = EPOLLIN;

        } else if (S_ISDIR(st.st_mode))
                return -EISDIR;
        else
                return -EBADF;

        s->memory_pressure.fd = TAKE_FD(fd);
        s->memory_pressure.write_buffer = TAKE_PTR(write_buffer);
        s->memory_pressure.write_buffer_size = write_buffer_size;
        s->memory_pressure.events = events;
        s->memory_pressure.locked = locked;

        /* So here's the thing: if we are talking to PSI we need to write the watch string before adding the
         * fd to epoll (if we ignore this, then the watch won't work). Hence we'll not actually register the
         * fd with the epoll right-away. Instead, we just add the event source to a list of memory pressure
         * event sources on which writes must be executed before the first event loop iteration is
         * executed. (We could also write the data here, right away, but we want to give the caller the
         * freedom to call sd_event_source_set_memory_pressure_type() and
         * sd_event_source_set_memory_pressure_period() before we write it.) */

        if (s->memory_pressure.write_buffer_size > 0)
                source_memory_pressure_add_to_write_list(s);
        else {
                r = source_memory_pressure_register(s, s->enabled);
                if (r < 0)
                        return r;
        }

        if (ret)
                *ret = s;
        TAKE_PTR(s);

        return 0;
}
static void event_free_inotify_data(sd_event *e, struct inotify_data *d) {
assert(e);
@ -2562,6 +2896,10 @@ static int event_source_offline(
prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
break;
case SOURCE_MEMORY_PRESSURE:
source_memory_pressure_unregister(s);
break;
case SOURCE_TIME_REALTIME:
case SOURCE_TIME_BOOTTIME:
case SOURCE_TIME_MONOTONIC:
@ -2649,6 +2987,13 @@ static int event_source_online(
s->event->n_online_child_sources++;
break;
case SOURCE_MEMORY_PRESSURE:
r = source_memory_pressure_register(s, enabled);
if (r < 0)
return r;
break;
case SOURCE_TIME_REALTIME:
case SOURCE_TIME_BOOTTIME:
case SOURCE_TIME_MONOTONIC:
@ -3630,6 +3975,106 @@ static int process_inotify(sd_event *e) {
return done;
}
/* Records the epoll events seen on a memory pressure source and marks the source pending. If the source is
 * already pending the new events are merged into the ones recorded earlier, otherwise they replace them.
 * Returns whatever source_set_pending() returns. */
static int process_memory_pressure(sd_event_source *s, uint32_t revents) {
        assert(s);
        assert(s->type == SOURCE_MEMORY_PRESSURE);

        /* Start from a clean slate unless an earlier wakeup is still waiting to be dispatched */
        if (!s->pending)
                s->memory_pressure.revents = 0;

        s->memory_pressure.revents |= revents;

        return source_set_pending(s, true);
}
/* Tries to write out the queued write buffer of a memory pressure source to its fd.
 *
 * Returns 1 if only part of the buffer could be written (the remainder stays queued), 0 if everything was
 * written, nothing was queued, or the write failed with a transient error, and a negative errno-style error
 * otherwise. Once the buffer has been written completely the epoll registration is refreshed, so that the
 * regular event mask is watched instead of EPOLLOUT. */
static int source_memory_pressure_write(sd_event_source *s) {
        ssize_t n = 0;
        size_t done;
        int r;

        assert(s);
        assert(s->type == SOURCE_MEMORY_PRESSURE);

        /* once we start writing, the buffer is locked, we allow no further changes. */
        s->memory_pressure.locked = true;

        if (s->memory_pressure.write_buffer_size > 0) {
                n = write(s->memory_pressure.fd, s->memory_pressure.write_buffer, s->memory_pressure.write_buffer_size);
                if (n < 0) {
                        if (!ERRNO_IS_TRANSIENT(errno))
                                return -errno;

                        /* Transient failure: treat as if nothing was written */
                        n = 0;
                }
        }

        assert(n >= 0);
        done = (size_t) n;

        if (done == s->memory_pressure.write_buffer_size) {
                /* Nothing left to write, release the buffer */
                s->memory_pressure.write_buffer = mfree(s->memory_pressure.write_buffer);

                if (done == 0)
                        return 0;

                s->memory_pressure.write_buffer_size = 0;

                /* Update epoll events mask, since we have now written everything and don't care for EPOLLOUT anymore */
                r = source_memory_pressure_register(s, s->enabled);
                if (r < 0)
                        return r;

                return 0;
        }

        if (done == 0)
                return 0;

        /* Short write: keep only the part of the buffer that has not been written yet */
        _cleanup_free_ void *rest = NULL;

        assert(done < s->memory_pressure.write_buffer_size);

        rest = memdup((uint8_t*) s->memory_pressure.write_buffer + done, s->memory_pressure.write_buffer_size - done);
        if (!rest)
                return -ENOMEM;

        free_and_replace(s->memory_pressure.write_buffer, rest);
        s->memory_pressure.write_buffer_size -= done;
        return 1;
}
/* Preparatory step run before a memory pressure source's callback is dispatched: flushes pending writes and
 * inspects the recorded epoll events.
 *
 * Returns 0 if the user callback shall be invoked, 1 if there is nothing to dispatch (data was only partially
 * written, or no input event was seen), -EIO if only error/hangup events were seen, or another negative
 * errno-style error if writing or reading failed. */
static int source_memory_pressure_initiate_dispatch(sd_event_source *s) {
        uint32_t seen;
        int r;

        assert(s);
        assert(s->type == SOURCE_MEMORY_PRESSURE);

        r = source_memory_pressure_write(s);
        if (r < 0)
                return r;
        if (r > 0)
                return 1; /* if we wrote something, then don't continue with dispatching user dispatch
                           * function. Instead, shortcut it so that we wait for next EPOLLOUT immediately. */

        seen = s->memory_pressure.revents;

        /* No pending incoming IO? Then let's not continue further */
        if (!(seen & (EPOLLIN|EPOLLPRI))) {

                /* Treat IO errors on the notifier the same ways errors returned from a callback */
                if (seen & (EPOLLHUP|EPOLLERR|EPOLLRDHUP))
                        return -EIO;

                return 1; /* leave dispatch, we already processed everything */
        }

        if (seen & EPOLLIN) {
                uint8_t pipe_buf[PIPE_BUF];

                /* If the fd is readable, then flush out anything that might be queued */
                if (read(s->memory_pressure.fd, pipe_buf, sizeof(pipe_buf)) < 0 &&
                    !ERRNO_IS_TRANSIENT(errno))
                        return -errno;
        }

        return 0; /* go on, dispatch to user callback */
}
static int source_dispatch(sd_event_source *s) {
EventSourceType saved_type;
sd_event *saved_event;
@ -3678,6 +4123,16 @@ static int source_dispatch(sd_event_source *s) {
}
}
if (s->type == SOURCE_MEMORY_PRESSURE) {
r = source_memory_pressure_initiate_dispatch(s);
if (r == -EIO) /* handle EIO errors similar to callback errors */
goto finish;
if (r < 0)
return r;
if (r > 0) /* already handled */
return 1;
}
if (s->enabled == SD_EVENT_ONESHOT) {
r = sd_event_source_set_enabled(s, SD_EVENT_OFF);
if (r < 0)
@ -3764,6 +4219,10 @@ static int source_dispatch(sd_event_source *s) {
break;
}
case SOURCE_MEMORY_PRESSURE:
r = s->memory_pressure.callback(s, s->userdata);
break;
case SOURCE_WATCHDOG:
case _SOURCE_EVENT_SOURCE_TYPE_MAX:
case _SOURCE_EVENT_SOURCE_TYPE_INVALID:
@ -3772,6 +4231,7 @@ static int source_dispatch(sd_event_source *s) {
s->dispatching = false;
finish:
if (r < 0) {
log_debug_errno(r, "Event source %s (type %s) returned error, %s: %m",
strna(s->description),
@ -3922,6 +4382,30 @@ static void event_close_inode_data_fds(sd_event *e) {
}
}
/* Drains the event loop's list of memory pressure sources with pending writes, attempting the write for each of
 * them. Stops at the first failing write and returns its negative errno-style error; returns 0 once the list
 * is empty. */
static int event_memory_pressure_write_list(sd_event *e) {
        sd_event_source *s;

        assert(e);

        while ((s = LIST_POP(memory_pressure.write_list, e->memory_pressure_write_list))) {
                int r;

                assert(s->type == SOURCE_MEMORY_PRESSURE);
                assert(s->memory_pressure.write_buffer_size > 0);

                /* The source has just been taken off the list, reflect that in its state */
                s->memory_pressure.in_write_list = false;

                r = source_memory_pressure_write(s);
                if (r < 0)
                        return r;
        }

        return 0;
}
_public_ int sd_event_prepare(sd_event *e) {
int r;
@ -3950,6 +4434,10 @@ _public_ int sd_event_prepare(sd_event *e) {
if (r < 0)
return r;
r = event_memory_pressure_write_list(e);
if (r < 0)
return r;
r = event_arm_timer(e, &e->realtime);
if (r < 0)
return r;
@ -4115,6 +4603,10 @@ static int process_epoll(sd_event *e, usec_t timeout, int64_t threshold, int64_t
r = process_pidfd(e, s, e->event_queue[i].events);
break;
case SOURCE_MEMORY_PRESSURE:
r = process_memory_pressure(s, e->event_queue[i].events);
break;
default:
assert_not_reached();
}
@ -4700,3 +5192,91 @@ _public_ int sd_event_set_signal_exit(sd_event *e, int b) {
return change;
}
/* Changes the pressure type ("some" or "full") of a memory pressure event source by replacing the
 * first, space-terminated word of s->memory_pressure.write_buffer with 'ty'. Everything from the first
 * space onwards is carried over unchanged.
 *
 * Returns 1 if the buffer was rewritten and 0 if it already named the requested type. Otherwise
 * returns a negative errno-style value:
 *   -EINVAL  's' or 'ty' is NULL, 'ty' is neither "some" nor "full", or the current buffer does not
 *            consist of "some"/"full" followed by a space
 *   -EDOM    's' is not a memory pressure event source
 *   -EBUSY   the parameters of the source are locked
 *   -ENOMEM  memory allocation failed */
_public_ int sd_event_source_set_memory_pressure_type(sd_event_source *s, const char *ty) {
        _cleanup_free_ char *current = NULL;
        _cleanup_free_ void *replacement = NULL;
        size_t type_len, tail_len, new_size;
        char *sep, *end;

        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_MEMORY_PRESSURE, -EDOM);
        assert_return(ty, -EINVAL);

        if (!STR_IN_SET(ty, "some", "full"))
                return -EINVAL;

        /* Refuse adjusting parameters, if caller told us how to watch for events */
        if (s->memory_pressure.locked)
                return -EBUSY;

        /* The type is the first word of the buffer, locate where it ends */
        sep = memchr(s->memory_pressure.write_buffer, ' ', s->memory_pressure.write_buffer_size);
        if (!sep)
                return -EINVAL;

        type_len = sep - (char*) s->memory_pressure.write_buffer;
        tail_len = s->memory_pressure.write_buffer_size - type_len;

        /* Take a NUL terminated copy of the current type, so that it can be compared as a string */
        current = memdup_suffix0(s->memory_pressure.write_buffer, type_len);
        if (!current)
                return -ENOMEM;
        if (!STR_IN_SET(current, "some", "full"))
                return -EINVAL;

        if (streq(current, ty)) /* Nothing to change */
                return 0;

        new_size = strlen(ty) + tail_len;
        replacement = new(char, new_size);
        if (!replacement)
                return -ENOMEM;

        /* New type first, then the old tail starting at the space. The NUL byte stpcpy() appends is
         * overwritten by the first byte of the tail. */
        end = stpcpy(replacement, ty);
        memcpy(end, sep, tail_len);

        free_and_replace(s->memory_pressure.write_buffer, replacement);
        s->memory_pressure.write_buffer_size = new_size;
        s->memory_pressure.locked = false;

        return 1;
}
/* Changes the threshold and window (both in µs) of a memory pressure event source. The first,
 * space-terminated word of s->memory_pressure.write_buffer (the pressure type, "some" or "full") is
 * kept, and the buffer is regenerated as "<type> <threshold_usec> <window_usec>" including the
 * trailing NUL byte.
 *
 * Returns 1 if the buffer was replaced and 0 if the newly formatted buffer is identical to the
 * current one. Otherwise returns a negative errno-style value:
 *   -EINVAL  's' is NULL, threshold_usec is larger than window_usec, or the current buffer does not
 *            consist of "some"/"full" followed by a space
 *   -EDOM    's' is not a memory pressure event source
 *   -ERANGE  threshold_usec or window_usec is 0 or UINT64_MAX
 *   -EBUSY   the parameters of the source are locked
 *   -ENOMEM  memory allocation failed */
_public_ int sd_event_source_set_memory_pressure_period(sd_event_source *s, uint64_t threshold_usec, uint64_t window_usec) {
        _cleanup_free_ char *b = NULL;
        _cleanup_free_ void *w = NULL;

        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_MEMORY_PRESSURE, -EDOM);

        if (threshold_usec <= 0 || threshold_usec >= UINT64_MAX)
                return -ERANGE;
        if (window_usec <= 0 || window_usec >= UINT64_MAX)
                return -ERANGE;
        if (threshold_usec > window_usec)
                return -EINVAL;

        if (s->memory_pressure.locked) /* Refuse adjusting parameters, if caller told us how to watch for events */
                return -EBUSY;

        /* The pressure type is the first word of the buffer, locate where it ends */
        char* space = memchr(s->memory_pressure.write_buffer, ' ', s->memory_pressure.write_buffer_size);
        if (!space)
                return -EINVAL;

        size_t l = (char*) space - (char*) s->memory_pressure.write_buffer;
        b = memdup_suffix0(s->memory_pressure.write_buffer, l);
        if (!b)
                return -ENOMEM;
        if (!STR_IN_SET(b, "some", "full"))
                return -EINVAL;

        if (asprintf((char**) &w,
                     "%s " USEC_FMT " " USEC_FMT "",
                     b,
                     threshold_usec,
                     window_usec) < 0) {
                /* asprintf() leaves the contents of the output pointer undefined on failure. Reset it
                 * so that the cleanup handler does not free() a bogus pointer, and report the failure
                 * as what it is: an allocation problem, not an invalid argument. */
                w = NULL;
                return -ENOMEM;
        }

        /* The trailing NUL byte is part of the buffer, hence include it in the size */
        l = strlen(w) + 1;
        if (memcmp_nn(s->memory_pressure.write_buffer, s->memory_pressure.write_buffer_size, w, l) == 0)
                return 0;

        free_and_replace(s->memory_pressure.write_buffer, w);
        s->memory_pressure.write_buffer_size = l;
        s->memory_pressure.locked = false;

        return 1;
}

View File

@ -99,6 +99,7 @@ int sd_event_add_inotify_fd(sd_event *e, sd_event_source **s, int fd, uint32_t m
int sd_event_add_defer(sd_event *e, sd_event_source **s, sd_event_handler_t callback, void *userdata);
int sd_event_add_post(sd_event *e, sd_event_source **s, sd_event_handler_t callback, void *userdata);
int sd_event_add_exit(sd_event *e, sd_event_source **s, sd_event_handler_t callback, void *userdata);
int sd_event_add_memory_pressure(sd_event *e, sd_event_source **s, sd_event_handler_t callback, void *userdata);
int sd_event_prepare(sd_event *e);
int sd_event_wait(sd_event *e, uint64_t usec);
@ -160,6 +161,8 @@ int sd_event_source_send_child_signal(sd_event_source *s, int sig, const siginfo
int sd_event_source_send_child_signal(sd_event_source *s, int sig, const void *si, unsigned flags);
#endif
int sd_event_source_get_inotify_mask(sd_event_source *s, uint32_t *ret);
int sd_event_source_set_memory_pressure_type(sd_event_source *s, const char *ty);
int sd_event_source_set_memory_pressure_period(sd_event_source *s, uint64_t threshold_usec, uint64_t window_usec);
int sd_event_source_set_destroy_callback(sd_event_source *s, sd_event_destroy_t callback);
int sd_event_source_get_destroy_callback(sd_event_source *s, sd_event_destroy_t *ret);
int sd_event_source_get_floating(sd_event_source *s);
@ -171,6 +174,8 @@ int sd_event_source_get_ratelimit(sd_event_source *s, uint64_t *ret_interval_use
int sd_event_source_is_ratelimited(sd_event_source *s);
int sd_event_source_set_ratelimit_expire_callback(sd_event_source *s, sd_event_handler_t callback);
int sd_event_trim_memory(void);
/* Define helpers so that __attribute__((cleanup(sd_event_unrefp))) and similar may be used. */
_SD_DEFINE_POINTER_CLEANUP_FUNC(sd_event, sd_event_unref);
_SD_DEFINE_POINTER_CLEANUP_FUNC(sd_event_source, sd_event_source_unref);

View File

@ -195,6 +195,9 @@ _SD_BEGIN_DECLARATIONS;
#define SD_MESSAGE_TPM_PCR_EXTEND SD_ID128_MAKE(3f,7d,5e,f3,e5,4f,43,02,b4,f0,b1,43,bb,27,0c,ab)
#define SD_MESSAGE_TPM_PCR_EXTEND_STR SD_ID128_MAKE_STR(3f,7d,5e,f3,e5,4f,43,02,b4,f0,b1,43,bb,27,0c,ab)
#define SD_MESSAGE_MEMORY_TRIM SD_ID128_MAKE(f9,b0,be,46,5a,d5,40,d0,85,0a,d3,21,72,d5,7c,21)
#define SD_MESSAGE_MEMORY_TRIM_STR SD_ID128_MAKE_STR(f9,b0,be,46,5a,d5,40,d0,85,0a,d3,21,72,d5,7c,21)
_SD_END_DECLARATIONS;
#endif

View File

@ -386,6 +386,10 @@ tests += [
'sources' : files('test-math-util.c'),
'dependencies' : libm,
},
{
'sources' : files('test-mempress.c'),
'dependencies' : threads,
},
{
'sources' : files('test-namespace.c'),
'dependencies' : [

309
src/test/test-mempress.c Normal file
View File

@ -0,0 +1,309 @@
/* SPDX-License-Identifier: LGPL-2.1-or-later */
#include <fcntl.h>
#include <pthread.h>
#include <sys/mman.h>
#include <unistd.h>
#include <sd-bus.h>
#include <sd-event.h>
#include "bus-locator.h"
#include "bus-wait-for-jobs.h"
#include "fd-util.h"
#include "path-util.h"
#include "process-util.h"
#include "random-util.h"
#include "rm-rf.h"
#include "signal-util.h"
#include "socket-util.h"
#include "tests.h"
#include "tmpfile-util.h"
#include "unit-def.h"
/* Parameters handed to fake_pressure_thread(), which frees the structure when it is done with it. */
struct fake_pressure_context {
        /* File descriptor fake_pressure_thread() writes a single byte to */
        int fifo_fd;
        /* Listening socket fake_pressure_thread() accepts one connection on */
        int socket_fd;
};
static void *fake_pressure_thread(void *p) {
_cleanup_free_ struct fake_pressure_context *c = ASSERT_PTR(p);
_cleanup_close_ int cfd = -1;
usleep(150);
assert_se(write(c->fifo_fd, &(const char) { 'x' }, 1) == 1);
usleep(150);
cfd = accept4(c->socket_fd, NULL, NULL, SOCK_CLOEXEC);
assert_se(cfd >= 0);
char buf[STRLEN("hello")+1] = {};
assert_se(read(cfd, buf, sizeof(buf)-1) == sizeof(buf)-1);
assert_se(streq(buf, "hello"));
assert_se(write(cfd, &(const char) { 'z' }, 1) == 1);
return 0;
}
/* Memory pressure callback shared by both event sources of the fake_pressure test. 'userdata' points
 * to an int that is multiplied by the first character of the description of the event source that
 * fired. Once the product equals 7 * 'f' * 's' the event loop is asked to exit with code 0.
 * Always returns 0. */
static int fake_pressure_callback(sd_event_source *s, void *userdata) {
        const char *description;
        int *product = userdata;

        assert_se(s);
        assert_se(sd_event_source_get_description(s, &description) >= 0);

        /* Fold the identity of the source that fired into the running product */
        *product *= description[0];

        log_notice("memory pressure event: %s", description);

        if (*product != 7 * 'f' * 's')
                return 0;

        /* The product matches the value the test waits for, we are done */
        assert_se(sd_event_exit(sd_event_source_get_event(s), 0) >= 0);
        return 0;
}
TEST(fake_pressure) {
_cleanup_(sd_event_source_unrefp) sd_event_source *es = NULL, *ef = NULL;
_cleanup_(sd_event_unrefp) sd_event *e = NULL;
_cleanup_free_ char *j = NULL, *k = NULL;
_cleanup_(rm_rf_physical_and_freep) char *tmp = NULL;
_cleanup_close_ int fifo_fd = -1, socket_fd = -1;
union sockaddr_union sa;
pthread_t th;
int value = 7;
assert_se(sd_event_default(&e) >= 0);
assert_se(mkdtemp_malloc(NULL, &tmp) >= 0);
assert_se(j = path_join(tmp, "fifo"));
assert_se(mkfifo(j, 0600) >= 0);
fifo_fd = open(j, O_CLOEXEC|O_RDWR|O_NONBLOCK);
assert_se(fifo_fd >= 0);
assert_se(k = path_join(tmp, "sock"));
socket_fd = socket(AF_UNIX, SOCK_STREAM|SOCK_CLOEXEC, 0);
assert_se(socket_fd >= 0);
assert_se(sockaddr_un_set_path(&sa.un, k) >= 0);
assert_se(bind(socket_fd, &sa.sa, SOCKADDR_UN_LEN(sa.un)) >= 0);
assert_se(listen(socket_fd, 1) >= 0);
/* Ideally we'd just allocate this on the stack, but AddressSanitizer doesn't like it if threads
* access each other's stack */
struct fake_pressure_context *fp = new(struct fake_pressure_context, 1);
assert_se(fp);
*fp = (struct fake_pressure_context) {
.fifo_fd = fifo_fd,
.socket_fd = socket_fd,
};
assert_se(pthread_create(&th, NULL, fake_pressure_thread, TAKE_PTR(fp)) == 0);
assert_se(setenv("MEMORY_PRESSURE_WATCH", j, /* override= */ true) >= 0);
assert_se(unsetenv("MEMORY_PRESSURE_WRITE") >= 0);
assert_se(sd_event_add_memory_pressure(e, &es, fake_pressure_callback, &value) >= 0);
assert_se(sd_event_source_set_description(es, "fifo event source") >= 0);
assert_se(setenv("MEMORY_PRESSURE_WATCH", k, /* override= */ true) >= 0);
assert_se(setenv("MEMORY_PRESSURE_WRITE", "aGVsbG8K", /* override= */ true) >= 0);
assert_se(sd_event_add_memory_pressure(e, &ef, fake_pressure_callback, &value) >= 0);
assert_se(sd_event_source_set_description(ef, "socket event source") >= 0);
assert_se(sd_event_loop(e) >= 0);
assert_se(value == 7 * 'f' * 's');
assert_se(pthread_join(th, NULL) == 0);
}
/* Userdata passed to real_pressure_callback() */
struct real_pressure_context {
        /* Child event source that real_pressure_callback() sends SIGKILL to; the callback resets this
         * to NULL afterwards */
        sd_event_source *pid;
};
/* Memory pressure callback of the real_pressure test. Logs the description of the source that fired,
 * calls sd_event_trim_memory() (its result is ignored), and then sends SIGKILL to the process behind
 * the child event source stored in the struct real_pressure_context passed as 'userdata'. The stored
 * pointer is reset to NULL afterwards. Always returns 0. */
static int real_pressure_callback(sd_event_source *s, void *userdata) {
        struct real_pressure_context *ctx = ASSERT_PTR(userdata);
        const char *description;
        sd_event_source *child;

        assert_se(s);
        assert_se(sd_event_source_get_description(s, &description) >= 0);

        log_notice("real_memory pressure event: %s", description);

        sd_event_trim_memory();

        /* A child event source must still be registered in the context at this point */
        child = ctx->pid;
        assert_se(child);

        assert_se(sd_event_source_send_child_signal(child, SIGKILL, NULL, 0) >= 0);
        ctx->pid = NULL;

        return 0;
}
#define MMAP_SIZE (10 * 1024 * 1024)

/* Body of the memory eating child process of the real_pressure test. Blocks until one byte arrives
 * on 'pipe_fd', then keeps mapping and filling anonymous memory in chunks of MMAP_SIZE bytes, pausing
 * 50ms after each chunk. Nothing is ever unmapped. Never returns: the loop only ends when the process
 * is killed or when mmap() fails and the assertion aborts it. */
_noreturn_ static void real_pressure_eat_memory(int pipe_fd) {
        size_t total = 0;
        char go;

        assert_se(read(pipe_fd, &go, 1) == 1); /* Wait for the GO! */

        while (true) {
                void *chunk = mmap(NULL, MMAP_SIZE, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
                assert_se(chunk != MAP_FAILED);

                log_info("Eating another %s.", FORMAT_BYTES(MMAP_SIZE));

                /* Write to every byte of the mapping */
                memset(chunk, random_u32() & 0xFF, MMAP_SIZE);

                total += MMAP_SIZE;
                log_info("Ate %s in total.", FORMAT_BYTES(total));

                usleep(50 * USEC_PER_MSEC);
        }
}
/* Child event source callback of the real_pressure test. Verifies that the reported child state
 * change is a SIGCHLD describing a process killed by SIGKILL, then asks the event loop to exit with
 * code 31. 'userdata' is unused. Always returns 0. */
static int real_pressure_child_callback(sd_event_source *s, const siginfo_t *si, void *userdata) {
        sd_event *loop;

        assert_se(s);
        assert_se(si);

        log_notice("child dead");

        /* The child must have been terminated by SIGKILL */
        assert_se(si->si_signo == SIGCHLD);
        assert_se(si->si_status == SIGKILL);
        assert_se(si->si_code == CLD_KILLED);

        loop = sd_event_source_get_event(s);
        assert_se(sd_event_exit(loop, 31) >= 0);

        return 0;
}
TEST(real_pressure) {
_cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL, *reply = NULL;
_cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
_cleanup_(sd_event_source_unrefp) sd_event_source *es = NULL, *cs = NULL;
_cleanup_(bus_wait_for_jobs_freep) BusWaitForJobs *w = NULL;
_cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
_cleanup_close_pair_ int pipe_fd[2] = PIPE_EBADF;
_cleanup_(sd_event_unrefp) sd_event *e = NULL;
_cleanup_free_ char *scope = NULL;
const char *object;
int r;
pid_t pid;
r = sd_bus_open_system(&bus);
if (r < 0) {
log_notice_errno(r, "Can't connect to system bus, skipping test: %m");
return;
}
assert_se(bus_wait_for_jobs_new(bus, &w) >= 0);
assert_se(bus_message_new_method_call(bus, &m, bus_systemd_mgr, "StartTransientUnit") >= 0);
assert_se(asprintf(&scope, "test-%" PRIu64 ".scope", random_u64()) >= 0);
assert_se(sd_bus_message_append(m, "ss", scope, "fail") >= 0);
assert_se(sd_bus_message_open_container(m, 'a', "(sv)") >= 0);
assert_se(sd_bus_message_append(m, "(sv)", "PIDs", "au", 1, 0) >= 0);
assert_se(sd_bus_message_append(m, "(sv)", "MemoryAccounting", "b", true) >= 0);
assert_se(sd_bus_message_close_container(m) >= 0);
assert_se(sd_bus_message_append(m, "a(sa(sv))", 0) >= 0);
r = sd_bus_call(bus, m, 0, &error, &reply);
if (r < 0) {
log_notice_errno(r, "Can't issue transient unit call, skipping test: %m");
return;
}
assert_se(sd_bus_message_read(reply, "o", &object) >= 0);
assert_se(bus_wait_for_jobs_one(w, object, /* quiet= */ false, /* extra_args= */ NULL) >= 0);
assert_se(sd_event_default(&e) >= 0);
assert_se(pipe2(pipe_fd, O_CLOEXEC) >= 0);
r = safe_fork("(eat-memory)", FORK_RESET_SIGNALS|FORK_DEATHSIG, &pid);
assert_se(r >= 0);
if (r == 0) {
real_pressure_eat_memory(pipe_fd[0]);
_exit(EXIT_SUCCESS);
}
assert_se(sigprocmask_many(SIG_BLOCK, NULL, SIGCHLD, -1) >= 0);
assert_se(sd_event_add_child(e, &cs, pid, WEXITED, real_pressure_child_callback, NULL) >= 0);
assert_se(sd_event_source_set_child_process_own(cs, true) >= 0);
assert_se(unsetenv("MEMORY_PRESSURE_WATCH") >= 0);
assert_se(unsetenv("MEMORY_PRESSURE_WRITE") >= 0);
struct real_pressure_context context = {
.pid = cs,
};
r = sd_event_add_memory_pressure(e, &es, real_pressure_callback, &context);
if (r < 0) {
log_notice_errno(r, "Can't allocate memory pressure fd, skipping test: %m");
return;
}
assert_se(sd_event_source_set_description(es, "real pressure event source") >= 0);
assert_se(sd_event_source_set_memory_pressure_type(es, "some") == 0);
assert_se(sd_event_source_set_memory_pressure_type(es, "full") > 0);
assert_se(sd_event_source_set_memory_pressure_type(es, "full") == 0);
assert_se(sd_event_source_set_memory_pressure_type(es, "some") > 0);
assert_se(sd_event_source_set_memory_pressure_type(es, "some") == 0);
assert_se(sd_event_source_set_memory_pressure_period(es, 70 * USEC_PER_MSEC, USEC_PER_SEC) > 0);
assert_se(sd_event_source_set_memory_pressure_period(es, 70 * USEC_PER_MSEC, USEC_PER_SEC) == 0);
assert_se(sd_event_source_set_enabled(es, SD_EVENT_ONESHOT) >= 0);
_cleanup_free_ char *uo;
assert_se(uo = unit_dbus_path_from_name(scope));
uint64_t mcurrent = UINT64_MAX;
assert_se(sd_bus_get_property_trivial(bus, "org.freedesktop.systemd1", uo, "org.freedesktop.systemd1.Scope", "MemoryCurrent", &error, 't', &mcurrent) >= 0);
printf("current: %" PRIu64 "\n", mcurrent);
if (mcurrent == UINT64_MAX) {
log_notice_errno(r, "Memory accounting not available, skipping test: %m");
return;
}
m = sd_bus_message_unref(m);
assert_se(bus_message_new_method_call(bus, &m, bus_systemd_mgr, "SetUnitProperties") >= 0);
assert_se(sd_bus_message_append(m, "sb", scope, true) >= 0);
assert_se(sd_bus_message_open_container(m, 'a', "(sv)") >= 0);
assert_se(sd_bus_message_append(m, "(sv)", "MemoryHigh", "t", mcurrent + (15 * 1024 * 1024)) >= 0);
assert_se(sd_bus_message_append(m, "(sv)", "MemoryMax", "t", mcurrent + (50 * 1024 * 1024)) >= 0);
assert_se(sd_bus_message_close_container(m) >= 0);
assert_se(sd_bus_call(bus, m, 0, NULL, NULL) >= 0);
/* Generate some memory allocations via mempool */
#define NN (1024)
Hashmap **h = new(Hashmap*, NN);
for (int i = 0; i < NN; i++)
h[i] = hashmap_new(NULL);
for (int i = 0; i < NN; i++)
hashmap_free(h[i]);
free(h);
/* Now start eating memory */
assert_se(write(pipe_fd[1], &(const char) { 'x' }, 1) == 1);
assert_se(sd_event_loop(e) >= 0);
int ex = 0;
assert_se(sd_event_get_exit_code(e, &ex) >= 0);
assert_se(ex == 31);
}
/* Outro hook handed to DEFINE_TEST_MAIN_FULL() below: calls hashmap_trim_pools() once and reports
 * success by returning 0.
 * NOTE(review): presumably this runs after all tests and releases memory still cached by the hashmap
 * pools — confirm against the definitions of DEFINE_TEST_MAIN_FULL() and hashmap_trim_pools(). */
static int outro(void) {
        hashmap_trim_pools();
        return 0;
}
DEFINE_TEST_MAIN_FULL(LOG_DEBUG, NULL, outro);