1
0
mirror of https://github.com/systemd/systemd.git synced 2025-03-31 14:50:15 +03:00

Merge pull request #13496 from wat-ze-hex/custom-bpf-progs-parameterized-3

bpf: extend bpf cgroup program support
This commit is contained in:
Zbigniew Jędrzejewski-Szmek 2021-04-12 14:31:42 +02:00 committed by GitHub
commit 839eb4a458
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
29 changed files with 1014 additions and 10 deletions

View File

@ -2474,6 +2474,8 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2eservice {
readonly u ManagedOOMMemoryPressureLimit = ...;
@org.freedesktop.DBus.Property.EmitsChangedSignal("false")
readonly s ManagedOOMPreference = '...';
@org.freedesktop.DBus.Property.EmitsChangedSignal("false")
readonly a(ss) BPFProgram = [...];
@org.freedesktop.DBus.Property.EmitsChangedSignal("const")
readonly as Environment = ['...', ...];
@org.freedesktop.DBus.Property.EmitsChangedSignal("const")
@ -3008,6 +3010,8 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2eservice {
<!--property ManagedOOMPreference is not documented!-->
<!--property BPFProgram is not documented!-->
<!--property EnvironmentFiles is not documented!-->
<!--property PassEnvironment is not documented!-->
@ -3566,6 +3570,8 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2eservice {
<variablelist class="dbus-property" generated="True" extra-ref="ManagedOOMPreference"/>
<variablelist class="dbus-property" generated="True" extra-ref="BPFProgram"/>
<variablelist class="dbus-property" generated="True" extra-ref="Environment"/>
<variablelist class="dbus-property" generated="True" extra-ref="EnvironmentFiles"/>
@ -4251,6 +4257,8 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2esocket {
readonly u ManagedOOMMemoryPressureLimit = ...;
@org.freedesktop.DBus.Property.EmitsChangedSignal("false")
readonly s ManagedOOMPreference = '...';
@org.freedesktop.DBus.Property.EmitsChangedSignal("false")
readonly a(ss) BPFProgram = [...];
@org.freedesktop.DBus.Property.EmitsChangedSignal("const")
readonly as Environment = ['...', ...];
@org.freedesktop.DBus.Property.EmitsChangedSignal("const")
@ -4811,6 +4819,8 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2esocket {
<!--property ManagedOOMPreference is not documented!-->
<!--property BPFProgram is not documented!-->
<!--property EnvironmentFiles is not documented!-->
<!--property PassEnvironment is not documented!-->
@ -5365,6 +5375,8 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2esocket {
<variablelist class="dbus-property" generated="True" extra-ref="ManagedOOMPreference"/>
<variablelist class="dbus-property" generated="True" extra-ref="BPFProgram"/>
<variablelist class="dbus-property" generated="True" extra-ref="Environment"/>
<variablelist class="dbus-property" generated="True" extra-ref="EnvironmentFiles"/>
@ -5952,6 +5964,8 @@ node /org/freedesktop/systemd1/unit/home_2emount {
readonly u ManagedOOMMemoryPressureLimit = ...;
@org.freedesktop.DBus.Property.EmitsChangedSignal("false")
readonly s ManagedOOMPreference = '...';
@org.freedesktop.DBus.Property.EmitsChangedSignal("false")
readonly a(ss) BPFProgram = [...];
@org.freedesktop.DBus.Property.EmitsChangedSignal("const")
readonly as Environment = ['...', ...];
@org.freedesktop.DBus.Property.EmitsChangedSignal("const")
@ -6440,6 +6454,8 @@ node /org/freedesktop/systemd1/unit/home_2emount {
<!--property ManagedOOMPreference is not documented!-->
<!--property BPFProgram is not documented!-->
<!--property EnvironmentFiles is not documented!-->
<!--property PassEnvironment is not documented!-->
@ -6912,6 +6928,8 @@ node /org/freedesktop/systemd1/unit/home_2emount {
<variablelist class="dbus-property" generated="True" extra-ref="ManagedOOMPreference"/>
<variablelist class="dbus-property" generated="True" extra-ref="BPFProgram"/>
<variablelist class="dbus-property" generated="True" extra-ref="Environment"/>
<variablelist class="dbus-property" generated="True" extra-ref="EnvironmentFiles"/>
@ -7620,6 +7638,8 @@ node /org/freedesktop/systemd1/unit/dev_2dsda3_2eswap {
readonly u ManagedOOMMemoryPressureLimit = ...;
@org.freedesktop.DBus.Property.EmitsChangedSignal("false")
readonly s ManagedOOMPreference = '...';
@org.freedesktop.DBus.Property.EmitsChangedSignal("false")
readonly a(ss) BPFProgram = [...];
@org.freedesktop.DBus.Property.EmitsChangedSignal("const")
readonly as Environment = ['...', ...];
@org.freedesktop.DBus.Property.EmitsChangedSignal("const")
@ -8094,6 +8114,8 @@ node /org/freedesktop/systemd1/unit/dev_2dsda3_2eswap {
<!--property ManagedOOMPreference is not documented!-->
<!--property BPFProgram is not documented!-->
<!--property EnvironmentFiles is not documented!-->
<!--property PassEnvironment is not documented!-->
@ -8552,6 +8574,8 @@ node /org/freedesktop/systemd1/unit/dev_2dsda3_2eswap {
<variablelist class="dbus-property" generated="True" extra-ref="ManagedOOMPreference"/>
<variablelist class="dbus-property" generated="True" extra-ref="BPFProgram"/>
<variablelist class="dbus-property" generated="True" extra-ref="Environment"/>
<variablelist class="dbus-property" generated="True" extra-ref="EnvironmentFiles"/>
@ -9113,6 +9137,8 @@ node /org/freedesktop/systemd1/unit/system_2eslice {
readonly u ManagedOOMMemoryPressureLimit = ...;
@org.freedesktop.DBus.Property.EmitsChangedSignal("false")
readonly s ManagedOOMPreference = '...';
@org.freedesktop.DBus.Property.EmitsChangedSignal("false")
readonly a(ss) BPFProgram = [...];
};
interface org.freedesktop.DBus.Peer { ... };
interface org.freedesktop.DBus.Introspectable { ... };
@ -9251,6 +9277,8 @@ node /org/freedesktop/systemd1/unit/system_2eslice {
<!--property ManagedOOMPreference is not documented!-->
<!--property BPFProgram is not documented!-->
<!--Autogenerated cross-references for systemd.directives, do not edit-->
<variablelist class="dbus-interface" generated="True" extra-ref="org.freedesktop.systemd1.Unit"/>
@ -9393,6 +9421,8 @@ node /org/freedesktop/systemd1/unit/system_2eslice {
<variablelist class="dbus-property" generated="True" extra-ref="ManagedOOMPreference"/>
<variablelist class="dbus-property" generated="True" extra-ref="BPFProgram"/>
<!--End of Autogenerated section-->
<refsect2>
@ -9554,6 +9584,8 @@ node /org/freedesktop/systemd1/unit/session_2d1_2escope {
readonly u ManagedOOMMemoryPressureLimit = ...;
@org.freedesktop.DBus.Property.EmitsChangedSignal("false")
readonly s ManagedOOMPreference = '...';
@org.freedesktop.DBus.Property.EmitsChangedSignal("false")
readonly a(ss) BPFProgram = [...];
@org.freedesktop.DBus.Property.EmitsChangedSignal("const")
readonly s KillMode = '...';
@org.freedesktop.DBus.Property.EmitsChangedSignal("const")
@ -9708,6 +9740,8 @@ node /org/freedesktop/systemd1/unit/session_2d1_2escope {
<!--property ManagedOOMPreference is not documented!-->
<!--property BPFProgram is not documented!-->
<!--property KillMode is not documented!-->
<!--property KillSignal is not documented!-->
@ -9876,6 +9910,8 @@ node /org/freedesktop/systemd1/unit/session_2d1_2escope {
<variablelist class="dbus-property" generated="True" extra-ref="ManagedOOMPreference"/>
<variablelist class="dbus-property" generated="True" extra-ref="BPFProgram"/>
<variablelist class="dbus-property" generated="True" extra-ref="KillMode"/>
<variablelist class="dbus-property" generated="True" extra-ref="KillSignal"/>

View File

@ -696,6 +696,12 @@
<para>If these settings are used multiple times in the same unit, all the specified programs are attached. If an
empty string is assigned to these settings, the program list is reset and all previously specified programs are ignored.</para>
<para>If the path <replaceable>BPF_FS_PROGRAM_PATH</replaceable> in an <varname>IPIngressFilterPath=</varname> assignment
is already being handled by a <varname>BPFProgram=</varname> ingress hook, e.g.
<varname>BPFProgram=</varname><constant>ingress</constant>:<replaceable>BPF_FS_PROGRAM_PATH</replaceable>,
the assignment will still be considered valid and the program will be attached to the cgroup. The same applies to
an <varname>IPEgressFilterPath=</varname> path and the <constant>egress</constant> hook.</para>
<para>Note that for socket-activated services, the IP filter programs configured on the socket unit apply to
all sockets associated with it directly, but not to any sockets created by the ultimately activated services
for it. Conversely, the IP filter programs configured for the service are not applied to any sockets passed into
@ -710,6 +716,52 @@
</listitem>
</varlistentry>
<varlistentry>
<term><varname>BPFProgram=<replaceable>type</replaceable><constant>:</constant><replaceable>program-path</replaceable></varname></term>
<listitem>
<para>Add a custom cgroup BPF program.</para>
<para><varname>BPFProgram=</varname> allows attaching BPF hooks to the cgroup of a systemd unit.
(This generalizes the functionality exposed via <varname>IPEgressFilterPath=</varname> for egress and
<varname>IPIngressFilterPath=</varname> for ingress.)
Cgroup-bpf hooks in the form of BPF programs loaded to the BPF filesystem are attached with cgroup-bpf attach
flags determined by the unit. For details about attachment types and flags see <ulink
url="https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/plain/include/uapi/linux/bpf.h"/>.
For general BPF documentation please refer to <ulink url="https://www.kernel.org/doc/html/latest/bpf/index.html"/>.</para>
<para>The specification of a BPF program consists of a <replaceable>type</replaceable> followed by a
<replaceable>program-path</replaceable> with <literal>:</literal> as the separator:
<replaceable>type</replaceable><constant>:</constant><replaceable>program-path</replaceable>.</para>
<para><replaceable>type</replaceable> is the string name of the BPF attach type, as also used by
<command>bpftool</command>. <replaceable>type</replaceable> can be one of <constant>egress</constant>,
<constant>ingress</constant>, <constant>sock_create</constant>, <constant>sock_ops</constant>,
<constant>device</constant>, <constant>bind4</constant>, <constant>bind6</constant>,
<constant>connect4</constant>, <constant>connect6</constant>, <constant>post_bind4</constant>,
<constant>post_bind6</constant>, <constant>sendmsg4</constant>, <constant>sendmsg6</constant>,
<constant>sysctl</constant>, <constant>recvmsg4</constant>, <constant>recvmsg6</constant>,
<constant>getsockopt</constant>, <constant>setsockopt</constant>.</para>
<para>Setting <varname>BPFProgram=</varname> to an empty value makes previous assignments ineffective.</para>
<para>Multiple assignments of the same <replaceable>type</replaceable>:<replaceable>program-path</replaceable>
value have the same effect as a single assignment: the program with the path <replaceable>program-path</replaceable>
will be attached to cgroup hook <replaceable>type</replaceable> just once.</para>
<para>If a BPF <constant>egress</constant> program pinned at <replaceable>program-path</replaceable> is already being
handled by <varname>IPEgressFilterPath=</varname>, the <varname>BPFProgram=</varname>
assignment will still be considered valid and the program will be attached to the cgroup.
The same applies to the <constant>ingress</constant> hook and an <varname>IPIngressFilterPath=</varname> assignment.</para>
<para>BPF programs passed with <varname>BPFProgram=</varname> are attached to the cgroup of a unit with the BPF
attach flag <constant>multi</constant>, which allows further attachments of the same
<replaceable>type</replaceable> within the cgroup hierarchy rooted at the unit cgroup.</para>
<para>Examples:<programlisting>
BPFProgram=egress:/sys/fs/bpf/egress-hook
BPFProgram=bind6:/sys/fs/bpf/sock-addr-hook
</programlisting></para>
</listitem>
</varlistentry>
<varlistentry>
<term><varname>DeviceAllow=</varname></term>

View File

@ -2163,6 +2163,7 @@ static const char *const cgroup_controller_table[_CGROUP_CONTROLLER_MAX] = {
[CGROUP_CONTROLLER_PIDS] = "pids",
[CGROUP_CONTROLLER_BPF_FIREWALL] = "bpf-firewall",
[CGROUP_CONTROLLER_BPF_DEVICES] = "bpf-devices",
[CGROUP_CONTROLLER_BPF_FOREIGN] = "bpf-foreign",
};
DEFINE_STRING_TABLE_LOOKUP(cgroup_controller, CGroupController);

View File

@ -30,6 +30,7 @@ typedef enum CGroupController {
/* BPF-based pseudo-controllers, v2 only */
CGROUP_CONTROLLER_BPF_FIREWALL,
CGROUP_CONTROLLER_BPF_DEVICES,
CGROUP_CONTROLLER_BPF_FOREIGN,
_CGROUP_CONTROLLER_MAX,
_CGROUP_CONTROLLER_INVALID = -EINVAL,
@ -49,6 +50,7 @@ typedef enum CGroupMask {
CGROUP_MASK_PIDS = CGROUP_CONTROLLER_TO_MASK(CGROUP_CONTROLLER_PIDS),
CGROUP_MASK_BPF_FIREWALL = CGROUP_CONTROLLER_TO_MASK(CGROUP_CONTROLLER_BPF_FIREWALL),
CGROUP_MASK_BPF_DEVICES = CGROUP_CONTROLLER_TO_MASK(CGROUP_CONTROLLER_BPF_DEVICES),
CGROUP_MASK_BPF_FOREIGN = CGROUP_CONTROLLER_TO_MASK(CGROUP_CONTROLLER_BPF_FOREIGN),
/* All real cgroup v1 controllers */
CGROUP_MASK_V1 = CGROUP_MASK_CPU|CGROUP_MASK_CPUACCT|CGROUP_MASK_BLKIO|CGROUP_MASK_MEMORY|CGROUP_MASK_DEVICES|CGROUP_MASK_PIDS,
@ -57,7 +59,7 @@ typedef enum CGroupMask {
CGROUP_MASK_V2 = CGROUP_MASK_CPU|CGROUP_MASK_CPUSET|CGROUP_MASK_IO|CGROUP_MASK_MEMORY|CGROUP_MASK_PIDS,
/* All cgroup v2 BPF pseudo-controllers */
CGROUP_MASK_BPF = CGROUP_MASK_BPF_FIREWALL|CGROUP_MASK_BPF_DEVICES,
CGROUP_MASK_BPF = CGROUP_MASK_BPF_FIREWALL|CGROUP_MASK_BPF_DEVICES|CGROUP_MASK_BPF_FOREIGN,
_CGROUP_MASK_ALL = CGROUP_CONTROLLER_TO_MASK(_CGROUP_CONTROLLER_MAX) - 1
} CGroupMask;

View File

@ -698,8 +698,7 @@ int bpf_firewall_install(Unit *u) {
if (r < 0)
return log_unit_error_errno(u, r, "Failed to determine cgroup path: %m");
flags = (supported == BPF_FIREWALL_SUPPORTED_WITH_MULTI &&
(u->type == UNIT_SLICE || unit_cgroup_delegate(u))) ? BPF_F_ALLOW_MULTI : 0;
flags = supported == BPF_FIREWALL_SUPPORTED_WITH_MULTI ? BPF_F_ALLOW_MULTI : 0;
/* Unref the old BPF program (which will implicitly detach it) right before attaching the new program, to
* minimize the time window when we don't account for IP traffic. */
@ -707,8 +706,7 @@ int bpf_firewall_install(Unit *u) {
u->ip_bpf_ingress_installed = bpf_program_unref(u->ip_bpf_ingress_installed);
if (u->ip_bpf_egress) {
r = bpf_program_cgroup_attach(u->ip_bpf_egress, BPF_CGROUP_INET_EGRESS, path,
flags | (set_isempty(u->ip_bpf_custom_egress) ? 0 : BPF_F_ALLOW_MULTI));
r = bpf_program_cgroup_attach(u->ip_bpf_egress, BPF_CGROUP_INET_EGRESS, path, flags);
if (r < 0)
return log_unit_error_errno(u, r, "Attaching egress BPF program to cgroup %s failed: %m", path);
@ -717,8 +715,7 @@ int bpf_firewall_install(Unit *u) {
}
if (u->ip_bpf_ingress) {
r = bpf_program_cgroup_attach(u->ip_bpf_ingress, BPF_CGROUP_INET_INGRESS, path,
flags | (set_isempty(u->ip_bpf_custom_ingress) ? 0 : BPF_F_ALLOW_MULTI));
r = bpf_program_cgroup_attach(u->ip_bpf_ingress, BPF_CGROUP_INET_INGRESS, path, flags);
if (r < 0)
return log_unit_error_errno(u, r, "Attaching ingress BPF program to cgroup %s failed: %m", path);

151
src/core/bpf-foreign.c Normal file
View File

@ -0,0 +1,151 @@
/* SPDX-License-Identifier: LGPL-2.1+ */
#include "bpf-foreign.h"
#include "bpf-program.h"
#include "cgroup.h"
#include "memory-util.h"
#include "mountpoint-util.h"
#include "set.h"
/* Hashmap key identifying one foreign BPF program attachment of a unit: the program ID (as returned
 * by bpf_program_get_id_by_fd()) together with the attach type the program is attached with. */
typedef struct BPFForeignKey BPFForeignKey;
struct BPFForeignKey {
uint32_t prog_id;
uint32_t attach_type;
};
/* Allocates a new BPFForeignKey filled in with the given program ID and attach type.
 *
 * On success stores the heap-allocated key in *ret (the caller takes ownership) and returns 0.
 * On allocation failure returns the result of log_oom() and leaves *ret untouched. */
static int bpf_foreign_key_new(uint32_t prog_id,
                enum bpf_attach_type attach_type,
                BPFForeignKey **ret) {
        BPFForeignKey *key;

        assert(ret);

        key = new(BPFForeignKey, 1);
        if (!key)
                return log_oom();

        key->prog_id = prog_id;
        key->attach_type = attach_type;

        *ret = key;
        return 0;
}
/* Orders two keys by program ID first and, if those are equal, by attach type.
 * Returns the CMP() result of the first field that differs, or that of the attach types. */
static int bpf_foreign_key_compare_func(const BPFForeignKey *a, const BPFForeignKey *b) {
        int by_id;

        by_id = CMP(a->prog_id, b->prog_id);

        return by_id != 0 ? by_id : CMP(a->attach_type, b->attach_type);
}
/* Feeds both key fields into the siphash state, so that two keys hash alike only if program ID and
 * attach type both match. */
static void bpf_foreign_key_hash_func(const BPFForeignKey *p, struct siphash *h) {
/* The order of the two compress calls is part of the hash; keep it stable. */
siphash24_compress(&p->prog_id, sizeof(p->prog_id), h);
siphash24_compress(&p->attach_type, sizeof(p->attach_type), h);
}
/* Hash operations for the per-unit map of foreign programs: keys are BPFForeignKey objects released
 * with free(), values are BPFProgram objects released with bpf_program_unref(). */
DEFINE_PRIVATE_HASH_OPS_FULL(bpf_foreign_by_key_hash_ops,
BPFForeignKey, bpf_foreign_key_hash_func, bpf_foreign_key_compare_func, free,
BPFProgram, bpf_program_unref);
/* Attaches every program stored in foreign_by_key to the cgroup at 'path', using the attach type
 * recorded in each entry's key and the given attach flags.
 *
 * Stops at the first failure, logs it against the unit and returns the negative error.
 * Returns 0 once all entries have been attached. */
static int attach_programs(Unit *u, const char *path, Hashmap* foreign_by_key, uint32_t attach_flags) {
        const BPFForeignKey *k;
        BPFProgram *program;

        assert(u);

        HASHMAP_FOREACH_KEY(program, k, foreign_by_key) {
                int r;

                r = bpf_program_cgroup_attach(program, k->attach_type, path, attach_flags);
                if (r < 0)
                        return log_unit_error_errno(u, r, "Attaching foreign BPF program to cgroup %s failed: %m", path);
        }

        return 0;
}
/*
 * Gets a foreign BPF program ready to be attached:
 * - opens the program pinned at bpffs_path;
 * - looks up its program ID;
 * - records it in the unit's bpf_foreign_by_key map under (program ID, attach type).
 *
 * An entry that is already present is not an error: a warning is logged and 0 is returned.
 * Any other failure is logged against the unit and returned as a negative errno.
 */
static int bpf_foreign_prepare(
                Unit *u,
                enum bpf_attach_type attach_type,
                const char *bpffs_path) {
        _cleanup_(bpf_program_unrefp) BPFProgram *program = NULL;
        _cleanup_free_ BPFForeignKey *k = NULL;
        uint32_t id;
        int r;

        assert(u);
        assert(bpffs_path);

        r = bpf_program_new_from_bpffs_path(bpffs_path, &program);
        if (r < 0)
                return log_unit_error_errno(u, r, "Failed to create foreign BPFProgram: %m");

        r = bpf_program_get_id_by_fd(program->kernel_fd, &id);
        if (r < 0)
                return log_unit_error_errno(u, r, "Failed to get BPF program id by fd: %m");

        r = bpf_foreign_key_new(id, attach_type, &k);
        if (r < 0)
                return log_unit_error_errno(u, r,
                                "Failed to create foreign BPF program key from path '%s': %m", bpffs_path);

        r = hashmap_ensure_put(&u->bpf_foreign_by_key, &bpf_foreign_by_key_hash_ops, k, program);
        if (r < 0) {
                if (r != -EEXIST)
                        return log_unit_error_errno(u, r, "Failed to put foreign BPFProgram into map: %m");

                /* Duplicate entry: our local key and program are dropped by the cleanup handlers. */
                log_unit_warning_errno(u, r, "Foreign BPF program already exists, ignoring: %m");
                return 0;
        }

        /* The map now references both objects; disarm the cleanup handlers. */
        TAKE_PTR(k);
        TAKE_PTR(program);

        return 0;
}
/* Reports whether foreign BPF programs can be used: requires cg_all_unified() to return a positive
 * value and "/sys/fs/bpf" to be a mount point.
 *
 * Returns the (zero or negative) result of cg_all_unified() if that check does not pass, otherwise
 * the result of path_is_mount_point(). */
int bpf_foreign_supported(void) {
        int unified;

        unified = cg_all_unified();
        if (unified > 0)
                return path_is_mount_point("/sys/fs/bpf", NULL, 0);

        return unified;
}
/* Prepares all foreign BPF programs configured in the unit's cgroup context and attaches them to
 * the unit's cgroup with BPF_F_ALLOW_MULTI.
 *
 * Returns 0 on success or when the unit has no cgroup context; otherwise logs the failure against
 * the unit and returns a negative errno. */
int bpf_foreign_install(Unit *u) {
        _cleanup_free_ char *path = NULL;
        CGroupBPFForeignProgram *fp;
        CGroupContext *ctx;
        int r;

        assert(u);

        ctx = unit_get_cgroup_context(u);
        if (!ctx)
                return 0;

        r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path, NULL, &path);
        if (r < 0)
                return log_unit_error_errno(u, r, "Failed to get cgroup path: %m");

        /* First collect every configured program in u->bpf_foreign_by_key ... */
        LIST_FOREACH(programs, fp, ctx->bpf_foreign_programs) {
                r = bpf_foreign_prepare(u, fp->attach_type, fp->bpffs_path);
                if (r < 0)
                        return log_unit_error_errno(u, r, "Failed to prepare foreign BPF hashmap: %m");
        }

        /* ... then attach whatever the map holds to the cgroup. */
        r = attach_programs(u, path, u->bpf_foreign_by_key, BPF_F_ALLOW_MULTI);
        if (r < 0)
                return log_unit_error_errno(u, r, "Failed to install foreign BPF programs: %m");

        return 0;
}

12
src/core/bpf-foreign.h Normal file
View File

@ -0,0 +1,12 @@
/* SPDX-License-Identifier: LGPL-2.1+ */
#pragma once
#include "unit.h"
int bpf_foreign_supported(void);
/*
* Attach cgroup-bpf programs foreign to systemd, i.e. loaded to the kernel by an entity
* external to systemd.
*/
int bpf_foreign_install(Unit *u);

View File

@ -8,6 +8,7 @@
#include "blockdev-util.h"
#include "bpf-devices.h"
#include "bpf-firewall.h"
#include "bpf-foreign.h"
#include "btrfs-util.h"
#include "bus-error.h"
#include "cgroup-setup.h"
@ -190,6 +191,15 @@ void cgroup_context_free_blockio_device_bandwidth(CGroupContext *c, CGroupBlockI
free(b);
}
/* Unlinks the entry p from the context's list of foreign BPF programs and releases it together
 * with its path string. */
void cgroup_context_remove_bpf_foreign_program(CGroupContext *c, CGroupBPFForeignProgram *p) {
        char *path;

        assert(c);
        assert(p);

        LIST_REMOVE(programs, c->bpf_foreign_programs, p);

        path = p->bpffs_path;
        free(path);
        free(p);
}
void cgroup_context_done(CGroupContext *c) {
assert(c);
@ -217,6 +227,9 @@ void cgroup_context_done(CGroupContext *c) {
c->ip_filters_ingress = strv_free(c->ip_filters_ingress);
c->ip_filters_egress = strv_free(c->ip_filters_egress);
while (c->bpf_foreign_programs)
cgroup_context_remove_bpf_foreign_program(c, c->bpf_foreign_programs);
cpu_set_reset(&c->cpuset_cpus);
cpu_set_reset(&c->cpuset_mems);
}
@ -360,6 +373,7 @@ void cgroup_context_dump(Unit *u, FILE* f, const char *prefix) {
CGroupIODeviceLatency *l;
CGroupBlockIODeviceBandwidth *b;
CGroupBlockIODeviceWeight *w;
CGroupBPFForeignProgram *p;
CGroupDeviceAllow *a;
CGroupContext *c;
IPAddressAccessItem *iaai;
@ -544,6 +558,10 @@ void cgroup_context_dump(Unit *u, FILE* f, const char *prefix) {
STRV_FOREACH(path, c->ip_filters_egress)
fprintf(f, "%sIPEgressFilterPath: %s\n", prefix, *path);
/* One "BPFProgram: <type>:<path>" line per configured foreign program. Each entry is terminated
 * with a newline, like the other settings in this dump, so consecutive entries do not run together. */
LIST_FOREACH(programs, p, c->bpf_foreign_programs)
        fprintf(f, "%sBPFProgram: %s:%s\n",
                prefix, bpf_cgroup_attach_type_to_string(p->attach_type), p->bpffs_path);
}
int cgroup_add_device_allow(CGroupContext *c, const char *dev, const char *mode) {
@ -575,6 +593,34 @@ int cgroup_add_device_allow(CGroupContext *c, const char *dev, const char *mode)
return 0;
}
/* Adds a foreign BPF program (attach type plus path) to the front of the context's program list.
 *
 * The path must be both normalized and absolute, otherwise -EINVAL is logged and returned.
 * The path string is duplicated, so the caller keeps ownership of bpffs_path.
 * Returns 0 on success, or the result of log_oom() if an allocation fails. */
int cgroup_add_bpf_foreign_program(CGroupContext *c, uint32_t attach_type, const char *bpffs_path) {
        _cleanup_free_ char *path_copy = NULL;
        CGroupBPFForeignProgram *entry;

        assert(c);
        assert(bpffs_path);

        if (!(path_is_normalized(bpffs_path) && path_is_absolute(bpffs_path)))
                return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Path is not normalized: %m");

        path_copy = strdup(bpffs_path);
        if (!path_copy)
                return log_oom();

        entry = new(CGroupBPFForeignProgram, 1);
        if (!entry)
                return log_oom();

        *entry = (CGroupBPFForeignProgram) {
                .attach_type = attach_type,
                .bpffs_path = TAKE_PTR(path_copy),
        };

        LIST_PREPEND(programs, c->bpf_foreign_programs, entry);

        return 0;
}
#define UNIT_DEFINE_ANCESTOR_MEMORY_LOOKUP(entry) \
uint64_t unit_get_ancestor_##entry(Unit *u) { \
CGroupContext *c; \
@ -1115,6 +1161,12 @@ static void set_io_weight(Unit *u, const char *controller, uint64_t weight) {
(void) set_attribute_and_warn(u, controller, p, buf);
}
/* Installs the unit's foreign BPF programs on a best-effort basis: the result of
 * bpf_foreign_install() is deliberately discarded here. */
static void cgroup_apply_bpf_foreign_program(Unit *u) {
        assert(u);

        (void) bpf_foreign_install(u);
}
static void cgroup_context_apply(
Unit *u,
CGroupMask apply_mask,
@ -1428,6 +1480,9 @@ static void cgroup_context_apply(
if (apply_mask & CGROUP_MASK_BPF_FIREWALL)
cgroup_apply_firewall(u);
if (apply_mask & CGROUP_MASK_BPF_FOREIGN)
cgroup_apply_bpf_foreign_program(u);
}
static bool unit_get_needs_bpf_firewall(Unit *u) {
@ -1460,6 +1515,17 @@ static bool unit_get_needs_bpf_firewall(Unit *u) {
return false;
}
/* Returns true if the unit has a cgroup context with at least one foreign BPF program configured,
 * false otherwise (including when the unit has no cgroup context). */
static bool unit_get_needs_bpf_foreign_program(Unit *u) {
        CGroupContext *cc;

        assert(u);

        cc = unit_get_cgroup_context(u);

        return cc && !LIST_IS_EMPTY(cc->bpf_foreign_programs);
}
static CGroupMask unit_get_cgroup_mask(Unit *u) {
CGroupMask mask = 0;
CGroupContext *c;
@ -1511,6 +1577,9 @@ static CGroupMask unit_get_bpf_mask(Unit *u) {
if (unit_get_needs_bpf_firewall(u))
mask |= CGROUP_MASK_BPF_FIREWALL;
if (unit_get_needs_bpf_foreign_program(u))
mask |= CGROUP_MASK_BPF_FOREIGN;
return mask;
}
@ -2989,6 +3058,11 @@ static int cg_bpf_mask_supported(CGroupMask *ret) {
if (r > 0)
mask |= CGROUP_MASK_BPF_DEVICES;
/* BPF pinned prog */
r = bpf_foreign_supported();
if (r > 0)
mask |= CGROUP_MASK_BPF_FOREIGN;
*ret = mask;
return 0;
}

View File

@ -31,6 +31,7 @@ typedef struct CGroupIODeviceLimit CGroupIODeviceLimit;
typedef struct CGroupIODeviceLatency CGroupIODeviceLatency;
typedef struct CGroupBlockIODeviceWeight CGroupBlockIODeviceWeight;
typedef struct CGroupBlockIODeviceBandwidth CGroupBlockIODeviceBandwidth;
typedef struct CGroupBPFForeignProgram CGroupBPFForeignProgram;
typedef enum CGroupDevicePolicy {
/* When devices listed, will allow those, plus built-in ones, if none are listed will allow
@ -94,6 +95,12 @@ struct CGroupBlockIODeviceBandwidth {
uint64_t wbps;
};
/* One configured BPFProgram= entry of a cgroup context: an attach type and the path of the program
 * in the BPF file system. Entries are chained via the "programs" list fields. */
struct CGroupBPFForeignProgram {
LIST_FIELDS(CGroupBPFForeignProgram, programs);
uint32_t attach_type;
/* Heap-allocated; released in cgroup_context_remove_bpf_foreign_program(). */
char *bpffs_path;
};
struct CGroupContext {
bool cpu_accounting;
bool io_accounting;
@ -142,6 +149,7 @@ struct CGroupContext {
char **ip_filters_ingress;
char **ip_filters_egress;
LIST_HEAD(CGroupBPFForeignProgram, bpf_foreign_programs);
/* For legacy hierarchies */
uint64_t cpu_shares;
@ -202,8 +210,10 @@ void cgroup_context_free_io_device_limit(CGroupContext *c, CGroupIODeviceLimit *
void cgroup_context_free_io_device_latency(CGroupContext *c, CGroupIODeviceLatency *l);
void cgroup_context_free_blockio_device_weight(CGroupContext *c, CGroupBlockIODeviceWeight *w);
void cgroup_context_free_blockio_device_bandwidth(CGroupContext *c, CGroupBlockIODeviceBandwidth *b);
void cgroup_context_remove_bpf_foreign_program(CGroupContext *c, CGroupBPFForeignProgram *p);
int cgroup_add_device_allow(CGroupContext *c, const char *dev, const char *mode);
int cgroup_add_bpf_foreign_program(CGroupContext *c, uint32_t attach_type, const char *path);
void cgroup_oomd_xattr_apply(Unit *u, const char *cgroup_path);

View File

@ -5,6 +5,7 @@
#include "af-list.h"
#include "alloc-util.h"
#include "bpf-firewall.h"
#include "bpf-foreign.h"
#include "bus-get-properties.h"
#include "cgroup-util.h"
#include "cgroup.h"
@ -347,6 +348,33 @@ static int property_get_ip_address_access(
return sd_bus_message_close_container(reply);
}
/* D-Bus getter for the "BPFProgram" property: serializes the context's list of foreign BPF programs
 * as an array of (attach type name, bpffs path) string pairs, i.e. signature "a(ss)".
 *
 * userdata is the CGroupContext to read from. Returns the first negative sd-bus error encountered,
 * otherwise the result of closing the array container. */
static int property_get_bpf_foreign_program(
                sd_bus *bus,
                const char *path,
                const char *interface,
                const char *property,
                sd_bus_message *reply,
                void *userdata,
                sd_bus_error *error) {
        CGroupContext *cc = userdata;
        CGroupBPFForeignProgram *prog;
        int r;

        r = sd_bus_message_open_container(reply, 'a', "(ss)");
        if (r < 0)
                return r;

        LIST_FOREACH(programs, prog, cc->bpf_foreign_programs) {
                r = sd_bus_message_append(
                                reply,
                                "(ss)",
                                bpf_cgroup_attach_type_to_string(prog->attach_type),
                                prog->bpffs_path);
                if (r < 0)
                        return r;
        }

        return sd_bus_message_close_container(reply);
}
const sd_bus_vtable bus_cgroup_vtable[] = {
SD_BUS_VTABLE_START(0),
SD_BUS_PROPERTY("Delegate", "b", bus_property_get_bool, offsetof(CGroupContext, delegate), 0),
@ -398,6 +426,7 @@ const sd_bus_vtable bus_cgroup_vtable[] = {
SD_BUS_PROPERTY("ManagedOOMMemoryPressure", "s", property_get_managed_oom_mode, offsetof(CGroupContext, moom_mem_pressure), 0),
SD_BUS_PROPERTY("ManagedOOMMemoryPressureLimit", "u", NULL, offsetof(CGroupContext, moom_mem_pressure_limit), 0),
SD_BUS_PROPERTY("ManagedOOMPreference", "s", property_get_managed_oom_preference, offsetof(CGroupContext, moom_preference), 0),
SD_BUS_PROPERTY("BPFProgram", "a(ss)", property_get_bpf_foreign_program, 0, 0),
SD_BUS_VTABLE_END
};
@ -570,6 +599,85 @@ static int bus_cgroup_set_transient_property(
}
}
return 1;
} else if (streq(name, "BPFProgram")) {
const char *a, *p;
size_t n = 0;
r = sd_bus_message_enter_container(message, 'a', "(ss)");
if (r < 0)
return r;
while ((r = sd_bus_message_read(message, "(ss)", &a, &p)) > 0) {
int attach_type = bpf_cgroup_attach_type_from_string(a);
if (attach_type < 0)
return sd_bus_error_setf(
error,
SD_BUS_ERROR_INVALID_ARGS,
"%s expects a valid BPF attach type, got '%s'.",
name, a);
if (!path_is_normalized(p) || !path_is_absolute(p))
return sd_bus_error_setf(
error,
SD_BUS_ERROR_INVALID_ARGS,
"%s= expects a normalized absolute path.",
name);
if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
r = cgroup_add_bpf_foreign_program(c, attach_type, p);
if (r < 0)
return r;
}
n++;
}
if (r < 0)
return r;
r = sd_bus_message_exit_container(message);
if (r < 0)
return r;
if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
_cleanup_free_ char *buf = NULL;
_cleanup_fclose_ FILE *f = NULL;
CGroupBPFForeignProgram *fp;
size_t size = 0;
if (n == 0)
while (c->bpf_foreign_programs)
cgroup_context_remove_bpf_foreign_program(c, c->bpf_foreign_programs);
f = open_memstream_unlocked(&buf, &size);
if (!f)
return -ENOMEM;
fputs(name, f);
fputs("=\n", f);
LIST_FOREACH(programs, fp, c->bpf_foreign_programs)
fprintf(f, "%s=%s:%s\n", name,
bpf_cgroup_attach_type_to_string(fp->attach_type),
fp->bpffs_path);
r = fflush_and_check(f);
if (r < 0)
return r;
unit_write_setting(u, flags, name, buf);
if (!LIST_IS_EMPTY(c->bpf_foreign_programs)) {
r = bpf_foreign_supported();
if (r < 0)
return r;
if (r == 0)
log_full(LOG_DEBUG,
"Transient unit %s configures a BPF program pinned to BPF "
"filesystem, but the local system does not support that.\n"
"Starting this unit will fail!", u->id);
}
}
return 1;
}

View File

@ -234,7 +234,8 @@ $1.ManagedOOMSwap, config_parse_managed_oom_mode,
$1.ManagedOOMMemoryPressure, config_parse_managed_oom_mode, 0, offsetof($1, cgroup_context.moom_mem_pressure)
$1.ManagedOOMMemoryPressureLimit, config_parse_managed_oom_mem_pressure_limit, 0, offsetof($1, cgroup_context.moom_mem_pressure_limit)
$1.ManagedOOMPreference, config_parse_managed_oom_preference, 0, offsetof($1, cgroup_context.moom_preference)
$1.NetClass, config_parse_warn_compat, DISABLED_LEGACY, 0'
$1.NetClass, config_parse_warn_compat, DISABLED_LEGACY, 0
$1.BPFProgram, config_parse_bpf_foreign_program, 0, offsetof($1, cgroup_context)'
)m4_dnl
Unit.Description, config_parse_unit_string_printf, 0, offsetof(Unit, description)
Unit.Documentation, config_parse_documentation, 0, offsetof(Unit, documentation)

View File

@ -19,6 +19,7 @@
#include "all-units.h"
#include "alloc-util.h"
#include "bpf-firewall.h"
#include "bpf-program.h"
#include "bus-error.h"
#include "bus-internal.h"
#include "bus-util.h"
@ -5581,6 +5582,64 @@ int config_parse_ip_filter_bpf_progs(
return 0;
}
/* Parses a BPFProgram= assignment of the form "<type>:<program-path>".
 *
 * - An empty value resets the list of foreign BPF programs of the cgroup context.
 * - <type> must be a name known to bpf_cgroup_attach_type_from_string().
 * - <program-path> has unit specifiers resolved and must be an absolute path.
 *
 * data is the CGroupContext to modify, userdata the Unit used for specifier expansion.
 * Invalid assignments are logged and ignored (return 0); only out-of-memory and failures of
 * cgroup_add_bpf_foreign_program() are returned as errors. */
int config_parse_bpf_foreign_program(
                const char *unit,
                const char *filename,
                unsigned line,
                const char *section,
                unsigned section_line,
                const char *lvalue,
                int ltype,
                const char *rvalue,
                void *data,
                void *userdata) {
        _cleanup_free_ char *resolved = NULL, *word = NULL;
        CGroupContext *c = data;
        /* Parse with a separate cursor so that rvalue keeps pointing at the complete value for
         * diagnostics. */
        const char *p = rvalue;
        Unit *u = userdata;
        int attach_type, r;

        assert(filename);
        assert(lvalue);
        assert(rvalue);

        if (isempty(rvalue)) {
                while (c->bpf_foreign_programs)
                        cgroup_context_remove_bpf_foreign_program(c, c->bpf_foreign_programs);

                return 0;
        }

        r = extract_first_word(&p, &word, ":", 0);
        if (r == -ENOMEM)
                return log_oom();
        if (r < 0) {
                log_syntax(unit, LOG_WARNING, filename, line, r, "Failed to parse foreign BPF program, ignoring: %s", rvalue);
                return 0;
        }
        /* No type word, or nothing left after the separator (e.g. "egress" or "egress:"): there is
         * no path to resolve, and the cursor may not point at a string at all any more. Refuse the
         * assignment here instead of handing an empty/NULL path to the code below. */
        if (r == 0 || isempty(p)) {
                log_syntax(unit, LOG_WARNING, filename, line, 0, "Invalid syntax in %s=, ignoring: %s", lvalue, rvalue);
                return 0;
        }

        attach_type = bpf_cgroup_attach_type_from_string(word);
        if (attach_type < 0) {
                log_syntax(unit, LOG_WARNING, filename, line, 0, "Unknown BPF attach type=%s, ignoring: %s", word, rvalue);
                return 0;
        }

        r = unit_full_printf(u, p, &resolved);
        if (r < 0) {
                log_syntax(unit, LOG_WARNING, filename, line, r, "Failed to resolve unit specifiers in '%s', ignoring: %m", p);
                return 0;
        }

        /* path_simplify_and_warn() logs on its own, so a rejected path is silently skipped here. */
        r = path_simplify_and_warn(resolved, PATH_CHECK_ABSOLUTE, unit, filename, line, lvalue);
        if (r < 0)
                return 0;

        r = cgroup_add_bpf_foreign_program(c, attach_type, resolved);
        if (r < 0)
                return log_error_errno(r, "Failed to add foreign BPF program to cgroup context: %m");

        return 0;
}
static int merge_by_names(Unit **u, Set *names, const char *id) {
char *k;
int r;

View File

@ -140,6 +140,7 @@ CONFIG_PARSER_PROTOTYPE(config_parse_swap_priority);
CONFIG_PARSER_PROTOTYPE(config_parse_mount_images);
CONFIG_PARSER_PROTOTYPE(config_parse_socket_timestamping);
CONFIG_PARSER_PROTOTYPE(config_parse_extension_images);
CONFIG_PARSER_PROTOTYPE(config_parse_bpf_foreign_program);
/* gperf prototypes */
const struct ConfigPerfItem* load_fragment_gperf_lookup(const char *key, GPERF_LEN_TYPE length);

View File

@ -11,6 +11,8 @@ libcore_sources = '''
bpf-devices.h
bpf-firewall.c
bpf-firewall.h
bpf-foreign.c
bpf-foreign.h
cgroup.c
cgroup.h
core-varlink.c

View File

@ -11,6 +11,7 @@
#include "all-units.h"
#include "alloc-util.h"
#include "bpf-firewall.h"
#include "bpf-foreign.h"
#include "bus-common-errors.h"
#include "bus-util.h"
#include "cgroup-setup.h"
@ -723,6 +724,8 @@ Unit* unit_free(Unit *u) {
set_free(u->ip_bpf_custom_ingress_installed);
set_free(u->ip_bpf_custom_egress_installed);
hashmap_free(u->bpf_foreign_by_key);
bpf_program_unref(u->bpf_device_control_installed);
condition_free_list(u->conditions);

View File

@ -305,6 +305,10 @@ typedef struct Unit {
Set *ip_bpf_custom_egress;
Set *ip_bpf_custom_egress_installed;
/* BPF programs managed (e.g. loaded to kernel) by an entity external to systemd,
* attached to unit cgroup by provided program fd and attach type. */
Hashmap *bpf_foreign_by_key;
uint64_t ip_accounting_extra[_CGROUP_IP_ACCOUNTING_METRIC_MAX];
/* Low-priority event source which is used to remove watched PIDs that have gone away, and subscribe to any new

View File

@ -11,6 +11,50 @@
#include "memory-util.h"
#include "missing_syscall.h"
#include "path-util.h"
#include "string-table.h"
/* Names of the cgroup BPF attach types, indexed by the enum bpf_attach_type value. These are the
 * short names that appear before the ':' in "<attach type>:<path>" strings, e.g. "ingress" or
 * "bind4". Attach types below __MAX_BPF_ATTACH_TYPE that are not listed here have a NULL entry. */
static const char *const bpf_cgroup_attach_type_table[__MAX_BPF_ATTACH_TYPE] = {
[BPF_CGROUP_INET_INGRESS] = "ingress",
[BPF_CGROUP_INET_EGRESS] = "egress",
[BPF_CGROUP_INET_SOCK_CREATE] = "sock_create",
[BPF_CGROUP_SOCK_OPS] = "sock_ops",
[BPF_CGROUP_DEVICE] = "device",
[BPF_CGROUP_INET4_BIND] = "bind4",
[BPF_CGROUP_INET6_BIND] = "bind6",
[BPF_CGROUP_INET4_CONNECT] = "connect4",
[BPF_CGROUP_INET6_CONNECT] = "connect6",
[BPF_CGROUP_INET4_POST_BIND] = "post_bind4",
[BPF_CGROUP_INET6_POST_BIND] = "post_bind6",
[BPF_CGROUP_UDP4_SENDMSG] = "sendmsg4",
[BPF_CGROUP_UDP6_SENDMSG] = "sendmsg6",
[BPF_CGROUP_SYSCTL] = "sysctl",
[BPF_CGROUP_UDP4_RECVMSG] = "recvmsg4",
[BPF_CGROUP_UDP6_RECVMSG] = "recvmsg6",
[BPF_CGROUP_GETSOCKOPT] = "getsockopt",
[BPF_CGROUP_SETSOCKOPT] = "setsockopt",
};
/* Presumably expands to the bpf_cgroup_attach_type_to_string() and
 * bpf_cgroup_attach_type_from_string() lookups over the table above; the macro itself is defined
 * in string-table.h, which is not visible here. */
DEFINE_STRING_TABLE_LOOKUP(bpf_cgroup_attach_type, int);
/* struct bpf_prog_info info must be initialized since its value is both input and output
* for BPF_OBJ_GET_INFO_BY_FD syscall. */
static int bpf_program_get_info_by_fd(int prog_fd, struct bpf_prog_info *info, uint32_t info_len) {
union bpf_attr attr;
/* Explicitly memset to zero since some compilers may produce non-zero-initialized padding when
* structured initialization is used.
* Refer to https://github.com/systemd/systemd/issues/18164
*/
zero(attr);
attr.info.bpf_fd = prog_fd;
attr.info.info_len = info_len;
attr.info.info = PTR_TO_UINT64(info);
if (bpf(BPF_OBJ_GET_INFO_BY_FD, &attr, sizeof(attr)) < 0)
return -errno;
return 0;
}
int bpf_program_new(uint32_t prog_type, BPFProgram **ret) {
_cleanup_(bpf_program_unrefp) BPFProgram *p = NULL;
@ -28,6 +72,38 @@ int bpf_program_new(uint32_t prog_type, BPFProgram **ret) {
return 0;
}
/* Allocates a BPFProgram object for a program that already exists at 'path'.
 *
 * The object starts out with an unspecified program type and no kernel fd. The program is then
 * opened through bpf_program_load_from_bpf_fs() and its actual type is taken from the info the
 * kernel reports for the resulting fd.
 *
 * On success stores the new object (one reference, owned by the caller) in *ret and returns 0.
 * On failure returns a negative errno-style value and drops the partially set up object. */
int bpf_program_new_from_bpffs_path(const char *path, BPFProgram **ret) {
        _cleanup_(bpf_program_unrefp) BPFProgram *prog = NULL;
        struct bpf_prog_info prog_info = {};
        int r;

        assert(path);
        assert(ret);

        prog = new(BPFProgram, 1);
        if (!prog)
                return -ENOMEM;

        *prog = (BPFProgram) {
                .n_ref = 1,
                .kernel_fd = -1,
                .prog_type = BPF_PROG_TYPE_UNSPEC,
        };

        r = bpf_program_load_from_bpf_fs(prog, path);
        if (r >= 0)
                /* The type is not known up front, so ask the kernel about the fd we just got. */
                r = bpf_program_get_info_by_fd(prog->kernel_fd, &prog_info, sizeof(prog_info));
        if (r < 0)
                return r;

        prog->prog_type = prog_info.type;

        *ret = TAKE_PTR(prog);
        return 0;
}
static BPFProgram *bpf_program_free(BPFProgram *p) {
assert(p);
@ -254,3 +330,31 @@ int bpf_map_lookup_element(int fd, const void *key, void *value) {
return 0;
}
int bpf_program_pin(int prog_fd, const char *bpffs_path) {
union bpf_attr attr;
zero(attr);
attr.pathname = PTR_TO_UINT64((void *) bpffs_path);
attr.bpf_fd = prog_fd;
if (bpf(BPF_OBJ_PIN, &attr, sizeof(attr)) < 0)
return -errno;
return 0;
}
/* Looks up the kernel-assigned ID of the BPF program referred to by prog_fd.
 *
 * On success stores the ID in *ret_id and returns 0. On failure returns the negative errno-style
 * value reported by bpf_program_get_info_by_fd() and leaves *ret_id untouched. */
int bpf_program_get_id_by_fd(int prog_fd, uint32_t *ret_id) {
        /* Must be zero-initialized: the info structure is input as well as output of the query. */
        struct bpf_prog_info info = {};
        int r;

        assert(ret_id);

        r = bpf_program_get_info_by_fd(prog_fd, &info, sizeof(info));
        if (r < 0)
                return r;

        *ret_id = info.id;

        return 0;
}

View File

@ -26,8 +26,9 @@ struct BPFProgram {
};
int bpf_program_new(uint32_t prog_type, BPFProgram **ret);
BPFProgram *bpf_program_unref(BPFProgram *p);
int bpf_program_new_from_bpffs_path(const char *path, BPFProgram **ret);
BPFProgram *bpf_program_ref(BPFProgram *p);
BPFProgram *bpf_program_unref(BPFProgram *p);
int bpf_program_add_instructions(BPFProgram *p, const struct bpf_insn *insn, size_t count);
int bpf_program_load_kernel(BPFProgram *p, char *log_buf, size_t log_size);
@ -35,9 +36,14 @@ int bpf_program_load_from_bpf_fs(BPFProgram *p, const char *path);
int bpf_program_cgroup_attach(BPFProgram *p, int type, const char *path, uint32_t flags);
int bpf_program_cgroup_detach(BPFProgram *p);
int bpf_program_pin(int prog_fd, const char *bpffs_path);
int bpf_program_get_id_by_fd(int prog_fd, uint32_t *ret_id);
int bpf_map_new(enum bpf_map_type type, size_t key_size, size_t value_size, size_t max_entries, uint32_t flags);
int bpf_map_update_element(int fd, const void *key, void *value);
int bpf_map_lookup_element(int fd, const void *key, void *value);
int bpf_cgroup_attach_type_from_string(const char *str) _pure_;
const char *bpf_cgroup_attach_type_to_string(int attach_type) _const_;
DEFINE_TRIVIAL_CLEANUP_FUNC(BPFProgram*, bpf_program_unref);

View File

@ -842,6 +842,26 @@ static int bus_append_cgroup_property(sd_bus_message *m, const char *field, cons
return 1;
}
if (streq(field, "BPFProgram")) {
if (isempty(eq))
r = sd_bus_message_append(m, "(sv)", field, "a(ss)", 0);
else {
_cleanup_free_ char *word = NULL;
r = extract_first_word(&eq, &word, ":", 0);
if (r == -ENOMEM)
return log_oom();
if (r < 0)
return log_error_errno(r, "Failed to parse %s: %m", field);
r = sd_bus_message_append(m, "(sv)", field, "a(ss)", 1, word, eq);
}
if (r < 0)
return bus_log_create_error(r);
return 1;
}
return 0;
}

View File

@ -1694,6 +1694,23 @@ static int print_property(const char *name, const char *expected_value, sd_bus_m
return 1;
} else if (streq(name, "BPFProgram")) {
const char *a, *p;
r = sd_bus_message_enter_container(m, SD_BUS_TYPE_ARRAY, "(ss)");
if (r < 0)
return bus_log_parse_error(r);
while ((r = sd_bus_message_read(m, "(ss)", &a, &p)) > 0)
bus_print_property_valuef(name, expected_value, value, "%s:%s", a, p);
if (r < 0)
return bus_log_parse_error(r);
r = sd_bus_message_exit_container(m);
if (r < 0)
return bus_log_parse_error(r);
return 1;
}
break;

View File

@ -324,6 +324,12 @@ tests += [
libblkid],
core_includes],
[['src/test/test-bpf-foreign-programs.c'],
[libcore,
libshared],
[],
core_includes],
[['src/test/test-watch-pid.c'],
[libcore,
libshared],

View File

@ -0,0 +1,332 @@
/* SPDX-License-Identifier: LGPL-2.1+ */
#include <fcntl.h>
#include <linux/bpf_insn.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>
#include "bpf-foreign.h"
#include "load-fragment.h"
#include "manager.h"
#include "process-util.h"
#include "rlimit-util.h"
#include "rm-rf.h"
#include "service.h"
#include "tests.h"
#include "unit.h"
#include "virt.h"
/* One program entry of a test scenario. */
typedef struct Test {
        /* Unit setting the entry is configured through; this file uses "BPFProgram",
         * "IPIngressFilterPath" and "IPEgressFilterPath". */
        const char *option_name;
        /* Program type the trivial test program is created with. */
        enum bpf_prog_type prog_type;
        /* Cgroup hook the program is meant to be attached to. */
        enum bpf_attach_type attach_type;
        /* Location in bpffs where the program gets pinned. */
        const char *bpffs_path;
} Test;
/* Builds the bpffs pin path of a test program. prog_suffix must be a string literal; it is appended
 * by adjacent string literal concatenation. Stringifying it with '#' would copy the literal's own
 * double quotes into the file name. */
#define BPFFS_PATH(prog_suffix) ("/sys/fs/bpf/test-bpf-foreing-" prog_suffix)
/* Scenario: a single cgroup_skb program configured for the ingress hook through BPFProgram=. */
static const Test single_prog[] = {
{
.option_name = "BPFProgram",
.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
.attach_type = BPF_CGROUP_INET_INGRESS,
.bpffs_path = BPFFS_PATH("trivial-skb"),
},
};
/* Scenario: the pin path itself contains ':' characters. The option string handed to the parser
 * is "<attach type>:<path>", so this presumably checks that only the first ':' is treated as the
 * separator (the parser is not visible from here). */
static const Test path_split_test[] = {
{
.option_name = "BPFProgram",
.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
.attach_type = BPF_CGROUP_INET_INGRESS,
.bpffs_path = BPFFS_PATH("path:split:test"),
},
};
/* Scenario: the very same entry (same pin path, same sock_create hook) listed twice.
 * pin_programs() pins a given path only once, so both entries refer to one pinned program. */
static const Test same_prog_same_hook[] = {
{
.option_name = "BPFProgram",
.prog_type = BPF_PROG_TYPE_CGROUP_SOCK,
.attach_type = BPF_CGROUP_INET_SOCK_CREATE,
.bpffs_path = BPFFS_PATH("trivial-sock"),
},
{
.option_name = "BPFProgram",
.prog_type = BPF_PROG_TYPE_CGROUP_SOCK,
.attach_type = BPF_CGROUP_INET_SOCK_CREATE,
.bpffs_path = BPFFS_PATH("trivial-sock"),
}
};
/* Scenario: two separately pinned cgroup_sock programs configured for the same sock_create hook. */
static const Test multi_prog_same_hook[] = {
{
.option_name = "BPFProgram",
.prog_type = BPF_PROG_TYPE_CGROUP_SOCK,
.attach_type = BPF_CGROUP_INET_SOCK_CREATE,
.bpffs_path = BPFFS_PATH("trivial-sock-0"),
},
{
.option_name = "BPFProgram",
.prog_type = BPF_PROG_TYPE_CGROUP_SOCK,
.attach_type = BPF_CGROUP_INET_SOCK_CREATE,
.bpffs_path = BPFFS_PATH("trivial-sock-1"),
}
};
/* Scenario: one pinned cgroup_skb program configured for two different hooks, ingress and egress. */
static const Test same_prog_multi_hook[] = {
{
.option_name = "BPFProgram",
.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
.attach_type = BPF_CGROUP_INET_INGRESS,
.bpffs_path = BPFFS_PATH("trivial-skb"),
},
{
.option_name = "BPFProgram",
.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
.attach_type = BPF_CGROUP_INET_EGRESS,
.bpffs_path = BPFFS_PATH("trivial-skb"),
}
};
/* Scenario: one pinned cgroup_skb program configured for the ingress hook through two different
 * settings, first BPFProgram= and then IPIngressFilterPath=. */
static const Test same_prog_multi_option_0[] = {
{
.option_name = "BPFProgram",
.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
.attach_type = BPF_CGROUP_INET_INGRESS,
.bpffs_path = BPFFS_PATH("trivial-skb"),
},
{
.option_name = "IPIngressFilterPath",
.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
.attach_type = BPF_CGROUP_INET_INGRESS,
.bpffs_path = BPFFS_PATH("trivial-skb"),
}
};
/* Scenario: one pinned cgroup_skb program configured for the egress hook through two different
 * settings, first IPEgressFilterPath= and then BPFProgram=. */
static const Test same_prog_multi_option_1[] = {
{
.option_name = "IPEgressFilterPath",
.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
.attach_type = BPF_CGROUP_INET_EGRESS,
.bpffs_path = BPFFS_PATH("trivial-skb"),
},
{
.option_name = "BPFProgram",
.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
.attach_type = BPF_CGROUP_INET_EGRESS,
.bpffs_path = BPFFS_PATH("trivial-skb"),
}
};
#undef BPFFS_PATH
/* Composes the "<attach type>:<bpffs path>" option string for a test entry.
 *
 * The newly allocated string is stored in *ret_str and is owned by the caller. Any failure
 * (unknown attach type, allocation failure) trips an assertion, so the function always
 * returns 0. */
static int bpf_foreign_test_to_string(enum bpf_attach_type attach_type, const char *bpffs_path, char **ret_str) {
        const char *type_name;

        assert_se(bpffs_path);
        assert_se(ret_str);

        type_name = bpf_cgroup_attach_type_to_string(attach_type);
        assert_se(type_name);

        *ret_str = strjoin(type_name, ":", bpffs_path);
        assert_se(*ret_str);

        return 0;
}
/* Unlinks every path in the NULL-terminated string vector, ignoring unlink() failures, then frees
 * the vector itself. Returns whatever strv_free() returns, so it can be used as a cleanup
 * function. A NULL vector is fine. */
static char **unlink_paths_and_free(char **paths) {
        for (char **entry = paths; entry && *entry; entry++)
                (void) unlink(*entry);

        return strv_free(paths);
}
DEFINE_TRIVIAL_CLEANUP_FUNC(char **, unlink_paths_and_free);
/* Creates a trivial BPF program for every entry of the test suite and pins it at the entry's
 * bpffs path.
 *
 * A path that occurs more than once in the suite is pinned only the first time. On success the
 * vector of pinned paths is handed to the caller through *paths_ret. On failure a negative
 * errno-style value is returned and the paths collected so far are unlinked again by the cleanup
 * handler. The u and cc parameters are not used. */
static int pin_programs(Unit *u, CGroupContext *cc, const Test *test_suite, size_t test_suite_size, char ***paths_ret) {
        _cleanup_(unlink_paths_and_freep) char **pinned = NULL;
        /* Smallest possible program: set the return register to 0 and exit. */
        static const struct bpf_insn noop_insns[] = {
                BPF_MOV64_IMM(BPF_REG_0, 0),
                BPF_EXIT_INSN()
        };
        char verifier_log[0xffff];
        int r;

        assert_se(paths_ret);

        for (size_t idx = 0; idx < test_suite_size; idx++) {
                _cleanup_(bpf_program_unrefp) BPFProgram *program = NULL;
                _cleanup_free_ char *desc = NULL;
                const Test *t = test_suite + idx;

                /* Human readable "<attach type>:<path>" form, used in the messages below. */
                r = bpf_foreign_test_to_string(t->attach_type, t->bpffs_path, &desc);
                if (r < 0)
                        return log_error_errno(r, "Failed to convert program to string");

                r = bpf_program_new(t->prog_type, &program);
                if (r < 0)
                        return log_error_errno(r, "Failed to create program '%s'", desc);

                r = bpf_program_add_instructions(program, noop_insns, ELEMENTSOF(noop_insns));
                if (r < 0)
                        return log_error_errno(r, "Failed to add trivial instructions for '%s'", desc);

                r = bpf_program_load_kernel(program, verifier_log, ELEMENTSOF(verifier_log));
                if (r < 0)
                        return log_error_errno(r, "Failed to load BPF program '%s'", desc);

                /* This path was already pinned for an earlier entry of the suite. */
                if (strv_contains(pinned, t->bpffs_path))
                        continue;

                /* Record the path before pinning, so that the cleanup handler knows about it. */
                r = strv_extend(&pinned, t->bpffs_path);
                if (r < 0)
                        return log_error_errno(r, "Failed to put path into a vector: %m");

                r = bpf_program_pin(program->kernel_fd, t->bpffs_path);
                if (r < 0)
                        return log_error_errno(r, "Failed to pin BPF program '%s'", desc);
        }

        *paths_ret = TAKE_PTR(pinned);
        return 0;
}
/* Runs one test scenario: pins the suite's programs, configures a fresh service unit named
 * unit_name with the corresponding settings, starts it with a short ping as ExecStart= and waits
 * until the service is dead or failed.
 *
 * Returns a non-negative value if the ExecStart= process exited and the service ended up dead,
 * and a negative errno-style value otherwise. The pinned programs are unlinked again when the
 * function returns. */
static int test_bpf_cgroup_programs(Manager *m, const char *unit_name, const Test *test_suite, size_t test_suite_size) {
        _cleanup_(unlink_paths_and_freep) char **bpffs_paths = NULL;
        _cleanup_(unit_freep) Unit *u = NULL;
        CGroupContext *cc = NULL;
        int cld_code, r;

        assert_se(u = unit_new(m, sizeof(Service)));
        assert_se(unit_add_name(u, unit_name) == 0);
        assert_se(cc = unit_get_cgroup_context(u));

        r = pin_programs(u, cc, test_suite, test_suite_size, &bpffs_paths);
        if (r < 0)
                return log_error_errno(r, "Failed to pin programs: %m");

        /* Feed every entry through the parser of the setting it names. */
        for (size_t i = 0; i < test_suite_size; i++) {
                if (streq(test_suite[i].option_name, "BPFProgram")) {
                        _cleanup_free_ char *option = NULL;

                        r = bpf_foreign_test_to_string(test_suite[i].attach_type, test_suite[i].bpffs_path, &option);
                        if (r < 0)
                                return log_error_errno(r, "Failed to compose option string: %m");

                        r = config_parse_bpf_foreign_program(
                                        u->id, "filename", 1, "Service", 1, test_suite[i].option_name, 0, option, cc, u);
                        if (r < 0)
                                return log_error_errno(r, "Failed to parse option string '%s': %m", option);
                } else if (STR_IN_SET(test_suite[i].option_name, "IPIngressFilterPath", "IPEgressFilterPath")) {
                        const char *option = test_suite[i].bpffs_path;
                        void *paths = NULL;

                        if (streq(test_suite[i].option_name, "IPIngressFilterPath"))
                                paths = &cc->ip_filters_ingress;
                        else
                                paths = &cc->ip_filters_egress;

                        r = config_parse_ip_filter_bpf_progs(
                                        u->id, "filename", 1, "Service", 1, test_suite[i].option_name, 0, option, paths, u);
                        if (r < 0)
                                return log_error_errno(r, "Failed to parse option string '%s': %m", option);
                }
        }

        r = config_parse_exec(
                        u->id,
                        "filename",
                        1,
                        "Service",
                        1,
                        "ExecStart",
                        SERVICE_EXEC_START,
                        "-/bin/ping -c 5 127.0.0.1 -W 1",
                        SERVICE(u)->exec_command,
                        u);
        if (r < 0)
                return log_error_errno(r, "Failed to parse ExecStart");

        SERVICE(u)->type = SERVICE_ONESHOT;
        u->load_state = UNIT_LOADED;

        r = unit_start(u);
        if (r < 0)
                return log_error_errno(r, "Unit start failed %m");

        while (!IN_SET(SERVICE(u)->state, SERVICE_DEAD, SERVICE_FAILED)) {
                r = sd_event_run(m->event, UINT64_MAX);
                if (r < 0)
                        /* sd_event_run() reports failures through its return value, not through
                         * errno, so log and propagate r. */
                        return log_error_errno(r, "Event run failed %m");
        }

        cld_code = SERVICE(u)->exec_command[SERVICE_EXEC_START]->exec_status.code;
        if (cld_code != CLD_EXITED)
                return log_error_errno(SYNTHETIC_ERRNO(EBUSY),
                                       "ExecStart didn't exited, code='%s'", sigchld_code_to_string(cld_code));

        if (SERVICE(u)->state != SERVICE_DEAD)
                return log_error_errno(SYNTHETIC_ERRNO(EBUSY), "Service is not dead");

        return r;
}
int main(int argc, char *argv[]) {
_cleanup_(rm_rf_physical_and_freep) char *runtime_dir = NULL;
_cleanup_(manager_freep) Manager *m = NULL;
_cleanup_free_ char *unit_dir = NULL;
struct rlimit rl;
int r;
test_setup_logging(LOG_DEBUG);
if (detect_container() > 0)
return log_tests_skipped("test-bpf fails inside LXC and Docker containers: https://github.com/systemd/systemd/issues/9666");
if (getuid() != 0)
return log_tests_skipped("not running as root");
assert_se(getrlimit(RLIMIT_MEMLOCK, &rl) >= 0);
rl.rlim_cur = rl.rlim_max = MAX(rl.rlim_max, CAN_MEMLOCK_SIZE);
(void) setrlimit_closest(RLIMIT_MEMLOCK, &rl);
if (!can_memlock())
return log_tests_skipped("Can't use mlock(), skipping.");
r = cg_all_unified();
if (r <= 0)
return log_tests_skipped_errno(r, "Unified hierarchy is required, skipping.");
r = enter_cgroup_subroot(NULL);
if (r == -ENOMEDIUM)
return log_tests_skipped("cgroupfs not available");
assert_se(get_testdata_dir("units", &unit_dir) >= 0);
assert_se(set_unit_path(unit_dir) >= 0);
assert_se(runtime_dir = setup_fake_runtime_dir());
assert_se(manager_new(UNIT_FILE_USER, MANAGER_TEST_RUN_BASIC, &m) >= 0);
assert_se(manager_startup(m, NULL, NULL) >= 0);
assert_se(test_bpf_cgroup_programs(m,
"single_prog.service", single_prog, ELEMENTSOF(single_prog)) >= 0);
assert_se(test_bpf_cgroup_programs(m,
"multi_prog_same_hook.service",
multi_prog_same_hook, ELEMENTSOF(multi_prog_same_hook)) >= 0);
assert_se(test_bpf_cgroup_programs(m,
"same_prog_multi_hook.service",
same_prog_multi_hook, ELEMENTSOF(same_prog_multi_hook)) >= 0);
assert_se(test_bpf_cgroup_programs(m,
"same_prog_multi_option_0.service",
same_prog_multi_option_0, ELEMENTSOF(same_prog_multi_option_0)) >= 0);
assert_se(test_bpf_cgroup_programs(m,
"same_prog_multi_option_1.service",
same_prog_multi_option_1, ELEMENTSOF(same_prog_multi_option_1)) >= 0);
assert_se(test_bpf_cgroup_programs(m,
"same_prog_same_hook.service",
same_prog_same_hook,
ELEMENTSOF(same_prog_same_hook)) >= 0);
assert_se(test_bpf_cgroup_programs(m,
"path_split_test.service",
path_split_test,
ELEMENTSOF(path_split_test)) >= 0);
return 0;
}

View File

@ -140,7 +140,7 @@ static void test_cg_mask_to_string_one(CGroupMask mask, const char *t) {
static void test_cg_mask_to_string(void) {
test_cg_mask_to_string_one(0, NULL);
test_cg_mask_to_string_one(_CGROUP_MASK_ALL, "cpu cpuacct cpuset io blkio memory devices pids bpf-firewall bpf-devices");
test_cg_mask_to_string_one(_CGROUP_MASK_ALL, "cpu cpuacct cpuset io blkio memory devices pids bpf-firewall bpf-devices bpf-foreign");
test_cg_mask_to_string_one(CGROUP_MASK_CPU, "cpu");
test_cg_mask_to_string_one(CGROUP_MASK_CPUACCT, "cpuacct");
test_cg_mask_to_string_one(CGROUP_MASK_CPUSET, "cpuset");

View File

@ -4,6 +4,7 @@ AllowedCPUs=
AllowedMemoryNodes=
AmbientCapabilities=
AppArmorProfile=
BPFProgram=
BindPaths=
BindReadOnlyPaths=
BlockIOAccounting=

View File

@ -2,6 +2,7 @@ scope
[Scope]
AllowedCPUs=
AllowedMemoryNodes=
BPFProgram=
BlockIOAccounting=
BlockIODeviceWeight=
BlockIOReadBandwidth=

View File

@ -28,6 +28,7 @@ AssertPathIsSymbolicLink=
AssertSecurity=
AssertUser=
AssertVirtualization=
BPFProgram=
Before=
BindTo=
BindsTo=

View File

@ -2,6 +2,7 @@ slice
[Slice]
AllowedCPUs=
AllowedMemoryNodes=
BPFProgram=
BlockIOAccounting=
BlockIODeviceWeight=
BlockIOReadBandwidth=

View File

@ -5,6 +5,7 @@ AllowedCPUs=
AllowedMemoryNodes=
AmbientCapabilities=
AppArmorProfile=
BPFProgram=
Backlog=
BindIPv6Only=
BindPaths=

View File

@ -4,6 +4,7 @@ AllowedCPUs=
AllowedMemoryNodes=
AmbientCapabilities=
AppArmorProfile=
BPFProgram=
BindPaths=
BindReadOnlyPaths=
BlockIOAccounting=