mirror of
https://github.com/systemd/systemd-stable.git
synced 2024-10-28 11:55:23 +03:00
core: refactor bpf firewall support into a pseudo-controller
The idea is to introduce a concept of bpf-based pseudo-controllers to make adding new bpf-based features easier.
This commit is contained in:
parent
b03d6c5f48
commit
17f149556a
@ -2767,6 +2767,7 @@ static const char *cgroup_controller_table[_CGROUP_CONTROLLER_MAX] = {
|
||||
[CGROUP_CONTROLLER_MEMORY] = "memory",
|
||||
[CGROUP_CONTROLLER_DEVICES] = "devices",
|
||||
[CGROUP_CONTROLLER_PIDS] = "pids",
|
||||
[CGROUP_CONTROLLER_BPF_FIREWALL] = "bpf-firewall",
|
||||
};
|
||||
|
||||
DEFINE_STRING_TABLE_LOOKUP(cgroup_controller, CGroupController);
|
||||
|
@ -19,6 +19,7 @@
|
||||
|
||||
/* An enum of well known cgroup controllers */
|
||||
typedef enum CGroupController {
|
||||
/* Original cgroup controllers */
|
||||
CGROUP_CONTROLLER_CPU,
|
||||
CGROUP_CONTROLLER_CPUACCT, /* v1 only */
|
||||
CGROUP_CONTROLLER_IO, /* v2 only */
|
||||
@ -26,6 +27,10 @@ typedef enum CGroupController {
|
||||
CGROUP_CONTROLLER_MEMORY,
|
||||
CGROUP_CONTROLLER_DEVICES, /* v1 only */
|
||||
CGROUP_CONTROLLER_PIDS,
|
||||
|
||||
/* BPF-based pseudo-controllers, v2 only */
|
||||
CGROUP_CONTROLLER_BPF_FIREWALL,
|
||||
|
||||
_CGROUP_CONTROLLER_MAX,
|
||||
_CGROUP_CONTROLLER_INVALID = -1,
|
||||
} CGroupController;
|
||||
@ -41,6 +46,7 @@ typedef enum CGroupMask {
|
||||
CGROUP_MASK_MEMORY = CGROUP_CONTROLLER_TO_MASK(CGROUP_CONTROLLER_MEMORY),
|
||||
CGROUP_MASK_DEVICES = CGROUP_CONTROLLER_TO_MASK(CGROUP_CONTROLLER_DEVICES),
|
||||
CGROUP_MASK_PIDS = CGROUP_CONTROLLER_TO_MASK(CGROUP_CONTROLLER_PIDS),
|
||||
CGROUP_MASK_BPF_FIREWALL = CGROUP_CONTROLLER_TO_MASK(CGROUP_CONTROLLER_BPF_FIREWALL),
|
||||
_CGROUP_MASK_ALL = CGROUP_CONTROLLER_TO_MASK(_CGROUP_CONTROLLER_MAX) - 1
|
||||
} CGroupMask;
|
||||
|
||||
|
@ -770,7 +770,6 @@ static void cgroup_apply_firewall(Unit *u) {
|
||||
static void cgroup_context_apply(
|
||||
Unit *u,
|
||||
CGroupMask apply_mask,
|
||||
bool apply_bpf,
|
||||
ManagerState state) {
|
||||
|
||||
const char *path;
|
||||
@ -781,7 +780,7 @@ static void cgroup_context_apply(
|
||||
assert(u);
|
||||
|
||||
/* Nothing to do? Exit early! */
|
||||
if (apply_mask == 0 && !apply_bpf)
|
||||
if (apply_mask == 0)
|
||||
return;
|
||||
|
||||
/* Some cgroup attributes are not supported on the root cgroup, hence silently ignore */
|
||||
@ -1127,7 +1126,7 @@ static void cgroup_context_apply(
|
||||
}
|
||||
}
|
||||
|
||||
if (apply_bpf)
|
||||
if (apply_mask & CGROUP_MASK_BPF_FIREWALL)
|
||||
cgroup_apply_firewall(u);
|
||||
}
|
||||
|
||||
@ -1161,6 +1160,15 @@ CGroupMask cgroup_context_get_mask(CGroupContext *c) {
|
||||
return mask;
|
||||
}
|
||||
|
||||
/* Computes the mask of BPF-based pseudo-controllers required by the unit. Currently the only
 * such pseudo-controller is the BPF firewall: its bit is set when unit_get_needs_bpf_firewall()
 * reports true for the unit, otherwise the returned mask is empty. */
CGroupMask unit_get_bpf_mask(Unit *u) {
        CGroupMask needed = 0;

        /* BPF-based firewall */
        if (unit_get_needs_bpf_firewall(u))
                needed |= CGROUP_MASK_BPF_FIREWALL;

        return needed;
}
|
||||
|
||||
CGroupMask unit_get_own_mask(Unit *u) {
|
||||
CGroupContext *c;
|
||||
|
||||
@ -1170,7 +1178,7 @@ CGroupMask unit_get_own_mask(Unit *u) {
|
||||
if (!c)
|
||||
return 0;
|
||||
|
||||
return cgroup_context_get_mask(c) | unit_get_delegate_mask(u);
|
||||
return cgroup_context_get_mask(c) | unit_get_bpf_mask(u) | unit_get_delegate_mask(u);
|
||||
}
|
||||
|
||||
CGroupMask unit_get_delegate_mask(Unit *u) {
|
||||
@ -1278,7 +1286,7 @@ CGroupMask unit_get_enable_mask(Unit *u) {
|
||||
return mask;
|
||||
}
|
||||
|
||||
bool unit_get_needs_bpf(Unit *u) {
|
||||
bool unit_get_needs_bpf_firewall(Unit *u) {
|
||||
CGroupContext *c;
|
||||
Unit *p;
|
||||
assert(u);
|
||||
@ -1508,8 +1516,7 @@ int unit_pick_cgroup_path(Unit *u) {
|
||||
static int unit_create_cgroup(
|
||||
Unit *u,
|
||||
CGroupMask target_mask,
|
||||
CGroupMask enable_mask,
|
||||
bool needs_bpf) {
|
||||
CGroupMask enable_mask) {
|
||||
|
||||
CGroupContext *c;
|
||||
int r;
|
||||
@ -1549,7 +1556,6 @@ static int unit_create_cgroup(
|
||||
u->cgroup_realized = true;
|
||||
u->cgroup_realized_mask = target_mask;
|
||||
u->cgroup_enabled_mask = enable_mask;
|
||||
u->cgroup_bpf_state = needs_bpf ? UNIT_CGROUP_BPF_ON : UNIT_CGROUP_BPF_OFF;
|
||||
|
||||
if (u->type != UNIT_SLICE && !unit_cgroup_delegate(u)) {
|
||||
|
||||
@ -1725,16 +1731,14 @@ static void cgroup_xattr_apply(Unit *u) {
|
||||
static bool unit_has_mask_realized(
|
||||
Unit *u,
|
||||
CGroupMask target_mask,
|
||||
CGroupMask enable_mask,
|
||||
bool needs_bpf) {
|
||||
CGroupMask enable_mask) {
|
||||
|
||||
assert(u);
|
||||
|
||||
return u->cgroup_realized &&
|
||||
u->cgroup_realized_mask == target_mask &&
|
||||
u->cgroup_enabled_mask == enable_mask &&
|
||||
((needs_bpf && u->cgroup_bpf_state == UNIT_CGROUP_BPF_ON) ||
|
||||
(!needs_bpf && u->cgroup_bpf_state == UNIT_CGROUP_BPF_OFF));
|
||||
u->cgroup_invalidated_mask == 0;
|
||||
}
|
||||
|
||||
static void unit_add_to_cgroup_realize_queue(Unit *u) {
|
||||
@ -1765,7 +1769,6 @@ static void unit_remove_from_cgroup_realize_queue(Unit *u) {
|
||||
* Returns 0 on success and < 0 on failure. */
|
||||
static int unit_realize_cgroup_now(Unit *u, ManagerState state) {
|
||||
CGroupMask target_mask, enable_mask;
|
||||
bool needs_bpf, apply_bpf;
|
||||
int r;
|
||||
|
||||
assert(u);
|
||||
@ -1774,16 +1777,10 @@ static int unit_realize_cgroup_now(Unit *u, ManagerState state) {
|
||||
|
||||
target_mask = unit_get_target_mask(u);
|
||||
enable_mask = unit_get_enable_mask(u);
|
||||
needs_bpf = unit_get_needs_bpf(u);
|
||||
|
||||
if (unit_has_mask_realized(u, target_mask, enable_mask, needs_bpf))
|
||||
if (unit_has_mask_realized(u, target_mask, enable_mask))
|
||||
return 0;
|
||||
|
||||
/* Make sure we apply the BPF filters either when one is configured, or if none is configured but previously
|
||||
* the state was anything but off. This way, if a unit with a BPF filter applied is reconfigured to lose it
|
||||
* this will trickle down properly to cgroupfs. */
|
||||
apply_bpf = needs_bpf || u->cgroup_bpf_state != UNIT_CGROUP_BPF_OFF;
|
||||
|
||||
/* First, realize parents */
|
||||
if (UNIT_ISSET(u->slice)) {
|
||||
r = unit_realize_cgroup_now(UNIT_DEREF(u->slice), state);
|
||||
@ -1792,12 +1789,12 @@ static int unit_realize_cgroup_now(Unit *u, ManagerState state) {
|
||||
}
|
||||
|
||||
/* And then do the real work */
|
||||
r = unit_create_cgroup(u, target_mask, enable_mask, needs_bpf);
|
||||
r = unit_create_cgroup(u, target_mask, enable_mask);
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
||||
/* Finally, apply the necessary attributes. */
|
||||
cgroup_context_apply(u, target_mask, apply_bpf, state);
|
||||
cgroup_context_apply(u, target_mask, state);
|
||||
cgroup_xattr_apply(u);
|
||||
|
||||
return 0;
|
||||
@ -1863,8 +1860,7 @@ static void unit_add_siblings_to_cgroup_realize_queue(Unit *u) {
|
||||
* any changes. */
|
||||
if (unit_has_mask_realized(m,
|
||||
unit_get_target_mask(m),
|
||||
unit_get_enable_mask(m),
|
||||
unit_get_needs_bpf(m)))
|
||||
unit_get_enable_mask(m)))
|
||||
continue;
|
||||
|
||||
unit_add_to_cgroup_realize_queue(m);
|
||||
@ -2207,11 +2203,25 @@ static int on_cgroup_inotify_event(sd_event_source *s, int fd, uint32_t revents,
|
||||
}
|
||||
}
|
||||
|
||||
/* Determines which BPF-based pseudo-controllers are supported and stores the resulting mask in
 * *ret. The firewall bit is set only when bpf_firewall_supported() returns a positive value; any
 * other result leaves it unset. Always returns 0. */
static int cg_bpf_mask_supported(CGroupMask *ret) {
        CGroupMask supported = 0;

        /* BPF-based firewall */
        if (bpf_firewall_supported() > 0)
                supported |= CGROUP_MASK_BPF_FIREWALL;

        *ret = supported;
        return 0;
}
|
||||
|
||||
int manager_setup_cgroup(Manager *m) {
|
||||
_cleanup_free_ char *path = NULL;
|
||||
const char *scope_path;
|
||||
CGroupController c;
|
||||
int r, all_unified;
|
||||
CGroupMask mask;
|
||||
char *e;
|
||||
|
||||
assert(m);
|
||||
@ -2341,10 +2351,18 @@ int manager_setup_cgroup(Manager *m) {
|
||||
if (!all_unified && m->test_run_flags == 0)
|
||||
(void) cg_set_attribute("memory", "/", "memory.use_hierarchy", "1");
|
||||
|
||||
/* 8. Figure out which controllers are supported, and log about it */
|
||||
/* 8. Figure out which controllers are supported */
|
||||
r = cg_mask_supported(&m->cgroup_supported);
|
||||
if (r < 0)
|
||||
return log_error_errno(r, "Failed to determine supported controllers: %m");
|
||||
|
||||
/* 9. Figure out which bpf-based pseudo-controllers are supported */
|
||||
r = cg_bpf_mask_supported(&mask);
|
||||
if (r < 0)
|
||||
return log_error_errno(r, "Failed to determine supported bpf-based pseudo-controllers: %m");
|
||||
m->cgroup_supported |= mask;
|
||||
|
||||
/* 10. Log which controllers are supported */
|
||||
for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++)
|
||||
log_debug("Controller '%s' supported: %s", cgroup_controller_to_string(c), yes_no(m->cgroup_supported & CGROUP_CONTROLLER_TO_MASK(c)));
|
||||
|
||||
@ -2718,10 +2736,10 @@ void unit_invalidate_cgroup_bpf(Unit *u) {
|
||||
if (!UNIT_HAS_CGROUP_CONTEXT(u))
|
||||
return;
|
||||
|
||||
if (u->cgroup_bpf_state == UNIT_CGROUP_BPF_INVALIDATED) /* NOP? */
|
||||
if (u->cgroup_invalidated_mask & CGROUP_MASK_BPF_FIREWALL) /* NOP? */
|
||||
return;
|
||||
|
||||
u->cgroup_bpf_state = UNIT_CGROUP_BPF_INVALIDATED;
|
||||
u->cgroup_invalidated_mask |= CGROUP_MASK_BPF_FIREWALL;
|
||||
unit_add_to_cgroup_realize_queue(u);
|
||||
|
||||
/* If we are a slice unit, we also need to compile a new BPF program for all our children, as the IP access
|
||||
|
@ -157,7 +157,8 @@ CGroupMask unit_get_subtree_mask(Unit *u);
|
||||
CGroupMask unit_get_target_mask(Unit *u);
|
||||
CGroupMask unit_get_enable_mask(Unit *u);
|
||||
|
||||
bool unit_get_needs_bpf(Unit *u);
|
||||
bool unit_get_needs_bpf_firewall(Unit *u);
|
||||
CGroupMask unit_get_bpf_mask(Unit *u);
|
||||
|
||||
void unit_update_cgroup_members_masks(Unit *u);
|
||||
|
||||
|
@ -93,7 +93,7 @@ Unit *unit_new(Manager *m, size_t size) {
|
||||
u->ref_uid = UID_INVALID;
|
||||
u->ref_gid = GID_INVALID;
|
||||
u->cpu_usage_last = NSEC_INFINITY;
|
||||
u->cgroup_bpf_state = UNIT_CGROUP_BPF_INVALIDATED;
|
||||
u->cgroup_invalidated_mask |= CGROUP_MASK_BPF_FIREWALL;
|
||||
|
||||
u->ip_accounting_ingress_map_fd = -1;
|
||||
u->ip_accounting_egress_map_fd = -1;
|
||||
@ -3253,7 +3253,7 @@ int unit_serialize(Unit *u, FILE *f, FDSet *fds, bool serialize_jobs) {
|
||||
unit_serialize_item(u, f, "cgroup-realized", yes_no(u->cgroup_realized));
|
||||
(void) unit_serialize_cgroup_mask(f, "cgroup-realized-mask", u->cgroup_realized_mask);
|
||||
(void) unit_serialize_cgroup_mask(f, "cgroup-enabled-mask", u->cgroup_enabled_mask);
|
||||
unit_serialize_item_format(u, f, "cgroup-bpf-realized", "%i", u->cgroup_bpf_state);
|
||||
(void) unit_serialize_cgroup_mask(f, "cgroup-invalidated-mask", u->cgroup_invalidated_mask);
|
||||
|
||||
if (uid_is_valid(u->ref_uid))
|
||||
unit_serialize_item_format(u, f, "ref-uid", UID_FMT, u->ref_uid);
|
||||
@ -3568,18 +3568,11 @@ int unit_deserialize(Unit *u, FILE *f, FDSet *fds) {
|
||||
log_unit_debug(u, "Failed to parse cgroup-enabled-mask %s, ignoring.", v);
|
||||
continue;
|
||||
|
||||
} else if (streq(l, "cgroup-bpf-realized")) {
|
||||
int i;
|
||||
} else if (streq(l, "cgroup-invalidated-mask")) {
|
||||
|
||||
r = safe_atoi(v, &i);
|
||||
r = cg_mask_from_string(v, &u->cgroup_invalidated_mask);
|
||||
if (r < 0)
|
||||
log_unit_debug(u, "Failed to parse cgroup BPF state %s, ignoring.", v);
|
||||
else
|
||||
u->cgroup_bpf_state =
|
||||
i < 0 ? UNIT_CGROUP_BPF_INVALIDATED :
|
||||
i > 0 ? UNIT_CGROUP_BPF_ON :
|
||||
UNIT_CGROUP_BPF_OFF;
|
||||
|
||||
log_unit_debug(u, "Failed to parse cgroup-invalidated-mask %s, ignoring.", v);
|
||||
continue;
|
||||
|
||||
} else if (streq(l, "ref-uid")) {
|
||||
|
@ -105,12 +105,6 @@ struct UnitRef {
|
||||
LIST_FIELDS(UnitRef, refs_by_target);
|
||||
};
|
||||
|
||||
typedef enum UnitCGroupBPFState {
|
||||
UNIT_CGROUP_BPF_OFF = 0,
|
||||
UNIT_CGROUP_BPF_ON = 1,
|
||||
UNIT_CGROUP_BPF_INVALIDATED = -1,
|
||||
} UnitCGroupBPFState;
|
||||
|
||||
typedef struct Unit {
|
||||
Manager *manager;
|
||||
|
||||
@ -258,6 +252,7 @@ typedef struct Unit {
|
||||
char *cgroup_path;
|
||||
CGroupMask cgroup_realized_mask;
|
||||
CGroupMask cgroup_enabled_mask;
|
||||
CGroupMask cgroup_invalidated_mask;
|
||||
CGroupMask cgroup_subtree_mask;
|
||||
CGroupMask cgroup_members_mask;
|
||||
int cgroup_inotify_wd;
|
||||
@ -336,8 +331,6 @@ typedef struct Unit {
|
||||
bool cgroup_members_mask_valid:1;
|
||||
bool cgroup_subtree_mask_valid:1;
|
||||
|
||||
UnitCGroupBPFState cgroup_bpf_state:2;
|
||||
|
||||
/* Reset cgroup accounting next time we fork something off */
|
||||
bool reset_accounting:1;
|
||||
|
||||
|
@ -100,7 +100,7 @@ static void test_cg_mask_to_string_one(CGroupMask mask, const char *t) {
|
||||
|
||||
static void test_cg_mask_to_string(void) {
|
||||
test_cg_mask_to_string_one(0, NULL);
|
||||
test_cg_mask_to_string_one(_CGROUP_MASK_ALL, "cpu cpuacct io blkio memory devices pids");
|
||||
test_cg_mask_to_string_one(_CGROUP_MASK_ALL, "cpu cpuacct io blkio memory devices pids bpf-firewall");
|
||||
test_cg_mask_to_string_one(CGROUP_MASK_CPU, "cpu");
|
||||
test_cg_mask_to_string_one(CGROUP_MASK_CPUACCT, "cpuacct");
|
||||
test_cg_mask_to_string_one(CGROUP_MASK_IO, "io");
|
||||
|
Loading…
Reference in New Issue
Block a user