1
0
mirror of https://github.com/systemd/systemd.git synced 2025-01-11 09:18:07 +03:00

Merge pull request #13994 from keszybz/bpf-refactor

Refactor the bpf devices code and fix some bugs
This commit is contained in:
Lennart Poettering 2019-11-13 16:36:39 +01:00 committed by GitHub
commit c214e210c9
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
24 changed files with 869 additions and 540 deletions

View File

@ -1,8 +1,17 @@
/* SPDX-License-Identifier: LGPL-2.1+ */
#include <fnmatch.h>
#include <linux/bpf_insn.h>
#include "bpf-devices.h"
#include "bpf-program.h"
#include "fd-util.h"
#include "fileio.h"
#include "nulstr-util.h"
#include "parse-util.h"
#include "stat-util.h"
#include "stdio-util.h"
#include "string-util.h"
#define PASS_JUMP_OFF 4096
@ -29,92 +38,117 @@ static int bpf_access_type(const char *acc) {
return r;
}
int cgroup_bpf_whitelist_device(BPFProgram *prog, int type, int major, int minor, const char *acc) {
struct bpf_insn insn[] = {
BPF_JMP_IMM(BPF_JNE, BPF_REG_2, type, 6), /* compare device type */
BPF_MOV32_REG(BPF_REG_1, BPF_REG_3), /* calculate access type */
BPF_ALU32_IMM(BPF_AND, BPF_REG_1, 0),
/* Appends to prog one rule that jumps to the PASS label when the accessed device has the given
 * type, major and minor and the requested access is covered by acc. Returns -EINVAL if acc does
 * not yield a positive access mask, otherwise the result of bpf_program_add_instructions(). */
static int bpf_prog_whitelist_device(BPFProgram *prog, char type, int major, int minor, const char *acc) {
        int access, r;

        assert(prog);
        assert(acc);

        log_trace("%s: %c %d:%d %s", __func__, type, major, minor, acc);

        access = bpf_access_type(acc);
        if (access <= 0)
                return -EINVAL;

        assert(IN_SET(type, 'b', 'c'));
        const int bpf_type = type == 'c' ? BPF_DEVCG_DEV_CHAR : BPF_DEVCG_DEV_BLOCK;

        const struct bpf_insn insn[] = {
                /* Access check: r1 = r3 & access; if that differs from r3, skip the whole rule. */
                BPF_MOV32_REG(BPF_REG_1, BPF_REG_3),
                BPF_ALU32_IMM(BPF_AND, BPF_REG_1, access),
                BPF_JMP_REG(BPF_JNE, BPF_REG_1, BPF_REG_3, 4), /* compare access type */

                /* Device check: each mismatch jumps just past the final jump-to-PASS. */
                BPF_JMP_IMM(BPF_JNE, BPF_REG_2, bpf_type, 3), /* compare device type */
                BPF_JMP_IMM(BPF_JNE, BPF_REG_4, major, 2), /* compare major */
                BPF_JMP_IMM(BPF_JNE, BPF_REG_5, minor, 1), /* compare minor */
                BPF_JMP_A(PASS_JUMP_OFF), /* jump to PASS */
        };

        /* When read, write and mknod are all granted, the three-instruction access check is
         * left out and only the device check is emitted. */
        const size_t skip =
                FLAGS_SET(access, BPF_DEVCG_ACC_READ | BPF_DEVCG_ACC_WRITE | BPF_DEVCG_ACC_MKNOD) ? 3 : 0;

        r = bpf_program_add_instructions(prog, insn + skip, ELEMENTSOF(insn) - skip);
        if (r < 0)
                log_error_errno(r, "Extending device control BPF program failed: %m");

        return r;
}
static int bpf_prog_whitelist_major(BPFProgram *prog, char type, int major, const char *acc) {
int r, access;
assert(prog);
assert(acc);
log_trace("%s: %c %d:* %s", __func__, type, major, acc);
access = bpf_access_type(acc);
if (access <= 0)
return -EINVAL;
assert(IN_SET(type, 'b', 'c'));
const int bpf_type = type == 'c' ? BPF_DEVCG_DEV_CHAR : BPF_DEVCG_DEV_BLOCK;
const struct bpf_insn insn[] = {
BPF_MOV32_REG(BPF_REG_1, BPF_REG_3),
BPF_ALU32_IMM(BPF_AND, BPF_REG_1, access),
BPF_JMP_REG(BPF_JNE, BPF_REG_1, BPF_REG_3, 3), /* compare access type */
BPF_JMP_IMM(BPF_JNE, BPF_REG_4, major, 2), /* compare major */
BPF_JMP_IMM(BPF_JNE, BPF_REG_5, minor, 1), /* compare minor */
BPF_JMP_A(PASS_JUMP_OFF), /* jump to PASS */
BPF_JMP_IMM(BPF_JNE, BPF_REG_2, bpf_type, 2), /* compare device type */
BPF_JMP_IMM(BPF_JNE, BPF_REG_4, major, 1), /* compare major */
BPF_JMP_A(PASS_JUMP_OFF), /* jump to PASS */
};
int r, access;
assert(prog);
assert(acc);
access = bpf_access_type(acc);
if (access <= 0)
return -EINVAL;
insn[2].imm = access;
r = bpf_program_add_instructions(prog, insn, ELEMENTSOF(insn));
if (FLAGS_SET(access, BPF_DEVCG_ACC_READ | BPF_DEVCG_ACC_WRITE | BPF_DEVCG_ACC_MKNOD))
r = bpf_program_add_instructions(prog, insn + 3, ELEMENTSOF(insn) - 3);
else
r = bpf_program_add_instructions(prog, insn, ELEMENTSOF(insn));
if (r < 0)
log_error_errno(r, "Extending device control BPF program failed: %m");
return r;
}
int cgroup_bpf_whitelist_major(BPFProgram *prog, int type, int major, const char *acc) {
struct bpf_insn insn[] = {
BPF_JMP_IMM(BPF_JNE, BPF_REG_2, type, 5), /* compare device type */
BPF_MOV32_REG(BPF_REG_1, BPF_REG_3), /* calculate access type */
BPF_ALU32_IMM(BPF_AND, BPF_REG_1, 0),
static int bpf_prog_whitelist_class(BPFProgram *prog, char type, const char *acc) {
int r, access;
assert(prog);
assert(acc);
log_trace("%s: %c *:* %s", __func__, type, acc);
access = bpf_access_type(acc);
if (access <= 0)
return -EINVAL;
assert(IN_SET(type, 'b', 'c'));
const int bpf_type = type == 'c' ? BPF_DEVCG_DEV_CHAR : BPF_DEVCG_DEV_BLOCK;
const struct bpf_insn insn[] = {
BPF_MOV32_REG(BPF_REG_1, BPF_REG_3),
BPF_ALU32_IMM(BPF_AND, BPF_REG_1, access),
BPF_JMP_REG(BPF_JNE, BPF_REG_1, BPF_REG_3, 2), /* compare access type */
BPF_JMP_IMM(BPF_JNE, BPF_REG_4, major, 1), /* compare major */
BPF_JMP_A(PASS_JUMP_OFF), /* jump to PASS */
BPF_JMP_IMM(BPF_JNE, BPF_REG_2, bpf_type, 1), /* compare device type */
BPF_JMP_A(PASS_JUMP_OFF), /* jump to PASS */
};
int r, access;
assert(prog);
assert(acc);
access = bpf_access_type(acc);
if (access <= 0)
return -EINVAL;
insn[2].imm = access;
r = bpf_program_add_instructions(prog, insn, ELEMENTSOF(insn));
if (FLAGS_SET(access, BPF_DEVCG_ACC_READ | BPF_DEVCG_ACC_WRITE | BPF_DEVCG_ACC_MKNOD))
r = bpf_program_add_instructions(prog, insn + 3, ELEMENTSOF(insn) - 3);
else
r = bpf_program_add_instructions(prog, insn, ELEMENTSOF(insn));
if (r < 0)
log_error_errno(r, "Extending device control BPF program failed: %m");
return r;
}
/* Appends to prog one rule that jumps to the PASS label for every device of the given BPF
 * device type, provided the requested access is covered by acc.
 *
 * Returns -EINVAL if acc does not yield a positive access mask, otherwise the result of
 * bpf_program_add_instructions() (which is also logged on failure). */
int cgroup_bpf_whitelist_class(BPFProgram *prog, int type, const char *acc) {
        int r, access;

        assert(prog);
        assert(acc);

        access = bpf_access_type(acc);
        if (access <= 0)
                return -EINVAL;

        const struct bpf_insn insn[] = {
                /* Jump offsets count from the instruction after the jump. This rule has five
                 * instructions, so a type mismatch must skip the remaining four; an offset of 5
                 * would land one instruction into whatever is appended after this rule. */
                BPF_JMP_IMM(BPF_JNE, BPF_REG_2, type, 4), /* compare device type */
                BPF_MOV32_REG(BPF_REG_1, BPF_REG_3), /* calculate access type */
                BPF_ALU32_IMM(BPF_AND, BPF_REG_1, access),
                BPF_JMP_REG(BPF_JNE, BPF_REG_1, BPF_REG_3, 1), /* compare access type */
                BPF_JMP_A(PASS_JUMP_OFF), /* jump to PASS */
        };

        r = bpf_program_add_instructions(prog, insn, ELEMENTSOF(insn));
        if (r < 0)
                log_error_errno(r, "Extending device control BPF program failed: %m");

        return r;
}
int cgroup_init_device_bpf(BPFProgram **ret, CGroupDevicePolicy policy, bool whitelist) {
struct bpf_insn pre_insn[] = {
int bpf_devices_cgroup_init(BPFProgram **ret, CGroupDevicePolicy policy, bool whitelist) {
const struct bpf_insn pre_insn[] = {
/* load device type to r2 */
BPF_LDX_MEM(BPF_H, BPF_REG_2, BPF_REG_1,
BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
offsetof(struct bpf_cgroup_dev_ctx, access_type)),
BPF_ALU32_IMM(BPF_AND, BPF_REG_2, 0xFFFF),
/* load access type to r3 */
BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
@ -135,14 +169,14 @@ int cgroup_init_device_bpf(BPFProgram **ret, CGroupDevicePolicy policy, bool whi
assert(ret);
if (policy == CGROUP_AUTO && !whitelist)
if (policy == CGROUP_DEVICE_POLICY_AUTO && !whitelist)
return 0;
r = bpf_program_new(BPF_PROG_TYPE_CGROUP_DEVICE, &prog);
if (r < 0)
return log_error_errno(r, "Loading device control BPF program failed: %m");
if (policy == CGROUP_CLOSED || whitelist) {
if (policy == CGROUP_DEVICE_POLICY_CLOSED || whitelist) {
r = bpf_program_add_instructions(prog, pre_insn, ELEMENTSOF(pre_insn));
if (r < 0)
return log_error_errno(r, "Extending device control BPF program failed: %m");
@ -153,70 +187,73 @@ int cgroup_init_device_bpf(BPFProgram **ret, CGroupDevicePolicy policy, bool whi
return 0;
}
int cgroup_apply_device_bpf(Unit *u, BPFProgram *prog, CGroupDevicePolicy policy, bool whitelist) {
struct bpf_insn post_insn[] = {
int bpf_devices_apply_policy(
BPFProgram *prog,
CGroupDevicePolicy policy,
bool whitelist,
const char *cgroup_path,
BPFProgram **prog_installed) {
_cleanup_free_ char *controller_path = NULL;
int r;
/* This will assign *prog_installed if everything goes well. */
if (!prog)
goto finish;
const bool deny_everything = policy == CGROUP_DEVICE_POLICY_STRICT && !whitelist;
const struct bpf_insn post_insn[] = {
/* return DENY */
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_JMP_A(1),
};
struct bpf_insn exit_insn[] = {
/* else return ALLOW */
BPF_MOV64_IMM(BPF_REG_0, 1),
const struct bpf_insn exit_insn[] = {
/* finally return DENY if deny_everything else ALLOW */
BPF_MOV64_IMM(BPF_REG_0, deny_everything ? 0 : 1),
BPF_EXIT_INSN()
};
_cleanup_free_ char *path = NULL;
int r;
if (!prog) {
/* Remove existing program. */
u->bpf_device_control_installed = bpf_program_unref(u->bpf_device_control_installed);
return 0;
}
if (policy != CGROUP_STRICT || whitelist) {
size_t off;
if (!deny_everything) {
r = bpf_program_add_instructions(prog, post_insn, ELEMENTSOF(post_insn));
if (r < 0)
return log_error_errno(r, "Extending device control BPF program failed: %m");
/* Fixup PASS_JUMP_OFF jump offsets. */
for (off = 0; off < prog->n_instructions; off++) {
for (size_t off = 0; off < prog->n_instructions; off++) {
struct bpf_insn *ins = &prog->instructions[off];
if (ins->code == (BPF_JMP | BPF_JA) && ins->off == PASS_JUMP_OFF)
ins->off = prog->n_instructions - off - 1;
}
} else
/* Explicitly forbid everything. */
exit_insn[0].imm = 0;
}
r = bpf_program_add_instructions(prog, exit_insn, ELEMENTSOF(exit_insn));
if (r < 0)
return log_error_errno(r, "Extending device control BPF program failed: %m");
r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path, NULL, &path);
r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, cgroup_path, NULL, &controller_path);
if (r < 0)
return log_error_errno(r, "Failed to determine cgroup path: %m");
r = bpf_program_cgroup_attach(prog, BPF_CGROUP_DEVICE, path, BPF_F_ALLOW_MULTI);
r = bpf_program_cgroup_attach(prog, BPF_CGROUP_DEVICE, controller_path, BPF_F_ALLOW_MULTI);
if (r < 0)
return log_error_errno(r, "Attaching device control BPF program to cgroup %s failed: %m", path);
return log_error_errno(r, "Attaching device control BPF program to cgroup %s failed: %m",
cgroup_path);
finish:
/* Unref the old BPF program (which will implicitly detach it) right before attaching the new program. */
u->bpf_device_control_installed = bpf_program_unref(u->bpf_device_control_installed);
/* Remember that this BPF program is installed now. */
u->bpf_device_control_installed = bpf_program_ref(prog);
if (prog_installed) {
bpf_program_unref(*prog_installed);
*prog_installed = bpf_program_ref(prog);
}
return 0;
}
int bpf_devices_supported(void) {
struct bpf_insn trivial[] = {
const struct bpf_insn trivial[] = {
BPF_MOV64_IMM(BPF_REG_0, 1),
BPF_EXIT_INSN()
};
@ -268,3 +305,185 @@ int bpf_devices_supported(void) {
return supported = 1;
}
/* Allows access acc to a set of devices of the given type: one device if both maj and min are
 * given, all minors of a major if only maj is given, and every device of the type otherwise.
 * On the unified hierarchy the rule is appended to prog (nothing is done if prog is NULL);
 * otherwise it is written to the "devices.allow" attribute of the cgroup at path. */
static int whitelist_device_pattern(BPFProgram *prog, const char *path, char type, const unsigned *maj, const unsigned *min, const char *acc) {
        assert(IN_SET(type, 'b', 'c'));

        const bool exact = maj && min;

        if (cg_all_unified() > 0) {
                if (!prog)
                        return 0;

                if (exact)
                        return bpf_prog_whitelist_device(prog, type, *maj, *min, acc);
                if (maj)
                        return bpf_prog_whitelist_major(prog, type, *maj, acc);

                return bpf_prog_whitelist_class(prog, type, acc);
        }

        char buf[2+DECIMAL_STR_MAX(unsigned)*2+2+4];
        int r;

        if (exact)
                xsprintf(buf, "%c %u:%u %s", type, *maj, *min, acc);
        else if (maj)
                xsprintf(buf, "%c %u:* %s", type, *maj, acc);
        else
                xsprintf(buf, "%c *:* %s", type, acc);

        /* A populated cgroup may refuse changes to its device list with EINVAL, which is why that
         * error is among those logged at debug level only. The error is still returned. */
        r = cg_set_attribute("devices", path, "devices.allow", buf);
        if (r < 0)
                log_full_errno(IN_SET(r, -ENOENT, -EROFS, -EINVAL, -EACCES, -EPERM) ? LOG_DEBUG : LOG_WARNING,
                               r, "Failed to set devices.allow on %s: %m", path);

        return r;
}
/* Allows access acc to the single device identified by node. The device type and numbers are
 * taken from the path itself when device_path_parse_major_minor() can parse it, and from
 * stat() when that helper reports -ENODEV. Returns a negative errno-style value on failure,
 * otherwise whatever whitelist_device_pattern() returns. */
int bpf_devices_whitelist_device(BPFProgram *prog, const char *path, const char *node, const char *acc) {
        mode_t mode;
        dev_t rdev;
        int r;

        assert(path);
        assert(acc);
        assert(strlen(acc) <= 3);

        log_trace("%s: %s %s", __func__, node, acc);

        /* /dev/block/%u:%u, /dev/char/%u:%u, /run/systemd/inaccessible/chr and
         * /run/systemd/inaccessible/blk get special treatment: major/minor are parsed straight
         * out of the path rather than obtained via stat(), so such paths work even when the
         * device node does not exist. */
        r = device_path_parse_major_minor(node, &mode, &rdev);
        if (r < 0 && r != -ENODEV)
                return log_warning_errno(r, "Couldn't parse major/minor from device path '%s': %m", node);

        if (r < 0) {
                /* -ENODEV from the parser: look at the node itself. */
                struct stat st;

                if (stat(node, &st) < 0)
                        return log_warning_errno(errno, "Couldn't stat device %s: %m", node);

                if (!(S_ISCHR(st.st_mode) || S_ISBLK(st.st_mode)))
                        return log_warning_errno(SYNTHETIC_ERRNO(ENODEV), "%s is not a device.", node);

                rdev = (dev_t) st.st_rdev;
                mode = st.st_mode;
        }

        unsigned maj = major(rdev), min = minor(rdev);
        const char type = S_ISCHR(mode) ? 'c' : 'b';

        return whitelist_device_pattern(prog, path, type, &maj, &min, acc);
}
/* Allows access acc to all devices of the given type ('b' or 'c') selected by name:
 *  - "*" selects every device of that type,
 *  - a number accepted by DEVICE_MAJOR_VALID() is used directly as the major,
 *  - anything else is used as an fnmatch() pattern against the driver names listed in the
 *    matching section of /proc/devices; every matching major is whitelisted.
 *
 * For the first two forms the result of whitelist_device_pattern() is returned. For the pattern
 * form, errors of individual rules are ignored: the function returns 0 if at least one entry
 * matched, -ENOENT if none did, or a negative errno if /proc/devices cannot be opened or read. */
int bpf_devices_whitelist_major(BPFProgram *prog, const char *path, const char *name, char type, const char *acc) {
unsigned maj;
int r;
assert(path);
assert(acc);
assert(IN_SET(type, 'b', 'c'));
if (streq(name, "*"))
/* If the name is a wildcard, then apply this list to all devices of this type */
return whitelist_device_pattern(prog, path, type, NULL, NULL, acc);
if (safe_atou(name, &maj) >= 0 && DEVICE_MAJOR_VALID(maj))
/* The name is numeric and suitable as major. In that case, let's take its major, and create
 * the entry directly. */
return whitelist_device_pattern(prog, path, type, &maj, NULL, acc);
_cleanup_fclose_ FILE *f = NULL;
/* good: currently inside the /proc/devices section for our type; any: at least one entry matched */
bool good = false, any = false;
f = fopen("/proc/devices", "re");
if (!f)
return log_warning_errno(errno, "Cannot open /proc/devices to resolve %s: %m", name);
for (;;) {
_cleanup_free_ char *line = NULL;
char *w, *p;
r = read_line(f, LONG_LINE_MAX, &line);
if (r < 0)
return log_warning_errno(r, "Failed to read /proc/devices: %m");
if (r == 0)
break;
/* A section header for our type switches matching on ... */
if (type == 'c' && streq(line, "Character devices:")) {
good = true;
continue;
}
if (type == 'b' && streq(line, "Block devices:")) {
good = true;
continue;
}
/* ... and an empty line switches it off again. */
if (isempty(line)) {
good = false;
continue;
}
if (!good)
continue;
/* Split the line in place at the first whitespace: p becomes the major number, w the rest. */
p = strstrip(line);
w = strpbrk(p, WHITESPACE);
if (!w)
continue;
*w = 0;
r = safe_atou(p, &maj);
if (r < 0)
continue;
if (maj <= 0)
continue;
/* Step over the terminator just written and any further whitespace to reach the name. */
w++;
w += strspn(w, WHITESPACE);
if (fnmatch(name, w, 0) != 0)
continue;
any = true;
/* Best effort: a failure to add one rule does not stop processing of the remaining entries. */
(void) whitelist_device_pattern(prog, path, type, &maj, NULL, acc);
}
if (!any)
return log_debug_errno(SYNTHETIC_ERRNO(ENOENT),
"Device whitelist pattern \"%s\" did not match anything.", name);
return 0;
}
/* Whitelists the built-in set of device nodes listed below, plus read/write access to the
 * "pts" character devices. Every entry is attempted even if an earlier one fails; the return
 * value is the first negative result encountered, or the non-negative initial 0 otherwise. */
int bpf_devices_whitelist_static(BPFProgram *prog, const char *path) {
        static const char auto_devices[] =
                "/dev/null\0" "rwm\0"
                "/dev/zero\0" "rwm\0"
                "/dev/full\0" "rwm\0"
                "/dev/random\0" "rwm\0"
                "/dev/urandom\0" "rwm\0"
                "/dev/tty\0" "rwm\0"
                "/dev/ptmx\0" "rwm\0"
                /* Allow /run/systemd/inaccessible/{chr,blk} devices for mapping InaccessiblePaths */
                "/run/systemd/inaccessible/chr\0" "rwm\0"
                "/run/systemd/inaccessible/blk\0" "rwm\0";

        const char *node, *acc;
        int first_error = 0, rc;

        NULSTR_FOREACH_PAIR(node, acc, auto_devices) {
                rc = bpf_devices_whitelist_device(prog, path, node, acc);
                if (rc < 0 && first_error >= 0)
                        first_error = rc;
        }

        /* PTS (/dev/pts) devices may not be duplicated, but accessed */
        rc = bpf_devices_whitelist_major(prog, path, "pts", 'c', "rw");
        if (rc < 0 && first_error >= 0)
                first_error = rc;

        return first_error;
}

View File

@ -3,15 +3,19 @@
#include <inttypes.h>
#include "unit.h"
#include "cgroup.h"
struct BPFProgram;
typedef struct BPFProgram BPFProgram;
int bpf_devices_cgroup_init(BPFProgram **ret, CGroupDevicePolicy policy, bool whitelist);
int bpf_devices_apply_policy(
BPFProgram *prog,
CGroupDevicePolicy policy,
bool whitelist,
const char *cgroup_path,
BPFProgram **prog_installed);
int bpf_devices_supported(void);
int cgroup_bpf_whitelist_device(BPFProgram *p, int type, int major, int minor, const char *acc);
int cgroup_bpf_whitelist_major(BPFProgram *p, int type, int major, const char *acc);
int cgroup_bpf_whitelist_class(BPFProgram *prog, int type, const char *acc);
int cgroup_init_device_bpf(BPFProgram **ret, CGroupDevicePolicy policy, bool whitelist);
int cgroup_apply_device_bpf(Unit *u, BPFProgram *p, CGroupDevicePolicy policy, bool whitelist);
int bpf_devices_whitelist_device(BPFProgram *prog, const char *path, const char *node, const char *acc);
int bpf_devices_whitelist_major(BPFProgram *prog, const char *path, const char *name, char type, const char *acc);
int bpf_devices_whitelist_static(BPFProgram *prog, const char *path);

View File

@ -132,7 +132,7 @@ static int add_instructions_for_ip_any(
assert(p);
struct bpf_insn insn[] = {
const struct bpf_insn insn[] = {
BPF_ALU32_IMM(BPF_OR, BPF_REG_8, verdict),
};
@ -150,7 +150,7 @@ static int bpf_firewall_compile_bpf(
bool ip_allow_any,
bool ip_deny_any) {
struct bpf_insn pre_insn[] = {
const struct bpf_insn pre_insn[] = {
/*
* When the eBPF program is entered, R1 contains the address of the skb.
* However, R1-R5 are scratch registers that are not preserved when calling
@ -186,7 +186,7 @@ static int bpf_firewall_compile_bpf(
* This means that if both ACCESS_DENIED and ACCESS_ALLOWED are set, the packet
* is allowed to pass.
*/
struct bpf_insn post_insn[] = {
const struct bpf_insn post_insn[] = {
BPF_MOV64_IMM(BPF_REG_0, 1),
BPF_JMP_IMM(BPF_JNE, BPF_REG_8, ACCESS_DENIED, 1),
BPF_MOV64_IMM(BPF_REG_0, 0),
@ -321,7 +321,7 @@ static int bpf_firewall_compile_bpf(
* Exit from the eBPF program, R0 contains the verdict.
* 0 means the packet is denied, 1 means the packet may pass.
*/
struct bpf_insn insn[] = {
const struct bpf_insn insn[] = {
BPF_EXIT_INSN()
};
@ -795,7 +795,7 @@ int bpf_firewall_reset_accounting(int map_fd) {
static int bpf_firewall_unsupported_reason = 0;
int bpf_firewall_supported(void) {
struct bpf_insn trivial[] = {
const struct bpf_insn trivial[] = {
BPF_MOV64_IMM(BPF_REG_0, 1),
BPF_EXIT_INSN()
};

View File

@ -1,7 +1,6 @@
/* SPDX-License-Identifier: LGPL-2.1+ */
#include <fcntl.h>
#include <fnmatch.h>
#include "sd-messages.h"
@ -17,7 +16,6 @@
#include "fd-util.h"
#include "fileio.h"
#include "fs-util.h"
#include "nulstr-util.h"
#include "parse-util.h"
#include "path-util.h"
#include "process-util.h"
@ -667,192 +665,6 @@ static int lookup_block_device(const char *p, dev_t *ret) {
return 0;
}
/* Allows access acc to the single device identified by node. On the unified hierarchy a rule is
 * added to prog (nothing is done if prog is NULL); otherwise the rule is written to the
 * "devices.allow" attribute of the cgroup at path. Returns 0 or the BPF helper's result on
 * success paths, a negative errno-style value otherwise. */
static int whitelist_device(BPFProgram *prog, const char *path, const char *node, const char *acc) {
        mode_t mode;
        dev_t rdev;
        int r;

        assert(path);
        assert(acc);

        /* /dev/block/%u:%u, /dev/char/%u:%u, /run/systemd/inaccessible/chr and
         * /run/systemd/inaccessible/blk get special treatment: major/minor are parsed straight
         * out of the path rather than obtained via stat(), so such paths work even when the
         * device node does not exist. */
        r = device_path_parse_major_minor(node, &mode, &rdev);
        if (r < 0 && r != -ENODEV)
                return log_warning_errno(r, "Couldn't parse major/minor from device path '%s': %m", node);

        if (r < 0) {
                /* -ENODEV from the parser: look at the node itself. */
                struct stat st;

                if (stat(node, &st) < 0)
                        return log_warning_errno(errno, "Couldn't stat device %s: %m", node);

                if (!(S_ISCHR(st.st_mode) || S_ISBLK(st.st_mode))) {
                        log_warning("%s is not a device.", node);
                        return -ENODEV;
                }

                mode = st.st_mode;
                rdev = (dev_t) st.st_rdev;
        }

        const bool is_char = S_ISCHR(mode);

        if (cg_all_unified() > 0) {
                if (!prog)
                        return 0;

                return cgroup_bpf_whitelist_device(prog, is_char ? BPF_DEVCG_DEV_CHAR : BPF_DEVCG_DEV_BLOCK,
                                                   major(rdev), minor(rdev), acc);
        }

        char buf[2+DECIMAL_STR_MAX(dev_t)*2+2+4];

        sprintf(buf,
                "%c %u:%u %s",
                is_char ? 'c' : 'b',
                major(rdev), minor(rdev),
                acc);

        /* Changing the devices list of a populated cgroup might result in EINVAL, hence that error
         * is only logged at debug level. */
        r = cg_set_attribute("devices", path, "devices.allow", buf);
        if (r < 0)
                return log_full_errno(IN_SET(r, -ENOENT, -EROFS, -EINVAL, -EACCES, -EPERM) ? LOG_DEBUG : LOG_WARNING,
                                      r, "Failed to set devices.allow on %s: %m", path);

        return 0;
}
/* Best-effort helper: allows access acc to all minors of *maj, or to every device of the given
 * type when maj is NULL. On the unified hierarchy the rule goes into prog (nothing is done if
 * prog is NULL), otherwise into the "devices.allow" attribute of the cgroup at path. Failures
 * are logged but not propagated. */
static void whitelist_major_pattern(BPFProgram *prog, const char *path, char type, const unsigned *maj, const char *acc) {
        char buf[2+DECIMAL_STR_MAX(unsigned)+3+4];
        int r;

        if (cg_all_unified() > 0) {
                if (!prog)
                        return;

                const int bpf_type = type == 'c' ? BPF_DEVCG_DEV_CHAR : BPF_DEVCG_DEV_BLOCK;

                if (maj)
                        (void) cgroup_bpf_whitelist_major(prog, bpf_type, *maj, acc);
                else
                        (void) cgroup_bpf_whitelist_class(prog, bpf_type, acc);
                return;
        }

        if (maj)
                xsprintf(buf, "%c %u:* %s", type, *maj, acc);
        else
                xsprintf(buf, "%c *:* %s", type, acc);

        /* Changing the devices list of a populated cgroup might result in EINVAL, hence that error
         * is only logged at debug level. */
        r = cg_set_attribute("devices", path, "devices.allow", buf);
        if (r < 0)
                log_full_errno(IN_SET(r, -ENOENT, -EROFS, -EINVAL, -EACCES, -EPERM) ? LOG_DEBUG : LOG_WARNING,
                               r, "Failed to set devices.allow on %s: %m", path);
}

/* Allows access acc to all devices of the given type ('b' or 'c') selected by name:
 *  - "*" selects every device of that type,
 *  - a number accepted by DEVICE_MAJOR_VALID() is used directly as the major,
 *  - anything else is used as an fnmatch() pattern against the driver names listed in the
 *    matching section of /proc/devices; every matching major is whitelisted.
 *
 * Adding the individual rules is best effort. Returns 0, or a negative errno if /proc/devices
 * cannot be opened or read. */
static int whitelist_major(BPFProgram *prog, const char *path, const char *name, char type, const char *acc) {
        _cleanup_fclose_ FILE *f = NULL;
        bool good = false; /* inside the /proc/devices section for our type? */
        unsigned maj;
        int r;

        assert(path);
        assert(acc);
        assert(IN_SET(type, 'b', 'c'));

        if (streq(name, "*")) {
                /* If the name is a wildcard, then apply this list to all devices of this type.
                 * Return on both hierarchies: falling through would scan /proc/devices with a
                 * pattern that matches everything and add a redundant rule per major. */
                whitelist_major_pattern(prog, path, type, NULL, acc);
                return 0;
        }

        if (safe_atou(name, &maj) >= 0 && DEVICE_MAJOR_VALID(maj)) {
                /* The name is numeric and suitable as major. In that case, let's take its major,
                 * and create the entry directly */
                whitelist_major_pattern(prog, path, type, &maj, acc);
                return 0;
        }

        f = fopen("/proc/devices", "re");
        if (!f)
                return log_warning_errno(errno, "Cannot open /proc/devices to resolve %s (%c): %m", name, type);

        for (;;) {
                _cleanup_free_ char *line = NULL;
                char *w, *p;

                r = read_line(f, LONG_LINE_MAX, &line);
                if (r < 0)
                        return log_warning_errno(r, "Failed to read /proc/devices: %m");
                if (r == 0)
                        break;

                /* A section header for our type switches matching on, an empty line off again. */
                if (type == 'c' && streq(line, "Character devices:")) {
                        good = true;
                        continue;
                }

                if (type == 'b' && streq(line, "Block devices:")) {
                        good = true;
                        continue;
                }

                if (isempty(line)) {
                        good = false;
                        continue;
                }

                if (!good)
                        continue;

                /* Split the line in place at the first whitespace: p is the major, w the name. */
                p = strstrip(line);

                w = strpbrk(p, WHITESPACE);
                if (!w)
                        continue;
                *w = 0;

                r = safe_atou(p, &maj);
                if (r < 0)
                        continue;
                if (maj <= 0)
                        continue;

                w++;
                w += strspn(w, WHITESPACE);

                if (fnmatch(name, w, 0) != 0)
                        continue;

                whitelist_major_pattern(prog, path, type, &maj, acc);
        }

        return 0;
}
static bool cgroup_context_has_cpu_weight(CGroupContext *c) {
return c->cpu_weight != CGROUP_WEIGHT_INVALID ||
c->startup_cpu_weight != CGROUP_WEIGHT_INVALID;
@ -1143,6 +955,95 @@ static void cgroup_apply_firewall(Unit *u) {
(void) bpf_firewall_install(u);
}
/* Applies the device access policy of unit u to its cgroup. On the unified hierarchy a BPF
 * program is built and handed to bpf_devices_apply_policy(); otherwise the rules are written to
 * the devices.allow/devices.deny cgroup attributes (prog stays NULL in that case).
 *
 * Returns a negative value if the BPF program cannot be initialized, otherwise the result of
 * bpf_devices_apply_policy(). */
static int cgroup_apply_devices(Unit *u) {
_cleanup_(bpf_program_unrefp) BPFProgram *prog = NULL;
const char *path;
CGroupContext *c;
CGroupDeviceAllow *a;
CGroupDevicePolicy policy;
int r;
assert_se(c = unit_get_cgroup_context(u));
assert_se(path = u->cgroup_path);
policy = c->device_policy;
if (cg_all_unified() > 0) {
r = bpf_devices_cgroup_init(&prog, policy, c->device_allow);
if (r < 0)
return log_unit_warning_errno(u, r, "Failed to initialize device control bpf program: %m");
} else {
/* Changing the devices list of a populated cgroup might result in EINVAL, hence ignore
 * EINVAL here. */
/* Reset the list first: deny everything if any restriction is configured, allow everything otherwise. */
if (c->device_allow || policy != CGROUP_DEVICE_POLICY_AUTO)
r = cg_set_attribute("devices", path, "devices.deny", "a");
else
r = cg_set_attribute("devices", path, "devices.allow", "a");
if (r < 0)
log_unit_full(u, IN_SET(r, -ENOENT, -EROFS, -EINVAL, -EACCES, -EPERM) ? LOG_DEBUG : LOG_WARNING, r,
"Failed to reset devices.allow/devices.deny: %m");
}
/* The built-in device set is added for "closed", and for "auto" when devices are listed. */
bool whitelist_static = policy == CGROUP_DEVICE_POLICY_CLOSED ||
(policy == CGROUP_DEVICE_POLICY_AUTO && c->device_allow);
if (whitelist_static)
(void) bpf_devices_whitelist_static(prog, path);
/* any: at least one whitelist rule is considered to be in place */
bool any = whitelist_static;
LIST_FOREACH(device_allow, a, c->device_allow) {
char acc[4], *val;
unsigned k = 0;
/* Build the access string from the entry's r/w/m flags; entries granting nothing are skipped. */
if (a->r)
acc[k++] = 'r';
if (a->w)
acc[k++] = 'w';
if (a->m)
acc[k++] = 'm';
if (k == 0)
continue;
acc[k++] = 0;
/* Dispatch on the form of the entry: a /dev/ path, or a "block-"/"char-" prefixed major spec. */
if (path_startswith(a->path, "/dev/"))
r = bpf_devices_whitelist_device(prog, path, a->path, acc);
else if ((val = startswith(a->path, "block-")))
r = bpf_devices_whitelist_major(prog, path, val, 'b', acc);
else if ((val = startswith(a->path, "char-")))
r = bpf_devices_whitelist_major(prog, path, val, 'c', acc);
else {
log_unit_debug(u, "Ignoring device '%s' while writing cgroup attribute.", a->path);
continue;
}
if (r >= 0)
any = true;
}
if (prog && !any) {
log_unit_warning_errno(u, SYNTHETIC_ERRNO(ENODEV), "No devices matched by device filter.");
/* The kernel verifier would reject a program we would build with the normal intro and outro
but no whitelisting rules (outro would contain an unreachable instruction for successful
return). */
policy = CGROUP_DEVICE_POLICY_STRICT;
}
r = bpf_devices_apply_policy(prog, policy, any, path, &u->bpf_device_control_installed);
if (r < 0) {
/* Function-local flag: only the first failure is logged at warning level, later ones at debug. */
static bool warned = false;
log_full_errno(warned ? LOG_DEBUG : LOG_WARNING, r,
"Unit %s configures device ACL, but the local system doesn't seem to support the BPF-based device controller.\n"
"Proceeding WITHOUT applying ACL (all devices will be accessible)!\n"
"(This warning is only shown for the first loaded unit using device ACL.)", u->id);
warned = true;
}
return r;
}
static void cgroup_context_apply(
Unit *u,
CGroupMask apply_mask,
@ -1419,88 +1320,8 @@ static void cgroup_context_apply(
/* On cgroup v2 we can apply BPF everywhere. On cgroup v1 we apply it everywhere except for the root of
* containers, where we leave this to the manager */
if ((apply_mask & (CGROUP_MASK_DEVICES | CGROUP_MASK_BPF_DEVICES)) &&
(is_host_root || cg_all_unified() > 0 || !is_local_root)) {
_cleanup_(bpf_program_unrefp) BPFProgram *prog = NULL;
CGroupDeviceAllow *a;
if (cg_all_unified() > 0) {
r = cgroup_init_device_bpf(&prog, c->device_policy, c->device_allow);
if (r < 0)
log_unit_warning_errno(u, r, "Failed to initialize device control bpf program: %m");
} else {
/* Changing the devices list of a populated cgroup might result in EINVAL, hence ignore EINVAL
* here. */
if (c->device_allow || c->device_policy != CGROUP_AUTO)
r = cg_set_attribute("devices", path, "devices.deny", "a");
else
r = cg_set_attribute("devices", path, "devices.allow", "a");
if (r < 0)
log_unit_full(u, IN_SET(r, -ENOENT, -EROFS, -EINVAL, -EACCES, -EPERM) ? LOG_DEBUG : LOG_WARNING, r,
"Failed to reset devices.allow/devices.deny: %m");
}
if (c->device_policy == CGROUP_CLOSED ||
(c->device_policy == CGROUP_AUTO && c->device_allow)) {
static const char auto_devices[] =
"/dev/null\0" "rwm\0"
"/dev/zero\0" "rwm\0"
"/dev/full\0" "rwm\0"
"/dev/random\0" "rwm\0"
"/dev/urandom\0" "rwm\0"
"/dev/tty\0" "rwm\0"
"/dev/ptmx\0" "rwm\0"
/* Allow /run/systemd/inaccessible/{chr,blk} devices for mapping InaccessiblePaths */
"/run/systemd/inaccessible/chr\0" "rwm\0"
"/run/systemd/inaccessible/blk\0" "rwm\0";
const char *x, *y;
NULSTR_FOREACH_PAIR(x, y, auto_devices)
(void) whitelist_device(prog, path, x, y);
/* PTS (/dev/pts) devices may not be duplicated, but accessed */
(void) whitelist_major(prog, path, "pts", 'c', "rw");
}
LIST_FOREACH(device_allow, a, c->device_allow) {
char acc[4], *val;
unsigned k = 0;
if (a->r)
acc[k++] = 'r';
if (a->w)
acc[k++] = 'w';
if (a->m)
acc[k++] = 'm';
if (k == 0)
continue;
acc[k++] = 0;
if (path_startswith(a->path, "/dev/"))
(void) whitelist_device(prog, path, a->path, acc);
else if ((val = startswith(a->path, "block-")))
(void) whitelist_major(prog, path, val, 'b', acc);
else if ((val = startswith(a->path, "char-")))
(void) whitelist_major(prog, path, val, 'c', acc);
else
log_unit_debug(u, "Ignoring device '%s' while writing cgroup attribute.", a->path);
}
r = cgroup_apply_device_bpf(u, prog, c->device_policy, c->device_allow);
if (r < 0) {
static bool warned = false;
log_full_errno(warned ? LOG_DEBUG : LOG_WARNING, r,
"Unit %s configures device ACL, but the local system doesn't seem to support the BPF-based device controller.\n"
"Proceeding WITHOUT applying ACL (all devices will be accessible)!\n"
"(This warning is only shown for the first loaded unit using device ACL.)", u->id);
warned = true;
}
}
(is_host_root || cg_all_unified() > 0 || !is_local_root))
(void) cgroup_apply_devices(u);
if (apply_mask & CGROUP_MASK_PIDS) {
@ -1609,7 +1430,7 @@ static CGroupMask unit_get_cgroup_mask(Unit *u) {
mask |= CGROUP_MASK_MEMORY;
if (c->device_allow ||
c->device_policy != CGROUP_AUTO)
c->device_policy != CGROUP_DEVICE_POLICY_AUTO)
mask |= CGROUP_MASK_DEVICES | CGROUP_MASK_BPF_DEVICES;
if (c->tasks_accounting ||
@ -3747,9 +3568,9 @@ int compare_job_priority(const void *a, const void *b) {
}
static const char* const cgroup_device_policy_table[_CGROUP_DEVICE_POLICY_MAX] = {
[CGROUP_AUTO] = "auto",
[CGROUP_CLOSED] = "closed",
[CGROUP_STRICT] = "strict",
[CGROUP_DEVICE_POLICY_AUTO] = "auto",
[CGROUP_DEVICE_POLICY_CLOSED] = "closed",
[CGROUP_DEVICE_POLICY_STRICT] = "strict",
};
int unit_get_cpuset(Unit *u, CPUSet *cpus, const char *name) {

View File

@ -18,16 +18,15 @@ typedef struct CGroupBlockIODeviceWeight CGroupBlockIODeviceWeight;
typedef struct CGroupBlockIODeviceBandwidth CGroupBlockIODeviceBandwidth;
typedef enum CGroupDevicePolicy {
/* When devices listed, will allow those, plus built-in ones,
if none are listed will allow everything. */
CGROUP_AUTO,
/* When devices listed, will allow those, plus built-in ones, if none are listed will allow
* everything. */
CGROUP_DEVICE_POLICY_AUTO,
/* Everything forbidden, except built-in ones and listed ones. */
CGROUP_CLOSED,
CGROUP_DEVICE_POLICY_CLOSED,
/* Everything forbidden, except for the listed devices */
CGROUP_STRICT,
CGROUP_DEVICE_POLICY_STRICT,
_CGROUP_DEVICE_POLICY_MAX,
_CGROUP_DEVICE_POLICY_INVALID = -1

View File

@ -4303,11 +4303,11 @@ int unit_patch_contexts(Unit *u) {
if (cc && ec) {
if (ec->private_devices &&
cc->device_policy == CGROUP_AUTO)
cc->device_policy = CGROUP_CLOSED;
cc->device_policy == CGROUP_DEVICE_POLICY_AUTO)
cc->device_policy = CGROUP_DEVICE_POLICY_CLOSED;
if (ec->root_image &&
(cc->device_policy != CGROUP_AUTO || cc->device_allow)) {
(cc->device_policy != CGROUP_DEVICE_POLICY_AUTO || cc->device_allow)) {
/* When RootImage= is specified, the following devices are touched. */
r = cgroup_add_device_allow(cc, "/dev/loop-control", "rw");

View File

@ -3,6 +3,7 @@
#include <sched.h>
#include <signal.h>
#include <stdlib.h>
#include <sys/mman.h>
#include <sys/mount.h>
#include <sys/wait.h>
#include <util.h>
@ -14,11 +15,14 @@
#undef basename
#include "alloc-util.h"
#include "cgroup-setup.h"
#include "cgroup-util.h"
#include "env-file.h"
#include "env-util.h"
#include "fs-util.h"
#include "log.h"
#include "path-util.h"
#include "random-util.h"
#include "strv.h"
#include "tests.h"
@ -149,3 +153,50 @@ bool have_namespaces(void) {
assert_not_reached("unexpected exit code");
}
/* Reports whether this process is able to mlock() CAN_MEMLOCK_SIZE bytes of memory. */
bool can_memlock(void) {
        void *mem;
        bool locked;

        /* BPF programs are charged against RLIMIT_MEMLOCK, so callers want to know up front
         * whether locking memory works at all and skip their test if it does not. The limit is
         * probed by actually locking a mapping instead of reading RLIMIT_MEMLOCK, because in
         * container environments the value we see might not be the one actually in effect. */
        mem = mmap(NULL, CAN_MEMLOCK_SIZE, PROT_READ|PROT_WRITE, MAP_ANONYMOUS|MAP_SHARED, -1, 0);
        if (mem == MAP_FAILED)
                return false;

        locked = mlock(mem, CAN_MEMLOCK_SIZE) >= 0;
        if (locked)
                assert_se(munlock(mem, CAN_MEMLOCK_SIZE) >= 0);

        assert_se(munmap(mem, CAN_MEMLOCK_SIZE) >= 0);

        return locked;
}
/* Creates a randomly named child cgroup below the cgroup path reported by
 * cg_pid_get_path(NULL, 0, ...) and attaches pid 0 (presumably the calling process) to it.
 *
 * Returns 0 on success and, if ret_cgroup is non-NULL, stores the newly allocated path of the
 * child cgroup there (ownership passes to the caller). Returns a negative errno-style value if
 * the cgroup path cannot be determined (-ENOMEDIUM) or creating/attaching fails. */
int enter_cgroup_subroot(char **ret_cgroup) {
        _cleanup_free_ char *cgroup_root = NULL, *cgroup_subroot = NULL;
        CGroupMask supported;
        int r;

        r = cg_pid_get_path(NULL, 0, &cgroup_root);
        if (r == -ENOMEDIUM)
                return log_warning_errno(r, "cg_pid_get_path(NULL, 0, ...) failed: %m");
        /* This is a runtime result, not a programming invariant, so check it with assert_se()
         * like the other checks in this function; a plain assert() may be compiled out, which
         * would leave cgroup_root NULL for the asprintf() below. */
        assert_se(r >= 0);

        assert_se(asprintf(&cgroup_subroot, "%s/%" PRIx64, cgroup_root, random_u64()) >= 0);
        assert_se(cg_mask_supported(&supported) >= 0);

        /* Failures to create or enter the cgroup are not asserted on; they are handed back to
         * the caller, which decides how to deal with them. */
        r = cg_create_everywhere(supported, _CGROUP_MASK_ALL, cgroup_subroot);
        if (r < 0)
                return r;

        r = cg_attach_everywhere(supported, cgroup_subroot, 0, NULL, NULL);
        if (r < 0)
                return r;

        if (ret_cgroup)
                *ret_cgroup = TAKE_PTR(cgroup_subroot);

        return 0;
}

View File

@ -3,7 +3,23 @@
#include <stdbool.h>
#include "sd-daemon.h"
#include "macro.h"
/* Returns true if r, taken as an absolute errno value, is one of the errors for which callers skip
 * the test instead of treating it as a failure. */
static inline bool manager_errno_skip_test(int r) {
        /* Callers pass negative errno-style values; taking the magnitude accepts positive ones too. */
        const int e = abs(r);

        return IN_SET(e,
                      EPERM,
                      EACCES,
                      EADDRINUSE,
                      EHOSTDOWN,
                      ENOENT,
                      ENOMEDIUM /* cannot determine cgroup */
        );
}
char* setup_fake_runtime_dir(void);
int enter_cgroup_subroot(char **ret_cgroup);
const char* get_testdata_dir(void);
const char* get_catalog_dir(void);
bool slow_tests_enabled(void);
@ -12,3 +28,14 @@ int log_tests_skipped(const char *message);
int log_tests_skipped_errno(int r, const char *message);
bool have_namespaces(void);
/* We use a small but non-trivial limit here */
#define CAN_MEMLOCK_SIZE (512 * 1024U)
bool can_memlock(void);
/* Executes statement x only if sd_booted() returns a positive value; otherwise prints a notice to
 * stdout that names the skipped statement (the stringified x).
 * NOTE(review): this expands to a bare if/else rather than a do { } while (0) wrapper, so take care
 * when using it as the body of an unbraced if/else. */
#define TEST_REQ_RUNNING_SYSTEMD(x) \
if (sd_booted() > 0) { \
x; \
} else { \
printf("systemd not booted skipping '%s'\n", #x); \
}

View File

@ -50,8 +50,7 @@ tests += [
[],
[]],
[['src/test/test-engine.c',
'src/test/test-helper.c'],
[['src/test/test-engine.c'],
[libcore,
libudev,
libshared],
@ -140,8 +139,7 @@ tests += [
[],
[]],
[['src/test/test-unit-name.c',
'src/test/test-helper.c'],
[['src/test/test-unit-name.c'],
[libcore,
libshared],
[threads,
@ -151,8 +149,7 @@ tests += [
libmount,
libblkid]],
[['src/test/test-load-fragment.c',
'src/test/test-helper.c'],
[['src/test/test-load-fragment.c'],
[libcore,
libshared],
[threads,
@ -427,8 +424,7 @@ tests += [
[libbasic],
[]],
[['src/test/test-bpf.c',
'src/test/test-helper.c'],
[['src/test/test-bpf-devices.c'],
[libcore,
libshared],
[libmount,
@ -438,8 +434,17 @@ tests += [
libselinux,
libblkid]],
[['src/test/test-watch-pid.c',
'src/test/test-helper.c'],
[['src/test/test-bpf-firewall.c'],
[libcore,
libshared],
[libmount,
threads,
librt,
libseccomp,
libselinux,
libblkid]],
[['src/test/test-watch-pid.c'],
[libcore,
libshared],
[libmount,
@ -589,14 +594,12 @@ tests += [
libshared],
[]],
[['src/test/test-cgroup-unit-default.c',
'src/test/test-helper.c'],
[['src/test/test-cgroup-unit-default.c'],
[libcore,
libshared],
[]],
[['src/test/test-cgroup-mask.c',
'src/test/test-helper.c'],
[['src/test/test-cgroup-mask.c'],
[libcore,
libshared],
[threads,
@ -643,8 +646,7 @@ tests += [
[],
'', 'manual'],
[['src/test/test-path.c',
'src/test/test-helper.c'],
[['src/test/test-path.c'],
[libcore,
libshared],
[threads,
@ -654,8 +656,7 @@ tests += [
libmount,
libblkid]],
[['src/test/test-execute.c',
'src/test/test-helper.c'],
[['src/test/test-execute.c'],
[libcore,
libshared],
[threads,
@ -684,8 +685,7 @@ tests += [
[],
[]],
[['src/test/test-sched-prio.c',
'src/test/test-helper.c'],
[['src/test/test-sched-prio.c'],
[libcore,
libshared],
[threads,

306
src/test/test-bpf-devices.c Normal file
View File

@ -0,0 +1,306 @@
/* SPDX-License-Identifier: LGPL-2.1+ */
#include <sys/resource.h>
#include <sys/time.h>
#include <unistd.h>
#include "alloc-util.h"
#include "bpf-devices.h"
#include "bpf-program.h"
#include "cgroup-setup.h"
#include "errno-list.h"
#include "fd-util.h"
#include "fs-util.h"
#include "path-util.h"
#include "tests.h"
/* Builds a CGROUP_DEVICE_POLICY_CLOSED program with the static whitelist, applies it to cgroup_path
 * and verifies that none of a fixed list of /dev nodes is refused with EPERM, neither when opened
 * for reading nor when opened for writing. */
static void test_policy_closed(const char *cgroup_path, BPFProgram **installed_prog) {
        static const char *const devices[] = {
                "/dev/null",
                "/dev/zero",
                "/dev/full",
                "/dev/random",
                "/dev/urandom",
                "/dev/tty",
                "/dev/ptmx",
                NULL,
        };
        _cleanup_(bpf_program_unrefp) BPFProgram *prog = NULL;
        unsigned wrong = 0;
        int r;

        log_info("/* %s */", __func__);

        r = bpf_devices_cgroup_init(&prog, CGROUP_DEVICE_POLICY_CLOSED, true);
        assert_se(r >= 0);

        r = bpf_devices_whitelist_static(prog, cgroup_path);
        assert_se(r >= 0);

        r = bpf_devices_apply_policy(prog, CGROUP_DEVICE_POLICY_CLOSED, true, cgroup_path, installed_prog);
        assert_se(r >= 0);

        for (const char *const *d = devices; *d; d++) {
                _cleanup_close_ int fd, fd2;
                const char *s = *d;

                fd = open(s, O_CLOEXEC|O_RDONLY|O_NOCTTY);
                log_debug("open(%s, \"r\") = %d/%s", s, fd, fd < 0 ? errno_to_name(errno) : "-");
                /* We ignore errors other than EPERM, e.g. ENOENT or ENXIO */
                if (fd < 0 && errno == EPERM)
                        wrong++;

                fd2 = open(s, O_CLOEXEC|O_WRONLY|O_NOCTTY);
                log_debug("open(%s, \"w\") = %d/%s", s, fd2, fd2 < 0 ? errno_to_name(errno) : "-");
                if (fd2 < 0 && errno == EPERM)
                        wrong++;
        }

        assert_se(wrong == 0);
}
/* Builds a CGROUP_DEVICE_POLICY_STRICT program that whitelists /dev/null for "rw", /dev/random for
 * "r" and /dev/zero for "w", applies it to cgroup_path and checks that read-only and write-only
 * opens of those nodes (and of the non-whitelisted /dev/full) succeed or fail accordingly. */
static void test_policy_strict(const char *cgroup_path, BPFProgram **installed_prog) {
        /* Expected open() outcomes under the whitelist installed below. */
        static const struct {
                const char *path;
                bool read_ok;
                bool write_ok;
        } expectations[] = {
                { "/dev/null",   true,  true  },
                { "/dev/random", true,  false },
                { "/dev/zero",   false, true  },
                { "/dev/full",   false, false },
        };
        _cleanup_(bpf_program_unrefp) BPFProgram *prog = NULL;
        unsigned wrong = 0;
        int r;

        log_info("/* %s */", __func__);

        r = bpf_devices_cgroup_init(&prog, CGROUP_DEVICE_POLICY_STRICT, true);
        assert_se(r >= 0);

        r = bpf_devices_whitelist_device(prog, cgroup_path, "/dev/null", "rw");
        assert_se(r >= 0);

        r = bpf_devices_whitelist_device(prog, cgroup_path, "/dev/random", "r");
        assert_se(r >= 0);

        r = bpf_devices_whitelist_device(prog, cgroup_path, "/dev/zero", "w");
        assert_se(r >= 0);

        r = bpf_devices_apply_policy(prog, CGROUP_DEVICE_POLICY_STRICT, true, cgroup_path, installed_prog);
        assert_se(r >= 0);

        for (size_t i = 0; i < sizeof(expectations) / sizeof(expectations[0]); i++) {
                _cleanup_close_ int fd, fd2;
                const char *s = expectations[i].path;

                fd = open(s, O_CLOEXEC|O_RDONLY|O_NOCTTY);
                log_debug("open(%s, \"r\") = %d/%s", s, fd, fd < 0 ? errno_to_name(errno) : "-");
                /* Count a mismatch whenever the outcome differs from the expectation. */
                wrong += (fd >= 0) != expectations[i].read_ok;

                fd2 = open(s, O_CLOEXEC|O_WRONLY|O_NOCTTY);
                log_debug("open(%s, \"w\") = %d/%s", s, fd2, fd2 < 0 ? errno_to_name(errno) : "-");
                wrong += (fd2 >= 0) != expectations[i].write_ok;
        }

        assert_se(wrong == 0);
}
/* Builds a CGROUP_DEVICE_POLICY_STRICT program that whitelists the character-device major given by
 * pattern for "rw", applies it to cgroup_path and checks which /dev nodes can then be opened. */
static void test_policy_whitelist_major(const char *pattern, const char *cgroup_path, BPFProgram **installed_prog) {
        /* /dev/null, /dev/full have major==1, /dev/tty has major==5 */
        static const struct {
                const char *path;
                bool accessible;
        } expectations[] = {
                { "/dev/null", true  },
                { "/dev/full", true  },
                { "/dev/tty",  false },
        };
        _cleanup_(bpf_program_unrefp) BPFProgram *prog = NULL;
        unsigned wrong = 0;
        int r;

        log_info("/* %s(%s) */", __func__, pattern);

        r = bpf_devices_cgroup_init(&prog, CGROUP_DEVICE_POLICY_STRICT, true);
        assert_se(r >= 0);

        r = bpf_devices_whitelist_major(prog, cgroup_path, pattern, 'c', "rw");
        assert_se(r >= 0);

        r = bpf_devices_apply_policy(prog, CGROUP_DEVICE_POLICY_STRICT, true, cgroup_path, installed_prog);
        assert_se(r >= 0);

        for (size_t i = 0; i < sizeof(expectations) / sizeof(expectations[0]); i++) {
                _cleanup_close_ int fd, fd2;
                const char *s = expectations[i].path;

                fd = open(s, O_CLOEXEC|O_RDONLY|O_NOCTTY);
                log_debug("open(%s, \"r\") = %d/%s", s, fd, fd < 0 ? errno_to_name(errno) : "-");
                /* Both access modes share the same expectation for every node in the table. */
                wrong += (fd >= 0) != expectations[i].accessible;

                fd2 = open(s, O_CLOEXEC|O_WRONLY|O_NOCTTY);
                log_debug("open(%s, \"w\") = %d/%s", s, fd2, fd2 < 0 ? errno_to_name(errno) : "-");
                wrong += (fd2 >= 0) != expectations[i].accessible;
        }

        assert_se(wrong == 0);
}
/* Builds a CGROUP_DEVICE_POLICY_STRICT program that whitelists every major ("*") of the given device
 * type for "rw", applies it to cgroup_path and checks a read-write open of /dev/null: it must
 * succeed when type is 'c' and fail otherwise. */
static void test_policy_whitelist_major_star(char type, const char *cgroup_path, BPFProgram **installed_prog) {
        _cleanup_(bpf_program_unrefp) BPFProgram *prog = NULL;
        unsigned wrong = 0;
        int r;

        log_info("/* %s(type=%c) */", __func__, type);

        r = bpf_devices_cgroup_init(&prog, CGROUP_DEVICE_POLICY_STRICT, true);
        assert_se(r >= 0);

        r = bpf_devices_whitelist_major(prog, cgroup_path, "*", type, "rw");
        assert_se(r >= 0);

        r = bpf_devices_apply_policy(prog, CGROUP_DEVICE_POLICY_STRICT, true, cgroup_path, installed_prog);
        assert_se(r >= 0);

        {
                const char *s = "/dev/null";
                /* Initialized at declaration so the cleanup handler never sees an indeterminate fd. */
                _cleanup_close_ int fd = open(s, O_CLOEXEC|O_RDWR|O_NOCTTY);

                /* The node is opened O_RDWR, hence the "rw" label in the log line. */
                log_debug("open(%s, \"rw\") = %d/%s", s, fd, fd < 0 ? errno_to_name(errno) : "-");

                if (type == 'c')
                        wrong += fd < 0;
                else
                        wrong += fd >= 0;
        }

        assert_se(wrong == 0);
}
/* Applies a CGROUP_DEVICE_POLICY_STRICT program without any effective whitelist entry to cgroup_path
 * and checks that a read-write open of /dev/null is refused.
 *
 * add_mismatched: passed on to bpf_devices_cgroup_init(); when true, additionally tries to whitelist
 *                 the major name "foobarxxx", which is expected to fail. */
static void test_policy_empty(bool add_mismatched, const char *cgroup_path, BPFProgram **installed_prog) {
        _cleanup_(bpf_program_unrefp) BPFProgram *prog = NULL;
        unsigned wrong = 0;
        int r;

        log_info("/* %s(add_mismatched=%s) */", __func__, yes_no(add_mismatched));

        r = bpf_devices_cgroup_init(&prog, CGROUP_DEVICE_POLICY_STRICT, add_mismatched);
        assert_se(r >= 0);

        if (add_mismatched) {
                r = bpf_devices_whitelist_major(prog, cgroup_path, "foobarxxx", 'c', "rw");
                assert_se(r < 0);
        }

        r = bpf_devices_apply_policy(prog, CGROUP_DEVICE_POLICY_STRICT, false, cgroup_path, installed_prog);
        assert_se(r >= 0);

        {
                const char *s = "/dev/null";
                /* Initialized at declaration so the cleanup handler never sees an indeterminate fd. */
                _cleanup_close_ int fd = open(s, O_CLOEXEC|O_RDWR|O_NOCTTY);

                /* The node is opened O_RDWR, hence the "rw" label in the log line. */
                log_debug("open(%s, \"rw\") = %d/%s", s, fd, fd < 0 ? errno_to_name(errno) : "-");

                wrong += fd >= 0;
        }

        assert_se(wrong == 0);
}
/* Test driver: raises RLIMIT_MEMLOCK, skips unless the unified cgroup hierarchy, mlock() and BPF
 * device filters are usable, enters a freshly created cgroup and runs every device policy test
 * against it. Finally the process is attached to the parent cgroup again. */
int main(int argc, char *argv[]) {
        _cleanup_free_ char *cgroup = NULL, *parent = NULL;
        _cleanup_(rmdir_and_freep) char *controller_path = NULL;
        CGroupMask supported;
        struct rlimit rl;
        int r;

        test_setup_logging(LOG_DEBUG);

        /* Best effort: the result of setrlimit() is ignored, can_memlock() below is the real check. */
        assert_se(getrlimit(RLIMIT_MEMLOCK, &rl) >= 0);
        rl.rlim_cur = rl.rlim_max = MAX(rl.rlim_max, CAN_MEMLOCK_SIZE);
        (void) setrlimit(RLIMIT_MEMLOCK, &rl);

        r = cg_all_unified();
        if (r <= 0)
                return log_tests_skipped("We don't seem to be running with unified cgroup hierarchy");

        if (!can_memlock())
                return log_tests_skipped("Can't use mlock()");

        r = enter_cgroup_subroot(&cgroup);
        if (r == -ENOMEDIUM)
                return log_tests_skipped("cgroupfs not available");
        if (r < 0)
                /* On any other failure 'cgroup' stays NULL. Do not go on: the policies below would
                 * then be applied relative to a NULL cgroup path (presumably the cgroup root). */
                return log_tests_skipped_errno(r, "enter_cgroup_subroot");

        r = bpf_devices_supported();
        if (!r)
                return log_tests_skipped("BPF device filter not supported");
        assert_se(r == 1);

        r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, cgroup, NULL, &controller_path);
        assert_se(r >= 0);

        _cleanup_(bpf_program_unrefp) BPFProgram *prog = NULL;

        test_policy_closed(cgroup, &prog);
        test_policy_strict(cgroup, &prog);

        test_policy_whitelist_major("mem", cgroup, &prog);
        test_policy_whitelist_major("1", cgroup, &prog);

        test_policy_whitelist_major_star('c', cgroup, &prog);
        test_policy_whitelist_major_star('b', cgroup, &prog);

        test_policy_empty(false, cgroup, &prog);
        test_policy_empty(true, cgroup, &prog);

        /* Attach to the parent cgroup again, presumably so that the rmdir_and_freep cleanup of
         * controller_path finds the temporary cgroup empty. */
        assert_se(parent = dirname_malloc(cgroup));

        assert_se(cg_mask_supported(&supported) >= 0);
        r = cg_attach_everywhere(supported, parent, 0, NULL, NULL);
        assert_se(r >= 0);

        return 0;
}

View File

@ -2,7 +2,6 @@
#include <linux/bpf_insn.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>
#include "bpf-firewall.h"
@ -11,37 +10,12 @@
#include "manager.h"
#include "rm-rf.h"
#include "service.h"
#include "test-helper.h"
#include "tests.h"
#include "unit.h"
#include "virt.h"
/* We use the same limit here that PID 1 bumps RLIMIT_MEMLOCK to if it can */
#define CAN_MEMLOCK_SIZE (64U*1024U*1024U)
/* Probes whether CAN_MEMLOCK_SIZE bytes of memory can be mlock()ed by this process. */
static bool can_memlock(void) {
        /* Let's see if we can mlock() a larger blob of memory. BPF programs are charged against
         * RLIMIT_MEMLOCK, hence let's first make sure we can lock memory at all, and skip the test if we
         * cannot. Why not check RLIMIT_MEMLOCK explicitly? Because in container environments the
         * RLIMIT_MEMLOCK value we see might not match the RLIMIT_MEMLOCK value actually in effect. */

        void *p = mmap(NULL, CAN_MEMLOCK_SIZE, PROT_READ|PROT_WRITE, MAP_ANONYMOUS|MAP_SHARED, -1, 0);
        if (p == MAP_FAILED)
                return false;

        const bool locked = mlock(p, CAN_MEMLOCK_SIZE) >= 0;
        if (locked)
                assert_se(munlock(p, CAN_MEMLOCK_SIZE) >= 0);

        /* The probe mapping is dropped whether or not locking worked. */
        assert_se(munmap(p, CAN_MEMLOCK_SIZE) >= 0);

        return locked;
}
int main(int argc, char *argv[]) {
struct bpf_insn exit_insn[] = {
const struct bpf_insn exit_insn[] = {
BPF_MOV64_IMM(BPF_REG_0, 0), /* drop */
BPF_EXIT_INSN()
};
@ -61,16 +35,16 @@ int main(int argc, char *argv[]) {
test_setup_logging(LOG_DEBUG);
if (detect_container() > 0)
return log_tests_skipped("test-bpf fails inside LXC and Docker containers: https://github.com/systemd/systemd/issues/9666");
return log_tests_skipped("test-bpf-firewall fails inside LXC and Docker containers: https://github.com/systemd/systemd/issues/9666");
assert_se(getrlimit(RLIMIT_MEMLOCK, &rl) >= 0);
rl.rlim_cur = rl.rlim_max = MAX3(rl.rlim_cur, rl.rlim_max, CAN_MEMLOCK_SIZE);
rl.rlim_cur = rl.rlim_max = MAX(rl.rlim_max, CAN_MEMLOCK_SIZE);
(void) setrlimit(RLIMIT_MEMLOCK, &rl);
if (!can_memlock())
return log_tests_skipped("Can't use mlock(), skipping.");
return log_tests_skipped("Can't use mlock()");
r = enter_cgroup_subroot();
r = enter_cgroup_subroot(NULL);
if (r == -ENOMEDIUM)
return log_tests_skipped("cgroupfs not available");

View File

@ -8,7 +8,6 @@
#include "manager.h"
#include "rm-rf.h"
#include "string-util.h"
#include "test-helper.h"
#include "tests.h"
#include "unit.h"
@ -34,7 +33,7 @@ static int test_cgroup_mask(void) {
int r;
CGroupMask cpu_accounting_mask = get_cpu_accounting_mask();
r = enter_cgroup_subroot();
r = enter_cgroup_subroot(NULL);
if (r == -ENOMEDIUM)
return log_tests_skipped("cgroupfs not available");

View File

@ -5,7 +5,6 @@
#include "cgroup.h"
#include "manager.h"
#include "rm-rf.h"
#include "test-helper.h"
#include "tests.h"
#include "unit.h"
@ -19,7 +18,7 @@ static int test_default_memory_low(void) {
uint64_t dml_tree_default;
int r;
r = enter_cgroup_subroot();
r = enter_cgroup_subroot(NULL);
if (r == -ENOMEDIUM)
return log_tests_skipped("cgroupfs not available");

View File

@ -13,7 +13,6 @@
#include "stat-util.h"
#include "string-util.h"
#include "strv.h"
#include "test-helper.h"
#include "tests.h"
#include "user-util.h"
#include "util.h"

View File

@ -7,7 +7,6 @@
#include "manager.h"
#include "rm-rf.h"
#include "strv.h"
#include "test-helper.h"
#include "tests.h"
#include "service.h"
@ -22,7 +21,7 @@ int main(int argc, char *argv[]) {
test_setup_logging(LOG_DEBUG);
r = enter_cgroup_subroot();
r = enter_cgroup_subroot(NULL);
if (r == -ENOMEDIUM)
return log_tests_skipped("cgroupfs not available");
@ -30,7 +29,7 @@ int main(int argc, char *argv[]) {
assert_se(set_unit_path(get_testdata_dir()) >= 0);
assert_se(runtime_dir = setup_fake_runtime_dir());
r = manager_new(UNIT_FILE_USER, MANAGER_TEST_RUN_BASIC, &m);
if (MANAGER_SKIP_TEST(r))
if (manager_errno_skip_test(r))
return log_tests_skipped_errno(r, "manager_new");
assert_se(r >= 0);
assert_se(manager_startup(m, NULL, NULL) >= 0);

View File

@ -20,7 +20,6 @@
#endif
#include "service.h"
#include "stat-util.h"
#include "test-helper.h"
#include "tests.h"
#include "unit.h"
#include "user-util.h"
@ -37,6 +36,11 @@ static int cld_dumped_to_killed(int code) {
return code == CLD_DUMPED ? CLD_KILLED : code;
}
/* Returns true if the environment variable TRAVIS is set to exactly "true". */
_unused_ static bool is_run_on_travis_ci(void) {
        /* https://docs.travis-ci.com/user/environment-variables#default-environment-variables */
        const char *travis = getenv("TRAVIS");

        return streq_ptr(travis, "true");
}
static void wait_for_service_finish(Manager *m, Unit *unit) {
Service *service = NULL;
usec_t ts;
@ -781,7 +785,7 @@ static int run_tests(UnitFileScope scope, const test_entry tests[], char **patte
assert_se(tests);
r = manager_new(scope, MANAGER_TEST_RUN_BASIC, &m);
if (MANAGER_SKIP_TEST(r))
if (manager_errno_skip_test(r))
return log_tests_skipped_errno(r, "manager_new");
assert_se(r >= 0);
assert_se(manager_startup(m, NULL, NULL) >= 0);
@ -865,7 +869,7 @@ int main(int argc, char *argv[]) {
if (getuid() != 0)
return log_tests_skipped("not root");
r = enter_cgroup_subroot();
r = enter_cgroup_subroot(NULL);
if (r == -ENOMEDIUM)
return log_tests_skipped("cgroupfs not available");

View File

@ -1,35 +0,0 @@
/* SPDX-License-Identifier: LGPL-2.1+ */
#include "test-helper.h"
#include "random-util.h"
#include "alloc-util.h"
#include "cgroup-setup.h"
#include "string-util.h"
/* Creates a randomly named cgroup below the cgroup reported by cg_pid_get_path(NULL, 0, ...) and
 * attaches pid 0 (presumably the calling process) to it. Returns the result of the attach operation,
 * or an earlier error from determining the current cgroup or creating the new one. */
int enter_cgroup_subroot(void) {
        _cleanup_free_ char *cgroup_root = NULL, *cgroup_subroot = NULL;
        CGroupMask supported;
        int r;

        r = cg_pid_get_path(NULL, 0, &cgroup_root);
        if (r == -ENOMEDIUM)
                return log_warning_errno(r, "cg_pid_get_path(NULL, 0, ...) failed: %m");
        assert(r >= 0);

        assert_se(asprintf(&cgroup_subroot, "%s/%" PRIx64, cgroup_root, random_u64()) >= 0);
        assert_se(cg_mask_supported(&supported) >= 0);

        /* If this fails, then we don't mind as the later cgroup operations will fail too, and it's fine if we handle
         * any errors at that point. */

        r = cg_create_everywhere(supported, _CGROUP_MASK_ALL, cgroup_subroot);
        if (r >= 0)
                /* Only try to move into the new cgroup once it could be created. */
                r = cg_attach_everywhere(supported, cgroup_subroot, 0, NULL, NULL);

        return r;
}
/* Returns true if the environment variable TRAVIS is set to exactly "true"; see
 * https://docs.travis-ci.com/user/environment-variables#default-environment-variables */
bool is_run_on_travis_ci(void) {
        const char *e;

        e = getenv("TRAVIS");
        return streq_ptr(e, "true");
}

View File

@ -1,31 +0,0 @@
/* SPDX-License-Identifier: LGPL-2.1+ */
#pragma once
/***
Copyright © 2013 Holger Hans Peter Freyther
***/
#include "sd-daemon.h"
#include "macro.h"
/* Executes statement x only if sd_booted() returns a positive value; otherwise prints a notice to
 * stdout that names the skipped statement (the stringified x).
 * NOTE(review): this expands to a bare if/else rather than a do { } while (0) wrapper, so take care
 * when using it as the body of an unbraced if/else. */
#define TEST_REQ_RUNNING_SYSTEMD(x) \
if (sd_booted() > 0) { \
x; \
} else { \
printf("systemd not booted skipping '%s'\n", #x); \
}
/* Evaluates to true if r equals one of the listed negative errno values; callers use it on the
 * result of manager_new() to decide whether to skip a test instead of failing it. Unlike a check on
 * the magnitude, positive errno values do not match. */
#define MANAGER_SKIP_TEST(r) \
IN_SET(r, \
-EPERM, \
-EACCES, \
-EADDRINUSE, \
-EHOSTDOWN, \
-ENOENT, \
-ENOMEDIUM /* cannot determine cgroup */ \
)
int enter_cgroup_subroot(void);
bool is_run_on_travis_ci(void);

View File

@ -23,7 +23,6 @@
#include "specifier.h"
#include "string-util.h"
#include "strv.h"
#include "test-helper.h"
#include "tests.h"
#include "tmpfile-util.h"
#include "user-util.h"
@ -97,7 +96,7 @@ static void test_config_parse_exec(void) {
_cleanup_(unit_freep) Unit *u = NULL;
r = manager_new(UNIT_FILE_USER, MANAGER_TEST_RUN_MINIMAL, &m);
if (MANAGER_SKIP_TEST(r)) {
if (manager_errno_skip_test(r)) {
log_notice_errno(r, "Skipping test: manager_new: %m");
return;
}
@ -442,7 +441,7 @@ static void test_config_parse_log_extra_fields(void) {
ExecContext c = {};
r = manager_new(UNIT_FILE_USER, MANAGER_TEST_RUN_MINIMAL, &m);
if (MANAGER_SKIP_TEST(r)) {
if (manager_errno_skip_test(r)) {
log_notice_errno(r, "Skipping test: manager_new: %m");
return;
}
@ -780,7 +779,7 @@ int main(int argc, char *argv[]) {
test_setup_logging(LOG_INFO);
r = enter_cgroup_subroot();
r = enter_cgroup_subroot(NULL);
if (r == -ENOMEDIUM)
return log_tests_skipped("cgroupfs not available");

View File

@ -16,7 +16,6 @@
#include "rm-rf.h"
#include "string-util.h"
#include "strv.h"
#include "test-helper.h"
#include "tests.h"
#include "unit.h"
#include "util.h"
@ -32,12 +31,12 @@ static int setup_test(Manager **m) {
assert_se(m);
r = enter_cgroup_subroot();
r = enter_cgroup_subroot(NULL);
if (r == -ENOMEDIUM)
return log_tests_skipped("cgroupfs not available");
r = manager_new(UNIT_FILE_USER, MANAGER_TEST_RUN_BASIC, &tmp);
if (MANAGER_SKIP_TEST(r))
if (manager_errno_skip_test(r))
return log_tests_skipped_errno(r, "manager_new");
assert_se(r >= 0);
assert_se(manager_startup(tmp, NULL, NULL) >= 0);

View File

@ -25,7 +25,6 @@
#include "stdio-util.h"
#include "string-util.h"
#include "terminal-util.h"
#include "test-helper.h"
#include "tests.h"
#include "util.h"
#include "virt.h"

View File

@ -9,7 +9,6 @@
#include "macro.h"
#include "manager.h"
#include "rm-rf.h"
#include "test-helper.h"
#include "tests.h"
int main(int argc, char *argv[]) {
@ -21,7 +20,7 @@ int main(int argc, char *argv[]) {
test_setup_logging(LOG_INFO);
r = enter_cgroup_subroot();
r = enter_cgroup_subroot(NULL);
if (r == -ENOMEDIUM)
return log_tests_skipped("cgroupfs not available");
@ -29,7 +28,7 @@ int main(int argc, char *argv[]) {
assert_se(set_unit_path(get_testdata_dir()) >= 0);
assert_se(runtime_dir = setup_fake_runtime_dir());
r = manager_new(UNIT_FILE_USER, MANAGER_TEST_RUN_BASIC, &m);
if (MANAGER_SKIP_TEST(r))
if (manager_errno_skip_test(r))
return log_tests_skipped_errno(r, "manager_new");
assert_se(r >= 0);
assert_se(manager_startup(m, NULL, NULL) >= 0);

View File

@ -15,7 +15,6 @@
#include "special.h"
#include "specifier.h"
#include "string-util.h"
#include "test-helper.h"
#include "tests.h"
#include "unit-def.h"
#include "unit-name.h"
@ -234,7 +233,7 @@ static int test_unit_printf(void) {
assert_se(get_shell(&shell) >= 0);
r = manager_new(UNIT_FILE_USER, MANAGER_TEST_RUN_MINIMAL, &m);
if (MANAGER_SKIP_TEST(r))
if (manager_errno_skip_test(r))
return log_tests_skipped_errno(r, "manager_new");
assert_se(r == 0);
@ -871,7 +870,7 @@ int main(int argc, char* argv[]) {
test_setup_logging(LOG_INFO);
r = enter_cgroup_subroot();
r = enter_cgroup_subroot(NULL);
if (r == -ENOMEDIUM)
return log_tests_skipped("cgroupfs not available");

View File

@ -4,7 +4,6 @@
#include "manager.h"
#include "rm-rf.h"
#include "service.h"
#include "test-helper.h"
#include "tests.h"
int main(int argc, char *argv[]) {
@ -17,7 +16,7 @@ int main(int argc, char *argv[]) {
if (getuid() != 0)
return log_tests_skipped("not root");
r = enter_cgroup_subroot();
r = enter_cgroup_subroot(NULL);
if (r == -ENOMEDIUM)
return log_tests_skipped("cgroupfs not available");