From aed704b7a634954dc28fe5c4b49db478cf2d96b7 Mon Sep 17 00:00:00 2001
From: Sargun Dhillon
Date: Fri, 12 Aug 2016 08:56:40 -0700
Subject: [PATCH 1/3] cgroup: Add task_under_cgroup_hierarchy cgroup inline
 function to headers

This commit adds an inline function to cgroup.h to check whether a given
task is under a given cgroup hierarchy. This is to avoid having to put
ifdefs in .c files to gate access to cgroups. When cgroups are disabled
this always returns true.

Signed-off-by: Sargun Dhillon
Cc: Alexei Starovoitov
Cc: Daniel Borkmann
Cc: Tejun Heo
Acked-by: Tejun Heo
Signed-off-by: David S. Miller
---
 include/linux/cgroup.h | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 984f73b719a9..a4414a11eea7 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -497,6 +497,23 @@ static inline bool cgroup_is_descendant(struct cgroup *cgrp,
 	return cgrp->ancestor_ids[ancestor->level] == ancestor->id;
 }
 
+/**
+ * task_under_cgroup_hierarchy - test task's membership of cgroup ancestry
+ * @task: the task to be tested
+ * @ancestor: possible ancestor of @task's cgroup
+ *
+ * Tests whether @task's default cgroup hierarchy is a descendant of @ancestor.
+ * It follows all the same rules as cgroup_is_descendant, and only applies
+ * to the default hierarchy.
+ */
+static inline bool task_under_cgroup_hierarchy(struct task_struct *task,
+					       struct cgroup *ancestor)
+{
+	struct css_set *cset = task_css_set(task);
+
+	return cgroup_is_descendant(cset->dfl_cgrp, ancestor);
+}
+
 /* no synchronization, the result can only be used as a hint */
 static inline bool cgroup_is_populated(struct cgroup *cgrp)
 {
@@ -557,6 +574,7 @@ static inline void pr_cont_cgroup_path(struct cgroup *cgrp)
 #else /* !CONFIG_CGROUPS */
 
 struct cgroup_subsys_state;
+struct cgroup;
 
 static inline void css_put(struct cgroup_subsys_state *css) {}
 static inline int cgroup_attach_task_all(struct task_struct *from,
@@ -574,6 +592,11 @@ static inline void cgroup_free(struct task_struct *p) {}
 
 static inline int cgroup_init_early(void) { return 0; }
 static inline int cgroup_init(void) { return 0; }
+static inline bool task_under_cgroup_hierarchy(struct task_struct *task,
+					       struct cgroup *ancestor)
+{
+	return true;
+}
 #endif /* !CONFIG_CGROUPS */
 
 /*
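
For reference, a minimal sketch (not part of this series) of how kernel code
might call the new inline; the wrapper name task_in_container() is
hypothetical. task_css_set() is RCU-protected, so a caller that is not
already inside an RCU read-side critical section would take one itself, as
below. With !CONFIG_CGROUPS the stub added above simply returns true, so the
wrapper compiles away to "yes".

#include <linux/cgroup.h>
#include <linux/rcupdate.h>
#include <linux/sched.h>

/* Hypothetical helper: does @task currently sit under @ancestor on the
 * default (v2) hierarchy?
 */
static bool task_in_container(struct task_struct *task, struct cgroup *ancestor)
{
	bool ret;

	rcu_read_lock();	/* task_css_set() requires RCU protection */
	ret = task_under_cgroup_hierarchy(task, ancestor);
	rcu_read_unlock();

	return ret;
}
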
From 60d20f9195b260bdf0ac10c275ae9f6016f9c069 Mon Sep 17 00:00:00 2001
From: Sargun Dhillon
Date: Fri, 12 Aug 2016 08:56:52 -0700
Subject: [PATCH 2/3] bpf: Add bpf_current_task_under_cgroup helper

This adds a bpf helper that's similar to the skb_in_cgroup helper to check
whether the probe is currently executing in the context of a specific
subset of the cgroupsv2 hierarchy. It does this based on a membership test
for a cgroup arraymap. It is invalid to call this in an interrupt, and
it'll return an error.

The helper is primarily to be used in debugging activities for containers,
where you may have multiple programs running in a given top-level
"container".

Signed-off-by: Sargun Dhillon
Cc: Alexei Starovoitov
Cc: Daniel Borkmann
Cc: Tejun Heo
Acked-by: Tejun Heo
Acked-by: Alexei Starovoitov
Acked-by: Daniel Borkmann
Signed-off-by: David S. Miller
---
 include/uapi/linux/bpf.h | 11 +++++++++++
 kernel/bpf/arraymap.c    |  2 +-
 kernel/bpf/verifier.c    |  4 +++-
 kernel/trace/bpf_trace.c | 30 ++++++++++++++++++++++++++++++
 4 files changed, 45 insertions(+), 2 deletions(-)

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index da218fec6056..bea0c4e2830a 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -375,6 +375,17 @@ enum bpf_func_id {
 	 */
 	BPF_FUNC_probe_write_user,
 
+	/**
+	 * bpf_current_task_under_cgroup(map, index) - Check cgroup2 membership of current task
+	 * @map: pointer to bpf_map in BPF_MAP_TYPE_CGROUP_ARRAY type
+	 * @index: index of the cgroup in the bpf_map
+	 * Return:
+	 *   == 0 current failed the cgroup2 descendant test
+	 *   == 1 current succeeded the cgroup2 descendant test
+	 *    < 0 error
+	 */
+	BPF_FUNC_current_task_under_cgroup,
+
 	__BPF_FUNC_MAX_ID,
 };
 
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index 633a650d7aeb..a2ac051c342f 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -538,7 +538,7 @@ static int __init register_perf_event_array_map(void)
 }
 late_initcall(register_perf_event_array_map);
 
-#ifdef CONFIG_SOCK_CGROUP_DATA
+#ifdef CONFIG_CGROUPS
 static void *cgroup_fd_array_get_ptr(struct bpf_map *map,
 				     struct file *map_file /* not used */,
 				     int fd)
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 7094c69ac199..d504722ebfa4 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -1053,7 +1053,8 @@ static int check_map_func_compatibility(struct bpf_map *map, int func_id)
 			goto error;
 		break;
 	case BPF_MAP_TYPE_CGROUP_ARRAY:
-		if (func_id != BPF_FUNC_skb_in_cgroup)
+		if (func_id != BPF_FUNC_skb_in_cgroup &&
+		    func_id != BPF_FUNC_current_task_under_cgroup)
 			goto error;
 		break;
 	default:
@@ -1075,6 +1076,7 @@ static int check_map_func_compatibility(struct bpf_map *map, int func_id)
 		if (map->map_type != BPF_MAP_TYPE_STACK_TRACE)
 			goto error;
 		break;
+	case BPF_FUNC_current_task_under_cgroup:
 	case BPF_FUNC_skb_in_cgroup:
 		if (map->map_type != BPF_MAP_TYPE_CGROUP_ARRAY)
 			goto error;
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index b20438fdb029..6b794d6669a7 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -376,6 +376,34 @@ static const struct bpf_func_proto bpf_get_current_task_proto = {
 	.ret_type = RET_INTEGER,
 };
 
+static u64 bpf_current_task_under_cgroup(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
+{
+	struct bpf_map *map = (struct bpf_map *)(long)r1;
+	struct bpf_array *array = container_of(map, struct bpf_array, map);
+	struct cgroup *cgrp;
+	u32 idx = (u32)r2;
+
+	if (unlikely(in_interrupt()))
+		return -EINVAL;
+
+	if (unlikely(idx >= array->map.max_entries))
+		return -E2BIG;
+
+	cgrp = READ_ONCE(array->ptrs[idx]);
+	if (unlikely(!cgrp))
+		return -EAGAIN;
+
+	return task_under_cgroup_hierarchy(current, cgrp);
+}
+
+static const struct bpf_func_proto bpf_current_task_under_cgroup_proto = {
+	.func		= bpf_current_task_under_cgroup,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_CONST_MAP_PTR,
+	.arg2_type	= ARG_ANYTHING,
+};
+
 static const struct bpf_func_proto *tracing_func_proto(enum bpf_func_id func_id)
 {
 	switch (func_id) {
@@ -407,6 +435,8 @@ static const struct bpf_func_proto *tracing_func_proto(enum bpf_func_id func_id)
 		return &bpf_perf_event_read_proto;
 	case BPF_FUNC_probe_write_user:
 		return bpf_get_probe_write_proto();
+	case BPF_FUNC_current_task_under_cgroup:
+		return &bpf_current_task_under_cgroup_proto;
 	default:
 		return NULL;
 	}
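
The documented return convention (0, 1, or a negative error) matters on the
BPF side: a program that wants to distinguish "not a descendant" from an
error (helper called in IRQ context, or an empty array slot) can compare
against 1 explicitly. Below is a minimal sketch, not part of the series,
written in the samples/bpf style of the next patch; the program name
check_cgroup_membership, the map name cgroup_map, and the include list are
illustrative, and user space is assumed to have stored a cgroup2 directory
fd at index 0 of the array before the kprobe fires.

#include <linux/ptrace.h>
#include <uapi/linux/bpf.h>
#include <linux/version.h>
#include "bpf_helpers.h"

/* Illustrative cgroup array; user space populates index 0 with a cgroup2
 * directory fd (see the sample user program in the next patch). */
struct bpf_map_def SEC("maps") cgroup_map = {
	.type = BPF_MAP_TYPE_CGROUP_ARRAY,
	.key_size = sizeof(u32),
	.value_size = sizeof(u32),
	.max_entries = 1,
};

SEC("kprobe/sys_sync")
int check_cgroup_membership(struct pt_regs *ctx)
{
	char msg[] = "sync() called inside the target cgroup\n";
	int ret;

	/* Documented returns: 0 = not a descendant, 1 = descendant,
	 * < 0 = error, so only accept an explicit 1. */
	ret = bpf_current_task_under_cgroup(&cgroup_map, 0);
	if (ret != 1)
		return 0;

	bpf_trace_printk(msg, sizeof(msg));
	return 0;
}

char _license[] SEC("license") = "GPL";
u32 _version SEC("version") = LINUX_VERSION_CODE;
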
From 9e6e60ecbd7323d4ac3f98dcdc1bd2c527a736ef Mon Sep 17 00:00:00 2001
From: Sargun Dhillon
Date: Fri, 12 Aug 2016 08:57:04 -0700
Subject: [PATCH 3/3] samples/bpf: Add test_current_task_under_cgroup test

This test has a BPF program which writes to a map the pid of the last task
within a given cgroup to call the sync syscall.

The user mode program creates its own mount namespace, and mounts the
cgroupsv2 hierarchy in there, because on all current test systems
(Ubuntu 16.04, Debian) the cgroupsv2 vfs is unmounted by default. Once it
does this, it proceeds to run the test.

The test checks for positive and negative conditions. It ensures that when
it's part of a given cgroup, its pid is captured in the map, and that when
it leaves the cgroup, this doesn't happen.

It populates a cgroup arraymap prior to execution in userspace. This means
that the program must be run in the same cgroup namespace as the programs
that are being traced.

Signed-off-by: Sargun Dhillon
Cc: Alexei Starovoitov
Cc: Daniel Borkmann
Cc: Tejun Heo
Acked-by: Alexei Starovoitov
Signed-off-by: David S. Miller
---
 samples/bpf/Makefile                          |   5 +
 samples/bpf/bpf_helpers.h                     |   2 +
 .../bpf/test_current_task_under_cgroup_kern.c |  43 ++++++
 .../bpf/test_current_task_under_cgroup_user.c | 145 ++++++++++++++++++
 4 files changed, 195 insertions(+)
 create mode 100644 samples/bpf/test_current_task_under_cgroup_kern.c
 create mode 100644 samples/bpf/test_current_task_under_cgroup_user.c

diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile
index 90ebf7d35c07..eb582c6264c3 100644
--- a/samples/bpf/Makefile
+++ b/samples/bpf/Makefile
@@ -24,6 +24,7 @@ hostprogs-y += test_overhead
 hostprogs-y += test_cgrp2_array_pin
 hostprogs-y += xdp1
 hostprogs-y += xdp2
+hostprogs-y += test_current_task_under_cgroup
 
 test_verifier-objs := test_verifier.o libbpf.o
 test_maps-objs := test_maps.o libbpf.o
@@ -49,6 +50,8 @@ test_cgrp2_array_pin-objs := libbpf.o test_cgrp2_array_pin.o
 xdp1-objs := bpf_load.o libbpf.o xdp1_user.o
 # reuse xdp1 source intentionally
 xdp2-objs := bpf_load.o libbpf.o xdp1_user.o
+test_current_task_under_cgroup-objs := bpf_load.o libbpf.o \
+				       test_current_task_under_cgroup_user.o
 
 # Tell kbuild to always build the programs
 always := $(hostprogs-y)
@@ -74,6 +77,7 @@ always += parse_varlen.o parse_simple.o parse_ldabs.o
 always += test_cgrp2_tc_kern.o
 always += xdp1_kern.o
 always += xdp2_kern.o
+always += test_current_task_under_cgroup_kern.o
 
 HOSTCFLAGS += -I$(objtree)/usr/include
 
@@ -97,6 +101,7 @@ HOSTLOADLIBES_map_perf_test += -lelf -lrt
 HOSTLOADLIBES_test_overhead += -lelf -lrt
 HOSTLOADLIBES_xdp1 += -lelf
 HOSTLOADLIBES_xdp2 += -lelf
+HOSTLOADLIBES_test_current_task_under_cgroup += -lelf
 
 # Allows pointing LLC/CLANG to a LLVM backend with bpf support, redefine on cmdline:
 #  make samples/bpf/ LLC=~/git/llvm/build/bin/llc CLANG=~/git/llvm/build/bin/clang
diff --git a/samples/bpf/bpf_helpers.h b/samples/bpf/bpf_helpers.h
index cbc52df165b4..5e4c41e256b8 100644
--- a/samples/bpf/bpf_helpers.h
+++ b/samples/bpf/bpf_helpers.h
@@ -45,6 +45,8 @@ static int (*bpf_get_stackid)(void *ctx, void *map, int flags) =
 	(void *) BPF_FUNC_get_stackid;
 static int (*bpf_probe_write_user)(void *dst, void *src, int size) =
 	(void *) BPF_FUNC_probe_write_user;
+static int (*bpf_current_task_under_cgroup)(void *map, int index) =
+	(void *) BPF_FUNC_current_task_under_cgroup;
 
 /* llvm builtin functions that eBPF C program may use to
  * emit BPF_LD_ABS and BPF_LD_IND instructions
diff --git a/samples/bpf/test_current_task_under_cgroup_kern.c b/samples/bpf/test_current_task_under_cgroup_kern.c
new file mode 100644
index 000000000000..86b28d7d6c99
--- /dev/null
+++ b/samples/bpf/test_current_task_under_cgroup_kern.c
@@ -0,0 +1,43 @@
+/* Copyright (c) 2016 Sargun Dhillon
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+
+#include
+#include
+#include
+#include "bpf_helpers.h"
+#include
+
+struct bpf_map_def SEC("maps") cgroup_map = {
+	.type = BPF_MAP_TYPE_CGROUP_ARRAY,
+	.key_size = sizeof(u32),
+	.value_size = sizeof(u32),
+	.max_entries = 1,
+};
+
+struct bpf_map_def SEC("maps") perf_map = {
+	.type = BPF_MAP_TYPE_ARRAY,
+	.key_size = sizeof(u32),
+	.value_size = sizeof(u64),
+	.max_entries = 1,
+};
+
+/* Writes the last PID that called sync to a map at index 0 */
+SEC("kprobe/sys_sync")
+int bpf_prog1(struct pt_regs *ctx)
+{
+	u64 pid = bpf_get_current_pid_tgid();
+	int idx = 0;
+
+	if (!bpf_current_task_under_cgroup(&cgroup_map, 0))
+		return 0;
+
+	bpf_map_update_elem(&perf_map, &idx, &pid, BPF_ANY);
+	return 0;
+}
+
+char _license[] SEC("license") = "GPL";
+u32 _version SEC("version") = LINUX_VERSION_CODE;
diff --git a/samples/bpf/test_current_task_under_cgroup_user.c b/samples/bpf/test_current_task_under_cgroup_user.c
new file mode 100644
index 000000000000..30b0bce884f9
--- /dev/null
+++ b/samples/bpf/test_current_task_under_cgroup_user.c
@@ -0,0 +1,145 @@
+/* Copyright (c) 2016 Sargun Dhillon
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+
+#define _GNU_SOURCE
+#include
+#include
+#include
+#include "libbpf.h"
+#include "bpf_load.h"
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#define CGROUP_MOUNT_PATH	"/mnt"
+#define CGROUP_PATH		"/mnt/my-cgroup"
+
+#define clean_errno() (errno == 0 ? "None" : strerror(errno))
+#define log_err(MSG, ...) fprintf(stderr, "(%s:%d: errno: %s) " MSG "\n", \
+	__FILE__, __LINE__, clean_errno(), ##__VA_ARGS__)
+
+static int join_cgroup(char *path)
+{
+	int fd, rc = 0;
+	pid_t pid = getpid();
+	char cgroup_path[PATH_MAX + 1];
+
+	snprintf(cgroup_path, sizeof(cgroup_path), "%s/cgroup.procs", path);
+
+	fd = open(cgroup_path, O_WRONLY);
+	if (fd < 0) {
+		log_err("Opening Cgroup");
+		return 1;
+	}
+
+	if (dprintf(fd, "%d\n", pid) < 0) {
+		log_err("Joining Cgroup");
+		rc = 1;
+	}
+	close(fd);
+	return rc;
+}
+
+int main(int argc, char **argv)
+{
+	char filename[256];
+	int cg2, idx = 0;
+	pid_t remote_pid, local_pid = getpid();
+
+	snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+	if (load_bpf_file(filename)) {
+		printf("%s", bpf_log_buf);
+		return 1;
+	}
+
+	/*
+	 * This is to avoid interfering with existing cgroups. Unfortunately,
+	 * most people don't have cgroupv2 enabled at this point in time.
+	 * It's easier to create our own mount namespace and manage it
+	 * ourselves.
+	 */
+	if (unshare(CLONE_NEWNS)) {
+		log_err("unshare");
+		return 1;
+	}
+
+	if (mount("none", "/", NULL, MS_REC | MS_PRIVATE, NULL)) {
+		log_err("mount fakeroot");
+		return 1;
+	}
+
+	if (mount("none", CGROUP_MOUNT_PATH, "cgroup2", 0, NULL)) {
+		log_err("mount cgroup2");
+		return 1;
+	}
+
+	if (mkdir(CGROUP_PATH, 0777) && errno != EEXIST) {
+		log_err("mkdir cgroup");
+		return 1;
+	}
+
+	cg2 = open(CGROUP_PATH, O_RDONLY);
+	if (cg2 < 0) {
+		log_err("opening target cgroup");
+		goto cleanup_cgroup_err;
+	}
+
+	if (bpf_update_elem(map_fd[0], &idx, &cg2, BPF_ANY)) {
+		log_err("Adding target cgroup to map");
+		goto cleanup_cgroup_err;
+	}
+	if (join_cgroup("/mnt/my-cgroup")) {
+		log_err("Joining target cgroup");
+		goto cleanup_cgroup_err;
+	}
+
+	/*
+	 * The installed helper program caught the sync call, and should
+	 * write the pid to the map.
+	 */
+
+	sync();
+	bpf_lookup_elem(map_fd[1], &idx, &remote_pid);
+
+	if (local_pid != remote_pid) {
+		fprintf(stderr,
+			"BPF Helper didn't write correct PID to map, but: %d\n",
+			remote_pid);
+		goto leave_cgroup_err;
+	}
+
+	/* Verify the negative scenario; leave the cgroup */
+	if (join_cgroup(CGROUP_MOUNT_PATH))
+		goto leave_cgroup_err;
+
+	remote_pid = 0;
+	bpf_update_elem(map_fd[1], &idx, &remote_pid, BPF_ANY);
+
+	sync();
+	bpf_lookup_elem(map_fd[1], &idx, &remote_pid);
+
+	if (local_pid == remote_pid) {
+		fprintf(stderr, "BPF cgroup negative test did not work\n");
+		goto cleanup_cgroup_err;
+	}
+
+	rmdir(CGROUP_PATH);
+	return 0;
+
+	/* Error condition, cleanup */
+leave_cgroup_err:
+	join_cgroup(CGROUP_MOUNT_PATH);
+cleanup_cgroup_err:
+	rmdir(CGROUP_PATH);
+	return 1;
+}