Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf
Daniel Borkmann says:

====================
pull-request: bpf 2021-09-14

The following pull-request contains BPF updates for your *net* tree.

We've added 7 non-merge commits during the last 13 day(s) which contain
a total of 18 files changed, 334 insertions(+), 193 deletions(-).

The main changes are:

1) Fix mmap_lock lockdep splat in BPF stack map's build_id lookup, from Yonghong Song.

2) Fix BPF cgroup v2 program bypass upon net_cls/prio activation, from Daniel Borkmann.

3) Fix kvcalloc() BTF line info splat on oversized allocation attempts, from Bixuan Cui.

4) Fix BPF selftest build of task_pt_regs test for arm64/s390, from Jean-Philippe Brucker.

5) Fix BPF's disasm.{c,h} to dual-license so that it is aligned with bpftool given the
   former is a build dependency for the latter, from Daniel Borkmann with ACKs from
   contributors.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
commit 2865ba8247
--- a/include/linux/cgroup-defs.h
+++ b/include/linux/cgroup-defs.h
@@ -752,107 +752,54 @@ static inline void cgroup_threadgroup_change_end(struct task_struct *tsk) {}

 /*
  * sock_cgroup_data is embedded at sock->sk_cgrp_data and contains
  * per-socket cgroup information except for memcg association.
  *
- * On legacy hierarchies, net_prio and net_cls controllers directly set
- * attributes on each sock which can then be tested by the network layer.
- * On the default hierarchy, each sock is associated with the cgroup it was
- * created in and the networking layer can match the cgroup directly.
- *
- * To avoid carrying all three cgroup related fields separately in sock,
- * sock_cgroup_data overloads (prioidx, classid) and the cgroup pointer.
- * On boot, sock_cgroup_data records the cgroup that the sock was created
- * in so that cgroup2 matches can be made; however, once either net_prio or
- * net_cls starts being used, the area is overridden to carry prioidx and/or
- * classid. The two modes are distinguished by whether the lowest bit is
- * set. Clear bit indicates cgroup pointer while set bit prioidx and
- * classid.
- *
- * While userland may start using net_prio or net_cls at any time, once
- * either is used, cgroup2 matching no longer works. There is no reason to
- * mix the two and this is in line with how legacy and v2 compatibility is
- * handled. On mode switch, cgroup references which are already being
- * pointed to by socks may be leaked. While this can be remedied by adding
- * synchronization around sock_cgroup_data, given that the number of leaked
- * cgroups is bound and highly unlikely to be high, this seems to be the
- * better trade-off.
+ * On legacy hierarchies, net_prio and net_cls controllers directly
+ * set attributes on each sock which can then be tested by the network
+ * layer. On the default hierarchy, each sock is associated with the
+ * cgroup it was created in and the networking layer can match the
+ * cgroup directly.
  */
 struct sock_cgroup_data {
-	union {
-#ifdef __LITTLE_ENDIAN
-		struct {
-			u8	is_data : 1;
-			u8	no_refcnt : 1;
-			u8	unused : 6;
-			u8	padding;
-			u16	prioidx;
-			u32	classid;
-		} __packed;
-#else
-		struct {
-			u32	classid;
-			u16	prioidx;
-			u8	padding;
-			u8	unused : 6;
-			u8	no_refcnt : 1;
-			u8	is_data : 1;
-		} __packed;
-#endif
-		u64		val;
-	};
+	struct cgroup	*cgroup; /* v2 */
+#ifdef CONFIG_CGROUP_NET_CLASSID
+	u32		classid; /* v1 */
+#endif
+#ifdef CONFIG_CGROUP_NET_PRIO
+	u16		prioidx; /* v1 */
+#endif
 };

-/*
- * There's a theoretical window where the following accessors race with
- * updaters and return part of the previous pointer as the prioidx or
- * classid. Such races are short-lived and the result isn't critical.
- */
 static inline u16 sock_cgroup_prioidx(const struct sock_cgroup_data *skcd)
 {
-	/* fallback to 1 which is always the ID of the root cgroup */
-	return (skcd->is_data & 1) ? skcd->prioidx : 1;
+#ifdef CONFIG_CGROUP_NET_PRIO
+	return READ_ONCE(skcd->prioidx);
+#else
+	return 1;
+#endif
 }

 static inline u32 sock_cgroup_classid(const struct sock_cgroup_data *skcd)
 {
-	/* fallback to 0 which is the unconfigured default classid */
-	return (skcd->is_data & 1) ? skcd->classid : 0;
+#ifdef CONFIG_CGROUP_NET_CLASSID
+	return READ_ONCE(skcd->classid);
+#else
+	return 0;
+#endif
 }

-/*
- * If invoked concurrently, the updaters may clobber each other. The
- * caller is responsible for synchronization.
- */
 static inline void sock_cgroup_set_prioidx(struct sock_cgroup_data *skcd,
 					   u16 prioidx)
 {
-	struct sock_cgroup_data skcd_buf = {{ .val = READ_ONCE(skcd->val) }};
-
-	if (sock_cgroup_prioidx(&skcd_buf) == prioidx)
-		return;
-
-	if (!(skcd_buf.is_data & 1)) {
-		skcd_buf.val = 0;
-		skcd_buf.is_data = 1;
-	}
-
-	skcd_buf.prioidx = prioidx;
-	WRITE_ONCE(skcd->val, skcd_buf.val); /* see sock_cgroup_ptr() */
+#ifdef CONFIG_CGROUP_NET_PRIO
+	WRITE_ONCE(skcd->prioidx, prioidx);
+#endif
 }

 static inline void sock_cgroup_set_classid(struct sock_cgroup_data *skcd,
 					   u32 classid)
 {
-	struct sock_cgroup_data skcd_buf = {{ .val = READ_ONCE(skcd->val) }};
-
-	if (sock_cgroup_classid(&skcd_buf) == classid)
-		return;
-
-	if (!(skcd_buf.is_data & 1)) {
-		skcd_buf.val = 0;
-		skcd_buf.is_data = 1;
-	}
-
-	skcd_buf.classid = classid;
-	WRITE_ONCE(skcd->val, skcd_buf.val); /* see sock_cgroup_ptr() */
+#ifdef CONFIG_CGROUP_NET_CLASSID
+	WRITE_ONCE(skcd->classid, classid);
+#endif
 }

 #else	/* CONFIG_SOCK_CGROUP_DATA */
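For readers unfamiliar with the removed scheme: a minimal userspace sketch (illustrative only; fake_cgroup and the exact bit packing are invented, not the kernel's precise layout) of the pointer-overloading trick the deleted comment describes, and why activating net_cls/net_prio destroyed the cgroup v2 pointer and thus bypassed attached v2 programs:

#include <assert.h>
#include <stdint.h>

struct fake_cgroup { int id; };

int main(void)
{
	struct fake_cgroup cg = { .id = 1 };
	/* boot: the one 64-bit word holds a cgroup pointer, low bit clear */
	uint64_t val = (uint64_t)(uintptr_t)&cg;

	assert(!(val & 1));	/* pointer mode: v2 matching still possible */

	/* net_cls activation: the same word is overwritten with tagged v1 data */
	uint32_t classid = 42;

	val = ((uint64_t)classid << 32) | 1;	/* low bit set: data mode */
	assert(val & 1);	/* cgroup pointer is unrecoverable; v2 programs were bypassed */
	return 0;
}

The fix above sidesteps the whole encoding by giving the v2 pointer and the v1 fields their own slots, with READ_ONCE()/WRITE_ONCE() guarding against torn reads of the individually updated fields.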
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -829,33 +829,13 @@ static inline void cgroup_account_cputime_field(struct task_struct *task,
  */
 #ifdef CONFIG_SOCK_CGROUP_DATA

-#if defined(CONFIG_CGROUP_NET_PRIO) || defined(CONFIG_CGROUP_NET_CLASSID)
-extern spinlock_t cgroup_sk_update_lock;
-#endif
-
-void cgroup_sk_alloc_disable(void);
 void cgroup_sk_alloc(struct sock_cgroup_data *skcd);
 void cgroup_sk_clone(struct sock_cgroup_data *skcd);
 void cgroup_sk_free(struct sock_cgroup_data *skcd);

 static inline struct cgroup *sock_cgroup_ptr(struct sock_cgroup_data *skcd)
 {
-#if defined(CONFIG_CGROUP_NET_PRIO) || defined(CONFIG_CGROUP_NET_CLASSID)
-	unsigned long v;
-
-	/*
-	 * @skcd->val is 64bit but the following is safe on 32bit too as we
-	 * just need the lower ulong to be written and read atomically.
-	 */
-	v = READ_ONCE(skcd->val);
-
-	if (v & 3)
-		return &cgrp_dfl_root.cgrp;
-
-	return (struct cgroup *)(unsigned long)v ?: &cgrp_dfl_root.cgrp;
-#else
-	return (struct cgroup *)(unsigned long)skcd->val;
-#endif
+	return skcd->cgroup;
 }

 #else	/* CONFIG_CGROUP_DATA */
--- a/include/linux/mmap_lock.h
+++ b/include/linux/mmap_lock.h
@@ -144,15 +144,6 @@ static inline void mmap_read_unlock(struct mm_struct *mm)
 	__mmap_lock_trace_released(mm, false);
 }

-static inline bool mmap_read_trylock_non_owner(struct mm_struct *mm)
-{
-	if (mmap_read_trylock(mm)) {
-		rwsem_release(&mm->mmap_lock.dep_map, _RET_IP_);
-		return true;
-	}
-	return false;
-}
-
 static inline void mmap_read_unlock_non_owner(struct mm_struct *mm)
 {
 	up_read_non_owner(&mm->mmap_lock);
--- a/kernel/bpf/disasm.c
+++ b/kernel/bpf/disasm.c
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0-only
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
 /* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
  * Copyright (c) 2016 Facebook
  */
--- a/kernel/bpf/disasm.h
+++ b/kernel/bpf/disasm.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
+/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
 /* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
  * Copyright (c) 2016 Facebook
  */
--- a/kernel/bpf/stackmap.c
+++ b/kernel/bpf/stackmap.c
@@ -179,7 +179,7 @@ static void stack_map_get_build_id_offset(struct bpf_stack_build_id *id_offs,
 	 * with build_id.
 	 */
 	if (!user || !current || !current->mm || irq_work_busy ||
-	    !mmap_read_trylock_non_owner(current->mm)) {
+	    !mmap_read_trylock(current->mm)) {
 		/* cannot access current->mm, fall back to ips */
 		for (i = 0; i < trace_nr; i++) {
 			id_offs[i].status = BPF_STACK_BUILD_ID_IP;
@@ -204,9 +204,15 @@ static void stack_map_get_build_id_offset(struct bpf_stack_build_id *id_offs,
 	}

 	if (!work) {
-		mmap_read_unlock_non_owner(current->mm);
+		mmap_read_unlock(current->mm);
 	} else {
 		work->mm = current->mm;
+
+		/* The lock will be released once we're out of interrupt
+		 * context. Tell lockdep that we've released it now so
+		 * it doesn't complain that we forgot to release it.
+		 */
+		rwsem_release(&current->mm->mmap_lock.dep_map, _RET_IP_);
 		irq_work_queue(&work->irq_work);
 	}
 }
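The core idea of this fix: mmap_lock is taken in whatever context the BPF program runs, but build_id extraction may be deferred to irq_work, so the release happens in a different context; lockdep is told about the release at hand-off time instead. As a loose userspace analogy (all names here are invented for illustration, this is not kernel code), POSIX semaphores, unlike mutexes, permit a release by a thread that never acquired, which is the same ownership-transfer idea as up_read_non_owner():

#include <pthread.h>
#include <semaphore.h>
#include <stdio.h>

static sem_t lock;	/* stands in for the reader side of mmap_lock */

static void *deferred_release(void *arg)
{
	/* the "irq_work" side: never acquired the lock, but may release it */
	(void)arg;
	puts("deferred context: releasing");
	sem_post(&lock);
	return NULL;
}

int main(void)
{
	pthread_t t;

	sem_init(&lock, 0, 1);
	sem_wait(&lock);	/* acquire in "task" context */
	pthread_create(&t, NULL, deferred_release, NULL);
	pthread_join(t, NULL);
	sem_destroy(&lock);
	return 0;
}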
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -9912,6 +9912,8 @@ static int check_btf_line(struct bpf_verifier_env *env,
 	nr_linfo = attr->line_info_cnt;
 	if (!nr_linfo)
 		return 0;
+	if (nr_linfo > INT_MAX / sizeof(struct bpf_line_info))
+		return -EINVAL;

 	rec_size = attr->line_info_rec_size;
 	if (rec_size < MIN_BPF_LINEINFO_SIZE ||
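The added guard bounds the count before the allocator ever sees the multiplied size, avoiding the kvcalloc() warning on absurd line_info_cnt values. A hedged sketch of the same overflow-safe pattern in plain C (the struct layout is a stand-in for illustration, though struct bpf_line_info is indeed 16 bytes):

#include <limits.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

/* stand-in for struct bpf_line_info */
struct line_info { uint32_t insn_off, file_name_off, line_off, line_col; };

static void *alloc_linfo(uint32_t nr_linfo)
{
	/* Reject counts whose total byte size would exceed INT_MAX,
	 * mirroring the precondition the verifier now enforces. */
	if (!nr_linfo || nr_linfo > INT_MAX / sizeof(struct line_info))
		return NULL;
	return calloc(nr_linfo, sizeof(struct line_info));
}

int main(void)
{
	void *ok  = alloc_linfo(1000);		/* accepted */
	void *bad = alloc_linfo(UINT32_MAX);	/* rejected: NULL */

	printf("ok=%p bad=%p\n", ok, bad);
	free(ok);
	return 0;
}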
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -6572,74 +6572,44 @@ int cgroup_parse_float(const char *input, unsigned dec_shift, s64 *v)
  */
 #ifdef CONFIG_SOCK_CGROUP_DATA

-#if defined(CONFIG_CGROUP_NET_PRIO) || defined(CONFIG_CGROUP_NET_CLASSID)
-
-DEFINE_SPINLOCK(cgroup_sk_update_lock);
-static bool cgroup_sk_alloc_disabled __read_mostly;
-
-void cgroup_sk_alloc_disable(void)
-{
-	if (cgroup_sk_alloc_disabled)
-		return;
-	pr_info("cgroup: disabling cgroup2 socket matching due to net_prio or net_cls activation\n");
-	cgroup_sk_alloc_disabled = true;
-}
-
-#else
-
-#define cgroup_sk_alloc_disabled	false
-
-#endif
-
 void cgroup_sk_alloc(struct sock_cgroup_data *skcd)
 {
-	if (cgroup_sk_alloc_disabled) {
-		skcd->no_refcnt = 1;
-		return;
-	}
-
 	/* Don't associate the sock with unrelated interrupted task's cgroup. */
 	if (in_interrupt())
 		return;

 	rcu_read_lock();
 	while (true) {
 		struct css_set *cset;

 		cset = task_css_set(current);
 		if (likely(cgroup_tryget(cset->dfl_cgrp))) {
-			skcd->val = (unsigned long)cset->dfl_cgrp;
+			skcd->cgroup = cset->dfl_cgrp;
 			cgroup_bpf_get(cset->dfl_cgrp);
 			break;
 		}
 		cpu_relax();
 	}
 	rcu_read_unlock();
 }

 void cgroup_sk_clone(struct sock_cgroup_data *skcd)
 {
-	if (skcd->val) {
-		if (skcd->no_refcnt)
-			return;
-		/*
-		 * We might be cloning a socket which is left in an empty
-		 * cgroup and the cgroup might have already been rmdir'd.
-		 * Don't use cgroup_get_live().
-		 */
-		cgroup_get(sock_cgroup_ptr(skcd));
-		cgroup_bpf_get(sock_cgroup_ptr(skcd));
-	}
+	struct cgroup *cgrp = sock_cgroup_ptr(skcd);
+
+	/*
+	 * We might be cloning a socket which is left in an empty
+	 * cgroup and the cgroup might have already been rmdir'd.
+	 * Don't use cgroup_get_live().
+	 */
+	cgroup_get(cgrp);
+	cgroup_bpf_get(cgrp);
 }

 void cgroup_sk_free(struct sock_cgroup_data *skcd)
 {
 	struct cgroup *cgrp = sock_cgroup_ptr(skcd);

-	if (skcd->no_refcnt)
-		return;
 	cgroup_bpf_put(cgrp);
 	cgroup_put(cgrp);
 }
--- a/net/core/netclassid_cgroup.c
+++ b/net/core/netclassid_cgroup.c
@@ -71,11 +71,8 @@ static int update_classid_sock(const void *v, struct file *file, unsigned n)
 	struct update_classid_context *ctx = (void *)v;
 	struct socket *sock = sock_from_file(file);

-	if (sock) {
-		spin_lock(&cgroup_sk_update_lock);
+	if (sock)
 		sock_cgroup_set_classid(&sock->sk->sk_cgrp_data, ctx->classid);
-		spin_unlock(&cgroup_sk_update_lock);
-	}
 	if (--ctx->batch == 0) {
 		ctx->batch = UPDATE_CLASSID_BATCH;
 		return n + 1;
@@ -121,8 +118,6 @@ static int write_classid(struct cgroup_subsys_state *css, struct cftype *cft,
 	struct css_task_iter it;
 	struct task_struct *p;

-	cgroup_sk_alloc_disable();
-
 	cs->classid = (u32)value;

 	css_task_iter_start(css, 0, &it);
--- a/net/core/netprio_cgroup.c
+++ b/net/core/netprio_cgroup.c
@@ -207,8 +207,6 @@ static ssize_t write_priomap(struct kernfs_open_file *of,
 	if (!dev)
 		return -ENODEV;

-	cgroup_sk_alloc_disable();
-
 	rtnl_lock();

 	ret = netprio_set_prio(of_css(of), dev, prio);
@@ -221,12 +219,10 @@ static ssize_t write_priomap(struct kernfs_open_file *of,
 static int update_netprio(const void *v, struct file *file, unsigned n)
 {
 	struct socket *sock = sock_from_file(file);
-	if (sock) {
-		spin_lock(&cgroup_sk_update_lock);
+
+	if (sock)
 		sock_cgroup_set_prioidx(&sock->sk->sk_cgrp_data,
 					(unsigned long)v);
-		spin_unlock(&cgroup_sk_update_lock);
-	}
+
 	return 0;
 }

@@ -235,8 +231,6 @@ static void net_prio_attach(struct cgroup_taskset *tset)
 	struct task_struct *p;
 	struct cgroup_subsys_state *css;

-	cgroup_sk_alloc_disable();
-
 	cgroup_taskset_for_each(p, css, tset) {
 		void *v = (void *)(unsigned long)css->id;
--- a/tools/testing/selftests/bpf/cgroup_helpers.c
+++ b/tools/testing/selftests/bpf/cgroup_helpers.c
@@ -12,27 +12,36 @@
 #include <unistd.h>
 #include <ftw.h>


 #include "cgroup_helpers.h"

 /*
  * To avoid relying on the system setup, when setup_cgroup_env is called
- * we create a new mount namespace, and cgroup namespace. The cgroup2
- * root is mounted at CGROUP_MOUNT_PATH
- *
- * Unfortunately, most people don't have cgroupv2 enabled at this point in time.
- * It's easier to create our own mount namespace and manage it ourselves.
- *
- * We assume /mnt exists.
+ * we create a new mount namespace, and cgroup namespace. The cgroupv2
+ * root is mounted at CGROUP_MOUNT_PATH. Unfortunately, most people don't
+ * have cgroupv2 enabled at this point in time. It's easier to create our
+ * own mount namespace and manage it ourselves. We assume /mnt exists.
+ *
+ * Related cgroupv1 helpers are named *classid*(), since we only use the
+ * net_cls controller for tagging net_cls.classid. We assume the default
+ * mount under /sys/fs/cgroup/net_cls, which should be the case for the
+ * vast majority of users.
  */

 #define WALK_FD_LIMIT 16

 #define CGROUP_MOUNT_PATH "/mnt"
+#define CGROUP_MOUNT_DFLT "/sys/fs/cgroup"
+#define NETCLS_MOUNT_PATH CGROUP_MOUNT_DFLT "/net_cls"
 #define CGROUP_WORK_DIR "/cgroup-test-work-dir"

 #define format_cgroup_path(buf, path) \
 	snprintf(buf, sizeof(buf), "%s%s%s", CGROUP_MOUNT_PATH, \
 		 CGROUP_WORK_DIR, path)
+
+#define format_classid_path(buf) \
+	snprintf(buf, sizeof(buf), "%s%s", NETCLS_MOUNT_PATH, \
+		 CGROUP_WORK_DIR)
@@ -139,8 +148,7 @@ static int nftwfunc(const char *filename, const struct stat *statptr,
 	return 0;
 }

-
-static int join_cgroup_from_top(char *cgroup_path)
+static int join_cgroup_from_top(const char *cgroup_path)
 {
 	char cgroup_procs_path[PATH_MAX + 1];
 	pid_t pid = getpid();
@@ -313,3 +321,114 @@ int cgroup_setup_and_join(const char *path) {
 	}
 	return cg_fd;
 }
+
+/**
+ * setup_classid_environment() - Setup the cgroupv1 net_cls environment
+ *
+ * After calling this function, cleanup_classid_environment should be called
+ * once testing is complete.
+ *
+ * This function will print an error to stderr and return 1 if it is unable
+ * to setup the cgroup environment. If setup is successful, 0 is returned.
+ */
+int setup_classid_environment(void)
+{
+	char cgroup_workdir[PATH_MAX + 1];
+
+	format_classid_path(cgroup_workdir);
+
+	if (mount("tmpfs", CGROUP_MOUNT_DFLT, "tmpfs", 0, NULL) &&
+	    errno != EBUSY) {
+		log_err("mount cgroup base");
+		return 1;
+	}
+
+	if (mkdir(NETCLS_MOUNT_PATH, 0777) && errno != EEXIST) {
+		log_err("mkdir cgroup net_cls");
+		return 1;
+	}
+
+	if (mount("net_cls", NETCLS_MOUNT_PATH, "cgroup", 0, "net_cls") &&
+	    errno != EBUSY) {
+		log_err("mount cgroup net_cls");
+		return 1;
+	}
+
+	cleanup_classid_environment();
+
+	if (mkdir(cgroup_workdir, 0777) && errno != EEXIST) {
+		log_err("mkdir cgroup work dir");
+		return 1;
+	}
+
+	return 0;
+}
+
+/**
+ * set_classid() - Set a cgroupv1 net_cls classid
+ * @id: the numeric classid
+ *
+ * Writes the passed classid into the cgroup work dir's net_cls.classid
+ * file in order to later on trigger socket tagging.
+ *
+ * On success, it returns 0, otherwise on failure it returns 1. If there
+ * is a failure, it prints the error to stderr.
+ */
+int set_classid(unsigned int id)
+{
+	char cgroup_workdir[PATH_MAX - 42];
+	char cgroup_classid_path[PATH_MAX + 1];
+	int fd, rc = 0;
+
+	format_classid_path(cgroup_workdir);
+	snprintf(cgroup_classid_path, sizeof(cgroup_classid_path),
+		 "%s/net_cls.classid", cgroup_workdir);
+
+	fd = open(cgroup_classid_path, O_WRONLY);
+	if (fd < 0) {
+		log_err("Opening cgroup classid: %s", cgroup_classid_path);
+		return 1;
+	}
+
+	if (dprintf(fd, "%u\n", id) < 0) {
+		log_err("Setting cgroup classid");
+		rc = 1;
+	}
+
+	close(fd);
+	return rc;
+}
+
+/**
+ * join_classid() - Join a cgroupv1 net_cls classid
+ *
+ * This function expects the cgroup work dir to be already created, as we
+ * join it here. This causes the process sockets to be tagged with the given
+ * net_cls classid.
+ *
+ * On success, it returns 0, otherwise on failure it returns 1.
+ */
+int join_classid(void)
+{
+	char cgroup_workdir[PATH_MAX + 1];
+
+	format_classid_path(cgroup_workdir);
+	return join_cgroup_from_top(cgroup_workdir);
+}
+
+/**
+ * cleanup_classid_environment() - Cleanup the cgroupv1 net_cls environment
+ *
+ * At call time, it moves the calling process to the root cgroup, and then
+ * runs the deletion process.
+ *
+ * On failure, it will print an error to stderr, and try to continue.
+ */
+void cleanup_classid_environment(void)
+{
+	char cgroup_workdir[PATH_MAX + 1];
+
+	format_classid_path(cgroup_workdir);
+	join_cgroup_from_top(NETCLS_MOUNT_PATH);
+	nftw(cgroup_workdir, nftwfunc, WALK_FD_LIMIT, FTW_DEPTH | FTW_MOUNT);
+}
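A hedged sketch of the intended call order for the new cgroupv1 helpers (a standalone driver written for illustration; the real consumer is the cgroup_v1v2 test further below, and like it, this needs root and the selftests tree on the include path):

#include "cgroup_helpers.h"

int main(void)
{
	int err = 1;

	if (setup_classid_environment())
		return 1;
	/* Tag this process's subsequently created sockets with
	 * net_cls.classid 42, then join the tagged cgroup. */
	if (set_classid(42) || join_classid())
		goto out;
	/* ... open sockets here; they now carry classid 42 ... */
	err = 0;
out:
	cleanup_classid_environment();
	return err;
}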
--- a/tools/testing/selftests/bpf/cgroup_helpers.h
+++ b/tools/testing/selftests/bpf/cgroup_helpers.h
@@ -1,6 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 */
 #ifndef __CGROUP_HELPERS_H
 #define __CGROUP_HELPERS_H
+
 #include <errno.h>
 #include <string.h>

@@ -8,12 +9,21 @@
 #define log_err(MSG, ...) fprintf(stderr, "(%s:%d: errno: %s) " MSG "\n", \
 	__FILE__, __LINE__, clean_errno(), ##__VA_ARGS__)

+/* cgroupv2 related */
 int cgroup_setup_and_join(const char *path);
 int create_and_get_cgroup(const char *path);
-int join_cgroup(const char *path);
-int setup_cgroup_environment(void);
-void cleanup_cgroup_environment(void);
 unsigned long long get_cgroup_id(const char *path);

-#endif
+int join_cgroup(const char *path);
+
+int setup_cgroup_environment(void);
+void cleanup_cgroup_environment(void);
+
+/* cgroupv1 related */
+int set_classid(unsigned int id);
+int join_classid(void);
+
+int setup_classid_environment(void);
+void cleanup_classid_environment(void);
+
+#endif /* __CGROUP_HELPERS_H */
--- a/tools/testing/selftests/bpf/network_helpers.c
+++ b/tools/testing/selftests/bpf/network_helpers.c
@@ -208,11 +208,26 @@ error_close:

 static int connect_fd_to_addr(int fd,
 			      const struct sockaddr_storage *addr,
-			      socklen_t addrlen)
+			      socklen_t addrlen, const bool must_fail)
 {
-	if (connect(fd, (const struct sockaddr *)addr, addrlen)) {
-		log_err("Failed to connect to server");
-		return -1;
+	int ret;
+
+	errno = 0;
+	ret = connect(fd, (const struct sockaddr *)addr, addrlen);
+	if (must_fail) {
+		if (!ret) {
+			log_err("Unexpected success to connect to server");
+			return -1;
+		}
+		if (errno != EPERM) {
+			log_err("Unexpected error from connect to server");
+			return -1;
+		}
+	} else {
+		if (ret) {
+			log_err("Failed to connect to server");
+			return -1;
+		}
 	}

 	return 0;
@@ -257,7 +272,7 @@ int connect_to_fd_opts(int server_fd, const struct network_helper_opts *opts)
 		      strlen(opts->cc) + 1))
 			goto error_close;

-	if (connect_fd_to_addr(fd, &addr, addrlen))
+	if (connect_fd_to_addr(fd, &addr, addrlen, opts->must_fail))
 		goto error_close;

 	return fd;
@@ -289,7 +304,7 @@ int connect_fd_to_fd(int client_fd, int server_fd, int timeout_ms)
 		return -1;
 	}

-	if (connect_fd_to_addr(client_fd, &addr, len))
+	if (connect_fd_to_addr(client_fd, &addr, len, false))
 		return -1;

 	return 0;
--- a/tools/testing/selftests/bpf/network_helpers.h
+++ b/tools/testing/selftests/bpf/network_helpers.h
@@ -20,6 +20,7 @@ typedef __u16 __sum16;
 struct network_helper_opts {
 	const char *cc;
 	int timeout_ms;
+	bool must_fail;
 };

 /* ipv4 test vector */
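A sketch of how a caller drives the new must_fail knob (the wrapper name expect_blocked is assumed for illustration, not part of the change): when an attached cgroup/connect4 program returns verdict 0, connect() fails with EPERM, and the helper now treats exactly that outcome as success:

#include <unistd.h>

#include "network_helpers.h"

static int expect_blocked(int server_fd)
{
	struct network_helper_opts opts = {
		.must_fail = true,	/* demand connect() to fail with EPERM */
	};
	int fd = connect_to_fd_opts(server_fd, &opts);

	if (fd < 0)
		return -1;	/* connect unexpectedly succeeded, or wrong errno */
	close(fd);
	return 0;
}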
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/cgroup_v1v2.c (new file, 79 lines)
@@ -0,0 +1,79 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <test_progs.h>
+
+#include "connect4_dropper.skel.h"
+
+#include "cgroup_helpers.h"
+#include "network_helpers.h"
+
+static int run_test(int cgroup_fd, int server_fd, bool classid)
+{
+	struct network_helper_opts opts = {
+		.must_fail = true,
+	};
+	struct connect4_dropper *skel;
+	int fd, err = 0;
+
+	skel = connect4_dropper__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "skel_open"))
+		return -1;
+
+	skel->links.connect_v4_dropper =
+		bpf_program__attach_cgroup(skel->progs.connect_v4_dropper,
+					   cgroup_fd);
+	if (!ASSERT_OK_PTR(skel->links.connect_v4_dropper, "prog_attach")) {
+		err = -1;
+		goto out;
+	}
+
+	if (classid && !ASSERT_OK(join_classid(), "join_classid")) {
+		err = -1;
+		goto out;
+	}
+
+	fd = connect_to_fd_opts(server_fd, &opts);
+	if (fd < 0)
+		err = -1;
+	else
+		close(fd);
+out:
+	connect4_dropper__destroy(skel);
+	return err;
+}
+
+void test_cgroup_v1v2(void)
+{
+	struct network_helper_opts opts = {};
+	int server_fd, client_fd, cgroup_fd;
+	static const int port = 60123;
+
+	/* Step 1: Check base connectivity works without any BPF. */
+	server_fd = start_server(AF_INET, SOCK_STREAM, NULL, port, 0);
+	if (!ASSERT_GE(server_fd, 0, "server_fd"))
+		return;
+	client_fd = connect_to_fd_opts(server_fd, &opts);
+	if (!ASSERT_GE(client_fd, 0, "client_fd")) {
+		close(server_fd);
+		return;
+	}
+	close(client_fd);
+	close(server_fd);
+
+	/* Step 2: Check BPF policy prog attached to cgroups drops connectivity. */
+	cgroup_fd = test__join_cgroup("/connect_dropper");
+	if (!ASSERT_GE(cgroup_fd, 0, "cgroup_fd"))
+		return;
+	server_fd = start_server(AF_INET, SOCK_STREAM, NULL, port, 0);
+	if (!ASSERT_GE(server_fd, 0, "server_fd")) {
+		close(cgroup_fd);
+		return;
+	}
+	ASSERT_OK(run_test(cgroup_fd, server_fd, false), "cgroup-v2-only");
+	setup_classid_environment();
+	set_classid(42);
+	ASSERT_OK(run_test(cgroup_fd, server_fd, true), "cgroup-v1v2");
+	cleanup_classid_environment();
+	close(server_fd);
+	close(cgroup_fd);
+}
--- a/tools/testing/selftests/bpf/prog_tests/task_pt_regs.c
+++ b/tools/testing/selftests/bpf/prog_tests/task_pt_regs.c
@@ -1,7 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0
 #define _GNU_SOURCE
 #include <test_progs.h>
-#include <linux/ptrace.h>
 #include "test_task_pt_regs.skel.h"

 void test_task_pt_regs(void)
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/connect4_dropper.c (new file, 26 lines)
@@ -0,0 +1,26 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <string.h>
+
+#include <linux/stddef.h>
+#include <linux/bpf.h>
+
+#include <sys/socket.h>
+
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+
+#define VERDICT_REJECT	0
+#define VERDICT_PROCEED	1
+
+SEC("cgroup/connect4")
+int connect_v4_dropper(struct bpf_sock_addr *ctx)
+{
+	if (ctx->type != SOCK_STREAM)
+		return VERDICT_PROCEED;
+	if (ctx->user_port == bpf_htons(60123))
+		return VERDICT_REJECT;
+	return VERDICT_PROCEED;
+}
+
+char _license[] SEC("license") = "GPL";
--- a/tools/testing/selftests/bpf/progs/test_task_pt_regs.c
+++ b/tools/testing/selftests/bpf/progs/test_task_pt_regs.c
@@ -1,12 +1,17 @@
 // SPDX-License-Identifier: GPL-2.0

-#include <linux/ptrace.h>
-#include <linux/bpf.h>
+#include "vmlinux.h"
 #include <bpf/bpf_helpers.h>
 #include <bpf/bpf_tracing.h>

-struct pt_regs current_regs = {};
-struct pt_regs ctx_regs = {};
+#define PT_REGS_SIZE sizeof(struct pt_regs)
+
+/*
+ * The kernel struct pt_regs isn't exported in its entirety to userspace.
+ * Pass it as an array to task_pt_regs.c
+ */
+char current_regs[PT_REGS_SIZE] = {};
+char ctx_regs[PT_REGS_SIZE] = {};
 int uprobe_res = 0;

 SEC("uprobe/trigger_func")
@@ -17,8 +22,10 @@ int handle_uprobe(struct pt_regs *ctx)

 	current = bpf_get_current_task_btf();
 	regs = (struct pt_regs *) bpf_task_pt_regs(current);
-	__builtin_memcpy(&current_regs, regs, sizeof(*regs));
-	__builtin_memcpy(&ctx_regs, ctx, sizeof(*ctx));
+	if (bpf_probe_read_kernel(current_regs, PT_REGS_SIZE, regs))
+		return 0;
+	if (bpf_probe_read_kernel(ctx_regs, PT_REGS_SIZE, ctx))
+		return 0;

 	/* Prove that uprobe was run */
 	uprobe_res = 1;
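With this change the skeleton exposes current_regs and ctx_regs as flat byte arrays, so the userspace side can compare register snapshots without any userspace definition of the kernel's struct pt_regs. A minimal sketch of that comparison idea (dummy data and helper name invented for illustration; not the selftest's actual harness code):

#include <stdio.h>
#include <string.h>

/* Compare two register snapshots as opaque byte blobs; no struct
 * pt_regs layout knowledge is needed in userspace. */
static int regs_match(const char *a, const char *b, size_t len)
{
	return memcmp(a, b, len) == 0;
}

int main(void)
{
	char current_regs[64] = { 1, 2, 3 };	/* dummy snapshots */
	char ctx_regs[64]     = { 1, 2, 3 };

	printf("match: %d\n",
	       regs_match(current_regs, ctx_regs, sizeof(current_regs)));
	return 0;
}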