Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next

Daniel Borkmann says:

====================
pull-request: bpf-next 2018-08-07

The following pull-request contains BPF updates for your *net-next* tree.

The main changes are:

1) Add cgroup local storage for BPF programs, which provides a fast
   accessible memory for storing various per-cgroup data like number
   of transmitted packets, etc, from Roman.

2) Support bpf_get_socket_cookie() BPF helper in several more program
   types that have a full socket available, from Andrey.

3) Significantly improve the performance of perf events which are
   reported from BPF offload. Also convert a couple of BPF AF_XDP
   samples overto use libbpf, both from Jakub.

4) seg6local LWT provides the End.DT6 action, which allows to
   decapsulate an outer IPv6 header containing a Segment Routing Header.
   Adds this action now to the seg6local BPF interface, from Mathieu.

5) Do not mark dst register as unbounded in MOV64 instruction when
   both src and dst register are the same, from Arthur.

6) Define u_smp_rmb() and u_smp_wmb() to their respective barrier
   instructions on arm64 for the AF_XDP sample code, from Brian.

7) Convert the tcp_client.py and tcp_server.py BPF selftest scripts
   over from Python 2 to Python 3, from Jeremy.

8) Enable BTF build flags to the BPF sample code Makefile, from Taeung.

9) Remove an unnecessary rcu_read_lock() in run_lwt_bpf(), from Taehee.

10) Several improvements to the README.rst from the BPF documentation
    to make it more consistent with RST format, from Tobin.

11) Replace all occurrences of strerror() by calls to strerror_r()
    in libbpf and fix a FORTIFY_SOURCE build error along with it,
    from Thomas.

12) Fix a bug in bpftool's get_btf() function to correctly propagate
    an error via PTR_ERR(), from Yue.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
David S. Miller 2018-08-07 11:02:05 -07:00
commit 1ba982806c
50 changed files with 1896 additions and 303 deletions

View File

@ -1,5 +1,5 @@
=================
BPF documentation
BPF Documentation
=================
This directory contains documentation for the BPF (Berkeley Packet
@ -22,14 +22,14 @@ Frequently asked questions (FAQ)
Two sets of Questions and Answers (Q&A) are maintained.
* QA for common questions about BPF see: bpf_design_QA_
.. toctree::
:maxdepth: 1
* QA for developers interacting with BPF subsystem: bpf_devel_QA_
bpf_design_QA
bpf_devel_QA
.. Links:
.. _bpf_design_QA: bpf_design_QA.rst
.. _bpf_devel_QA: bpf_devel_QA.rst
.. _Documentation/networking/filter.txt: ../networking/filter.txt
.. _man-pages: https://www.kernel.org/doc/man-pages/
.. _bpf(2): http://man7.org/linux/man-pages/man2/bpf.2.html

View File

@ -90,6 +90,7 @@ needed).
crypto/index
filesystems/index
vm/index
bpf/index
Architecture-specific documentation
-----------------------------------

View File

@ -196,14 +196,16 @@ void lirc_bpf_run(struct rc_dev *rcdev, u32 sample)
*/
void lirc_bpf_free(struct rc_dev *rcdev)
{
struct bpf_prog **progs;
struct bpf_prog_array_item *item;
if (!rcdev->raw->progs)
return;
progs = rcu_dereference(rcdev->raw->progs)->progs;
while (*progs)
bpf_prog_put(*progs++);
item = rcu_dereference(rcdev->raw->progs)->items;
while (item->prog) {
bpf_prog_put(item->prog);
item++;
}
bpf_prog_array_free(rcdev->raw->progs);
}

View File

@ -43,8 +43,6 @@
#include "fw.h"
#include "main.h"
#define cmsg_warn(bpf, msg...) nn_dp_warn(&(bpf)->app->ctrl->dp, msg)
#define NFP_BPF_TAG_ALLOC_SPAN (U16_MAX / 4)
static bool nfp_bpf_all_tags_busy(struct nfp_app_bpf *bpf)
@ -441,7 +439,10 @@ void nfp_bpf_ctrl_msg_rx(struct nfp_app *app, struct sk_buff *skb)
}
if (nfp_bpf_cmsg_get_type(skb) == CMSG_TYPE_BPF_EVENT) {
nfp_bpf_event_output(bpf, skb);
if (!nfp_bpf_event_output(bpf, skb->data, skb->len))
dev_consume_skb_any(skb);
else
dev_kfree_skb_any(skb);
return;
}
@ -465,3 +466,21 @@ err_unlock:
err_free:
dev_kfree_skb_any(skb);
}
void
nfp_bpf_ctrl_msg_rx_raw(struct nfp_app *app, const void *data, unsigned int len)
{
struct nfp_app_bpf *bpf = app->priv;
const struct cmsg_hdr *hdr = data;
if (unlikely(len < sizeof(struct cmsg_reply_map_simple))) {
cmsg_warn(bpf, "cmsg drop - too short %d!\n", len);
return;
}
if (hdr->type == CMSG_TYPE_BPF_EVENT)
nfp_bpf_event_output(bpf, data, len);
else
cmsg_warn(bpf, "cmsg drop - msg type %d with raw buffer!\n",
hdr->type);
}

View File

@ -51,6 +51,7 @@ enum bpf_cap_tlv_type {
NFP_BPF_CAP_TYPE_MAPS = 3,
NFP_BPF_CAP_TYPE_RANDOM = 4,
NFP_BPF_CAP_TYPE_QUEUE_SELECT = 5,
NFP_BPF_CAP_TYPE_ADJUST_TAIL = 6,
};
struct nfp_bpf_cap_tlv_func {

View File

@ -1642,6 +1642,51 @@ static int adjust_head(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
return 0;
}
static int adjust_tail(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
u32 ret_einval, end;
swreg plen, delta;
BUILD_BUG_ON(plen_reg(nfp_prog) != reg_b(STATIC_REG_PKT_LEN));
plen = imm_a(nfp_prog);
delta = reg_a(2 * 2);
ret_einval = nfp_prog_current_offset(nfp_prog) + 9;
end = nfp_prog_current_offset(nfp_prog) + 11;
/* Calculate resulting length */
emit_alu(nfp_prog, plen, plen_reg(nfp_prog), ALU_OP_ADD, delta);
/* delta == 0 is not allowed by the kernel, add must overflow to make
* length smaller.
*/
emit_br(nfp_prog, BR_BCC, ret_einval, 0);
/* if (new_len < 14) then -EINVAL */
emit_alu(nfp_prog, reg_none(), plen, ALU_OP_SUB, reg_imm(ETH_HLEN));
emit_br(nfp_prog, BR_BMI, ret_einval, 0);
emit_alu(nfp_prog, plen_reg(nfp_prog),
plen_reg(nfp_prog), ALU_OP_ADD, delta);
emit_alu(nfp_prog, pv_len(nfp_prog),
pv_len(nfp_prog), ALU_OP_ADD, delta);
emit_br(nfp_prog, BR_UNC, end, 2);
wrp_immed(nfp_prog, reg_both(0), 0);
wrp_immed(nfp_prog, reg_both(1), 0);
if (!nfp_prog_confirm_current_offset(nfp_prog, ret_einval))
return -EINVAL;
wrp_immed(nfp_prog, reg_both(0), -22);
wrp_immed(nfp_prog, reg_both(1), ~0);
if (!nfp_prog_confirm_current_offset(nfp_prog, end))
return -EINVAL;
return 0;
}
static int
map_call_stack_common(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
@ -3041,6 +3086,8 @@ static int call(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
switch (meta->insn.imm) {
case BPF_FUNC_xdp_adjust_head:
return adjust_head(nfp_prog, meta);
case BPF_FUNC_xdp_adjust_tail:
return adjust_tail(nfp_prog, meta);
case BPF_FUNC_map_lookup_elem:
case BPF_FUNC_map_update_elem:
case BPF_FUNC_map_delete_elem:
@ -3883,6 +3930,7 @@ static int nfp_bpf_replace_map_ptrs(struct nfp_prog *nfp_prog)
struct nfp_insn_meta *meta1, *meta2;
struct nfp_bpf_map *nfp_map;
struct bpf_map *map;
u32 id;
nfp_for_each_insn_walk2(nfp_prog, meta1, meta2) {
if (meta1->skip || meta2->skip)
@ -3894,11 +3942,14 @@ static int nfp_bpf_replace_map_ptrs(struct nfp_prog *nfp_prog)
map = (void *)(unsigned long)((u32)meta1->insn.imm |
(u64)meta2->insn.imm << 32);
if (bpf_map_offload_neutral(map))
continue;
nfp_map = map_to_offmap(map)->dev_priv;
if (bpf_map_offload_neutral(map)) {
id = map->id;
} else {
nfp_map = map_to_offmap(map)->dev_priv;
id = nfp_map->tid;
}
meta1->insn.imm = nfp_map->tid;
meta1->insn.imm = id;
meta2->insn.imm = 0;
}

View File

@ -45,8 +45,8 @@
const struct rhashtable_params nfp_bpf_maps_neutral_params = {
.nelem_hint = 4,
.key_len = FIELD_SIZEOF(struct nfp_bpf_neutral_map, ptr),
.key_offset = offsetof(struct nfp_bpf_neutral_map, ptr),
.key_len = FIELD_SIZEOF(struct bpf_map, id),
.key_offset = offsetof(struct nfp_bpf_neutral_map, map_id),
.head_offset = offsetof(struct nfp_bpf_neutral_map, l),
.automatic_shrinking = true,
};
@ -334,6 +334,14 @@ nfp_bpf_parse_cap_qsel(struct nfp_app_bpf *bpf, void __iomem *value, u32 length)
return 0;
}
static int
nfp_bpf_parse_cap_adjust_tail(struct nfp_app_bpf *bpf, void __iomem *value,
u32 length)
{
bpf->adjust_tail = true;
return 0;
}
static int nfp_bpf_parse_capabilities(struct nfp_app *app)
{
struct nfp_cpp *cpp = app->pf->cpp;
@ -380,6 +388,11 @@ static int nfp_bpf_parse_capabilities(struct nfp_app *app)
if (nfp_bpf_parse_cap_qsel(app->priv, value, length))
goto err_release_free;
break;
case NFP_BPF_CAP_TYPE_ADJUST_TAIL:
if (nfp_bpf_parse_cap_adjust_tail(app->priv, value,
length))
goto err_release_free;
break;
default:
nfp_dbg(cpp, "unknown BPF capability: %d\n", type);
break;
@ -490,6 +503,7 @@ const struct nfp_app_type app_bpf = {
.vnic_free = nfp_bpf_vnic_free,
.ctrl_msg_rx = nfp_bpf_ctrl_msg_rx,
.ctrl_msg_rx_raw = nfp_bpf_ctrl_msg_rx_raw,
.setup_tc = nfp_bpf_setup_tc,
.bpf = nfp_ndo_bpf,

View File

@ -47,6 +47,8 @@
#include "../nfp_asm.h"
#include "fw.h"
#define cmsg_warn(bpf, msg...) nn_dp_warn(&(bpf)->app->ctrl->dp, msg)
/* For relocation logic use up-most byte of branch instruction as scratch
* area. Remember to clear this before sending instructions to HW!
*/
@ -148,6 +150,7 @@ enum pkt_vec {
*
* @pseudo_random: FW initialized the pseudo-random machinery (CSRs)
* @queue_select: BPF can set the RX queue ID in packet vector
* @adjust_tail: BPF can simply trunc packet size for adjust tail
*/
struct nfp_app_bpf {
struct nfp_app *app;
@ -193,6 +196,7 @@ struct nfp_app_bpf {
bool pseudo_random;
bool queue_select;
bool adjust_tail;
};
enum nfp_bpf_map_use {
@ -221,6 +225,7 @@ struct nfp_bpf_map {
struct nfp_bpf_neutral_map {
struct rhash_head l;
struct bpf_map *ptr;
u32 map_id;
u32 count;
};
@ -501,7 +506,11 @@ int nfp_bpf_ctrl_lookup_entry(struct bpf_offloaded_map *offmap,
int nfp_bpf_ctrl_getnext_entry(struct bpf_offloaded_map *offmap,
void *key, void *next_key);
int nfp_bpf_event_output(struct nfp_app_bpf *bpf, struct sk_buff *skb);
int nfp_bpf_event_output(struct nfp_app_bpf *bpf, const void *data,
unsigned int len);
void nfp_bpf_ctrl_msg_rx(struct nfp_app *app, struct sk_buff *skb);
void
nfp_bpf_ctrl_msg_rx_raw(struct nfp_app *app, const void *data,
unsigned int len);
#endif

View File

@ -67,7 +67,7 @@ nfp_map_ptr_record(struct nfp_app_bpf *bpf, struct nfp_prog *nfp_prog,
ASSERT_RTNL();
/* Reuse path - other offloaded program is already tracking this map. */
record = rhashtable_lookup_fast(&bpf->maps_neutral, &map,
record = rhashtable_lookup_fast(&bpf->maps_neutral, &map->id,
nfp_bpf_maps_neutral_params);
if (record) {
nfp_prog->map_records[nfp_prog->map_records_cnt++] = record;
@ -89,6 +89,7 @@ nfp_map_ptr_record(struct nfp_app_bpf *bpf, struct nfp_prog *nfp_prog,
}
record->ptr = map;
record->map_id = map->id;
record->count = 1;
err = rhashtable_insert_fast(&bpf->maps_neutral, &record->l,
@ -379,11 +380,23 @@ nfp_bpf_map_alloc(struct nfp_app_bpf *bpf, struct bpf_offloaded_map *offmap)
bpf->maps.max_elems - bpf->map_elems_in_use);
return -ENOMEM;
}
if (offmap->map.key_size > bpf->maps.max_key_sz ||
offmap->map.value_size > bpf->maps.max_val_sz ||
round_up(offmap->map.key_size, 8) +
if (round_up(offmap->map.key_size, 8) +
round_up(offmap->map.value_size, 8) > bpf->maps.max_elem_sz) {
pr_info("elements don't fit in device constraints\n");
pr_info("map elements too large: %u, FW max element size (key+value): %u\n",
round_up(offmap->map.key_size, 8) +
round_up(offmap->map.value_size, 8),
bpf->maps.max_elem_sz);
return -ENOMEM;
}
if (offmap->map.key_size > bpf->maps.max_key_sz) {
pr_info("map key size %u, FW max is %u\n",
offmap->map.key_size, bpf->maps.max_key_sz);
return -ENOMEM;
}
if (offmap->map.value_size > bpf->maps.max_val_sz) {
pr_info("map value size %u, FW max is %u\n",
offmap->map.value_size, bpf->maps.max_val_sz);
return -ENOMEM;
}
@ -453,43 +466,43 @@ nfp_bpf_perf_event_copy(void *dst, const void *src,
return 0;
}
int nfp_bpf_event_output(struct nfp_app_bpf *bpf, struct sk_buff *skb)
int nfp_bpf_event_output(struct nfp_app_bpf *bpf, const void *data,
unsigned int len)
{
struct cmsg_bpf_event *cbe = (void *)skb->data;
u32 pkt_size, data_size;
struct bpf_map *map;
struct cmsg_bpf_event *cbe = (void *)data;
struct nfp_bpf_neutral_map *record;
u32 pkt_size, data_size, map_id;
u64 map_id_full;
if (skb->len < sizeof(struct cmsg_bpf_event))
goto err_drop;
if (len < sizeof(struct cmsg_bpf_event))
return -EINVAL;
pkt_size = be32_to_cpu(cbe->pkt_size);
data_size = be32_to_cpu(cbe->data_size);
map = (void *)(unsigned long)be64_to_cpu(cbe->map_ptr);
map_id_full = be64_to_cpu(cbe->map_ptr);
map_id = map_id_full;
if (skb->len < sizeof(struct cmsg_bpf_event) + pkt_size + data_size)
goto err_drop;
if (len < sizeof(struct cmsg_bpf_event) + pkt_size + data_size)
return -EINVAL;
if (cbe->hdr.ver != CMSG_MAP_ABI_VERSION)
goto err_drop;
return -EINVAL;
rcu_read_lock();
if (!rhashtable_lookup_fast(&bpf->maps_neutral, &map,
nfp_bpf_maps_neutral_params)) {
record = rhashtable_lookup_fast(&bpf->maps_neutral, &map_id,
nfp_bpf_maps_neutral_params);
if (!record || map_id_full > U32_MAX) {
rcu_read_unlock();
pr_warn("perf event: dest map pointer %px not recognized, dropping event\n",
map);
goto err_drop;
cmsg_warn(bpf, "perf event: map id %lld (0x%llx) not recognized, dropping event\n",
map_id_full, map_id_full);
return -EINVAL;
}
bpf_event_output(map, be32_to_cpu(cbe->cpu_id),
bpf_event_output(record->ptr, be32_to_cpu(cbe->cpu_id),
&cbe->data[round_up(pkt_size, 4)], data_size,
cbe->data, pkt_size, nfp_bpf_perf_event_copy);
rcu_read_unlock();
dev_consume_skb_any(skb);
return 0;
err_drop:
dev_kfree_skb_any(skb);
return -EINVAL;
}
static int

View File

@ -178,6 +178,13 @@ nfp_bpf_check_call(struct nfp_prog *nfp_prog, struct bpf_verifier_env *env,
nfp_record_adjust_head(bpf, nfp_prog, meta, reg2);
break;
case BPF_FUNC_xdp_adjust_tail:
if (!bpf->adjust_tail) {
pr_vlog(env, "adjust_tail not supported by FW\n");
return -EOPNOTSUPP;
}
break;
case BPF_FUNC_map_lookup_elem:
if (!nfp_bpf_map_call_ok("map_lookup", env, meta,
bpf->helpers.map_lookup, reg1) ||

View File

@ -172,6 +172,8 @@ struct nfp_app *nfp_app_alloc(struct nfp_pf *pf, enum nfp_app_id id)
if (WARN_ON(!apps[id]->name || !apps[id]->vnic_alloc))
return ERR_PTR(-EINVAL);
if (WARN_ON(!apps[id]->ctrl_msg_rx && apps[id]->ctrl_msg_rx_raw))
return ERR_PTR(-EINVAL);
app = kzalloc(sizeof(*app), GFP_KERNEL);
if (!app)

View File

@ -98,6 +98,7 @@ extern const struct nfp_app_type app_abm;
* @start: start application logic
* @stop: stop application logic
* @ctrl_msg_rx: control message handler
* @ctrl_msg_rx_raw: handler for control messages from data queues
* @setup_tc: setup TC ndo
* @bpf: BPF ndo offload-related calls
* @xdp_offload: offload an XDP program
@ -150,6 +151,8 @@ struct nfp_app_type {
void (*stop)(struct nfp_app *app);
void (*ctrl_msg_rx)(struct nfp_app *app, struct sk_buff *skb);
void (*ctrl_msg_rx_raw)(struct nfp_app *app, const void *data,
unsigned int len);
int (*setup_tc)(struct nfp_app *app, struct net_device *netdev,
enum tc_setup_type type, void *type_data);
@ -318,6 +321,11 @@ static inline bool nfp_app_ctrl_has_meta(struct nfp_app *app)
return app->type->ctrl_has_meta;
}
static inline bool nfp_app_ctrl_uses_data_vnics(struct nfp_app *app)
{
return app && app->type->ctrl_msg_rx_raw;
}
static inline const char *nfp_app_extra_cap(struct nfp_app *app,
struct nfp_net *nn)
{
@ -381,6 +389,16 @@ static inline void nfp_app_ctrl_rx(struct nfp_app *app, struct sk_buff *skb)
app->type->ctrl_msg_rx(app, skb);
}
static inline void
nfp_app_ctrl_rx_raw(struct nfp_app *app, const void *data, unsigned int len)
{
if (!app || !app->type->ctrl_msg_rx_raw)
return;
trace_devlink_hwmsg(priv_to_devlink(app->pf), true, 0, data, len);
app->type->ctrl_msg_rx_raw(app, data, len);
}
static inline int nfp_app_eswitch_mode_get(struct nfp_app *app, u16 *mode)
{
if (!app->type->eswitch_mode_get)

View File

@ -93,6 +93,7 @@ enum br_mask {
BR_BNE = 0x01,
BR_BMI = 0x02,
BR_BHS = 0x04,
BR_BCC = 0x05,
BR_BLO = 0x05,
BR_BGE = 0x08,
BR_BLT = 0x09,

View File

@ -1759,6 +1759,29 @@ static int nfp_net_rx(struct nfp_net_rx_ring *rx_ring, int budget)
}
}
if (likely(!meta.portid)) {
netdev = dp->netdev;
} else if (meta.portid == NFP_META_PORT_ID_CTRL) {
struct nfp_net *nn = netdev_priv(dp->netdev);
nfp_app_ctrl_rx_raw(nn->app, rxbuf->frag + pkt_off,
pkt_len);
nfp_net_rx_give_one(dp, rx_ring, rxbuf->frag,
rxbuf->dma_addr);
continue;
} else {
struct nfp_net *nn;
nn = netdev_priv(dp->netdev);
netdev = nfp_app_repr_get(nn->app, meta.portid);
if (unlikely(!netdev)) {
nfp_net_rx_drop(dp, r_vec, rx_ring, rxbuf,
NULL);
continue;
}
nfp_repr_inc_rx_stats(netdev, pkt_len);
}
skb = build_skb(rxbuf->frag, true_bufsz);
if (unlikely(!skb)) {
nfp_net_rx_drop(dp, r_vec, rx_ring, rxbuf, NULL);
@ -1774,20 +1797,6 @@ static int nfp_net_rx(struct nfp_net_rx_ring *rx_ring, int budget)
nfp_net_rx_give_one(dp, rx_ring, new_frag, new_dma_addr);
if (likely(!meta.portid)) {
netdev = dp->netdev;
} else {
struct nfp_net *nn;
nn = netdev_priv(dp->netdev);
netdev = nfp_app_repr_get(nn->app, meta.portid);
if (unlikely(!netdev)) {
nfp_net_rx_drop(dp, r_vec, rx_ring, NULL, skb);
continue;
}
nfp_repr_inc_rx_stats(netdev, pkt_len);
}
skb_reserve(skb, pkt_off);
skb_put(skb, pkt_len);
@ -3857,6 +3866,9 @@ int nfp_net_init(struct nfp_net *nn)
nn->dp.mtu = NFP_NET_DEFAULT_MTU;
nn->dp.fl_bufsz = nfp_net_calc_fl_bufsz(&nn->dp);
if (nfp_app_ctrl_uses_data_vnics(nn->app))
nn->dp.ctrl |= nn->cap & NFP_NET_CFG_CTRL_CMSG_DATA;
if (nn->cap & NFP_NET_CFG_CTRL_RSS_ANY) {
nfp_net_rss_init(nn);
nn->dp.ctrl |= nn->cap & NFP_NET_CFG_CTRL_RSS2 ?:

View File

@ -127,6 +127,7 @@
#define NFP_NET_CFG_CTRL_GATHER (0x1 << 9) /* Gather DMA */
#define NFP_NET_CFG_CTRL_LSO (0x1 << 10) /* LSO/TSO (version 1) */
#define NFP_NET_CFG_CTRL_CTAG_FILTER (0x1 << 11) /* VLAN CTAG filtering */
#define NFP_NET_CFG_CTRL_CMSG_DATA (0x1 << 12) /* RX cmsgs on data Qs */
#define NFP_NET_CFG_CTRL_RINGCFG (0x1 << 16) /* Ring runtime changes */
#define NFP_NET_CFG_CTRL_RSS (0x1 << 17) /* RSS (version 1) */
#define NFP_NET_CFG_CTRL_IRQMOD (0x1 << 18) /* Interrupt moderation */

View File

@ -4,22 +4,46 @@
#include <linux/errno.h>
#include <linux/jump_label.h>
#include <linux/percpu.h>
#include <linux/rbtree.h>
#include <uapi/linux/bpf.h>
struct sock;
struct sockaddr;
struct cgroup;
struct sk_buff;
struct bpf_map;
struct bpf_prog;
struct bpf_sock_ops_kern;
struct bpf_cgroup_storage;
#ifdef CONFIG_CGROUP_BPF
extern struct static_key_false cgroup_bpf_enabled_key;
#define cgroup_bpf_enabled static_branch_unlikely(&cgroup_bpf_enabled_key)
DECLARE_PER_CPU(void*, bpf_cgroup_storage);
struct bpf_cgroup_storage_map;
struct bpf_storage_buffer {
struct rcu_head rcu;
char data[0];
};
struct bpf_cgroup_storage {
struct bpf_storage_buffer *buf;
struct bpf_cgroup_storage_map *map;
struct bpf_cgroup_storage_key key;
struct list_head list;
struct rb_node node;
struct rcu_head rcu;
};
struct bpf_prog_list {
struct list_head node;
struct bpf_prog *prog;
struct bpf_cgroup_storage *storage;
};
struct bpf_prog_array;
@ -77,6 +101,26 @@ int __cgroup_bpf_run_filter_sock_ops(struct sock *sk,
int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor,
short access, enum bpf_attach_type type);
static inline void bpf_cgroup_storage_set(struct bpf_cgroup_storage *storage)
{
struct bpf_storage_buffer *buf;
if (!storage)
return;
buf = READ_ONCE(storage->buf);
this_cpu_write(bpf_cgroup_storage, &buf->data[0]);
}
struct bpf_cgroup_storage *bpf_cgroup_storage_alloc(struct bpf_prog *prog);
void bpf_cgroup_storage_free(struct bpf_cgroup_storage *storage);
void bpf_cgroup_storage_link(struct bpf_cgroup_storage *storage,
struct cgroup *cgroup,
enum bpf_attach_type type);
void bpf_cgroup_storage_unlink(struct bpf_cgroup_storage *storage);
int bpf_cgroup_storage_assign(struct bpf_prog *prog, struct bpf_map *map);
void bpf_cgroup_storage_release(struct bpf_prog *prog, struct bpf_map *map);
/* Wrappers for __cgroup_bpf_run_filter_skb() guarded by cgroup_bpf_enabled. */
#define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk, skb) \
({ \
@ -221,6 +265,16 @@ static inline int cgroup_bpf_prog_query(const union bpf_attr *attr,
return -EINVAL;
}
static inline void bpf_cgroup_storage_set(struct bpf_cgroup_storage *storage) {}
static inline int bpf_cgroup_storage_assign(struct bpf_prog *prog,
struct bpf_map *map) { return 0; }
static inline void bpf_cgroup_storage_release(struct bpf_prog *prog,
struct bpf_map *map) {}
static inline struct bpf_cgroup_storage *bpf_cgroup_storage_alloc(
struct bpf_prog *prog) { return 0; }
static inline void bpf_cgroup_storage_free(
struct bpf_cgroup_storage *storage) {}
#define cgroup_bpf_enabled (0)
#define BPF_CGROUP_PRE_CONNECT_ENABLED(sk) (0)
#define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk,skb) ({ 0; })

View File

@ -155,6 +155,7 @@ enum bpf_arg_type {
enum bpf_return_type {
RET_INTEGER, /* function returns integer */
RET_VOID, /* function doesn't return anything */
RET_PTR_TO_MAP_VALUE, /* returns a pointer to map elem value */
RET_PTR_TO_MAP_VALUE_OR_NULL, /* returns a pointer to map elem value or NULL */
};
@ -282,6 +283,7 @@ struct bpf_prog_aux {
struct bpf_prog *prog;
struct user_struct *user;
u64 load_time; /* ns since boottime */
struct bpf_map *cgroup_storage;
char name[BPF_OBJ_NAME_LEN];
#ifdef CONFIG_SECURITY
void *security;
@ -348,9 +350,14 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
* The 'struct bpf_prog_array *' should only be replaced with xchg()
* since other cpus are walking the array of pointers in parallel.
*/
struct bpf_prog_array_item {
struct bpf_prog *prog;
struct bpf_cgroup_storage *cgroup_storage;
};
struct bpf_prog_array {
struct rcu_head rcu;
struct bpf_prog *progs[0];
struct bpf_prog_array_item items[0];
};
struct bpf_prog_array *bpf_prog_array_alloc(u32 prog_cnt, gfp_t flags);
@ -371,7 +378,8 @@ int bpf_prog_array_copy(struct bpf_prog_array __rcu *old_array,
#define __BPF_PROG_RUN_ARRAY(array, ctx, func, check_non_null) \
({ \
struct bpf_prog **_prog, *__prog; \
struct bpf_prog_array_item *_item; \
struct bpf_prog *_prog; \
struct bpf_prog_array *_array; \
u32 _ret = 1; \
preempt_disable(); \
@ -379,10 +387,11 @@ int bpf_prog_array_copy(struct bpf_prog_array __rcu *old_array,
_array = rcu_dereference(array); \
if (unlikely(check_non_null && !_array))\
goto _out; \
_prog = _array->progs; \
while ((__prog = READ_ONCE(*_prog))) { \
_ret &= func(__prog, ctx); \
_prog++; \
_item = &_array->items[0]; \
while ((_prog = READ_ONCE(_item->prog))) { \
bpf_cgroup_storage_set(_item->cgroup_storage); \
_ret &= func(_prog, ctx); \
_item++; \
} \
_out: \
rcu_read_unlock(); \
@ -435,6 +444,8 @@ struct bpf_map * __must_check bpf_map_inc(struct bpf_map *map, bool uref);
void bpf_map_put_with_uref(struct bpf_map *map);
void bpf_map_put(struct bpf_map *map);
int bpf_map_precharge_memlock(u32 pages);
int bpf_map_charge_memlock(struct bpf_map *map, u32 pages);
void bpf_map_uncharge_memlock(struct bpf_map *map, u32 pages);
void *bpf_map_area_alloc(size_t size, int numa_node);
void bpf_map_area_free(void *base);
void bpf_map_init_from_attr(struct bpf_map *map, union bpf_attr *attr);
@ -777,6 +788,8 @@ extern const struct bpf_func_proto bpf_sock_map_update_proto;
extern const struct bpf_func_proto bpf_sock_hash_update_proto;
extern const struct bpf_func_proto bpf_get_current_cgroup_id_proto;
extern const struct bpf_func_proto bpf_get_local_storage_proto;
/* Shared helpers among cBPF and eBPF. */
void bpf_user_rnd_init_once(void);
u64 bpf_user_rnd_u32(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);

View File

@ -37,6 +37,9 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_PERF_EVENT_ARRAY, perf_event_array_map_ops)
#ifdef CONFIG_CGROUPS
BPF_MAP_TYPE(BPF_MAP_TYPE_CGROUP_ARRAY, cgroup_array_map_ops)
#endif
#ifdef CONFIG_CGROUP_BPF
BPF_MAP_TYPE(BPF_MAP_TYPE_CGROUP_STORAGE, cgroup_storage_map_ops)
#endif
BPF_MAP_TYPE(BPF_MAP_TYPE_HASH, htab_map_ops)
BPF_MAP_TYPE(BPF_MAP_TYPE_PERCPU_HASH, htab_percpu_map_ops)
BPF_MAP_TYPE(BPF_MAP_TYPE_LRU_HASH, htab_lru_map_ops)

View File

@ -21,10 +21,12 @@
extern int seg6_lookup_nexthop(struct sk_buff *skb, struct in6_addr *nhaddr,
u32 tbl_id);
extern bool seg6_bpf_has_valid_srh(struct sk_buff *skb);
struct seg6_bpf_srh_state {
bool valid;
struct ipv6_sr_hdr *srh;
u16 hdrlen;
bool valid;
};
DECLARE_PER_CPU(struct seg6_bpf_srh_state, seg6_bpf_srh_states);

View File

@ -75,6 +75,11 @@ struct bpf_lpm_trie_key {
__u8 data[0]; /* Arbitrary size */
};
struct bpf_cgroup_storage_key {
__u64 cgroup_inode_id; /* cgroup inode id */
__u32 attach_type; /* program attach type */
};
/* BPF syscall commands, see bpf(2) man-page for details. */
enum bpf_cmd {
BPF_MAP_CREATE,
@ -120,6 +125,7 @@ enum bpf_map_type {
BPF_MAP_TYPE_CPUMAP,
BPF_MAP_TYPE_XSKMAP,
BPF_MAP_TYPE_SOCKHASH,
BPF_MAP_TYPE_CGROUP_STORAGE,
};
enum bpf_prog_type {
@ -1371,6 +1377,20 @@ union bpf_attr {
* A 8-byte long non-decreasing number on success, or 0 if the
* socket field is missing inside *skb*.
*
* u64 bpf_get_socket_cookie(struct bpf_sock_addr *ctx)
* Description
* Equivalent to bpf_get_socket_cookie() helper that accepts
* *skb*, but gets socket from **struct bpf_sock_addr** contex.
* Return
* A 8-byte long non-decreasing number.
*
* u64 bpf_get_socket_cookie(struct bpf_sock_ops *ctx)
* Description
* Equivalent to bpf_get_socket_cookie() helper that accepts
* *skb*, but gets socket from **struct bpf_sock_ops** contex.
* Return
* A 8-byte long non-decreasing number.
*
* u32 bpf_get_socket_uid(struct sk_buff *skb)
* Return
* The owner UID of the socket associated to *skb*. If the socket
@ -2075,6 +2095,24 @@ union bpf_attr {
* Return
* A 64-bit integer containing the current cgroup id based
* on the cgroup within which the current task is running.
*
* void* get_local_storage(void *map, u64 flags)
* Description
* Get the pointer to the local storage area.
* The type and the size of the local storage is defined
* by the *map* argument.
* The *flags* meaning is specific for each map type,
* and has to be 0 for cgroup local storage.
*
* Depending on the bpf program type, a local storage area
* can be shared between multiple instances of the bpf program,
* running simultaneously.
*
* A user should care about the synchronization by himself.
* For example, by using the BPF_STX_XADD instruction to alter
* the shared data.
* Return
* Pointer to the local storage area.
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
@ -2157,7 +2195,8 @@ union bpf_attr {
FN(rc_repeat), \
FN(rc_keydown), \
FN(skb_cgroup_id), \
FN(get_current_cgroup_id),
FN(get_current_cgroup_id), \
FN(get_local_storage),
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
* function eBPF program intends to call

View File

@ -3,6 +3,7 @@ obj-y := core.o
obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o
obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list.o lpm_trie.o map_in_map.o
obj-$(CONFIG_BPF_SYSCALL) += local_storage.o
obj-$(CONFIG_BPF_SYSCALL) += disasm.o
obj-$(CONFIG_BPF_SYSCALL) += btf.o
ifeq ($(CONFIG_NET),y)

View File

@ -34,6 +34,8 @@ void cgroup_bpf_put(struct cgroup *cgrp)
list_for_each_entry_safe(pl, tmp, progs, node) {
list_del(&pl->node);
bpf_prog_put(pl->prog);
bpf_cgroup_storage_unlink(pl->storage);
bpf_cgroup_storage_free(pl->storage);
kfree(pl);
static_branch_dec(&cgroup_bpf_enabled_key);
}
@ -115,15 +117,18 @@ static int compute_effective_progs(struct cgroup *cgrp,
cnt = 0;
p = cgrp;
do {
if (cnt == 0 || (p->bpf.flags[type] & BPF_F_ALLOW_MULTI))
list_for_each_entry(pl,
&p->bpf.progs[type], node) {
if (!pl->prog)
continue;
progs->progs[cnt++] = pl->prog;
}
p = cgroup_parent(p);
} while (p);
if (cnt > 0 && !(p->bpf.flags[type] & BPF_F_ALLOW_MULTI))
continue;
list_for_each_entry(pl, &p->bpf.progs[type], node) {
if (!pl->prog)
continue;
progs->items[cnt].prog = pl->prog;
progs->items[cnt].cgroup_storage = pl->storage;
cnt++;
}
} while ((p = cgroup_parent(p)));
rcu_assign_pointer(*array, progs);
return 0;
@ -172,6 +177,45 @@ cleanup:
return -ENOMEM;
}
static int update_effective_progs(struct cgroup *cgrp,
enum bpf_attach_type type)
{
struct cgroup_subsys_state *css;
int err;
/* allocate and recompute effective prog arrays */
css_for_each_descendant_pre(css, &cgrp->self) {
struct cgroup *desc = container_of(css, struct cgroup, self);
err = compute_effective_progs(desc, type, &desc->bpf.inactive);
if (err)
goto cleanup;
}
/* all allocations were successful. Activate all prog arrays */
css_for_each_descendant_pre(css, &cgrp->self) {
struct cgroup *desc = container_of(css, struct cgroup, self);
activate_effective_progs(desc, type, desc->bpf.inactive);
desc->bpf.inactive = NULL;
}
return 0;
cleanup:
/* oom while computing effective. Free all computed effective arrays
* since they were not activated
*/
css_for_each_descendant_pre(css, &cgrp->self) {
struct cgroup *desc = container_of(css, struct cgroup, self);
bpf_prog_array_free(desc->bpf.inactive);
desc->bpf.inactive = NULL;
}
return err;
}
#define BPF_CGROUP_MAX_PROGS 64
/**
@ -188,7 +232,7 @@ int __cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog,
{
struct list_head *progs = &cgrp->bpf.progs[type];
struct bpf_prog *old_prog = NULL;
struct cgroup_subsys_state *css;
struct bpf_cgroup_storage *storage, *old_storage = NULL;
struct bpf_prog_list *pl;
bool pl_was_allocated;
int err;
@ -210,72 +254,71 @@ int __cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog,
if (prog_list_length(progs) >= BPF_CGROUP_MAX_PROGS)
return -E2BIG;
storage = bpf_cgroup_storage_alloc(prog);
if (IS_ERR(storage))
return -ENOMEM;
if (flags & BPF_F_ALLOW_MULTI) {
list_for_each_entry(pl, progs, node)
if (pl->prog == prog)
list_for_each_entry(pl, progs, node) {
if (pl->prog == prog) {
/* disallow attaching the same prog twice */
bpf_cgroup_storage_free(storage);
return -EINVAL;
}
}
pl = kmalloc(sizeof(*pl), GFP_KERNEL);
if (!pl)
if (!pl) {
bpf_cgroup_storage_free(storage);
return -ENOMEM;
}
pl_was_allocated = true;
pl->prog = prog;
pl->storage = storage;
list_add_tail(&pl->node, progs);
} else {
if (list_empty(progs)) {
pl = kmalloc(sizeof(*pl), GFP_KERNEL);
if (!pl)
if (!pl) {
bpf_cgroup_storage_free(storage);
return -ENOMEM;
}
pl_was_allocated = true;
list_add_tail(&pl->node, progs);
} else {
pl = list_first_entry(progs, typeof(*pl), node);
old_prog = pl->prog;
old_storage = pl->storage;
bpf_cgroup_storage_unlink(old_storage);
pl_was_allocated = false;
}
pl->prog = prog;
pl->storage = storage;
}
cgrp->bpf.flags[type] = flags;
/* allocate and recompute effective prog arrays */
css_for_each_descendant_pre(css, &cgrp->self) {
struct cgroup *desc = container_of(css, struct cgroup, self);
err = compute_effective_progs(desc, type, &desc->bpf.inactive);
if (err)
goto cleanup;
}
/* all allocations were successful. Activate all prog arrays */
css_for_each_descendant_pre(css, &cgrp->self) {
struct cgroup *desc = container_of(css, struct cgroup, self);
activate_effective_progs(desc, type, desc->bpf.inactive);
desc->bpf.inactive = NULL;
}
err = update_effective_progs(cgrp, type);
if (err)
goto cleanup;
static_branch_inc(&cgroup_bpf_enabled_key);
if (old_storage)
bpf_cgroup_storage_free(old_storage);
if (old_prog) {
bpf_prog_put(old_prog);
static_branch_dec(&cgroup_bpf_enabled_key);
}
bpf_cgroup_storage_link(storage, cgrp, type);
return 0;
cleanup:
/* oom while computing effective. Free all computed effective arrays
* since they were not activated
*/
css_for_each_descendant_pre(css, &cgrp->self) {
struct cgroup *desc = container_of(css, struct cgroup, self);
bpf_prog_array_free(desc->bpf.inactive);
desc->bpf.inactive = NULL;
}
/* and cleanup the prog list */
pl->prog = old_prog;
bpf_cgroup_storage_free(pl->storage);
pl->storage = old_storage;
bpf_cgroup_storage_link(old_storage, cgrp, type);
if (pl_was_allocated) {
list_del(&pl->node);
kfree(pl);
@ -298,7 +341,6 @@ int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
struct list_head *progs = &cgrp->bpf.progs[type];
u32 flags = cgrp->bpf.flags[type];
struct bpf_prog *old_prog = NULL;
struct cgroup_subsys_state *css;
struct bpf_prog_list *pl;
int err;
@ -337,25 +379,14 @@ int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
pl->prog = NULL;
}
/* allocate and recompute effective prog arrays */
css_for_each_descendant_pre(css, &cgrp->self) {
struct cgroup *desc = container_of(css, struct cgroup, self);
err = compute_effective_progs(desc, type, &desc->bpf.inactive);
if (err)
goto cleanup;
}
/* all allocations were successful. Activate all prog arrays */
css_for_each_descendant_pre(css, &cgrp->self) {
struct cgroup *desc = container_of(css, struct cgroup, self);
activate_effective_progs(desc, type, desc->bpf.inactive);
desc->bpf.inactive = NULL;
}
err = update_effective_progs(cgrp, type);
if (err)
goto cleanup;
/* now can actually delete it from this cgroup list */
list_del(&pl->node);
bpf_cgroup_storage_unlink(pl->storage);
bpf_cgroup_storage_free(pl->storage);
kfree(pl);
if (list_empty(progs))
/* last program was detached, reset flags to zero */
@ -366,16 +397,6 @@ int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
return 0;
cleanup:
/* oom while computing effective. Free all computed effective arrays
* since they were not activated
*/
css_for_each_descendant_pre(css, &cgrp->self) {
struct cgroup *desc = container_of(css, struct cgroup, self);
bpf_prog_array_free(desc->bpf.inactive);
desc->bpf.inactive = NULL;
}
/* and restore back old_prog */
pl->prog = old_prog;
return err;
@ -654,6 +675,8 @@ cgroup_dev_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_map_delete_elem_proto;
case BPF_FUNC_get_current_uid_gid:
return &bpf_get_current_uid_gid_proto;
case BPF_FUNC_get_local_storage:
return &bpf_get_local_storage_proto;
case BPF_FUNC_trace_printk:
if (capable(CAP_SYS_ADMIN))
return bpf_get_trace_printk_proto();

View File

@ -1542,7 +1542,8 @@ struct bpf_prog_array *bpf_prog_array_alloc(u32 prog_cnt, gfp_t flags)
{
if (prog_cnt)
return kzalloc(sizeof(struct bpf_prog_array) +
sizeof(struct bpf_prog *) * (prog_cnt + 1),
sizeof(struct bpf_prog_array_item) *
(prog_cnt + 1),
flags);
return &empty_prog_array.hdr;
@ -1556,43 +1557,45 @@ void bpf_prog_array_free(struct bpf_prog_array __rcu *progs)
kfree_rcu(progs, rcu);
}
int bpf_prog_array_length(struct bpf_prog_array __rcu *progs)
int bpf_prog_array_length(struct bpf_prog_array __rcu *array)
{
struct bpf_prog **prog;
struct bpf_prog_array_item *item;
u32 cnt = 0;
rcu_read_lock();
prog = rcu_dereference(progs)->progs;
for (; *prog; prog++)
if (*prog != &dummy_bpf_prog.prog)
item = rcu_dereference(array)->items;
for (; item->prog; item++)
if (item->prog != &dummy_bpf_prog.prog)
cnt++;
rcu_read_unlock();
return cnt;
}
static bool bpf_prog_array_copy_core(struct bpf_prog **prog,
static bool bpf_prog_array_copy_core(struct bpf_prog_array __rcu *array,
u32 *prog_ids,
u32 request_cnt)
{
struct bpf_prog_array_item *item;
int i = 0;
for (; *prog; prog++) {
if (*prog == &dummy_bpf_prog.prog)
item = rcu_dereference(array)->items;
for (; item->prog; item++) {
if (item->prog == &dummy_bpf_prog.prog)
continue;
prog_ids[i] = (*prog)->aux->id;
prog_ids[i] = item->prog->aux->id;
if (++i == request_cnt) {
prog++;
item++;
break;
}
}
return !!(*prog);
return !!(item->prog);
}
int bpf_prog_array_copy_to_user(struct bpf_prog_array __rcu *progs,
int bpf_prog_array_copy_to_user(struct bpf_prog_array __rcu *array,
__u32 __user *prog_ids, u32 cnt)
{
struct bpf_prog **prog;
unsigned long err = 0;
bool nospc;
u32 *ids;
@ -1611,8 +1614,7 @@ int bpf_prog_array_copy_to_user(struct bpf_prog_array __rcu *progs,
if (!ids)
return -ENOMEM;
rcu_read_lock();
prog = rcu_dereference(progs)->progs;
nospc = bpf_prog_array_copy_core(prog, ids, cnt);
nospc = bpf_prog_array_copy_core(array, ids, cnt);
rcu_read_unlock();
err = copy_to_user(prog_ids, ids, cnt * sizeof(u32));
kfree(ids);
@ -1623,14 +1625,14 @@ int bpf_prog_array_copy_to_user(struct bpf_prog_array __rcu *progs,
return 0;
}
void bpf_prog_array_delete_safe(struct bpf_prog_array __rcu *progs,
void bpf_prog_array_delete_safe(struct bpf_prog_array __rcu *array,
struct bpf_prog *old_prog)
{
struct bpf_prog **prog = progs->progs;
struct bpf_prog_array_item *item = array->items;
for (; *prog; prog++)
if (*prog == old_prog) {
WRITE_ONCE(*prog, &dummy_bpf_prog.prog);
for (; item->prog; item++)
if (item->prog == old_prog) {
WRITE_ONCE(item->prog, &dummy_bpf_prog.prog);
break;
}
}
@ -1641,7 +1643,7 @@ int bpf_prog_array_copy(struct bpf_prog_array __rcu *old_array,
struct bpf_prog_array **new_array)
{
int new_prog_cnt, carry_prog_cnt = 0;
struct bpf_prog **existing_prog;
struct bpf_prog_array_item *existing;
struct bpf_prog_array *array;
bool found_exclude = false;
int new_prog_idx = 0;
@ -1650,15 +1652,15 @@ int bpf_prog_array_copy(struct bpf_prog_array __rcu *old_array,
* the new array.
*/
if (old_array) {
existing_prog = old_array->progs;
for (; *existing_prog; existing_prog++) {
if (*existing_prog == exclude_prog) {
existing = old_array->items;
for (; existing->prog; existing++) {
if (existing->prog == exclude_prog) {
found_exclude = true;
continue;
}
if (*existing_prog != &dummy_bpf_prog.prog)
if (existing->prog != &dummy_bpf_prog.prog)
carry_prog_cnt++;
if (*existing_prog == include_prog)
if (existing->prog == include_prog)
return -EEXIST;
}
}
@ -1684,15 +1686,17 @@ int bpf_prog_array_copy(struct bpf_prog_array __rcu *old_array,
/* Fill in the new prog array */
if (carry_prog_cnt) {
existing_prog = old_array->progs;
for (; *existing_prog; existing_prog++)
if (*existing_prog != exclude_prog &&
*existing_prog != &dummy_bpf_prog.prog)
array->progs[new_prog_idx++] = *existing_prog;
existing = old_array->items;
for (; existing->prog; existing++)
if (existing->prog != exclude_prog &&
existing->prog != &dummy_bpf_prog.prog) {
array->items[new_prog_idx++].prog =
existing->prog;
}
}
if (include_prog)
array->progs[new_prog_idx++] = include_prog;
array->progs[new_prog_idx] = NULL;
array->items[new_prog_idx++].prog = include_prog;
array->items[new_prog_idx].prog = NULL;
*new_array = array;
return 0;
}
@ -1701,7 +1705,6 @@ int bpf_prog_array_copy_info(struct bpf_prog_array __rcu *array,
u32 *prog_ids, u32 request_cnt,
u32 *prog_cnt)
{
struct bpf_prog **prog;
u32 cnt = 0;
if (array)
@ -1714,8 +1717,7 @@ int bpf_prog_array_copy_info(struct bpf_prog_array __rcu *array,
return 0;
/* this function is called under trace/bpf_trace.c: bpf_event_mutex */
prog = rcu_dereference_check(array, 1)->progs;
return bpf_prog_array_copy_core(prog, prog_ids, request_cnt) ? -ENOSPC
return bpf_prog_array_copy_core(array, prog_ids, request_cnt) ? -ENOSPC
: 0;
}
@ -1793,6 +1795,7 @@ const struct bpf_func_proto bpf_get_current_comm_proto __weak;
const struct bpf_func_proto bpf_sock_map_update_proto __weak;
const struct bpf_func_proto bpf_sock_hash_update_proto __weak;
const struct bpf_func_proto bpf_get_current_cgroup_id_proto __weak;
const struct bpf_func_proto bpf_get_local_storage_proto __weak;
const struct bpf_func_proto * __weak bpf_get_trace_printk_proto(void)
{

View File

@ -193,4 +193,24 @@ const struct bpf_func_proto bpf_get_current_cgroup_id_proto = {
.gpl_only = false,
.ret_type = RET_INTEGER,
};
DECLARE_PER_CPU(void*, bpf_cgroup_storage);
BPF_CALL_2(bpf_get_local_storage, struct bpf_map *, map, u64, flags)
{
/* map and flags arguments are not used now,
* but provide an ability to extend the API
* for other types of local storages.
* verifier checks that their values are correct.
*/
return (unsigned long) this_cpu_read(bpf_cgroup_storage);
}
const struct bpf_func_proto bpf_get_local_storage_proto = {
.func = bpf_get_local_storage,
.gpl_only = false,
.ret_type = RET_PTR_TO_MAP_VALUE,
.arg1_type = ARG_CONST_MAP_PTR,
.arg2_type = ARG_ANYTHING,
};
#endif

378
kernel/bpf/local_storage.c Normal file
View File

@ -0,0 +1,378 @@
//SPDX-License-Identifier: GPL-2.0
#include <linux/bpf-cgroup.h>
#include <linux/bpf.h>
#include <linux/bug.h>
#include <linux/filter.h>
#include <linux/mm.h>
#include <linux/rbtree.h>
#include <linux/slab.h>
DEFINE_PER_CPU(void*, bpf_cgroup_storage);
#ifdef CONFIG_CGROUP_BPF
#define LOCAL_STORAGE_CREATE_FLAG_MASK \
(BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY)
struct bpf_cgroup_storage_map {
struct bpf_map map;
spinlock_t lock;
struct bpf_prog *prog;
struct rb_root root;
struct list_head list;
};
static struct bpf_cgroup_storage_map *map_to_storage(struct bpf_map *map)
{
return container_of(map, struct bpf_cgroup_storage_map, map);
}
static int bpf_cgroup_storage_key_cmp(
const struct bpf_cgroup_storage_key *key1,
const struct bpf_cgroup_storage_key *key2)
{
if (key1->cgroup_inode_id < key2->cgroup_inode_id)
return -1;
else if (key1->cgroup_inode_id > key2->cgroup_inode_id)
return 1;
else if (key1->attach_type < key2->attach_type)
return -1;
else if (key1->attach_type > key2->attach_type)
return 1;
return 0;
}
static struct bpf_cgroup_storage *cgroup_storage_lookup(
struct bpf_cgroup_storage_map *map, struct bpf_cgroup_storage_key *key,
bool locked)
{
struct rb_root *root = &map->root;
struct rb_node *node;
if (!locked)
spin_lock_bh(&map->lock);
node = root->rb_node;
while (node) {
struct bpf_cgroup_storage *storage;
storage = container_of(node, struct bpf_cgroup_storage, node);
switch (bpf_cgroup_storage_key_cmp(key, &storage->key)) {
case -1:
node = node->rb_left;
break;
case 1:
node = node->rb_right;
break;
default:
if (!locked)
spin_unlock_bh(&map->lock);
return storage;
}
}
if (!locked)
spin_unlock_bh(&map->lock);
return NULL;
}
static int cgroup_storage_insert(struct bpf_cgroup_storage_map *map,
struct bpf_cgroup_storage *storage)
{
struct rb_root *root = &map->root;
struct rb_node **new = &(root->rb_node), *parent = NULL;
while (*new) {
struct bpf_cgroup_storage *this;
this = container_of(*new, struct bpf_cgroup_storage, node);
parent = *new;
switch (bpf_cgroup_storage_key_cmp(&storage->key, &this->key)) {
case -1:
new = &((*new)->rb_left);
break;
case 1:
new = &((*new)->rb_right);
break;
default:
return -EEXIST;
}
}
rb_link_node(&storage->node, parent, new);
rb_insert_color(&storage->node, root);
return 0;
}
static void *cgroup_storage_lookup_elem(struct bpf_map *_map, void *_key)
{
struct bpf_cgroup_storage_map *map = map_to_storage(_map);
struct bpf_cgroup_storage_key *key = _key;
struct bpf_cgroup_storage *storage;
storage = cgroup_storage_lookup(map, key, false);
if (!storage)
return NULL;
return &READ_ONCE(storage->buf)->data[0];
}
static int cgroup_storage_update_elem(struct bpf_map *map, void *_key,
void *value, u64 flags)
{
struct bpf_cgroup_storage_key *key = _key;
struct bpf_cgroup_storage *storage;
struct bpf_storage_buffer *new;
if (flags & BPF_NOEXIST)
return -EINVAL;
storage = cgroup_storage_lookup((struct bpf_cgroup_storage_map *)map,
key, false);
if (!storage)
return -ENOENT;
new = kmalloc_node(sizeof(struct bpf_storage_buffer) +
map->value_size, __GFP_ZERO | GFP_USER,
map->numa_node);
if (!new)
return -ENOMEM;
memcpy(&new->data[0], value, map->value_size);
new = xchg(&storage->buf, new);
kfree_rcu(new, rcu);
return 0;
}
static int cgroup_storage_get_next_key(struct bpf_map *_map, void *_key,
void *_next_key)
{
struct bpf_cgroup_storage_map *map = map_to_storage(_map);
struct bpf_cgroup_storage_key *key = _key;
struct bpf_cgroup_storage_key *next = _next_key;
struct bpf_cgroup_storage *storage;
spin_lock_bh(&map->lock);
if (list_empty(&map->list))
goto enoent;
if (key) {
storage = cgroup_storage_lookup(map, key, true);
if (!storage)
goto enoent;
storage = list_next_entry(storage, list);
if (!storage)
goto enoent;
} else {
storage = list_first_entry(&map->list,
struct bpf_cgroup_storage, list);
}
spin_unlock_bh(&map->lock);
next->attach_type = storage->key.attach_type;
next->cgroup_inode_id = storage->key.cgroup_inode_id;
return 0;
enoent:
spin_unlock_bh(&map->lock);
return -ENOENT;
}
static struct bpf_map *cgroup_storage_map_alloc(union bpf_attr *attr)
{
int numa_node = bpf_map_attr_numa_node(attr);
struct bpf_cgroup_storage_map *map;
if (attr->key_size != sizeof(struct bpf_cgroup_storage_key))
return ERR_PTR(-EINVAL);
if (attr->value_size > PAGE_SIZE)
return ERR_PTR(-E2BIG);
if (attr->map_flags & ~LOCAL_STORAGE_CREATE_FLAG_MASK)
/* reserved bits should not be used */
return ERR_PTR(-EINVAL);
if (attr->max_entries)
/* max_entries is not used and enforced to be 0 */
return ERR_PTR(-EINVAL);
map = kmalloc_node(sizeof(struct bpf_cgroup_storage_map),
__GFP_ZERO | GFP_USER, numa_node);
if (!map)
return ERR_PTR(-ENOMEM);
map->map.pages = round_up(sizeof(struct bpf_cgroup_storage_map),
PAGE_SIZE) >> PAGE_SHIFT;
/* copy mandatory map attributes */
bpf_map_init_from_attr(&map->map, attr);
spin_lock_init(&map->lock);
map->root = RB_ROOT;
INIT_LIST_HEAD(&map->list);
return &map->map;
}
static void cgroup_storage_map_free(struct bpf_map *_map)
{
struct bpf_cgroup_storage_map *map = map_to_storage(_map);
WARN_ON(!RB_EMPTY_ROOT(&map->root));
WARN_ON(!list_empty(&map->list));
kfree(map);
}
static int cgroup_storage_delete_elem(struct bpf_map *map, void *key)
{
return -EINVAL;
}
const struct bpf_map_ops cgroup_storage_map_ops = {
.map_alloc = cgroup_storage_map_alloc,
.map_free = cgroup_storage_map_free,
.map_get_next_key = cgroup_storage_get_next_key,
.map_lookup_elem = cgroup_storage_lookup_elem,
.map_update_elem = cgroup_storage_update_elem,
.map_delete_elem = cgroup_storage_delete_elem,
};
int bpf_cgroup_storage_assign(struct bpf_prog *prog, struct bpf_map *_map)
{
struct bpf_cgroup_storage_map *map = map_to_storage(_map);
int ret = -EBUSY;
spin_lock_bh(&map->lock);
if (map->prog && map->prog != prog)
goto unlock;
if (prog->aux->cgroup_storage && prog->aux->cgroup_storage != _map)
goto unlock;
map->prog = prog;
prog->aux->cgroup_storage = _map;
ret = 0;
unlock:
spin_unlock_bh(&map->lock);
return ret;
}
void bpf_cgroup_storage_release(struct bpf_prog *prog, struct bpf_map *_map)
{
struct bpf_cgroup_storage_map *map = map_to_storage(_map);
spin_lock_bh(&map->lock);
if (map->prog == prog) {
WARN_ON(prog->aux->cgroup_storage != _map);
map->prog = NULL;
prog->aux->cgroup_storage = NULL;
}
spin_unlock_bh(&map->lock);
}
struct bpf_cgroup_storage *bpf_cgroup_storage_alloc(struct bpf_prog *prog)
{
struct bpf_cgroup_storage *storage;
struct bpf_map *map;
u32 pages;
map = prog->aux->cgroup_storage;
if (!map)
return NULL;
pages = round_up(sizeof(struct bpf_cgroup_storage) +
sizeof(struct bpf_storage_buffer) +
map->value_size, PAGE_SIZE) >> PAGE_SHIFT;
if (bpf_map_charge_memlock(map, pages))
return ERR_PTR(-EPERM);
storage = kmalloc_node(sizeof(struct bpf_cgroup_storage),
__GFP_ZERO | GFP_USER, map->numa_node);
if (!storage) {
bpf_map_uncharge_memlock(map, pages);
return ERR_PTR(-ENOMEM);
}
storage->buf = kmalloc_node(sizeof(struct bpf_storage_buffer) +
map->value_size, __GFP_ZERO | GFP_USER,
map->numa_node);
if (!storage->buf) {
bpf_map_uncharge_memlock(map, pages);
kfree(storage);
return ERR_PTR(-ENOMEM);
}
storage->map = (struct bpf_cgroup_storage_map *)map;
return storage;
}
void bpf_cgroup_storage_free(struct bpf_cgroup_storage *storage)
{
u32 pages;
struct bpf_map *map;
if (!storage)
return;
map = &storage->map->map;
pages = round_up(sizeof(struct bpf_cgroup_storage) +
sizeof(struct bpf_storage_buffer) +
map->value_size, PAGE_SIZE) >> PAGE_SHIFT;
bpf_map_uncharge_memlock(map, pages);
kfree_rcu(storage->buf, rcu);
kfree_rcu(storage, rcu);
}
void bpf_cgroup_storage_link(struct bpf_cgroup_storage *storage,
struct cgroup *cgroup,
enum bpf_attach_type type)
{
struct bpf_cgroup_storage_map *map;
if (!storage)
return;
storage->key.attach_type = type;
storage->key.cgroup_inode_id = cgroup->kn->id.id;
map = storage->map;
spin_lock_bh(&map->lock);
WARN_ON(cgroup_storage_insert(map, storage));
list_add(&storage->list, &map->list);
spin_unlock_bh(&map->lock);
}
void bpf_cgroup_storage_unlink(struct bpf_cgroup_storage *storage)
{
struct bpf_cgroup_storage_map *map;
struct rb_root *root;
if (!storage)
return;
map = storage->map;
spin_lock_bh(&map->lock);
root = &map->root;
rb_erase(&storage->node, root);
list_del(&storage->list);
spin_unlock_bh(&map->lock);
}
#endif

View File

@ -23,7 +23,8 @@ struct bpf_map *bpf_map_meta_alloc(int inner_map_ufd)
* is a runtime binding. Doing static check alone
* in the verifier is not enough.
*/
if (inner_map->map_type == BPF_MAP_TYPE_PROG_ARRAY) {
if (inner_map->map_type == BPF_MAP_TYPE_PROG_ARRAY ||
inner_map->map_type == BPF_MAP_TYPE_CGROUP_STORAGE) {
fdput(f);
return ERR_PTR(-ENOTSUPP);
}

View File

@ -181,32 +181,60 @@ int bpf_map_precharge_memlock(u32 pages)
return 0;
}
static int bpf_map_charge_memlock(struct bpf_map *map)
static int bpf_charge_memlock(struct user_struct *user, u32 pages)
{
struct user_struct *user = get_current_user();
unsigned long memlock_limit;
unsigned long memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
atomic_long_add(map->pages, &user->locked_vm);
if (atomic_long_read(&user->locked_vm) > memlock_limit) {
atomic_long_sub(map->pages, &user->locked_vm);
free_uid(user);
if (atomic_long_add_return(pages, &user->locked_vm) > memlock_limit) {
atomic_long_sub(pages, &user->locked_vm);
return -EPERM;
}
map->user = user;
return 0;
}
static void bpf_map_uncharge_memlock(struct bpf_map *map)
static void bpf_uncharge_memlock(struct user_struct *user, u32 pages)
{
atomic_long_sub(pages, &user->locked_vm);
}
static int bpf_map_init_memlock(struct bpf_map *map)
{
struct user_struct *user = get_current_user();
int ret;
ret = bpf_charge_memlock(user, map->pages);
if (ret) {
free_uid(user);
return ret;
}
map->user = user;
return ret;
}
static void bpf_map_release_memlock(struct bpf_map *map)
{
struct user_struct *user = map->user;
atomic_long_sub(map->pages, &user->locked_vm);
bpf_uncharge_memlock(user, map->pages);
free_uid(user);
}
int bpf_map_charge_memlock(struct bpf_map *map, u32 pages)
{
int ret;
ret = bpf_charge_memlock(map->user, pages);
if (ret)
return ret;
map->pages += pages;
return ret;
}
void bpf_map_uncharge_memlock(struct bpf_map *map, u32 pages)
{
bpf_uncharge_memlock(map->user, pages);
map->pages -= pages;
}
static int bpf_map_alloc_id(struct bpf_map *map)
{
int id;
@ -256,7 +284,7 @@ static void bpf_map_free_deferred(struct work_struct *work)
{
struct bpf_map *map = container_of(work, struct bpf_map, work);
bpf_map_uncharge_memlock(map);
bpf_map_release_memlock(map);
security_bpf_map_free(map);
/* implementation dependent freeing */
map->ops->map_free(map);
@ -492,7 +520,7 @@ static int map_create(union bpf_attr *attr)
if (err)
goto free_map_nouncharge;
err = bpf_map_charge_memlock(map);
err = bpf_map_init_memlock(map);
if (err)
goto free_map_sec;
@ -515,7 +543,7 @@ static int map_create(union bpf_attr *attr)
return err;
free_map:
bpf_map_uncharge_memlock(map);
bpf_map_release_memlock(map);
free_map_sec:
security_bpf_map_free(map);
free_map_nouncharge:
@ -929,6 +957,9 @@ static void free_used_maps(struct bpf_prog_aux *aux)
{
int i;
if (aux->cgroup_storage)
bpf_cgroup_storage_release(aux->prog, aux->cgroup_storage);
for (i = 0; i < aux->used_map_cnt; i++)
bpf_map_put(aux->used_maps[i]);

View File

@ -2127,6 +2127,10 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env,
func_id != BPF_FUNC_current_task_under_cgroup)
goto error;
break;
case BPF_MAP_TYPE_CGROUP_STORAGE:
if (func_id != BPF_FUNC_get_local_storage)
goto error;
break;
/* devmap returns a pointer to a live net_device ifindex that we cannot
* allow to be modified from bpf side. So do not allow lookup elements
* for now.
@ -2209,6 +2213,10 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env,
if (map->map_type != BPF_MAP_TYPE_SOCKHASH)
goto error;
break;
case BPF_FUNC_get_local_storage:
if (map->map_type != BPF_MAP_TYPE_CGROUP_STORAGE)
goto error;
break;
default:
break;
}
@ -2533,6 +2541,16 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
}
regs = cur_regs(env);
/* check that flags argument in get_local_storage(map, flags) is 0,
* this is required because get_local_storage() can't return an error.
*/
if (func_id == BPF_FUNC_get_local_storage &&
!register_is_null(&regs[BPF_REG_2])) {
verbose(env, "get_local_storage() doesn't support non-zero flags\n");
return -EINVAL;
}
/* reset caller saved regs */
for (i = 0; i < CALLER_SAVED_REGS; i++) {
mark_reg_not_init(env, regs, caller_saved[i]);
@ -2545,8 +2563,12 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
mark_reg_unknown(env, regs, BPF_REG_0);
} else if (fn->ret_type == RET_VOID) {
regs[BPF_REG_0].type = NOT_INIT;
} else if (fn->ret_type == RET_PTR_TO_MAP_VALUE_OR_NULL) {
regs[BPF_REG_0].type = PTR_TO_MAP_VALUE_OR_NULL;
} else if (fn->ret_type == RET_PTR_TO_MAP_VALUE_OR_NULL ||
fn->ret_type == RET_PTR_TO_MAP_VALUE) {
if (fn->ret_type == RET_PTR_TO_MAP_VALUE)
regs[BPF_REG_0].type = PTR_TO_MAP_VALUE;
else
regs[BPF_REG_0].type = PTR_TO_MAP_VALUE_OR_NULL;
/* There is no offset yet applied, variable or fixed */
mark_reg_known_zero(env, regs, BPF_REG_0);
regs[BPF_REG_0].off = 0;
@ -3238,8 +3260,8 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
}
}
/* check dest operand */
err = check_reg_arg(env, insn->dst_reg, DST_OP);
/* check dest operand, mark as required later */
err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK);
if (err)
return err;
@ -3265,6 +3287,8 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
/* case: R = imm
* remember the value we stored into this reg
*/
/* clear any state __mark_reg_known doesn't set */
mark_reg_unknown(env, regs, insn->dst_reg);
regs[insn->dst_reg].type = SCALAR_VALUE;
if (BPF_CLASS(insn->code) == BPF_ALU64) {
__mark_reg_known(regs + insn->dst_reg,
@ -5152,6 +5176,14 @@ static int replace_map_fd_with_map_ptr(struct bpf_verifier_env *env)
}
env->used_maps[env->used_map_cnt++] = map;
if (map->map_type == BPF_MAP_TYPE_CGROUP_STORAGE &&
bpf_cgroup_storage_assign(env->prog, map)) {
verbose(env,
"only one cgroup storage is allowed\n");
fdput(f);
return -EBUSY;
}
fdput(f);
next_insn:
insn++;
@ -5178,6 +5210,10 @@ static void release_maps(struct bpf_verifier_env *env)
{
int i;
if (env->prog->aux->cgroup_storage)
bpf_cgroup_storage_release(env->prog,
env->prog->aux->cgroup_storage);
for (i = 0; i < env->used_map_cnt; i++)
bpf_map_put(env->used_maps[i]);
}

View File

@ -11,12 +11,14 @@
#include <linux/filter.h>
#include <linux/sched/signal.h>
static __always_inline u32 bpf_test_run_one(struct bpf_prog *prog, void *ctx)
static __always_inline u32 bpf_test_run_one(struct bpf_prog *prog, void *ctx,
struct bpf_cgroup_storage *storage)
{
u32 ret;
preempt_disable();
rcu_read_lock();
bpf_cgroup_storage_set(storage);
ret = BPF_PROG_RUN(prog, ctx);
rcu_read_unlock();
preempt_enable();
@ -26,14 +28,19 @@ static __always_inline u32 bpf_test_run_one(struct bpf_prog *prog, void *ctx)
static u32 bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat, u32 *time)
{
struct bpf_cgroup_storage *storage = NULL;
u64 time_start, time_spent = 0;
u32 ret = 0, i;
storage = bpf_cgroup_storage_alloc(prog);
if (IS_ERR(storage))
return PTR_ERR(storage);
if (!repeat)
repeat = 1;
time_start = ktime_get_ns();
for (i = 0; i < repeat; i++) {
ret = bpf_test_run_one(prog, ctx);
ret = bpf_test_run_one(prog, ctx, storage);
if (need_resched()) {
if (signal_pending(current))
break;
@ -46,6 +53,8 @@ static u32 bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat, u32 *time)
do_div(time_spent, repeat);
*time = time_spent > U32_MAX ? U32_MAX : (u32)time_spent;
bpf_cgroup_storage_free(storage);
return ret;
}

View File

@ -3814,6 +3814,30 @@ static const struct bpf_func_proto bpf_get_socket_cookie_proto = {
.arg1_type = ARG_PTR_TO_CTX,
};
BPF_CALL_1(bpf_get_socket_cookie_sock_addr, struct bpf_sock_addr_kern *, ctx)
{
return sock_gen_cookie(ctx->sk);
}
static const struct bpf_func_proto bpf_get_socket_cookie_sock_addr_proto = {
.func = bpf_get_socket_cookie_sock_addr,
.gpl_only = false,
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_CTX,
};
BPF_CALL_1(bpf_get_socket_cookie_sock_ops, struct bpf_sock_ops_kern *, ctx)
{
return sock_gen_cookie(ctx->sk);
}
static const struct bpf_func_proto bpf_get_socket_cookie_sock_ops_proto = {
.func = bpf_get_socket_cookie_sock_ops,
.gpl_only = false,
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_CTX,
};
BPF_CALL_1(bpf_get_socket_uid, struct sk_buff *, skb)
{
struct sock *sk = sk_to_full_sk(skb->sk);
@ -4544,26 +4568,28 @@ BPF_CALL_4(bpf_lwt_seg6_store_bytes, struct sk_buff *, skb, u32, offset,
{
struct seg6_bpf_srh_state *srh_state =
this_cpu_ptr(&seg6_bpf_srh_states);
struct ipv6_sr_hdr *srh = srh_state->srh;
void *srh_tlvs, *srh_end, *ptr;
struct ipv6_sr_hdr *srh;
int srhoff = 0;
if (ipv6_find_hdr(skb, &srhoff, IPPROTO_ROUTING, NULL, NULL) < 0)
if (srh == NULL)
return -EINVAL;
srh = (struct ipv6_sr_hdr *)(skb->data + srhoff);
srh_tlvs = (void *)((char *)srh + ((srh->first_segment + 1) << 4));
srh_end = (void *)((char *)srh + sizeof(*srh) + srh_state->hdrlen);
ptr = skb->data + offset;
if (ptr >= srh_tlvs && ptr + len <= srh_end)
srh_state->valid = 0;
srh_state->valid = false;
else if (ptr < (void *)&srh->flags ||
ptr + len > (void *)&srh->segments)
return -EFAULT;
if (unlikely(bpf_try_make_writable(skb, offset + len)))
return -EFAULT;
if (ipv6_find_hdr(skb, &srhoff, IPPROTO_ROUTING, NULL, NULL) < 0)
return -EINVAL;
srh_state->srh = (struct ipv6_sr_hdr *)(skb->data + srhoff);
memcpy(skb->data + offset, from, len);
return 0;
@ -4579,52 +4605,78 @@ static const struct bpf_func_proto bpf_lwt_seg6_store_bytes_proto = {
.arg4_type = ARG_CONST_SIZE
};
static void bpf_update_srh_state(struct sk_buff *skb)
{
struct seg6_bpf_srh_state *srh_state =
this_cpu_ptr(&seg6_bpf_srh_states);
int srhoff = 0;
if (ipv6_find_hdr(skb, &srhoff, IPPROTO_ROUTING, NULL, NULL) < 0) {
srh_state->srh = NULL;
} else {
srh_state->srh = (struct ipv6_sr_hdr *)(skb->data + srhoff);
srh_state->hdrlen = srh_state->srh->hdrlen << 3;
srh_state->valid = true;
}
}
BPF_CALL_4(bpf_lwt_seg6_action, struct sk_buff *, skb,
u32, action, void *, param, u32, param_len)
{
struct seg6_bpf_srh_state *srh_state =
this_cpu_ptr(&seg6_bpf_srh_states);
struct ipv6_sr_hdr *srh;
int srhoff = 0;
int hdroff = 0;
int err;
if (ipv6_find_hdr(skb, &srhoff, IPPROTO_ROUTING, NULL, NULL) < 0)
return -EINVAL;
srh = (struct ipv6_sr_hdr *)(skb->data + srhoff);
if (!srh_state->valid) {
if (unlikely((srh_state->hdrlen & 7) != 0))
return -EBADMSG;
srh->hdrlen = (u8)(srh_state->hdrlen >> 3);
if (unlikely(!seg6_validate_srh(srh, (srh->hdrlen + 1) << 3)))
return -EBADMSG;
srh_state->valid = 1;
}
switch (action) {
case SEG6_LOCAL_ACTION_END_X:
if (!seg6_bpf_has_valid_srh(skb))
return -EBADMSG;
if (param_len != sizeof(struct in6_addr))
return -EINVAL;
return seg6_lookup_nexthop(skb, (struct in6_addr *)param, 0);
case SEG6_LOCAL_ACTION_END_T:
if (!seg6_bpf_has_valid_srh(skb))
return -EBADMSG;
if (param_len != sizeof(int))
return -EINVAL;
return seg6_lookup_nexthop(skb, NULL, *(int *)param);
case SEG6_LOCAL_ACTION_END_DT6:
if (!seg6_bpf_has_valid_srh(skb))
return -EBADMSG;
if (param_len != sizeof(int))
return -EINVAL;
if (ipv6_find_hdr(skb, &hdroff, IPPROTO_IPV6, NULL, NULL) < 0)
return -EBADMSG;
if (!pskb_pull(skb, hdroff))
return -EBADMSG;
skb_postpull_rcsum(skb, skb_network_header(skb), hdroff);
skb_reset_network_header(skb);
skb_reset_transport_header(skb);
skb->encapsulation = 0;
bpf_compute_data_pointers(skb);
bpf_update_srh_state(skb);
return seg6_lookup_nexthop(skb, NULL, *(int *)param);
case SEG6_LOCAL_ACTION_END_B6:
if (srh_state->srh && !seg6_bpf_has_valid_srh(skb))
return -EBADMSG;
err = bpf_push_seg6_encap(skb, BPF_LWT_ENCAP_SEG6_INLINE,
param, param_len);
if (!err)
srh_state->hdrlen =
((struct ipv6_sr_hdr *)param)->hdrlen << 3;
bpf_update_srh_state(skb);
return err;
case SEG6_LOCAL_ACTION_END_B6_ENCAP:
if (srh_state->srh && !seg6_bpf_has_valid_srh(skb))
return -EBADMSG;
err = bpf_push_seg6_encap(skb, BPF_LWT_ENCAP_SEG6,
param, param_len);
if (!err)
srh_state->hdrlen =
((struct ipv6_sr_hdr *)param)->hdrlen << 3;
bpf_update_srh_state(skb);
return err;
default:
return -EINVAL;
@ -4646,15 +4698,14 @@ BPF_CALL_3(bpf_lwt_seg6_adjust_srh, struct sk_buff *, skb, u32, offset,
{
struct seg6_bpf_srh_state *srh_state =
this_cpu_ptr(&seg6_bpf_srh_states);
struct ipv6_sr_hdr *srh = srh_state->srh;
void *srh_end, *srh_tlvs, *ptr;
struct ipv6_sr_hdr *srh;
struct ipv6hdr *hdr;
int srhoff = 0;
int ret;
if (ipv6_find_hdr(skb, &srhoff, IPPROTO_ROUTING, NULL, NULL) < 0)
if (unlikely(srh == NULL))
return -EINVAL;
srh = (struct ipv6_sr_hdr *)(skb->data + srhoff);
srh_tlvs = (void *)((unsigned char *)srh + sizeof(*srh) +
((srh->first_segment + 1) << 4));
@ -4684,8 +4735,11 @@ BPF_CALL_3(bpf_lwt_seg6_adjust_srh, struct sk_buff *, skb, u32, offset,
hdr = (struct ipv6hdr *)skb->data;
hdr->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
if (ipv6_find_hdr(skb, &srhoff, IPPROTO_ROUTING, NULL, NULL) < 0)
return -EINVAL;
srh_state->srh = (struct ipv6_sr_hdr *)(skb->data + srhoff);
srh_state->hdrlen += len;
srh_state->valid = 0;
srh_state->valid = false;
return 0;
}
@ -4768,6 +4822,8 @@ sock_filter_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
*/
case BPF_FUNC_get_current_uid_gid:
return &bpf_get_current_uid_gid_proto;
case BPF_FUNC_get_local_storage:
return &bpf_get_local_storage_proto;
default:
return bpf_base_func_proto(func_id);
}
@ -4790,6 +4846,10 @@ sock_addr_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
default:
return NULL;
}
case BPF_FUNC_get_socket_cookie:
return &bpf_get_socket_cookie_sock_addr_proto;
case BPF_FUNC_get_local_storage:
return &bpf_get_local_storage_proto;
default:
return bpf_base_func_proto(func_id);
}
@ -4812,6 +4872,17 @@ sk_filter_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
}
}
static const struct bpf_func_proto *
cg_skb_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
switch (func_id) {
case BPF_FUNC_get_local_storage:
return &bpf_get_local_storage_proto;
default:
return sk_filter_func_proto(func_id, prog);
}
}
static const struct bpf_func_proto *
tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
@ -4932,6 +5003,10 @@ sock_ops_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_sock_map_update_proto;
case BPF_FUNC_sock_hash_update:
return &bpf_sock_hash_update_proto;
case BPF_FUNC_get_socket_cookie:
return &bpf_get_socket_cookie_sock_ops_proto;
case BPF_FUNC_get_local_storage:
return &bpf_get_local_storage_proto;
default:
return bpf_base_func_proto(func_id);
}
@ -4951,6 +5026,8 @@ sk_msg_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_msg_cork_bytes_proto;
case BPF_FUNC_msg_pull_data:
return &bpf_msg_pull_data_proto;
case BPF_FUNC_get_local_storage:
return &bpf_get_local_storage_proto;
default:
return bpf_base_func_proto(func_id);
}
@ -4978,6 +5055,8 @@ sk_skb_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_sk_redirect_map_proto;
case BPF_FUNC_sk_redirect_hash:
return &bpf_sk_redirect_hash_proto;
case BPF_FUNC_get_local_storage:
return &bpf_get_local_storage_proto;
default:
return bpf_base_func_proto(func_id);
}
@ -6782,7 +6861,7 @@ const struct bpf_prog_ops xdp_prog_ops = {
};
const struct bpf_verifier_ops cg_skb_verifier_ops = {
.get_func_proto = sk_filter_func_proto,
.get_func_proto = cg_skb_func_proto,
.is_valid_access = sk_filter_is_valid_access,
.convert_ctx_access = bpf_convert_ctx_access,
};

View File

@ -50,10 +50,8 @@ static int run_lwt_bpf(struct sk_buff *skb, struct bpf_lwt_prog *lwt,
* mixing with BH RCU lock doesn't work.
*/
preempt_disable();
rcu_read_lock();
bpf_compute_data_pointers(skb);
ret = bpf_prog_run_save_cb(lwt->prog, skb);
rcu_read_unlock();
switch (ret) {
case BPF_OK:

View File

@ -459,36 +459,57 @@ drop:
DEFINE_PER_CPU(struct seg6_bpf_srh_state, seg6_bpf_srh_states);
bool seg6_bpf_has_valid_srh(struct sk_buff *skb)
{
struct seg6_bpf_srh_state *srh_state =
this_cpu_ptr(&seg6_bpf_srh_states);
struct ipv6_sr_hdr *srh = srh_state->srh;
if (unlikely(srh == NULL))
return false;
if (unlikely(!srh_state->valid)) {
if ((srh_state->hdrlen & 7) != 0)
return false;
srh->hdrlen = (u8)(srh_state->hdrlen >> 3);
if (!seg6_validate_srh(srh, (srh->hdrlen + 1) << 3))
return false;
srh_state->valid = true;
}
return true;
}
static int input_action_end_bpf(struct sk_buff *skb,
struct seg6_local_lwt *slwt)
{
struct seg6_bpf_srh_state *srh_state =
this_cpu_ptr(&seg6_bpf_srh_states);
struct seg6_bpf_srh_state local_srh_state;
struct ipv6_sr_hdr *srh;
int srhoff = 0;
int ret;
srh = get_and_validate_srh(skb);
if (!srh)
goto drop;
if (!srh) {
kfree_skb(skb);
return -EINVAL;
}
advance_nextseg(srh, &ipv6_hdr(skb)->daddr);
/* preempt_disable is needed to protect the per-CPU buffer srh_state,
* which is also accessed by the bpf_lwt_seg6_* helpers
*/
preempt_disable();
srh_state->srh = srh;
srh_state->hdrlen = srh->hdrlen << 3;
srh_state->valid = 1;
srh_state->valid = true;
rcu_read_lock();
bpf_compute_data_pointers(skb);
ret = bpf_prog_run_save_cb(slwt->bpf.prog, skb);
rcu_read_unlock();
local_srh_state = *srh_state;
preempt_enable();
switch (ret) {
case BPF_OK:
case BPF_REDIRECT:
@ -500,24 +521,17 @@ static int input_action_end_bpf(struct sk_buff *skb,
goto drop;
}
if (unlikely((local_srh_state.hdrlen & 7) != 0))
goto drop;
if (ipv6_find_hdr(skb, &srhoff, IPPROTO_ROUTING, NULL, NULL) < 0)
goto drop;
srh = (struct ipv6_sr_hdr *)(skb->data + srhoff);
srh->hdrlen = (u8)(local_srh_state.hdrlen >> 3);
if (!local_srh_state.valid &&
unlikely(!seg6_validate_srh(srh, (srh->hdrlen + 1) << 3)))
if (srh_state->srh && !seg6_bpf_has_valid_srh(skb))
goto drop;
preempt_enable();
if (ret != BPF_REDIRECT)
seg6_lookup_nexthop(skb, NULL, 0);
return dst_input(skb);
drop:
preempt_enable();
kfree_skb(skb);
return -EINVAL;
}

View File

@ -105,8 +105,8 @@ xdp_rxq_info-objs := xdp_rxq_info_user.o
syscall_tp-objs := bpf_load.o syscall_tp_user.o
cpustat-objs := bpf_load.o cpustat_user.o
xdp_adjust_tail-objs := xdp_adjust_tail_user.o
xdpsock-objs := bpf_load.o xdpsock_user.o
xdp_fwd-objs := bpf_load.o xdp_fwd_user.o
xdpsock-objs := xdpsock_user.o
xdp_fwd-objs := xdp_fwd_user.o
task_fd_query-objs := bpf_load.o task_fd_query_user.o $(TRACE_HELPERS)
xdp_sample_pkts-objs := xdp_sample_pkts_user.o $(TRACE_HELPERS)
@ -195,6 +195,8 @@ HOSTLOADLIBES_xdpsock += -pthread
# make samples/bpf/ LLC=~/git/llvm/build/bin/llc CLANG=~/git/llvm/build/bin/clang
LLC ?= llc
CLANG ?= clang
LLVM_OBJCOPY ?= llvm-objcopy
BTF_PAHOLE ?= pahole
# Detect that we're cross compiling and use the cross compiler
ifdef CROSS_COMPILE
@ -202,6 +204,16 @@ HOSTCC = $(CROSS_COMPILE)gcc
CLANG_ARCH_ARGS = -target $(ARCH)
endif
BTF_LLC_PROBE := $(shell $(LLC) -march=bpf -mattr=help 2>&1 | grep dwarfris)
BTF_PAHOLE_PROBE := $(shell $(BTF_PAHOLE) --help 2>&1 | grep BTF)
BTF_OBJCOPY_PROBE := $(shell $(LLVM_OBJCOPY) --help 2>&1 | grep -i 'usage.*llvm')
ifneq ($(and $(BTF_LLC_PROBE),$(BTF_PAHOLE_PROBE),$(BTF_OBJCOPY_PROBE)),)
EXTRA_CFLAGS += -g
LLC_FLAGS += -mattr=dwarfris
DWARF2BTF = y
endif
# Trick to allow make to be run from this directory
all:
$(MAKE) -C ../../ $(CURDIR)/ BPF_SAMPLES_PATH=$(CURDIR)
@ -260,4 +272,7 @@ $(obj)/%.o: $(src)/%.c
-Wno-gnu-variable-sized-type-not-at-end \
-Wno-address-of-packed-member -Wno-tautological-compare \
-Wno-unknown-warning-option $(CLANG_ARCH_ARGS) \
-O2 -emit-llvm -c $< -o -| $(LLC) -march=bpf -filetype=obj -o $@
-O2 -emit-llvm -c $< -o -| $(LLC) -march=bpf $(LLC_FLAGS) -filetype=obj -o $@
ifeq ($(DWARF2BTF),y)
$(BTF_PAHOLE) -J $@
endif

View File

@ -8,7 +8,8 @@
* information. The number of invocations of the program, which maps
* to the number of packets received, is stored to key 0. Key 1 is
* incremented on each iteration by the number of bytes stored in
* the skb.
* the skb. The program also stores the number of received bytes
* in the cgroup storage.
*
* - Attaches the new program to a cgroup using BPF_PROG_ATTACH
*
@ -21,12 +22,15 @@
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include <sys/resource.h>
#include <sys/time.h>
#include <unistd.h>
#include <linux/bpf.h>
#include <bpf/bpf.h>
#include "bpf_insn.h"
#include "bpf_rlimit.h"
#include "cgroup_helpers.h"
#define FOO "/foo"
@ -205,6 +209,8 @@ static int map_fd = -1;
static int prog_load_cnt(int verdict, int val)
{
int cgroup_storage_fd;
if (map_fd < 0)
map_fd = bpf_create_map(BPF_MAP_TYPE_ARRAY, 4, 8, 1, 0);
if (map_fd < 0) {
@ -212,6 +218,13 @@ static int prog_load_cnt(int verdict, int val)
return -1;
}
cgroup_storage_fd = bpf_create_map(BPF_MAP_TYPE_CGROUP_STORAGE,
sizeof(struct bpf_cgroup_storage_key), 8, 0, 0);
if (cgroup_storage_fd < 0) {
printf("failed to create map '%s'\n", strerror(errno));
return -1;
}
struct bpf_insn prog[] = {
BPF_MOV32_IMM(BPF_REG_0, 0),
BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_0, -4), /* *(u32 *)(fp - 4) = r0 */
@ -222,6 +235,11 @@ static int prog_load_cnt(int verdict, int val)
BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
BPF_MOV64_IMM(BPF_REG_1, val), /* r1 = 1 */
BPF_RAW_INSN(BPF_STX | BPF_XADD | BPF_DW, BPF_REG_0, BPF_REG_1, 0, 0), /* xadd r0 += r1 */
BPF_LD_MAP_FD(BPF_REG_1, cgroup_storage_fd),
BPF_MOV64_IMM(BPF_REG_2, 0),
BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_local_storage),
BPF_MOV64_IMM(BPF_REG_1, val),
BPF_RAW_INSN(BPF_STX | BPF_XADD | BPF_W, BPF_REG_0, BPF_REG_1, 0, 0),
BPF_MOV64_IMM(BPF_REG_0, verdict), /* r0 = verdict */
BPF_EXIT_INSN(),
};
@ -237,6 +255,7 @@ static int prog_load_cnt(int verdict, int val)
printf("Output from verifier:\n%s\n-------\n", bpf_log_buf);
return 0;
}
close(cgroup_storage_fd);
return ret;
}

View File

@ -24,8 +24,7 @@
#include <fcntl.h>
#include <libgen.h>
#include "bpf_load.h"
#include "bpf_util.h"
#include "bpf/libbpf.h"
#include <bpf/bpf.h>
@ -63,9 +62,15 @@ static void usage(const char *prog)
int main(int argc, char **argv)
{
struct bpf_prog_load_attr prog_load_attr = {
.prog_type = BPF_PROG_TYPE_XDP,
};
const char *prog_name = "xdp_fwd";
struct bpf_program *prog;
char filename[PATH_MAX];
struct bpf_object *obj;
int opt, i, idx, err;
int prog_id = 0;
int prog_fd, map_fd;
int attach = 1;
int ret = 0;
@ -75,7 +80,7 @@ int main(int argc, char **argv)
attach = 0;
break;
case 'D':
prog_id = 1;
prog_name = "xdp_fwd_direct";
break;
default:
usage(basename(argv[0]));
@ -90,6 +95,7 @@ int main(int argc, char **argv)
if (attach) {
snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
prog_load_attr.file = filename;
if (access(filename, O_RDONLY) < 0) {
printf("error accessing file %s: %s\n",
@ -97,19 +103,25 @@ int main(int argc, char **argv)
return 1;
}
if (load_bpf_file(filename)) {
printf("%s", bpf_log_buf);
if (bpf_prog_load_xattr(&prog_load_attr, &obj, &prog_fd))
return 1;
prog = bpf_object__find_program_by_title(obj, prog_name);
prog_fd = bpf_program__fd(prog);
if (prog_fd < 0) {
printf("program not found: %s\n", strerror(prog_fd));
return 1;
}
if (!prog_fd[prog_id]) {
printf("load_bpf_file: %s\n", strerror(errno));
map_fd = bpf_map__fd(bpf_object__find_map_by_name(obj,
"tx_port"));
if (map_fd < 0) {
printf("map not found: %s\n", strerror(map_fd));
return 1;
}
}
if (attach) {
for (i = 1; i < 64; ++i)
bpf_map_update_elem(map_fd[0], &i, &i, 0);
bpf_map_update_elem(map_fd, &i, &i, 0);
}
for (i = optind; i < argc; ++i) {
@ -126,7 +138,7 @@ int main(int argc, char **argv)
if (err)
ret = err;
} else {
err = do_attach(idx, prog_fd[prog_id], argv[i]);
err = do_attach(idx, prog_fd, argv[i]);
if (err)
ret = err;
}

View File

@ -26,7 +26,7 @@
#include <sys/types.h>
#include <poll.h>
#include "bpf_load.h"
#include "bpf/libbpf.h"
#include "bpf_util.h"
#include <bpf/bpf.h>
@ -145,8 +145,13 @@ static void dump_stats(void);
} while (0)
#define barrier() __asm__ __volatile__("": : :"memory")
#ifdef __aarch64__
#define u_smp_rmb() __asm__ __volatile__("dmb ishld": : :"memory")
#define u_smp_wmb() __asm__ __volatile__("dmb ishst": : :"memory")
#else
#define u_smp_rmb() barrier()
#define u_smp_wmb() barrier()
#endif
#define likely(x) __builtin_expect(!!(x), 1)
#define unlikely(x) __builtin_expect(!!(x), 0)
@ -886,7 +891,13 @@ static void l2fwd(struct xdpsock *xsk)
int main(int argc, char **argv)
{
struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
struct bpf_prog_load_attr prog_load_attr = {
.prog_type = BPF_PROG_TYPE_XDP,
};
int prog_fd, qidconf_map, xsks_map;
struct bpf_object *obj;
char xdp_filename[256];
struct bpf_map *map;
int i, ret, key = 0;
pthread_t pt;
@ -899,24 +910,38 @@ int main(int argc, char **argv)
}
snprintf(xdp_filename, sizeof(xdp_filename), "%s_kern.o", argv[0]);
prog_load_attr.file = xdp_filename;
if (load_bpf_file(xdp_filename)) {
fprintf(stderr, "ERROR: load_bpf_file %s\n", bpf_log_buf);
if (bpf_prog_load_xattr(&prog_load_attr, &obj, &prog_fd))
exit(EXIT_FAILURE);
if (prog_fd < 0) {
fprintf(stderr, "ERROR: no program found: %s\n",
strerror(prog_fd));
exit(EXIT_FAILURE);
}
if (!prog_fd[0]) {
fprintf(stderr, "ERROR: load_bpf_file: \"%s\"\n",
strerror(errno));
map = bpf_object__find_map_by_name(obj, "qidconf_map");
qidconf_map = bpf_map__fd(map);
if (qidconf_map < 0) {
fprintf(stderr, "ERROR: no qidconf map found: %s\n",
strerror(qidconf_map));
exit(EXIT_FAILURE);
}
if (bpf_set_link_xdp_fd(opt_ifindex, prog_fd[0], opt_xdp_flags) < 0) {
map = bpf_object__find_map_by_name(obj, "xsks_map");
xsks_map = bpf_map__fd(map);
if (xsks_map < 0) {
fprintf(stderr, "ERROR: no xsks map found: %s\n",
strerror(xsks_map));
exit(EXIT_FAILURE);
}
if (bpf_set_link_xdp_fd(opt_ifindex, prog_fd, opt_xdp_flags) < 0) {
fprintf(stderr, "ERROR: link set xdp fd failed\n");
exit(EXIT_FAILURE);
}
ret = bpf_map_update_elem(map_fd[0], &key, &opt_queue, 0);
ret = bpf_map_update_elem(qidconf_map, &key, &opt_queue, 0);
if (ret) {
fprintf(stderr, "ERROR: bpf_map_update_elem qidconf\n");
exit(EXIT_FAILURE);
@ -933,7 +958,7 @@ int main(int argc, char **argv)
/* ...and insert them into the map. */
for (i = 0; i < num_socks; i++) {
key = i;
ret = bpf_map_update_elem(map_fd[1], &key, &xsks[i]->sfd, 0);
ret = bpf_map_update_elem(xsks_map, &key, &xsks[i]->sfd, 0);
if (ret) {
fprintf(stderr, "ERROR: bpf_map_update_elem %d\n", i);
exit(EXIT_FAILURE);

5
tools/bpf/.gitignore vendored Normal file
View File

@ -0,0 +1,5 @@
FEATURE-DUMP.bpf
bpf_asm
bpf_dbg
bpf_exp.yacc.*
bpf_jit_disasm

View File

@ -1,3 +1,5 @@
*.d
bpftool
bpftool*.8
bpf-helpers.*
FEATURE-DUMP.bpftool

View File

@ -69,6 +69,7 @@ static const char * const map_type_name[] = {
[BPF_MAP_TYPE_SOCKMAP] = "sockmap",
[BPF_MAP_TYPE_CPUMAP] = "cpumap",
[BPF_MAP_TYPE_SOCKHASH] = "sockhash",
[BPF_MAP_TYPE_CGROUP_STORAGE] = "cgroup_storage",
};
static bool map_is_per_cpu(__u32 type)
@ -232,7 +233,7 @@ static int get_btf(struct bpf_map_info *map_info, struct btf **btf)
*btf = btf__new((__u8 *)btf_info.btf, btf_info.btf_size, NULL);
if (IS_ERR(*btf)) {
err = PTR_ERR(btf);
err = PTR_ERR(*btf);
*btf = NULL;
}

View File

@ -75,6 +75,11 @@ struct bpf_lpm_trie_key {
__u8 data[0]; /* Arbitrary size */
};
struct bpf_cgroup_storage_key {
__u64 cgroup_inode_id; /* cgroup inode id */
__u32 attach_type; /* program attach type */
};
/* BPF syscall commands, see bpf(2) man-page for details. */
enum bpf_cmd {
BPF_MAP_CREATE,
@ -120,6 +125,7 @@ enum bpf_map_type {
BPF_MAP_TYPE_CPUMAP,
BPF_MAP_TYPE_XSKMAP,
BPF_MAP_TYPE_SOCKHASH,
BPF_MAP_TYPE_CGROUP_STORAGE,
};
enum bpf_prog_type {
@ -1371,6 +1377,20 @@ union bpf_attr {
* A 8-byte long non-decreasing number on success, or 0 if the
* socket field is missing inside *skb*.
*
* u64 bpf_get_socket_cookie(struct bpf_sock_addr *ctx)
* Description
* Equivalent to bpf_get_socket_cookie() helper that accepts
* *skb*, but gets socket from **struct bpf_sock_addr** contex.
* Return
* A 8-byte long non-decreasing number.
*
* u64 bpf_get_socket_cookie(struct bpf_sock_ops *ctx)
* Description
* Equivalent to bpf_get_socket_cookie() helper that accepts
* *skb*, but gets socket from **struct bpf_sock_ops** contex.
* Return
* A 8-byte long non-decreasing number.
*
* u32 bpf_get_socket_uid(struct sk_buff *skb)
* Return
* The owner UID of the socket associated to *skb*. If the socket
@ -2075,6 +2095,24 @@ union bpf_attr {
* Return
* A 64-bit integer containing the current cgroup id based
* on the cgroup within which the current task is running.
*
* void* get_local_storage(void *map, u64 flags)
* Description
* Get the pointer to the local storage area.
* The type and the size of the local storage is defined
* by the *map* argument.
* The *flags* meaning is specific for each map type,
* and has to be 0 for cgroup local storage.
*
* Depending on the bpf program type, a local storage area
* can be shared between multiple instances of the bpf program,
* running simultaneously.
*
* A user should care about the synchronization by himself.
* For example, by using the BPF_STX_XADD instruction to alter
* the shared data.
* Return
* Pointer to the local storage area.
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
@ -2157,7 +2195,8 @@ union bpf_attr {
FN(rc_repeat), \
FN(rc_keydown), \
FN(skb_cgroup_id), \
FN(get_current_cgroup_id),
FN(get_current_cgroup_id), \
FN(get_local_storage),
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
* function eBPF program intends to call

View File

@ -468,8 +468,10 @@ static int bpf_object__elf_init(struct bpf_object *obj)
} else {
obj->efile.fd = open(obj->path, O_RDONLY);
if (obj->efile.fd < 0) {
pr_warning("failed to open %s: %s\n", obj->path,
strerror(errno));
char errmsg[STRERR_BUFSIZE];
char *cp = strerror_r(errno, errmsg, sizeof(errmsg));
pr_warning("failed to open %s: %s\n", obj->path, cp);
return -errno;
}
@ -808,10 +810,11 @@ static int bpf_object__elf_collect(struct bpf_object *obj)
data->d_size, name, idx);
if (err) {
char errmsg[STRERR_BUFSIZE];
char *cp = strerror_r(-err, errmsg,
sizeof(errmsg));
strerror_r(-err, errmsg, sizeof(errmsg));
pr_warning("failed to alloc program %s (%s): %s",
name, obj->path, errmsg);
name, obj->path, cp);
}
} else if (sh.sh_type == SHT_REL) {
void *reloc = obj->efile.reloc;
@ -874,6 +877,18 @@ bpf_object__find_prog_by_idx(struct bpf_object *obj, int idx)
return NULL;
}
struct bpf_program *
bpf_object__find_program_by_title(struct bpf_object *obj, const char *title)
{
struct bpf_program *pos;
bpf_object__for_each_program(pos, obj) {
if (pos->section_name && !strcmp(pos->section_name, title))
return pos;
}
return NULL;
}
static int
bpf_program__collect_reloc(struct bpf_program *prog, GElf_Shdr *shdr,
Elf_Data *data, struct bpf_object *obj)
@ -1097,6 +1112,7 @@ bpf_object__create_maps(struct bpf_object *obj)
for (i = 0; i < obj->nr_maps; i++) {
struct bpf_map *map = &obj->maps[i];
struct bpf_map_def *def = &map->def;
char *cp, errmsg[STRERR_BUFSIZE];
int *pfd = &map->fd;
if (map->fd >= 0) {
@ -1124,8 +1140,9 @@ bpf_object__create_maps(struct bpf_object *obj)
*pfd = bpf_create_map_xattr(&create_attr);
if (*pfd < 0 && create_attr.btf_key_type_id) {
cp = strerror_r(errno, errmsg, sizeof(errmsg));
pr_warning("Error in bpf_create_map_xattr(%s):%s(%d). Retrying without BTF.\n",
map->name, strerror(errno), errno);
map->name, cp, errno);
create_attr.btf_fd = 0;
create_attr.btf_key_type_id = 0;
create_attr.btf_value_type_id = 0;
@ -1138,9 +1155,9 @@ bpf_object__create_maps(struct bpf_object *obj)
size_t j;
err = *pfd;
cp = strerror_r(errno, errmsg, sizeof(errmsg));
pr_warning("failed to create map (name: '%s'): %s\n",
map->name,
strerror(errno));
map->name, cp);
for (j = 0; j < i; j++)
zclose(obj->maps[j].fd);
return err;
@ -1292,6 +1309,7 @@ load_program(enum bpf_prog_type type, enum bpf_attach_type expected_attach_type,
char *license, u32 kern_version, int *pfd, int prog_ifindex)
{
struct bpf_load_program_attr load_attr;
char *cp, errmsg[STRERR_BUFSIZE];
char *log_buf;
int ret;
@ -1321,7 +1339,8 @@ load_program(enum bpf_prog_type type, enum bpf_attach_type expected_attach_type,
}
ret = -LIBBPF_ERRNO__LOAD;
pr_warning("load bpf program failed: %s\n", strerror(errno));
cp = strerror_r(errno, errmsg, sizeof(errmsg));
pr_warning("load bpf program failed: %s\n", cp);
if (log_buf && log_buf[0] != '\0') {
ret = -LIBBPF_ERRNO__VERIFY;
@ -1620,6 +1639,7 @@ out:
static int check_path(const char *path)
{
char *cp, errmsg[STRERR_BUFSIZE];
struct statfs st_fs;
char *dname, *dir;
int err = 0;
@ -1633,7 +1653,8 @@ static int check_path(const char *path)
dir = dirname(dname);
if (statfs(dir, &st_fs)) {
pr_warning("failed to statfs %s: %s\n", dir, strerror(errno));
cp = strerror_r(errno, errmsg, sizeof(errmsg));
pr_warning("failed to statfs %s: %s\n", dir, cp);
err = -errno;
}
free(dname);
@ -1649,6 +1670,7 @@ static int check_path(const char *path)
int bpf_program__pin_instance(struct bpf_program *prog, const char *path,
int instance)
{
char *cp, errmsg[STRERR_BUFSIZE];
int err;
err = check_path(path);
@ -1667,7 +1689,8 @@ int bpf_program__pin_instance(struct bpf_program *prog, const char *path,
}
if (bpf_obj_pin(prog->instances.fds[instance], path)) {
pr_warning("failed to pin program: %s\n", strerror(errno));
cp = strerror_r(errno, errmsg, sizeof(errmsg));
pr_warning("failed to pin program: %s\n", cp);
return -errno;
}
pr_debug("pinned program '%s'\n", path);
@ -1677,13 +1700,16 @@ int bpf_program__pin_instance(struct bpf_program *prog, const char *path,
static int make_dir(const char *path)
{
char *cp, errmsg[STRERR_BUFSIZE];
int err = 0;
if (mkdir(path, 0700) && errno != EEXIST)
err = -errno;
if (err)
pr_warning("failed to mkdir %s: %s\n", path, strerror(-err));
if (err) {
cp = strerror_r(-err, errmsg, sizeof(errmsg));
pr_warning("failed to mkdir %s: %s\n", path, cp);
}
return err;
}
@ -1730,6 +1756,7 @@ int bpf_program__pin(struct bpf_program *prog, const char *path)
int bpf_map__pin(struct bpf_map *map, const char *path)
{
char *cp, errmsg[STRERR_BUFSIZE];
int err;
err = check_path(path);
@ -1742,7 +1769,8 @@ int bpf_map__pin(struct bpf_map *map, const char *path)
}
if (bpf_obj_pin(map->fd, path)) {
pr_warning("failed to pin map: %s\n", strerror(errno));
cp = strerror_r(errno, errmsg, sizeof(errmsg));
pr_warning("failed to pin map: %s\n", cp);
return -errno;
}
@ -1996,6 +2024,9 @@ int bpf_program__nth_fd(struct bpf_program *prog, int n)
{
int fd;
if (!prog)
return -EINVAL;
if (n >= prog->instances.nr || n < 0) {
pr_warning("Can't get the %dth fd from program %s: only %d instances\n",
n, prog->section_name, prog->instances.nr);

View File

@ -86,6 +86,9 @@ const char *bpf_object__name(struct bpf_object *obj);
unsigned int bpf_object__kversion(struct bpf_object *obj);
int bpf_object__btf_fd(const struct bpf_object *obj);
struct bpf_program *
bpf_object__find_program_by_title(struct bpf_object *obj, const char *title);
struct bpf_object *bpf_object__next(struct bpf_object *prev);
#define bpf_object__for_each_safe(pos, tmp) \
for ((pos) = bpf_object__next(NULL), \

View File

@ -22,7 +22,8 @@ $(TEST_CUSTOM_PROGS): $(OUTPUT)/%: %.c
# Order correspond to 'make run_tests' order
TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test_progs \
test_align test_verifier_log test_dev_cgroup test_tcpbpf_user \
test_sock test_btf test_sockmap test_lirc_mode2_user get_cgroup_id_user
test_sock test_btf test_sockmap test_lirc_mode2_user get_cgroup_id_user \
test_socket_cookie test_cgroup_storage
TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o test_tcp_estats.o test_obj_id.o \
test_pkt_md_access.o test_xdp_redirect.o test_xdp_meta.o sockmap_parse_prog.o \
@ -33,7 +34,7 @@ TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o test_tcp_estats.o test
test_btf_haskv.o test_btf_nokv.o test_sockmap_kern.o test_tunnel_kern.o \
test_get_stack_rawtp.o test_sockmap_kern.o test_sockhash_kern.o \
test_lwt_seg6local.o sendmsg4_prog.o sendmsg6_prog.o test_lirc_mode2_kern.o \
get_cgroup_id_kern.o
get_cgroup_id_kern.o socket_cookie_prog.o
# Order correspond to 'make run_tests' order
TEST_PROGS := test_kmod.sh \
@ -60,10 +61,12 @@ $(TEST_GEN_PROGS_EXTENDED): $(OUTPUT)/libbpf.a
$(OUTPUT)/test_dev_cgroup: cgroup_helpers.c
$(OUTPUT)/test_sock: cgroup_helpers.c
$(OUTPUT)/test_sock_addr: cgroup_helpers.c
$(OUTPUT)/test_socket_cookie: cgroup_helpers.c
$(OUTPUT)/test_sockmap: cgroup_helpers.c
$(OUTPUT)/test_tcpbpf_user: cgroup_helpers.c
$(OUTPUT)/test_progs: trace_helpers.c
$(OUTPUT)/get_cgroup_id_user: cgroup_helpers.c
$(OUTPUT)/test_cgroup_storage: cgroup_helpers.c
.PHONY: force

View File

@ -65,6 +65,8 @@ static int (*bpf_xdp_adjust_head)(void *ctx, int offset) =
(void *) BPF_FUNC_xdp_adjust_head;
static int (*bpf_xdp_adjust_meta)(void *ctx, int offset) =
(void *) BPF_FUNC_xdp_adjust_meta;
static int (*bpf_get_socket_cookie)(void *ctx) =
(void *) BPF_FUNC_get_socket_cookie;
static int (*bpf_setsockopt)(void *ctx, int level, int optname, void *optval,
int optlen) =
(void *) BPF_FUNC_setsockopt;
@ -133,6 +135,8 @@ static int (*bpf_rc_keydown)(void *ctx, unsigned int protocol,
(void *) BPF_FUNC_rc_keydown;
static unsigned long long (*bpf_get_current_cgroup_id)(void) =
(void *) BPF_FUNC_get_current_cgroup_id;
static void *(*bpf_get_local_storage)(void *map, unsigned long long flags) =
(void *) BPF_FUNC_get_local_storage;
/* llvm builtin functions that eBPF C program may use to
* emit BPF_LD_ABS and BPF_LD_IND instructions

View File

@ -0,0 +1,60 @@
// SPDX-License-Identifier: GPL-2.0
// Copyright (c) 2018 Facebook
#include <linux/bpf.h>
#include <sys/socket.h>
#include "bpf_helpers.h"
#include "bpf_endian.h"
struct bpf_map_def SEC("maps") socket_cookies = {
.type = BPF_MAP_TYPE_HASH,
.key_size = sizeof(__u64),
.value_size = sizeof(__u32),
.max_entries = 1 << 8,
};
SEC("cgroup/connect6")
int set_cookie(struct bpf_sock_addr *ctx)
{
__u32 cookie_value = 0xFF;
__u64 cookie_key;
if (ctx->family != AF_INET6 || ctx->user_family != AF_INET6)
return 1;
cookie_key = bpf_get_socket_cookie(ctx);
if (bpf_map_update_elem(&socket_cookies, &cookie_key, &cookie_value, 0))
return 0;
return 1;
}
SEC("sockops")
int update_cookie(struct bpf_sock_ops *ctx)
{
__u32 new_cookie_value;
__u32 *cookie_value;
__u64 cookie_key;
if (ctx->family != AF_INET6)
return 1;
if (ctx->op != BPF_SOCK_OPS_TCP_CONNECT_CB)
return 1;
cookie_key = bpf_get_socket_cookie(ctx);
cookie_value = bpf_map_lookup_elem(&socket_cookies, &cookie_key);
if (!cookie_value)
return 1;
new_cookie_value = (ctx->local_port << 8) | *cookie_value;
bpf_map_update_elem(&socket_cookies, &cookie_key, &new_cookie_value, 0);
return 1;
}
int _version SEC("version") = 1;
char _license[] SEC("license") = "GPL";

View File

@ -1,4 +1,4 @@
#!/usr/bin/env python2
#!/usr/bin/env python3
#
# SPDX-License-Identifier: GPL-2.0
#
@ -9,11 +9,11 @@ import subprocess
import select
def read(sock, n):
buf = ''
buf = b''
while len(buf) < n:
rem = n - len(buf)
try: s = sock.recv(rem)
except (socket.error), e: return ''
except (socket.error) as e: return b''
buf += s
return buf
@ -22,7 +22,7 @@ def send(sock, s):
count = 0
while count < total:
try: n = sock.send(s)
except (socket.error), e: n = 0
except (socket.error) as e: n = 0
if n == 0:
return count;
count += n
@ -39,10 +39,10 @@ try:
except socket.error as e:
sys.exit(1)
buf = ''
buf = b''
n = 0
while n < 1000:
buf += '+'
buf += b'+'
n += 1
sock.settimeout(1);

View File

@ -1,4 +1,4 @@
#!/usr/bin/env python2
#!/usr/bin/env python3
#
# SPDX-License-Identifier: GPL-2.0
#
@ -9,11 +9,11 @@ import subprocess
import select
def read(sock, n):
buf = ''
buf = b''
while len(buf) < n:
rem = n - len(buf)
try: s = sock.recv(rem)
except (socket.error), e: return ''
except (socket.error) as e: return b''
buf += s
return buf
@ -22,7 +22,7 @@ def send(sock, s):
count = 0
while count < total:
try: n = sock.send(s)
except (socket.error), e: n = 0
except (socket.error) as e: n = 0
if n == 0:
return count;
count += n
@ -43,7 +43,7 @@ host = socket.gethostname()
try: serverSocket.bind((host, 0))
except socket.error as msg:
print 'bind fails: ', msg
print('bind fails: ' + str(msg))
sn = serverSocket.getsockname()
serverPort = sn[1]
@ -51,10 +51,10 @@ serverPort = sn[1]
cmdStr = ("./tcp_client.py %d &") % (serverPort)
os.system(cmdStr)
buf = ''
buf = b''
n = 0
while n < 500:
buf += '.'
buf += b'.'
n += 1
serverSocket.listen(MAX_PORTS)
@ -79,5 +79,5 @@ while True:
serverSocket.close()
sys.exit(0)
else:
print 'Select timeout!'
print('Select timeout!')
sys.exit(1)

View File

@ -0,0 +1,130 @@
// SPDX-License-Identifier: GPL-2.0
#include <assert.h>
#include <bpf/bpf.h>
#include <linux/filter.h>
#include <stdio.h>
#include <stdlib.h>
#include "cgroup_helpers.h"
char bpf_log_buf[BPF_LOG_BUF_SIZE];
#define TEST_CGROUP "/test-bpf-cgroup-storage-buf/"
int main(int argc, char **argv)
{
struct bpf_insn prog[] = {
BPF_LD_MAP_FD(BPF_REG_1, 0), /* map fd */
BPF_MOV64_IMM(BPF_REG_2, 0), /* flags, not used */
BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
BPF_FUNC_get_local_storage),
BPF_MOV64_IMM(BPF_REG_1, 1),
BPF_STX_XADD(BPF_DW, BPF_REG_0, BPF_REG_1, 0),
BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0),
BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0x1),
BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
BPF_EXIT_INSN(),
};
size_t insns_cnt = sizeof(prog) / sizeof(struct bpf_insn);
int error = EXIT_FAILURE;
int map_fd, prog_fd, cgroup_fd;
struct bpf_cgroup_storage_key key;
unsigned long long value;
map_fd = bpf_create_map(BPF_MAP_TYPE_CGROUP_STORAGE, sizeof(key),
sizeof(value), 0, 0);
if (map_fd < 0) {
printf("Failed to create map: %s\n", strerror(errno));
goto out;
}
prog[0].imm = map_fd;
prog_fd = bpf_load_program(BPF_PROG_TYPE_CGROUP_SKB,
prog, insns_cnt, "GPL", 0,
bpf_log_buf, BPF_LOG_BUF_SIZE);
if (prog_fd < 0) {
printf("Failed to load bpf program: %s\n", bpf_log_buf);
goto out;
}
if (setup_cgroup_environment()) {
printf("Failed to setup cgroup environment\n");
goto err;
}
/* Create a cgroup, get fd, and join it */
cgroup_fd = create_and_get_cgroup(TEST_CGROUP);
if (!cgroup_fd) {
printf("Failed to create test cgroup\n");
goto err;
}
if (join_cgroup(TEST_CGROUP)) {
printf("Failed to join cgroup\n");
goto err;
}
/* Attach the bpf program */
if (bpf_prog_attach(prog_fd, cgroup_fd, BPF_CGROUP_INET_EGRESS, 0)) {
printf("Failed to attach bpf program\n");
goto err;
}
if (bpf_map_get_next_key(map_fd, NULL, &key)) {
printf("Failed to get the first key in cgroup storage\n");
goto err;
}
if (bpf_map_lookup_elem(map_fd, &key, &value)) {
printf("Failed to lookup cgroup storage\n");
goto err;
}
/* Every second packet should be dropped */
assert(system("ping localhost -c 1 -W 1 -q > /dev/null") == 0);
assert(system("ping localhost -c 1 -W 1 -q > /dev/null"));
assert(system("ping localhost -c 1 -W 1 -q > /dev/null") == 0);
/* Check the counter in the cgroup local storage */
if (bpf_map_lookup_elem(map_fd, &key, &value)) {
printf("Failed to lookup cgroup storage\n");
goto err;
}
if (value != 3) {
printf("Unexpected data in the cgroup storage: %llu\n", value);
goto err;
}
/* Bump the counter in the cgroup local storage */
value++;
if (bpf_map_update_elem(map_fd, &key, &value, 0)) {
printf("Failed to update the data in the cgroup storage\n");
goto err;
}
/* Every second packet should be dropped */
assert(system("ping localhost -c 1 -W 1 -q > /dev/null") == 0);
assert(system("ping localhost -c 1 -W 1 -q > /dev/null"));
assert(system("ping localhost -c 1 -W 1 -q > /dev/null") == 0);
/* Check the final value of the counter in the cgroup local storage */
if (bpf_map_lookup_elem(map_fd, &key, &value)) {
printf("Failed to lookup the cgroup storage\n");
goto err;
}
if (value != 7) {
printf("Unexpected data in the cgroup storage: %llu\n", value);
goto err;
}
error = 0;
printf("test_cgroup_storage:PASS\n");
err:
cleanup_cgroup_environment();
out:
return error;
}

View File

@ -0,0 +1,225 @@
// SPDX-License-Identifier: GPL-2.0
// Copyright (c) 2018 Facebook
#include <string.h>
#include <unistd.h>
#include <arpa/inet.h>
#include <netinet/in.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <bpf/bpf.h>
#include <bpf/libbpf.h>
#include "bpf_rlimit.h"
#include "cgroup_helpers.h"
#define CG_PATH "/foo"
#define SOCKET_COOKIE_PROG "./socket_cookie_prog.o"
static int start_server(void)
{
struct sockaddr_in6 addr;
int fd;
fd = socket(AF_INET6, SOCK_STREAM, 0);
if (fd == -1) {
log_err("Failed to create server socket");
goto out;
}
memset(&addr, 0, sizeof(addr));
addr.sin6_family = AF_INET6;
addr.sin6_addr = in6addr_loopback;
addr.sin6_port = 0;
if (bind(fd, (const struct sockaddr *)&addr, sizeof(addr)) == -1) {
log_err("Failed to bind server socket");
goto close_out;
}
if (listen(fd, 128) == -1) {
log_err("Failed to listen on server socket");
goto close_out;
}
goto out;
close_out:
close(fd);
fd = -1;
out:
return fd;
}
static int connect_to_server(int server_fd)
{
struct sockaddr_storage addr;
socklen_t len = sizeof(addr);
int fd;
fd = socket(AF_INET6, SOCK_STREAM, 0);
if (fd == -1) {
log_err("Failed to create client socket");
goto out;
}
if (getsockname(server_fd, (struct sockaddr *)&addr, &len)) {
log_err("Failed to get server addr");
goto close_out;
}
if (connect(fd, (const struct sockaddr *)&addr, len) == -1) {
log_err("Fail to connect to server");
goto close_out;
}
goto out;
close_out:
close(fd);
fd = -1;
out:
return fd;
}
static int validate_map(struct bpf_map *map, int client_fd)
{
__u32 cookie_expected_value;
struct sockaddr_in6 addr;
socklen_t len = sizeof(addr);
__u32 cookie_value;
__u64 cookie_key;
int err = 0;
int map_fd;
if (!map) {
log_err("Map not found in BPF object");
goto err;
}
map_fd = bpf_map__fd(map);
err = bpf_map_get_next_key(map_fd, NULL, &cookie_key);
if (err) {
log_err("Can't get cookie key from map");
goto out;
}
err = bpf_map_lookup_elem(map_fd, &cookie_key, &cookie_value);
if (err) {
log_err("Can't get cookie value from map");
goto out;
}
err = getsockname(client_fd, (struct sockaddr *)&addr, &len);
if (err) {
log_err("Can't get client local addr");
goto out;
}
cookie_expected_value = (ntohs(addr.sin6_port) << 8) | 0xFF;
if (cookie_value != cookie_expected_value) {
log_err("Unexpected value in map: %x != %x", cookie_value,
cookie_expected_value);
goto err;
}
goto out;
err:
err = -1;
out:
return err;
}
static int run_test(int cgfd)
{
enum bpf_attach_type attach_type;
struct bpf_prog_load_attr attr;
struct bpf_program *prog;
struct bpf_object *pobj;
const char *prog_name;
int server_fd = -1;
int client_fd = -1;
int prog_fd = -1;
int err = 0;
memset(&attr, 0, sizeof(attr));
attr.file = SOCKET_COOKIE_PROG;
attr.prog_type = BPF_PROG_TYPE_UNSPEC;
err = bpf_prog_load_xattr(&attr, &pobj, &prog_fd);
if (err) {
log_err("Failed to load %s", attr.file);
goto out;
}
bpf_object__for_each_program(prog, pobj) {
prog_name = bpf_program__title(prog, /*needs_copy*/ false);
if (strcmp(prog_name, "cgroup/connect6") == 0) {
attach_type = BPF_CGROUP_INET6_CONNECT;
} else if (strcmp(prog_name, "sockops") == 0) {
attach_type = BPF_CGROUP_SOCK_OPS;
} else {
log_err("Unexpected prog: %s", prog_name);
goto err;
}
err = bpf_prog_attach(bpf_program__fd(prog), cgfd, attach_type,
BPF_F_ALLOW_OVERRIDE);
if (err) {
log_err("Failed to attach prog %s", prog_name);
goto out;
}
}
server_fd = start_server();
if (server_fd == -1)
goto err;
client_fd = connect_to_server(server_fd);
if (client_fd == -1)
goto err;
if (validate_map(bpf_map__next(NULL, pobj), client_fd))
goto err;
goto out;
err:
err = -1;
out:
close(client_fd);
close(server_fd);
bpf_object__close(pobj);
printf("%s\n", err ? "FAILED" : "PASSED");
return err;
}
int main(int argc, char **argv)
{
int cgfd = -1;
int err = 0;
if (setup_cgroup_environment())
goto err;
cgfd = create_and_get_cgroup(CG_PATH);
if (!cgfd)
goto err;
if (join_cgroup(CG_PATH))
goto err;
if (run_test(cgfd))
goto err;
goto out;
err:
err = -1;
out:
close(cgfd);
cleanup_cgroup_environment();
return err;
}

View File

@ -50,7 +50,7 @@
#define MAX_INSNS BPF_MAXINSNS
#define MAX_FIXUPS 8
#define MAX_NR_MAPS 7
#define MAX_NR_MAPS 8
#define POINTER_VALUE 0xcafe4all
#define TEST_DATA_LEN 64
@ -70,6 +70,7 @@ struct bpf_test {
int fixup_prog1[MAX_FIXUPS];
int fixup_prog2[MAX_FIXUPS];
int fixup_map_in_map[MAX_FIXUPS];
int fixup_cgroup_storage[MAX_FIXUPS];
const char *errstr;
const char *errstr_unpriv;
uint32_t retval;
@ -4630,6 +4631,121 @@ static struct bpf_test tests[] = {
.result = REJECT,
.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
},
{
"valid cgroup storage access",
.insns = {
BPF_MOV64_IMM(BPF_REG_2, 0),
BPF_LD_MAP_FD(BPF_REG_1, 0),
BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
BPF_FUNC_get_local_storage),
BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0),
BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1),
BPF_EXIT_INSN(),
},
.fixup_cgroup_storage = { 1 },
.result = ACCEPT,
.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
},
{
"invalid cgroup storage access 1",
.insns = {
BPF_MOV64_IMM(BPF_REG_2, 0),
BPF_LD_MAP_FD(BPF_REG_1, 0),
BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
BPF_FUNC_get_local_storage),
BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0),
BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1),
BPF_EXIT_INSN(),
},
.fixup_map1 = { 1 },
.result = REJECT,
.errstr = "cannot pass map_type 1 into func bpf_get_local_storage",
.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
},
{
"invalid cgroup storage access 2",
.insns = {
BPF_MOV64_IMM(BPF_REG_2, 0),
BPF_LD_MAP_FD(BPF_REG_1, 1),
BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
BPF_FUNC_get_local_storage),
BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1),
BPF_EXIT_INSN(),
},
.result = REJECT,
.errstr = "fd 1 is not pointing to valid bpf_map",
.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
},
{
"invalid per-cgroup storage access 3",
.insns = {
BPF_MOV64_IMM(BPF_REG_2, 0),
BPF_LD_MAP_FD(BPF_REG_1, 0),
BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
BPF_FUNC_get_local_storage),
BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 256),
BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 1),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
.fixup_cgroup_storage = { 1 },
.result = REJECT,
.errstr = "invalid access to map value, value_size=64 off=256 size=4",
.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
},
{
"invalid cgroup storage access 4",
.insns = {
BPF_MOV64_IMM(BPF_REG_2, 0),
BPF_LD_MAP_FD(BPF_REG_1, 0),
BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
BPF_FUNC_get_local_storage),
BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, -2),
BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 1),
BPF_EXIT_INSN(),
},
.fixup_cgroup_storage = { 1 },
.result = REJECT,
.errstr = "invalid access to map value, value_size=64 off=-2 size=4",
.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
},
{
"invalid cgroup storage access 5",
.insns = {
BPF_MOV64_IMM(BPF_REG_2, 7),
BPF_LD_MAP_FD(BPF_REG_1, 0),
BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
BPF_FUNC_get_local_storage),
BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0),
BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1),
BPF_EXIT_INSN(),
},
.fixup_cgroup_storage = { 1 },
.result = REJECT,
.errstr = "get_local_storage() doesn't support non-zero flags",
.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
},
{
"invalid cgroup storage access 6",
.insns = {
BPF_MOV64_REG(BPF_REG_2, BPF_REG_1),
BPF_LD_MAP_FD(BPF_REG_1, 0),
BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
BPF_FUNC_get_local_storage),
BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0),
BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1),
BPF_EXIT_INSN(),
},
.fixup_cgroup_storage = { 1 },
.result = REJECT,
.errstr = "get_local_storage() doesn't support non-zero flags",
.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
},
{
"multiple registers share map_lookup_elem result",
.insns = {
@ -6997,7 +7113,7 @@ static struct bpf_test tests[] = {
BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
BPF_FUNC_map_lookup_elem),
BPF_MOV64_REG(BPF_REG_0, 0),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
.fixup_map_in_map = { 3 },
@ -7020,7 +7136,7 @@ static struct bpf_test tests[] = {
BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
BPF_FUNC_map_lookup_elem),
BPF_MOV64_REG(BPF_REG_0, 0),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
.fixup_map_in_map = { 3 },
@ -7042,7 +7158,7 @@ static struct bpf_test tests[] = {
BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
BPF_FUNC_map_lookup_elem),
BPF_MOV64_REG(BPF_REG_0, 0),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
.fixup_map_in_map = { 3 },
@ -12372,6 +12488,32 @@ static struct bpf_test tests[] = {
.result = REJECT,
.errstr = "variable ctx access var_off=(0x0; 0x4)",
},
{
"mov64 src == dst",
.insns = {
BPF_MOV64_IMM(BPF_REG_2, 0),
BPF_MOV64_REG(BPF_REG_2, BPF_REG_2),
// Check bounds are OK
BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_2),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
.result = ACCEPT,
},
{
"mov64 src != dst",
.insns = {
BPF_MOV64_IMM(BPF_REG_3, 0),
BPF_MOV64_REG(BPF_REG_2, BPF_REG_3),
// Check bounds are OK
BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_2),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
.result = ACCEPT,
},
};
static int probe_filter_length(const struct bpf_insn *fp)
@ -12476,6 +12618,19 @@ static int create_map_in_map(void)
return outer_map_fd;
}
static int create_cgroup_storage(void)
{
int fd;
fd = bpf_create_map(BPF_MAP_TYPE_CGROUP_STORAGE,
sizeof(struct bpf_cgroup_storage_key),
TEST_DATA_LEN, 0, 0);
if (fd < 0)
printf("Failed to create array '%s'!\n", strerror(errno));
return fd;
}
static char bpf_vlog[UINT_MAX >> 8];
static void do_test_fixup(struct bpf_test *test, struct bpf_insn *prog,
@ -12488,6 +12643,7 @@ static void do_test_fixup(struct bpf_test *test, struct bpf_insn *prog,
int *fixup_prog1 = test->fixup_prog1;
int *fixup_prog2 = test->fixup_prog2;
int *fixup_map_in_map = test->fixup_map_in_map;
int *fixup_cgroup_storage = test->fixup_cgroup_storage;
if (test->fill_helper)
test->fill_helper(test);
@ -12555,6 +12711,14 @@ static void do_test_fixup(struct bpf_test *test, struct bpf_insn *prog,
fixup_map_in_map++;
} while (*fixup_map_in_map);
}
if (*fixup_cgroup_storage) {
map_fds[7] = create_cgroup_storage();
do {
prog[*fixup_cgroup_storage].imm = map_fds[7];
fixup_cgroup_storage++;
} while (*fixup_cgroup_storage);
}
}
static void do_test_single(struct bpf_test *test, bool unpriv,