Merge branch 'bpf-map-prealloc'
Alexei Starovoitov says: ==================== bpf: map pre-alloc v1->v2: . fix few issues spotted by Daniel . converted stackmap into pre-allocation as well . added a workaround for lockdep false positive . added pcpu_freelist_populate to be used by hashmap and stackmap this path set switches bpf hash map to use pre-allocation by default and introduces BPF_F_NO_PREALLOC flag to keep old behavior for cases where full map pre-allocation is too memory expensive. Some time back Daniel Wagner reported crashes when bpf hash map is used to compute time intervals between preempt_disable->preempt_enable and recently Tom Zanussi reported a dead lock in iovisor/bcc/funccount tool if it's used to count the number of invocations of kernel '*spin*' functions. Both problems are due to the recursive use of slub and can only be solved by pre-allocating all map elements. A lot of different solutions were considered. Many implemented, but at the end pre-allocation seems to be the only feasible answer. As far as pre-allocation goes it also was implemented 4 different ways: - simple free-list with single lock - percpu_ida with optimizations - blk-mq-tag variant customized for bpf use case - percpu_freelist For bpf style of alloc/free patterns percpu_freelist is the best and implemented in this patch set. Detailed performance numbers in patch 3. Patch 2 introduces percpu_freelist Patch 1 fixes simple deadlocks due to missing recursion checks Patch 5: converts stackmap to pre-allocation Patches 6-9: prepare test infra Patch 10: stress test for hash map infra. It attaches to spin_lock functions and bpf_map_update/delete are called from different contexts Patch 11: stress for bpf_get_stackid Patch 12: map performance test Reported-by: Daniel Wagner <daniel.wagner@bmw-carit.de> Reported-by: Tom Zanussi <tom.zanussi@linux.intel.com> ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
commit
f14b488d50
@ -10,6 +10,7 @@
|
||||
#include <uapi/linux/bpf.h>
|
||||
#include <linux/workqueue.h>
|
||||
#include <linux/file.h>
|
||||
#include <linux/percpu.h>
|
||||
|
||||
struct bpf_map;
|
||||
|
||||
@ -36,6 +37,7 @@ struct bpf_map {
|
||||
u32 key_size;
|
||||
u32 value_size;
|
||||
u32 max_entries;
|
||||
u32 map_flags;
|
||||
u32 pages;
|
||||
struct user_struct *user;
|
||||
const struct bpf_map_ops *ops;
|
||||
@ -163,6 +165,8 @@ bool bpf_prog_array_compatible(struct bpf_array *array, const struct bpf_prog *f
|
||||
const struct bpf_func_proto *bpf_get_trace_printk_proto(void);
|
||||
|
||||
#ifdef CONFIG_BPF_SYSCALL
|
||||
DECLARE_PER_CPU(int, bpf_prog_active);
|
||||
|
||||
void bpf_register_prog_type(struct bpf_prog_type_list *tl);
|
||||
void bpf_register_map_type(struct bpf_map_type_list *tl);
|
||||
|
||||
@ -175,6 +179,7 @@ struct bpf_map *__bpf_map_get(struct fd f);
|
||||
void bpf_map_inc(struct bpf_map *map, bool uref);
|
||||
void bpf_map_put_with_uref(struct bpf_map *map);
|
||||
void bpf_map_put(struct bpf_map *map);
|
||||
int bpf_map_precharge_memlock(u32 pages);
|
||||
|
||||
extern int sysctl_unprivileged_bpf_disabled;
|
||||
|
||||
@ -190,6 +195,7 @@ int bpf_percpu_hash_update(struct bpf_map *map, void *key, void *value,
|
||||
u64 flags);
|
||||
int bpf_percpu_array_update(struct bpf_map *map, void *key, void *value,
|
||||
u64 flags);
|
||||
int bpf_stackmap_copy(struct bpf_map *map, void *key, void *value);
|
||||
|
||||
/* memcpy that is used with 8-byte aligned pointers, power-of-8 size and
|
||||
* forced to use 'long' read/writes to try to atomically copy long counters.
|
||||
|
@ -101,12 +101,15 @@ enum bpf_prog_type {
|
||||
#define BPF_NOEXIST 1 /* create new element if it didn't exist */
|
||||
#define BPF_EXIST 2 /* update existing element */
|
||||
|
||||
#define BPF_F_NO_PREALLOC (1U << 0)
|
||||
|
||||
union bpf_attr {
|
||||
struct { /* anonymous struct used by BPF_MAP_CREATE command */
|
||||
__u32 map_type; /* one of enum bpf_map_type */
|
||||
__u32 key_size; /* size of key in bytes */
|
||||
__u32 value_size; /* size of value in bytes */
|
||||
__u32 max_entries; /* max number of entries in a map */
|
||||
__u32 map_flags; /* prealloc or not */
|
||||
};
|
||||
|
||||
struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */
|
||||
|
@ -1,7 +1,7 @@
|
||||
obj-y := core.o
|
||||
|
||||
obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o
|
||||
obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o
|
||||
obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o
|
||||
ifeq ($(CONFIG_PERF_EVENTS),y)
|
||||
obj-$(CONFIG_BPF_SYSCALL) += stackmap.o
|
||||
endif
|
||||
|
@ -53,7 +53,7 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr)
|
||||
|
||||
/* check sanity of attributes */
|
||||
if (attr->max_entries == 0 || attr->key_size != 4 ||
|
||||
attr->value_size == 0)
|
||||
attr->value_size == 0 || attr->map_flags)
|
||||
return ERR_PTR(-EINVAL);
|
||||
|
||||
if (attr->value_size >= 1 << (KMALLOC_SHIFT_MAX - 1))
|
||||
|
@ -1,4 +1,5 @@
|
||||
/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
|
||||
* Copyright (c) 2016 Facebook
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of version 2 of the GNU General Public
|
||||
@ -13,6 +14,7 @@
|
||||
#include <linux/jhash.h>
|
||||
#include <linux/filter.h>
|
||||
#include <linux/vmalloc.h>
|
||||
#include "percpu_freelist.h"
|
||||
|
||||
struct bucket {
|
||||
struct hlist_head head;
|
||||
@ -22,6 +24,8 @@ struct bucket {
|
||||
struct bpf_htab {
|
||||
struct bpf_map map;
|
||||
struct bucket *buckets;
|
||||
void *elems;
|
||||
struct pcpu_freelist freelist;
|
||||
atomic_t count; /* number of elements in this hashtable */
|
||||
u32 n_buckets; /* number of hash buckets */
|
||||
u32 elem_size; /* size of each element in bytes */
|
||||
@ -29,15 +33,86 @@ struct bpf_htab {
|
||||
|
||||
/* each htab element is struct htab_elem + key + value */
|
||||
struct htab_elem {
|
||||
struct hlist_node hash_node;
|
||||
struct rcu_head rcu;
|
||||
union {
|
||||
u32 hash;
|
||||
u32 key_size;
|
||||
struct hlist_node hash_node;
|
||||
struct bpf_htab *htab;
|
||||
struct pcpu_freelist_node fnode;
|
||||
};
|
||||
struct rcu_head rcu;
|
||||
u32 hash;
|
||||
char key[0] __aligned(8);
|
||||
};
|
||||
|
||||
static inline void htab_elem_set_ptr(struct htab_elem *l, u32 key_size,
|
||||
void __percpu *pptr)
|
||||
{
|
||||
*(void __percpu **)(l->key + key_size) = pptr;
|
||||
}
|
||||
|
||||
static inline void __percpu *htab_elem_get_ptr(struct htab_elem *l, u32 key_size)
|
||||
{
|
||||
return *(void __percpu **)(l->key + key_size);
|
||||
}
|
||||
|
||||
static struct htab_elem *get_htab_elem(struct bpf_htab *htab, int i)
|
||||
{
|
||||
return (struct htab_elem *) (htab->elems + i * htab->elem_size);
|
||||
}
|
||||
|
||||
static void htab_free_elems(struct bpf_htab *htab)
|
||||
{
|
||||
int i;
|
||||
|
||||
if (htab->map.map_type != BPF_MAP_TYPE_PERCPU_HASH)
|
||||
goto free_elems;
|
||||
|
||||
for (i = 0; i < htab->map.max_entries; i++) {
|
||||
void __percpu *pptr;
|
||||
|
||||
pptr = htab_elem_get_ptr(get_htab_elem(htab, i),
|
||||
htab->map.key_size);
|
||||
free_percpu(pptr);
|
||||
}
|
||||
free_elems:
|
||||
vfree(htab->elems);
|
||||
}
|
||||
|
||||
static int prealloc_elems_and_freelist(struct bpf_htab *htab)
|
||||
{
|
||||
int err = -ENOMEM, i;
|
||||
|
||||
htab->elems = vzalloc(htab->elem_size * htab->map.max_entries);
|
||||
if (!htab->elems)
|
||||
return -ENOMEM;
|
||||
|
||||
if (htab->map.map_type != BPF_MAP_TYPE_PERCPU_HASH)
|
||||
goto skip_percpu_elems;
|
||||
|
||||
for (i = 0; i < htab->map.max_entries; i++) {
|
||||
u32 size = round_up(htab->map.value_size, 8);
|
||||
void __percpu *pptr;
|
||||
|
||||
pptr = __alloc_percpu_gfp(size, 8, GFP_USER | __GFP_NOWARN);
|
||||
if (!pptr)
|
||||
goto free_elems;
|
||||
htab_elem_set_ptr(get_htab_elem(htab, i), htab->map.key_size,
|
||||
pptr);
|
||||
}
|
||||
|
||||
skip_percpu_elems:
|
||||
err = pcpu_freelist_init(&htab->freelist);
|
||||
if (err)
|
||||
goto free_elems;
|
||||
|
||||
pcpu_freelist_populate(&htab->freelist, htab->elems, htab->elem_size,
|
||||
htab->map.max_entries);
|
||||
return 0;
|
||||
|
||||
free_elems:
|
||||
htab_free_elems(htab);
|
||||
return err;
|
||||
}
|
||||
|
||||
/* Called from syscall */
|
||||
static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
|
||||
{
|
||||
@ -46,6 +121,10 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
|
||||
int err, i;
|
||||
u64 cost;
|
||||
|
||||
if (attr->map_flags & ~BPF_F_NO_PREALLOC)
|
||||
/* reserved bits should not be used */
|
||||
return ERR_PTR(-EINVAL);
|
||||
|
||||
htab = kzalloc(sizeof(*htab), GFP_USER);
|
||||
if (!htab)
|
||||
return ERR_PTR(-ENOMEM);
|
||||
@ -55,6 +134,7 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
|
||||
htab->map.key_size = attr->key_size;
|
||||
htab->map.value_size = attr->value_size;
|
||||
htab->map.max_entries = attr->max_entries;
|
||||
htab->map.map_flags = attr->map_flags;
|
||||
|
||||
/* check sanity of attributes.
|
||||
* value_size == 0 may be allowed in the future to use map as a set
|
||||
@ -92,7 +172,7 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
|
||||
if (percpu)
|
||||
htab->elem_size += sizeof(void *);
|
||||
else
|
||||
htab->elem_size += htab->map.value_size;
|
||||
htab->elem_size += round_up(htab->map.value_size, 8);
|
||||
|
||||
/* prevent zero size kmalloc and check for u32 overflow */
|
||||
if (htab->n_buckets == 0 ||
|
||||
@ -112,6 +192,11 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
|
||||
|
||||
htab->map.pages = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT;
|
||||
|
||||
/* if map size is larger than memlock limit, reject it early */
|
||||
err = bpf_map_precharge_memlock(htab->map.pages);
|
||||
if (err)
|
||||
goto free_htab;
|
||||
|
||||
err = -ENOMEM;
|
||||
htab->buckets = kmalloc_array(htab->n_buckets, sizeof(struct bucket),
|
||||
GFP_USER | __GFP_NOWARN);
|
||||
@ -127,10 +212,16 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
|
||||
raw_spin_lock_init(&htab->buckets[i].lock);
|
||||
}
|
||||
|
||||
atomic_set(&htab->count, 0);
|
||||
if (!(attr->map_flags & BPF_F_NO_PREALLOC)) {
|
||||
err = prealloc_elems_and_freelist(htab);
|
||||
if (err)
|
||||
goto free_buckets;
|
||||
}
|
||||
|
||||
return &htab->map;
|
||||
|
||||
free_buckets:
|
||||
kvfree(htab->buckets);
|
||||
free_htab:
|
||||
kfree(htab);
|
||||
return ERR_PTR(err);
|
||||
@ -249,42 +340,42 @@ find_first_elem:
|
||||
}
|
||||
}
|
||||
|
||||
/* itereated over all buckets and all elements */
|
||||
/* iterated over all buckets and all elements */
|
||||
return -ENOENT;
|
||||
}
|
||||
|
||||
|
||||
static inline void htab_elem_set_ptr(struct htab_elem *l, u32 key_size,
|
||||
void __percpu *pptr)
|
||||
static void htab_elem_free(struct bpf_htab *htab, struct htab_elem *l)
|
||||
{
|
||||
*(void __percpu **)(l->key + key_size) = pptr;
|
||||
}
|
||||
|
||||
static inline void __percpu *htab_elem_get_ptr(struct htab_elem *l, u32 key_size)
|
||||
{
|
||||
return *(void __percpu **)(l->key + key_size);
|
||||
}
|
||||
|
||||
static void htab_percpu_elem_free(struct htab_elem *l)
|
||||
{
|
||||
free_percpu(htab_elem_get_ptr(l, l->key_size));
|
||||
if (htab->map.map_type == BPF_MAP_TYPE_PERCPU_HASH)
|
||||
free_percpu(htab_elem_get_ptr(l, htab->map.key_size));
|
||||
kfree(l);
|
||||
|
||||
}
|
||||
|
||||
static void htab_percpu_elem_free_rcu(struct rcu_head *head)
|
||||
static void htab_elem_free_rcu(struct rcu_head *head)
|
||||
{
|
||||
struct htab_elem *l = container_of(head, struct htab_elem, rcu);
|
||||
struct bpf_htab *htab = l->htab;
|
||||
|
||||
htab_percpu_elem_free(l);
|
||||
/* must increment bpf_prog_active to avoid kprobe+bpf triggering while
|
||||
* we're calling kfree, otherwise deadlock is possible if kprobes
|
||||
* are placed somewhere inside of slub
|
||||
*/
|
||||
preempt_disable();
|
||||
__this_cpu_inc(bpf_prog_active);
|
||||
htab_elem_free(htab, l);
|
||||
__this_cpu_dec(bpf_prog_active);
|
||||
preempt_enable();
|
||||
}
|
||||
|
||||
static void free_htab_elem(struct htab_elem *l, bool percpu, u32 key_size)
|
||||
static void free_htab_elem(struct bpf_htab *htab, struct htab_elem *l)
|
||||
{
|
||||
if (percpu) {
|
||||
l->key_size = key_size;
|
||||
call_rcu(&l->rcu, htab_percpu_elem_free_rcu);
|
||||
if (!(htab->map.map_flags & BPF_F_NO_PREALLOC)) {
|
||||
pcpu_freelist_push(&htab->freelist, &l->fnode);
|
||||
} else {
|
||||
kfree_rcu(l, rcu);
|
||||
atomic_dec(&htab->count);
|
||||
l->htab = htab;
|
||||
call_rcu(&l->rcu, htab_elem_free_rcu);
|
||||
}
|
||||
}
|
||||
|
||||
@ -293,23 +384,39 @@ static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key,
|
||||
bool percpu, bool onallcpus)
|
||||
{
|
||||
u32 size = htab->map.value_size;
|
||||
bool prealloc = !(htab->map.map_flags & BPF_F_NO_PREALLOC);
|
||||
struct htab_elem *l_new;
|
||||
void __percpu *pptr;
|
||||
|
||||
l_new = kmalloc(htab->elem_size, GFP_ATOMIC | __GFP_NOWARN);
|
||||
if (!l_new)
|
||||
return NULL;
|
||||
if (prealloc) {
|
||||
l_new = (struct htab_elem *)pcpu_freelist_pop(&htab->freelist);
|
||||
if (!l_new)
|
||||
return ERR_PTR(-E2BIG);
|
||||
} else {
|
||||
if (atomic_inc_return(&htab->count) > htab->map.max_entries) {
|
||||
atomic_dec(&htab->count);
|
||||
return ERR_PTR(-E2BIG);
|
||||
}
|
||||
l_new = kmalloc(htab->elem_size, GFP_ATOMIC | __GFP_NOWARN);
|
||||
if (!l_new)
|
||||
return ERR_PTR(-ENOMEM);
|
||||
}
|
||||
|
||||
memcpy(l_new->key, key, key_size);
|
||||
if (percpu) {
|
||||
/* round up value_size to 8 bytes */
|
||||
size = round_up(size, 8);
|
||||
|
||||
/* alloc_percpu zero-fills */
|
||||
pptr = __alloc_percpu_gfp(size, 8, GFP_ATOMIC | __GFP_NOWARN);
|
||||
if (!pptr) {
|
||||
kfree(l_new);
|
||||
return NULL;
|
||||
if (prealloc) {
|
||||
pptr = htab_elem_get_ptr(l_new, key_size);
|
||||
} else {
|
||||
/* alloc_percpu zero-fills */
|
||||
pptr = __alloc_percpu_gfp(size, 8,
|
||||
GFP_ATOMIC | __GFP_NOWARN);
|
||||
if (!pptr) {
|
||||
kfree(l_new);
|
||||
return ERR_PTR(-ENOMEM);
|
||||
}
|
||||
}
|
||||
|
||||
if (!onallcpus) {
|
||||
@ -324,7 +431,8 @@ static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key,
|
||||
off += size;
|
||||
}
|
||||
}
|
||||
htab_elem_set_ptr(l_new, key_size, pptr);
|
||||
if (!prealloc)
|
||||
htab_elem_set_ptr(l_new, key_size, pptr);
|
||||
} else {
|
||||
memcpy(l_new->key + round_up(key_size, 8), value, size);
|
||||
}
|
||||
@ -336,12 +444,6 @@ static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key,
|
||||
static int check_flags(struct bpf_htab *htab, struct htab_elem *l_old,
|
||||
u64 map_flags)
|
||||
{
|
||||
if (!l_old && unlikely(atomic_read(&htab->count) >= htab->map.max_entries))
|
||||
/* if elem with this 'key' doesn't exist and we've reached
|
||||
* max_entries limit, fail insertion of new elem
|
||||
*/
|
||||
return -E2BIG;
|
||||
|
||||
if (l_old && map_flags == BPF_NOEXIST)
|
||||
/* elem already exists */
|
||||
return -EEXIST;
|
||||
@ -375,13 +477,6 @@ static int htab_map_update_elem(struct bpf_map *map, void *key, void *value,
|
||||
|
||||
hash = htab_map_hash(key, key_size);
|
||||
|
||||
/* allocate new element outside of the lock, since
|
||||
* we're most likley going to insert it
|
||||
*/
|
||||
l_new = alloc_htab_elem(htab, key, value, key_size, hash, false, false);
|
||||
if (!l_new)
|
||||
return -ENOMEM;
|
||||
|
||||
b = __select_bucket(htab, hash);
|
||||
head = &b->head;
|
||||
|
||||
@ -394,21 +489,24 @@ static int htab_map_update_elem(struct bpf_map *map, void *key, void *value,
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
l_new = alloc_htab_elem(htab, key, value, key_size, hash, false, false);
|
||||
if (IS_ERR(l_new)) {
|
||||
/* all pre-allocated elements are in use or memory exhausted */
|
||||
ret = PTR_ERR(l_new);
|
||||
goto err;
|
||||
}
|
||||
|
||||
/* add new element to the head of the list, so that
|
||||
* concurrent search will find it before old elem
|
||||
*/
|
||||
hlist_add_head_rcu(&l_new->hash_node, head);
|
||||
if (l_old) {
|
||||
hlist_del_rcu(&l_old->hash_node);
|
||||
kfree_rcu(l_old, rcu);
|
||||
} else {
|
||||
atomic_inc(&htab->count);
|
||||
free_htab_elem(htab, l_old);
|
||||
}
|
||||
raw_spin_unlock_irqrestore(&b->lock, flags);
|
||||
return 0;
|
||||
ret = 0;
|
||||
err:
|
||||
raw_spin_unlock_irqrestore(&b->lock, flags);
|
||||
kfree(l_new);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@ -466,12 +564,11 @@ static int __htab_percpu_map_update_elem(struct bpf_map *map, void *key,
|
||||
} else {
|
||||
l_new = alloc_htab_elem(htab, key, value, key_size,
|
||||
hash, true, onallcpus);
|
||||
if (!l_new) {
|
||||
ret = -ENOMEM;
|
||||
if (IS_ERR(l_new)) {
|
||||
ret = PTR_ERR(l_new);
|
||||
goto err;
|
||||
}
|
||||
hlist_add_head_rcu(&l_new->hash_node, head);
|
||||
atomic_inc(&htab->count);
|
||||
}
|
||||
ret = 0;
|
||||
err:
|
||||
@ -489,7 +586,6 @@ static int htab_percpu_map_update_elem(struct bpf_map *map, void *key,
|
||||
static int htab_map_delete_elem(struct bpf_map *map, void *key)
|
||||
{
|
||||
struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
|
||||
bool percpu = map->map_type == BPF_MAP_TYPE_PERCPU_HASH;
|
||||
struct hlist_head *head;
|
||||
struct bucket *b;
|
||||
struct htab_elem *l;
|
||||
@ -511,8 +607,7 @@ static int htab_map_delete_elem(struct bpf_map *map, void *key)
|
||||
|
||||
if (l) {
|
||||
hlist_del_rcu(&l->hash_node);
|
||||
atomic_dec(&htab->count);
|
||||
free_htab_elem(l, percpu, key_size);
|
||||
free_htab_elem(htab, l);
|
||||
ret = 0;
|
||||
}
|
||||
|
||||
@ -531,17 +626,10 @@ static void delete_all_elements(struct bpf_htab *htab)
|
||||
|
||||
hlist_for_each_entry_safe(l, n, head, hash_node) {
|
||||
hlist_del_rcu(&l->hash_node);
|
||||
atomic_dec(&htab->count);
|
||||
if (htab->map.map_type == BPF_MAP_TYPE_PERCPU_HASH) {
|
||||
l->key_size = htab->map.key_size;
|
||||
htab_percpu_elem_free(l);
|
||||
} else {
|
||||
kfree(l);
|
||||
}
|
||||
htab_elem_free(htab, l);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Called when map->refcnt goes to zero, either from workqueue or from syscall */
|
||||
static void htab_map_free(struct bpf_map *map)
|
||||
{
|
||||
@ -554,10 +642,16 @@ static void htab_map_free(struct bpf_map *map)
|
||||
*/
|
||||
synchronize_rcu();
|
||||
|
||||
/* some of kfree_rcu() callbacks for elements of this map may not have
|
||||
* executed. It's ok. Proceed to free residual elements and map itself
|
||||
/* some of free_htab_elem() callbacks for elements of this map may
|
||||
* not have executed. Wait for them.
|
||||
*/
|
||||
delete_all_elements(htab);
|
||||
rcu_barrier();
|
||||
if (htab->map.map_flags & BPF_F_NO_PREALLOC) {
|
||||
delete_all_elements(htab);
|
||||
} else {
|
||||
htab_free_elems(htab);
|
||||
pcpu_freelist_destroy(&htab->freelist);
|
||||
}
|
||||
kvfree(htab->buckets);
|
||||
kfree(htab);
|
||||
}
|
||||
|
100
kernel/bpf/percpu_freelist.c
Normal file
100
kernel/bpf/percpu_freelist.c
Normal file
@ -0,0 +1,100 @@
|
||||
/* Copyright (c) 2016 Facebook
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of version 2 of the GNU General Public
|
||||
* License as published by the Free Software Foundation.
|
||||
*/
|
||||
#include "percpu_freelist.h"
|
||||
|
||||
int pcpu_freelist_init(struct pcpu_freelist *s)
|
||||
{
|
||||
int cpu;
|
||||
|
||||
s->freelist = alloc_percpu(struct pcpu_freelist_head);
|
||||
if (!s->freelist)
|
||||
return -ENOMEM;
|
||||
|
||||
for_each_possible_cpu(cpu) {
|
||||
struct pcpu_freelist_head *head = per_cpu_ptr(s->freelist, cpu);
|
||||
|
||||
raw_spin_lock_init(&head->lock);
|
||||
head->first = NULL;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
void pcpu_freelist_destroy(struct pcpu_freelist *s)
|
||||
{
|
||||
free_percpu(s->freelist);
|
||||
}
|
||||
|
||||
static inline void __pcpu_freelist_push(struct pcpu_freelist_head *head,
|
||||
struct pcpu_freelist_node *node)
|
||||
{
|
||||
raw_spin_lock(&head->lock);
|
||||
node->next = head->first;
|
||||
head->first = node;
|
||||
raw_spin_unlock(&head->lock);
|
||||
}
|
||||
|
||||
void pcpu_freelist_push(struct pcpu_freelist *s,
|
||||
struct pcpu_freelist_node *node)
|
||||
{
|
||||
struct pcpu_freelist_head *head = this_cpu_ptr(s->freelist);
|
||||
|
||||
__pcpu_freelist_push(head, node);
|
||||
}
|
||||
|
||||
void pcpu_freelist_populate(struct pcpu_freelist *s, void *buf, u32 elem_size,
|
||||
u32 nr_elems)
|
||||
{
|
||||
struct pcpu_freelist_head *head;
|
||||
unsigned long flags;
|
||||
int i, cpu, pcpu_entries;
|
||||
|
||||
pcpu_entries = nr_elems / num_possible_cpus() + 1;
|
||||
i = 0;
|
||||
|
||||
/* disable irq to workaround lockdep false positive
|
||||
* in bpf usage pcpu_freelist_populate() will never race
|
||||
* with pcpu_freelist_push()
|
||||
*/
|
||||
local_irq_save(flags);
|
||||
for_each_possible_cpu(cpu) {
|
||||
again:
|
||||
head = per_cpu_ptr(s->freelist, cpu);
|
||||
__pcpu_freelist_push(head, buf);
|
||||
i++;
|
||||
buf += elem_size;
|
||||
if (i == nr_elems)
|
||||
break;
|
||||
if (i % pcpu_entries)
|
||||
goto again;
|
||||
}
|
||||
local_irq_restore(flags);
|
||||
}
|
||||
|
||||
struct pcpu_freelist_node *pcpu_freelist_pop(struct pcpu_freelist *s)
|
||||
{
|
||||
struct pcpu_freelist_head *head;
|
||||
struct pcpu_freelist_node *node;
|
||||
int orig_cpu, cpu;
|
||||
|
||||
orig_cpu = cpu = raw_smp_processor_id();
|
||||
while (1) {
|
||||
head = per_cpu_ptr(s->freelist, cpu);
|
||||
raw_spin_lock(&head->lock);
|
||||
node = head->first;
|
||||
if (node) {
|
||||
head->first = node->next;
|
||||
raw_spin_unlock(&head->lock);
|
||||
return node;
|
||||
}
|
||||
raw_spin_unlock(&head->lock);
|
||||
cpu = cpumask_next(cpu, cpu_possible_mask);
|
||||
if (cpu >= nr_cpu_ids)
|
||||
cpu = 0;
|
||||
if (cpu == orig_cpu)
|
||||
return NULL;
|
||||
}
|
||||
}
|
31
kernel/bpf/percpu_freelist.h
Normal file
31
kernel/bpf/percpu_freelist.h
Normal file
@ -0,0 +1,31 @@
|
||||
/* Copyright (c) 2016 Facebook
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of version 2 of the GNU General Public
|
||||
* License as published by the Free Software Foundation.
|
||||
*/
|
||||
#ifndef __PERCPU_FREELIST_H__
|
||||
#define __PERCPU_FREELIST_H__
|
||||
#include <linux/spinlock.h>
|
||||
#include <linux/percpu.h>
|
||||
|
||||
struct pcpu_freelist_head {
|
||||
struct pcpu_freelist_node *first;
|
||||
raw_spinlock_t lock;
|
||||
};
|
||||
|
||||
struct pcpu_freelist {
|
||||
struct pcpu_freelist_head __percpu *freelist;
|
||||
};
|
||||
|
||||
struct pcpu_freelist_node {
|
||||
struct pcpu_freelist_node *next;
|
||||
};
|
||||
|
||||
void pcpu_freelist_push(struct pcpu_freelist *, struct pcpu_freelist_node *);
|
||||
struct pcpu_freelist_node *pcpu_freelist_pop(struct pcpu_freelist *);
|
||||
void pcpu_freelist_populate(struct pcpu_freelist *s, void *buf, u32 elem_size,
|
||||
u32 nr_elems);
|
||||
int pcpu_freelist_init(struct pcpu_freelist *);
|
||||
void pcpu_freelist_destroy(struct pcpu_freelist *s);
|
||||
#endif
|
@ -10,9 +10,10 @@
|
||||
#include <linux/vmalloc.h>
|
||||
#include <linux/stacktrace.h>
|
||||
#include <linux/perf_event.h>
|
||||
#include "percpu_freelist.h"
|
||||
|
||||
struct stack_map_bucket {
|
||||
struct rcu_head rcu;
|
||||
struct pcpu_freelist_node fnode;
|
||||
u32 hash;
|
||||
u32 nr;
|
||||
u64 ip[];
|
||||
@ -20,10 +21,34 @@ struct stack_map_bucket {
|
||||
|
||||
struct bpf_stack_map {
|
||||
struct bpf_map map;
|
||||
void *elems;
|
||||
struct pcpu_freelist freelist;
|
||||
u32 n_buckets;
|
||||
struct stack_map_bucket __rcu *buckets[];
|
||||
struct stack_map_bucket *buckets[];
|
||||
};
|
||||
|
||||
static int prealloc_elems_and_freelist(struct bpf_stack_map *smap)
|
||||
{
|
||||
u32 elem_size = sizeof(struct stack_map_bucket) + smap->map.value_size;
|
||||
int err;
|
||||
|
||||
smap->elems = vzalloc(elem_size * smap->map.max_entries);
|
||||
if (!smap->elems)
|
||||
return -ENOMEM;
|
||||
|
||||
err = pcpu_freelist_init(&smap->freelist);
|
||||
if (err)
|
||||
goto free_elems;
|
||||
|
||||
pcpu_freelist_populate(&smap->freelist, smap->elems, elem_size,
|
||||
smap->map.max_entries);
|
||||
return 0;
|
||||
|
||||
free_elems:
|
||||
vfree(smap->elems);
|
||||
return err;
|
||||
}
|
||||
|
||||
/* Called from syscall */
|
||||
static struct bpf_map *stack_map_alloc(union bpf_attr *attr)
|
||||
{
|
||||
@ -35,6 +60,9 @@ static struct bpf_map *stack_map_alloc(union bpf_attr *attr)
|
||||
if (!capable(CAP_SYS_ADMIN))
|
||||
return ERR_PTR(-EPERM);
|
||||
|
||||
if (attr->map_flags)
|
||||
return ERR_PTR(-EINVAL);
|
||||
|
||||
/* check sanity of attributes */
|
||||
if (attr->max_entries == 0 || attr->key_size != 4 ||
|
||||
value_size < 8 || value_size % 8 ||
|
||||
@ -67,12 +95,22 @@ static struct bpf_map *stack_map_alloc(union bpf_attr *attr)
|
||||
smap->n_buckets = n_buckets;
|
||||
smap->map.pages = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT;
|
||||
|
||||
err = bpf_map_precharge_memlock(smap->map.pages);
|
||||
if (err)
|
||||
goto free_smap;
|
||||
|
||||
err = get_callchain_buffers();
|
||||
if (err)
|
||||
goto free_smap;
|
||||
|
||||
err = prealloc_elems_and_freelist(smap);
|
||||
if (err)
|
||||
goto put_buffers;
|
||||
|
||||
return &smap->map;
|
||||
|
||||
put_buffers:
|
||||
put_callchain_buffers();
|
||||
free_smap:
|
||||
kvfree(smap);
|
||||
return ERR_PTR(err);
|
||||
@ -118,7 +156,7 @@ static u64 bpf_get_stackid(u64 r1, u64 r2, u64 flags, u64 r4, u64 r5)
|
||||
ips = trace->ip + skip + init_nr;
|
||||
hash = jhash2((u32 *)ips, trace_len / sizeof(u32), 0);
|
||||
id = hash & (smap->n_buckets - 1);
|
||||
bucket = rcu_dereference(smap->buckets[id]);
|
||||
bucket = READ_ONCE(smap->buckets[id]);
|
||||
|
||||
if (bucket && bucket->hash == hash) {
|
||||
if (flags & BPF_F_FAST_STACK_CMP)
|
||||
@ -132,19 +170,18 @@ static u64 bpf_get_stackid(u64 r1, u64 r2, u64 flags, u64 r4, u64 r5)
|
||||
if (bucket && !(flags & BPF_F_REUSE_STACKID))
|
||||
return -EEXIST;
|
||||
|
||||
new_bucket = kmalloc(sizeof(struct stack_map_bucket) + map->value_size,
|
||||
GFP_ATOMIC | __GFP_NOWARN);
|
||||
new_bucket = (struct stack_map_bucket *)
|
||||
pcpu_freelist_pop(&smap->freelist);
|
||||
if (unlikely(!new_bucket))
|
||||
return -ENOMEM;
|
||||
|
||||
memcpy(new_bucket->ip, ips, trace_len);
|
||||
memset(new_bucket->ip + trace_len / 8, 0, map->value_size - trace_len);
|
||||
new_bucket->hash = hash;
|
||||
new_bucket->nr = trace_nr;
|
||||
|
||||
old_bucket = xchg(&smap->buckets[id], new_bucket);
|
||||
if (old_bucket)
|
||||
kfree_rcu(old_bucket, rcu);
|
||||
pcpu_freelist_push(&smap->freelist, &old_bucket->fnode);
|
||||
return id;
|
||||
}
|
||||
|
||||
@ -157,17 +194,34 @@ const struct bpf_func_proto bpf_get_stackid_proto = {
|
||||
.arg3_type = ARG_ANYTHING,
|
||||
};
|
||||
|
||||
/* Called from syscall or from eBPF program */
|
||||
/* Called from eBPF program */
|
||||
static void *stack_map_lookup_elem(struct bpf_map *map, void *key)
|
||||
{
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* Called from syscall */
|
||||
int bpf_stackmap_copy(struct bpf_map *map, void *key, void *value)
|
||||
{
|
||||
struct bpf_stack_map *smap = container_of(map, struct bpf_stack_map, map);
|
||||
struct stack_map_bucket *bucket;
|
||||
u32 id = *(u32 *)key;
|
||||
struct stack_map_bucket *bucket, *old_bucket;
|
||||
u32 id = *(u32 *)key, trace_len;
|
||||
|
||||
if (unlikely(id >= smap->n_buckets))
|
||||
return NULL;
|
||||
bucket = rcu_dereference(smap->buckets[id]);
|
||||
return bucket ? bucket->ip : NULL;
|
||||
return -ENOENT;
|
||||
|
||||
bucket = xchg(&smap->buckets[id], NULL);
|
||||
if (!bucket)
|
||||
return -ENOENT;
|
||||
|
||||
trace_len = bucket->nr * sizeof(u64);
|
||||
memcpy(value, bucket->ip, trace_len);
|
||||
memset(value + trace_len, 0, map->value_size - trace_len);
|
||||
|
||||
old_bucket = xchg(&smap->buckets[id], bucket);
|
||||
if (old_bucket)
|
||||
pcpu_freelist_push(&smap->freelist, &old_bucket->fnode);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int stack_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
|
||||
@ -193,7 +247,7 @@ static int stack_map_delete_elem(struct bpf_map *map, void *key)
|
||||
|
||||
old_bucket = xchg(&smap->buckets[id], NULL);
|
||||
if (old_bucket) {
|
||||
kfree_rcu(old_bucket, rcu);
|
||||
pcpu_freelist_push(&smap->freelist, &old_bucket->fnode);
|
||||
return 0;
|
||||
} else {
|
||||
return -ENOENT;
|
||||
@ -204,13 +258,12 @@ static int stack_map_delete_elem(struct bpf_map *map, void *key)
|
||||
static void stack_map_free(struct bpf_map *map)
|
||||
{
|
||||
struct bpf_stack_map *smap = container_of(map, struct bpf_stack_map, map);
|
||||
int i;
|
||||
|
||||
/* wait for bpf programs to complete before freeing stack map */
|
||||
synchronize_rcu();
|
||||
|
||||
for (i = 0; i < smap->n_buckets; i++)
|
||||
if (smap->buckets[i])
|
||||
kfree_rcu(smap->buckets[i], rcu);
|
||||
vfree(smap->elems);
|
||||
pcpu_freelist_destroy(&smap->freelist);
|
||||
kvfree(smap);
|
||||
put_callchain_buffers();
|
||||
}
|
||||
|
@ -18,6 +18,8 @@
|
||||
#include <linux/filter.h>
|
||||
#include <linux/version.h>
|
||||
|
||||
DEFINE_PER_CPU(int, bpf_prog_active);
|
||||
|
||||
int sysctl_unprivileged_bpf_disabled __read_mostly;
|
||||
|
||||
static LIST_HEAD(bpf_map_types);
|
||||
@ -46,6 +48,19 @@ void bpf_register_map_type(struct bpf_map_type_list *tl)
|
||||
list_add(&tl->list_node, &bpf_map_types);
|
||||
}
|
||||
|
||||
int bpf_map_precharge_memlock(u32 pages)
|
||||
{
|
||||
struct user_struct *user = get_current_user();
|
||||
unsigned long memlock_limit, cur;
|
||||
|
||||
memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
|
||||
cur = atomic_long_read(&user->locked_vm);
|
||||
free_uid(user);
|
||||
if (cur + pages > memlock_limit)
|
||||
return -EPERM;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int bpf_map_charge_memlock(struct bpf_map *map)
|
||||
{
|
||||
struct user_struct *user = get_current_user();
|
||||
@ -151,7 +166,7 @@ int bpf_map_new_fd(struct bpf_map *map)
|
||||
offsetof(union bpf_attr, CMD##_LAST_FIELD) - \
|
||||
sizeof(attr->CMD##_LAST_FIELD)) != NULL
|
||||
|
||||
#define BPF_MAP_CREATE_LAST_FIELD max_entries
|
||||
#define BPF_MAP_CREATE_LAST_FIELD map_flags
|
||||
/* called via syscall */
|
||||
static int map_create(union bpf_attr *attr)
|
||||
{
|
||||
@ -275,6 +290,8 @@ static int map_lookup_elem(union bpf_attr *attr)
|
||||
err = bpf_percpu_hash_copy(map, key, value);
|
||||
} else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
|
||||
err = bpf_percpu_array_copy(map, key, value);
|
||||
} else if (map->map_type == BPF_MAP_TYPE_STACK_TRACE) {
|
||||
err = bpf_stackmap_copy(map, key, value);
|
||||
} else {
|
||||
rcu_read_lock();
|
||||
ptr = map->ops->map_lookup_elem(map, key);
|
||||
@ -347,6 +364,11 @@ static int map_update_elem(union bpf_attr *attr)
|
||||
if (copy_from_user(value, uvalue, value_size) != 0)
|
||||
goto free_value;
|
||||
|
||||
/* must increment bpf_prog_active to avoid kprobe+bpf triggering from
|
||||
* inside bpf map update or delete otherwise deadlocks are possible
|
||||
*/
|
||||
preempt_disable();
|
||||
__this_cpu_inc(bpf_prog_active);
|
||||
if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH) {
|
||||
err = bpf_percpu_hash_update(map, key, value, attr->flags);
|
||||
} else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
|
||||
@ -356,6 +378,8 @@ static int map_update_elem(union bpf_attr *attr)
|
||||
err = map->ops->map_update_elem(map, key, value, attr->flags);
|
||||
rcu_read_unlock();
|
||||
}
|
||||
__this_cpu_dec(bpf_prog_active);
|
||||
preempt_enable();
|
||||
|
||||
free_value:
|
||||
kfree(value);
|
||||
@ -394,9 +418,13 @@ static int map_delete_elem(union bpf_attr *attr)
|
||||
if (copy_from_user(key, ukey, map->key_size) != 0)
|
||||
goto free_key;
|
||||
|
||||
preempt_disable();
|
||||
__this_cpu_inc(bpf_prog_active);
|
||||
rcu_read_lock();
|
||||
err = map->ops->map_delete_elem(map, key);
|
||||
rcu_read_unlock();
|
||||
__this_cpu_dec(bpf_prog_active);
|
||||
preempt_enable();
|
||||
|
||||
free_key:
|
||||
kfree(key);
|
||||
|
@ -13,8 +13,6 @@
|
||||
#include <linux/ctype.h>
|
||||
#include "trace.h"
|
||||
|
||||
static DEFINE_PER_CPU(int, bpf_prog_active);
|
||||
|
||||
/**
|
||||
* trace_call_bpf - invoke BPF program
|
||||
* @prog: BPF program
|
||||
|
@ -61,6 +61,7 @@ struct bpf_map_def {
|
||||
unsigned int key_size;
|
||||
unsigned int value_size;
|
||||
unsigned int max_entries;
|
||||
unsigned int map_flags;
|
||||
};
|
||||
|
||||
static int (*bpf_skb_store_bytes)(void *ctx, int off, void *from, int len, int flags) =
|
||||
|
@ -157,9 +157,13 @@ static int load_maps(struct bpf_map_def *maps, int len)
|
||||
map_fd[i] = bpf_create_map(maps[i].type,
|
||||
maps[i].key_size,
|
||||
maps[i].value_size,
|
||||
maps[i].max_entries);
|
||||
if (map_fd[i] < 0)
|
||||
maps[i].max_entries,
|
||||
maps[i].map_flags);
|
||||
if (map_fd[i] < 0) {
|
||||
printf("failed to create a map: %d %s\n",
|
||||
errno, strerror(errno));
|
||||
return 1;
|
||||
}
|
||||
|
||||
if (maps[i].type == BPF_MAP_TYPE_PROG_ARRAY)
|
||||
prog_array_fd = map_fd[i];
|
||||
@ -343,3 +347,65 @@ void read_trace_pipe(void)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#define MAX_SYMS 300000
|
||||
static struct ksym syms[MAX_SYMS];
|
||||
static int sym_cnt;
|
||||
|
||||
static int ksym_cmp(const void *p1, const void *p2)
|
||||
{
|
||||
return ((struct ksym *)p1)->addr - ((struct ksym *)p2)->addr;
|
||||
}
|
||||
|
||||
int load_kallsyms(void)
|
||||
{
|
||||
FILE *f = fopen("/proc/kallsyms", "r");
|
||||
char func[256], buf[256];
|
||||
char symbol;
|
||||
void *addr;
|
||||
int i = 0;
|
||||
|
||||
if (!f)
|
||||
return -ENOENT;
|
||||
|
||||
while (!feof(f)) {
|
||||
if (!fgets(buf, sizeof(buf), f))
|
||||
break;
|
||||
if (sscanf(buf, "%p %c %s", &addr, &symbol, func) != 3)
|
||||
break;
|
||||
if (!addr)
|
||||
continue;
|
||||
syms[i].addr = (long) addr;
|
||||
syms[i].name = strdup(func);
|
||||
i++;
|
||||
}
|
||||
sym_cnt = i;
|
||||
qsort(syms, sym_cnt, sizeof(struct ksym), ksym_cmp);
|
||||
return 0;
|
||||
}
|
||||
|
||||
struct ksym *ksym_search(long key)
|
||||
{
|
||||
int start = 0, end = sym_cnt;
|
||||
int result;
|
||||
|
||||
while (start < end) {
|
||||
size_t mid = start + (end - start) / 2;
|
||||
|
||||
result = key - syms[mid].addr;
|
||||
if (result < 0)
|
||||
end = mid;
|
||||
else if (result > 0)
|
||||
start = mid + 1;
|
||||
else
|
||||
return &syms[mid];
|
||||
}
|
||||
|
||||
if (start >= 1 && syms[start - 1].addr < key &&
|
||||
key < syms[start].addr)
|
||||
/* valid ksym */
|
||||
return &syms[start - 1];
|
||||
|
||||
/* out of range. return _stext */
|
||||
return &syms[0];
|
||||
}
|
||||
|
@ -23,5 +23,11 @@ extern int event_fd[MAX_PROGS];
|
||||
int load_bpf_file(char *path);
|
||||
|
||||
void read_trace_pipe(void);
|
||||
struct ksym {
|
||||
long addr;
|
||||
char *name;
|
||||
};
|
||||
|
||||
int load_kallsyms(void);
|
||||
struct ksym *ksym_search(long key);
|
||||
#endif
|
||||
|
@ -44,7 +44,7 @@ static void usage(void)
|
||||
static int bpf_map_create(void)
|
||||
{
|
||||
return bpf_create_map(BPF_MAP_TYPE_ARRAY, sizeof(uint32_t),
|
||||
sizeof(uint32_t), 1024);
|
||||
sizeof(uint32_t), 1024, 0);
|
||||
}
|
||||
|
||||
static int bpf_prog_create(const char *object)
|
||||
|
@ -19,13 +19,14 @@ static __u64 ptr_to_u64(void *ptr)
|
||||
}
|
||||
|
||||
int bpf_create_map(enum bpf_map_type map_type, int key_size, int value_size,
|
||||
int max_entries)
|
||||
int max_entries, int map_flags)
|
||||
{
|
||||
union bpf_attr attr = {
|
||||
.map_type = map_type,
|
||||
.key_size = key_size,
|
||||
.value_size = value_size,
|
||||
.max_entries = max_entries
|
||||
.max_entries = max_entries,
|
||||
.map_flags = map_flags,
|
||||
};
|
||||
|
||||
return syscall(__NR_bpf, BPF_MAP_CREATE, &attr, sizeof(attr));
|
||||
|
@ -5,7 +5,7 @@
|
||||
struct bpf_insn;
|
||||
|
||||
int bpf_create_map(enum bpf_map_type map_type, int key_size, int value_size,
|
||||
int max_entries);
|
||||
int max_entries, int map_flags);
|
||||
int bpf_update_elem(int fd, void *key, void *value, unsigned long long flags);
|
||||
int bpf_lookup_elem(int fd, void *key, void *value);
|
||||
int bpf_delete_elem(int fd, void *key);
|
||||
|
@ -18,80 +18,15 @@
|
||||
#include "libbpf.h"
|
||||
#include "bpf_load.h"
|
||||
|
||||
#define MAX_SYMS 300000
|
||||
#define PRINT_RAW_ADDR 0
|
||||
|
||||
static struct ksym {
|
||||
long addr;
|
||||
char *name;
|
||||
} syms[MAX_SYMS];
|
||||
static int sym_cnt;
|
||||
|
||||
static int ksym_cmp(const void *p1, const void *p2)
|
||||
{
|
||||
return ((struct ksym *)p1)->addr - ((struct ksym *)p2)->addr;
|
||||
}
|
||||
|
||||
static int load_kallsyms(void)
|
||||
{
|
||||
FILE *f = fopen("/proc/kallsyms", "r");
|
||||
char func[256], buf[256];
|
||||
char symbol;
|
||||
void *addr;
|
||||
int i = 0;
|
||||
|
||||
if (!f)
|
||||
return -ENOENT;
|
||||
|
||||
while (!feof(f)) {
|
||||
if (!fgets(buf, sizeof(buf), f))
|
||||
break;
|
||||
if (sscanf(buf, "%p %c %s", &addr, &symbol, func) != 3)
|
||||
break;
|
||||
if (!addr)
|
||||
continue;
|
||||
syms[i].addr = (long) addr;
|
||||
syms[i].name = strdup(func);
|
||||
i++;
|
||||
}
|
||||
sym_cnt = i;
|
||||
qsort(syms, sym_cnt, sizeof(struct ksym), ksym_cmp);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void *search(long key)
|
||||
{
|
||||
int start = 0, end = sym_cnt;
|
||||
int result;
|
||||
|
||||
while (start < end) {
|
||||
size_t mid = start + (end - start) / 2;
|
||||
|
||||
result = key - syms[mid].addr;
|
||||
if (result < 0)
|
||||
end = mid;
|
||||
else if (result > 0)
|
||||
start = mid + 1;
|
||||
else
|
||||
return &syms[mid];
|
||||
}
|
||||
|
||||
if (start >= 1 && syms[start - 1].addr < key &&
|
||||
key < syms[start].addr)
|
||||
/* valid ksym */
|
||||
return &syms[start - 1];
|
||||
|
||||
/* out of range. return _stext */
|
||||
return &syms[0];
|
||||
}
|
||||
|
||||
static void print_ksym(__u64 addr)
|
||||
{
|
||||
struct ksym *sym;
|
||||
|
||||
if (!addr)
|
||||
return;
|
||||
sym = search(addr);
|
||||
sym = ksym_search(addr);
|
||||
if (PRINT_RAW_ADDR)
|
||||
printf("%s/%llx;", sym->name, addr);
|
||||
else
|
||||
|
@ -34,7 +34,7 @@ static int test_sock(void)
|
||||
long long value = 0, tcp_cnt, udp_cnt, icmp_cnt;
|
||||
|
||||
map_fd = bpf_create_map(BPF_MAP_TYPE_ARRAY, sizeof(key), sizeof(value),
|
||||
256);
|
||||
256, 0);
|
||||
if (map_fd < 0) {
|
||||
printf("failed to create map '%s'\n", strerror(errno));
|
||||
goto cleanup;
|
||||
|
@ -2,6 +2,7 @@
|
||||
* Testsuite for eBPF maps
|
||||
*
|
||||
* Copyright (c) 2014 PLUMgrid, http://plumgrid.com
|
||||
* Copyright (c) 2016 Facebook
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of version 2 of the GNU General Public
|
||||
@ -17,13 +18,16 @@
|
||||
#include <stdlib.h>
|
||||
#include "libbpf.h"
|
||||
|
||||
static int map_flags;
|
||||
|
||||
/* sanity tests for map API */
|
||||
static void test_hashmap_sanity(int i, void *data)
|
||||
{
|
||||
long long key, next_key, value;
|
||||
int map_fd;
|
||||
|
||||
map_fd = bpf_create_map(BPF_MAP_TYPE_HASH, sizeof(key), sizeof(value), 2);
|
||||
map_fd = bpf_create_map(BPF_MAP_TYPE_HASH, sizeof(key), sizeof(value),
|
||||
2, map_flags);
|
||||
if (map_fd < 0) {
|
||||
printf("failed to create hashmap '%s'\n", strerror(errno));
|
||||
exit(1);
|
||||
@ -99,7 +103,7 @@ static void test_percpu_hashmap_sanity(int task, void *data)
|
||||
int map_fd, i;
|
||||
|
||||
map_fd = bpf_create_map(BPF_MAP_TYPE_PERCPU_HASH, sizeof(key),
|
||||
sizeof(value[0]), 2);
|
||||
sizeof(value[0]), 2, map_flags);
|
||||
if (map_fd < 0) {
|
||||
printf("failed to create hashmap '%s'\n", strerror(errno));
|
||||
exit(1);
|
||||
@ -188,7 +192,8 @@ static void test_arraymap_sanity(int i, void *data)
|
||||
int key, next_key, map_fd;
|
||||
long long value;
|
||||
|
||||
map_fd = bpf_create_map(BPF_MAP_TYPE_ARRAY, sizeof(key), sizeof(value), 2);
|
||||
map_fd = bpf_create_map(BPF_MAP_TYPE_ARRAY, sizeof(key), sizeof(value),
|
||||
2, 0);
|
||||
if (map_fd < 0) {
|
||||
printf("failed to create arraymap '%s'\n", strerror(errno));
|
||||
exit(1);
|
||||
@ -244,7 +249,7 @@ static void test_percpu_arraymap_many_keys(void)
|
||||
int key, map_fd, i;
|
||||
|
||||
map_fd = bpf_create_map(BPF_MAP_TYPE_PERCPU_ARRAY, sizeof(key),
|
||||
sizeof(values[0]), nr_keys);
|
||||
sizeof(values[0]), nr_keys, 0);
|
||||
if (map_fd < 0) {
|
||||
printf("failed to create per-cpu arraymap '%s'\n",
|
||||
strerror(errno));
|
||||
@ -275,7 +280,7 @@ static void test_percpu_arraymap_sanity(int i, void *data)
|
||||
int key, next_key, map_fd;
|
||||
|
||||
map_fd = bpf_create_map(BPF_MAP_TYPE_PERCPU_ARRAY, sizeof(key),
|
||||
sizeof(values[0]), 2);
|
||||
sizeof(values[0]), 2, 0);
|
||||
if (map_fd < 0) {
|
||||
printf("failed to create arraymap '%s'\n", strerror(errno));
|
||||
exit(1);
|
||||
@ -336,7 +341,7 @@ static void test_map_large(void)
|
||||
|
||||
/* allocate 4Mbyte of memory */
|
||||
map_fd = bpf_create_map(BPF_MAP_TYPE_HASH, sizeof(key), sizeof(value),
|
||||
MAP_SIZE);
|
||||
MAP_SIZE, map_flags);
|
||||
if (map_fd < 0) {
|
||||
printf("failed to create large map '%s'\n", strerror(errno));
|
||||
exit(1);
|
||||
@ -421,7 +426,7 @@ static void test_map_parallel(void)
|
||||
int data[2];
|
||||
|
||||
map_fd = bpf_create_map(BPF_MAP_TYPE_HASH, sizeof(key), sizeof(value),
|
||||
MAP_SIZE);
|
||||
MAP_SIZE, map_flags);
|
||||
if (map_fd < 0) {
|
||||
printf("failed to create map for parallel test '%s'\n",
|
||||
strerror(errno));
|
||||
@ -463,7 +468,7 @@ static void test_map_parallel(void)
|
||||
assert(bpf_get_next_key(map_fd, &key, &key) == -1 && errno == ENOENT);
|
||||
}
|
||||
|
||||
int main(void)
|
||||
static void run_all_tests(void)
|
||||
{
|
||||
test_hashmap_sanity(0, NULL);
|
||||
test_percpu_hashmap_sanity(0, NULL);
|
||||
@ -474,6 +479,14 @@ int main(void)
|
||||
test_map_large();
|
||||
test_map_parallel();
|
||||
test_map_stress();
|
||||
}
|
||||
|
||||
int main(void)
|
||||
{
|
||||
map_flags = 0;
|
||||
run_all_tests();
|
||||
map_flags = BPF_F_NO_PREALLOC;
|
||||
run_all_tests();
|
||||
printf("test_maps: OK\n");
|
||||
return 0;
|
||||
}
|
||||
|
@ -1198,7 +1198,7 @@ static int create_map(void)
|
||||
int map_fd;
|
||||
|
||||
map_fd = bpf_create_map(BPF_MAP_TYPE_HASH,
|
||||
sizeof(long long), sizeof(long long), 1024);
|
||||
sizeof(long long), sizeof(long long), 1024, 0);
|
||||
if (map_fd < 0)
|
||||
printf("failed to create map '%s'\n", strerror(errno));
|
||||
|
||||
@ -1210,7 +1210,7 @@ static int create_prog_array(void)
|
||||
int map_fd;
|
||||
|
||||
map_fd = bpf_create_map(BPF_MAP_TYPE_PROG_ARRAY,
|
||||
sizeof(int), sizeof(int), 4);
|
||||
sizeof(int), sizeof(int), 4, 0);
|
||||
if (map_fd < 0)
|
||||
printf("failed to create prog_array '%s'\n", strerror(errno));
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user