Merge branch 'bpf-persistent'

Daniel Borkmann says:

====================
BPF updates

This set adds support for persistent maps/progs. Please see
the individual patches for further details. A man-page update
to bpf(2) will be sent later on, as well as an iproute2 patch
for support in tc.

v1 -> v2:
  - Reworked most of patch 4 and 5
  - Rebased to latest net-next
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
David S. Miller 2015-11-02 22:48:39 -05:00
commit 12d4309636
12 changed files with 683 additions and 72 deletions

View File

@ -167,11 +167,18 @@ struct bpf_prog *bpf_prog_get(u32 ufd);
void bpf_prog_put(struct bpf_prog *prog); void bpf_prog_put(struct bpf_prog *prog);
void bpf_prog_put_rcu(struct bpf_prog *prog); void bpf_prog_put_rcu(struct bpf_prog *prog);
struct bpf_map *bpf_map_get(struct fd f); struct bpf_map *bpf_map_get(u32 ufd);
struct bpf_map *__bpf_map_get(struct fd f);
void bpf_map_put(struct bpf_map *map); void bpf_map_put(struct bpf_map *map);
extern int sysctl_unprivileged_bpf_disabled; extern int sysctl_unprivileged_bpf_disabled;
int bpf_map_new_fd(struct bpf_map *map);
int bpf_prog_new_fd(struct bpf_prog *prog);
int bpf_obj_pin_user(u32 ufd, const char __user *pathname);
int bpf_obj_get_user(const char __user *pathname);
/* verify correctness of eBPF program */ /* verify correctness of eBPF program */
int bpf_check(struct bpf_prog **fp, union bpf_attr *attr); int bpf_check(struct bpf_prog **fp, union bpf_attr *attr);
#else #else

View File

@ -63,50 +63,16 @@ struct bpf_insn {
__s32 imm; /* signed immediate constant */ __s32 imm; /* signed immediate constant */
}; };
/* BPF syscall commands */ /* BPF syscall commands, see bpf(2) man-page for details. */
enum bpf_cmd { enum bpf_cmd {
/* create a map with given type and attributes
* fd = bpf(BPF_MAP_CREATE, union bpf_attr *, u32 size)
* returns fd or negative error
* map is deleted when fd is closed
*/
BPF_MAP_CREATE, BPF_MAP_CREATE,
/* lookup key in a given map
* err = bpf(BPF_MAP_LOOKUP_ELEM, union bpf_attr *attr, u32 size)
* Using attr->map_fd, attr->key, attr->value
* returns zero and stores found elem into value
* or negative error
*/
BPF_MAP_LOOKUP_ELEM, BPF_MAP_LOOKUP_ELEM,
/* create or update key/value pair in a given map
* err = bpf(BPF_MAP_UPDATE_ELEM, union bpf_attr *attr, u32 size)
* Using attr->map_fd, attr->key, attr->value, attr->flags
* returns zero or negative error
*/
BPF_MAP_UPDATE_ELEM, BPF_MAP_UPDATE_ELEM,
/* find and delete elem by key in a given map
* err = bpf(BPF_MAP_DELETE_ELEM, union bpf_attr *attr, u32 size)
* Using attr->map_fd, attr->key
* returns zero or negative error
*/
BPF_MAP_DELETE_ELEM, BPF_MAP_DELETE_ELEM,
/* lookup key in a given map and return next key
* err = bpf(BPF_MAP_GET_NEXT_KEY, union bpf_attr *attr, u32 size)
* Using attr->map_fd, attr->key, attr->next_key
* returns zero and stores next key or negative error
*/
BPF_MAP_GET_NEXT_KEY, BPF_MAP_GET_NEXT_KEY,
/* verify and load eBPF program
* prog_fd = bpf(BPF_PROG_LOAD, union bpf_attr *attr, u32 size)
* Using attr->prog_type, attr->insns, attr->license
* returns fd or negative error
*/
BPF_PROG_LOAD, BPF_PROG_LOAD,
BPF_OBJ_PIN,
BPF_OBJ_GET,
}; };
enum bpf_map_type { enum bpf_map_type {
@ -160,6 +126,11 @@ union bpf_attr {
__aligned_u64 log_buf; /* user supplied buffer */ __aligned_u64 log_buf; /* user supplied buffer */
__u32 kern_version; /* checked when prog_type=kprobe */ __u32 kern_version; /* checked when prog_type=kprobe */
}; };
struct { /* anonymous struct used by BPF_OBJ_* commands */
__aligned_u64 pathname;
__u32 bpf_fd;
};
} __attribute__((aligned(8))); } __attribute__((aligned(8)));
/* integer value in 'imm' field of BPF_CALL instruction selects which helper /* integer value in 'imm' field of BPF_CALL instruction selects which helper

View File

@ -75,5 +75,6 @@
#define ANON_INODE_FS_MAGIC 0x09041934 #define ANON_INODE_FS_MAGIC 0x09041934
#define BTRFS_TEST_MAGIC 0x73727279 #define BTRFS_TEST_MAGIC 0x73727279
#define NSFS_MAGIC 0x6e736673 #define NSFS_MAGIC 0x6e736673
#define BPF_FS_MAGIC 0xcafe4a11
#endif /* __LINUX_MAGIC_H__ */ #endif /* __LINUX_MAGIC_H__ */

View File

@ -1,2 +1,4 @@
obj-y := core.o obj-y := core.o
obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o hashtab.o arraymap.o helpers.o
obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o
obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o

View File

@ -92,6 +92,7 @@ struct bpf_prog *bpf_prog_alloc(unsigned int size, gfp_t gfp_extra_flags)
fp->pages = size / PAGE_SIZE; fp->pages = size / PAGE_SIZE;
fp->aux = aux; fp->aux = aux;
fp->aux->prog = fp;
return fp; return fp;
} }
@ -116,6 +117,7 @@ struct bpf_prog *bpf_prog_realloc(struct bpf_prog *fp_old, unsigned int size,
memcpy(fp, fp_old, fp_old->pages * PAGE_SIZE); memcpy(fp, fp_old, fp_old->pages * PAGE_SIZE);
fp->pages = size / PAGE_SIZE; fp->pages = size / PAGE_SIZE;
fp->aux->prog = fp;
/* We keep fp->aux from fp_old around in the new /* We keep fp->aux from fp_old around in the new
* reallocated structure. * reallocated structure.
@ -726,7 +728,6 @@ void bpf_prog_free(struct bpf_prog *fp)
struct bpf_prog_aux *aux = fp->aux; struct bpf_prog_aux *aux = fp->aux;
INIT_WORK(&aux->work, bpf_prog_free_deferred); INIT_WORK(&aux->work, bpf_prog_free_deferred);
aux->prog = fp;
schedule_work(&aux->work); schedule_work(&aux->work);
} }
EXPORT_SYMBOL_GPL(bpf_prog_free); EXPORT_SYMBOL_GPL(bpf_prog_free);

387
kernel/bpf/inode.c Normal file
View File

@ -0,0 +1,387 @@
/*
* Minimal file system backend for holding eBPF maps and programs,
* used by bpf(2) object pinning.
*
* Authors:
*
* Daniel Borkmann <daniel@iogearbox.net>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* version 2 as published by the Free Software Foundation.
*/
#include <linux/module.h>
#include <linux/magic.h>
#include <linux/major.h>
#include <linux/mount.h>
#include <linux/namei.h>
#include <linux/fs.h>
#include <linux/kdev_t.h>
#include <linux/filter.h>
#include <linux/bpf.h>
/* Kind of eBPF object that an inode of this file system refers to. */
enum bpf_type {
/* No known object type; bpf_inode_type() reports this for any other inode. */
BPF_TYPE_UNSPEC = 0,
/* The object is a struct bpf_prog. */
BPF_TYPE_PROG,
/* The object is a struct bpf_map. */
BPF_TYPE_MAP,
};
/*
 * Take one additional reference on the eBPF object @raw, interpreting it
 * according to @type. An unknown @type only triggers a one-time warning
 * and takes no reference. Always returns @raw unchanged.
 */
static void *bpf_any_get(void *raw, enum bpf_type type)
{
	if (type == BPF_TYPE_PROG) {
		struct bpf_prog *prog = raw;

		atomic_inc(&prog->aux->refcnt);
	} else if (type == BPF_TYPE_MAP) {
		struct bpf_map *map = raw;

		atomic_inc(&map->refcnt);
	} else {
		WARN_ON_ONCE(1);
	}

	return raw;
}
/*
 * Drop one reference on the eBPF object @raw through the put helper that
 * matches @type. An unknown @type only triggers a one-time warning.
 */
static void bpf_any_put(void *raw, enum bpf_type type)
{
	if (type == BPF_TYPE_PROG) {
		bpf_prog_put(raw);
		return;
	}

	if (type == BPF_TYPE_MAP) {
		bpf_map_put(raw);
		return;
	}

	WARN_ON_ONCE(1);
}
/*
 * Resolve the user file descriptor @ufd to an eBPF object.
 *
 * The descriptor is tried as a map first and, if that fails, as a program.
 * *@type is set to the kind that was tried last. Returns the object
 * obtained from bpf_map_get()/bpf_prog_get(), or the ERR_PTR() produced
 * by the program lookup when neither matches.
 */
static void *bpf_fd_probe_obj(u32 ufd, enum bpf_type *type)
{
	void *obj = bpf_map_get(ufd);

	if (!IS_ERR(obj)) {
		*type = BPF_TYPE_MAP;
		return obj;
	}

	*type = BPF_TYPE_PROG;
	return bpf_prog_get(ufd);
}
/* Forward declaration; the table itself is defined after its callbacks. */
static const struct inode_operations bpf_dir_iops;
/*
 * Intentionally empty operation tables. Their addresses act as type tags:
 * bpf_inode_type() compares inode->i_op against them to tell whether an
 * inode holds a program or a map.
 */
static const struct inode_operations bpf_prog_iops = { };
static const struct inode_operations bpf_map_iops = { };
/*
 * Allocate and initialise a new inode on @sb.
 *
 * Only directories and regular files are supported; any other file type
 * in @mode yields ERR_PTR(-EINVAL). A failed allocation is reported as
 * ERR_PTR(-ENOSPC). The inode gets a fresh inode number, identical
 * access/modification/change timestamps, and ownership derived from @dir
 * and @mode via inode_init_owner().
 */
static struct inode *bpf_get_inode(struct super_block *sb,
				   const struct inode *dir,
				   umode_t mode)
{
	const umode_t fmt = mode & S_IFMT;
	struct inode *ino;

	if (fmt != S_IFDIR && fmt != S_IFREG)
		return ERR_PTR(-EINVAL);

	ino = new_inode(sb);
	if (!ino)
		return ERR_PTR(-ENOSPC);

	ino->i_ino = get_next_ino();
	ino->i_atime = ino->i_mtime = ino->i_ctime = CURRENT_TIME;

	inode_init_owner(ino, dir, mode);

	return ino;
}
/*
 * Determine which kind of eBPF object @inode holds by looking at which
 * operation table its i_op points to.
 *
 * On success stores BPF_TYPE_PROG or BPF_TYPE_MAP in *@type and returns 0.
 * For any other inode stores BPF_TYPE_UNSPEC and returns -EACCES.
 */
static int bpf_inode_type(const struct inode *inode, enum bpf_type *type)
{
	const struct inode_operations *ops = inode->i_op;

	if (ops == &bpf_prog_iops) {
		*type = BPF_TYPE_PROG;
		return 0;
	}

	if (ops == &bpf_map_iops) {
		*type = BPF_TYPE_MAP;
		return 0;
	}

	*type = BPF_TYPE_UNSPEC;
	return -EACCES;
}
static bool bpf_dname_reserved(const struct dentry *dentry)
{
return strchr(dentry->d_name.name, '.');
}
/*
 * ->mkdir callback: create the sub-directory described by @dentry below
 * @dir. Reserved names (see bpf_dname_reserved()) fail with -EPERM;
 * inode creation errors are passed through. Returns 0 on success.
 */
static int bpf_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
{
	struct inode *subdir;

	if (bpf_dname_reserved(dentry))
		return -EPERM;

	subdir = bpf_get_inode(dir->i_sb, dir, S_IFDIR | mode);
	if (IS_ERR(subdir))
		return PTR_ERR(subdir);

	subdir->i_fop = &simple_dir_operations;
	subdir->i_op = &bpf_dir_iops;

	/* Bump the link count of both the parent and the new directory. */
	inc_nlink(dir);
	inc_nlink(subdir);

	/* Bind the inode to the dentry and take an extra dentry reference. */
	d_instantiate(dentry, subdir);
	dget(dentry);

	return 0;
}
/*
 * Create a regular-file inode for @dentry below @dir that carries an eBPF
 * object.
 *
 * @iops selects the object kind (it is later inspected by
 * bpf_inode_type()). The object pointer is taken from dentry->d_fsdata
 * and stored in inode->i_private. Reserved names fail with -EPERM; inode
 * creation errors are passed through. Returns 0 on success.
 */
static int bpf_mkobj_ops(struct inode *dir, struct dentry *dentry,
			 umode_t mode, const struct inode_operations *iops)
{
	struct inode *obj_inode;

	if (bpf_dname_reserved(dentry))
		return -EPERM;

	obj_inode = bpf_get_inode(dir->i_sb, dir, S_IFREG | mode);
	if (IS_ERR(obj_inode))
		return PTR_ERR(obj_inode);

	obj_inode->i_private = dentry->d_fsdata;
	obj_inode->i_op = iops;

	d_instantiate(dentry, obj_inode);
	dget(dentry);

	return 0;
}
/*
 * ->mknod callback used to pin an eBPF object.
 *
 * The request is accepted only when @devt has major UNNAMED_MAJOR, @mode
 * describes a regular file and dentry->d_fsdata carries an object
 * pointer. The minor number of @devt is interpreted as an enum bpf_type
 * and selects the inode operation table. Everything else is rejected
 * with -EPERM.
 */
static int bpf_mkobj(struct inode *dir, struct dentry *dentry, umode_t mode,
		     dev_t devt)
{
	const struct inode_operations *iops;
	enum bpf_type kind = MINOR(devt);

	if (MAJOR(devt) != UNNAMED_MAJOR)
		return -EPERM;
	if (!S_ISREG(mode))
		return -EPERM;
	if (dentry->d_fsdata == NULL)
		return -EPERM;

	if (kind == BPF_TYPE_PROG)
		iops = &bpf_prog_iops;
	else if (kind == BPF_TYPE_MAP)
		iops = &bpf_map_iops;
	else
		return -EPERM;

	return bpf_mkobj_ops(dir, dentry, mode, iops);
}
/*
 * Inode operations for directories of this file system. Installed on
 * directories created by bpf_mkdir() and on the root inode in
 * bpf_fill_super(); bpf_obj_do_pin() also compares against this table to
 * make sure the parent directory of a pin target belongs to this file
 * system.
 */
static const struct inode_operations bpf_dir_iops = {
.lookup = simple_lookup,
.mknod = bpf_mkobj,
.mkdir = bpf_mkdir,
.rmdir = simple_rmdir,
.unlink = simple_unlink,
};
/*
 * Create a node at @pathname that refers to the eBPF object @raw.
 *
 * @pathname: path of the node to create, resolved relative to AT_FDCWD
 * @raw:      program or map to attach to the new inode
 * @type:     kind of object @raw points to
 *
 * Returns 0 on success or a negative errno. The caller's reference on
 * @raw is not dropped here; bpf_obj_pin_user() drops it when this
 * function fails.
 */
static int bpf_obj_do_pin(const struct filename *pathname, void *raw,
enum bpf_type type)
{
struct dentry *dentry;
struct inode *dir;
struct path path;
umode_t mode;
dev_t devt;
int ret;
dentry = kern_path_create(AT_FDCWD, pathname->name, &path, 0);
if (IS_ERR(dentry))
return PTR_ERR(dentry);
/* Regular file, owner read/write, filtered through the caller's umask. */
mode = S_IFREG | ((S_IRUSR | S_IWUSR) & ~current_umask());
/* The object kind travels as the minor number; bpf_mkobj() decodes it. */
devt = MKDEV(UNNAMED_MAJOR, type);
ret = security_path_mknod(&path, dentry, mode, devt);
if (ret)
goto out;
/* Refuse to pin below a directory that is not one of ours. */
dir = d_inode(path.dentry);
if (dir->i_op != &bpf_dir_iops) {
ret = -EPERM;
goto out;
}
/*
 * Hand @raw over through d_fsdata for the duration of the mknod call;
 * bpf_mkobj_ops() copies it into inode->i_private.
 * NOTE(review): this relies on vfs_mknod() invoking the directory's
 * ->mknod (bpf_mkobj) - confirm against the VFS implementation.
 */
dentry->d_fsdata = raw;
ret = vfs_mknod(dir, dentry, mode, devt);
dentry->d_fsdata = NULL;
out:
done_path_create(&path, dentry);
return ret;
}
/*
 * Pin the eBPF object behind file descriptor @ufd at the user supplied
 * path @pathname.
 *
 * The reference obtained from bpf_fd_probe_obj() is kept when pinning
 * succeeds and dropped again when it fails. Returns 0 on success or a
 * negative errno.
 */
int bpf_obj_pin_user(u32 ufd, const char __user *pathname)
{
	struct filename *name;
	enum bpf_type kind;
	void *obj;
	int err;

	name = getname(pathname);
	if (IS_ERR(name))
		return PTR_ERR(name);

	obj = bpf_fd_probe_obj(ufd, &kind);
	if (!IS_ERR(obj)) {
		err = bpf_obj_do_pin(name, obj, kind);
		if (err != 0)
			bpf_any_put(obj, kind);
	} else {
		err = PTR_ERR(obj);
	}

	putname(name);
	return err;
}
/*
 * Look up @pathname (following symlinks) and return the eBPF object
 * stored in its inode with an additional reference taken.
 *
 * The caller needs write permission on the inode, and the inode must be
 * a program or map inode of this file system. On success *@type holds
 * the object kind and the access time of the path is updated. On failure
 * an ERR_PTR() is returned.
 */
static void *bpf_obj_do_get(const struct filename *pathname,
			    enum bpf_type *type)
{
	struct inode *inode;
	struct path path;
	void *obj;
	int err;

	err = kern_path(pathname->name, LOOKUP_FOLLOW, &path);
	if (err)
		return ERR_PTR(err);

	inode = d_backing_inode(path.dentry);

	err = inode_permission(inode, MAY_WRITE);
	if (!err)
		err = bpf_inode_type(inode, type);

	if (!err) {
		obj = bpf_any_get(inode->i_private, *type);
		touch_atime(&path);
	} else {
		obj = ERR_PTR(err);
	}

	path_put(&path);
	return obj;
}
/*
 * Open the eBPF object pinned at the user supplied path @pathname and
 * return a new file descriptor for it.
 *
 * The reference taken by bpf_obj_do_get() is dropped again when no file
 * descriptor could be created. Returns the new descriptor or a negative
 * errno; -ENOENT is returned if the object kind is neither program nor
 * map.
 */
int bpf_obj_get_user(const char __user *pathname)
{
	enum bpf_type kind = BPF_TYPE_UNSPEC;
	struct filename *name;
	void *obj;
	int ret;

	name = getname(pathname);
	if (IS_ERR(name))
		return PTR_ERR(name);

	obj = bpf_obj_do_get(name, &kind);
	if (IS_ERR(obj)) {
		ret = PTR_ERR(obj);
		goto out;
	}

	switch (kind) {
	case BPF_TYPE_PROG:
		ret = bpf_prog_new_fd(obj);
		break;
	case BPF_TYPE_MAP:
		ret = bpf_map_new_fd(obj);
		break;
	default:
		ret = -ENOENT;
		goto out;
	}

	if (ret < 0)
		bpf_any_put(obj, kind);
out:
	putname(name);
	return ret;
}
/*
 * ->evict_inode callback: tear down @inode and, when it refers to a
 * program or map, drop the reference stored in inode->i_private.
 */
static void bpf_evict_inode(struct inode *inode)
{
enum bpf_type type;
truncate_inode_pages_final(&inode->i_data);
clear_inode(inode);
/*
 * Only program and map inodes carry an object in i_private; for any
 * other inode (e.g. directories) bpf_inode_type() fails and nothing is
 * released.
 */
if (!bpf_inode_type(inode, &type))
bpf_any_put(inode->i_private, type);
}
/*
 * Superblock operations, installed by bpf_fill_super(). Inode eviction
 * goes through bpf_evict_inode() so that the reference on a pinned
 * object is released together with its inode.
 */
static const struct super_operations bpf_super_ops = {
.statfs = simple_statfs,
.drop_inode = generic_delete_inode,
.evict_inode = bpf_evict_inode,
};
/*
 * Populate a fresh superblock @sb for this file system.
 *
 * Builds an empty tree via simple_fill_super(), installs bpf_super_ops,
 * and turns the root inode into a bpf directory whose permission bits
 * are replaced by sticky + rwx for user, group and others. @data and
 * @silent are unused. Returns 0 or the error from simple_fill_super().
 */
static int bpf_fill_super(struct super_block *sb, void *data, int silent)
{
	static struct tree_descr bpf_rfiles[] = { { "" } };
	struct inode *root;
	int err;

	err = simple_fill_super(sb, BPF_FS_MAGIC, bpf_rfiles);
	if (err)
		return err;

	sb->s_op = &bpf_super_ops;

	root = sb->s_root->d_inode;
	root->i_op = &bpf_dir_iops;
	root->i_mode = (root->i_mode & ~S_IALLUGO) | S_ISVTX | S_IRWXUGO;

	return 0;
}
/*
 * ->mount callback of the bpf file system.
 *
 * Every mount gets its own superblock, filled in by bpf_fill_super().
 * mount_nodev() is used rather than mount_ns() keyed on
 * current->nsproxy->mnt_ns: that variant would leave the mount namespace
 * pointer stored on the superblock although no reference on the
 * namespace is ever taken here, so the pointer could outlive the
 * namespace. @dev_name is unused; @data is forwarded to bpf_fill_super(),
 * which ignores it.
 */
static struct dentry *bpf_mount(struct file_system_type *type, int flags,
				const char *dev_name, void *data)
{
	return mount_nodev(type, flags, data, bpf_fill_super);
}
/*
 * File system type descriptor for the file system named "bpf". It is
 * registered by bpf_init(); mounts are set up through bpf_mount().
 */
static struct file_system_type bpf_fs_type = {
.owner = THIS_MODULE,
.name = "bpf",
.mount = bpf_mount,
.kill_sb = kill_litter_super,
.fs_flags = FS_USERNS_MOUNT,
};
MODULE_ALIAS_FS("bpf");
/*
 * Init-time setup: create the "bpf" mount point below fs_kobj and
 * register the file system type. The mount point is removed again if
 * registration fails. Returns 0 on success or a negative errno.
 */
static int __init bpf_init(void)
{
	int err = sysfs_create_mount_point(fs_kobj, "bpf");

	if (err)
		return err;

	err = register_filesystem(&bpf_fs_type);
	if (!err)
		return 0;

	sysfs_remove_mount_point(fs_kobj, "bpf");
	return err;
}
fs_initcall(bpf_init);

View File

@ -111,6 +111,12 @@ static const struct file_operations bpf_map_fops = {
.release = bpf_map_release, .release = bpf_map_release,
}; };
/*
 * Create a new anonymous-inode file descriptor named "bpf-map" that
 * refers to @map. The descriptor is opened read/write with
 * close-on-exec set. Returns the descriptor or a negative errno, as
 * reported by anon_inode_getfd().
 */
int bpf_map_new_fd(struct bpf_map *map)
{
	const int fd_flags = O_RDWR | O_CLOEXEC;

	return anon_inode_getfd("bpf-map", &bpf_map_fops, map, fd_flags);
}
/* helper macro to check that unused fields 'union bpf_attr' are zero */ /* helper macro to check that unused fields 'union bpf_attr' are zero */
#define CHECK_ATTR(CMD) \ #define CHECK_ATTR(CMD) \
memchr_inv((void *) &attr->CMD##_LAST_FIELD + \ memchr_inv((void *) &attr->CMD##_LAST_FIELD + \
@ -141,8 +147,7 @@ static int map_create(union bpf_attr *attr)
if (err) if (err)
goto free_map; goto free_map;
err = anon_inode_getfd("bpf-map", &bpf_map_fops, map, O_RDWR | O_CLOEXEC); err = bpf_map_new_fd(map);
if (err < 0) if (err < 0)
/* failed to allocate fd */ /* failed to allocate fd */
goto free_map; goto free_map;
@ -157,19 +162,29 @@ free_map:
/* if error is returned, fd is released. /* if error is returned, fd is released.
* On success caller should complete fd access with matching fdput() * On success caller should complete fd access with matching fdput()
*/ */
struct bpf_map *bpf_map_get(struct fd f) struct bpf_map *__bpf_map_get(struct fd f)
{ {
struct bpf_map *map;
if (!f.file) if (!f.file)
return ERR_PTR(-EBADF); return ERR_PTR(-EBADF);
if (f.file->f_op != &bpf_map_fops) { if (f.file->f_op != &bpf_map_fops) {
fdput(f); fdput(f);
return ERR_PTR(-EINVAL); return ERR_PTR(-EINVAL);
} }
map = f.file->private_data; return f.file->private_data;
}
struct bpf_map *bpf_map_get(u32 ufd)
{
struct fd f = fdget(ufd);
struct bpf_map *map;
map = __bpf_map_get(f);
if (IS_ERR(map))
return map;
atomic_inc(&map->refcnt);
fdput(f);
return map; return map;
} }
@ -197,7 +212,7 @@ static int map_lookup_elem(union bpf_attr *attr)
return -EINVAL; return -EINVAL;
f = fdget(ufd); f = fdget(ufd);
map = bpf_map_get(f); map = __bpf_map_get(f);
if (IS_ERR(map)) if (IS_ERR(map))
return PTR_ERR(map); return PTR_ERR(map);
@ -256,7 +271,7 @@ static int map_update_elem(union bpf_attr *attr)
return -EINVAL; return -EINVAL;
f = fdget(ufd); f = fdget(ufd);
map = bpf_map_get(f); map = __bpf_map_get(f);
if (IS_ERR(map)) if (IS_ERR(map))
return PTR_ERR(map); return PTR_ERR(map);
@ -309,7 +324,7 @@ static int map_delete_elem(union bpf_attr *attr)
return -EINVAL; return -EINVAL;
f = fdget(ufd); f = fdget(ufd);
map = bpf_map_get(f); map = __bpf_map_get(f);
if (IS_ERR(map)) if (IS_ERR(map))
return PTR_ERR(map); return PTR_ERR(map);
@ -350,7 +365,7 @@ static int map_get_next_key(union bpf_attr *attr)
return -EINVAL; return -EINVAL;
f = fdget(ufd); f = fdget(ufd);
map = bpf_map_get(f); map = __bpf_map_get(f);
if (IS_ERR(map)) if (IS_ERR(map))
return PTR_ERR(map); return PTR_ERR(map);
@ -498,7 +513,7 @@ static void bpf_prog_uncharge_memlock(struct bpf_prog *prog)
free_uid(user); free_uid(user);
} }
static void __prog_put_rcu(struct rcu_head *rcu) static void __prog_put_common(struct rcu_head *rcu)
{ {
struct bpf_prog_aux *aux = container_of(rcu, struct bpf_prog_aux, rcu); struct bpf_prog_aux *aux = container_of(rcu, struct bpf_prog_aux, rcu);
@ -510,19 +525,14 @@ static void __prog_put_rcu(struct rcu_head *rcu)
/* version of bpf_prog_put() that is called after a grace period */ /* version of bpf_prog_put() that is called after a grace period */
void bpf_prog_put_rcu(struct bpf_prog *prog) void bpf_prog_put_rcu(struct bpf_prog *prog)
{ {
if (atomic_dec_and_test(&prog->aux->refcnt)) { if (atomic_dec_and_test(&prog->aux->refcnt))
prog->aux->prog = prog; call_rcu(&prog->aux->rcu, __prog_put_common);
call_rcu(&prog->aux->rcu, __prog_put_rcu);
}
} }
void bpf_prog_put(struct bpf_prog *prog) void bpf_prog_put(struct bpf_prog *prog)
{ {
if (atomic_dec_and_test(&prog->aux->refcnt)) { if (atomic_dec_and_test(&prog->aux->refcnt))
free_used_maps(prog->aux); __prog_put_common(&prog->aux->rcu);
bpf_prog_uncharge_memlock(prog);
bpf_prog_free(prog);
}
} }
EXPORT_SYMBOL_GPL(bpf_prog_put); EXPORT_SYMBOL_GPL(bpf_prog_put);
@ -538,21 +548,22 @@ static const struct file_operations bpf_prog_fops = {
.release = bpf_prog_release, .release = bpf_prog_release,
}; };
static struct bpf_prog *get_prog(struct fd f) int bpf_prog_new_fd(struct bpf_prog *prog)
{ {
struct bpf_prog *prog; return anon_inode_getfd("bpf-prog", &bpf_prog_fops, prog,
O_RDWR | O_CLOEXEC);
}
static struct bpf_prog *__bpf_prog_get(struct fd f)
{
if (!f.file) if (!f.file)
return ERR_PTR(-EBADF); return ERR_PTR(-EBADF);
if (f.file->f_op != &bpf_prog_fops) { if (f.file->f_op != &bpf_prog_fops) {
fdput(f); fdput(f);
return ERR_PTR(-EINVAL); return ERR_PTR(-EINVAL);
} }
prog = f.file->private_data; return f.file->private_data;
return prog;
} }
/* called by sockets/tracing/seccomp before attaching program to an event /* called by sockets/tracing/seccomp before attaching program to an event
@ -563,13 +574,13 @@ struct bpf_prog *bpf_prog_get(u32 ufd)
struct fd f = fdget(ufd); struct fd f = fdget(ufd);
struct bpf_prog *prog; struct bpf_prog *prog;
prog = get_prog(f); prog = __bpf_prog_get(f);
if (IS_ERR(prog)) if (IS_ERR(prog))
return prog; return prog;
atomic_inc(&prog->aux->refcnt); atomic_inc(&prog->aux->refcnt);
fdput(f); fdput(f);
return prog; return prog;
} }
EXPORT_SYMBOL_GPL(bpf_prog_get); EXPORT_SYMBOL_GPL(bpf_prog_get);
@ -647,7 +658,7 @@ static int bpf_prog_load(union bpf_attr *attr)
if (err < 0) if (err < 0)
goto free_used_maps; goto free_used_maps;
err = anon_inode_getfd("bpf-prog", &bpf_prog_fops, prog, O_RDWR | O_CLOEXEC); err = bpf_prog_new_fd(prog);
if (err < 0) if (err < 0)
/* failed to allocate fd */ /* failed to allocate fd */
goto free_used_maps; goto free_used_maps;
@ -663,6 +674,24 @@ free_prog_nouncharge:
return err; return err;
} }
#define BPF_OBJ_LAST_FIELD bpf_fd
static int bpf_obj_pin(const union bpf_attr *attr)
{
if (CHECK_ATTR(BPF_OBJ))
return -EINVAL;
return bpf_obj_pin_user(attr->bpf_fd, u64_to_ptr(attr->pathname));
}
/*
 * BPF_OBJ_GET command handler: validate @attr, then open the object
 * pinned at attr->pathname. Returns -EINVAL on a malformed @attr,
 * otherwise the result of bpf_obj_get_user().
 */
static int bpf_obj_get(const union bpf_attr *attr)
{
	if (CHECK_ATTR(BPF_OBJ))
		return -EINVAL;

	/* This command takes no descriptor as input; the field must be 0. */
	if (attr->bpf_fd != 0)
		return -EINVAL;

	return bpf_obj_get_user(u64_to_ptr(attr->pathname));
}
SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size) SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size)
{ {
union bpf_attr attr = {}; union bpf_attr attr = {};
@ -723,6 +752,12 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz
case BPF_PROG_LOAD: case BPF_PROG_LOAD:
err = bpf_prog_load(&attr); err = bpf_prog_load(&attr);
break; break;
case BPF_OBJ_PIN:
err = bpf_obj_pin(&attr);
break;
case BPF_OBJ_GET:
err = bpf_obj_get(&attr);
break;
default: default:
err = -EINVAL; err = -EINVAL;
break; break;

View File

@ -1989,8 +1989,7 @@ static int replace_map_fd_with_map_ptr(struct verifier_env *env)
} }
f = fdget(insn->imm); f = fdget(insn->imm);
map = __bpf_map_get(f);
map = bpf_map_get(f);
if (IS_ERR(map)) { if (IS_ERR(map)) {
verbose("fd %d is not pointing to valid bpf_map\n", verbose("fd %d is not pointing to valid bpf_map\n",
insn->imm); insn->imm);

View File

@ -4,6 +4,7 @@ obj- := dummy.o
# List of programs to build # List of programs to build
hostprogs-y := test_verifier test_maps hostprogs-y := test_verifier test_maps
hostprogs-y += sock_example hostprogs-y += sock_example
hostprogs-y += fds_example
hostprogs-y += sockex1 hostprogs-y += sockex1
hostprogs-y += sockex2 hostprogs-y += sockex2
hostprogs-y += sockex3 hostprogs-y += sockex3
@ -19,6 +20,7 @@ hostprogs-y += lathist
test_verifier-objs := test_verifier.o libbpf.o test_verifier-objs := test_verifier.o libbpf.o
test_maps-objs := test_maps.o libbpf.o test_maps-objs := test_maps.o libbpf.o
sock_example-objs := sock_example.o libbpf.o sock_example-objs := sock_example.o libbpf.o
fds_example-objs := bpf_load.o libbpf.o fds_example.o
sockex1-objs := bpf_load.o libbpf.o sockex1_user.o sockex1-objs := bpf_load.o libbpf.o sockex1_user.o
sockex2-objs := bpf_load.o libbpf.o sockex2_user.o sockex2-objs := bpf_load.o libbpf.o sockex2_user.o
sockex3-objs := bpf_load.o libbpf.o sockex3_user.o sockex3-objs := bpf_load.o libbpf.o sockex3_user.o
@ -49,6 +51,7 @@ always += lathist_kern.o
HOSTCFLAGS += -I$(objtree)/usr/include HOSTCFLAGS += -I$(objtree)/usr/include
HOSTCFLAGS_bpf_load.o += -I$(objtree)/usr/include -Wno-unused-variable HOSTCFLAGS_bpf_load.o += -I$(objtree)/usr/include -Wno-unused-variable
HOSTLOADLIBES_fds_example += -lelf
HOSTLOADLIBES_sockex1 += -lelf HOSTLOADLIBES_sockex1 += -lelf
HOSTLOADLIBES_sockex2 += -lelf HOSTLOADLIBES_sockex2 += -lelf
HOSTLOADLIBES_sockex3 += -lelf HOSTLOADLIBES_sockex3 += -lelf

183
samples/bpf/fds_example.c Normal file
View File

@ -0,0 +1,183 @@
#include <linux/unistd.h>
#include <linux/bpf.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <unistd.h>
#include <string.h>
#include <assert.h>
#include <errno.h>
#include <sys/types.h>
#include <sys/socket.h>
#include "bpf_load.h"
#include "libbpf.h"
/* Action flags: set by the -P (pin) and -G (get) command line options. */
#define BPF_F_PIN (1 << 0)
#define BPF_F_GET (1 << 1)
#define BPF_F_PIN_GET (BPF_F_PIN | BPF_F_GET)
/* Map argument flags: set when -k (key) and -v (value) were given. */
#define BPF_F_KEY (1 << 2)
#define BPF_F_VAL (1 << 3)
#define BPF_F_KEY_VAL (BPF_F_KEY | BPF_F_VAL)
/* Operating mode: selected with -m (map) or -p (prog). */
#define BPF_M_UNSPEC 0
#define BPF_M_MAP 1
#define BPF_M_PROG 2
/* Print the command line help text to stdout. */
static void usage(void)
{
	static const char *const help[] = {
		"Usage: fds_example [...]\n",
		" -F <file> File to pin/get object\n",
		" -P |- pin object\n",
		" -G `- get object\n",
		" -m eBPF map mode\n",
		" -k <key> |- map key\n",
		" -v <value> `- map value\n",
		" -p eBPF prog mode\n",
		" -o <object> `- object file\n",
		" -h Display this help.\n",
	};
	size_t i;

	for (i = 0; i < sizeof(help) / sizeof(help[0]); i++)
		fputs(help[i], stdout);
}
/*
 * Create the demo map: an array map with 32-bit keys, 32-bit values and
 * 1024 entries. Returns whatever bpf_create_map() returns.
 */
static int bpf_map_create(void)
{
	int map_fd;

	map_fd = bpf_create_map(BPF_MAP_TYPE_ARRAY, sizeof(uint32_t),
				sizeof(uint32_t), 1024);
	return map_fd;
}
/*
 * Obtain a program file descriptor.
 *
 * @object: path of an object file to load via load_bpf_file(), or NULL
 *          to load a built-in two-instruction socket filter that sets
 *          register 0 to 1 and exits.
 *
 * Returns the program descriptor (prog_fd[0] when loading from @object),
 * or a negative value on failure.
 */
static int bpf_prog_create(const char *object)
{
	static const struct bpf_insn insns[] = {
		BPF_MOV64_IMM(BPF_REG_0, 1),
		BPF_EXIT_INSN(),
	};

	if (object) {
		/*
		 * Keep the call out of assert(): with NDEBUG defined the
		 * assert expression is not evaluated and the object file
		 * would never be loaded.
		 */
		int err = load_bpf_file((char *)object);

		assert(!err);
		if (err)
			return -1;

		return prog_fd[0];
	}

	return bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER,
			     insns, sizeof(insns), "GPL", 0);
}
/*
 * Map mode of the example.
 *
 * With BPF_F_PIN set in @flags a new map is created and pinned at @file;
 * otherwise the map pinned at @file is fetched. Afterwards, if both a key
 * and a value were supplied the element is updated; if only a key was
 * supplied it is looked up. Each step is logged to stdout and enforced
 * with assert(). Always returns 0.
 */
static int bpf_do_map(const char *file, uint32_t flags, uint32_t key,
		      uint32_t value)
{
	const uint32_t kv = flags & BPF_F_KEY_VAL;
	int map_fd, err;

	if (!(flags & BPF_F_PIN)) {
		map_fd = bpf_obj_get(file);
		printf("bpf: get fd:%d (%s)\n", map_fd, strerror(errno));
		assert(map_fd > 0);
	} else {
		map_fd = bpf_map_create();
		printf("bpf: map fd:%d (%s)\n", map_fd, strerror(errno));
		assert(map_fd > 0);

		err = bpf_obj_pin(map_fd, file);
		printf("bpf: pin ret:(%d,%s)\n", err, strerror(errno));
		assert(err == 0);
	}

	if (kv == BPF_F_KEY_VAL) {
		/* Key and value given: write the element. */
		err = bpf_update_elem(map_fd, &key, &value, 0);
		printf("bpf: fd:%d u->(%u:%u) ret:(%d,%s)\n", map_fd, key, value,
		       err, strerror(errno));
		assert(err == 0);
	} else if (kv & BPF_F_KEY) {
		/* Only a key given: read the element. */
		err = bpf_lookup_elem(map_fd, &key, &value);
		printf("bpf: fd:%d l->(%u):%u ret:(%d,%s)\n", map_fd, key, value,
		       err, strerror(errno));
		assert(err == 0);
	}

	return 0;
}
/*
 * Program mode of the example.
 *
 * With BPF_F_PIN set in @flags a program is created (from @object, or the
 * built-in one) and pinned at @file; otherwise the program pinned at
 * @file is fetched. The program is then attached to a raw socket opened
 * on "lo" via SO_ATTACH_BPF. Each step is logged to stdout and enforced
 * with assert(). Always returns 0.
 */
static int bpf_do_prog(const char *file, uint32_t flags, const char *object)
{
	int prog, sock, err;

	if (!(flags & BPF_F_PIN)) {
		prog = bpf_obj_get(file);
		printf("bpf: get fd:%d (%s)\n", prog, strerror(errno));
		assert(prog > 0);
	} else {
		prog = bpf_prog_create(object);
		printf("bpf: prog fd:%d (%s)\n", prog, strerror(errno));
		assert(prog > 0);

		err = bpf_obj_pin(prog, file);
		printf("bpf: pin ret:(%d,%s)\n", err, strerror(errno));
		assert(err == 0);
	}

	sock = open_raw_sock("lo");
	assert(sock > 0);

	err = setsockopt(sock, SOL_SOCKET, SO_ATTACH_BPF, &prog, sizeof(prog));
	printf("bpf: sock:%d <- fd:%d attached ret:(%d,%s)\n", sock, prog,
	       err, strerror(errno));
	assert(err == 0);

	return 0;
}
/*
 * Entry point: parse the command line and run the map or prog example.
 *
 * A file (-F) and at least one of -P/-G are required, together with a
 * mode (-m or -p). When both -P and -G are given the pin path is taken.
 * On -h, on an unknown option, or when the required arguments are
 * missing, the usage text is printed and -1 is returned; otherwise the
 * result of bpf_do_map()/bpf_do_prog() is returned.
 */
int main(int argc, char **argv)
{
	const char *file = NULL, *object = NULL;
	uint32_t key = 0, value = 0, flags = 0;
	int opt, mode = BPF_M_UNSPEC;

	/*
	 * 'h' is part of the option string so that the documented -h switch
	 * shows the help text without getopt() first reporting it as an
	 * invalid option.
	 */
	while ((opt = getopt(argc, argv, "F:PGmk:v:po:h")) != -1) {
		switch (opt) {
		/* General args */
		case 'F':
			file = optarg;
			break;
		case 'P':
			flags |= BPF_F_PIN;
			break;
		case 'G':
			flags |= BPF_F_GET;
			break;
		/* Map-related args */
		case 'm':
			mode = BPF_M_MAP;
			break;
		case 'k':
			key = strtoul(optarg, NULL, 0);
			flags |= BPF_F_KEY;
			break;
		case 'v':
			value = strtoul(optarg, NULL, 0);
			flags |= BPF_F_VAL;
			break;
		/* Prog-related args */
		case 'p':
			mode = BPF_M_PROG;
			break;
		case 'o':
			object = optarg;
			break;
		case 'h':
		default:
			goto out;
		}
	}

	if (!(flags & BPF_F_PIN_GET) || !file)
		goto out;

	switch (mode) {
	case BPF_M_MAP:
		return bpf_do_map(file, flags, key, value);
	case BPF_M_PROG:
		return bpf_do_prog(file, flags, object);
	default:
		/* No mode selected: fall through to the usage text. */
		break;
	}
out:
	usage();
	return -1;
}

View File

@ -103,6 +103,25 @@ int bpf_prog_load(enum bpf_prog_type prog_type,
return syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr)); return syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr));
} }
/*
 * Pin the eBPF object behind @fd at @pathname using the BPF_OBJ_PIN
 * command of the bpf() system call. Returns the system call's result.
 */
int bpf_obj_pin(int fd, const char *pathname)
{
	union bpf_attr attr = {
		.bpf_fd   = fd,
		.pathname = ptr_to_u64((void *)pathname),
	};
	int ret;

	ret = syscall(__NR_bpf, BPF_OBJ_PIN, &attr, sizeof(attr));
	return ret;
}
/*
 * Fetch the eBPF object pinned at @pathname using the BPF_OBJ_GET
 * command of the bpf() system call. Returns the system call's result.
 */
int bpf_obj_get(const char *pathname)
{
	union bpf_attr attr = {
		.pathname = ptr_to_u64((void *)pathname),
	};
	int ret;

	ret = syscall(__NR_bpf, BPF_OBJ_GET, &attr, sizeof(attr));
	return ret;
}
int open_raw_sock(const char *name) int open_raw_sock(const char *name)
{ {
struct sockaddr_ll sll; struct sockaddr_ll sll;

View File

@ -15,6 +15,9 @@ int bpf_prog_load(enum bpf_prog_type prog_type,
const struct bpf_insn *insns, int insn_len, const struct bpf_insn *insns, int insn_len,
const char *license, int kern_version); const char *license, int kern_version);
int bpf_obj_pin(int fd, const char *pathname);
int bpf_obj_get(const char *pathname);
#define LOG_BUF_SIZE 65536 #define LOG_BUF_SIZE 65536
extern char bpf_log_buf[LOG_BUF_SIZE]; extern char bpf_log_buf[LOG_BUF_SIZE];