netfilter: bpf: Support BPF_F_NETFILTER_IP_DEFRAG in netfilter link
This commit adds support for enabling IP defrag using pre-existing netfilter defrag support. Basically all the flag does is bump a refcnt while the link is active. Checks are also added to ensure the prog requesting defrag support is run _after_ netfilter defrag hooks. We also take care to avoid any issues w.r.t. module unloading -- while defrag is active on a link, the module is prevented from unloading. Signed-off-by: Daniel Xu <dxu@dxuuu.xyz> Reviewed-by: Florian Westphal <fw@strlen.de> Link: https://lore.kernel.org/r/5cff26f97e55161b7d56b09ddcf5f8888a5add1d.1689970773.git.dxu@dxuuu.xyz Signed-off-by: Alexei Starovoitov <ast@kernel.org>
This commit is contained in:
committed by
Alexei Starovoitov
parent
9abddac583
commit
91721c2d02
@ -1188,6 +1188,11 @@ enum bpf_perf_event_type {
|
|||||||
*/
|
*/
|
||||||
#define BPF_F_KPROBE_MULTI_RETURN (1U << 0)
|
#define BPF_F_KPROBE_MULTI_RETURN (1U << 0)
|
||||||
|
|
||||||
|
/* link_create.netfilter.flags used in LINK_CREATE command for
|
||||||
|
* BPF_PROG_TYPE_NETFILTER to enable IP packet defragmentation.
|
||||||
|
*/
|
||||||
|
#define BPF_F_NETFILTER_IP_DEFRAG (1U << 0)
|
||||||
|
|
||||||
/* When BPF ldimm64's insn[0].src_reg != 0 then this can have
|
/* When BPF ldimm64's insn[0].src_reg != 0 then this can have
|
||||||
* the following extensions:
|
* the following extensions:
|
||||||
*
|
*
|
||||||
|
@ -1,6 +1,8 @@
|
|||||||
// SPDX-License-Identifier: GPL-2.0
|
// SPDX-License-Identifier: GPL-2.0
|
||||||
#include <linux/bpf.h>
|
#include <linux/bpf.h>
|
||||||
#include <linux/filter.h>
|
#include <linux/filter.h>
|
||||||
|
#include <linux/kmod.h>
|
||||||
|
#include <linux/module.h>
|
||||||
#include <linux/netfilter.h>
|
#include <linux/netfilter.h>
|
||||||
|
|
||||||
#include <net/netfilter/nf_bpf_link.h>
|
#include <net/netfilter/nf_bpf_link.h>
|
||||||
@ -23,8 +25,88 @@ struct bpf_nf_link {
|
|||||||
struct nf_hook_ops hook_ops;
|
struct nf_hook_ops hook_ops;
|
||||||
struct net *net;
|
struct net *net;
|
||||||
u32 dead;
|
u32 dead;
|
||||||
|
const struct nf_defrag_hook *defrag_hook;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
static const struct nf_defrag_hook *
|
||||||
|
get_proto_defrag_hook(struct bpf_nf_link *link,
|
||||||
|
const struct nf_defrag_hook __rcu *global_hook,
|
||||||
|
const char *mod)
|
||||||
|
{
|
||||||
|
const struct nf_defrag_hook *hook;
|
||||||
|
int err;
|
||||||
|
|
||||||
|
/* RCU protects us from races against module unloading */
|
||||||
|
rcu_read_lock();
|
||||||
|
hook = rcu_dereference(global_hook);
|
||||||
|
if (!hook) {
|
||||||
|
rcu_read_unlock();
|
||||||
|
err = request_module(mod);
|
||||||
|
if (err)
|
||||||
|
return ERR_PTR(err < 0 ? err : -EINVAL);
|
||||||
|
|
||||||
|
rcu_read_lock();
|
||||||
|
hook = rcu_dereference(global_hook);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (hook && try_module_get(hook->owner)) {
|
||||||
|
/* Once we have a refcnt on the module, we no longer need RCU */
|
||||||
|
hook = rcu_pointer_handoff(hook);
|
||||||
|
} else {
|
||||||
|
WARN_ONCE(!hook, "%s has bad registration", mod);
|
||||||
|
hook = ERR_PTR(-ENOENT);
|
||||||
|
}
|
||||||
|
rcu_read_unlock();
|
||||||
|
|
||||||
|
if (!IS_ERR(hook)) {
|
||||||
|
err = hook->enable(link->net);
|
||||||
|
if (err) {
|
||||||
|
module_put(hook->owner);
|
||||||
|
hook = ERR_PTR(err);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return hook;
|
||||||
|
}
|
||||||
|
|
||||||
|
static int bpf_nf_enable_defrag(struct bpf_nf_link *link)
|
||||||
|
{
|
||||||
|
const struct nf_defrag_hook __maybe_unused *hook;
|
||||||
|
|
||||||
|
switch (link->hook_ops.pf) {
|
||||||
|
#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4)
|
||||||
|
case NFPROTO_IPV4:
|
||||||
|
hook = get_proto_defrag_hook(link, nf_defrag_v4_hook, "nf_defrag_ipv4");
|
||||||
|
if (IS_ERR(hook))
|
||||||
|
return PTR_ERR(hook);
|
||||||
|
|
||||||
|
link->defrag_hook = hook;
|
||||||
|
return 0;
|
||||||
|
#endif
|
||||||
|
#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6)
|
||||||
|
case NFPROTO_IPV6:
|
||||||
|
hook = get_proto_defrag_hook(link, nf_defrag_v6_hook, "nf_defrag_ipv6");
|
||||||
|
if (IS_ERR(hook))
|
||||||
|
return PTR_ERR(hook);
|
||||||
|
|
||||||
|
link->defrag_hook = hook;
|
||||||
|
return 0;
|
||||||
|
#endif
|
||||||
|
default:
|
||||||
|
return -EAFNOSUPPORT;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static void bpf_nf_disable_defrag(struct bpf_nf_link *link)
|
||||||
|
{
|
||||||
|
const struct nf_defrag_hook *hook = link->defrag_hook;
|
||||||
|
|
||||||
|
if (!hook)
|
||||||
|
return;
|
||||||
|
hook->disable(link->net);
|
||||||
|
module_put(hook->owner);
|
||||||
|
}
|
||||||
|
|
||||||
static void bpf_nf_link_release(struct bpf_link *link)
|
static void bpf_nf_link_release(struct bpf_link *link)
|
||||||
{
|
{
|
||||||
struct bpf_nf_link *nf_link = container_of(link, struct bpf_nf_link, link);
|
struct bpf_nf_link *nf_link = container_of(link, struct bpf_nf_link, link);
|
||||||
@ -32,11 +114,11 @@ static void bpf_nf_link_release(struct bpf_link *link)
|
|||||||
if (nf_link->dead)
|
if (nf_link->dead)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
/* prevent hook-not-found warning splat from netfilter core when
|
/* do not double release in case .detach was already called */
|
||||||
* .detach was already called
|
if (!cmpxchg(&nf_link->dead, 0, 1)) {
|
||||||
*/
|
|
||||||
if (!cmpxchg(&nf_link->dead, 0, 1))
|
|
||||||
nf_unregister_net_hook(nf_link->net, &nf_link->hook_ops);
|
nf_unregister_net_hook(nf_link->net, &nf_link->hook_ops);
|
||||||
|
bpf_nf_disable_defrag(nf_link);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void bpf_nf_link_dealloc(struct bpf_link *link)
|
static void bpf_nf_link_dealloc(struct bpf_link *link)
|
||||||
@ -92,6 +174,8 @@ static const struct bpf_link_ops bpf_nf_link_lops = {
|
|||||||
|
|
||||||
static int bpf_nf_check_pf_and_hooks(const union bpf_attr *attr)
|
static int bpf_nf_check_pf_and_hooks(const union bpf_attr *attr)
|
||||||
{
|
{
|
||||||
|
int prio;
|
||||||
|
|
||||||
switch (attr->link_create.netfilter.pf) {
|
switch (attr->link_create.netfilter.pf) {
|
||||||
case NFPROTO_IPV4:
|
case NFPROTO_IPV4:
|
||||||
case NFPROTO_IPV6:
|
case NFPROTO_IPV6:
|
||||||
@ -102,19 +186,18 @@ static int bpf_nf_check_pf_and_hooks(const union bpf_attr *attr)
|
|||||||
return -EAFNOSUPPORT;
|
return -EAFNOSUPPORT;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (attr->link_create.netfilter.flags)
|
if (attr->link_create.netfilter.flags & ~BPF_F_NETFILTER_IP_DEFRAG)
|
||||||
return -EOPNOTSUPP;
|
return -EOPNOTSUPP;
|
||||||
|
|
||||||
/* make sure conntrack confirm is always last.
|
/* make sure conntrack confirm is always last */
|
||||||
*
|
prio = attr->link_create.netfilter.priority;
|
||||||
* In the future, if userspace can e.g. request defrag, then
|
if (prio == NF_IP_PRI_FIRST)
|
||||||
* "defrag_requested && prio before NF_IP_PRI_CONNTRACK_DEFRAG"
|
return -ERANGE; /* sabotage_in and other warts */
|
||||||
* should fail.
|
else if (prio == NF_IP_PRI_LAST)
|
||||||
*/
|
return -ERANGE; /* e.g. conntrack confirm */
|
||||||
switch (attr->link_create.netfilter.priority) {
|
else if ((attr->link_create.netfilter.flags & BPF_F_NETFILTER_IP_DEFRAG) &&
|
||||||
case NF_IP_PRI_FIRST: return -ERANGE; /* sabotage_in and other warts */
|
prio <= NF_IP_PRI_CONNTRACK_DEFRAG)
|
||||||
case NF_IP_PRI_LAST: return -ERANGE; /* e.g. conntrack confirm */
|
return -ERANGE; /* cannot use defrag if prog runs before nf_defrag */
|
||||||
}
|
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
@ -149,6 +232,7 @@ int bpf_nf_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
|
|||||||
|
|
||||||
link->net = net;
|
link->net = net;
|
||||||
link->dead = false;
|
link->dead = false;
|
||||||
|
link->defrag_hook = NULL;
|
||||||
|
|
||||||
err = bpf_link_prime(&link->link, &link_primer);
|
err = bpf_link_prime(&link->link, &link_primer);
|
||||||
if (err) {
|
if (err) {
|
||||||
@ -156,8 +240,17 @@ int bpf_nf_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
|
|||||||
return err;
|
return err;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (attr->link_create.netfilter.flags & BPF_F_NETFILTER_IP_DEFRAG) {
|
||||||
|
err = bpf_nf_enable_defrag(link);
|
||||||
|
if (err) {
|
||||||
|
bpf_link_cleanup(&link_primer);
|
||||||
|
return err;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
err = nf_register_net_hook(net, &link->hook_ops);
|
err = nf_register_net_hook(net, &link->hook_ops);
|
||||||
if (err) {
|
if (err) {
|
||||||
|
bpf_nf_disable_defrag(link);
|
||||||
bpf_link_cleanup(&link_primer);
|
bpf_link_cleanup(&link_primer);
|
||||||
return err;
|
return err;
|
||||||
}
|
}
|
||||||
|
@ -1188,6 +1188,11 @@ enum bpf_perf_event_type {
|
|||||||
*/
|
*/
|
||||||
#define BPF_F_KPROBE_MULTI_RETURN (1U << 0)
|
#define BPF_F_KPROBE_MULTI_RETURN (1U << 0)
|
||||||
|
|
||||||
|
/* link_create.netfilter.flags used in LINK_CREATE command for
|
||||||
|
* BPF_PROG_TYPE_NETFILTER to enable IP packet defragmentation.
|
||||||
|
*/
|
||||||
|
#define BPF_F_NETFILTER_IP_DEFRAG (1U << 0)
|
||||||
|
|
||||||
/* When BPF ldimm64's insn[0].src_reg != 0 then this can have
|
/* When BPF ldimm64's insn[0].src_reg != 0 then this can have
|
||||||
* the following extensions:
|
* the following extensions:
|
||||||
*
|
*
|
||||||
|
Reference in New Issue
Block a user