Merge git://git.kernel.org/pub/scm/linux/kernel/git/netfilter/nf
Pablo Neira Ayuso says:

====================
Netfilter fixes for net

1) Reduce the number of hardware offload retries from the flowtable
   datapath, which might hog the system with retries, from Felix Fietkau.

2) Skip the neighbour lookup for PPPoE devices; fill_forward_path()
   already provides the destination address, also from Felix.

3) Set the destination address from fill_forward_path() for PPPoE
   devices, so dev_fill_forward_path() works with PPPoE on top of a
   bridge, also from Felix.

4) When combining PPPoE on top of a VLAN device, set info->outdev to
   the PPPoE device so software offload works, from Felix.

5) Fix the flowtable TCP teardown state: races with the conntrack gc
   might result in resetting the state to ESTABLISHED and the timeout
   to one day. Joint work with Oz Shlomo and Sven Auhagen.

6) Call dst_check() from the flowtable datapath to check whether the
   dst is stale, instead of doing it from the garbage collector path.

7) Disable the register tracking infrastructure: either user-space or
   the kernel would need to pre-fetch keys unconditionally, otherwise
   register tracking assumes data is already available in a register
   when it might not be there, leading to incorrect reductions.

* git://git.kernel.org/pub/scm/linux/kernel/git/netfilter/nf:
  netfilter: nf_tables: disable expression reduction infra
  netfilter: flowtable: move dst_check to packet path
  netfilter: flowtable: fix TCP flow teardown
  netfilter: nft_flow_offload: fix offload with pppoe + vlan
  net: fix dev_fill_forward_path with pppoe + bridge
  netfilter: nft_flow_offload: skip dst neigh lookup for ppp devices
  netfilter: flowtable: fix excessive hw offload attempts after failure
====================

Link: https://lore.kernel.org/r/20220518213841.359653-1-pablo@netfilter.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
commit 7dc02d7f08
diff --git a/drivers/net/ppp/pppoe.c b/drivers/net/ppp/pppoe.c
@@ -988,6 +988,7 @@ static int pppoe_fill_forward_path(struct net_device_path_ctx *ctx,
 	path->encap.proto = htons(ETH_P_PPP_SES);
 	path->encap.id = be16_to_cpu(po->num);
 	memcpy(path->encap.h_dest, po->pppoe_pa.remote, ETH_ALEN);
+	memcpy(ctx->daddr, po->pppoe_pa.remote, ETH_ALEN);
 	path->dev = ctx->dev;
 	ctx->dev = dev;
 
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
@@ -900,7 +900,7 @@ struct net_device_path_stack {
 
 struct net_device_path_ctx {
 	const struct net_device *dev;
-	const u8 *daddr;
+	u8 daddr[ETH_ALEN];
 
 	int num_vlans;
 	struct {
diff --git a/net/core/dev.c b/net/core/dev.c
@@ -681,11 +681,11 @@ int dev_fill_forward_path(const struct net_device *dev, const u8 *daddr,
 	const struct net_device *last_dev;
 	struct net_device_path_ctx ctx = {
 		.dev	= dev,
-		.daddr	= daddr,
 	};
 	struct net_device_path *path;
 	int ret = 0;
 
+	memcpy(ctx.daddr, daddr, sizeof(ctx.daddr));
 	stack->num_paths = 0;
 	while (ctx.dev && ctx.dev->netdev_ops->ndo_fill_forward_path) {
 		last_dev = ctx.dev;
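The two hunks above change the forwarding-path lookup address from a caller-owned const pointer into a copy embedded in struct net_device_path_ctx, which is what lets pppoe_fill_forward_path() (first hunk) overwrite it with the PPPoE peer's MAC address: a bridge below the PPPoE device then resolves the outer Ethernet destination instead of the inner one. A minimal user-space sketch of the idea; the names here are illustrative, not kernel API:

#include <stdio.h>
#include <string.h>

#define ETH_ALEN 6

struct path_ctx {
	unsigned char daddr[ETH_ALEN];	/* mutable copy, as in the fix */
};

/* PPPoE step: the outer Ethernet destination is the peer's MAC. */
static void pppoe_step(struct path_ctx *ctx, const unsigned char *peer)
{
	memcpy(ctx->daddr, peer, ETH_ALEN);
}

int main(void)
{
	const unsigned char inner[ETH_ALEN] = { 0x02, 0, 0, 0, 0, 0x01 };
	const unsigned char peer[ETH_ALEN] = { 0x02, 0, 0, 0, 0, 0x02 };
	struct path_ctx ctx;

	memcpy(ctx.daddr, inner, sizeof(ctx.daddr)); /* dev_fill_forward_path() */
	pppoe_step(&ctx, peer);	/* lower device rewrites the lookup MAC */
	printf("bridge resolves ...:%02x\n", ctx.daddr[5]); /* 02, not 01 */
	return 0;
}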
diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c
@@ -179,12 +179,11 @@ EXPORT_SYMBOL_GPL(flow_offload_route_init);
 
 static void flow_offload_fixup_tcp(struct ip_ct_tcp *tcp)
 {
-	tcp->state = TCP_CONNTRACK_ESTABLISHED;
 	tcp->seen[0].td_maxwin = 0;
 	tcp->seen[1].td_maxwin = 0;
 }
 
-static void flow_offload_fixup_ct_timeout(struct nf_conn *ct)
+static void flow_offload_fixup_ct(struct nf_conn *ct)
 {
 	struct net *net = nf_ct_net(ct);
 	int l4num = nf_ct_protonum(ct);
@@ -193,7 +192,9 @@ static void flow_offload_fixup_ct_timeout(struct nf_conn *ct)
 	if (l4num == IPPROTO_TCP) {
 		struct nf_tcp_net *tn = nf_tcp_pernet(net);
 
-		timeout = tn->timeouts[TCP_CONNTRACK_ESTABLISHED];
+		flow_offload_fixup_tcp(&ct->proto.tcp);
+
+		timeout = tn->timeouts[ct->proto.tcp.state];
 		timeout -= tn->offload_timeout;
 	} else if (l4num == IPPROTO_UDP) {
 		struct nf_udp_net *tn = nf_udp_pernet(net);
@@ -211,18 +212,6 @@ static void flow_offload_fixup_ct_timeout(struct nf_conn *ct)
 		WRITE_ONCE(ct->timeout, nfct_time_stamp + timeout);
 }
 
-static void flow_offload_fixup_ct_state(struct nf_conn *ct)
-{
-	if (nf_ct_protonum(ct) == IPPROTO_TCP)
-		flow_offload_fixup_tcp(&ct->proto.tcp);
-}
-
-static void flow_offload_fixup_ct(struct nf_conn *ct)
-{
-	flow_offload_fixup_ct_state(ct);
-	flow_offload_fixup_ct_timeout(ct);
-}
-
 static void flow_offload_route_release(struct flow_offload *flow)
 {
 	nft_flow_dst_release(flow, FLOW_OFFLOAD_DIR_ORIGINAL);
@@ -335,8 +324,10 @@ void flow_offload_refresh(struct nf_flowtable *flow_table,
 	u32 timeout;
 
 	timeout = nf_flowtable_time_stamp + flow_offload_get_timeout(flow);
-	if (READ_ONCE(flow->timeout) != timeout)
+	if (timeout - READ_ONCE(flow->timeout) > HZ)
 		WRITE_ONCE(flow->timeout, timeout);
+	else
+		return;
 
 	if (likely(!nf_flowtable_hw_offload(flow_table)))
 		return;
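The refresh hunk above replaces an exact-match test with a wraparound-safe delta: flow->timeout is rewritten, and a hardware offload refresh attempted, only when the stored value is more than HZ ticks behind, i.e. at most once per second per flow rather than for every packet, which is what stops failed offload attempts from being retried endlessly. A standalone sketch of the pattern; HZ and the flow lifetime below are assumptions for illustration, not the kernel's values:

#include <stdint.h>
#include <stdio.h>

#define HZ 100			/* assumed tick rate for the sketch */
#define FLOW_LIFETIME (30 * HZ)

static uint32_t flow_timeout;	/* stands in for flow->timeout */

/* Return 1 when the (possibly expensive) hw refresh should run. */
static int flow_refresh(uint32_t now)
{
	uint32_t timeout = now + FLOW_LIFETIME;

	if (timeout - flow_timeout > HZ)	/* wraparound-safe delta */
		flow_timeout = timeout;
	else
		return 0;	/* refreshed less than a second ago: skip */
	return 1;
}

int main(void)
{
	flow_timeout = 0 + FLOW_LIFETIME;
	printf("%d\n", flow_refresh(10));	/* 0: only 10 ticks passed */
	printf("%d\n", flow_refresh(2 * HZ));	/* 1: more than HZ passed */
	return 0;
}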
@@ -359,22 +350,14 @@ static void flow_offload_del(struct nf_flowtable *flow_table,
 	rhashtable_remove_fast(&flow_table->rhashtable,
 			       &flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].node,
 			       nf_flow_offload_rhash_params);
-
-	clear_bit(IPS_OFFLOAD_BIT, &flow->ct->status);
-
-	if (nf_flow_has_expired(flow))
-		flow_offload_fixup_ct(flow->ct);
-	else
-		flow_offload_fixup_ct_timeout(flow->ct);
-
 	flow_offload_free(flow);
 }
 
 void flow_offload_teardown(struct flow_offload *flow)
 {
+	clear_bit(IPS_OFFLOAD_BIT, &flow->ct->status);
 	set_bit(NF_FLOW_TEARDOWN, &flow->flags);
-
-	flow_offload_fixup_ct_state(flow->ct);
+	flow_offload_fixup_ct(flow->ct);
 }
 EXPORT_SYMBOL_GPL(flow_offload_teardown);
 
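After this change the conntrack fixup runs exactly once, from flow_offload_teardown(), and flow_offload_fixup_ct() derives the timeout from the connection's current TCP state instead of forcing ESTABLISHED, whose timeout is measured in days; that is the race described in point 5 of the pull request. A toy illustration of the timeout selection; the states and values are invented for the sketch:

#include <stdio.h>

enum tcp_ct_state { CT_ESTABLISHED, CT_FIN_WAIT, CT_CLOSE, CT_MAX };

/* Invented timeouts, in seconds; only the relative sizes matter. */
static const unsigned int timeouts[CT_MAX] = {
	[CT_ESTABLISHED] = 5 * 24 * 3600,
	[CT_FIN_WAIT]    = 120,
	[CT_CLOSE]       = 10,
};

static unsigned int fixup_timeout(enum tcp_ct_state state)
{
	/* old (buggy): return timeouts[CT_ESTABLISHED]; */
	return timeouts[state];	/* new: honour the teardown-time state */
}

int main(void)
{
	printf("FIN_WAIT flow: %us instead of %us\n",
	       fixup_timeout(CT_FIN_WAIT), timeouts[CT_ESTABLISHED]);
	return 0;
}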
@@ -438,33 +421,12 @@ nf_flow_table_iterate(struct nf_flowtable *flow_table,
 	return err;
 }
 
-static bool flow_offload_stale_dst(struct flow_offload_tuple *tuple)
-{
-	struct dst_entry *dst;
-
-	if (tuple->xmit_type == FLOW_OFFLOAD_XMIT_NEIGH ||
-	    tuple->xmit_type == FLOW_OFFLOAD_XMIT_XFRM) {
-		dst = tuple->dst_cache;
-		if (!dst_check(dst, tuple->dst_cookie))
-			return true;
-	}
-
-	return false;
-}
-
-static bool nf_flow_has_stale_dst(struct flow_offload *flow)
-{
-	return flow_offload_stale_dst(&flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple) ||
-	       flow_offload_stale_dst(&flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple);
-}
-
 static void nf_flow_offload_gc_step(struct nf_flowtable *flow_table,
 				    struct flow_offload *flow, void *data)
 {
 	if (nf_flow_has_expired(flow) ||
-	    nf_ct_is_dying(flow->ct) ||
-	    nf_flow_has_stale_dst(flow))
-		set_bit(NF_FLOW_TEARDOWN, &flow->flags);
+	    nf_ct_is_dying(flow->ct))
+		flow_offload_teardown(flow);
 
 	if (test_bit(NF_FLOW_TEARDOWN, &flow->flags)) {
 		if (test_bit(NF_FLOW_HW, &flow->flags)) {
diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c
@@ -248,6 +248,15 @@ static bool nf_flow_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu)
 	return true;
 }
 
+static inline bool nf_flow_dst_check(struct flow_offload_tuple *tuple)
+{
+	if (tuple->xmit_type != FLOW_OFFLOAD_XMIT_NEIGH &&
+	    tuple->xmit_type != FLOW_OFFLOAD_XMIT_XFRM)
+		return true;
+
+	return dst_check(tuple->dst_cache, tuple->dst_cookie);
+}
+
 static unsigned int nf_flow_xmit_xfrm(struct sk_buff *skb,
 				      const struct nf_hook_state *state,
 				      struct dst_entry *dst)
@@ -367,6 +376,11 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
 	if (nf_flow_state_check(flow, iph->protocol, skb, thoff))
 		return NF_ACCEPT;
 
+	if (!nf_flow_dst_check(&tuplehash->tuple)) {
+		flow_offload_teardown(flow);
+		return NF_ACCEPT;
+	}
+
 	if (skb_try_make_writable(skb, thoff + hdrsize))
 		return NF_DROP;
 
@@ -624,6 +638,11 @@ nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
 	if (nf_flow_state_check(flow, ip6h->nexthdr, skb, thoff))
 		return NF_ACCEPT;
 
+	if (!nf_flow_dst_check(&tuplehash->tuple)) {
+		flow_offload_teardown(flow);
+		return NF_ACCEPT;
+	}
+
 	if (skb_try_make_writable(skb, thoff + hdrsize))
 		return NF_DROP;
 
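Validating the cached route from the packet path, rather than from the garbage collector whose scan a stale route could outlive, tears the flow down as soon as dst_check() fails, so traffic immediately falls back to the classic forwarding path and picks up a fresh route. Roughly, dst_check() compares a cookie captured when the route was cached against current routing state; a self-contained sketch of that idea, not the kernel's implementation:

#include <stdbool.h>
#include <stdio.h>

/* Bumped whenever routing state changes (illustrative stand-in). */
static unsigned int rt_genid = 1;

/* A cached route remembers the genid it was created under. */
struct cached_dst {
	unsigned int cookie;
};

static bool dst_still_valid(const struct cached_dst *dst)
{
	return dst->cookie == rt_genid;
}

int main(void)
{
	struct cached_dst dst = { .cookie = rt_genid };

	printf("fresh: %d\n", dst_still_valid(&dst));	/* 1: keep flow */
	rt_genid++;					/* route change */
	printf("stale: %d\n", dst_still_valid(&dst));	/* 0: teardown */
	return 0;
}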
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
@@ -8342,16 +8342,7 @@ EXPORT_SYMBOL_GPL(nf_tables_trans_destroy_flush_work);
 static bool nft_expr_reduce(struct nft_regs_track *track,
 			    const struct nft_expr *expr)
 {
-	if (!expr->ops->reduce) {
-		pr_warn_once("missing reduce for expression %s ",
-			     expr->ops->type->name);
-		return false;
-	}
-
-	if (nft_reduce_is_readonly(expr))
-		return false;
-
-	return expr->ops->reduce(track, expr);
+	return false;
 }
 
 static int nf_tables_commit_chain_prepare(struct net *net, struct nft_chain *chain)
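Point 7 of the pull request is the reasoning behind this hunk: register tracking may decide a register already holds the data a later expression needs and elide the load, which is only sound if the keys are always pre-fetched. A toy illustration of the hazard in plain C, not nft bytecode:

#include <stdio.h>

static int reg;	/* stands in for an nft register */

static void rule(int key, int elide_load)
{
	if (!elide_load)
		reg = key;	/* payload/meta load into the register */
	printf("cmp reg(%d) == %d -> %s\n", reg, key,
	       reg == key ? "match" : "MISMATCH");
}

int main(void)
{
	rule(10, 0);	/* load + compare: fine */
	/* reduction elided this load, assuming reg already held 20 */
	rule(20, 1);	/* compares against stale 10: wrong verdict */
	return 0;
}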
diff --git a/net/netfilter/nft_flow_offload.c b/net/netfilter/nft_flow_offload.c
@@ -36,6 +36,15 @@ static void nft_default_forward_path(struct nf_flow_route *route,
 	route->tuple[dir].xmit_type = nft_xmit_type(dst_cache);
 }
 
+static bool nft_is_valid_ether_device(const struct net_device *dev)
+{
+	if (!dev || (dev->flags & IFF_LOOPBACK) || dev->type != ARPHRD_ETHER ||
+	    dev->addr_len != ETH_ALEN || !is_valid_ether_addr(dev->dev_addr))
+		return false;
+
+	return true;
+}
+
 static int nft_dev_fill_forward_path(const struct nf_flow_route *route,
 				     const struct dst_entry *dst_cache,
 				     const struct nf_conn *ct,
|
||||
struct neighbour *n;
|
||||
u8 nud_state;
|
||||
|
||||
if (!nft_is_valid_ether_device(dev))
|
||||
goto out;
|
||||
|
||||
n = dst_neigh_lookup(dst_cache, daddr);
|
||||
if (!n)
|
||||
return -1;
|
||||
@ -60,6 +72,7 @@ static int nft_dev_fill_forward_path(const struct nf_flow_route *route,
|
||||
if (!(nud_state & NUD_VALID))
|
||||
return -1;
|
||||
|
||||
out:
|
||||
return dev_fill_forward_path(dev, ha, stack);
|
||||
}
|
||||
|
||||
@@ -78,15 +91,6 @@ struct nft_forward_info {
 	enum flow_offload_xmit_type xmit_type;
 };
 
-static bool nft_is_valid_ether_device(const struct net_device *dev)
-{
-	if (!dev || (dev->flags & IFF_LOOPBACK) || dev->type != ARPHRD_ETHER ||
-	    dev->addr_len != ETH_ALEN || !is_valid_ether_addr(dev->dev_addr))
-		return false;
-
-	return true;
-}
-
 static void nft_dev_path_info(const struct net_device_path_stack *stack,
 			      struct nft_forward_info *info,
 			      unsigned char *ha, struct nf_flowtable *flowtable)
@@ -119,7 +123,8 @@ static void nft_dev_path_info(const struct net_device_path_stack *stack,
 			info->indev = NULL;
 			break;
 		}
-		info->outdev = path->dev;
+		if (!info->outdev)
+			info->outdev = path->dev;
 		info->encap[info->num_encaps].id = path->encap.id;
 		info->encap[info->num_encaps].proto = path->encap.proto;
 		info->num_encaps++;
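The path walk visits the PPPoE device before the VLAN device below it, so the old unconditional assignment left info->outdev pointing at the device visited last; assigning it only once keeps the outermost device, the one software offload must transmit through. A tiny sketch of the set-once guard, with invented device names:

#include <stdio.h>

int main(void)
{
	const char *path[] = { "pppoe0", "eth0.100" }; /* encap steps */
	const char *outdev = NULL;
	unsigned int i;

	for (i = 0; i < 2; i++) {
		if (!outdev)	/* the fix: set once, keep the outermost */
			outdev = path[i];
	}
	printf("outdev = %s\n", outdev);	/* pppoe0, not eth0.100 */
	return 0;
}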
@@ -293,7 +298,8 @@ static void nft_flow_offload_eval(const struct nft_expr *expr,
 	case IPPROTO_TCP:
 		tcph = skb_header_pointer(pkt->skb, nft_thoff(pkt),
 					  sizeof(_tcph), &_tcph);
-		if (unlikely(!tcph || tcph->fin || tcph->rst))
+		if (unlikely(!tcph || tcph->fin || tcph->rst ||
+			     !nf_conntrack_tcp_established(ct)))
 			goto out;
 		break;
 	case IPPROTO_UDP: