Merge git://git.kernel.org/pub/scm/linux/kernel/git/netfilter/nf

Pablo Neira Ayuso says:

====================
Netfilter fixes for net

1) Reduce number of hardware offload retries from flowtable datapath
   which might hog system with retries, from Felix Fietkau.

2) Skip neighbour lookup for PPPoE device, fill_forward_path() already
   provides this and set on destination address from fill_forward_path for
   PPPoE device, also from Felix.

4) When combining PPPoE on top of a VLAN device, set info->outdev to the
   PPPoE device so software offload works, from Felix.

5) Fix TCP teardown flowtable state, races with conntrack gc might result
   in resetting the state to ESTABLISHED and the time to one day. Joint
   work with Oz Shlomo and Sven Auhagen.

6) Call dst_check() from flowtable datapath to check if dst is stale
   instead of doing it from garbage collector path.

7) Disable register tracking infrastructure, either user-space or
   kernel need to pre-fetch keys inconditionally, otherwise register
   tracking assumes data is already available in register that might
   not well be there, leading to incorrect reductions.

* git://git.kernel.org/pub/scm/linux/kernel/git/netfilter/nf:
  netfilter: nf_tables: disable expression reduction infra
  netfilter: flowtable: move dst_check to packet path
  netfilter: flowtable: fix TCP flow teardown
  netfilter: nft_flow_offload: fix offload with pppoe + vlan
  net: fix dev_fill_forward_path with pppoe + bridge
  netfilter: nft_flow_offload: skip dst neigh lookup for ppp devices
  netfilter: flowtable: fix excessive hw offload attempts after failure
====================

Link: https://lore.kernel.org/r/20220518213841.359653-1-pablo@netfilter.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
This commit is contained in:
Jakub Kicinski 2022-05-18 19:34:25 -07:00
commit 7dc02d7f08
7 changed files with 51 additions and 72 deletions

View File

@ -988,6 +988,7 @@ static int pppoe_fill_forward_path(struct net_device_path_ctx *ctx,
path->encap.proto = htons(ETH_P_PPP_SES);
path->encap.id = be16_to_cpu(po->num);
memcpy(path->encap.h_dest, po->pppoe_pa.remote, ETH_ALEN);
memcpy(ctx->daddr, po->pppoe_pa.remote, ETH_ALEN);
path->dev = ctx->dev;
ctx->dev = dev;

View File

@ -900,7 +900,7 @@ struct net_device_path_stack {
struct net_device_path_ctx {
const struct net_device *dev;
const u8 *daddr;
u8 daddr[ETH_ALEN];
int num_vlans;
struct {

View File

@ -681,11 +681,11 @@ int dev_fill_forward_path(const struct net_device *dev, const u8 *daddr,
const struct net_device *last_dev;
struct net_device_path_ctx ctx = {
.dev = dev,
.daddr = daddr,
};
struct net_device_path *path;
int ret = 0;
memcpy(ctx.daddr, daddr, sizeof(ctx.daddr));
stack->num_paths = 0;
while (ctx.dev && ctx.dev->netdev_ops->ndo_fill_forward_path) {
last_dev = ctx.dev;

View File

@ -179,12 +179,11 @@ EXPORT_SYMBOL_GPL(flow_offload_route_init);
static void flow_offload_fixup_tcp(struct ip_ct_tcp *tcp)
{
tcp->state = TCP_CONNTRACK_ESTABLISHED;
tcp->seen[0].td_maxwin = 0;
tcp->seen[1].td_maxwin = 0;
}
static void flow_offload_fixup_ct_timeout(struct nf_conn *ct)
static void flow_offload_fixup_ct(struct nf_conn *ct)
{
struct net *net = nf_ct_net(ct);
int l4num = nf_ct_protonum(ct);
@ -193,7 +192,9 @@ static void flow_offload_fixup_ct_timeout(struct nf_conn *ct)
if (l4num == IPPROTO_TCP) {
struct nf_tcp_net *tn = nf_tcp_pernet(net);
timeout = tn->timeouts[TCP_CONNTRACK_ESTABLISHED];
flow_offload_fixup_tcp(&ct->proto.tcp);
timeout = tn->timeouts[ct->proto.tcp.state];
timeout -= tn->offload_timeout;
} else if (l4num == IPPROTO_UDP) {
struct nf_udp_net *tn = nf_udp_pernet(net);
@ -211,18 +212,6 @@ static void flow_offload_fixup_ct_timeout(struct nf_conn *ct)
WRITE_ONCE(ct->timeout, nfct_time_stamp + timeout);
}
static void flow_offload_fixup_ct_state(struct nf_conn *ct)
{
if (nf_ct_protonum(ct) == IPPROTO_TCP)
flow_offload_fixup_tcp(&ct->proto.tcp);
}
static void flow_offload_fixup_ct(struct nf_conn *ct)
{
flow_offload_fixup_ct_state(ct);
flow_offload_fixup_ct_timeout(ct);
}
static void flow_offload_route_release(struct flow_offload *flow)
{
nft_flow_dst_release(flow, FLOW_OFFLOAD_DIR_ORIGINAL);
@ -335,8 +324,10 @@ void flow_offload_refresh(struct nf_flowtable *flow_table,
u32 timeout;
timeout = nf_flowtable_time_stamp + flow_offload_get_timeout(flow);
if (READ_ONCE(flow->timeout) != timeout)
if (timeout - READ_ONCE(flow->timeout) > HZ)
WRITE_ONCE(flow->timeout, timeout);
else
return;
if (likely(!nf_flowtable_hw_offload(flow_table)))
return;
@ -359,22 +350,14 @@ static void flow_offload_del(struct nf_flowtable *flow_table,
rhashtable_remove_fast(&flow_table->rhashtable,
&flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].node,
nf_flow_offload_rhash_params);
clear_bit(IPS_OFFLOAD_BIT, &flow->ct->status);
if (nf_flow_has_expired(flow))
flow_offload_fixup_ct(flow->ct);
else
flow_offload_fixup_ct_timeout(flow->ct);
flow_offload_free(flow);
}
void flow_offload_teardown(struct flow_offload *flow)
{
clear_bit(IPS_OFFLOAD_BIT, &flow->ct->status);
set_bit(NF_FLOW_TEARDOWN, &flow->flags);
flow_offload_fixup_ct_state(flow->ct);
flow_offload_fixup_ct(flow->ct);
}
EXPORT_SYMBOL_GPL(flow_offload_teardown);
@ -438,33 +421,12 @@ nf_flow_table_iterate(struct nf_flowtable *flow_table,
return err;
}
static bool flow_offload_stale_dst(struct flow_offload_tuple *tuple)
{
struct dst_entry *dst;
if (tuple->xmit_type == FLOW_OFFLOAD_XMIT_NEIGH ||
tuple->xmit_type == FLOW_OFFLOAD_XMIT_XFRM) {
dst = tuple->dst_cache;
if (!dst_check(dst, tuple->dst_cookie))
return true;
}
return false;
}
static bool nf_flow_has_stale_dst(struct flow_offload *flow)
{
return flow_offload_stale_dst(&flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple) ||
flow_offload_stale_dst(&flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple);
}
static void nf_flow_offload_gc_step(struct nf_flowtable *flow_table,
struct flow_offload *flow, void *data)
{
if (nf_flow_has_expired(flow) ||
nf_ct_is_dying(flow->ct) ||
nf_flow_has_stale_dst(flow))
set_bit(NF_FLOW_TEARDOWN, &flow->flags);
nf_ct_is_dying(flow->ct))
flow_offload_teardown(flow);
if (test_bit(NF_FLOW_TEARDOWN, &flow->flags)) {
if (test_bit(NF_FLOW_HW, &flow->flags)) {

View File

@ -248,6 +248,15 @@ static bool nf_flow_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu)
return true;
}
static inline bool nf_flow_dst_check(struct flow_offload_tuple *tuple)
{
if (tuple->xmit_type != FLOW_OFFLOAD_XMIT_NEIGH &&
tuple->xmit_type != FLOW_OFFLOAD_XMIT_XFRM)
return true;
return dst_check(tuple->dst_cache, tuple->dst_cookie);
}
static unsigned int nf_flow_xmit_xfrm(struct sk_buff *skb,
const struct nf_hook_state *state,
struct dst_entry *dst)
@ -367,6 +376,11 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
if (nf_flow_state_check(flow, iph->protocol, skb, thoff))
return NF_ACCEPT;
if (!nf_flow_dst_check(&tuplehash->tuple)) {
flow_offload_teardown(flow);
return NF_ACCEPT;
}
if (skb_try_make_writable(skb, thoff + hdrsize))
return NF_DROP;
@ -624,6 +638,11 @@ nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
if (nf_flow_state_check(flow, ip6h->nexthdr, skb, thoff))
return NF_ACCEPT;
if (!nf_flow_dst_check(&tuplehash->tuple)) {
flow_offload_teardown(flow);
return NF_ACCEPT;
}
if (skb_try_make_writable(skb, thoff + hdrsize))
return NF_DROP;

View File

@ -8342,16 +8342,7 @@ EXPORT_SYMBOL_GPL(nf_tables_trans_destroy_flush_work);
static bool nft_expr_reduce(struct nft_regs_track *track,
const struct nft_expr *expr)
{
if (!expr->ops->reduce) {
pr_warn_once("missing reduce for expression %s ",
expr->ops->type->name);
return false;
}
if (nft_reduce_is_readonly(expr))
return false;
return expr->ops->reduce(track, expr);
return false;
}
static int nf_tables_commit_chain_prepare(struct net *net, struct nft_chain *chain)

View File

@ -36,6 +36,15 @@ static void nft_default_forward_path(struct nf_flow_route *route,
route->tuple[dir].xmit_type = nft_xmit_type(dst_cache);
}
static bool nft_is_valid_ether_device(const struct net_device *dev)
{
if (!dev || (dev->flags & IFF_LOOPBACK) || dev->type != ARPHRD_ETHER ||
dev->addr_len != ETH_ALEN || !is_valid_ether_addr(dev->dev_addr))
return false;
return true;
}
static int nft_dev_fill_forward_path(const struct nf_flow_route *route,
const struct dst_entry *dst_cache,
const struct nf_conn *ct,
@ -47,6 +56,9 @@ static int nft_dev_fill_forward_path(const struct nf_flow_route *route,
struct neighbour *n;
u8 nud_state;
if (!nft_is_valid_ether_device(dev))
goto out;
n = dst_neigh_lookup(dst_cache, daddr);
if (!n)
return -1;
@ -60,6 +72,7 @@ static int nft_dev_fill_forward_path(const struct nf_flow_route *route,
if (!(nud_state & NUD_VALID))
return -1;
out:
return dev_fill_forward_path(dev, ha, stack);
}
@ -78,15 +91,6 @@ struct nft_forward_info {
enum flow_offload_xmit_type xmit_type;
};
static bool nft_is_valid_ether_device(const struct net_device *dev)
{
if (!dev || (dev->flags & IFF_LOOPBACK) || dev->type != ARPHRD_ETHER ||
dev->addr_len != ETH_ALEN || !is_valid_ether_addr(dev->dev_addr))
return false;
return true;
}
static void nft_dev_path_info(const struct net_device_path_stack *stack,
struct nft_forward_info *info,
unsigned char *ha, struct nf_flowtable *flowtable)
@ -119,7 +123,8 @@ static void nft_dev_path_info(const struct net_device_path_stack *stack,
info->indev = NULL;
break;
}
info->outdev = path->dev;
if (!info->outdev)
info->outdev = path->dev;
info->encap[info->num_encaps].id = path->encap.id;
info->encap[info->num_encaps].proto = path->encap.proto;
info->num_encaps++;
@ -293,7 +298,8 @@ static void nft_flow_offload_eval(const struct nft_expr *expr,
case IPPROTO_TCP:
tcph = skb_header_pointer(pkt->skb, nft_thoff(pkt),
sizeof(_tcph), &_tcph);
if (unlikely(!tcph || tcph->fin || tcph->rst))
if (unlikely(!tcph || tcph->fin || tcph->rst ||
!nf_conntrack_tcp_established(ct)))
goto out;
break;
case IPPROTO_UDP: