netfilter pull request 2023-09-28

-----BEGIN PGP SIGNATURE-----
 
 iQJBBAABCAArFiEEgKkgxbID4Gn1hq6fcJGo2a1f9gAFAmUVjwUNHGZ3QHN0cmxl
 bi5kZQAKCRBwkajZrV/2AKneEACzrKtIC0j0DyhgVW4Kb57T8Y7cD5wQCv7oz1Cx
 8A3UJ1pSLYhRnz94zY453GIenK+zx/KKIetDhyWnjA9gjk95HkUN+OwuuiKnUAgu
 7KPGbIYat7hERwoZpR88nrbTYXcDZfcZGTqWA++3yL2vn4Lu4lsuowqXYKBf/axk
 5gEwEtwn2mVsdo0qTVJcXkHqnf5CCdqd26ixF4yB1rz/P6kISi4I9q7ul43paFJW
 +/ifacdG+7raQkGlUlYiDNMVd0uO01HHaAcWfYa+FOMK+GSn+89zzTs906CU0g2O
 GRJSWjNTgfDtM2AHN7peUnf/G9XHSK2Y7Re8FzauKzwWSl5N9w5610nbQnT+ME5O
 uOZE1P/lhnidOwCEV8zU4yhs6fBrCMCHz+S5Yh8C8PCUhi12IEEYRHyGCoUVMOwY
 1LINjdn4HddL57QUGumy0VqVBlxQru8VXnlzm0eIyhsbZ3/mVXQWIHX4u1G36UUQ
 zSkm4/qP4kna/tV86mETNX1MUcJsQ1vQ842abcUbxudKei/uT9av6YHlz/aBOcQZ
 NDMrGVO6mjh7/HnYUr7+zbQfhLZdg424SpGEoiuS7dDcTpGlcT3pnWBJDGEHsy+4
 0VnLI8/GPT1/jQCCYTVLu+tn0XmfZF18j2bvGhz1hM9J/HXaRpuqjGF6thLgYl63
 CZf5Yg==
 =ALU2
 -----END PGP SIGNATURE-----

Merge tag 'nf-next-23-09-28' of https://git.kernel.org/pub/scm/linux/kernel/git/netfilter/nf-next

Florian Westphal says:

====================
netfilter updates for net-next

First patch, from myself, is a bug fix. The issue (connect timeout) is
ancient, so I think its safe to give this more soak time given the esoteric
conditions needed to trigger this.
Also updates the existing selftest to cover this.

Add netlink extacks when an update references a non-existent
table/chain/set.  This allows userspace to provide much better
errors to the user, from Pablo Neira Ayuso.

Last patch adds more policy checks to nf_tables as a better
alternative to the existing runtime checks, from Phil Sutter.

* tag 'nf-next-23-09-28' of https://git.kernel.org/pub/scm/linux/kernel/git/netfilter/nf-next:
  netfilter: nf_tables: Utilize NLA_POLICY_NESTED_ARRAY
  netfilter: nf_tables: missing extended netlink error in lookup functions
  selftests: netfilter: test nat source port clash resolution interaction with tcp early demux
  netfilter: nf_nat: undo erroneous tcp edemux lookup after port clash
====================

Link: https://lore.kernel.org/r/20230928144916.18339-1-fw@strlen.de
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
This commit is contained in:
Jakub Kicinski 2023-10-04 14:25:37 -07:00
commit 07cf7974a2
3 changed files with 126 additions and 27 deletions

View File

@ -697,6 +697,31 @@ static int nf_xfrm_me_harder(struct net *net, struct sk_buff *skb, unsigned int
}
#endif
static bool nf_nat_inet_port_was_mangled(const struct sk_buff *skb, __be16 sport)
{
enum ip_conntrack_info ctinfo;
enum ip_conntrack_dir dir;
const struct nf_conn *ct;
ct = nf_ct_get(skb, &ctinfo);
if (!ct)
return false;
switch (nf_ct_protonum(ct)) {
case IPPROTO_TCP:
case IPPROTO_UDP:
break;
default:
return false;
}
dir = CTINFO2DIR(ctinfo);
if (dir != IP_CT_DIR_ORIGINAL)
return false;
return ct->tuplehash[!dir].tuple.dst.u.all != sport;
}
static unsigned int
nf_nat_ipv4_local_in(void *priv, struct sk_buff *skb,
const struct nf_hook_state *state)
@ -707,8 +732,20 @@ nf_nat_ipv4_local_in(void *priv, struct sk_buff *skb,
ret = nf_nat_ipv4_fn(priv, skb, state);
if (ret == NF_ACCEPT && sk && saddr != ip_hdr(skb)->saddr &&
!inet_sk_transparent(sk))
if (ret != NF_ACCEPT || !sk || inet_sk_transparent(sk))
return ret;
/* skb has a socket assigned via tcp edemux. We need to check
* if nf_nat_ipv4_fn() has mangled the packet in a way that
* edemux would not have found this socket.
*
* This includes both changes to the source address and changes
* to the source port, which are both handled by the
* nf_nat_ipv4_fn() call above -- long after tcp/udp early demux
* might have found a socket for the old (pre-snat) address.
*/
if (saddr != ip_hdr(skb)->saddr ||
nf_nat_inet_port_was_mangled(skb, sk->sk_dport))
skb_orphan(skb); /* TCP edemux obtained wrong socket */
return ret;
@ -937,6 +974,27 @@ nf_nat_ipv6_fn(void *priv, struct sk_buff *skb,
return nf_nat_inet_fn(priv, skb, state);
}
static unsigned int
nf_nat_ipv6_local_in(void *priv, struct sk_buff *skb,
const struct nf_hook_state *state)
{
struct in6_addr saddr = ipv6_hdr(skb)->saddr;
struct sock *sk = skb->sk;
unsigned int ret;
ret = nf_nat_ipv6_fn(priv, skb, state);
if (ret != NF_ACCEPT || !sk || inet_sk_transparent(sk))
return ret;
/* see nf_nat_ipv4_local_in */
if (ipv6_addr_cmp(&saddr, &ipv6_hdr(skb)->saddr) ||
nf_nat_inet_port_was_mangled(skb, sk->sk_dport))
skb_orphan(skb);
return ret;
}
static unsigned int
nf_nat_ipv6_in(void *priv, struct sk_buff *skb,
const struct nf_hook_state *state)
@ -1051,7 +1109,7 @@ static const struct nf_hook_ops nf_nat_ipv6_ops[] = {
},
/* After packet filtering, change source */
{
.hook = nf_nat_ipv6_fn,
.hook = nf_nat_ipv6_local_in,
.pf = NFPROTO_IPV6,
.hooknum = NF_INET_LOCAL_IN,
.priority = NF_IP6_PRI_NAT_SRC,

View File

@ -3316,7 +3316,7 @@ static const struct nla_policy nft_rule_policy[NFTA_RULE_MAX + 1] = {
[NFTA_RULE_CHAIN] = { .type = NLA_STRING,
.len = NFT_CHAIN_MAXNAMELEN - 1 },
[NFTA_RULE_HANDLE] = { .type = NLA_U64 },
[NFTA_RULE_EXPRESSIONS] = { .type = NLA_NESTED },
[NFTA_RULE_EXPRESSIONS] = NLA_POLICY_NESTED_ARRAY(nft_expr_policy),
[NFTA_RULE_COMPAT] = { .type = NLA_NESTED },
[NFTA_RULE_POSITION] = { .type = NLA_U64 },
[NFTA_RULE_USERDATA] = { .type = NLA_BINARY,
@ -4254,12 +4254,16 @@ static const struct nla_policy nft_set_policy[NFTA_SET_MAX + 1] = {
[NFTA_SET_OBJ_TYPE] = { .type = NLA_U32 },
[NFTA_SET_HANDLE] = { .type = NLA_U64 },
[NFTA_SET_EXPR] = { .type = NLA_NESTED },
[NFTA_SET_EXPRESSIONS] = { .type = NLA_NESTED },
[NFTA_SET_EXPRESSIONS] = NLA_POLICY_NESTED_ARRAY(nft_expr_policy),
};
static const struct nla_policy nft_concat_policy[NFTA_SET_FIELD_MAX + 1] = {
[NFTA_SET_FIELD_LEN] = { .type = NLA_U32 },
};
static const struct nla_policy nft_set_desc_policy[NFTA_SET_DESC_MAX + 1] = {
[NFTA_SET_DESC_SIZE] = { .type = NLA_U32 },
[NFTA_SET_DESC_CONCAT] = { .type = NLA_NESTED },
[NFTA_SET_DESC_CONCAT] = NLA_POLICY_NESTED_ARRAY(nft_concat_policy),
};
static struct nft_set *nft_set_lookup(const struct nft_table *table,
@ -4695,8 +4699,10 @@ static int nf_tables_getset(struct sk_buff *skb, const struct nfnl_info *info,
return -EINVAL;
set = nft_set_lookup(table, nla[NFTA_SET_NAME], genmask);
if (IS_ERR(set))
if (IS_ERR(set)) {
NL_SET_BAD_ATTR(extack, nla[NFTA_SET_NAME]);
return PTR_ERR(set);
}
skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC);
if (skb2 == NULL)
@ -4713,10 +4719,6 @@ err_fill_set_info:
return err;
}
static const struct nla_policy nft_concat_policy[NFTA_SET_FIELD_MAX + 1] = {
[NFTA_SET_FIELD_LEN] = { .type = NLA_U32 },
};
static int nft_set_desc_concat_parse(const struct nlattr *attr,
struct nft_set_desc *desc)
{
@ -5498,7 +5500,7 @@ static const struct nla_policy nft_set_elem_policy[NFTA_SET_ELEM_MAX + 1] = {
[NFTA_SET_ELEM_OBJREF] = { .type = NLA_STRING,
.len = NFT_OBJ_MAXNAMELEN - 1 },
[NFTA_SET_ELEM_KEY_END] = { .type = NLA_NESTED },
[NFTA_SET_ELEM_EXPRESSIONS] = { .type = NLA_NESTED },
[NFTA_SET_ELEM_EXPRESSIONS] = NLA_POLICY_NESTED_ARRAY(nft_expr_policy),
};
static const struct nla_policy nft_set_elem_list_policy[NFTA_SET_ELEM_LIST_MAX + 1] = {
@ -5506,7 +5508,7 @@ static const struct nla_policy nft_set_elem_list_policy[NFTA_SET_ELEM_LIST_MAX +
.len = NFT_TABLE_MAXNAMELEN - 1 },
[NFTA_SET_ELEM_LIST_SET] = { .type = NLA_STRING,
.len = NFT_SET_MAXNAMELEN - 1 },
[NFTA_SET_ELEM_LIST_ELEMENTS] = { .type = NLA_NESTED },
[NFTA_SET_ELEM_LIST_ELEMENTS] = NLA_POLICY_NESTED_ARRAY(nft_set_elem_policy),
[NFTA_SET_ELEM_LIST_SET_ID] = { .type = NLA_U32 },
};
@ -6025,8 +6027,10 @@ static int nf_tables_getsetelem(struct sk_buff *skb,
}
set = nft_set_lookup(table, nla[NFTA_SET_ELEM_LIST_SET], genmask);
if (IS_ERR(set))
if (IS_ERR(set)) {
NL_SET_BAD_ATTR(extack, nla[NFTA_SET_ELEM_LIST_SET]);
return PTR_ERR(set);
}
nft_ctx_init(&ctx, net, skb, info->nlh, family, table, NULL, nla);
@ -6919,8 +6923,10 @@ static int nf_tables_newsetelem(struct sk_buff *skb,
set = nft_set_lookup_global(net, table, nla[NFTA_SET_ELEM_LIST_SET],
nla[NFTA_SET_ELEM_LIST_SET_ID], genmask);
if (IS_ERR(set))
if (IS_ERR(set)) {
NL_SET_BAD_ATTR(extack, nla[NFTA_SET_ELEM_LIST_SET]);
return PTR_ERR(set);
}
if (!list_empty(&set->bindings) &&
(set->flags & (NFT_SET_CONSTANT | NFT_SET_ANONYMOUS)))
@ -7195,8 +7201,10 @@ static int nf_tables_delsetelem(struct sk_buff *skb,
}
set = nft_set_lookup(table, nla[NFTA_SET_ELEM_LIST_SET], genmask);
if (IS_ERR(set))
if (IS_ERR(set)) {
NL_SET_BAD_ATTR(extack, nla[NFTA_SET_ELEM_LIST_SET]);
return PTR_ERR(set);
}
if (nft_set_is_anonymous(set))
return -EOPNOTSUPP;
@ -8680,6 +8688,7 @@ static int nf_tables_getflowtable(struct sk_buff *skb,
const struct nfnl_info *info,
const struct nlattr * const nla[])
{
struct netlink_ext_ack *extack = info->extack;
u8 genmask = nft_genmask_cur(info->net);
u8 family = info->nfmsg->nfgen_family;
struct nft_flowtable *flowtable;
@ -8705,13 +8714,17 @@ static int nf_tables_getflowtable(struct sk_buff *skb,
table = nft_table_lookup(net, nla[NFTA_FLOWTABLE_TABLE], family,
genmask, 0);
if (IS_ERR(table))
if (IS_ERR(table)) {
NL_SET_BAD_ATTR(extack, nla[NFTA_FLOWTABLE_TABLE]);
return PTR_ERR(table);
}
flowtable = nft_flowtable_lookup(table, nla[NFTA_FLOWTABLE_NAME],
genmask);
if (IS_ERR(flowtable))
if (IS_ERR(flowtable)) {
NL_SET_BAD_ATTR(extack, nla[NFTA_FLOWTABLE_NAME]);
return PTR_ERR(flowtable);
}
skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC);
if (!skb2)

View File

@ -11,16 +11,18 @@ ret=0
sfx=$(mktemp -u "XXXXXXXX")
ns1="ns1-$sfx"
ns2="ns2-$sfx"
socatpid=0
cleanup()
{
[ $socatpid -gt 0 ] && kill $socatpid
ip netns del $ns1
ip netns del $ns2
}
iperf3 -v > /dev/null 2>&1
socat -h > /dev/null 2>&1
if [ $? -ne 0 ];then
echo "SKIP: Could not run test without iperf3"
echo "SKIP: Could not run test without socat"
exit $ksft_skip
fi
@ -60,8 +62,8 @@ ip netns exec $ns2 ip link set up dev veth2
ip netns exec $ns2 ip addr add 192.168.1.2/24 dev veth2
# Create a server in one namespace
ip netns exec $ns1 iperf3 -s > /dev/null 2>&1 &
iperfs=$!
ip netns exec $ns1 socat -u TCP-LISTEN:5201,fork OPEN:/dev/null,wronly=1 &
socatpid=$!
# Restrict source port to just one so we don't have to exhaust
# all others.
@ -83,17 +85,43 @@ sleep 1
# ip daddr:dport will be rewritten to 192.168.1.1 5201
# NAT must reallocate source port 10000 because
# 192.168.1.2:10000 -> 192.168.1.1:5201 is already in use
echo test | ip netns exec $ns2 socat -t 3 -u STDIN TCP:10.96.0.1:443 >/dev/null
echo test | ip netns exec $ns2 socat -t 3 -u STDIN TCP:10.96.0.1:443,connect-timeout=3 >/dev/null
ret=$?
kill $iperfs
# Check socat can connect to 10.96.0.1:443 (aka 192.168.1.1:5201).
if [ $ret -eq 0 ]; then
echo "PASS: socat can connect via NAT'd address"
else
echo "FAIL: socat cannot connect via NAT'd address"
exit 1
fi
exit 0
# check sport clashres.
ip netns exec $ns1 iptables -t nat -A PREROUTING -p tcp --dport 5202 -j REDIRECT --to-ports 5201
ip netns exec $ns1 iptables -t nat -A PREROUTING -p tcp --dport 5203 -j REDIRECT --to-ports 5201
sleep 5 | ip netns exec $ns2 socat -t 5 -u STDIN TCP:192.168.1.1:5202,connect-timeout=5 >/dev/null &
cpid1=$!
sleep 1
# if connect succeeds, client closes instantly due to EOF on stdin.
# if connect hangs, it will time out after 5s.
echo | ip netns exec $ns2 socat -t 3 -u STDIN TCP:192.168.1.1:5203,connect-timeout=5 >/dev/null &
cpid2=$!
time_then=$(date +%s)
wait $cpid2
rv=$?
time_now=$(date +%s)
# Check how much time has elapsed, expectation is for
# 'cpid2' to connect and then exit (and no connect delay).
delta=$((time_now - time_then))
if [ $delta -lt 2 -a $rv -eq 0 ]; then
echo "PASS: could connect to service via redirected ports"
else
echo "FAIL: socat cannot connect to service via redirect ($delta seconds elapsed, returned $rv)"
ret=1
fi
exit $ret