nf-next pr 2024-01-29

-----BEGIN PGP SIGNATURE-----
 
 iQJBBAABCAArFiEEgKkgxbID4Gn1hq6fcJGo2a1f9gAFAmW3ugcNHGZ3QHN0cmxl
 bi5kZQAKCRBwkajZrV/2AP+IEADdlinxL+a5Rqx0W3I0gR4LiOrnHdl2SQesCjEE
 iBm8Fgx7pQh6jQpjsEl+dg85CFbqI4iVxgLV/uAVCOvRFELH5aR/WHjAdoXQjrTS
 55bexDCG9q9KBYCm721h2mSUTdmmx+aKfndFYMhEULzQPfDy+cS2lIh4epQPnlFH
 Idc1zXuMNWM/QY0vvwkAxsZ6TMG61GIYDAH4PtEtfCUVksdkLRPG8qWs5tJJgKFp
 SIyqKSB3Ab4LqY9e/HG0FwcrMwrSmNhcbO4CwpDfIrHEuIUtMKCqOp6X4lU1ekeb
 xVTuQ7fU64KmO+a/sS4QH8rPfDgT31GnxaVfeL7AM9pQsiLhJGMTlfFqgItJjZrS
 uch7Jtx0iWMDfuP7OgIYnS46FYD2wXShuz4wIbHI8RSEkln7GBJ2KGpnvyoF07Tf
 V6ZrGQk0TnAr7MAEXHe8rd0WEVvbZuBiVHo1xpSxKI9rGJYDdgSRz16wMdBowhIW
 Q++nacicTs8ak64vlAsigI4bnDYTNXsHQO2S84tXTikaq88m1/f9EqIVr/V2uMoR
 xTQcAaob2TqaGirS/bx/9twEuiwB/gg/nbqmVHni285SO2JbdNQ/iglopc/+EMYS
 ES3wibdQzfPL9h61KyHMGUbZke3w72Gn5X5Fp3lnoi7+ZSLMMRTBoMFv4T+DLzqJ
 dyouYw==
 =iDKQ
 -----END PGP SIGNATURE-----

Merge tag 'nf-next-24-01-29' of https://git.kernel.org/pub/scm/linux/kernel/git/netfilter/nf-next
Florian Westphal says:

====================
nf-next pr 2024-01-29

This batch contains updates for your *next* tree.

First three changes, from Phil Sutter, allow userspace to define
a table that is exclusively owned by a daemon (via netlink socket
aliveness) without auto-removing this table when the userspace program
exits.  Such table gets marked as orphaned and a restarting management
daemon may re-attach/reassume ownership.

Next patch, from Pablo, passes already-validated flags variable around
rather than having called code re-fetch it from netlnik message.

Patches 5 and 6 update ipvs and nf_conncount to use the recently
introduced KMEM_CACHE() macro.

Last three patches, from myself, tweak kconfig logic a little to
permit a kernel configuration that can run iptables-over-nftables
but not classic (setsockopt) iptables.

Such builds lack the builtin-filter/mangle/raw/nat/security tables,
the set/getsockopt interface and the "old blob format"
interpreter/traverser.  For now, this is 'oldconfig friendly', users
need to manually deselect existing config options for this.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
David S. Miller 2024-01-31 15:13:26 +00:00
commit 84fc2408cf
12 changed files with 94 additions and 53 deletions

View File

@ -1271,6 +1271,12 @@ static inline bool nft_table_has_owner(const struct nft_table *table)
return table->flags & NFT_TABLE_F_OWNER;
}
static inline bool nft_table_is_orphan(const struct nft_table *table)
{
return (table->flags & (NFT_TABLE_F_OWNER | NFT_TABLE_F_PERSIST)) ==
NFT_TABLE_F_PERSIST;
}
static inline bool nft_base_chain_netdev(int family, u32 hooknum)
{
return family == NFPROTO_NETDEV ||

View File

@ -179,13 +179,17 @@ enum nft_hook_attributes {
* enum nft_table_flags - nf_tables table flags
*
* @NFT_TABLE_F_DORMANT: this table is not active
* @NFT_TABLE_F_OWNER: this table is owned by a process
* @NFT_TABLE_F_PERSIST: this table shall outlive its owner
*/
enum nft_table_flags {
NFT_TABLE_F_DORMANT = 0x1,
NFT_TABLE_F_OWNER = 0x2,
NFT_TABLE_F_PERSIST = 0x4,
};
#define NFT_TABLE_F_MASK (NFT_TABLE_F_DORMANT | \
NFT_TABLE_F_OWNER)
NFT_TABLE_F_OWNER | \
NFT_TABLE_F_PERSIST)
/**
* enum nft_table_attributes - nf_tables table netlink attributes

View File

@ -39,6 +39,10 @@ config NF_CONNTRACK_BRIDGE
To compile it as a module, choose M here. If unsure, say N.
# old sockopt interface and eval loop
config BRIDGE_NF_EBTABLES_LEGACY
tristate
menuconfig BRIDGE_NF_EBTABLES
tristate "Ethernet Bridge tables (ebtables) support"
depends on BRIDGE && NETFILTER && NETFILTER_XTABLES
@ -55,6 +59,7 @@ if BRIDGE_NF_EBTABLES
#
config BRIDGE_EBT_BROUTE
tristate "ebt: broute table support"
select BRIDGE_NF_EBTABLES_LEGACY
help
The ebtables broute table is used to define rules that decide between
bridging and routing frames, giving Linux the functionality of a
@ -65,6 +70,7 @@ config BRIDGE_EBT_BROUTE
config BRIDGE_EBT_T_FILTER
tristate "ebt: filter table support"
select BRIDGE_NF_EBTABLES_LEGACY
help
The ebtables filter table is used to define frame filtering rules at
local input, forwarding and local output. See the man page for
@ -74,6 +80,7 @@ config BRIDGE_EBT_T_FILTER
config BRIDGE_EBT_T_NAT
tristate "ebt: nat table support"
select BRIDGE_NF_EBTABLES_LEGACY
help
The ebtables nat table is used to define rules that alter the MAC
source address (MAC SNAT) or the MAC destination address (MAC DNAT).

View File

@ -9,7 +9,7 @@ obj-$(CONFIG_NFT_BRIDGE_REJECT) += nft_reject_bridge.o
# connection tracking
obj-$(CONFIG_NF_CONNTRACK_BRIDGE) += nf_conntrack_bridge.o
obj-$(CONFIG_BRIDGE_NF_EBTABLES) += ebtables.o
obj-$(CONFIG_BRIDGE_NF_EBTABLES_LEGACY) += ebtables.o
# tables
obj-$(CONFIG_BRIDGE_EBT_BROUTE) += ebtable_broute.o

View File

@ -10,6 +10,10 @@ config NF_DEFRAG_IPV4
tristate
default n
# old sockopt interface and eval loop
config IP_NF_IPTABLES_LEGACY
tristate
config NF_SOCKET_IPV4
tristate "IPv4 socket lookup support"
help
@ -152,7 +156,7 @@ config IP_NF_MATCH_ECN
config IP_NF_MATCH_RPFILTER
tristate '"rpfilter" reverse path filter match support'
depends on NETFILTER_ADVANCED
depends on IP_NF_MANGLE || IP_NF_RAW
depends on IP_NF_MANGLE || IP_NF_RAW || NFT_COMPAT
help
This option allows you to match packets whose replies would
go out via the interface the packet came in.
@ -173,6 +177,7 @@ config IP_NF_MATCH_TTL
config IP_NF_FILTER
tristate "Packet filtering"
default m if NETFILTER_ADVANCED=n
select IP_NF_IPTABLES_LEGACY
help
Packet filtering defines a table `filter', which has a series of
rules for simple packet filtering at local input, forwarding and
@ -182,7 +187,7 @@ config IP_NF_FILTER
config IP_NF_TARGET_REJECT
tristate "REJECT target support"
depends on IP_NF_FILTER
depends on IP_NF_FILTER || NFT_COMPAT
select NF_REJECT_IPV4
default m if NETFILTER_ADVANCED=n
help
@ -212,6 +217,7 @@ config IP_NF_NAT
default m if NETFILTER_ADVANCED=n
select NF_NAT
select NETFILTER_XT_NAT
select IP6_NF_IPTABLES_LEGACY
help
This enables the `nat' table in iptables. This allows masquerading,
port forwarding and other forms of full Network Address Port
@ -252,6 +258,7 @@ endif # IP_NF_NAT
config IP_NF_MANGLE
tristate "Packet mangling"
default m if NETFILTER_ADVANCED=n
select IP_NF_IPTABLES_LEGACY
help
This option adds a `mangle' table to iptables: see the man page for
iptables(8). This table is used for various packet alterations
@ -261,7 +268,7 @@ config IP_NF_MANGLE
config IP_NF_TARGET_ECN
tristate "ECN target support"
depends on IP_NF_MANGLE
depends on IP_NF_MANGLE || NFT_COMPAT
depends on NETFILTER_ADVANCED
help
This option adds a `ECN' target, which can be used in the iptables mangle
@ -286,6 +293,7 @@ config IP_NF_TARGET_TTL
# raw + specific targets
config IP_NF_RAW
tristate 'raw table support (required for NOTRACK/TRACE)'
select IP_NF_IPTABLES_LEGACY
help
This option adds a `raw' table to iptables. This table is the very
first in the netfilter framework and hooks in at the PREROUTING
@ -299,6 +307,7 @@ config IP_NF_SECURITY
tristate "Security table"
depends on SECURITY
depends on NETFILTER_ADVANCED
select IP_NF_IPTABLES_LEGACY
help
This option adds a `security' table to iptables, for use
with Mandatory Access Control (MAC) policy.
@ -309,36 +318,34 @@ endif # IP_NF_IPTABLES
# ARP tables
config IP_NF_ARPTABLES
tristate "ARP tables support"
select NETFILTER_XTABLES
select NETFILTER_FAMILY_ARP
depends on NETFILTER_ADVANCED
help
arptables is a general, extensible packet identification framework.
The ARP packet filtering and mangling (manipulation)subsystems
use this: say Y or M here if you want to use either of those.
tristate
To compile it as a module, choose M here. If unsure, say N.
if IP_NF_ARPTABLES
config NFT_COMPAT_ARP
tristate
depends on NF_TABLES_ARP && NFT_COMPAT
default m if NFT_COMPAT=m
default y if NFT_COMPAT=y
config IP_NF_ARPFILTER
tristate "ARP packet filtering"
tristate "arptables-legacy packet filtering support"
select IP_NF_ARPTABLES
help
ARP packet filtering defines a table `filter', which has a series of
rules for simple ARP packet filtering at local input and
local output. On a bridge, you can also specify filtering rules
for forwarded ARP packets. See the man page for arptables(8).
local output. This is only needed for arptables-legacy(8).
Neither arptables-nft nor nftables need this to work.
To compile it as a module, choose M here. If unsure, say N.
config IP_NF_ARP_MANGLE
tristate "ARP payload mangling"
depends on IP_NF_ARPTABLES || NFT_COMPAT_ARP
help
Allows altering the ARP packet payload: source and destination
hardware and network addresses.
endif # IP_NF_ARPTABLES
This option is needed by both arptables-legacy and arptables-nft.
It is not used by nftables.
endmenu

View File

@ -25,7 +25,7 @@ obj-$(CONFIG_NFT_FIB_IPV4) += nft_fib_ipv4.o
obj-$(CONFIG_NFT_DUP_IPV4) += nft_dup_ipv4.o
# generic IP tables
obj-$(CONFIG_IP_NF_IPTABLES) += ip_tables.o
obj-$(CONFIG_IP_NF_IPTABLES_LEGACY) += ip_tables.o
# the three instances of ip_tables
obj-$(CONFIG_IP_NF_FILTER) += iptable_filter.o

View File

@ -6,6 +6,10 @@
menu "IPv6: Netfilter Configuration"
depends on INET && IPV6 && NETFILTER
# old sockopt interface and eval loop
config IP6_NF_IPTABLES_LEGACY
tristate
config NF_SOCKET_IPV6
tristate "IPv6 socket lookup support"
help
@ -147,7 +151,7 @@ config IP6_NF_MATCH_MH
config IP6_NF_MATCH_RPFILTER
tristate '"rpfilter" reverse path filter match support'
depends on NETFILTER_ADVANCED
depends on IP6_NF_MANGLE || IP6_NF_RAW
depends on IP6_NF_MANGLE || IP6_NF_RAW || NFT_COMPAT
help
This option allows you to match packets whose replies would
go out via the interface the packet came in.
@ -186,6 +190,8 @@ config IP6_NF_TARGET_HL
config IP6_NF_FILTER
tristate "Packet filtering"
default m if NETFILTER_ADVANCED=n
select IP6_NF_IPTABLES_LEGACY
tristate
help
Packet filtering defines a table `filter', which has a series of
rules for simple packet filtering at local input, forwarding and
@ -195,7 +201,7 @@ config IP6_NF_FILTER
config IP6_NF_TARGET_REJECT
tristate "REJECT target support"
depends on IP6_NF_FILTER
depends on IP6_NF_FILTER || NFT_COMPAT
select NF_REJECT_IPV6
default m if NETFILTER_ADVANCED=n
help
@ -221,6 +227,7 @@ config IP6_NF_TARGET_SYNPROXY
config IP6_NF_MANGLE
tristate "Packet mangling"
default m if NETFILTER_ADVANCED=n
select IP6_NF_IPTABLES_LEGACY
help
This option adds a `mangle' table to iptables: see the man page for
iptables(8). This table is used for various packet alterations
@ -230,6 +237,7 @@ config IP6_NF_MANGLE
config IP6_NF_RAW
tristate 'raw table support (required for TRACE)'
select IP6_NF_IPTABLES_LEGACY
help
This option adds a `raw' table to ip6tables. This table is the very
first in the netfilter framework and hooks in at the PREROUTING
@ -243,6 +251,7 @@ config IP6_NF_SECURITY
tristate "Security table"
depends on SECURITY
depends on NETFILTER_ADVANCED
select IP6_NF_IPTABLES_LEGACY
help
This option adds a `security' table to iptables, for use
with Mandatory Access Control (MAC) policy.
@ -254,6 +263,7 @@ config IP6_NF_NAT
depends on NF_CONNTRACK
depends on NETFILTER_ADVANCED
select NF_NAT
select IP6_NF_IPTABLES_LEGACY
select NETFILTER_XT_NAT
help
This enables the `nat' table in ip6tables. This allows masquerading,
@ -262,25 +272,23 @@ config IP6_NF_NAT
To compile it as a module, choose M here. If unsure, say N.
if IP6_NF_NAT
config IP6_NF_TARGET_MASQUERADE
tristate "MASQUERADE target support"
select NETFILTER_XT_TARGET_MASQUERADE
depends on IP6_NF_NAT
help
This is a backwards-compat option for the user's convenience
(e.g. when running oldconfig). It selects NETFILTER_XT_TARGET_MASQUERADE.
config IP6_NF_TARGET_NPT
tristate "NPT (Network Prefix translation) target support"
depends on IP6_NF_NAT || NFT_COMPAT
help
This option adds the `SNPT' and `DNPT' target, which perform
stateless IPv6-to-IPv6 Network Prefix Translation per RFC 6296.
To compile it as a module, choose M here. If unsure, say N.
endif # IP6_NF_NAT
endif # IP6_NF_IPTABLES
endmenu

View File

@ -4,7 +4,7 @@
#
# Link order matters here.
obj-$(CONFIG_IP6_NF_IPTABLES) += ip6_tables.o
obj-$(CONFIG_IP6_NF_IPTABLES_LEGACY) += ip6_tables.o
obj-$(CONFIG_IP6_NF_FILTER) += ip6table_filter.o
obj-$(CONFIG_IP6_NF_MANGLE) += ip6table_mangle.o
obj-$(CONFIG_IP6_NF_RAW) += ip6table_raw.o

View File

@ -818,7 +818,7 @@ config NETFILTER_XT_TARGET_AUDIT
config NETFILTER_XT_TARGET_CHECKSUM
tristate "CHECKSUM target support"
depends on IP_NF_MANGLE || IP6_NF_MANGLE
depends on IP_NF_MANGLE || IP6_NF_MANGLE || NFT_COMPAT
depends on NETFILTER_ADVANCED
help
This option adds a `CHECKSUM' target, which can be used in the iptables mangle
@ -869,7 +869,7 @@ config NETFILTER_XT_TARGET_CONNSECMARK
config NETFILTER_XT_TARGET_CT
tristate '"CT" target support'
depends on NF_CONNTRACK
depends on IP_NF_RAW || IP6_NF_RAW
depends on IP_NF_RAW || IP6_NF_RAW || NFT_COMPAT
depends on NETFILTER_ADVANCED
help
This options adds a `CT' target, which allows to specify initial
@ -880,7 +880,7 @@ config NETFILTER_XT_TARGET_CT
config NETFILTER_XT_TARGET_DSCP
tristate '"DSCP" and "TOS" target support'
depends on IP_NF_MANGLE || IP6_NF_MANGLE
depends on IP_NF_MANGLE || IP6_NF_MANGLE || NFT_COMPAT
depends on NETFILTER_ADVANCED
help
This option adds a `DSCP' target, which allows you to manipulate
@ -896,7 +896,7 @@ config NETFILTER_XT_TARGET_DSCP
config NETFILTER_XT_TARGET_HL
tristate '"HL" hoplimit target support'
depends on IP_NF_MANGLE || IP6_NF_MANGLE
depends on IP_NF_MANGLE || IP6_NF_MANGLE || NFT_COMPAT
depends on NETFILTER_ADVANCED
help
This option adds the "HL" (for IPv6) and "TTL" (for IPv4)
@ -1080,7 +1080,7 @@ config NETFILTER_XT_TARGET_TPROXY
depends on NETFILTER_ADVANCED
depends on IPV6 || IPV6=n
depends on IP6_NF_IPTABLES || IP6_NF_IPTABLES=n
depends on IP_NF_MANGLE
depends on IP_NF_MANGLE || NFT_COMPAT
select NF_DEFRAG_IPV4
select NF_DEFRAG_IPV6 if IP6_NF_IPTABLES != n
select NF_TPROXY_IPV4
@ -1147,7 +1147,7 @@ config NETFILTER_XT_TARGET_TCPMSS
config NETFILTER_XT_TARGET_TCPOPTSTRIP
tristate '"TCPOPTSTRIP" target support'
depends on IP_NF_MANGLE || IP6_NF_MANGLE
depends on IP_NF_MANGLE || IP6_NF_MANGLE || NFT_COMPAT
depends on NETFILTER_ADVANCED
help
This option adds a "TCPOPTSTRIP" target, which allows you to strip

View File

@ -1511,9 +1511,7 @@ int __init ip_vs_conn_init(void)
return -ENOMEM;
/* Allocate ip_vs_conn slab cache */
ip_vs_conn_cachep = kmem_cache_create("ip_vs_conn",
sizeof(struct ip_vs_conn), 0,
SLAB_HWCACHE_ALIGN, NULL);
ip_vs_conn_cachep = KMEM_CACHE(ip_vs_conn, SLAB_HWCACHE_ALIGN);
if (!ip_vs_conn_cachep) {
kvfree(ip_vs_conn_tab);
return -ENOMEM;

View File

@ -605,15 +605,11 @@ static int __init nf_conncount_modinit(void)
for (i = 0; i < CONNCOUNT_SLOTS; ++i)
spin_lock_init(&nf_conncount_locks[i]);
conncount_conn_cachep = kmem_cache_create("nf_conncount_tuple",
sizeof(struct nf_conncount_tuple),
0, 0, NULL);
conncount_conn_cachep = KMEM_CACHE(nf_conncount_tuple, 0);
if (!conncount_conn_cachep)
return -ENOMEM;
conncount_rb_cachep = kmem_cache_create("nf_conncount_rb",
sizeof(struct nf_conncount_rb),
0, 0, NULL);
conncount_rb_cachep = KMEM_CACHE(nf_conncount_rb, 0);
if (!conncount_rb_cachep) {
kmem_cache_destroy(conncount_conn_cachep);
return -ENOMEM;

View File

@ -1194,8 +1194,10 @@ static void nf_tables_table_disable(struct net *net, struct nft_table *table)
#define __NFT_TABLE_F_INTERNAL (NFT_TABLE_F_MASK + 1)
#define __NFT_TABLE_F_WAS_DORMANT (__NFT_TABLE_F_INTERNAL << 0)
#define __NFT_TABLE_F_WAS_AWAKEN (__NFT_TABLE_F_INTERNAL << 1)
#define __NFT_TABLE_F_WAS_ORPHAN (__NFT_TABLE_F_INTERNAL << 2)
#define __NFT_TABLE_F_UPDATE (__NFT_TABLE_F_WAS_DORMANT | \
__NFT_TABLE_F_WAS_AWAKEN)
__NFT_TABLE_F_WAS_AWAKEN | \
__NFT_TABLE_F_WAS_ORPHAN)
static int nf_tables_updtable(struct nft_ctx *ctx)
{
@ -1215,8 +1217,11 @@ static int nf_tables_updtable(struct nft_ctx *ctx)
if ((nft_table_has_owner(ctx->table) &&
!(flags & NFT_TABLE_F_OWNER)) ||
(!nft_table_has_owner(ctx->table) &&
flags & NFT_TABLE_F_OWNER))
(flags & NFT_TABLE_F_OWNER &&
!nft_table_is_orphan(ctx->table)))
return -EOPNOTSUPP;
if ((flags ^ ctx->table->flags) & NFT_TABLE_F_PERSIST)
return -EOPNOTSUPP;
/* No dormant off/on/off/on games in single transaction */
@ -1245,6 +1250,13 @@ static int nf_tables_updtable(struct nft_ctx *ctx)
}
}
if ((flags & NFT_TABLE_F_OWNER) &&
!nft_table_has_owner(ctx->table)) {
ctx->table->nlpid = ctx->portid;
ctx->table->flags |= NFT_TABLE_F_OWNER |
__NFT_TABLE_F_WAS_ORPHAN;
}
nft_trans_table_update(trans) = true;
nft_trans_commit_list_add_tail(ctx->net, trans);
@ -4235,23 +4247,18 @@ static bool nft_set_ops_candidate(const struct nft_set_type *type, u32 flags)
* given, in that case the amount of memory per element is used.
*/
static const struct nft_set_ops *
nft_select_set_ops(const struct nft_ctx *ctx,
const struct nlattr * const nla[],
nft_select_set_ops(const struct nft_ctx *ctx, u32 flags,
const struct nft_set_desc *desc)
{
struct nftables_pernet *nft_net = nft_pernet(ctx->net);
const struct nft_set_ops *ops, *bops;
struct nft_set_estimate est, best;
const struct nft_set_type *type;
u32 flags = 0;
int i;
lockdep_assert_held(&nft_net->commit_mutex);
lockdep_nfnl_nft_mutex_not_held();
if (nla[NFTA_SET_FLAGS] != NULL)
flags = ntohl(nla_get_be32(nla[NFTA_SET_FLAGS]));
bops = NULL;
best.size = ~0;
best.lookup = ~0;
@ -5137,7 +5144,7 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info,
if (!(info->nlh->nlmsg_flags & NLM_F_CREATE))
return -ENOENT;
ops = nft_select_set_ops(&ctx, nla, &desc);
ops = nft_select_set_ops(&ctx, flags, &desc);
if (IS_ERR(ops))
return PTR_ERR(ops);
@ -10420,6 +10427,10 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
} else if (trans->ctx.table->flags & __NFT_TABLE_F_WAS_AWAKEN) {
trans->ctx.table->flags &= ~NFT_TABLE_F_DORMANT;
}
if (trans->ctx.table->flags & __NFT_TABLE_F_WAS_ORPHAN) {
trans->ctx.table->flags &= ~NFT_TABLE_F_OWNER;
trans->ctx.table->nlpid = 0;
}
trans->ctx.table->flags &= ~__NFT_TABLE_F_UPDATE;
nft_trans_destroy(trans);
} else {
@ -11345,6 +11356,10 @@ again:
list_for_each_entry(table, &nft_net->tables, list) {
if (nft_table_has_owner(table) &&
n->portid == table->nlpid) {
if (table->flags & NFT_TABLE_F_PERSIST) {
table->flags &= ~NFT_TABLE_F_OWNER;
continue;
}
__nft_release_hook(net, table);
list_del_rcu(&table->list);
to_delete[deleted++] = table;