ipv6: do not use per netns icmp sockets

Back in linux-2.6.25 (commit 98c6d1b261 "[NETNS]: Make icmpv6_sk per
namespace."), we added private per-cpu/per-netns ipv6 icmp sockets.

This adds memory and cpu costs, which do not seem needed.
Now that typical servers have 256 or more cores, this adds a
considerable tax to netns users.

icmp sockets are used from BH context, are not receiving packets,
and do not store any persistent state but the 'struct net' pointer.

icmpv6_xmit_lock() already makes sure to lock the chosen per-cpu
socket.
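
For illustration, here is a minimal sketch of the pattern this patch
moves to: one global per-cpu socket shared by every netns, trylocked
from BH context, with the netns pointer set only for the duration of
the transmit. It mirrors the icmpv6_xmit_lock()/icmpv6_xmit_unlock()
helpers changed below; the *_sketch names are made up for this
example and this is not the verbatim kernel code.

#include <linux/percpu.h>
#include <linux/spinlock.h>
#include <net/sock.h>
#include <net/net_namespace.h>

static DEFINE_PER_CPU(struct sock *, ipv6_icmp_sk);

static struct sock *icmp_xmit_lock_sketch(struct net *net)
{
	struct sock *sk = this_cpu_read(ipv6_icmp_sk);

	/* BH is already disabled, so the per-cpu socket cannot change
	 * under us; the trylock only fails if the output path recursed
	 * into us via dst_link_failure().
	 */
	if (unlikely(!spin_trylock(&sk->sk_lock.slock)))
		return NULL;

	/* The only per-netns state the socket needs is the net pointer,
	 * so borrow the shared socket for this netns while we hold it.
	 */
	sock_net_set(sk, net);
	return sk;
}

static void icmp_xmit_unlock_sketch(struct sock *sk)
{
	/* Hand the shared socket back to init_net before releasing it. */
	sock_net_set(sk, &init_net);
	spin_unlock(&sk->sk_lock.slock);
}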

This patch has a considerable impact on the number of netns
that the worker thread in cleanup_net() can dismantle per second,
because ip6mr_sk_done() is no longer called (it used to be invoked
while destroying the per-netns icmp sockets at netns exit), meaning
we no longer acquire the rtnl mutex and no longer compete with other
threads adding new netns.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Eric Dumazet 2022-01-24 12:24:56 -08:00 committed by David S. Miller
parent a15c89c703
commit 6a17b961ec
2 changed files with 10 additions and 53 deletions

diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h

@@ -88,7 +88,6 @@ struct netns_ipv6 {
 	struct fib6_table	*fib6_local_tbl;
 	struct fib_rules_ops	*fib6_rules_ops;
 #endif
-	struct sock		* __percpu *icmp_sk;
 	struct sock		*ndisc_sk;
 	struct sock		*tcp_sk;
 	struct sock		*igmp_sk;

diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c

@@ -69,17 +69,7 @@
 
 #include <linux/uaccess.h>
 
-/*
- *	The ICMP socket(s). This is the most convenient way to flow control
- *	our ICMP output as well as maintain a clean interface throughout
- *	all layers. All Socketless IP sends will soon be gone.
- *
- *	On SMP we have one ICMP socket per-cpu.
- */
-static struct sock *icmpv6_sk(struct net *net)
-{
-	return this_cpu_read(*net->ipv6.icmp_sk);
-}
+static DEFINE_PER_CPU(struct sock *, ipv6_icmp_sk);
 
 static int icmpv6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 		       u8 type, u8 code, int offset, __be32 info)
@@ -110,11 +100,11 @@ static const struct inet6_protocol icmpv6_protocol = {
 };
 
 /* Called with BH disabled */
-static __inline__ struct sock *icmpv6_xmit_lock(struct net *net)
+static struct sock *icmpv6_xmit_lock(struct net *net)
 {
 	struct sock *sk;
 
-	sk = icmpv6_sk(net);
+	sk = this_cpu_read(ipv6_icmp_sk);
 	if (unlikely(!spin_trylock(&sk->sk_lock.slock))) {
 		/* This can happen if the output path (f.e. SIT or
 		 * ip6ip6 tunnel) signals dst_link_failure() for an
@@ -122,11 +112,13 @@ static __inline__ struct sock *icmpv6_xmit_lock(struct net *net)
 		 */
 		return NULL;
 	}
+	sock_net_set(sk, net);
 	return sk;
 }
 
-static __inline__ void icmpv6_xmit_unlock(struct sock *sk)
+static void icmpv6_xmit_unlock(struct sock *sk)
 {
+	sock_net_set(sk, &init_net);
 	spin_unlock(&sk->sk_lock.slock);
 }
@@ -1034,59 +1026,27 @@ void icmpv6_flow_init(struct sock *sk, struct flowi6 *fl6,
 	security_sk_classify_flow(sk, flowi6_to_flowi_common(fl6));
 }
 
-static void __net_exit icmpv6_sk_exit(struct net *net)
-{
-	int i;
-
-	for_each_possible_cpu(i)
-		inet_ctl_sock_destroy(*per_cpu_ptr(net->ipv6.icmp_sk, i));
-	free_percpu(net->ipv6.icmp_sk);
-}
-
-static int __net_init icmpv6_sk_init(struct net *net)
+int __init icmpv6_init(void)
 {
 	struct sock *sk;
 	int err, i;
 
-	net->ipv6.icmp_sk = alloc_percpu(struct sock *);
-	if (!net->ipv6.icmp_sk)
-		return -ENOMEM;
-
 	for_each_possible_cpu(i) {
 		err = inet_ctl_sock_create(&sk, PF_INET6,
-					   SOCK_RAW, IPPROTO_ICMPV6, net);
+					   SOCK_RAW, IPPROTO_ICMPV6, &init_net);
 		if (err < 0) {
 			pr_err("Failed to initialize the ICMP6 control socket (err %d)\n",
 			       err);
-			goto fail;
+			return err;
 		}
 
-		*per_cpu_ptr(net->ipv6.icmp_sk, i) = sk;
+		per_cpu(ipv6_icmp_sk, i) = sk;
 
 		/* Enough space for 2 64K ICMP packets, including
 		 * sk_buff struct overhead.
 		 */
 		sk->sk_sndbuf = 2 * SKB_TRUESIZE(64 * 1024);
 	}
-	return 0;
-
-fail:
-	icmpv6_sk_exit(net);
-	return err;
-}
-
-static struct pernet_operations icmpv6_sk_ops = {
-	.init = icmpv6_sk_init,
-	.exit = icmpv6_sk_exit,
-};
-
-int __init icmpv6_init(void)
-{
-	int err;
-
-	err = register_pernet_subsys(&icmpv6_sk_ops);
-	if (err < 0)
-		return err;
 
 	err = -EAGAIN;
 	if (inet6_add_protocol(&icmpv6_protocol, IPPROTO_ICMPV6) < 0)
@@ -1101,14 +1061,12 @@ sender_reg_err:
 	inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6);
 fail:
 	pr_err("Failed to register ICMP6 protocol\n");
-	unregister_pernet_subsys(&icmpv6_sk_ops);
 	return err;
 }
 
 void icmpv6_cleanup(void)
 {
 	inet6_unregister_icmp_sender(icmp6_send);
-	unregister_pernet_subsys(&icmpv6_sk_ops);
 	inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6);
 }
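
For context, a hedged sketch of how a sender uses these helpers after
this change, condensed from the icmp6_send() call pattern (packet
construction and most error handling omitted; the _sketch name is made
up for this example):

static void icmpv6_send_sketch(struct net *net)
{
	struct sock *sk;

	/* The shared per-cpu socket may only be grabbed with BH disabled. */
	local_bh_disable();
	sk = icmpv6_xmit_lock(net);
	if (!sk)		/* recursion via dst_link_failure() */
		goto out_bh_enable;

	/* ... build and transmit the ICMPv6 error message here ... */

	icmpv6_xmit_unlock(sk);
out_bh_enable:
	local_bh_enable();
}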