Merge branch 'netns-speedup-dismantle'

commit 51d555cfdc

Eric Dumazet says:

====================
netns: speedup netns dismantles

netns are dismantled by a single thread, from cleanup_net().

On hosts with many TCP sockets and/or many cpus, this thread spends too
many cpu cycles and cannot keep up with some workloads. Two of its
biggest costs are:

- Removing 3*num_possible_cpus() sockets per netns, for the icmp and
  tcp protocols.

- Iterating over all TCP sockets to remove stale timewait sockets.

This patch series removes ~50% of cleanup_net() cpu costs on hosts with
256 cpus. It also reduces the per-netns memory footprint.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
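The heart of the series is visible in the icmp and tcp hunks below: the
per-netns, per-cpu control sockets (one each for icmpv4, icmpv6 and tcp --
the 3*num_possible_cpus() sockets mentioned above) are replaced by a single
set of boot-time per-cpu sockets owned by init_net. A sender briefly
re-parents the shared socket to the current netns while holding its lock,
then hands it back. The sketch below restates that borrow pattern with
hypothetical helper names (ctl_sock_borrow/ctl_sock_return are not kernel
functions); the per-cpu variable, the trylock, and the sock_net_set() calls
mirror what the diff actually does:

/* Illustrative sketch of the pattern this series applies; helper names
 * are hypothetical, the APIs (DEFINE_PER_CPU, this_cpu_read,
 * sock_net_set) are the real ones used in the hunks below.
 */
static DEFINE_PER_CPU(struct sock *, ctl_sk);	/* created once at boot */

/* Called with BH disabled, like the real icmp_xmit_lock(). */
static struct sock *ctl_sock_borrow(struct net *net)
{
	struct sock *sk = this_cpu_read(ctl_sk);

	if (unlikely(!spin_trylock(&sk->sk_lock.slock)))
		return NULL;		/* output path recursed into us */
	sock_net_set(sk, net);		/* temporarily adopt this netns */
	return sk;
}

static void ctl_sock_return(struct sock *sk)
{
	sock_net_set(sk, &init_net);	/* hand it back before unlocking */
	spin_unlock(&sk->sk_lock.slock);
}

Because the shared sockets belong to init_net and never die, cleanup_net()
no longer destroys any control sockets at dismantle time, and struct
netns_ipv4/netns_ipv6 each drop a per-cpu pointer.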
diff --git a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h
--- a/include/net/inet_timewait_sock.h
+++ b/include/net/inet_timewait_sock.h
@@ -65,13 +65,13 @@ struct inet_timewait_sock {
 	/* these three are in inet_sock */
 	__be16			tw_sport;
 	/* And these are ours. */
-	unsigned int		tw_kill		: 1,
-				tw_transparent  : 1,
+	unsigned int		tw_transparent  : 1,
 				tw_flowlabel	: 20,
-				tw_pad		: 2,	/* 2 bits hole */
+				tw_pad		: 3,	/* 3 bits hole */
 				tw_tos		: 8;
 	u32			tw_txhash;
 	u32			tw_priority;
+	u32			tw_bslot; /* bind bucket slot */
 	struct timer_list	tw_timer;
 	struct inet_bind_bucket	*tw_tb;
 };
@@ -110,8 +110,6 @@ static inline void inet_twsk_reschedule(struct inet_timewait_sock *tw, int timeo)
 
 void inet_twsk_deschedule_put(struct inet_timewait_sock *tw);
 
-void inet_twsk_purge(struct inet_hashinfo *hashinfo, int family);
-
 static inline
 struct net *twsk_net(const struct inet_timewait_sock *twsk)
 {
diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -70,11 +70,9 @@ struct netns_ipv4 {
 	struct hlist_head	*fib_table_hash;
 	struct sock		*fibnl;
 
-	struct sock * __percpu	*icmp_sk;
 	struct sock		*mc_autojoin_sk;
 
 	struct inet_peer_base	*peers;
-	struct sock * __percpu	*tcp_sk;
 	struct fqdir		*fqdir;
 
 	u8 sysctl_icmp_echo_ignore_all;
diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h
--- a/include/net/netns/ipv6.h
+++ b/include/net/netns/ipv6.h
@@ -88,7 +88,6 @@ struct netns_ipv6 {
 	struct fib6_table	*fib6_local_tbl;
 	struct fib_rules_ops	*fib6_rules_ops;
 #endif
-	struct sock * __percpu	*icmp_sk;
 	struct sock		*ndisc_sk;
 	struct sock		*tcp_sk;
 	struct sock		*igmp_sk;
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -1030,15 +1030,9 @@ static void __net_exit dccp_v4_exit_net(struct net *net)
 	inet_ctl_sock_destroy(pn->v4_ctl_sk);
 }
 
-static void __net_exit dccp_v4_exit_batch(struct list_head *net_exit_list)
-{
-	inet_twsk_purge(&dccp_hashinfo, AF_INET);
-}
-
 static struct pernet_operations dccp_v4_ops = {
 	.init	= dccp_v4_init_net,
 	.exit	= dccp_v4_exit_net,
-	.exit_batch = dccp_v4_exit_batch,
 	.id	= &dccp_v4_pernet_id,
 	.size	= sizeof(struct dccp_v4_pernet),
 };
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -1115,15 +1115,9 @@ static void __net_exit dccp_v6_exit_net(struct net *net)
 	inet_ctl_sock_destroy(pn->v6_ctl_sk);
 }
 
-static void __net_exit dccp_v6_exit_batch(struct list_head *net_exit_list)
-{
-	inet_twsk_purge(&dccp_hashinfo, AF_INET6);
-}
-
 static struct pernet_operations dccp_v6_ops = {
 	.init	= dccp_v6_init_net,
 	.exit	= dccp_v6_exit_net,
-	.exit_batch = dccp_v6_exit_batch,
 	.id	= &dccp_v6_pernet_id,
 	.size	= sizeof(struct dccp_v6_pernet),
 };
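For context on the two dccp hunks above: cleanup_net() invokes each
registered pernet_operations' .exit handler once per dying namespace, and
its .exit_batch handler once per batch of namespaces. The dccp batch hooks
existed solely to call inet_twsk_purge(), a scan of the entire ehash table,
so deleting them (here, and in the tcp/tcpv6 hunks further down) removes one
full-table scan per protocol from every dismantle batch. A minimal sketch of
the registration shape, with a hypothetical protocol rather than the actual
dccp code:

/* Hypothetical pernet_operations user, mirroring the dccp hunks above. */
static int __net_init proto_init_net(struct net *net)
{
	return 0;			/* per-netns setup elided */
}

static void __net_exit proto_exit_net(struct net *net)
{
	/* per-netns teardown elided */
}

static struct pernet_operations proto_net_ops = {
	.init = proto_init_net,		/* run for each new netns */
	.exit = proto_exit_net,		/* run for each dying netns */
	/* .exit_batch, which only called inet_twsk_purge(), is gone */
};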
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -192,24 +192,14 @@ struct icmp_control {
 
 static const struct icmp_control icmp_pointers[NR_ICMP_TYPES+1];
 
-/*
- *	The ICMP socket(s). This is the most convenient way to flow control
- *	our ICMP output as well as maintain a clean interface throughout
- *	all layers. All Socketless IP sends will soon be gone.
- *
- *	On SMP we have one ICMP socket per-cpu.
- */
-static struct sock *icmp_sk(struct net *net)
-{
-	return this_cpu_read(*net->ipv4.icmp_sk);
-}
+static DEFINE_PER_CPU(struct sock *, ipv4_icmp_sk);
 
 /* Called with BH disabled */
 static inline struct sock *icmp_xmit_lock(struct net *net)
 {
 	struct sock *sk;
 
-	sk = icmp_sk(net);
+	sk = this_cpu_read(ipv4_icmp_sk);
 
 	if (unlikely(!spin_trylock(&sk->sk_lock.slock))) {
 		/* This can happen if the output path signals a
@@ -217,11 +207,13 @@ static inline struct sock *icmp_xmit_lock(struct net *net)
 		 */
 		return NULL;
 	}
+	sock_net_set(sk, net);
 	return sk;
 }
 
 static inline void icmp_xmit_unlock(struct sock *sk)
 {
+	sock_net_set(sk, &init_net);
 	spin_unlock(&sk->sk_lock.slock);
 }
 
@@ -363,14 +355,13 @@ static int icmp_glue_bits(void *from, char *to, int offset, int len, int odd,
 	return 0;
 }
 
-static void icmp_push_reply(struct icmp_bxm *icmp_param,
+static void icmp_push_reply(struct sock *sk,
+			    struct icmp_bxm *icmp_param,
 			    struct flowi4 *fl4,
 			    struct ipcm_cookie *ipc, struct rtable **rt)
 {
-	struct sock *sk;
 	struct sk_buff *skb;
 
-	sk = icmp_sk(dev_net((*rt)->dst.dev));
 	if (ip_append_data(sk, fl4, icmp_glue_bits, icmp_param,
 			   icmp_param->data_len+icmp_param->head_len,
 			   icmp_param->head_len,
@@ -452,7 +443,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
 	if (IS_ERR(rt))
 		goto out_unlock;
 	if (icmpv4_xrlim_allow(net, rt, &fl4, type, code))
-		icmp_push_reply(icmp_param, &fl4, &ipc, &rt);
+		icmp_push_reply(sk, icmp_param, &fl4, &ipc, &rt);
 	ip_rt_put(rt);
 out_unlock:
 	icmp_xmit_unlock(sk);
@@ -766,7 +757,7 @@ void __icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info,
 	if (!fl4.saddr)
 		fl4.saddr = htonl(INADDR_DUMMY);
 
-	icmp_push_reply(&icmp_param, &fl4, &ipc, &rt);
+	icmp_push_reply(sk, &icmp_param, &fl4, &ipc, &rt);
 ende:
 	ip_rt_put(rt);
 out_unlock:
@@ -1434,46 +1425,8 @@ static const struct icmp_control icmp_pointers[NR_ICMP_TYPES + 1] = {
 	},
 };
 
-static void __net_exit icmp_sk_exit(struct net *net)
-{
-	int i;
-
-	for_each_possible_cpu(i)
-		inet_ctl_sock_destroy(*per_cpu_ptr(net->ipv4.icmp_sk, i));
-	free_percpu(net->ipv4.icmp_sk);
-	net->ipv4.icmp_sk = NULL;
-}
-
 static int __net_init icmp_sk_init(struct net *net)
 {
-	int i, err;
-
-	net->ipv4.icmp_sk = alloc_percpu(struct sock *);
-	if (!net->ipv4.icmp_sk)
-		return -ENOMEM;
-
-	for_each_possible_cpu(i) {
-		struct sock *sk;
-
-		err = inet_ctl_sock_create(&sk, PF_INET,
-					   SOCK_RAW, IPPROTO_ICMP, net);
-		if (err < 0)
-			goto fail;
-
-		*per_cpu_ptr(net->ipv4.icmp_sk, i) = sk;
-
-		/* Enough space for 2 64K ICMP packets, including
-		 * sk_buff/skb_shared_info struct overhead.
-		 */
-		sk->sk_sndbuf = 2 * SKB_TRUESIZE(64 * 1024);
-
-		/*
-		 * Speedup sock_wfree()
-		 */
-		sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);
-		inet_sk(sk)->pmtudisc = IP_PMTUDISC_DONT;
-	}
-
 	/* Control parameters for ECHO replies. */
 	net->ipv4.sysctl_icmp_echo_ignore_all = 0;
 	net->ipv4.sysctl_icmp_echo_enable_probe = 0;
@@ -1499,18 +1452,36 @@ static int __net_init icmp_sk_init(struct net *net)
 	net->ipv4.sysctl_icmp_errors_use_inbound_ifaddr = 0;
 
 	return 0;
-
-fail:
-	icmp_sk_exit(net);
-	return err;
 }
 
 static struct pernet_operations __net_initdata icmp_sk_ops = {
 	.init = icmp_sk_init,
-	.exit = icmp_sk_exit,
 };
 
 int __init icmp_init(void)
 {
+	int err, i;
+
+	for_each_possible_cpu(i) {
+		struct sock *sk;
+
+		err = inet_ctl_sock_create(&sk, PF_INET,
+					   SOCK_RAW, IPPROTO_ICMP, &init_net);
+		if (err < 0)
+			return err;
+
+		per_cpu(ipv4_icmp_sk, i) = sk;
+
+		/* Enough space for 2 64K ICMP packets, including
+		 * sk_buff/skb_shared_info struct overhead.
+		 */
+		sk->sk_sndbuf = 2 * SKB_TRUESIZE(64 * 1024);
+
+		/*
+		 * Speedup sock_wfree()
+		 */
+		sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);
+		inet_sk(sk)->pmtudisc = IP_PMTUDISC_DONT;
+	}
 	return register_pernet_subsys(&icmp_sk_ops);
 }
diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c
--- a/net/ipv4/inet_timewait_sock.c
+++ b/net/ipv4/inet_timewait_sock.c
@@ -52,8 +52,7 @@ static void inet_twsk_kill(struct inet_timewait_sock *tw)
 	spin_unlock(lock);
 
 	/* Disassociate with bind bucket. */
-	bhead = &hashinfo->bhash[inet_bhashfn(twsk_net(tw), tw->tw_num,
-			hashinfo->bhash_size)];
+	bhead = &hashinfo->bhash[tw->tw_bslot];
 
 	spin_lock(&bhead->lock);
 	inet_twsk_bind_unhash(tw, hashinfo);
@@ -110,8 +109,12 @@ void inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk,
 	   Note, that any socket with inet->num != 0 MUST be bound in
 	   binding cache, even if it is closed.
 	 */
-	bhead = &hashinfo->bhash[inet_bhashfn(twsk_net(tw), inet->inet_num,
-			hashinfo->bhash_size)];
+	/* Cache inet_bhashfn(), because 'struct net' might be no longer
+	 * available later in inet_twsk_kill().
+	 */
+	tw->tw_bslot = inet_bhashfn(twsk_net(tw), inet->inet_num,
+				    hashinfo->bhash_size);
+	bhead = &hashinfo->bhash[tw->tw_bslot];
 	spin_lock(&bhead->lock);
 	tw->tw_tb = icsk->icsk_bind_hash;
 	WARN_ON(!icsk->icsk_bind_hash);
@@ -145,10 +148,6 @@ static void tw_timer_handler(struct timer_list *t)
 {
 	struct inet_timewait_sock *tw = from_timer(tw, t, tw_timer);
 
-	if (tw->tw_kill)
-		__NET_INC_STATS(twsk_net(tw), LINUX_MIB_TIMEWAITKILLED);
-	else
-		__NET_INC_STATS(twsk_net(tw), LINUX_MIB_TIMEWAITED);
 	inet_twsk_kill(tw);
 }
 
@@ -244,8 +243,11 @@ void __inet_twsk_schedule(struct inet_timewait_sock *tw, int timeo, bool rearm)
 	 * of PAWS.
 	 */
 
-	tw->tw_kill = timeo <= 4*HZ;
 	if (!rearm) {
+		bool kill = timeo <= 4*HZ;
+
+		__NET_INC_STATS(twsk_net(tw), kill ? LINUX_MIB_TIMEWAITKILLED :
+						     LINUX_MIB_TIMEWAITED);
 		BUG_ON(mod_timer(&tw->tw_timer, jiffies + timeo));
 		atomic_inc(&tw->tw_dr->tw_count);
 	} else {
@@ -253,50 +255,3 @@ void __inet_twsk_schedule(struct inet_timewait_sock *tw, int timeo, bool rearm)
 	}
 }
 EXPORT_SYMBOL_GPL(__inet_twsk_schedule);
-
-void inet_twsk_purge(struct inet_hashinfo *hashinfo, int family)
-{
-	struct inet_timewait_sock *tw;
-	struct sock *sk;
-	struct hlist_nulls_node *node;
-	unsigned int slot;
-
-	for (slot = 0; slot <= hashinfo->ehash_mask; slot++) {
-		struct inet_ehash_bucket *head = &hashinfo->ehash[slot];
-restart_rcu:
-		cond_resched();
-		rcu_read_lock();
-restart:
-		sk_nulls_for_each_rcu(sk, node, &head->chain) {
-			if (sk->sk_state != TCP_TIME_WAIT)
-				continue;
-			tw = inet_twsk(sk);
-			if ((tw->tw_family != family) ||
-			    refcount_read(&twsk_net(tw)->ns.count))
-				continue;
-
-			if (unlikely(!refcount_inc_not_zero(&tw->tw_refcnt)))
-				continue;
-
-			if (unlikely((tw->tw_family != family) ||
-				     refcount_read(&twsk_net(tw)->ns.count))) {
-				inet_twsk_put(tw);
-				goto restart;
-			}
-
-			rcu_read_unlock();
-			local_bh_disable();
-			inet_twsk_deschedule_put(tw);
-			local_bh_enable();
-			goto restart_rcu;
-		}
-		/* If the nulls value we got at the end of this lookup is
-		 * not the expected one, we must restart lookup.
-		 * We probably met an item that was moved to another chain.
-		 */
-		if (get_nulls_value(node) != slot)
-			goto restart;
-		rcu_read_unlock();
-	}
-}
-EXPORT_SYMBOL_GPL(inet_twsk_purge);
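The timewait hunks above attack the second cost from the cover letter.
Rather than scanning every ehash bucket in inet_twsk_purge() at dismantle
time, the series arranges for nothing on a timewait socket's destruction
path to dereference twsk_net(tw), whose struct net may already be gone by
then: the bind-bucket slot is precomputed into the new tw_bslot field during
inet_twsk_hashdance(), and the TIMEWAITKILLED/TIMEWAITED MIB accounting
moves from tw_timer_handler() to __inet_twsk_schedule(). The sketch below
condenses that idea; the field and helpers follow the diff, but the two
function shells are illustrative, not the literal patch:

/* Creation side: the netns is certainly alive here, so any state the
 * destruction path would need from it is computed once and cached in
 * the timewait socket itself.
 */
static void tw_cache_netns_state(struct inet_timewait_sock *tw,
				 const struct inet_sock *inet,
				 const struct inet_hashinfo *hashinfo)
{
	tw->tw_bslot = inet_bhashfn(twsk_net(tw), inet->inet_num,
				    hashinfo->bhash_size);
}

/* Destruction side: may run after the netns is dismantled, so it only
 * reads the cached slot and never calls twsk_net(tw).
 */
static struct inet_bind_hashbucket *
tw_bind_bucket(struct inet_timewait_sock *tw, struct inet_hashinfo *hashinfo)
{
	return &hashinfo->bhash[tw->tw_bslot];
}

With those two changes a timewait socket can simply outlive its namespace
and expire on its own timer, which is what lets the series delete
inet_twsk_purge() and all of its exit_batch callers.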
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -91,6 +91,8 @@ static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
 struct inet_hashinfo tcp_hashinfo;
 EXPORT_SYMBOL(tcp_hashinfo);
 
+static DEFINE_PER_CPU(struct sock *, ipv4_tcp_sk);
+
 static u32 tcp_v4_init_seq(const struct sk_buff *skb)
 {
 	return secure_tcp_seq(ip_hdr(skb)->daddr,
@@ -810,7 +812,8 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
 	arg.tos = ip_hdr(skb)->tos;
 	arg.uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL);
 	local_bh_disable();
-	ctl_sk = this_cpu_read(*net->ipv4.tcp_sk);
+	ctl_sk = this_cpu_read(ipv4_tcp_sk);
+	sock_net_set(ctl_sk, net);
 	if (sk) {
 		ctl_sk->sk_mark = (sk->sk_state == TCP_TIME_WAIT) ?
 				   inet_twsk(sk)->tw_mark : sk->sk_mark;
@@ -825,6 +828,7 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
 			      transmit_time);
 
 	ctl_sk->sk_mark = 0;
+	sock_net_set(ctl_sk, &init_net);
 	__TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
 	__TCP_INC_STATS(net, TCP_MIB_OUTRSTS);
 	local_bh_enable();
@@ -908,7 +912,8 @@ static void tcp_v4_send_ack(const struct sock *sk,
 	arg.tos = tos;
 	arg.uid = sock_net_uid(net, sk_fullsock(sk) ? sk : NULL);
 	local_bh_disable();
-	ctl_sk = this_cpu_read(*net->ipv4.tcp_sk);
+	ctl_sk = this_cpu_read(ipv4_tcp_sk);
+	sock_net_set(ctl_sk, net);
 	ctl_sk->sk_mark = (sk->sk_state == TCP_TIME_WAIT) ?
 			   inet_twsk(sk)->tw_mark : sk->sk_mark;
 	ctl_sk->sk_priority = (sk->sk_state == TCP_TIME_WAIT) ?
@@ -921,6 +926,7 @@ static void tcp_v4_send_ack(const struct sock *sk,
 			      transmit_time);
 
 	ctl_sk->sk_mark = 0;
+	sock_net_set(ctl_sk, &init_net);
 	__TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
 	local_bh_enable();
 }
@@ -3111,41 +3117,14 @@ EXPORT_SYMBOL(tcp_prot);
 
 static void __net_exit tcp_sk_exit(struct net *net)
 {
-	int cpu;
-
 	if (net->ipv4.tcp_congestion_control)
 		bpf_module_put(net->ipv4.tcp_congestion_control,
 			       net->ipv4.tcp_congestion_control->owner);
-
-	for_each_possible_cpu(cpu)
-		inet_ctl_sock_destroy(*per_cpu_ptr(net->ipv4.tcp_sk, cpu));
-	free_percpu(net->ipv4.tcp_sk);
 }
 
 static int __net_init tcp_sk_init(struct net *net)
 {
-	int res, cpu, cnt;
-
-	net->ipv4.tcp_sk = alloc_percpu(struct sock *);
-	if (!net->ipv4.tcp_sk)
-		return -ENOMEM;
-
-	for_each_possible_cpu(cpu) {
-		struct sock *sk;
-
-		res = inet_ctl_sock_create(&sk, PF_INET, SOCK_RAW,
-					   IPPROTO_TCP, net);
-		if (res)
-			goto fail;
-		sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);
-
-		/* Please enforce IP_DF and IPID==0 for RST and
-		 * ACK sent in SYN-RECV and TIME-WAIT state.
-		 */
-		inet_sk(sk)->pmtudisc = IP_PMTUDISC_DO;
-
-		*per_cpu_ptr(net->ipv4.tcp_sk, cpu) = sk;
-	}
+	int cnt;
 
 	net->ipv4.sysctl_tcp_ecn = 2;
 	net->ipv4.sysctl_tcp_ecn_fallback = 1;
@@ -3229,18 +3208,12 @@ static int __net_init tcp_sk_init(struct net *net)
 	net->ipv4.tcp_congestion_control = &tcp_reno;
 
 	return 0;
-fail:
-	tcp_sk_exit(net);
-
-	return res;
 }
 
 static void __net_exit tcp_sk_exit_batch(struct list_head *net_exit_list)
 {
 	struct net *net;
 
-	inet_twsk_purge(&tcp_hashinfo, AF_INET);
-
 	list_for_each_entry(net, net_exit_list, exit_list)
 		tcp_fastopen_ctx_destroy(net);
 }
@@ -3326,6 +3299,24 @@ static void __init bpf_iter_register(void)
 
 void __init tcp_v4_init(void)
 {
+	int cpu, res;
+
+	for_each_possible_cpu(cpu) {
+		struct sock *sk;
+
+		res = inet_ctl_sock_create(&sk, PF_INET, SOCK_RAW,
+					   IPPROTO_TCP, &init_net);
+		if (res)
+			panic("Failed to create the TCP control socket.\n");
+		sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);
+
+		/* Please enforce IP_DF and IPID==0 for RST and
+		 * ACK sent in SYN-RECV and TIME-WAIT state.
+		 */
+		inet_sk(sk)->pmtudisc = IP_PMTUDISC_DO;
+
+		per_cpu(ipv4_tcp_sk, cpu) = sk;
+	}
 	if (register_pernet_subsys(&tcp_sk_ops))
 		panic("Failed to create the TCP control socket.\n");
 
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -69,17 +69,7 @@
 
 #include <linux/uaccess.h>
 
-/*
- *	The ICMP socket(s). This is the most convenient way to flow control
- *	our ICMP output as well as maintain a clean interface throughout
- *	all layers. All Socketless IP sends will soon be gone.
- *
- *	On SMP we have one ICMP socket per-cpu.
- */
-static struct sock *icmpv6_sk(struct net *net)
-{
-	return this_cpu_read(*net->ipv6.icmp_sk);
-}
+static DEFINE_PER_CPU(struct sock *, ipv6_icmp_sk);
 
 static int icmpv6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 		      u8 type, u8 code, int offset, __be32 info)
@@ -110,11 +100,11 @@ static const struct inet6_protocol icmpv6_protocol = {
 };
 
 /* Called with BH disabled */
-static __inline__ struct sock *icmpv6_xmit_lock(struct net *net)
+static struct sock *icmpv6_xmit_lock(struct net *net)
 {
 	struct sock *sk;
 
-	sk = icmpv6_sk(net);
+	sk = this_cpu_read(ipv6_icmp_sk);
 	if (unlikely(!spin_trylock(&sk->sk_lock.slock))) {
 		/* This can happen if the output path (f.e. SIT or
 		 * ip6ip6 tunnel) signals dst_link_failure() for an
@@ -122,11 +112,13 @@ static __inline__ struct sock *icmpv6_xmit_lock(struct net *net)
 		 */
 		return NULL;
 	}
+	sock_net_set(sk, net);
 	return sk;
 }
 
-static __inline__ void icmpv6_xmit_unlock(struct sock *sk)
+static void icmpv6_xmit_unlock(struct sock *sk)
 {
+	sock_net_set(sk, &init_net);
 	spin_unlock(&sk->sk_lock.slock);
 }
 
@@ -1034,59 +1026,27 @@ void icmpv6_flow_init(struct sock *sk, struct flowi6 *fl6,
 	security_sk_classify_flow(sk, flowi6_to_flowi_common(fl6));
 }
 
-static void __net_exit icmpv6_sk_exit(struct net *net)
-{
-	int i;
-
-	for_each_possible_cpu(i)
-		inet_ctl_sock_destroy(*per_cpu_ptr(net->ipv6.icmp_sk, i));
-	free_percpu(net->ipv6.icmp_sk);
-}
-
-static int __net_init icmpv6_sk_init(struct net *net)
+int __init icmpv6_init(void)
 {
 	struct sock *sk;
 	int err, i;
 
-	net->ipv6.icmp_sk = alloc_percpu(struct sock *);
-	if (!net->ipv6.icmp_sk)
-		return -ENOMEM;
-
 	for_each_possible_cpu(i) {
 		err = inet_ctl_sock_create(&sk, PF_INET6,
-					   SOCK_RAW, IPPROTO_ICMPV6, net);
+					   SOCK_RAW, IPPROTO_ICMPV6, &init_net);
 		if (err < 0) {
 			pr_err("Failed to initialize the ICMP6 control socket (err %d)\n",
 			       err);
-			goto fail;
+			return err;
 		}
 
-		*per_cpu_ptr(net->ipv6.icmp_sk, i) = sk;
+		per_cpu(ipv6_icmp_sk, i) = sk;
 
 		/* Enough space for 2 64K ICMP packets, including
 		 * sk_buff struct overhead.
 		 */
 		sk->sk_sndbuf = 2 * SKB_TRUESIZE(64 * 1024);
 	}
-	return 0;
-
-fail:
-	icmpv6_sk_exit(net);
-	return err;
-}
-
-static struct pernet_operations icmpv6_sk_ops = {
-	.init = icmpv6_sk_init,
-	.exit = icmpv6_sk_exit,
-};
-
-int __init icmpv6_init(void)
-{
-	int err;
-
-	err = register_pernet_subsys(&icmpv6_sk_ops);
-	if (err < 0)
-		return err;
 
 	err = -EAGAIN;
 	if (inet6_add_protocol(&icmpv6_protocol, IPPROTO_ICMPV6) < 0)
@@ -1101,14 +1061,12 @@ sender_reg_err:
 	inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6);
 fail:
 	pr_err("Failed to register ICMP6 protocol\n");
-	unregister_pernet_subsys(&icmpv6_sk_ops);
 	return err;
 }
 
 void icmpv6_cleanup(void)
 {
 	inet6_unregister_icmp_sender(icmp6_send);
-	unregister_pernet_subsys(&icmpv6_sk_ops);
 	inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6);
 }
 
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -2237,15 +2237,9 @@ static void __net_exit tcpv6_net_exit(struct net *net)
 	inet_ctl_sock_destroy(net->ipv6.tcp_sk);
 }
 
-static void __net_exit tcpv6_net_exit_batch(struct list_head *net_exit_list)
-{
-	inet_twsk_purge(&tcp_hashinfo, AF_INET6);
-}
-
 static struct pernet_operations tcpv6_net_ops = {
 	.init	    = tcpv6_net_init,
 	.exit	    = tcpv6_net_exit,
-	.exit_batch = tcpv6_net_exit_batch,
 };
 
 int __init tcpv6_init(void)