From 98f6c533a3e98f21305575f0cf87cdb6c2210c43 Mon Sep 17 00:00:00 2001 From: Kirill Tkhai Date: Tue, 13 Feb 2018 12:26:02 +0300 Subject: [PATCH 01/32] net: Assign net to net_namespace_list in setup_net() This patch merges two repeating pieces of code into one, and they will live in setup_net() now. The only change is that assignment: init_net_initialized = true; becomes reordered with: list_add_tail_rcu(&net->list, &net_namespace_list); The order does not have visible effect, and it is a simple cleanup because of: init_net_initialized is used in !CONFIG_NET_NS case to order proc_net_ns_ops registration occurring at boot time: start_kernel()->proc_root_init()->proc_net_init(), with net_ns_init()->setup_net(&init_net, &init_user_ns) also occurring at boot time from the same init_task. When there are no other tasks to race with them, for the single task it does not matter in which order two sequential independent loads are made. So we make them reordered. Signed-off-by: Kirill Tkhai Acked-by: Andrei Vagin Signed-off-by: David S. 
Miller --- net/core/net_namespace.c | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index 3cad5f51afd3..1180c217895a 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -303,6 +303,9 @@ static __net_init int setup_net(struct net *net, struct user_namespace *user_ns) if (error < 0) goto out_undo; } + rtnl_lock(); + list_add_tail_rcu(&net->list, &net_namespace_list); + rtnl_unlock(); out: return error; @@ -424,11 +427,6 @@ struct net *copy_net_ns(unsigned long flags, net->ucounts = ucounts; rv = setup_net(net, user_ns); - if (rv == 0) { - rtnl_lock(); - list_add_tail_rcu(&net->list, &net_namespace_list); - rtnl_unlock(); - } mutex_unlock(&net_mutex); if (rv < 0) { dec_net_namespaces(ucounts); @@ -880,11 +878,6 @@ static int __init net_ns_init(void) panic("Could not setup the initial network namespace"); init_net_initialized = true; - - rtnl_lock(); - list_add_tail_rcu(&init_net.list, &net_namespace_list); - rtnl_unlock(); - mutex_unlock(&net_mutex); register_pernet_subsys(&net_ns_ops); From 5ba049a5cc8e24a1643df75bbf65b4efa070fa74 Mon Sep 17 00:00:00 2001 From: Kirill Tkhai Date: Tue, 13 Feb 2018 12:26:13 +0300 Subject: [PATCH 02/32] net: Cleanup in copy_net_ns() Line up destructor actions in the reverse order to constructors. Next patches will add more actions, and it will be convenient to have such an order. Signed-off-by: Kirill Tkhai Acked-by: Andrei Vagin Signed-off-by: David S. 
Miller --- net/core/net_namespace.c | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index 1180c217895a..81384386f91b 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -411,27 +411,25 @@ struct net *copy_net_ns(unsigned long flags, net = net_alloc(); if (!net) { - dec_net_namespaces(ucounts); - return ERR_PTR(-ENOMEM); + rv = -ENOMEM; + goto dec_ucounts; } - + refcount_set(&net->passive, 1); + net->ucounts = ucounts; get_user_ns(user_ns); rv = mutex_lock_killable(&net_mutex); - if (rv < 0) { - net_free(net); - dec_net_namespaces(ucounts); - put_user_ns(user_ns); - return ERR_PTR(rv); - } + if (rv < 0) + goto put_userns; - net->ucounts = ucounts; rv = setup_net(net, user_ns); mutex_unlock(&net_mutex); if (rv < 0) { - dec_net_namespaces(ucounts); +put_userns: put_user_ns(user_ns); net_drop_ns(net); +dec_ucounts: + dec_net_namespaces(ucounts); return ERR_PTR(rv); } return net; From 1a57feb847c56d6193f67d0e892c24e71f9e3ab1 Mon Sep 17 00:00:00 2001 From: Kirill Tkhai Date: Tue, 13 Feb 2018 12:26:23 +0300 Subject: [PATCH 03/32] net: Introduce net_sem for protection of pernet_list Currently, the mutex is mostly used to protect pernet operations list. It orders setup_net() and cleanup_net() with parallel {un,}register_pernet_operations() calls, so ->exit{,batch} methods of the same pernet operations are executed for a dying net, as were used to call ->init methods, even after the net namespace is unlinked from net_namespace_list in cleanup_net(). But there are several problems with scalability. The first one is that more than one net can't be created or destroyed at the same moment on the node. For big machines with many cpus running many containers it's very sensitive. 
The second one is that synchronize_rcu() must be called after net is removed from net_namespace_list: Destroy net_ns: cleanup_net() mutex_lock(&net_mutex) list_del_rcu(&net->list) synchronize_rcu() <--- Sleep there for ages list_for_each_entry_reverse(ops, &pernet_list, list) ops_exit_list(ops, &net_exit_list) list_for_each_entry_reverse(ops, &pernet_list, list) ops_free_list(ops, &net_exit_list) mutex_unlock(&net_mutex) This primitive is not fast, especially on systems with many processors and/or when preemptible RCU is enabled in config. So, all the time while cleanup_net() is waiting for the RCU grace period, creation of new net namespaces is not possible; the tasks that attempt it sleep on the same mutex: Create net_ns: copy_net_ns() mutex_lock_killable(&net_mutex) <--- Sleep there for ages I observed 20-30 second hangs of "unshare -n" on an ordinary 8-cpu laptop with preemptible RCU enabled after a CRIU test round had finished. The solution is to convert net_mutex to an rw_semaphore and add fine-grained locks to the really small number of pernet_operations that really need them. Then, pernet_operations::init/::exit methods, modifying the net-related data, will require down_read() locking only, while down_write() will be used for changing pernet_list (i.e., when modules are being loaded and unloaded). This gives a significant performance increase after the whole patch set is applied, as you may see here: %for i in {1..10000}; do unshare -n bash -c exit; done *before* real 1m40,377s user 0m9,672s sys 0m19,928s *after* real 0m17,007s user 0m5,311s sys 0m11,779 (5.8 times faster) This patch starts replacing net_mutex with net_sem. It adds the rw_semaphore, describes the variables it protects, and uses it where appropriate. net_mutex is still present, and next patches will kick it out step-by-step. Signed-off-by: Kirill Tkhai Acked-by: Andrei Vagin Signed-off-by: David S. 
Miller --- include/linux/rtnetlink.h | 1 + net/core/net_namespace.c | 39 ++++++++++++++++++++++++++------------- net/core/rtnetlink.c | 4 ++-- 3 files changed, 29 insertions(+), 15 deletions(-) diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index 1fdcde96eb65..e9ee9ad0a681 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -36,6 +36,7 @@ extern int rtnl_is_locked(void); extern wait_queue_head_t netdev_unregistering_wq; extern struct mutex net_mutex; +extern struct rw_semaphore net_sem; #ifdef CONFIG_PROVE_LOCKING extern bool lockdep_rtnl_is_held(void); diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index 81384386f91b..e89b2b7abd36 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -41,6 +41,11 @@ struct net init_net = { EXPORT_SYMBOL(init_net); static bool init_net_initialized; +/* + * net_sem: protects: pernet_list, net_generic_ids, + * init_net_initialized and first_device pointer. + */ +DECLARE_RWSEM(net_sem); #define MIN_PERNET_OPS_ID \ ((sizeof(struct net_generic) + sizeof(void *) - 1) / sizeof(void *)) @@ -286,7 +291,7 @@ struct net *get_net_ns_by_id(struct net *net, int id) */ static __net_init int setup_net(struct net *net, struct user_namespace *user_ns) { - /* Must be called with net_mutex held */ + /* Must be called with net_sem held */ const struct pernet_operations *ops, *saved_ops; int error = 0; LIST_HEAD(net_exit_list); @@ -418,12 +423,16 @@ struct net *copy_net_ns(unsigned long flags, net->ucounts = ucounts; get_user_ns(user_ns); - rv = mutex_lock_killable(&net_mutex); + rv = down_read_killable(&net_sem); if (rv < 0) goto put_userns; - + rv = mutex_lock_killable(&net_mutex); + if (rv < 0) + goto up_read; rv = setup_net(net, user_ns); mutex_unlock(&net_mutex); +up_read: + up_read(&net_sem); if (rv < 0) { put_userns: put_user_ns(user_ns); @@ -477,6 +486,7 @@ static void cleanup_net(struct work_struct *work) list_replace_init(&cleanup_list, &net_kill_list); 
spin_unlock_irq(&cleanup_list_lock); + down_read(&net_sem); mutex_lock(&net_mutex); /* Don't let anyone else find us. */ @@ -517,6 +527,7 @@ static void cleanup_net(struct work_struct *work) ops_free_list(ops, &net_exit_list); mutex_unlock(&net_mutex); + up_read(&net_sem); /* Ensure there are no outstanding rcu callbacks using this * network namespace. @@ -543,8 +554,10 @@ static void cleanup_net(struct work_struct *work) */ void net_ns_barrier(void) { + down_write(&net_sem); mutex_lock(&net_mutex); mutex_unlock(&net_mutex); + up_write(&net_sem); } EXPORT_SYMBOL(net_ns_barrier); @@ -871,12 +884,12 @@ static int __init net_ns_init(void) rcu_assign_pointer(init_net.gen, ng); - mutex_lock(&net_mutex); + down_write(&net_sem); if (setup_net(&init_net, &init_user_ns)) panic("Could not setup the initial network namespace"); init_net_initialized = true; - mutex_unlock(&net_mutex); + up_write(&net_sem); register_pernet_subsys(&net_ns_ops); @@ -1016,9 +1029,9 @@ static void unregister_pernet_operations(struct pernet_operations *ops) int register_pernet_subsys(struct pernet_operations *ops) { int error; - mutex_lock(&net_mutex); + down_write(&net_sem); error = register_pernet_operations(first_device, ops); - mutex_unlock(&net_mutex); + up_write(&net_sem); return error; } EXPORT_SYMBOL_GPL(register_pernet_subsys); @@ -1034,9 +1047,9 @@ EXPORT_SYMBOL_GPL(register_pernet_subsys); */ void unregister_pernet_subsys(struct pernet_operations *ops) { - mutex_lock(&net_mutex); + down_write(&net_sem); unregister_pernet_operations(ops); - mutex_unlock(&net_mutex); + up_write(&net_sem); } EXPORT_SYMBOL_GPL(unregister_pernet_subsys); @@ -1062,11 +1075,11 @@ EXPORT_SYMBOL_GPL(unregister_pernet_subsys); int register_pernet_device(struct pernet_operations *ops) { int error; - mutex_lock(&net_mutex); + down_write(&net_sem); error = register_pernet_operations(&pernet_list, ops); if (!error && (first_device == &pernet_list)) first_device = &ops->list; - mutex_unlock(&net_mutex); + 
up_write(&net_sem); return error; } EXPORT_SYMBOL_GPL(register_pernet_device); @@ -1082,11 +1095,11 @@ EXPORT_SYMBOL_GPL(register_pernet_device); */ void unregister_pernet_device(struct pernet_operations *ops) { - mutex_lock(&net_mutex); + down_write(&net_sem); if (&ops->list == first_device) first_device = first_device->next; unregister_pernet_operations(ops); - mutex_unlock(&net_mutex); + up_write(&net_sem); } EXPORT_SYMBOL_GPL(unregister_pernet_device); diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index bc290413a49d..257e7bbaffba 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -454,11 +454,11 @@ static void rtnl_lock_unregistering_all(void) void rtnl_link_unregister(struct rtnl_link_ops *ops) { /* Close the race with cleanup_net() */ - mutex_lock(&net_mutex); + down_write(&net_sem); rtnl_lock_unregistering_all(); __rtnl_link_unregister(ops); rtnl_unlock(); - mutex_unlock(&net_mutex); + up_write(&net_sem); } EXPORT_SYMBOL_GPL(rtnl_link_unregister); From bcab1ddd9b2b105390712a9c1605bdb20a7f9a03 Mon Sep 17 00:00:00 2001 From: Kirill Tkhai Date: Tue, 13 Feb 2018 12:26:33 +0300 Subject: [PATCH 04/32] net: Move mutex_unlock() in cleanup_net() up net_sem protects pernet_list from changing, while ops_free_list() does a simple kfree() and can't race with other pernet_operations callbacks. So we may release net_mutex earlier than before. Signed-off-by: Kirill Tkhai Acked-by: Andrei Vagin Signed-off-by: David S. 
Miller --- net/core/net_namespace.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index e89b2b7abd36..f8453c438798 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -522,11 +522,12 @@ static void cleanup_net(struct work_struct *work) list_for_each_entry_reverse(ops, &pernet_list, list) ops_exit_list(ops, &net_exit_list); + mutex_unlock(&net_mutex); + /* Free the net generic variables */ list_for_each_entry_reverse(ops, &pernet_list, list) ops_free_list(ops, &net_exit_list); - mutex_unlock(&net_mutex); up_read(&net_sem); /* Ensure there are no outstanding rcu callbacks using this From 447cd7a0d7d1e5b4486e99cce289654fec9951e3 Mon Sep 17 00:00:00 2001 From: Kirill Tkhai Date: Tue, 13 Feb 2018 12:26:44 +0300 Subject: [PATCH 05/32] net: Allow pernet_operations to be executed in parallel This adds new pernet_operations::async flag to indicate operations, which ->init(), ->exit() and ->exit_batch() methods are allowed to be executed in parallel with the methods of any other pernet_operations. When there are only asynchronous pernet_operations in the system, net_mutex won't be taken for a net construction and destruction. Also, remove BUG_ON(mutex_is_locked()) from net_assign_generic() without replacing with the equivalent net_sem check, as there is one more lockdep assert below. v3: Add comment near net_mutex. Suggested-by: Eric W. Biederman Signed-off-by: Kirill Tkhai Acked-by: Andrei Vagin Signed-off-by: David S. 
Miller --- include/net/net_namespace.h | 6 ++++++ net/core/net_namespace.c | 30 ++++++++++++++++++++---------- 2 files changed, 26 insertions(+), 10 deletions(-) diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index f306b2aa15a4..9158ec1ad06f 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -313,6 +313,12 @@ struct pernet_operations { void (*exit_batch)(struct list_head *net_exit_list); unsigned int *id; size_t size; + /* + * Indicates above methods are allowed to be executed in parallel + * with methods of any other pernet_operations, i.e. they are not + * need synchronization via net_mutex. + */ + bool async; }; /* diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index f8453c438798..2a01ff32d9c7 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -29,6 +29,7 @@ static LIST_HEAD(pernet_list); static struct list_head *first_device = &pernet_list; +/* Used only if there are !async pernet_operations registered */ DEFINE_MUTEX(net_mutex); LIST_HEAD(net_namespace_list); @@ -41,8 +42,9 @@ struct net init_net = { EXPORT_SYMBOL(init_net); static bool init_net_initialized; +static unsigned nr_sync_pernet_ops; /* - * net_sem: protects: pernet_list, net_generic_ids, + * net_sem: protects: pernet_list, net_generic_ids, nr_sync_pernet_ops, * init_net_initialized and first_device pointer. 
*/ DECLARE_RWSEM(net_sem); @@ -70,11 +72,10 @@ static int net_assign_generic(struct net *net, unsigned int id, void *data) { struct net_generic *ng, *old_ng; - BUG_ON(!mutex_is_locked(&net_mutex)); BUG_ON(id < MIN_PERNET_OPS_ID); old_ng = rcu_dereference_protected(net->gen, - lockdep_is_held(&net_mutex)); + lockdep_is_held(&net_sem)); if (old_ng->s.len > id) { old_ng->ptr[id] = data; return 0; @@ -426,11 +427,14 @@ struct net *copy_net_ns(unsigned long flags, rv = down_read_killable(&net_sem); if (rv < 0) goto put_userns; - rv = mutex_lock_killable(&net_mutex); - if (rv < 0) - goto up_read; + if (nr_sync_pernet_ops) { + rv = mutex_lock_killable(&net_mutex); + if (rv < 0) + goto up_read; + } rv = setup_net(net, user_ns); - mutex_unlock(&net_mutex); + if (nr_sync_pernet_ops) + mutex_unlock(&net_mutex); up_read: up_read(&net_sem); if (rv < 0) { @@ -487,7 +491,8 @@ static void cleanup_net(struct work_struct *work) spin_unlock_irq(&cleanup_list_lock); down_read(&net_sem); - mutex_lock(&net_mutex); + if (nr_sync_pernet_ops) + mutex_lock(&net_mutex); /* Don't let anyone else find us. 
*/ rtnl_lock(); @@ -522,7 +527,8 @@ static void cleanup_net(struct work_struct *work) list_for_each_entry_reverse(ops, &pernet_list, list) ops_exit_list(ops, &net_exit_list); - mutex_unlock(&net_mutex); + if (nr_sync_pernet_ops) + mutex_unlock(&net_mutex); /* Free the net generic variables */ list_for_each_entry_reverse(ops, &pernet_list, list) @@ -994,6 +1000,9 @@ again: rcu_barrier(); if (ops->id) ida_remove(&net_generic_ids, *ops->id); + } else if (!ops->async) { + pr_info_once("Pernet operations %ps are sync.\n", ops); + nr_sync_pernet_ops++; } return error; @@ -1001,7 +1010,8 @@ again: static void unregister_pernet_operations(struct pernet_operations *ops) { - + if (!ops->async) + BUG_ON(nr_sync_pernet_ops-- == 0); __unregister_pernet_operations(ops); rcu_barrier(); if (ops->id) From f039e184bc45227553e645fb1c7021d69a5262b4 Mon Sep 17 00:00:00 2001 From: Kirill Tkhai Date: Tue, 13 Feb 2018 12:26:52 +0300 Subject: [PATCH 06/32] net: Convert proc_net_ns_ops This patch starts to convert pernet_subsys, registered before initcalls. proc_net_ns_ops::proc_net_ns_init()/proc_net_ns_exit() {un,}register pernet net->proc_net and ->proc_net_stat. Constructors and destructors of other pernet_operations are not interested in foreign net's proc_net and proc_net_stat. Proc filesystem primitives are synchronized on proc_subdir_lock. So, proc_net_ns_ops methods are able to be executed in parallel with methods of any other pernet operations. Signed-off-by: Kirill Tkhai Acked-by: Andrei Vagin Signed-off-by: David S. 
Miller --- fs/proc/proc_net.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c index 68c06ae7888c..da6f8733c9c5 100644 --- a/fs/proc/proc_net.c +++ b/fs/proc/proc_net.c @@ -237,6 +237,7 @@ static __net_exit void proc_net_ns_exit(struct net *net) static struct pernet_operations __net_initdata proc_net_ns_ops = { .init = proc_net_ns_init, .exit = proc_net_ns_exit, + .async = true, }; int __init proc_net_init(void) From 3fc3b827f0c4397c74d4b8a8a06d71b903a4982f Mon Sep 17 00:00:00 2001 From: Kirill Tkhai Date: Tue, 13 Feb 2018 12:27:03 +0300 Subject: [PATCH 07/32] net: Convert net_ns_ops methods This patch starts to convert pernet_subsys, registered from pure initcalls. The net_ns_ops::net_ns_net_init/net_ns_net_exit methods use only ida_simple_* functions, which do not need synchronization. They are synchronized by the idr subsystem. So, net_ns_ops methods are able to be executed in parallel with methods of other pernet operations. Signed-off-by: Kirill Tkhai Acked-by: Andrei Vagin Signed-off-by: David S. Miller --- net/core/net_namespace.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index 2a01ff32d9c7..e21c564c8c00 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -649,6 +649,7 @@ static __net_exit void net_ns_net_exit(struct net *net) static struct pernet_operations __net_initdata net_ns_ops = { .init = net_ns_net_init, .exit = net_ns_net_exit, + .async = true, }; static const struct nla_policy rtnl_net_policy[NETNSA_MAX + 1] = { From 93d230fe0762bf129fb4debc944af3e1c1e8f40e Mon Sep 17 00:00:00 2001 From: Kirill Tkhai Date: Tue, 13 Feb 2018 12:27:13 +0300 Subject: [PATCH 08/32] net: Convert sysctl_pernet_ops This patch starts to convert pernet_subsys, registered from core initcalls. Methods sysctl_net_init() and sysctl_net_exit() initialize net::sysctls table of a namespace. 
pernet_operations::init()/exit() methods from the rest of the list do not touch net::sysctls of strangers, so it's safe to execute sysctl_pernet_ops's methods in parallel with any other pernet_operations. Signed-off-by: Kirill Tkhai Acked-by: Andrei Vagin Signed-off-by: David S. Miller --- net/sysctl_net.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/sysctl_net.c b/net/sysctl_net.c index 9aed6fe1bf1a..f424539829b7 100644 --- a/net/sysctl_net.c +++ b/net/sysctl_net.c @@ -89,6 +89,7 @@ static void __net_exit sysctl_net_exit(struct net *net) static struct pernet_operations sysctl_pernet_ops = { .init = sysctl_net_init, .exit = sysctl_net_exit, + .async = true, }; static struct ctl_table_header *net_header; From 9549929923738d14dc450f0427a4b98ac3e8aff2 Mon Sep 17 00:00:00 2001 From: Kirill Tkhai Date: Tue, 13 Feb 2018 12:27:23 +0300 Subject: [PATCH 09/32] net: Convert netfilter_net_ops Methods netfilter_net_init() and netfilter_net_exit() initialize net::nf::hooks and change net-related proc directory of net. Other pernet_operations are not interested in foreign net::nf::hooks or proc entries, so it's safe to execute them in parallel with methods of other pernet operations. Signed-off-by: Kirill Tkhai Acked-by: Andrei Vagin Signed-off-by: David S. 
Miller --- net/netfilter/core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/netfilter/core.c b/net/netfilter/core.c index 0f6b8172fb9a..d72cc786c7b7 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -629,6 +629,7 @@ static void __net_exit netfilter_net_exit(struct net *net) static struct pernet_operations netfilter_net_ops = { .init = netfilter_net_init, .exit = netfilter_net_exit, + .async = true, }; int __init netfilter_init(void) From c9d8fb91351fe1514731b7f6d24d4decc0028978 Mon Sep 17 00:00:00 2001 From: Kirill Tkhai Date: Tue, 13 Feb 2018 12:27:31 +0300 Subject: [PATCH 10/32] net: Convert nf_log_net_ops The pernet_operations would have had a problem in parallel execution with others, if init_net could be released. But it can't, and the rest is safe for that. There is a memory allocation, which nobody else is interested in, and sysctl registration. So, we make them async. Signed-off-by: Kirill Tkhai Acked-by: Andrei Vagin Signed-off-by: David S. Miller --- net/netfilter/nf_log.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c index c2c1b16b7538..1ba3da51050d 100644 --- a/net/netfilter/nf_log.c +++ b/net/netfilter/nf_log.c @@ -577,6 +577,7 @@ static void __net_exit nf_log_net_exit(struct net *net) static struct pernet_operations nf_log_net_ops = { .init = nf_log_net_init, .exit = nf_log_net_exit, + .async = true, }; int __init netfilter_log_init(void) From 604da74e4fc1c2afc50dc7a1850acfd9aff2b58d Mon Sep 17 00:00:00 2001 From: Kirill Tkhai Date: Tue, 13 Feb 2018 12:27:41 +0300 Subject: [PATCH 11/32] net: Convert net_inuse_ops net_inuse_ops methods expose statistics in /proc. No one from the rest of pernet_subsys or pernet_device lists touches net::core::inuse. So, it's safe to make net_inuse_ops async. Signed-off-by: Kirill Tkhai Acked-by: Andrei Vagin Signed-off-by: David S. 
Miller --- net/core/sock.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/core/sock.c b/net/core/sock.c index 04e5e27c9b81..f2bf69b86c58 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -3112,6 +3112,7 @@ static void __net_exit sock_inuse_exit_net(struct net *net) static struct pernet_operations net_inuse_ops = { .init = sock_inuse_init_net, .exit = sock_inuse_exit_net, + .async = true, }; static __init int net_inuse_init(void) From ff291d005a988aaa7d73daf44c3d04585e9f0637 Mon Sep 17 00:00:00 2001 From: Kirill Tkhai Date: Tue, 13 Feb 2018 12:27:51 +0300 Subject: [PATCH 12/32] net: Convert net_defaults_ops net_defaults_ops introduces only the net_defaults_init_net method, and it acts on net::core::sysctl_somaxconn, which is not interesting for the rest of pernet_subsys and pernet_device lists. So, make them async. Signed-off-by: Kirill Tkhai Acked-by: Andrei Vagin Signed-off-by: David S. Miller --- net/core/net_namespace.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index e21c564c8c00..bcab9a938d6f 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -340,6 +340,7 @@ static int __net_init net_defaults_init_net(struct net *net) static struct pernet_operations net_defaults_ops = { .init = net_defaults_init_net, + .async = true, }; static __init int net_defaults_init(void) From 194b95d2166661f890dca5ef3b952c73622eb4e5 Mon Sep 17 00:00:00 2001 From: Kirill Tkhai Date: Tue, 13 Feb 2018 12:28:00 +0300 Subject: [PATCH 13/32] net: Convert netlink_net_ops The methods of netlink_net_ops create and destroy the "netlink" file, which is not interesting for foreign pernet_operations. So, netlink_net_ops may safely be made async. Signed-off-by: Kirill Tkhai Acked-by: Andrei Vagin Signed-off-by: David S. 
Miller --- net/netlink/af_netlink.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 3c8af14330b5..b3065908e146 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -2723,6 +2723,7 @@ static void __init netlink_add_usersock_entry(void) static struct pernet_operations __net_initdata netlink_net_ops = { .init = netlink_net_init, .exit = netlink_net_exit, + .async = true, }; static inline u32 netlink_hash(const void *data, u32 len, u32 seed) From 46456675ec1b7f93dadd2b1b4b58d763c3ae9266 Mon Sep 17 00:00:00 2001 From: Kirill Tkhai Date: Tue, 13 Feb 2018 12:28:15 +0300 Subject: [PATCH 14/32] net: Convert rtnetlink_net_ops rtnetlink_net_init() and rtnetlink_net_exit() create and destroy netlink socket net::rtnl. The socket is used to send rtnl notification via rtnl_net_notifyid(). There is no a problem to create and destroy it in parallel with other pernet operations, as we link net in setup_net() after the socket is created, and destroy in cleanup_net() after net is unhashed from all the lists and there is no RCU references on it. Signed-off-by: Kirill Tkhai Acked-by: Andrei Vagin Signed-off-by: David S. Miller --- net/core/rtnetlink.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 257e7bbaffba..67f375cfb982 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -4724,6 +4724,7 @@ static void __net_exit rtnetlink_net_exit(struct net *net) static struct pernet_operations rtnetlink_net_ops = { .init = rtnetlink_net_init, .exit = rtnetlink_net_exit, + .async = true, }; void __init rtnetlink_init(void) From 906f63ec1d1b4941d36eee18121e68749c9e3279 Mon Sep 17 00:00:00 2001 From: Kirill Tkhai Date: Tue, 13 Feb 2018 12:28:24 +0300 Subject: [PATCH 15/32] net: Convert audit_net_ops This patch starts to convert pernet_subsys, registered from postcore initcalls. audit_net_init() creates netlink socket, while audit_net_exit() destroys it. 
The rest of the pernet_list are not interested in the socket, so we make audit_net_ops async. Signed-off-by: Kirill Tkhai Acked-by: Andrei Vagin Signed-off-by: David S. Miller --- kernel/audit.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/audit.c b/kernel/audit.c index 227db99b0f19..5e49b614d0e6 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -1526,6 +1526,7 @@ static struct pernet_operations audit_net_ops __net_initdata = { .exit = audit_net_exit, .id = &audit_net_id, .size = sizeof(struct audit_net), + .async = true, }; /* Initialize audit support at boot time. */ From 15898a011b3d0390869f31167c4403835bc04954 Mon Sep 17 00:00:00 2001 From: Kirill Tkhai Date: Tue, 13 Feb 2018 12:28:33 +0300 Subject: [PATCH 16/32] net: Convert uevent_net_ops uevent_net_init() and uevent_net_exit() create and destroy netlink socket, and these actions serialized in netlink code. Parallel execution with other pernet_operations makes the socket disappear earlier from uevent_sock_list on ->exit. As userspace can't be interested in broadcast messages of dying net, and, as I see, no one in kernel listen them, we may safely make uevent_net_ops async. Signed-off-by: Kirill Tkhai Acked-by: Andrei Vagin Signed-off-by: David S. Miller --- lib/kobject_uevent.c | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/kobject_uevent.c b/lib/kobject_uevent.c index 9fe6ec8fda28..9539d7ab3ea8 100644 --- a/lib/kobject_uevent.c +++ b/lib/kobject_uevent.c @@ -650,6 +650,7 @@ found: static struct pernet_operations uevent_net_ops = { .init = uevent_net_init, .exit = uevent_net_exit, + .async = true, }; static int __init kobject_uevent_init(void) From 36b0068e6c9892aa4757d4fa08fd14fbba72b3b3 Mon Sep 17 00:00:00 2001 From: Kirill Tkhai Date: Tue, 13 Feb 2018 12:28:44 +0300 Subject: [PATCH 17/32] net: Convert proto_net_ops This patch starts to convert pernet_subsys, registered from subsys initcalls. 
It seems safe to be executed in parallel with others, as it only creates/destroys a proc entry, which nobody else is interested in. Signed-off-by: Kirill Tkhai Acked-by: Andrei Vagin Signed-off-by: David S. Miller --- net/core/sock.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/core/sock.c b/net/core/sock.c index f2bf69b86c58..e90d461748f0 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -3386,6 +3386,7 @@ static __net_exit void proto_exit_net(struct net *net) static __net_initdata struct pernet_operations proto_net_ops = { .init = proto_init_net, .exit = proto_exit_net, + .async = true, }; static int __init proto_init(void) From 88b8ffebdb4d0f4da4e9a8383c8478c32372b42b Mon Sep 17 00:00:00 2001 From: Kirill Tkhai Date: Tue, 13 Feb 2018 12:28:54 +0300 Subject: [PATCH 18/32] net: Convert pernet_subsys ops, registered via net_dev_init() There are: 1) dev_proc_ops and dev_mc_net_ops, which create and destroy a pernet proc file and are not interesting for other net namespaces; 2) netdev_net_ops, which creates pernet hashes, which are not touched by other pernet_operations. So, make them async. Signed-off-by: Kirill Tkhai Acked-by: Andrei Vagin Signed-off-by: David S. 
Miller --- net/core/dev.c | 1 + net/core/net-procfs.c | 2 ++ 2 files changed, 3 insertions(+) diff --git a/net/core/dev.c b/net/core/dev.c index dda9d7b9a840..dc7506f00a66 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -8833,6 +8833,7 @@ static void __net_exit netdev_exit(struct net *net) static struct pernet_operations __net_initdata netdev_net_ops = { .init = netdev_init, .exit = netdev_exit, + .async = true, }; static void __net_exit default_device_exit(struct net *net) diff --git a/net/core/net-procfs.c b/net/core/net-procfs.c index e010bb800d7b..65b51e778782 100644 --- a/net/core/net-procfs.c +++ b/net/core/net-procfs.c @@ -349,6 +349,7 @@ static void __net_exit dev_proc_net_exit(struct net *net) static struct pernet_operations __net_initdata dev_proc_ops = { .init = dev_proc_net_init, .exit = dev_proc_net_exit, + .async = true, }; static int dev_mc_seq_show(struct seq_file *seq, void *v) @@ -405,6 +406,7 @@ static void __net_exit dev_mc_net_exit(struct net *net) static struct pernet_operations __net_initdata dev_mc_net_ops = { .init = dev_mc_net_init, .exit = dev_mc_net_exit, + .async = true, }; int __init dev_proc_init(void) From 86b63418fd382b095fdac4408fd565aa5da4b036 Mon Sep 17 00:00:00 2001 From: Kirill Tkhai Date: Tue, 13 Feb 2018 12:29:03 +0300 Subject: [PATCH 19/32] net: Convert fib_* pernet_operations, registered via subsys_initcall Both of them create and initialize lists, which are not touched by other foreign pernet_operations. Signed-off-by: Kirill Tkhai Acked-by: Andrei Vagin Signed-off-by: David S. 
Miller --- net/core/fib_notifier.c | 1 + net/core/fib_rules.c | 1 + 2 files changed, 2 insertions(+) diff --git a/net/core/fib_notifier.c b/net/core/fib_notifier.c index 0c048bdeb016..5ace0705a3f9 100644 --- a/net/core/fib_notifier.c +++ b/net/core/fib_notifier.c @@ -171,6 +171,7 @@ static void __net_exit fib_notifier_net_exit(struct net *net) static struct pernet_operations fib_notifier_net_ops = { .init = fib_notifier_net_init, .exit = fib_notifier_net_exit, + .async = true, }; static int __init fib_notifier_init(void) diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index 98e1066c3d55..cb071b8e8d17 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -1030,6 +1030,7 @@ static void __net_exit fib_rules_net_exit(struct net *net) static struct pernet_operations fib_rules_net_ops = { .init = fib_rules_net_init, .exit = fib_rules_net_exit, + .async = true, }; static int __init fib_rules_init(void) From 13da199c38ee7f33a1c42db62647118f9f9f527c Mon Sep 17 00:00:00 2001 From: Kirill Tkhai Date: Tue, 13 Feb 2018 12:29:13 +0300 Subject: [PATCH 20/32] net: Convert subsys_initcall() registered pernet_operations from net/sched psched_net_ops only creates and destroys a /proc entry, and is safe to be executed in parallel with any foreign pernet_operations. tcf_action_net_ops initializes and destructs tcf_action_net::egdev_ht, which is not touched by foreign pernet_operations. So, make them async. Signed-off-by: Kirill Tkhai Acked-by: Andrei Vagin Signed-off-by: David S. 
Miller --- net/sched/act_api.c | 1 + net/sched/sch_api.c | 1 + 2 files changed, 2 insertions(+) diff --git a/net/sched/act_api.c b/net/sched/act_api.c index eba6682727dd..4886ea4a7d6e 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -1454,6 +1454,7 @@ static struct pernet_operations tcf_action_net_ops = { .exit = tcf_action_net_exit, .id = &tcf_action_net_id, .size = sizeof(struct tcf_action_net), + .async = true, }; static int __init tc_action_init(void) diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index d512f49ee83c..27e672c12492 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -2128,6 +2128,7 @@ static void __net_exit psched_net_exit(struct net *net) static struct pernet_operations psched_net_ops = { .init = psched_net_init, .exit = psched_net_exit, + .async = true, }; static int __init pktsched_init(void) From 83caf62c867bed2637d4f3713839b0c414d6d966 Mon Sep 17 00:00:00 2001 From: Kirill Tkhai Date: Tue, 13 Feb 2018 12:29:23 +0300 Subject: [PATCH 21/32] net: Convert genl_pernet_ops These pernet_operations create and destroy net::genl_sock. Foreign pernet_operations don't touch it. Signed-off-by: Kirill Tkhai Acked-by: Andrei Vagin Signed-off-by: David S. Miller --- net/netlink/genetlink.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index 6f02499ef007..a6f63a5faee7 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -1035,6 +1035,7 @@ static void __net_exit genl_pernet_exit(struct net *net) static struct pernet_operations genl_pernet_ops = { .init = genl_pernet_init, .exit = genl_pernet_exit, + .async = true, }; static int __init genl_init(void) From 6c0075d0f6ccf5be4527408830852106808ab2bb Mon Sep 17 00:00:00 2001 From: Kirill Tkhai Date: Tue, 13 Feb 2018 12:29:33 +0300 Subject: [PATCH 22/32] net: Convert wext_pernet_ops These pernet_operations initialize and purge net::wext_nlevents queue, which is not touched by foreign pernet_operations.
Mark them async. Signed-off-by: Kirill Tkhai Acked-by: Andrei Vagin Signed-off-by: David S. Miller --- net/wireless/wext-core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/wireless/wext-core.c b/net/wireless/wext-core.c index 9efbfc753347..bc7064486b15 100644 --- a/net/wireless/wext-core.c +++ b/net/wireless/wext-core.c @@ -390,6 +390,7 @@ static void __net_exit wext_pernet_exit(struct net *net) static struct pernet_operations wext_pernet_ops = { .init = wext_pernet_init, .exit = wext_pernet_exit, + .async = true, }; static int __init wireless_nlevent_init(void) From 232cf06c611f57ccb8e321de3fd850f21215ede8 Mon Sep 17 00:00:00 2001 From: Kirill Tkhai Date: Tue, 13 Feb 2018 12:29:42 +0300 Subject: [PATCH 23/32] net: Convert sysctl_core_ops These pernet_operations register and destroy sysctl directory, and it's not interesting for foreign pernet_operations. Signed-off-by: Kirill Tkhai Acked-by: Andrei Vagin Signed-off-by: David S. Miller --- net/core/sysctl_net_core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index f2d0462611c3..d714f65782b7 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -572,6 +572,7 @@ static __net_exit void sysctl_core_net_exit(struct net *net) static __net_initdata struct pernet_operations sysctl_core_ops = { .init = sysctl_core_net_init, .exit = sysctl_core_net_exit, + .async = true, }; static __init int sysctl_core_init(void) From f84c6821aa540342360067604ad156e3d53a67ed Mon Sep 17 00:00:00 2001 From: Kirill Tkhai Date: Tue, 13 Feb 2018 12:29:52 +0300 Subject: [PATCH 24/32] net: Convert pernet_subsys, registered from inet_init() arp_net_ops just adds/removes /proc entry. devinet_ops allocates and frees duplicate of init_net tables and (un)registers sysctl entries. fib_net_ops allocates and frees pernet tables, creates/destroys netlink socket and (un)initializes /proc entries. Foreign pernet_operations do not touch them.
ip_rt_proc_ops only modifies pernet /proc entries. xfrm_net_ops creates/destroys /proc entries, allocates/frees pernet statistics, hashes and tables, and (un)initializes sysctl files. These are not touched by foreign pernet_operations. xfrm4_net_ops allocates/frees private pernet memory, and configures sysctls. sysctl_route_ops creates/destroys sysctls. rt_genid_ops only initializes fields of just allocated net. ipv4_inetpeer_ops allocates/frees net private memory. igmp_net_ops just creates/destroys /proc files and socket, which no one else is interested in. tcp_sk_ops seems to be safe, because tcp_sk_init() does not depend on any other pernet_operations modifications. Iteration over hash table in inet_twsk_purge() is made under RCU lock, and it's safe to iterate the table this way. Removal from the table happens in inet_twsk_deschedule_put(), but this function is safe without any external locks, as it's synchronized internally. There are many examples of it being used in different contexts. So, it's safe to leave tcp_sk_exit_batch() unlocked. tcp_net_metrics_ops is synchronized on tcp_metrics_lock and safe. udplite4_net_ops only creates/destroys pernet /proc file. icmp_sk_ops creates percpu sockets, not touched by foreign pernet_operations. ipmr_net_ops creates/destroys pernet fib tables, (un)registers fib rules and /proc files. This seems to be safe to execute in parallel with foreign pernet_operations. af_inet_ops just sets up default parameters of newly created net. ipv4_mib_ops creates and destroys pernet percpu statistics. raw_net_ops, tcp4_net_ops, udp4_net_ops, ping_v4_net_ops and ip_proc_ops only create/destroy pernet /proc files. ip4_frags_ops creates and destroys sysctl file. So, it's safe to make the pernet_operations async. Signed-off-by: Kirill Tkhai Acked-by: Andrei Vagin Signed-off-by: David S.
Miller --- net/ipv4/af_inet.c | 2 ++ net/ipv4/arp.c | 1 + net/ipv4/devinet.c | 1 + net/ipv4/fib_frontend.c | 1 + net/ipv4/icmp.c | 1 + net/ipv4/igmp.c | 1 + net/ipv4/ip_fragment.c | 1 + net/ipv4/ipmr.c | 1 + net/ipv4/ping.c | 1 + net/ipv4/proc.c | 1 + net/ipv4/raw.c | 1 + net/ipv4/route.c | 4 ++++ net/ipv4/tcp_ipv4.c | 2 ++ net/ipv4/tcp_metrics.c | 1 + net/ipv4/udp.c | 1 + net/ipv4/udplite.c | 1 + net/ipv4/xfrm4_policy.c | 1 + net/xfrm/xfrm_policy.c | 1 + 18 files changed, 23 insertions(+) diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index f98e2f0db841..e8c7fad8c329 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1735,6 +1735,7 @@ static __net_exit void ipv4_mib_exit_net(struct net *net) static __net_initdata struct pernet_operations ipv4_mib_ops = { .init = ipv4_mib_init_net, .exit = ipv4_mib_exit_net, + .async = true, }; static int __init init_ipv4_mibs(void) @@ -1788,6 +1789,7 @@ static __net_exit void inet_exit_net(struct net *net) static __net_initdata struct pernet_operations af_inet_ops = { .init = inet_init_net, .exit = inet_exit_net, + .async = true, }; static int __init init_inet_pernet_ops(void) diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index f28f06c91ead..7dc9de8444a9 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -1447,6 +1447,7 @@ static void __net_exit arp_net_exit(struct net *net) static struct pernet_operations arp_net_ops = { .init = arp_net_init, .exit = arp_net_exit, + .async = true, }; static int __init arp_proc_init(void) diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 40f001782c1b..5ae0d1f097ca 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -2469,6 +2469,7 @@ static __net_exit void devinet_exit_net(struct net *net) static __net_initdata struct pernet_operations devinet_ops = { .init = devinet_init_net, .exit = devinet_exit_net, + .async = true, }; static struct rtnl_af_ops inet_af_ops __read_mostly = { diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 
f05afaf3235c..ac71c3d496c0 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -1362,6 +1362,7 @@ static void __net_exit fib_net_exit(struct net *net) static struct pernet_operations fib_net_ops = { .init = fib_net_init, .exit = fib_net_exit, + .async = true, }; void __init ip_fib_init(void) diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 1617604c9284..cc56efa64d5c 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -1257,6 +1257,7 @@ fail: static struct pernet_operations __net_initdata icmp_sk_ops = { .init = icmp_sk_init, .exit = icmp_sk_exit, + .async = true, }; int __init icmp_init(void) diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index f2402581fef1..c2743763777e 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -3028,6 +3028,7 @@ static void __net_exit igmp_net_exit(struct net *net) static struct pernet_operations igmp_net_ops = { .init = igmp_net_init, .exit = igmp_net_exit, + .async = true, }; #endif diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index bbf1b94942c0..5e843ae5e468 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -885,6 +885,7 @@ static void __net_exit ipv4_frags_exit_net(struct net *net) static struct pernet_operations ip4_frags_ops = { .init = ipv4_frags_init_net, .exit = ipv4_frags_exit_net, + .async = true, }; void __init ipfrag_init(void) diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index b05689bbba31..7c7ac9d32e77 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -3327,6 +3327,7 @@ static void __net_exit ipmr_net_exit(struct net *net) static struct pernet_operations ipmr_net_ops = { .init = ipmr_net_init, .exit = ipmr_net_exit, + .async = true, }; int __init ip_mr_init(void) diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index b8f0db54b197..0164def9c808 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -1204,6 +1204,7 @@ static void __net_exit ping_v4_proc_exit_net(struct net *net) static struct pernet_operations ping_v4_net_ops = { .init = ping_v4_proc_init_net, 
.exit = ping_v4_proc_exit_net, + .async = true, }; int __init ping_proc_init(void) diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index dc5edc8f7564..fdabc70283b6 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -549,6 +549,7 @@ static __net_exit void ip_proc_exit_net(struct net *net) static __net_initdata struct pernet_operations ip_proc_ops = { .init = ip_proc_init_net, .exit = ip_proc_exit_net, + .async = true, }; int __init ip_misc_proc_init(void) diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 9b367fc48d7d..54648d20bf0f 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -1156,6 +1156,7 @@ static __net_exit void raw_exit_net(struct net *net) static __net_initdata struct pernet_operations raw_net_ops = { .init = raw_init_net, .exit = raw_exit_net, + .async = true, }; int __init raw_proc_init(void) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 49cc1c1df1ba..9376ed69ffeb 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -417,6 +417,7 @@ static void __net_exit ip_rt_do_proc_exit(struct net *net) static struct pernet_operations ip_rt_proc_ops __net_initdata = { .init = ip_rt_do_proc_init, .exit = ip_rt_do_proc_exit, + .async = true, }; static int __init ip_rt_proc_init(void) @@ -2994,6 +2995,7 @@ static __net_exit void sysctl_route_net_exit(struct net *net) static __net_initdata struct pernet_operations sysctl_route_ops = { .init = sysctl_route_net_init, .exit = sysctl_route_net_exit, + .async = true, }; #endif @@ -3007,6 +3009,7 @@ static __net_init int rt_genid_init(struct net *net) static __net_initdata struct pernet_operations rt_genid_ops = { .init = rt_genid_init, + .async = true, }; static int __net_init ipv4_inetpeer_init(struct net *net) @@ -3032,6 +3035,7 @@ static void __net_exit ipv4_inetpeer_exit(struct net *net) static __net_initdata struct pernet_operations ipv4_inetpeer_ops = { .init = ipv4_inetpeer_init, .exit = ipv4_inetpeer_exit, + .async = true, }; #ifdef CONFIG_IP_ROUTE_CLASSID diff --git a/net/ipv4/tcp_ipv4.c 
b/net/ipv4/tcp_ipv4.c index f8ad397e285e..ac16795486ea 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2387,6 +2387,7 @@ static void __net_exit tcp4_proc_exit_net(struct net *net) static struct pernet_operations tcp4_net_ops = { .init = tcp4_proc_init_net, .exit = tcp4_proc_exit_net, + .async = true, }; int __init tcp4_proc_init(void) @@ -2573,6 +2574,7 @@ static struct pernet_operations __net_initdata tcp_sk_ops = { .init = tcp_sk_init, .exit = tcp_sk_exit, .exit_batch = tcp_sk_exit_batch, + .async = true, }; void __init tcp_v4_init(void) diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c index 03b51cdcc731..aa6fea9f3328 100644 --- a/net/ipv4/tcp_metrics.c +++ b/net/ipv4/tcp_metrics.c @@ -1024,6 +1024,7 @@ static void __net_exit tcp_net_metrics_exit_batch(struct list_head *net_exit_lis static __net_initdata struct pernet_operations tcp_net_metrics_ops = { .init = tcp_net_metrics_init, .exit_batch = tcp_net_metrics_exit_batch, + .async = true, }; void __init tcp_metrics_init(void) diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index bfaefe560b5c..ac5fac0e59b1 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -2757,6 +2757,7 @@ static void __net_exit udp4_proc_exit_net(struct net *net) static struct pernet_operations udp4_net_ops = { .init = udp4_proc_init_net, .exit = udp4_proc_exit_net, + .async = true, }; int __init udp4_proc_init(void) diff --git a/net/ipv4/udplite.c b/net/ipv4/udplite.c index f96614e9b9a5..72f2c3806408 100644 --- a/net/ipv4/udplite.c +++ b/net/ipv4/udplite.c @@ -104,6 +104,7 @@ static void __net_exit udplite4_proc_exit_net(struct net *net) static struct pernet_operations udplite4_net_ops = { .init = udplite4_proc_init_net, .exit = udplite4_proc_exit_net, + .async = true, }; static __init int udplite4_proc_init(void) diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index 05017e2c849c..753f526cf9db 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -365,6 +365,7 @@ static void __net_exit 
xfrm4_net_exit(struct net *net) static struct pernet_operations __net_initdata xfrm4_net_ops = { .init = xfrm4_net_init, .exit = xfrm4_net_exit, + .async = true, }; static void __init xfrm4_policy_init(void) diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 7a23078132cf..77d9d1ab05ce 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -2982,6 +2982,7 @@ static void __net_exit xfrm_net_exit(struct net *net) static struct pernet_operations __net_initdata xfrm_net_ops = { .init = xfrm_net_init, .exit = xfrm_net_exit, + .async = true, }; void __init xfrm_init(void) From 167f7ac723e5b4ea22c44a0bd8e357bb76a68cd2 Mon Sep 17 00:00:00 2001 From: Kirill Tkhai Date: Tue, 13 Feb 2018 12:30:00 +0300 Subject: [PATCH 25/32] net: Convert unix_net_ops These pernet_operations just create and destroy /proc and sysctl entries, and are not touched by foreign pernet_operations. So, we are able to make them async. Signed-off-by: Kirill Tkhai Acked-by: Andrei Vagin Signed-off-by: David S. Miller --- net/unix/af_unix.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 723698416242..e3eb8806b3e4 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -2913,6 +2913,7 @@ static void __net_exit unix_net_exit(struct net *net) static struct pernet_operations unix_net_ops = { .init = unix_net_init, .exit = unix_net_exit, + .async = true, }; static int __init af_unix_init(void) From cb5e3400e78598e1eb872954516a02ba85926d84 Mon Sep 17 00:00:00 2001 From: Kirill Tkhai Date: Tue, 13 Feb 2018 12:30:08 +0300 Subject: [PATCH 26/32] net: Convert packet_net_ops These pernet_operations just create and destroy /proc entry, and other operations do not touch it. Also, nobody else is interested in foreign net::packet::sklist. Signed-off-by: Kirill Tkhai Acked-by: Andrei Vagin Signed-off-by: David S.
Miller --- net/packet/af_packet.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 616cb9c18f88..2c5a6fe5d749 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -4557,6 +4557,7 @@ static void __net_exit packet_net_exit(struct net *net) static struct pernet_operations packet_net_ops = { .init = packet_net_init, .exit = packet_net_exit, + .async = true, }; From 22769a2a6e93a17d08e1cd7a2de2749088887b87 Mon Sep 17 00:00:00 2001 From: Kirill Tkhai Date: Tue, 13 Feb 2018 12:30:18 +0300 Subject: [PATCH 27/32] net: Convert ipv4_sysctl_ops These pernet_operations create and destroy sysctl, which are not touched by anybody else. Signed-off-by: Kirill Tkhai Acked-by: Andrei Vagin Signed-off-by: David S. Miller --- net/ipv4/sysctl_net_ipv4.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 93e172118a94..89683d868b37 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -1219,6 +1219,7 @@ static __net_exit void ipv4_sysctl_exit_net(struct net *net) static __net_initdata struct pernet_operations ipv4_sysctl_ops = { .init = ipv4_sysctl_init_net, .exit = ipv4_sysctl_exit_net, + .async = true, }; static __init int sysctl_ipv4_init(void) From 0bc9be67185ec90feedc971af6e7b84ab932703a Mon Sep 17 00:00:00 2001 From: Kirill Tkhai Date: Tue, 13 Feb 2018 12:30:27 +0300 Subject: [PATCH 28/32] net: Convert addrconf_ops These pernet_operations (un)register sysctl, which are not touched by anybody else. So, it's safe to make them async. Signed-off-by: Kirill Tkhai Acked-by: Andrei Vagin Signed-off-by: David S. 
Miller --- net/ipv6/addrconf.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index e1846b97ee69..8c17f8d8d5d9 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -6550,6 +6550,7 @@ static void __net_exit addrconf_exit_net(struct net *net) static struct pernet_operations addrconf_ops = { .init = addrconf_init_net, .exit = addrconf_exit_net, + .async = true, }; static struct rtnl_af_ops inet6_ops __read_mostly = { From 9a4d105de78488526d8b0d6cdc5f2c22f122ca4a Mon Sep 17 00:00:00 2001 From: Kirill Tkhai Date: Tue, 13 Feb 2018 12:30:35 +0300 Subject: [PATCH 29/32] net: Convert loopback_net_ops These pernet_operations have only init() method. It allocates memory for net_device, calls register_netdev() and assigns net::loopback_dev. register_netdev() is allowed to be used without additional locks, as it's synchronized on rtnl_lock(). There are many examples of using this function directly from ioctl(). The only difference, compared to ioctl(), is that net is not completely alive at this moment. But it looks like there is no way for parallel pernet_operations to dereference the net_device, as most of the struct net_device lists, where it's linked, are related to net, and the net is not linked. The exceptions are net_device::unreg_list, close_list, todo_list, used for unregistration, and ::link_watch_list, where net_device may be linked to global lists. Unregistration of loopback_dev obviously can't happen, when loopback_net_init() is executing, as the net is alive. It occurs in default_device_ops, which currently requires net_mutex, and it behaves as a barrier at the moment. It will be considered in next patch. Speaking about link_watch_list, it seems, there is no way for loopback_dev at time of registration to be linked in lweventlist and be available for another pernet_operations. Signed-off-by: Kirill Tkhai Acked-by: Andrei Vagin Signed-off-by: David S.
Miller --- drivers/net/loopback.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/loopback.c b/drivers/net/loopback.c index 30612497643c..b97a907ea5aa 100644 --- a/drivers/net/loopback.c +++ b/drivers/net/loopback.c @@ -230,4 +230,5 @@ out: /* Registered in net/core/dev.c */ struct pernet_operations __net_initdata loopback_net_ops = { .init = loopback_net_init, + .async = true, }; From 2608e6b7adc8b07194b855e2102d6f1a277e3f03 Mon Sep 17 00:00:00 2001 From: Kirill Tkhai Date: Tue, 13 Feb 2018 12:30:42 +0300 Subject: [PATCH 30/32] net: Convert default_device_ops These pernet operations consist of exit() and exit_batch() methods. default_device_exit() moves not-local and virtual devices to init_net. There is nothing exciting, because this may happen at any time on a working system, and rtnl_lock() and synchronize_net() protect us from all cases of external dereference. The same for default_device_exit_batch(). Similar unregistration may happen at any time on a system. There are several lists here (like todo_list), which are accessed under rtnl_lock(). After rtnl_unlock() and netdev_run_todo() all the devices are flushed. Signed-off-by: Kirill Tkhai Acked-by: Andrei Vagin Signed-off-by: David S. Miller --- net/core/dev.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/core/dev.c b/net/core/dev.c index dc7506f00a66..df5241c8eda1 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -8934,6 +8934,7 @@ static void __net_exit default_device_exit_batch(struct list_head *net_list) static struct pernet_operations __net_initdata default_device_ops = { .exit = default_device_exit, .exit_batch = default_device_exit_batch, + .async = true, }; /* From 59a513587ac09359875d6074778f21a3c01754e0 Mon Sep 17 00:00:00 2001 From: Kirill Tkhai Date: Tue, 13 Feb 2018 12:30:52 +0300 Subject: [PATCH 31/32] net: Convert diag_net_ops These pernet operations just create and destroy netlink socket. The socket is pernet and other operations don't touch it.
Signed-off-by: Kirill Tkhai Acked-by: Andrei Vagin Signed-off-by: David S. Miller --- net/core/sock_diag.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/core/sock_diag.c b/net/core/sock_diag.c index 146b50e30659..aee5642affd9 100644 --- a/net/core/sock_diag.c +++ b/net/core/sock_diag.c @@ -328,6 +328,7 @@ static void __net_exit diag_net_exit(struct net *net) static struct pernet_operations diag_net_ops = { .init = diag_net_init, .exit = diag_net_exit, + .async = true, }; static int __init sock_diag_init(void) From b86b47a39598cdf17e0e826ebe1be21c798112cf Mon Sep 17 00:00:00 2001 From: Kirill Tkhai Date: Tue, 13 Feb 2018 12:31:01 +0300 Subject: [PATCH 32/32] net: Convert netlink_tap_net_ops These pernet_operations init just allocated net memory, and they obviously can be executed in parallel with any others. v3: New Signed-off-by: Kirill Tkhai Acked-by: Andrei Vagin Signed-off-by: David S. Miller --- net/netlink/af_netlink.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index b3065908e146..63cb55d3c2fd 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -253,6 +253,7 @@ static struct pernet_operations netlink_tap_net_ops = { .exit = netlink_tap_exit_net, .id = &netlink_tap_net_id, .size = sizeof(struct netlink_tap_net), + .async = true, }; static bool netlink_filter_tap(const struct sk_buff *skb)