From e9a441b6e729e16092fcc18e3962b952a01d1e3c Mon Sep 17 00:00:00 2001 From: Kirill Tkhai Date: Thu, 29 Mar 2018 17:03:25 +0300 Subject: [PATCH 1/3] xfrm: Register xfrm_dev_notifier in appropriate place Currently, driver registers it from pernet_operations::init method, and this breaks modularity, because initialization of net namespace and netdevice notifiers are orthogonal actions. We don't have per-namespace netdevice notifiers; all of them are global for all devices in all namespaces. Signed-off-by: Kirill Tkhai Signed-off-by: David S. Miller --- include/net/xfrm.h | 2 +- net/xfrm/xfrm_device.c | 2 +- net/xfrm/xfrm_policy.c | 3 +-- 3 files changed, 3 insertions(+), 4 deletions(-) diff --git a/include/net/xfrm.h b/include/net/xfrm.h index aa027ba1d032..a872379b69da 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -1894,7 +1894,7 @@ static inline struct xfrm_offload *xfrm_offload(struct sk_buff *skb) #endif } -void __net_init xfrm_dev_init(void); +void __init xfrm_dev_init(void); #ifdef CONFIG_XFRM_OFFLOAD void xfrm_dev_resume(struct sk_buff *skb); diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c index e87d6c4dd5b6..175941e15a6e 100644 --- a/net/xfrm/xfrm_device.c +++ b/net/xfrm/xfrm_device.c @@ -350,7 +350,7 @@ static struct notifier_block xfrm_dev_notifier = { .notifier_call = xfrm_dev_event, }; -void __net_init xfrm_dev_init(void) +void __init xfrm_dev_init(void) { register_netdevice_notifier(&xfrm_dev_notifier); } diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 0e065db6c7c0..40b54cc64243 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -2895,8 +2895,6 @@ static int __net_init xfrm_policy_init(struct net *net) INIT_LIST_HEAD(&net->xfrm.policy_all); INIT_WORK(&net->xfrm.policy_hash_work, xfrm_hash_resize); INIT_WORK(&net->xfrm.policy_hthresh.work, xfrm_hash_rebuild); - if (net_eq(net, &init_net)) - xfrm_dev_init(); return 0; out_bydst: @@ -2999,6 +2997,7 @@ void __init xfrm_init(void) INIT_WORK(&xfrm_pcpu_work[i], xfrm_pcpu_work_fn); register_pernet_subsys(&xfrm_net_ops); + xfrm_dev_init(); seqcount_init(&xfrm_policy_hash_generation); xfrm_input_init(); } From 9e2f6c5d78db6647eb9d7bfeb20b9e0f9ff2c56c Mon Sep 17 00:00:00 2001 From: Kirill Tkhai Date: Thu, 29 Mar 2018 17:03:35 +0300 Subject: [PATCH 2/3] netfilter: Rework xt_TEE netdevice notifier Register netdevice notifier for every iptable entry is not good, since this breaks modularity, and the hidden synchronization is based on rtnl_lock(). This patch reworks the synchronization via new lock, while the rest of logic remains as it was before. This is required for the next patch. Tested via: while :; do unshare -n iptables -t mangle -A OUTPUT -j TEE --gateway 1.1.1.2 --oif lo; done Signed-off-by: Kirill Tkhai Acked-by: Pablo Neira Ayuso Signed-off-by: David S. Miller --- net/netfilter/xt_TEE.c | 73 ++++++++++++++++++++++++++---------------- 1 file changed, 46 insertions(+), 27 deletions(-) diff --git a/net/netfilter/xt_TEE.c b/net/netfilter/xt_TEE.c index 86b0580b2216..475957cfcf50 100644 --- a/net/netfilter/xt_TEE.c +++ b/net/netfilter/xt_TEE.c @@ -20,7 +20,7 @@ #include struct xt_tee_priv { - struct notifier_block notifier; + struct list_head list; struct xt_tee_tginfo *tginfo; int oif; }; @@ -51,29 +51,35 @@ tee_tg6(struct sk_buff *skb, const struct xt_action_param *par) } #endif +static DEFINE_MUTEX(priv_list_mutex); +static LIST_HEAD(priv_list); + static int tee_netdev_event(struct notifier_block *this, unsigned long event, void *ptr) { struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct xt_tee_priv *priv; - priv = container_of(this, struct xt_tee_priv, notifier); - switch (event) { - case NETDEV_REGISTER: - if (!strcmp(dev->name, priv->tginfo->oif)) - priv->oif = dev->ifindex; - break; - case NETDEV_UNREGISTER: - if (dev->ifindex == priv->oif) - priv->oif = -1; - break; - case NETDEV_CHANGENAME: - if (!strcmp(dev->name, priv->tginfo->oif)) - priv->oif = dev->ifindex; - else if (dev->ifindex == priv->oif) - priv->oif = -1; - break; + mutex_lock(&priv_list_mutex); + list_for_each_entry(priv, &priv_list, list) { + switch (event) { + case NETDEV_REGISTER: + if (!strcmp(dev->name, priv->tginfo->oif)) + priv->oif = dev->ifindex; + break; + case NETDEV_UNREGISTER: + if (dev->ifindex == priv->oif) + priv->oif = -1; + break; + case NETDEV_CHANGENAME: + if (!strcmp(dev->name, priv->tginfo->oif)) + priv->oif = dev->ifindex; + else if (dev->ifindex == priv->oif) + priv->oif = -1; + break; + } } + mutex_unlock(&priv_list_mutex); return NOTIFY_DONE; } @@ -89,8 +95,6 @@ static int tee_tg_check(const struct xt_tgchk_param *par) return -EINVAL; if (info->oif[0]) { - int ret; - if (info->oif[sizeof(info->oif)-1] != '\0') return -EINVAL; @@ -100,14 +104,11 @@ static int tee_tg_check(const struct xt_tgchk_param *par) priv->tginfo = info; priv->oif = -1; - priv->notifier.notifier_call = tee_netdev_event; info->priv = priv; - ret = register_netdevice_notifier(&priv->notifier); - if (ret) { - kfree(priv); - return ret; - } + mutex_lock(&priv_list_mutex); + list_add(&priv->list, &priv_list); + mutex_unlock(&priv_list_mutex); } else info->priv = NULL; @@ -120,7 +121,9 @@ static void tee_tg_destroy(const struct xt_tgdtor_param *par) struct xt_tee_tginfo *info = par->targinfo; if (info->priv) { - unregister_netdevice_notifier(&info->priv->notifier); + mutex_lock(&priv_list_mutex); + list_del(&info->priv->list); + mutex_unlock(&priv_list_mutex); kfree(info->priv); } static_key_slow_dec(&xt_tee_enabled); @@ -153,13 +156,29 @@ static struct xt_target tee_tg_reg[] __read_mostly = { #endif }; +static struct notifier_block tee_netdev_notifier = { + .notifier_call = tee_netdev_event, +}; + static int __init tee_tg_init(void) { - return xt_register_targets(tee_tg_reg, ARRAY_SIZE(tee_tg_reg)); + int ret; + + ret = xt_register_targets(tee_tg_reg, ARRAY_SIZE(tee_tg_reg)); + if (ret) + return ret; + ret = register_netdevice_notifier(&tee_netdev_notifier); + if (ret) { + xt_unregister_targets(tee_tg_reg, ARRAY_SIZE(tee_tg_reg)); + return ret; + } + + return 0; } static void __exit tee_tg_exit(void) { + unregister_netdevice_notifier(&tee_netdev_notifier); xt_unregister_targets(tee_tg_reg, ARRAY_SIZE(tee_tg_reg)); } From 328fbe747ad4622f0dfa98d2e19e836f03f80c04 Mon Sep 17 00:00:00 2001 From: Kirill Tkhai Date: Thu, 29 Mar 2018 17:03:45 +0300 Subject: [PATCH 3/3] net: Close race between {un, }register_netdevice_notifier() and setup_net()/cleanup_net() {un,}register_netdevice_notifier() iterate over all net namespaces hashed to net_namespace_list. But pernet_operations register and unregister netdevices in unhashed net namespace, and they are not seen for netdevice notifiers. This results in asymmetry: 1)Race with register_netdevice_notifier() pernet_operations::init(net) ... register_netdevice() ... call_netdevice_notifiers() ... ... nb is not called ... ... register_netdevice_notifier(nb) -> net skipped ... ... list_add_tail(&net->list, ..) ... Then, userspace stops using net, and it's destructed: pernet_operations::exit(net) unregister_netdevice() call_netdevice_notifiers() ... nb is called ... This always happens with net::loopback_dev, but it may be not the only device. 2)Race with unregister_netdevice_notifier() pernet_operations::init(net) register_netdevice() call_netdevice_notifiers() ... nb is called ... Then, userspace stops using net, and it's destructed: list_del_rcu(&net->list) ... pernet_operations::exit(net) unregister_netdevice_notifier(nb) -> net skipped dev_change_net_namespace() ... call_netdevice_notifiers() ... nb is not called ... unregister_netdevice() call_netdevice_notifiers() ... nb is not called ... This race is more danger, since dev_change_net_namespace() moves real network devices, which use not trivial netdevice notifiers, and if this will happen, the system will be left in unpredictable state. The patch closes the race. During the testing I found two places, where register_netdevice_notifier() is called from pernet init/exit methods (which led to deadlock) and fixed them (see previous patches). The review moved me to one more unusual registration place: raw_init() (can driver). It may be a reason of problems, if someone creates in-kernel CAN_RAW sockets, since they will be destroyed in exit method and raw_release() will call unregister_netdevice_notifier(). But grep over kernel tree does not show, someone creates such sockets from kernel space. Theoretically, there can be more places like this, and which are hidden from review, but we found them on the first bumping there (since there is no a race, it will be 100% reproducible). Signed-off-by: Kirill Tkhai Signed-off-by: David S. Miller --- net/core/dev.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/net/core/dev.c b/net/core/dev.c index 1ddb6b9c58a8..07da7add4845 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1625,6 +1625,8 @@ int register_netdevice_notifier(struct notifier_block *nb) struct net *net; int err; + /* Close race with setup_net() and cleanup_net() */ + down_write(&pernet_ops_rwsem); rtnl_lock(); err = raw_notifier_chain_register(&netdev_chain, nb); if (err) @@ -1649,6 +1651,7 @@ int register_netdevice_notifier(struct notifier_block *nb) unlock: rtnl_unlock(); + up_write(&pernet_ops_rwsem); return err; rollback: @@ -1694,6 +1697,8 @@ int unregister_netdevice_notifier(struct notifier_block *nb) struct net *net; int err; + /* Close race with setup_net() and cleanup_net() */ + down_write(&pernet_ops_rwsem); rtnl_lock(); err = raw_notifier_chain_unregister(&netdev_chain, nb); if (err) @@ -1713,6 +1718,7 @@ int unregister_netdevice_notifier(struct notifier_block *nb) up_read(&net_rwsem); unlock: rtnl_unlock(); + up_write(&pernet_ops_rwsem); return err; } EXPORT_SYMBOL(unregister_netdevice_notifier);