diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index 5225832bd6ff..bb9cb84114c1 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -6,6 +6,7 @@ #include #include #include +#include #include extern int rtnetlink_send(struct sk_buff *skb, struct net *net, u32 pid, u32 group, int echo); @@ -34,6 +35,7 @@ extern void rtnl_unlock(void); extern int rtnl_trylock(void); extern int rtnl_is_locked(void); extern int rtnl_lock_killable(void); +extern bool refcount_dec_and_rtnl_lock(refcount_t *r); extern wait_queue_head_t netdev_unregistering_wq; extern struct rw_semaphore pernet_ops_rwsem; @@ -83,6 +85,11 @@ static inline struct netdev_queue *dev_ingress_queue(struct net_device *dev) return rtnl_dereference(dev->ingress_queue); } +static inline struct netdev_queue *dev_ingress_queue_rcu(struct net_device *dev) +{ + return rcu_dereference(dev->ingress_queue); +} + struct netdev_queue *dev_ingress_queue_create(struct net_device *dev); #ifdef CONFIG_NET_INGRESS diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h index 7dc769e5452b..a16fbe9a2a67 100644 --- a/include/net/pkt_sched.h +++ b/include/net/pkt_sched.h @@ -102,6 +102,7 @@ int qdisc_set_default(const char *id); void qdisc_hash_add(struct Qdisc *q, bool invisible); void qdisc_hash_del(struct Qdisc *q); struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle); +struct Qdisc *qdisc_lookup_rcu(struct net_device *dev, u32 handle); struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r, struct nlattr *tab, struct netlink_ext_ack *extack); diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index d326fd553b58..7a6b71ee5433 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -105,6 +105,7 @@ struct Qdisc { spinlock_t busylock ____cacheline_aligned_in_smp; spinlock_t seqlock; + struct rcu_head rcu; }; static inline void qdisc_refcount_inc(struct Qdisc *qdisc) @@ -114,6 +115,19 @@ static inline void qdisc_refcount_inc(struct 
Qdisc *qdisc) refcount_inc(&qdisc->refcnt); } +/* Intended to be used by unlocked users, when concurrent qdisc release is + * possible. + */ + +static inline struct Qdisc *qdisc_refcount_inc_nz(struct Qdisc *qdisc) +{ + if (qdisc->flags & TCQ_F_BUILTIN) + return qdisc; + if (refcount_inc_not_zero(&qdisc->refcnt)) + return qdisc; + return NULL; +} + static inline bool qdisc_is_running(struct Qdisc *qdisc) { if (qdisc->flags & TCQ_F_NOLOCK) @@ -331,7 +345,7 @@ struct tcf_chain { struct tcf_block { struct list_head chain_list; u32 index; /* block index for shared blocks */ - unsigned int refcnt; + refcount_t refcnt; struct net *net; struct Qdisc *q; struct list_head cb_list; @@ -343,6 +357,7 @@ struct tcf_block { struct tcf_chain *chain; struct list_head filter_chain_list; } chain0; + struct rcu_head rcu; }; static inline void tcf_block_offload_inc(struct tcf_block *block, u32 *flags) @@ -554,7 +569,8 @@ void dev_deactivate_many(struct list_head *head); struct Qdisc *dev_graft_qdisc(struct netdev_queue *dev_queue, struct Qdisc *qdisc); void qdisc_reset(struct Qdisc *qdisc); -void qdisc_destroy(struct Qdisc *qdisc); +void qdisc_put(struct Qdisc *qdisc); +void qdisc_put_unlocked(struct Qdisc *qdisc); void qdisc_tree_reduce_backlog(struct Qdisc *qdisc, unsigned int n, unsigned int len); struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue, diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 80a7e18c65fb..35162e1b06ad 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -130,6 +130,12 @@ int rtnl_is_locked(void) } EXPORT_SYMBOL(rtnl_is_locked); +bool refcount_dec_and_rtnl_lock(refcount_t *r) +{ + return refcount_dec_and_mutex_lock(r, &rtnl_mutex); +} +EXPORT_SYMBOL(refcount_dec_and_rtnl_lock); + #ifdef CONFIG_PROVE_LOCKING bool lockdep_rtnl_is_held(void) { diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 0a75cb2e5e7b..3de47e99b788 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -240,8 +240,8 @@ static void 
tcf_chain_destroy(struct tcf_chain *chain) if (!chain->index) block->chain0.chain = NULL; kfree(chain); - if (list_empty(&block->chain_list) && block->refcnt == 0) - kfree(block); + if (list_empty(&block->chain_list) && !refcount_read(&block->refcnt)) + kfree_rcu(block, rcu); } static void tcf_chain_hold(struct tcf_chain *chain) @@ -473,6 +473,7 @@ tcf_chain0_head_change_cb_del(struct tcf_block *block, } struct tcf_net { + spinlock_t idr_lock; /* Protects idr */ struct idr idr; }; @@ -482,16 +483,25 @@ static int tcf_block_insert(struct tcf_block *block, struct net *net, struct netlink_ext_ack *extack) { struct tcf_net *tn = net_generic(net, tcf_net_id); + int err; - return idr_alloc_u32(&tn->idr, block, &block->index, block->index, - GFP_KERNEL); + idr_preload(GFP_KERNEL); + spin_lock(&tn->idr_lock); + err = idr_alloc_u32(&tn->idr, block, &block->index, block->index, + GFP_NOWAIT); + spin_unlock(&tn->idr_lock); + idr_preload_end(); + + return err; } static void tcf_block_remove(struct tcf_block *block, struct net *net) { struct tcf_net *tn = net_generic(net, tcf_net_id); + spin_lock(&tn->idr_lock); idr_remove(&tn->idr, block->index); + spin_unlock(&tn->idr_lock); } static struct tcf_block *tcf_block_create(struct net *net, struct Qdisc *q, @@ -510,7 +520,7 @@ static struct tcf_block *tcf_block_create(struct net *net, struct Qdisc *q, INIT_LIST_HEAD(&block->owner_list); INIT_LIST_HEAD(&block->chain0.filter_chain_list); - block->refcnt = 1; + refcount_set(&block->refcnt, 1); block->net = net; block->index = block_index; @@ -527,6 +537,78 @@ static struct tcf_block *tcf_block_lookup(struct net *net, u32 block_index) return idr_find(&tn->idr, block_index); } +static struct tcf_block *tcf_block_refcnt_get(struct net *net, u32 block_index) +{ + struct tcf_block *block; + + rcu_read_lock(); + block = tcf_block_lookup(net, block_index); + if (block && !refcount_inc_not_zero(&block->refcnt)) + block = NULL; + rcu_read_unlock(); + + return block; +} + +static void 
tcf_block_flush_all_chains(struct tcf_block *block) +{ + struct tcf_chain *chain; + + /* Hold a refcnt for all chains, so that they don't disappear + * while we are iterating. + */ + list_for_each_entry(chain, &block->chain_list, list) + tcf_chain_hold(chain); + + list_for_each_entry(chain, &block->chain_list, list) + tcf_chain_flush(chain); +} + +static void tcf_block_put_all_chains(struct tcf_block *block) +{ + struct tcf_chain *chain, *tmp; + + /* At this point, all the chains should have refcnt >= 1. */ + list_for_each_entry_safe(chain, tmp, &block->chain_list, list) { + tcf_chain_put_explicitly_created(chain); + tcf_chain_put(chain); + } +} + +static void __tcf_block_put(struct tcf_block *block, struct Qdisc *q, + struct tcf_block_ext_info *ei) +{ + if (refcount_dec_and_test(&block->refcnt)) { + /* Flushing/putting all chains will cause the block to be + * deallocated when last chain is freed. However, if chain_list + * is empty, block has to be manually deallocated. After block + * reference counter reached 0, it is no longer possible to + * increment it or add new chains to block. + */ + bool free_block = list_empty(&block->chain_list); + + if (tcf_block_shared(block)) + tcf_block_remove(block, block->net); + if (!free_block) + tcf_block_flush_all_chains(block); + + if (q) + tcf_block_offload_unbind(block, q, ei); + + if (free_block) + kfree_rcu(block, rcu); + else + tcf_block_put_all_chains(block); + } else if (q) { + tcf_block_offload_unbind(block, q, ei); + } +} + +static void tcf_block_refcnt_put(struct tcf_block *block) +{ + __tcf_block_put(block, NULL, NULL); +} + /* Find tcf block. * Set q, parent, cl when appropriate. 
*/ @@ -537,9 +619,10 @@ static struct tcf_block *tcf_block_find(struct net *net, struct Qdisc **q, struct netlink_ext_ack *extack) { struct tcf_block *block; + int err = 0; if (ifindex == TCM_IFINDEX_MAGIC_BLOCK) { - block = tcf_block_lookup(net, block_index); + block = tcf_block_refcnt_get(net, block_index); if (!block) { NL_SET_ERR_MSG(extack, "Block of given index was not found"); return ERR_PTR(-EINVAL); @@ -548,55 +631,109 @@ static struct tcf_block *tcf_block_find(struct net *net, struct Qdisc **q, const struct Qdisc_class_ops *cops; struct net_device *dev; + rcu_read_lock(); + /* Find link */ - dev = __dev_get_by_index(net, ifindex); - if (!dev) + dev = dev_get_by_index_rcu(net, ifindex); + if (!dev) { + rcu_read_unlock(); return ERR_PTR(-ENODEV); + } /* Find qdisc */ if (!*parent) { *q = dev->qdisc; *parent = (*q)->handle; } else { - *q = qdisc_lookup(dev, TC_H_MAJ(*parent)); + *q = qdisc_lookup_rcu(dev, TC_H_MAJ(*parent)); if (!*q) { NL_SET_ERR_MSG(extack, "Parent Qdisc doesn't exists"); - return ERR_PTR(-EINVAL); + err = -EINVAL; + goto errout_rcu; } } + *q = qdisc_refcount_inc_nz(*q); + if (!*q) { + NL_SET_ERR_MSG(extack, "Parent Qdisc doesn't exists"); + err = -EINVAL; + goto errout_rcu; + } + /* Is it classful? */ cops = (*q)->ops->cl_ops; if (!cops) { NL_SET_ERR_MSG(extack, "Qdisc not classful"); - return ERR_PTR(-EINVAL); + err = -EINVAL; + goto errout_rcu; } if (!cops->tcf_block) { NL_SET_ERR_MSG(extack, "Class doesn't support blocks"); - return ERR_PTR(-EOPNOTSUPP); + err = -EOPNOTSUPP; + goto errout_rcu; } + /* At this point we know that qdisc is not noop_qdisc, + * which means that qdisc holds a reference to net_device + * and we hold a reference to qdisc, so it is safe to release + * rcu read lock. + */ + rcu_read_unlock(); + /* Do we search for filter, attached to class?
*/ if (TC_H_MIN(*parent)) { *cl = cops->find(*q, *parent); if (*cl == 0) { NL_SET_ERR_MSG(extack, "Specified class doesn't exist"); - return ERR_PTR(-ENOENT); + err = -ENOENT; + goto errout_qdisc; } } /* And the last stroke */ block = cops->tcf_block(*q, *cl, extack); - if (!block) - return ERR_PTR(-EINVAL); + if (!block) { + err = -EINVAL; + goto errout_qdisc; + } if (tcf_block_shared(block)) { NL_SET_ERR_MSG(extack, "This filter block is shared. Please use the block index to manipulate the filters"); - return ERR_PTR(-EOPNOTSUPP); + err = -EOPNOTSUPP; + goto errout_qdisc; } + + /* Always take reference to block in order to support execution + * of rules update path of cls API without rtnl lock. Caller + * must release block when it is finished using it. 'if' block + * of this conditional obtain reference to block by calling + * tcf_block_refcnt_get(). + */ + refcount_inc(&block->refcnt); } return block; + +errout_rcu: + rcu_read_unlock(); +errout_qdisc: + if (*q) { + qdisc_put(*q); + /* Clear the caller's pointer so that an error path calling + * tcf_block_release(q, ...) does not put the qdisc again. + */ + *q = NULL; + } + return ERR_PTR(err); +} + +static void tcf_block_release(struct Qdisc *q, struct tcf_block *block) +{ + if (!IS_ERR_OR_NULL(block)) + tcf_block_refcnt_put(block); + + if (q) + qdisc_put(q); } struct tcf_block_owner_item { @@ -664,21 +796,16 @@ int tcf_block_get_ext(struct tcf_block **p_block, struct Qdisc *q, { struct net *net = qdisc_net(q); struct tcf_block *block = NULL; - bool created = false; int err; - if (ei->block_index) { + if (ei->block_index) /* block_index not 0 means the shared block is requested */ - block = tcf_block_lookup(net, ei->block_index); - if (block) - block->refcnt++; - } + block = tcf_block_refcnt_get(net, ei->block_index); if (!block) { block = tcf_block_create(net, q, ei->block_index, extack); if (IS_ERR(block)) return PTR_ERR(block); - created = true; if (tcf_block_shared(block)) { err = tcf_block_insert(block, net, extack); if (err) @@ -708,14 +835,8 @@ err_block_offload_bind: err_chain0_head_change_cb_add: tcf_block_owner_del(block, q, ei->binder_type);
err_block_owner_add: - if (created) { - if (tcf_block_shared(block)) - tcf_block_remove(block, net); err_block_insert: - kfree(block); - } else { - block->refcnt--; - } + tcf_block_refcnt_put(block); return err; } EXPORT_SYMBOL(tcf_block_get_ext); @@ -747,42 +868,12 @@ EXPORT_SYMBOL(tcf_block_get); void tcf_block_put_ext(struct tcf_block *block, struct Qdisc *q, struct tcf_block_ext_info *ei) { - struct tcf_chain *chain, *tmp; - if (!block) return; tcf_chain0_head_change_cb_del(block, ei); tcf_block_owner_del(block, q, ei->binder_type); - if (block->refcnt == 1) { - if (tcf_block_shared(block)) - tcf_block_remove(block, block->net); - - /* Hold a refcnt for all chains, so that they don't disappear - * while we are iterating. - */ - list_for_each_entry(chain, &block->chain_list, list) - tcf_chain_hold(chain); - - list_for_each_entry(chain, &block->chain_list, list) - tcf_chain_flush(chain); - } - - tcf_block_offload_unbind(block, q, ei); - - if (block->refcnt == 1) { - /* At this point, all the chains should have refcnt >= 1. */ - list_for_each_entry_safe(chain, tmp, &block->chain_list, list) { - tcf_chain_put_explicitly_created(chain); - tcf_chain_put(chain); - } - - block->refcnt--; - if (list_empty(&block->chain_list)) - kfree(block); - } else { - block->refcnt--; - } + __tcf_block_put(block, q, ei); } EXPORT_SYMBOL(tcf_block_put_ext); @@ -1332,6 +1423,7 @@ replay: errout: if (chain) tcf_chain_put(chain); + tcf_block_release(q, block); if (err == -EAGAIN) /* Replay the request. 
*/ goto replay; @@ -1453,6 +1545,7 @@ static int tc_del_tfilter(struct sk_buff *skb, struct nlmsghdr *n, errout: if (chain) tcf_chain_put(chain); + tcf_block_release(q, block); return err; } @@ -1538,6 +1631,7 @@ static int tc_get_tfilter(struct sk_buff *skb, struct nlmsghdr *n, errout: if (chain) tcf_chain_put(chain); + tcf_block_release(q, block); return err; } @@ -1636,7 +1730,7 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb) return err; if (tcm->tcm_ifindex == TCM_IFINDEX_MAGIC_BLOCK) { - block = tcf_block_lookup(net, tcm->tcm_block_index); + block = tcf_block_refcnt_get(net, tcm->tcm_block_index); if (!block) goto out; /* If we work with block index, q is NULL and parent value @@ -1695,6 +1789,8 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb) } } + if (tcm->tcm_ifindex == TCM_IFINDEX_MAGIC_BLOCK) + tcf_block_refcnt_put(block); cb->args[0] = index; out: @@ -1854,7 +1950,8 @@ replay: chain_index = tca[TCA_CHAIN] ? nla_get_u32(tca[TCA_CHAIN]) : 0; if (chain_index > TC_ACT_EXT_VAL_MASK) { NL_SET_ERR_MSG(extack, "Specified chain index exceeds upper limit"); - return -EINVAL; + err = -EINVAL; + goto errout_block; } chain = tcf_chain_lookup(block, chain_index); if (n->nlmsg_type == RTM_NEWCHAIN) { @@ -1866,23 +1963,27 @@ replay: tcf_chain_hold(chain); } else { NL_SET_ERR_MSG(extack, "Filter chain already exists"); - return -EEXIST; + err = -EEXIST; + goto errout_block; } } else { if (!(n->nlmsg_flags & NLM_F_CREATE)) { NL_SET_ERR_MSG(extack, "Need both RTM_NEWCHAIN and NLM_F_CREATE to create a new chain"); - return -ENOENT; + err = -ENOENT; + goto errout_block; } chain = tcf_chain_create(block, chain_index); if (!chain) { NL_SET_ERR_MSG(extack, "Failed to create filter chain"); - return -ENOMEM; + err = -ENOMEM; + goto errout_block; } } } else { if (!chain || tcf_chain_held_by_acts_only(chain)) { NL_SET_ERR_MSG(extack, "Cannot find specified filter chain"); - return -EINVAL; + err = -EINVAL; + goto 
errout_block; } tcf_chain_hold(chain); } @@ -1926,6 +2027,8 @@ replay: errout: tcf_chain_put(chain); +errout_block: + tcf_block_release(q, block); if (err == -EAGAIN) /* Replay the request. */ goto replay; @@ -1954,7 +2057,7 @@ static int tc_dump_chain(struct sk_buff *skb, struct netlink_callback *cb) return err; if (tcm->tcm_ifindex == TCM_IFINDEX_MAGIC_BLOCK) { - block = tcf_block_lookup(net, tcm->tcm_block_index); + block = tcf_block_refcnt_get(net, tcm->tcm_block_index); if (!block) goto out; /* If we work with block index, q is NULL and parent value @@ -2021,6 +2124,8 @@ static int tc_dump_chain(struct sk_buff *skb, struct netlink_callback *cb) index++; } + if (tcm->tcm_ifindex == TCM_IFINDEX_MAGIC_BLOCK) + tcf_block_refcnt_put(block); cb->args[0] = index; out: @@ -2213,6 +2318,7 @@ static __net_init int tcf_net_init(struct net *net) { struct tcf_net *tn = net_generic(net, tcf_net_id); + spin_lock_init(&tn->idr_lock); idr_init(&tn->idr); return 0; } diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 411c40344b77..22e9799e5b69 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -314,6 +314,24 @@ out: return q; } +struct Qdisc *qdisc_lookup_rcu(struct net_device *dev, u32 handle) +{ + struct netdev_queue *nq; + struct Qdisc *q; + + if (!handle) + return NULL; + q = qdisc_match_from_root(dev->qdisc, handle); + if (q) + goto out; + + nq = dev_ingress_queue_rcu(dev); + if (nq) + q = qdisc_match_from_root(nq->qdisc_sleeping, handle); +out: + return q; +} + static struct Qdisc *qdisc_leaf(struct Qdisc *p, u32 classid) { unsigned long cl; @@ -920,7 +938,7 @@ static void notify_and_destroy(struct net *net, struct sk_buff *skb, qdisc_notify(net, skb, n, clid, old, new); if (old) - qdisc_destroy(old); + qdisc_put(old); } /* Graft qdisc "new" to class "classid" of qdisc "parent" or @@ -973,7 +991,7 @@ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent, qdisc_refcount_inc(new); if (!ingress) - qdisc_destroy(old); + qdisc_put(old); } 
skip: @@ -1561,7 +1579,7 @@ graft: err = qdisc_graft(dev, p, skb, n, clid, q, NULL, extack); if (err) { if (q) - qdisc_destroy(q); + qdisc_put(q); return err; } diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c index cd49afca9617..d714d3747bcb 100644 --- a/net/sched/sch_atm.c +++ b/net/sched/sch_atm.c @@ -150,7 +150,7 @@ static void atm_tc_put(struct Qdisc *sch, unsigned long cl) pr_debug("atm_tc_put: destroying\n"); list_del_init(&flow->list); pr_debug("atm_tc_put: qdisc %p\n", flow->q); - qdisc_destroy(flow->q); + qdisc_put(flow->q); tcf_block_put(flow->block); if (flow->sock) { pr_debug("atm_tc_put: f_count %ld\n", diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c index f42025d53cfe..4dc05409e3fb 100644 --- a/net/sched/sch_cbq.c +++ b/net/sched/sch_cbq.c @@ -1418,7 +1418,7 @@ static void cbq_destroy_class(struct Qdisc *sch, struct cbq_class *cl) WARN_ON(cl->filters); tcf_block_put(cl->block); - qdisc_destroy(cl->q); + qdisc_put(cl->q); qdisc_put_rtab(cl->R_tab); gen_kill_estimator(&cl->rate_est); if (cl != &q->link) diff --git a/net/sched/sch_cbs.c b/net/sched/sch_cbs.c index e26a24017faa..e689e11b6d0f 100644 --- a/net/sched/sch_cbs.c +++ b/net/sched/sch_cbs.c @@ -379,7 +379,7 @@ static void cbs_destroy(struct Qdisc *sch) cbs_disable_offload(dev, q); if (q->qdisc) - qdisc_destroy(q->qdisc); + qdisc_put(q->qdisc); } static int cbs_dump(struct Qdisc *sch, struct sk_buff *skb) diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c index e0b0cf8a9939..cdebaed0f8cf 100644 --- a/net/sched/sch_drr.c +++ b/net/sched/sch_drr.c @@ -134,7 +134,7 @@ static int drr_change_class(struct Qdisc *sch, u32 classid, u32 parentid, tca[TCA_RATE]); if (err) { NL_SET_ERR_MSG(extack, "Failed to replace estimator"); - qdisc_destroy(cl->qdisc); + qdisc_put(cl->qdisc); kfree(cl); return err; } @@ -153,7 +153,7 @@ static int drr_change_class(struct Qdisc *sch, u32 classid, u32 parentid, static void drr_destroy_class(struct Qdisc *sch, struct drr_class *cl) { 
gen_kill_estimator(&cl->rate_est); - qdisc_destroy(cl->qdisc); + qdisc_put(cl->qdisc); kfree(cl); } diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c index 049714c57075..f6f480784bc6 100644 --- a/net/sched/sch_dsmark.c +++ b/net/sched/sch_dsmark.c @@ -412,7 +412,7 @@ static void dsmark_destroy(struct Qdisc *sch) pr_debug("%s(sch %p,[qdisc %p])\n", __func__, sch, p); tcf_block_put(p->block); - qdisc_destroy(p->q); + qdisc_put(p->q); if (p->mv != p->embedded) kfree(p->mv); } diff --git a/net/sched/sch_fifo.c b/net/sched/sch_fifo.c index 24893d3b5d22..3809c9bf8896 100644 --- a/net/sched/sch_fifo.c +++ b/net/sched/sch_fifo.c @@ -177,7 +177,7 @@ struct Qdisc *fifo_create_dflt(struct Qdisc *sch, struct Qdisc_ops *ops, if (q) { err = fifo_set_limit(q, limit); if (err < 0) { - qdisc_destroy(q); + qdisc_put(q); q = NULL; } } diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index a64132a5db36..531fac1d2875 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -901,7 +901,7 @@ struct Qdisc *qdisc_create_dflt(struct netdev_queue *dev_queue, if (!ops->init || ops->init(sch, NULL, extack) == 0) return sch; - qdisc_destroy(sch); + qdisc_put(sch); return NULL; } EXPORT_SYMBOL(qdisc_create_dflt); @@ -941,15 +941,18 @@ void qdisc_free(struct Qdisc *qdisc) kfree((char *) qdisc - qdisc->padded); } -void qdisc_destroy(struct Qdisc *qdisc) +static void qdisc_free_cb(struct rcu_head *head) +{ + struct Qdisc *q = container_of(head, struct Qdisc, rcu); + + qdisc_free(q); +} + +static void qdisc_destroy(struct Qdisc *qdisc) { const struct Qdisc_ops *ops = qdisc->ops; struct sk_buff *skb, *tmp; - if (qdisc->flags & TCQ_F_BUILTIN || - !refcount_dec_and_test(&qdisc->refcnt)) - return; - #ifdef CONFIG_NET_SCHED qdisc_hash_del(qdisc); @@ -974,9 +977,34 @@ void qdisc_destroy(struct Qdisc *qdisc) kfree_skb_list(skb); } - qdisc_free(qdisc); + call_rcu(&qdisc->rcu, qdisc_free_cb); } -EXPORT_SYMBOL(qdisc_destroy); + +void qdisc_put(struct Qdisc *qdisc) +{ + if
(qdisc->flags & TCQ_F_BUILTIN || + !refcount_dec_and_test(&qdisc->refcnt)) + return; + + qdisc_destroy(qdisc); +} +EXPORT_SYMBOL(qdisc_put); + +/* Version of qdisc_put() that is called with rtnl mutex unlocked. + * Intended to be used as optimization, this function only takes rtnl lock if + * qdisc reference counter reached zero. + */ + +void qdisc_put_unlocked(struct Qdisc *qdisc) +{ + if (qdisc->flags & TCQ_F_BUILTIN || + !refcount_dec_and_rtnl_lock(&qdisc->refcnt)) + return; + + qdisc_destroy(qdisc); + rtnl_unlock(); +} +EXPORT_SYMBOL(qdisc_put_unlocked); /* Attach toplevel qdisc to device queue. */ struct Qdisc *dev_graft_qdisc(struct netdev_queue *dev_queue, @@ -1270,7 +1298,7 @@ static void shutdown_scheduler_queue(struct net_device *dev, rcu_assign_pointer(dev_queue->qdisc, qdisc_default); dev_queue->qdisc_sleeping = qdisc_default; - qdisc_destroy(qdisc); + qdisc_put(qdisc); } } @@ -1279,7 +1307,7 @@ void dev_shutdown(struct net_device *dev) netdev_for_each_tx_queue(dev, shutdown_scheduler_queue, &noop_qdisc); if (dev_ingress_queue(dev)) shutdown_scheduler_queue(dev, dev_ingress_queue(dev), &noop_qdisc); - qdisc_destroy(dev->qdisc); + qdisc_put(dev->qdisc); dev->qdisc = &noop_qdisc; WARN_ON(timer_pending(&dev->watchdog_timer)); diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c index 3278a76f6861..b18ec1f6de60 100644 --- a/net/sched/sch_hfsc.c +++ b/net/sched/sch_hfsc.c @@ -1092,7 +1092,7 @@ hfsc_destroy_class(struct Qdisc *sch, struct hfsc_class *cl) struct hfsc_sched *q = qdisc_priv(sch); tcf_block_put(cl->block); - qdisc_destroy(cl->qdisc); + qdisc_put(cl->qdisc); gen_kill_estimator(&cl->rate_est); if (cl != &q->root) kfree(cl); diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 18ac2d6ca294..58b449490757 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -1208,7 +1208,7 @@ static void htb_destroy_class(struct Qdisc *sch, struct htb_class *cl) { if (!cl->level) { WARN_ON(!cl->leaf.q); - qdisc_destroy(cl->leaf.q); + 
qdisc_put(cl->leaf.q); } gen_kill_estimator(&cl->rate_est); tcf_block_put(cl->block); @@ -1409,7 +1409,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, /* turn parent into inner node */ qdisc_reset(parent->leaf.q); qdisc_tree_reduce_backlog(parent->leaf.q, qlen, backlog); - qdisc_destroy(parent->leaf.q); + qdisc_put(parent->leaf.q); if (parent->prio_activity) htb_deactivate(q, parent); diff --git a/net/sched/sch_mq.c b/net/sched/sch_mq.c index d6b8ae4ed7a3..f20f3a0f8424 100644 --- a/net/sched/sch_mq.c +++ b/net/sched/sch_mq.c @@ -65,7 +65,7 @@ static void mq_destroy(struct Qdisc *sch) if (!priv->qdiscs) return; for (ntx = 0; ntx < dev->num_tx_queues && priv->qdiscs[ntx]; ntx++) - qdisc_destroy(priv->qdiscs[ntx]); + qdisc_put(priv->qdiscs[ntx]); kfree(priv->qdiscs); } @@ -119,7 +119,7 @@ static void mq_attach(struct Qdisc *sch) qdisc = priv->qdiscs[ntx]; old = dev_graft_qdisc(qdisc->dev_queue, qdisc); if (old) - qdisc_destroy(old); + qdisc_put(old); #ifdef CONFIG_NET_SCHED if (ntx < dev->real_num_tx_queues) qdisc_hash_add(qdisc, false); diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c index 0e9d761cdd80..d364e63c396d 100644 --- a/net/sched/sch_mqprio.c +++ b/net/sched/sch_mqprio.c @@ -40,7 +40,7 @@ static void mqprio_destroy(struct Qdisc *sch) for (ntx = 0; ntx < dev->num_tx_queues && priv->qdiscs[ntx]; ntx++) - qdisc_destroy(priv->qdiscs[ntx]); + qdisc_put(priv->qdiscs[ntx]); kfree(priv->qdiscs); } @@ -300,7 +300,7 @@ static void mqprio_attach(struct Qdisc *sch) qdisc = priv->qdiscs[ntx]; old = dev_graft_qdisc(qdisc->dev_queue, qdisc); if (old) - qdisc_destroy(old); + qdisc_put(old); if (ntx < dev->real_num_tx_queues) qdisc_hash_add(qdisc, false); } diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c index 1da7ea8de0ad..7410ce4d0321 100644 --- a/net/sched/sch_multiq.c +++ b/net/sched/sch_multiq.c @@ -175,7 +175,7 @@ multiq_destroy(struct Qdisc *sch) tcf_block_put(q->block); for (band = 0; band < q->bands; band++) - 
qdisc_destroy(q->queues[band]); + qdisc_put(q->queues[band]); kfree(q->queues); } @@ -204,7 +204,7 @@ static int multiq_tune(struct Qdisc *sch, struct nlattr *opt, q->queues[i] = &noop_qdisc; qdisc_tree_reduce_backlog(child, child->q.qlen, child->qstats.backlog); - qdisc_destroy(child); + qdisc_put(child); } } @@ -228,7 +228,7 @@ static int multiq_tune(struct Qdisc *sch, struct nlattr *opt, qdisc_tree_reduce_backlog(old, old->q.qlen, old->qstats.backlog); - qdisc_destroy(old); + qdisc_put(old); } sch_tree_unlock(sch); } diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index 506e1960ed7f..57b3ad9394ad 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -1022,7 +1022,7 @@ static void netem_destroy(struct Qdisc *sch) qdisc_watchdog_cancel(&q->watchdog); if (q->qdisc) - qdisc_destroy(q->qdisc); + qdisc_put(q->qdisc); dist_free(q->delay_dist); dist_free(q->slot_dist); } diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c index 222e53d3d27a..f8af98621179 100644 --- a/net/sched/sch_prio.c +++ b/net/sched/sch_prio.c @@ -175,7 +175,7 @@ prio_destroy(struct Qdisc *sch) tcf_block_put(q->block); prio_offload(sch, NULL); for (prio = 0; prio < q->bands; prio++) - qdisc_destroy(q->queues[prio]); + qdisc_put(q->queues[prio]); } static int prio_tune(struct Qdisc *sch, struct nlattr *opt, @@ -205,7 +205,7 @@ static int prio_tune(struct Qdisc *sch, struct nlattr *opt, extack); if (!queues[i]) { while (i > oldbands) - qdisc_destroy(queues[--i]); + qdisc_put(queues[--i]); return -ENOMEM; } } @@ -220,7 +220,7 @@ static int prio_tune(struct Qdisc *sch, struct nlattr *opt, qdisc_tree_reduce_backlog(child, child->q.qlen, child->qstats.backlog); - qdisc_destroy(child); + qdisc_put(child); } for (i = oldbands; i < q->bands; i++) { diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c index bb1a9c11fc54..dc37c4ead439 100644 --- a/net/sched/sch_qfq.c +++ b/net/sched/sch_qfq.c @@ -526,7 +526,7 @@ set_change_agg: return 0; destroy_class: - qdisc_destroy(cl->qdisc); 
+ qdisc_put(cl->qdisc); kfree(cl); return err; } @@ -537,7 +537,7 @@ static void qfq_destroy_class(struct Qdisc *sch, struct qfq_class *cl) qfq_rm_from_agg(q, cl); gen_kill_estimator(&cl->rate_est); - qdisc_destroy(cl->qdisc); + qdisc_put(cl->qdisc); kfree(cl); } diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c index 56c181c3feeb..3ce6c0a2c493 100644 --- a/net/sched/sch_red.c +++ b/net/sched/sch_red.c @@ -181,7 +181,7 @@ static void red_destroy(struct Qdisc *sch) del_timer_sync(&q->adapt_timer); red_offload(sch, false); - qdisc_destroy(q->qdisc); + qdisc_put(q->qdisc); } static const struct nla_policy red_policy[TCA_RED_MAX + 1] = { @@ -233,7 +233,7 @@ static int red_change(struct Qdisc *sch, struct nlattr *opt, if (child) { qdisc_tree_reduce_backlog(q->qdisc, q->qdisc->q.qlen, q->qdisc->qstats.backlog); - qdisc_destroy(q->qdisc); + qdisc_put(q->qdisc); q->qdisc = child; } diff --git a/net/sched/sch_sfb.c b/net/sched/sch_sfb.c index 7cbdad8419b7..bab506b01a32 100644 --- a/net/sched/sch_sfb.c +++ b/net/sched/sch_sfb.c @@ -469,7 +469,7 @@ static void sfb_destroy(struct Qdisc *sch) struct sfb_sched_data *q = qdisc_priv(sch); tcf_block_put(q->block); - qdisc_destroy(q->qdisc); + qdisc_put(q->qdisc); } static const struct nla_policy sfb_policy[TCA_SFB_MAX + 1] = { @@ -523,7 +523,7 @@ static int sfb_change(struct Qdisc *sch, struct nlattr *opt, qdisc_tree_reduce_backlog(q->qdisc, q->qdisc->q.qlen, q->qdisc->qstats.backlog); - qdisc_destroy(q->qdisc); + qdisc_put(q->qdisc); q->qdisc = child; q->rehash_interval = msecs_to_jiffies(ctl->rehash_interval); diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c index a4530e85bd02..942dcca09cf2 100644 --- a/net/sched/sch_tbf.c +++ b/net/sched/sch_tbf.c @@ -392,7 +392,7 @@ static int tbf_change(struct Qdisc *sch, struct nlattr *opt, if (child) { qdisc_tree_reduce_backlog(q->qdisc, q->qdisc->q.qlen, q->qdisc->qstats.backlog); - qdisc_destroy(q->qdisc); + qdisc_put(q->qdisc); q->qdisc = child; } q->limit = qopt->limit; @@ 
-438,7 +438,7 @@ static void tbf_destroy(struct Qdisc *sch) struct tbf_sched_data *q = qdisc_priv(sch); qdisc_watchdog_cancel(&q->watchdog); - qdisc_destroy(q->qdisc); + qdisc_put(q->qdisc); } static int tbf_dump(struct Qdisc *sch, struct sk_buff *skb)