blkcg: always create the blkcg_gq for the root blkcg

Currently, blkcg does a minor optimization where the root blkcg is
created when the first blkcg policy is activated on a queue and
destroyed on the deactivation of the last.  On systems where blkcg is
configured but not used, this saves one blkcg_gq struct per queue.  On
systems where blkcg is actually used, there's no difference.  The only
case where this can lead to any meaningful, albeit still minute, saving
in memory consumption is when all blkcg policies are deactivated after
being widely used in the system, which is a highly unlikely scenario.

The conditional existence of root blkcg_gq has already created several
bugs in blkcg and has become an issue once again for the new per-cgroup
wb_congested mechanism for cgroup writeback support, leading to a NULL
dereference when no blkcg policy is active.  This is really not worth
bothering with.  This patch makes blkcg always allocate and link the
root blkcg_gq and release it only on queue destruction.

Signed-off-by: Tejun Heo <tj@kernel.org>
Reported-by: Fengguang Wu <fengguang.wu@intel.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
This commit is contained in:
Tejun Heo 2015-05-22 17:13:19 -04:00 committed by Jens Axboe
parent efa7d1c733
commit ec13b1d6f0

View File

@ -235,13 +235,8 @@ static struct blkcg_gq *blkg_create(struct blkcg *blkcg,
blkg->online = true; blkg->online = true;
spin_unlock(&blkcg->lock); spin_unlock(&blkcg->lock);
if (!ret) { if (!ret)
if (blkcg == &blkcg_root) {
q->root_blkg = blkg;
q->root_rl.blkg = blkg;
}
return blkg; return blkg;
}
/* @blkg failed fully initialized, use the usual release path */ /* @blkg failed fully initialized, use the usual release path */
blkg_put(blkg); blkg_put(blkg);
@ -339,15 +334,6 @@ static void blkg_destroy(struct blkcg_gq *blkg)
if (rcu_access_pointer(blkcg->blkg_hint) == blkg) if (rcu_access_pointer(blkcg->blkg_hint) == blkg)
rcu_assign_pointer(blkcg->blkg_hint, NULL); rcu_assign_pointer(blkcg->blkg_hint, NULL);
/*
* If root blkg is destroyed. Just clear the pointer since root_rl
* does not take reference on root blkg.
*/
if (blkcg == &blkcg_root) {
blkg->q->root_blkg = NULL;
blkg->q->root_rl.blkg = NULL;
}
/* /*
* Put the reference taken at the time of creation so that when all * Put the reference taken at the time of creation so that when all
* queues are gone, group can be destroyed. * queues are gone, group can be destroyed.
@ -855,9 +841,45 @@ done:
*/ */
int blkcg_init_queue(struct request_queue *q) int blkcg_init_queue(struct request_queue *q)
{ {
might_sleep(); struct blkcg_gq *new_blkg, *blkg;
bool preloaded;
int ret;
return blk_throtl_init(q); new_blkg = blkg_alloc(&blkcg_root, q, GFP_KERNEL);
if (!new_blkg)
return -ENOMEM;
preloaded = !radix_tree_preload(GFP_KERNEL);
/*
* Make sure the root blkg exists and count the existing blkgs. As
* @q is bypassing at this point, blkg_lookup_create() can't be
* used. Open code insertion.
*/
rcu_read_lock();
spin_lock_irq(q->queue_lock);
blkg = blkg_create(&blkcg_root, q, new_blkg);
spin_unlock_irq(q->queue_lock);
rcu_read_unlock();
if (preloaded)
radix_tree_preload_end();
if (IS_ERR(blkg)) {
kfree(new_blkg);
return PTR_ERR(blkg);
}
q->root_blkg = blkg;
q->root_rl.blkg = blkg;
ret = blk_throtl_init(q);
if (ret) {
spin_lock_irq(q->queue_lock);
blkg_destroy_all(q);
spin_unlock_irq(q->queue_lock);
}
return ret;
} }
/** /**
@ -958,52 +980,20 @@ int blkcg_activate_policy(struct request_queue *q,
const struct blkcg_policy *pol) const struct blkcg_policy *pol)
{ {
LIST_HEAD(pds); LIST_HEAD(pds);
struct blkcg_gq *blkg, *new_blkg; struct blkcg_gq *blkg;
struct blkg_policy_data *pd, *n; struct blkg_policy_data *pd, *n;
int cnt = 0, ret; int cnt = 0, ret;
bool preloaded;
if (blkcg_policy_enabled(q, pol)) if (blkcg_policy_enabled(q, pol))
return 0; return 0;
/* preallocations for root blkg */ /* count and allocate policy_data for all existing blkgs */
new_blkg = blkg_alloc(&blkcg_root, q, GFP_KERNEL);
if (!new_blkg)
return -ENOMEM;
blk_queue_bypass_start(q); blk_queue_bypass_start(q);
preloaded = !radix_tree_preload(GFP_KERNEL);
/*
* Make sure the root blkg exists and count the existing blkgs. As
* @q is bypassing at this point, blkg_lookup_create() can't be
* used. Open code it.
*/
spin_lock_irq(q->queue_lock); spin_lock_irq(q->queue_lock);
rcu_read_lock();
blkg = __blkg_lookup(&blkcg_root, q, false);
if (blkg)
blkg_free(new_blkg);
else
blkg = blkg_create(&blkcg_root, q, new_blkg);
rcu_read_unlock();
if (preloaded)
radix_tree_preload_end();
if (IS_ERR(blkg)) {
ret = PTR_ERR(blkg);
goto out_unlock;
}
list_for_each_entry(blkg, &q->blkg_list, q_node) list_for_each_entry(blkg, &q->blkg_list, q_node)
cnt++; cnt++;
spin_unlock_irq(q->queue_lock); spin_unlock_irq(q->queue_lock);
/* allocate policy_data for all existing blkgs */
while (cnt--) { while (cnt--) {
pd = kzalloc_node(pol->pd_size, GFP_KERNEL, q->node); pd = kzalloc_node(pol->pd_size, GFP_KERNEL, q->node);
if (!pd) { if (!pd) {
@ -1072,10 +1062,6 @@ void blkcg_deactivate_policy(struct request_queue *q,
__clear_bit(pol->plid, q->blkcg_pols); __clear_bit(pol->plid, q->blkcg_pols);
/* if no policy is left, no need for blkgs - shoot them down */
if (bitmap_empty(q->blkcg_pols, BLKCG_MAX_POLS))
blkg_destroy_all(q);
list_for_each_entry(blkg, &q->blkg_list, q_node) { list_for_each_entry(blkg, &q->blkg_list, q_node) {
/* grab blkcg lock too while removing @pd from @blkg */ /* grab blkcg lock too while removing @pd from @blkg */
spin_lock(&blkg->blkcg->lock); spin_lock(&blkg->blkcg->lock);