cgroup: Do not corrupt task iteration when rebinding subsystem
We found a refcount UAF bug as follows:
refcount_t: addition on 0; use-after-free.
WARNING: CPU: 1 PID: 342 at lib/refcount.c:25 refcount_warn_saturate+0xa0/0x148
Workqueue: events cpuset_hotplug_workfn
Call trace:
refcount_warn_saturate+0xa0/0x148
__refcount_add.constprop.0+0x5c/0x80
css_task_iter_advance_css_set+0xd8/0x210
css_task_iter_advance+0xa8/0x120
css_task_iter_next+0x94/0x158
update_tasks_root_domain+0x58/0x98
rebuild_root_domains+0xa0/0x1b0
rebuild_sched_domains_locked+0x144/0x188
cpuset_hotplug_workfn+0x138/0x5a0
process_one_work+0x1e8/0x448
worker_thread+0x228/0x3e0
kthread+0xe0/0xf0
ret_from_fork+0x10/0x20
then a kernel panic will be triggered as below:
Unable to handle kernel paging request at virtual address 00000000c0000010
Call trace:
cgroup_apply_control_disable+0xa4/0x16c
rebind_subsystems+0x224/0x590
cgroup_destroy_root+0x64/0x2e0
css_free_rwork_fn+0x198/0x2a0
process_one_work+0x1d4/0x4bc
worker_thread+0x158/0x410
kthread+0x108/0x13c
ret_from_fork+0x10/0x18
The race that cause this bug can be shown as below:
(hotplug cpu) | (umount cpuset)
mutex_lock(&cpuset_mutex) | mutex_lock(&cgroup_mutex)
cpuset_hotplug_workfn |
rebuild_root_domains | rebind_subsystems
update_tasks_root_domain | spin_lock_irq(&css_set_lock)
css_task_iter_start | list_move_tail(&cset->e_cset_node[ss->id]
while(css_task_iter_next) | &dcgrp->e_csets[ss->id]);
css_task_iter_end | spin_unlock_irq(&css_set_lock)
mutex_unlock(&cpuset_mutex) | mutex_unlock(&cgroup_mutex)
Inside css_task_iter_start/next/end, css_set_lock is hold and then
released, so when iterating task(left side), the css_set may be moved to
another list(right side), then it->cset_head points to the old list head
and it->cset_pos->next points to the head node of new list, which can't
be used as struct css_set.
To fix this issue, switch from all css_sets to only scgrp's css_sets to
patch in-flight iterators to preserve correct iteration, and then
update it->cset_head as well.
Reported-by: Gaosheng Cui <cuigaosheng1@huawei.com>
Link: https://www.spinics.net/lists/cgroups/msg37935.html
Suggested-by: Michal Koutný <mkoutny@suse.com>
Link: https://lore.kernel.org/all/20230526114139.70274-1-xiujianfeng@huaweicloud.com/
Signed-off-by: Xiu Jianfeng <xiujianfeng@huawei.com>
Fixes: 2d8f243a5e
("cgroup: implement cgroup->e_csets[]")
Cc: stable@vger.kernel.org # v3.16+
Signed-off-by: Tejun Heo <tj@kernel.org>
This commit is contained in:
parent
f0cc749254
commit
6f363f5aa8
@ -1798,7 +1798,7 @@ int rebind_subsystems(struct cgroup_root *dst_root, u16 ss_mask)
|
|||||||
{
|
{
|
||||||
struct cgroup *dcgrp = &dst_root->cgrp;
|
struct cgroup *dcgrp = &dst_root->cgrp;
|
||||||
struct cgroup_subsys *ss;
|
struct cgroup_subsys *ss;
|
||||||
int ssid, i, ret;
|
int ssid, ret;
|
||||||
u16 dfl_disable_ss_mask = 0;
|
u16 dfl_disable_ss_mask = 0;
|
||||||
|
|
||||||
lockdep_assert_held(&cgroup_mutex);
|
lockdep_assert_held(&cgroup_mutex);
|
||||||
@ -1842,7 +1842,8 @@ int rebind_subsystems(struct cgroup_root *dst_root, u16 ss_mask)
|
|||||||
struct cgroup_root *src_root = ss->root;
|
struct cgroup_root *src_root = ss->root;
|
||||||
struct cgroup *scgrp = &src_root->cgrp;
|
struct cgroup *scgrp = &src_root->cgrp;
|
||||||
struct cgroup_subsys_state *css = cgroup_css(scgrp, ss);
|
struct cgroup_subsys_state *css = cgroup_css(scgrp, ss);
|
||||||
struct css_set *cset;
|
struct css_set *cset, *cset_pos;
|
||||||
|
struct css_task_iter *it;
|
||||||
|
|
||||||
WARN_ON(!css || cgroup_css(dcgrp, ss));
|
WARN_ON(!css || cgroup_css(dcgrp, ss));
|
||||||
|
|
||||||
@ -1860,9 +1861,22 @@ int rebind_subsystems(struct cgroup_root *dst_root, u16 ss_mask)
|
|||||||
css->cgroup = dcgrp;
|
css->cgroup = dcgrp;
|
||||||
|
|
||||||
spin_lock_irq(&css_set_lock);
|
spin_lock_irq(&css_set_lock);
|
||||||
hash_for_each(css_set_table, i, cset, hlist)
|
WARN_ON(!list_empty(&dcgrp->e_csets[ss->id]));
|
||||||
|
list_for_each_entry_safe(cset, cset_pos, &scgrp->e_csets[ss->id],
|
||||||
|
e_cset_node[ss->id]) {
|
||||||
list_move_tail(&cset->e_cset_node[ss->id],
|
list_move_tail(&cset->e_cset_node[ss->id],
|
||||||
&dcgrp->e_csets[ss->id]);
|
&dcgrp->e_csets[ss->id]);
|
||||||
|
/*
|
||||||
|
* all css_sets of scgrp together in same order to dcgrp,
|
||||||
|
* patch in-flight iterators to preserve correct iteration.
|
||||||
|
* since the iterator is always advanced right away and
|
||||||
|
* finished when it->cset_pos meets it->cset_head, so only
|
||||||
|
* update it->cset_head is enough here.
|
||||||
|
*/
|
||||||
|
list_for_each_entry(it, &cset->task_iters, iters_node)
|
||||||
|
if (it->cset_head == &scgrp->e_csets[ss->id])
|
||||||
|
it->cset_head = &dcgrp->e_csets[ss->id];
|
||||||
|
}
|
||||||
spin_unlock_irq(&css_set_lock);
|
spin_unlock_irq(&css_set_lock);
|
||||||
|
|
||||||
if (ss->css_rstat_flush) {
|
if (ss->css_rstat_flush) {
|
||||||
|
Loading…
Reference in New Issue
Block a user