psi: Fix uaf issue when psi trigger is destroyed while being polled
With write operation on psi files replacing old trigger with a new one,
the lifetime of its waitqueue is totally arbitrary. Overwriting an
existing trigger causes its waitqueue to be freed and pending poll()
will stumble on trigger->event_wait which was destroyed.
Fix this by disallowing to redefine an existing psi trigger. If a write
operation is used on a file descriptor with an already existing psi
trigger, the operation will fail with EBUSY error.
Also bypass a check for psi_disabled in the psi_trigger_destroy as the
flag can be flipped after the trigger is created, leading to a memory
leak.
Fixes: 0e94682b73
("psi: introduce psi monitor")
Reported-by: syzbot+cdb5dd11c97cc532efad@syzkaller.appspotmail.com
Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
Analyzed-by: Eric Biggers <ebiggers@kernel.org>
Signed-off-by: Suren Baghdasaryan <surenb@google.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Eric Biggers <ebiggers@google.com>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Cc: stable@vger.kernel.org
Link: https://lore.kernel.org/r/20220111232309.1786347-1-surenb@google.com
This commit is contained in:
parent
fb3b0673b7
commit
a06247c680
@ -92,7 +92,8 @@ Triggers can be set on more than one psi metric and more than one trigger
|
||||
for the same psi metric can be specified. However for each trigger a separate
|
||||
file descriptor is required to be able to poll it separately from others,
|
||||
therefore for each trigger a separate open() syscall should be made even
|
||||
when opening the same psi interface file.
|
||||
when opening the same psi interface file. Write operations to a file descriptor
|
||||
with an already existing psi trigger will fail with EBUSY.
|
||||
|
||||
Monitors activate only when system enters stall state for the monitored
|
||||
psi metric and deactivates upon exit from the stall state. While system is
|
||||
|
@ -33,7 +33,7 @@ void cgroup_move_task(struct task_struct *p, struct css_set *to);
|
||||
|
||||
struct psi_trigger *psi_trigger_create(struct psi_group *group,
|
||||
char *buf, size_t nbytes, enum psi_res res);
|
||||
void psi_trigger_replace(void **trigger_ptr, struct psi_trigger *t);
|
||||
void psi_trigger_destroy(struct psi_trigger *t);
|
||||
|
||||
__poll_t psi_trigger_poll(void **trigger_ptr, struct file *file,
|
||||
poll_table *wait);
|
||||
|
@ -141,9 +141,6 @@ struct psi_trigger {
|
||||
* events to one per window
|
||||
*/
|
||||
u64 last_event_time;
|
||||
|
||||
/* Refcounting to prevent premature destruction */
|
||||
struct kref refcount;
|
||||
};
|
||||
|
||||
struct psi_group {
|
||||
|
@ -3643,6 +3643,12 @@ static ssize_t cgroup_pressure_write(struct kernfs_open_file *of, char *buf,
|
||||
cgroup_get(cgrp);
|
||||
cgroup_kn_unlock(of->kn);
|
||||
|
||||
/* Allow only one trigger per file descriptor */
|
||||
if (ctx->psi.trigger) {
|
||||
cgroup_put(cgrp);
|
||||
return -EBUSY;
|
||||
}
|
||||
|
||||
psi = cgroup_ino(cgrp) == 1 ? &psi_system : &cgrp->psi;
|
||||
new = psi_trigger_create(psi, buf, nbytes, res);
|
||||
if (IS_ERR(new)) {
|
||||
@ -3650,8 +3656,7 @@ static ssize_t cgroup_pressure_write(struct kernfs_open_file *of, char *buf,
|
||||
return PTR_ERR(new);
|
||||
}
|
||||
|
||||
psi_trigger_replace(&ctx->psi.trigger, new);
|
||||
|
||||
smp_store_release(&ctx->psi.trigger, new);
|
||||
cgroup_put(cgrp);
|
||||
|
||||
return nbytes;
|
||||
@ -3690,7 +3695,7 @@ static void cgroup_pressure_release(struct kernfs_open_file *of)
|
||||
{
|
||||
struct cgroup_file_ctx *ctx = of->priv;
|
||||
|
||||
psi_trigger_replace(&ctx->psi.trigger, NULL);
|
||||
psi_trigger_destroy(ctx->psi.trigger);
|
||||
}
|
||||
|
||||
bool cgroup_psi_enabled(void)
|
||||
|
@ -1162,7 +1162,6 @@ struct psi_trigger *psi_trigger_create(struct psi_group *group,
|
||||
t->event = 0;
|
||||
t->last_event_time = 0;
|
||||
init_waitqueue_head(&t->event_wait);
|
||||
kref_init(&t->refcount);
|
||||
|
||||
mutex_lock(&group->trigger_lock);
|
||||
|
||||
@ -1191,15 +1190,19 @@ struct psi_trigger *psi_trigger_create(struct psi_group *group,
|
||||
return t;
|
||||
}
|
||||
|
||||
static void psi_trigger_destroy(struct kref *ref)
|
||||
void psi_trigger_destroy(struct psi_trigger *t)
|
||||
{
|
||||
struct psi_trigger *t = container_of(ref, struct psi_trigger, refcount);
|
||||
struct psi_group *group = t->group;
|
||||
struct psi_group *group;
|
||||
struct task_struct *task_to_destroy = NULL;
|
||||
|
||||
if (static_branch_likely(&psi_disabled))
|
||||
/*
|
||||
* We do not check psi_disabled since it might have been disabled after
|
||||
* the trigger got created.
|
||||
*/
|
||||
if (!t)
|
||||
return;
|
||||
|
||||
group = t->group;
|
||||
/*
|
||||
* Wakeup waiters to stop polling. Can happen if cgroup is deleted
|
||||
* from under a polling process.
|
||||
@ -1235,9 +1238,9 @@ static void psi_trigger_destroy(struct kref *ref)
|
||||
mutex_unlock(&group->trigger_lock);
|
||||
|
||||
/*
|
||||
* Wait for both *trigger_ptr from psi_trigger_replace and
|
||||
* poll_task RCUs to complete their read-side critical sections
|
||||
* before destroying the trigger and optionally the poll_task
|
||||
* Wait for psi_schedule_poll_work RCU to complete its read-side
|
||||
* critical section before destroying the trigger and optionally the
|
||||
* poll_task.
|
||||
*/
|
||||
synchronize_rcu();
|
||||
/*
|
||||
@ -1254,18 +1257,6 @@ static void psi_trigger_destroy(struct kref *ref)
|
||||
kfree(t);
|
||||
}
|
||||
|
||||
void psi_trigger_replace(void **trigger_ptr, struct psi_trigger *new)
|
||||
{
|
||||
struct psi_trigger *old = *trigger_ptr;
|
||||
|
||||
if (static_branch_likely(&psi_disabled))
|
||||
return;
|
||||
|
||||
rcu_assign_pointer(*trigger_ptr, new);
|
||||
if (old)
|
||||
kref_put(&old->refcount, psi_trigger_destroy);
|
||||
}
|
||||
|
||||
__poll_t psi_trigger_poll(void **trigger_ptr,
|
||||
struct file *file, poll_table *wait)
|
||||
{
|
||||
@ -1275,24 +1266,15 @@ __poll_t psi_trigger_poll(void **trigger_ptr,
|
||||
if (static_branch_likely(&psi_disabled))
|
||||
return DEFAULT_POLLMASK | EPOLLERR | EPOLLPRI;
|
||||
|
||||
rcu_read_lock();
|
||||
|
||||
t = rcu_dereference(*(void __rcu __force **)trigger_ptr);
|
||||
if (!t) {
|
||||
rcu_read_unlock();
|
||||
t = smp_load_acquire(trigger_ptr);
|
||||
if (!t)
|
||||
return DEFAULT_POLLMASK | EPOLLERR | EPOLLPRI;
|
||||
}
|
||||
kref_get(&t->refcount);
|
||||
|
||||
rcu_read_unlock();
|
||||
|
||||
poll_wait(file, &t->event_wait, wait);
|
||||
|
||||
if (cmpxchg(&t->event, 1, 0) == 1)
|
||||
ret |= EPOLLPRI;
|
||||
|
||||
kref_put(&t->refcount, psi_trigger_destroy);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
@ -1316,14 +1298,24 @@ static ssize_t psi_write(struct file *file, const char __user *user_buf,
|
||||
|
||||
buf[buf_size - 1] = '\0';
|
||||
|
||||
new = psi_trigger_create(&psi_system, buf, nbytes, res);
|
||||
if (IS_ERR(new))
|
||||
return PTR_ERR(new);
|
||||
|
||||
seq = file->private_data;
|
||||
|
||||
/* Take seq->lock to protect seq->private from concurrent writes */
|
||||
mutex_lock(&seq->lock);
|
||||
psi_trigger_replace(&seq->private, new);
|
||||
|
||||
/* Allow only one trigger per file descriptor */
|
||||
if (seq->private) {
|
||||
mutex_unlock(&seq->lock);
|
||||
return -EBUSY;
|
||||
}
|
||||
|
||||
new = psi_trigger_create(&psi_system, buf, nbytes, res);
|
||||
if (IS_ERR(new)) {
|
||||
mutex_unlock(&seq->lock);
|
||||
return PTR_ERR(new);
|
||||
}
|
||||
|
||||
smp_store_release(&seq->private, new);
|
||||
mutex_unlock(&seq->lock);
|
||||
|
||||
return nbytes;
|
||||
@ -1358,7 +1350,7 @@ static int psi_fop_release(struct inode *inode, struct file *file)
|
||||
{
|
||||
struct seq_file *seq = file->private_data;
|
||||
|
||||
psi_trigger_replace(&seq->private, NULL);
|
||||
psi_trigger_destroy(seq->private);
|
||||
return single_release(inode, file);
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user