35a2897c2a
Steven Rostedt reported a potential race in the RCU core because of swake_up(): CPU0 CPU1 ---- ---- __call_rcu_core() { spin_lock(rnp_root) need_wake = __rcu_start_gp() { rcu_start_gp_advanced() { gp_flags = FLAG_INIT } } rcu_gp_kthread() { swait_event_interruptible(wq, gp_flags & FLAG_INIT) { spin_lock(q->lock) *fetch wq->task_list here! * list_add(wq->task_list, q->task_list) spin_unlock(q->lock); *fetch old value of gp_flags here * spin_unlock(rnp_root) rcu_gp_kthread_wake() { swake_up(wq) { swait_active(wq) { list_empty(wq->task_list) } * return false * if (condition) * false * schedule(); In this case, a wakeup is missed, which could cause rcu_gp_kthread to wait for a long time. The reason for this is that we do a lockless swait_active() check in swake_up(). To fix this, we can either 1) add an smp_mb() in swake_up() before swait_active() to provide the proper order or 2) simply remove the swait_active() in swake_up(). Solution 2 not only fixes this problem but also keeps the swait and wait API as close as possible, as wake_up() doesn't provide a full barrier and doesn't do a lockless check of the wait queue either. Moreover, there are users already using swait_active() to do their quick checks for the wait queues, so it makes less sense for swake_up() and swake_up_all() to do this on their own. This patch then removes the lockless swait_active() check in swake_up() and swake_up_all(). Reported-by: Steven Rostedt <rostedt@goodmis.org> Signed-off-by: Boqun Feng <boqun.feng@gmail.com> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> Cc: Krister Johansen <kjlx@templeofstupid.com> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com> Cc: Paul Gortmaker <paul.gortmaker@windriver.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Thomas Gleixner <tglx@linutronix.de> Link: http://lkml.kernel.org/r/20170615041828.zk3a3sfyudm5p6nl@tardis Signed-off-by: Ingo Molnar <mingo@kernel.org>
118 lines
2.9 KiB
C
118 lines
2.9 KiB
C
#include <linux/sched/signal.h>
|
|
#include <linux/swait.h>
|
|
|
|
void __init_swait_queue_head(struct swait_queue_head *q, const char *name,
|
|
struct lock_class_key *key)
|
|
{
|
|
raw_spin_lock_init(&q->lock);
|
|
lockdep_set_class_and_name(&q->lock, key, name);
|
|
INIT_LIST_HEAD(&q->task_list);
|
|
}
|
|
EXPORT_SYMBOL(__init_swait_queue_head);
|
|
|
|
/*
|
|
* The thing about the wake_up_state() return value; I think we can ignore it.
|
|
*
|
|
* If for some reason it would return 0, that means the previously waiting
|
|
* task is already running, so it will observe condition true (or has already).
|
|
*/
|
|
void swake_up_locked(struct swait_queue_head *q)
|
|
{
|
|
struct swait_queue *curr;
|
|
|
|
if (list_empty(&q->task_list))
|
|
return;
|
|
|
|
curr = list_first_entry(&q->task_list, typeof(*curr), task_list);
|
|
wake_up_process(curr->task);
|
|
list_del_init(&curr->task_list);
|
|
}
|
|
EXPORT_SYMBOL(swake_up_locked);
|
|
|
|
void swake_up(struct swait_queue_head *q)
|
|
{
|
|
unsigned long flags;
|
|
|
|
raw_spin_lock_irqsave(&q->lock, flags);
|
|
swake_up_locked(q);
|
|
raw_spin_unlock_irqrestore(&q->lock, flags);
|
|
}
|
|
EXPORT_SYMBOL(swake_up);
|
|
|
|
/*
|
|
* Does not allow usage from IRQ disabled, since we must be able to
|
|
* release IRQs to guarantee bounded hold time.
|
|
*/
|
|
void swake_up_all(struct swait_queue_head *q)
|
|
{
|
|
struct swait_queue *curr;
|
|
LIST_HEAD(tmp);
|
|
|
|
raw_spin_lock_irq(&q->lock);
|
|
list_splice_init(&q->task_list, &tmp);
|
|
while (!list_empty(&tmp)) {
|
|
curr = list_first_entry(&tmp, typeof(*curr), task_list);
|
|
|
|
wake_up_state(curr->task, TASK_NORMAL);
|
|
list_del_init(&curr->task_list);
|
|
|
|
if (list_empty(&tmp))
|
|
break;
|
|
|
|
raw_spin_unlock_irq(&q->lock);
|
|
raw_spin_lock_irq(&q->lock);
|
|
}
|
|
raw_spin_unlock_irq(&q->lock);
|
|
}
|
|
EXPORT_SYMBOL(swake_up_all);
|
|
|
|
void __prepare_to_swait(struct swait_queue_head *q, struct swait_queue *wait)
|
|
{
|
|
wait->task = current;
|
|
if (list_empty(&wait->task_list))
|
|
list_add(&wait->task_list, &q->task_list);
|
|
}
|
|
|
|
void prepare_to_swait(struct swait_queue_head *q, struct swait_queue *wait, int state)
|
|
{
|
|
unsigned long flags;
|
|
|
|
raw_spin_lock_irqsave(&q->lock, flags);
|
|
__prepare_to_swait(q, wait);
|
|
set_current_state(state);
|
|
raw_spin_unlock_irqrestore(&q->lock, flags);
|
|
}
|
|
EXPORT_SYMBOL(prepare_to_swait);
|
|
|
|
long prepare_to_swait_event(struct swait_queue_head *q, struct swait_queue *wait, int state)
|
|
{
|
|
if (signal_pending_state(state, current))
|
|
return -ERESTARTSYS;
|
|
|
|
prepare_to_swait(q, wait, state);
|
|
|
|
return 0;
|
|
}
|
|
EXPORT_SYMBOL(prepare_to_swait_event);
|
|
|
|
void __finish_swait(struct swait_queue_head *q, struct swait_queue *wait)
|
|
{
|
|
__set_current_state(TASK_RUNNING);
|
|
if (!list_empty(&wait->task_list))
|
|
list_del_init(&wait->task_list);
|
|
}
|
|
|
|
void finish_swait(struct swait_queue_head *q, struct swait_queue *wait)
|
|
{
|
|
unsigned long flags;
|
|
|
|
__set_current_state(TASK_RUNNING);
|
|
|
|
if (!list_empty_careful(&wait->task_list)) {
|
|
raw_spin_lock_irqsave(&q->lock, flags);
|
|
list_del_init(&wait->task_list);
|
|
raw_spin_unlock_irqrestore(&q->lock, flags);
|
|
}
|
|
}
|
|
EXPORT_SYMBOL(finish_swait);
|