2019-05-19 15:08:55 +03:00
// SPDX-License-Identifier: GPL-2.0-only
2005-04-17 02:20:36 +04:00
/*
* Generic waiting primitives .
*
2012-12-06 13:39:54 +04:00
* ( C ) 2004 Nadia Yvette Chambers , Oracle
2005-04-17 02:20:36 +04:00
*/
2017-03-05 13:10:18 +03:00
void __init_waitqueue_head ( struct wait_queue_head * wq_head , const char * name , struct lock_class_key * key )
2006-07-10 15:45:32 +04:00
{
2017-03-05 13:10:18 +03:00
spin_lock_init ( & wq_head - > lock ) ;
lockdep_set_class_and_name ( & wq_head - > lock , key , name ) ;
sched/wait: Disambiguate wq_entry->task_list and wq_head->task_list naming
So I've noticed a number of instances where it was not obvious from the
code whether ->task_list was for a wait-queue head or a wait-queue entry.
Furthermore, there's a number of wait-queue users where the lists are
not for 'tasks' but other entities (poll tables, etc.), in which case
the 'task_list' name is actively confusing.
To clear this all up, name the wait-queue head and entry list structure
fields unambiguously:
struct wait_queue_head::task_list => ::head
struct wait_queue_entry::task_list => ::entry
For example, this code:
rqw->wait.task_list.next != &wait->task_list
... is was pretty unclear (to me) what it's doing, while now it's written this way:
rqw->wait.head.next != &wait->entry
... which makes it pretty clear that we are iterating a list until we see the head.
Other examples are:
list_for_each_entry_safe(pos, next, &x->task_list, task_list) {
list_for_each_entry(wq, &fence->wait.task_list, task_list) {
... where it's unclear (to me) what we are iterating, and during review it's
hard to tell whether it's trying to walk a wait-queue entry (which would be
a bug), while now it's written as:
list_for_each_entry_safe(pos, next, &x->head, entry) {
list_for_each_entry(wq, &fence->wait.head, entry) {
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
2017-06-20 13:06:46 +03:00
INIT_LIST_HEAD ( & wq_head - > head ) ;
2006-07-10 15:45:32 +04:00
}
2006-07-03 11:25:07 +04:00
2009-08-10 15:33:05 +04:00
EXPORT_SYMBOL ( __init_waitqueue_head ) ;
2006-07-03 11:25:07 +04:00
2017-03-05 13:10:18 +03:00
void add_wait_queue ( struct wait_queue_head * wq_head , struct wait_queue_entry * wq_entry )
2005-04-17 02:20:36 +04:00
{
unsigned long flags ;
2017-03-05 12:33:16 +03:00
wq_entry - > flags & = ~ WQ_FLAG_EXCLUSIVE ;
2017-03-05 13:10:18 +03:00
spin_lock_irqsave ( & wq_head - > lock , flags ) ;
2017-12-06 10:15:31 +03:00
__add_wait_queue ( wq_head , wq_entry ) ;
2017-03-05 13:10:18 +03:00
spin_unlock_irqrestore ( & wq_head - > lock , flags ) ;
2005-04-17 02:20:36 +04:00
}
EXPORT_SYMBOL ( add_wait_queue ) ;
2017-03-05 13:10:18 +03:00
void add_wait_queue_exclusive ( struct wait_queue_head * wq_head , struct wait_queue_entry * wq_entry )
2005-04-17 02:20:36 +04:00
{
unsigned long flags ;
2017-03-05 12:33:16 +03:00
wq_entry - > flags | = WQ_FLAG_EXCLUSIVE ;
2017-03-05 13:10:18 +03:00
spin_lock_irqsave ( & wq_head - > lock , flags ) ;
__add_wait_queue_entry_tail ( wq_head , wq_entry ) ;
spin_unlock_irqrestore ( & wq_head - > lock , flags ) ;
2005-04-17 02:20:36 +04:00
}
EXPORT_SYMBOL ( add_wait_queue_exclusive ) ;
2020-10-27 17:39:43 +03:00
void add_wait_queue_priority ( struct wait_queue_head * wq_head , struct wait_queue_entry * wq_entry )
{
unsigned long flags ;
wq_entry - > flags | = WQ_FLAG_EXCLUSIVE | WQ_FLAG_PRIORITY ;
spin_lock_irqsave ( & wq_head - > lock , flags ) ;
__add_wait_queue ( wq_head , wq_entry ) ;
spin_unlock_irqrestore ( & wq_head - > lock , flags ) ;
}
EXPORT_SYMBOL_GPL ( add_wait_queue_priority ) ;
2017-03-05 13:10:18 +03:00
void remove_wait_queue ( struct wait_queue_head * wq_head , struct wait_queue_entry * wq_entry )
2005-04-17 02:20:36 +04:00
{
unsigned long flags ;
2017-03-05 13:10:18 +03:00
spin_lock_irqsave ( & wq_head - > lock , flags ) ;
__remove_wait_queue ( wq_head , wq_entry ) ;
spin_unlock_irqrestore ( & wq_head - > lock , flags ) ;
2005-04-17 02:20:36 +04:00
}
EXPORT_SYMBOL ( remove_wait_queue ) ;
2017-08-25 19:13:54 +03:00
/*
 * Scan threshold used to break up a wait queue walk.
 *
 * When a bookmark entry is supplied, __wake_up_common() stops walking once
 * it has handled more than this many entries and more entries remain, and
 * parks the bookmark at the resume point. This gives the waker a chance to
 * take a break from holding the wait queue lock during a long walk.
 */
# define WAITQUEUE_WALK_BREAK_CNT 64
2005-04-17 02:20:36 +04:00
2013-10-04 19:24:35 +04:00
/*
* The core wakeup function . Non - exclusive wakeups ( nr_exclusive = = 0 ) just
* wake everything up . If it ' s an exclusive wakeup ( nr_exclusive = = small + ve
2020-10-27 17:39:43 +03:00
* number ) then we wake that number of exclusive tasks , and potentially all
* the non - exclusive tasks . Normally , exclusive tasks will be at the end of
* the list and any non - exclusive tasks will be woken first . A priority task
* may be at the head of the list , and can consume the event without any other
* tasks being woken .
2013-10-04 19:24:35 +04:00
*
* There are circumstances in which we can try to wake a task which has already
* started to run but is not in state TASK_RUNNING . try_to_wake_up ( ) returns
* zero in this ( rare ) case , and we handle it by continuing to scan the queue .
*/
2017-08-25 19:13:54 +03:00
static int __wake_up_common ( struct wait_queue_head * wq_head , unsigned int mode ,
int nr_exclusive , int wake_flags , void * key ,
wait_queue_entry_t * bookmark )
2013-10-04 19:24:35 +04:00
{
2017-06-20 13:06:13 +03:00
wait_queue_entry_t * curr , * next ;
2017-08-25 19:13:54 +03:00
int cnt = 0 ;
2018-08-22 07:56:34 +03:00
lockdep_assert_held ( & wq_head - > lock ) ;
2017-08-25 19:13:54 +03:00
if ( bookmark & & ( bookmark - > flags & WQ_FLAG_BOOKMARK ) ) {
curr = list_next_entry ( bookmark , entry ) ;
2013-10-04 19:24:35 +04:00
2017-08-25 19:13:54 +03:00
list_del ( & bookmark - > entry ) ;
bookmark - > flags = 0 ;
} else
curr = list_first_entry ( & wq_head - > head , wait_queue_entry_t , entry ) ;
if ( & curr - > entry = = & wq_head - > head )
return nr_exclusive ;
list_for_each_entry_safe_from ( curr , next , & wq_head - > head , entry ) {
2013-10-04 19:24:35 +04:00
unsigned flags = curr - > flags ;
2017-08-25 19:13:54 +03:00
int ret ;
if ( flags & WQ_FLAG_BOOKMARK )
continue ;
ret = curr - > func ( curr , mode , wake_flags , key ) ;
Minor page waitqueue cleanups
Tim Chen and Kan Liang have been battling a customer load that shows
extremely long page wakeup lists. The cause seems to be constant NUMA
migration of a hot page that is shared across a lot of threads, but the
actual root cause for the exact behavior has not been found.
Tim has a patch that batches the wait list traversal at wakeup time, so
that we at least don't get long uninterruptible cases where we traverse
and wake up thousands of processes and get nasty latency spikes. That
is likely 4.14 material, but we're still discussing the page waitqueue
specific parts of it.
In the meantime, I've tried to look at making the page wait queues less
expensive, and failing miserably. If you have thousands of threads
waiting for the same page, it will be painful. We'll need to try to
figure out the NUMA balancing issue some day, in addition to avoiding
the excessive spinlock hold times.
That said, having tried to rewrite the page wait queues, I can at least
fix up some of the braindamage in the current situation. In particular:
(a) we don't want to continue walking the page wait list if the bit
we're waiting for already got set again (which seems to be one of
the patterns of the bad load). That makes no progress and just
causes pointless cache pollution chasing the pointers.
(b) we don't want to put the non-locking waiters always on the front of
the queue, and the locking waiters always on the back. Not only is
that unfair, it means that we wake up thousands of reading threads
that will just end up being blocked by the writer later anyway.
Also add a comment about the layout of 'struct wait_page_key' - there is
an external user of it in the cachefiles code that means that it has to
match the layout of 'struct wait_bit_key' in the two first members. It
so happens to match, because 'struct page *' and 'unsigned long *' end
up having the same values simply because the page flags are the first
member in struct page.
Cc: Tim Chen <tim.c.chen@linux.intel.com>
Cc: Kan Liang <kan.liang@intel.com>
Cc: Mel Gorman <mgorman@techsingularity.net>
Cc: Christopher Lameter <cl@linux.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Davidlohr Bueso <dave@stgolabs.net>
Cc: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2017-08-27 23:55:12 +03:00
if ( ret < 0 )
break ;
if ( ret & & ( flags & WQ_FLAG_EXCLUSIVE ) & & ! - - nr_exclusive )
2013-10-04 19:24:35 +04:00
break ;
2017-08-25 19:13:54 +03:00
if ( bookmark & & ( + + cnt > WAITQUEUE_WALK_BREAK_CNT ) & &
( & next - > entry ! = & wq_head - > head ) ) {
bookmark - > flags = WQ_FLAG_BOOKMARK ;
list_add_tail ( & bookmark - > entry , & next - > entry ) ;
break ;
}
}
sched: Clean up and harmonize the coding style of the scheduler code base
A good number of small style inconsistencies have accumulated
in the scheduler core, so do a pass over them to harmonize
all these details:
- fix speling in comments,
- use curly braces for multi-line statements,
- remove unnecessary parentheses from integer literals,
- capitalize consistently,
- remove stray newlines,
- add comments where necessary,
- remove invalid/unnecessary comments,
- align structure definitions and other data types vertically,
- add missing newlines for increased readability,
- fix vertical tabulation where it's misaligned,
- harmonize preprocessor conditional block labeling
and vertical alignment,
- remove line-breaks where they uglify the code,
- add newline after local variable definitions,
No change in functionality:
md5:
1191fa0a890cfa8132156d2959d7e9e2 built-in.o.before.asm
1191fa0a890cfa8132156d2959d7e9e2 built-in.o.after.asm
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
2018-03-03 16:01:12 +03:00
2017-08-25 19:13:54 +03:00
return nr_exclusive ;
}
static void __wake_up_common_lock ( struct wait_queue_head * wq_head , unsigned int mode ,
int nr_exclusive , int wake_flags , void * key )
{
unsigned long flags ;
wait_queue_entry_t bookmark ;
bookmark . flags = 0 ;
bookmark . private = NULL ;
bookmark . func = NULL ;
INIT_LIST_HEAD ( & bookmark . entry ) ;
2019-06-11 15:29:07 +03:00
do {
2017-08-25 19:13:54 +03:00
spin_lock_irqsave ( & wq_head - > lock , flags ) ;
nr_exclusive = __wake_up_common ( wq_head , mode , nr_exclusive ,
wake_flags , key , & bookmark ) ;
spin_unlock_irqrestore ( & wq_head - > lock , flags ) ;
2019-06-11 15:29:07 +03:00
} while ( bookmark . flags & WQ_FLAG_BOOKMARK ) ;
2013-10-04 19:24:35 +04:00
}
/**
 * __wake_up - wake up threads blocked on a waitqueue.
 * @wq_head: the waitqueue
 * @mode: which threads
 * @nr_exclusive: how many wake-one or wake-many threads to wake up
 * @key: is directly passed to the wakeup function
 *
 * Takes the waitqueue lock itself and performs the wakeup with no extra
 * wake flags.
 *
 * If this function wakes up a task, it executes a full memory barrier before
 * accessing the task state.
 */
void __wake_up(struct wait_queue_head *wq_head, unsigned int mode,
	       int nr_exclusive, void *key)
{
	__wake_up_common_lock(wq_head, mode, nr_exclusive, 0, key);
}
EXPORT_SYMBOL(__wake_up);
/*
* Same as __wake_up but called with the spinlock in wait_queue_head_t held .
*/
2017-03-05 13:10:18 +03:00
void __wake_up_locked ( struct wait_queue_head * wq_head , unsigned int mode , int nr )
2013-10-04 19:24:35 +04:00
{
2017-08-25 19:13:54 +03:00
__wake_up_common ( wq_head , mode , nr , 0 , NULL , NULL ) ;
2013-10-04 19:24:35 +04:00
}
EXPORT_SYMBOL_GPL ( __wake_up_locked ) ;
2017-03-05 13:10:18 +03:00
void __wake_up_locked_key ( struct wait_queue_head * wq_head , unsigned int mode , void * key )
2013-10-04 19:24:35 +04:00
{
2017-08-25 19:13:54 +03:00
__wake_up_common ( wq_head , mode , 1 , 0 , key , NULL ) ;
2013-10-04 19:24:35 +04:00
}
EXPORT_SYMBOL_GPL ( __wake_up_locked_key ) ;
2017-08-25 19:13:55 +03:00
/*
 * Like __wake_up_locked_key(), but the caller supplies @bookmark so that
 * __wake_up_common() may cut a long walk short and record where to resume.
 */
void __wake_up_locked_key_bookmark(struct wait_queue_head *wq_head,
				   unsigned int mode, void *key,
				   wait_queue_entry_t *bookmark)
{
	__wake_up_common(wq_head, mode, 1, 0, key, bookmark);
}
EXPORT_SYMBOL_GPL(__wake_up_locked_key_bookmark);
2013-10-04 19:24:35 +04:00
/**
* __wake_up_sync_key - wake up threads blocked on a waitqueue .
2017-03-05 13:10:18 +03:00
* @ wq_head : the waitqueue
2013-10-04 19:24:35 +04:00
* @ mode : which threads
* @ key : opaque value to be passed to wakeup targets
*
* The sync wakeup differs that the waker knows that it will schedule
* away soon , so while the target thread will be woken up , it will not
* be migrated to another CPU - ie . the two threads are ' synchronized '
* with each other . This can prevent needless bouncing between CPUs .
*
* On UP it can prevent extra preemption .
*
2018-07-16 21:06:03 +03:00
* If this function wakes up a task , it executes a full memory barrier before
* accessing the task state .
2013-10-04 19:24:35 +04:00
*/
2017-03-05 13:10:18 +03:00
void __wake_up_sync_key ( struct wait_queue_head * wq_head , unsigned int mode ,
2019-10-16 17:13:41 +03:00
void * key )
2013-10-04 19:24:35 +04:00
{
2017-03-05 13:10:18 +03:00
if ( unlikely ( ! wq_head ) )
2013-10-04 19:24:35 +04:00
return ;
2019-10-16 17:13:41 +03:00
__wake_up_common_lock ( wq_head , mode , 1 , WF_SYNC , key ) ;
2013-10-04 19:24:35 +04:00
}
EXPORT_SYMBOL_GPL ( __wake_up_sync_key ) ;
2019-09-24 18:07:45 +03:00
/**
 * __wake_up_locked_sync_key - wake up a thread blocked on a locked waitqueue.
 * @wq_head: the waitqueue
 * @mode: which threads
 * @key: opaque value to be passed to wakeup targets
 *
 * The sync wakeup differs in that the waker knows that it will schedule
 * away soon, so while the target thread will be woken up, it will not
 * be migrated to another CPU - ie. the two threads are 'synchronized'
 * with each other. This can prevent needless bouncing between CPUs.
 *
 * On UP it can prevent extra preemption.
 *
 * If this function wakes up a task, it executes a full memory barrier before
 * accessing the task state.
 */
void __wake_up_locked_sync_key(struct wait_queue_head *wq_head,
			       unsigned int mode, void *key)
{
	/* One exclusive wakeup, WF_SYNC set, no bookmark. */
	__wake_up_common(wq_head, mode, 1, WF_SYNC, key, NULL);
}
EXPORT_SYMBOL_GPL(__wake_up_locked_sync_key);
2013-10-04 19:24:35 +04:00
/*
* __wake_up_sync - see __wake_up_sync_key ( )
*/
2019-10-16 17:13:41 +03:00
void __wake_up_sync ( struct wait_queue_head * wq_head , unsigned int mode )
2013-10-04 19:24:35 +04:00
{
2019-10-16 17:13:41 +03:00
__wake_up_sync_key ( wq_head , mode , NULL ) ;
2013-10-04 19:24:35 +04:00
}
EXPORT_SYMBOL_GPL ( __wake_up_sync ) ; /* For internal use only */
wait: add wake_up_pollfree()
Several ->poll() implementations are special in that they use a
waitqueue whose lifetime is the current task, rather than the struct
file as is normally the case. This is okay for blocking polls, since a
blocking poll occurs within one task; however, non-blocking polls
require another solution. This solution is for the queue to be cleared
before it is freed, using 'wake_up_poll(wq, EPOLLHUP | POLLFREE);'.
However, that has a bug: wake_up_poll() calls __wake_up() with
nr_exclusive=1. Therefore, if there are multiple "exclusive" waiters,
and the wakeup function for the first one returns a positive value, only
that one will be called. That's *not* what's needed for POLLFREE;
POLLFREE is special in that it really needs to wake up everyone.
Considering the three non-blocking poll systems:
- io_uring poll doesn't handle POLLFREE at all, so it is broken anyway.
- aio poll is unaffected, since it doesn't support exclusive waits.
However, that's fragile, as someone could add this feature later.
- epoll doesn't appear to be broken by this, since its wakeup function
returns 0 when it sees POLLFREE. But this is fragile.
Although there is a workaround (see epoll), it's better to define a
function which always sends POLLFREE to all waiters. Add such a
function. Also make it verify that the queue really becomes empty after
all waiters have been woken up.
Reported-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: stable@vger.kernel.org
Link: https://lore.kernel.org/r/20211209010455.42744-2-ebiggers@kernel.org
Signed-off-by: Eric Biggers <ebiggers@google.com>
2021-12-09 04:04:51 +03:00
/*
 * Deliver EPOLLHUP | POLLFREE to the waiters on @wq_head.
 *
 * The wakeup is issued with nr_exclusive == 0, i.e. as a non-exclusive
 * wakeup that is not limited to a number of exclusive waiters. A one-time
 * warning is raised if the queue is still active afterwards.
 */
void __wake_up_pollfree(struct wait_queue_head *wq_head)
{
	__wake_up(wq_head, TASK_NORMAL, 0, poll_to_key(EPOLLHUP | POLLFREE));

	/* POLLFREE must have cleared the queue. */
	WARN_ON_ONCE(waitqueue_active(wq_head));
}
2005-04-17 02:20:36 +04:00
/*
* Note : we use " set_current_state() " _after_ the wait - queue add ,
* because we need a memory barrier there on SMP , so that any
* wake - function that tests for the wait - queue being active
* will be guaranteed to see waitqueue addition _or_ subsequent
* tests in this thread will see the wakeup having taken place .
*
* The spin_unlock ( ) itself is semi - permeable and only protects
* one way ( it only protects stuff inside the critical region and
* stops them from bleeding out - it would still allow subsequent
2007-05-09 10:57:56 +04:00
* loads to move into the critical region ) .
2005-04-17 02:20:36 +04:00
*/
2008-02-08 15:19:53 +03:00
void
2017-03-05 13:10:18 +03:00
prepare_to_wait ( struct wait_queue_head * wq_head , struct wait_queue_entry * wq_entry , int state )
2005-04-17 02:20:36 +04:00
{
unsigned long flags ;
2017-03-05 12:33:16 +03:00
wq_entry - > flags & = ~ WQ_FLAG_EXCLUSIVE ;
2017-03-05 13:10:18 +03:00
spin_lock_irqsave ( & wq_head - > lock , flags ) ;
sched/wait: Disambiguate wq_entry->task_list and wq_head->task_list naming
So I've noticed a number of instances where it was not obvious from the
code whether ->task_list was for a wait-queue head or a wait-queue entry.
Furthermore, there's a number of wait-queue users where the lists are
not for 'tasks' but other entities (poll tables, etc.), in which case
the 'task_list' name is actively confusing.
To clear this all up, name the wait-queue head and entry list structure
fields unambiguously:
struct wait_queue_head::task_list => ::head
struct wait_queue_entry::task_list => ::entry
For example, this code:
rqw->wait.task_list.next != &wait->task_list
... is was pretty unclear (to me) what it's doing, while now it's written this way:
rqw->wait.head.next != &wait->entry
... which makes it pretty clear that we are iterating a list until we see the head.
Other examples are:
list_for_each_entry_safe(pos, next, &x->task_list, task_list) {
list_for_each_entry(wq, &fence->wait.task_list, task_list) {
... where it's unclear (to me) what we are iterating, and during review it's
hard to tell whether it's trying to walk a wait-queue entry (which would be
a bug), while now it's written as:
list_for_each_entry_safe(pos, next, &x->head, entry) {
list_for_each_entry(wq, &fence->wait.head, entry) {
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
2017-06-20 13:06:46 +03:00
if ( list_empty ( & wq_entry - > entry ) )
2017-03-05 13:10:18 +03:00
__add_wait_queue ( wq_head , wq_entry ) ;
2008-10-16 09:01:38 +04:00
set_current_state ( state ) ;
2017-03-05 13:10:18 +03:00
spin_unlock_irqrestore ( & wq_head - > lock , flags ) ;
2005-04-17 02:20:36 +04:00
}
EXPORT_SYMBOL ( prepare_to_wait ) ;
2021-06-07 14:26:13 +03:00
/* Returns true if we are the first waiter in the queue, false otherwise. */
bool
2017-03-05 13:10:18 +03:00
prepare_to_wait_exclusive ( struct wait_queue_head * wq_head , struct wait_queue_entry * wq_entry , int state )
2005-04-17 02:20:36 +04:00
{
unsigned long flags ;
2021-06-07 14:26:13 +03:00
bool was_empty = false ;
2005-04-17 02:20:36 +04:00
2017-03-05 12:33:16 +03:00
wq_entry - > flags | = WQ_FLAG_EXCLUSIVE ;
2017-03-05 13:10:18 +03:00
spin_lock_irqsave ( & wq_head - > lock , flags ) ;
2021-06-07 14:26:13 +03:00
if ( list_empty ( & wq_entry - > entry ) ) {
was_empty = list_empty ( & wq_head - > head ) ;
2017-03-05 13:10:18 +03:00
__add_wait_queue_entry_tail ( wq_head , wq_entry ) ;
2021-06-07 14:26:13 +03:00
}
2008-10-16 09:01:38 +04:00
set_current_state ( state ) ;
2017-03-05 13:10:18 +03:00
spin_unlock_irqrestore ( & wq_head - > lock , flags ) ;
2021-06-07 14:26:13 +03:00
return was_empty ;
2005-04-17 02:20:36 +04:00
}
EXPORT_SYMBOL ( prepare_to_wait_exclusive ) ;
2017-03-05 12:33:16 +03:00
void init_wait_entry ( struct wait_queue_entry * wq_entry , int flags )
2016-09-06 17:00:55 +03:00
{
2017-03-05 12:33:16 +03:00
wq_entry - > flags = flags ;
wq_entry - > private = current ;
wq_entry - > func = autoremove_wake_function ;
sched/wait: Disambiguate wq_entry->task_list and wq_head->task_list naming
So I've noticed a number of instances where it was not obvious from the
code whether ->task_list was for a wait-queue head or a wait-queue entry.
Furthermore, there's a number of wait-queue users where the lists are
not for 'tasks' but other entities (poll tables, etc.), in which case
the 'task_list' name is actively confusing.
To clear this all up, name the wait-queue head and entry list structure
fields unambiguously:
struct wait_queue_head::task_list => ::head
struct wait_queue_entry::task_list => ::entry
For example, this code:
rqw->wait.task_list.next != &wait->task_list
... is was pretty unclear (to me) what it's doing, while now it's written this way:
rqw->wait.head.next != &wait->entry
... which makes it pretty clear that we are iterating a list until we see the head.
Other examples are:
list_for_each_entry_safe(pos, next, &x->task_list, task_list) {
list_for_each_entry(wq, &fence->wait.task_list, task_list) {
... where it's unclear (to me) what we are iterating, and during review it's
hard to tell whether it's trying to walk a wait-queue entry (which would be
a bug), while now it's written as:
list_for_each_entry_safe(pos, next, &x->head, entry) {
list_for_each_entry(wq, &fence->wait.head, entry) {
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
2017-06-20 13:06:46 +03:00
INIT_LIST_HEAD ( & wq_entry - > entry ) ;
2016-09-06 17:00:55 +03:00
}
EXPORT_SYMBOL ( init_wait_entry ) ;
2017-03-05 13:10:18 +03:00
long prepare_to_wait_event ( struct wait_queue_head * wq_head , struct wait_queue_entry * wq_entry , int state )
2013-10-07 20:18:24 +04:00
{
unsigned long flags ;
2016-09-08 19:48:15 +03:00
long ret = 0 ;
2013-10-07 20:18:24 +04:00
2017-03-05 13:10:18 +03:00
spin_lock_irqsave ( & wq_head - > lock , flags ) ;
2019-01-04 02:28:48 +03:00
if ( signal_pending_state ( state , current ) ) {
2016-09-08 19:48:15 +03:00
/*
* Exclusive waiter must not fail if it was selected by wakeup ,
* it should " consume " the condition we were waiting for .
*
* The caller will recheck the condition and return success if
* we were already woken up , we can not miss the event because
2017-03-05 13:10:18 +03:00
* wakeup locks / unlocks the same wq_head - > lock .
2016-09-08 19:48:15 +03:00
*
* But we need to ensure that set - condition + wakeup after that
* can ' t see us , it should wake up another exclusive waiter if
* we fail .
*/
sched/wait: Disambiguate wq_entry->task_list and wq_head->task_list naming
So I've noticed a number of instances where it was not obvious from the
code whether ->task_list was for a wait-queue head or a wait-queue entry.
Furthermore, there's a number of wait-queue users where the lists are
not for 'tasks' but other entities (poll tables, etc.), in which case
the 'task_list' name is actively confusing.
To clear this all up, name the wait-queue head and entry list structure
fields unambiguously:
struct wait_queue_head::task_list => ::head
struct wait_queue_entry::task_list => ::entry
For example, this code:
rqw->wait.task_list.next != &wait->task_list
... is was pretty unclear (to me) what it's doing, while now it's written this way:
rqw->wait.head.next != &wait->entry
... which makes it pretty clear that we are iterating a list until we see the head.
Other examples are:
list_for_each_entry_safe(pos, next, &x->task_list, task_list) {
list_for_each_entry(wq, &fence->wait.task_list, task_list) {
... where it's unclear (to me) what we are iterating, and during review it's
hard to tell whether it's trying to walk a wait-queue entry (which would be
a bug), while now it's written as:
list_for_each_entry_safe(pos, next, &x->head, entry) {
list_for_each_entry(wq, &fence->wait.head, entry) {
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
2017-06-20 13:06:46 +03:00
list_del_init ( & wq_entry - > entry ) ;
2016-09-08 19:48:15 +03:00
ret = - ERESTARTSYS ;
} else {
sched/wait: Disambiguate wq_entry->task_list and wq_head->task_list naming
So I've noticed a number of instances where it was not obvious from the
code whether ->task_list was for a wait-queue head or a wait-queue entry.
Furthermore, there's a number of wait-queue users where the lists are
not for 'tasks' but other entities (poll tables, etc.), in which case
the 'task_list' name is actively confusing.
To clear this all up, name the wait-queue head and entry list structure
fields unambiguously:
struct wait_queue_head::task_list => ::head
struct wait_queue_entry::task_list => ::entry
For example, this code:
rqw->wait.task_list.next != &wait->task_list
... is was pretty unclear (to me) what it's doing, while now it's written this way:
rqw->wait.head.next != &wait->entry
... which makes it pretty clear that we are iterating a list until we see the head.
Other examples are:
list_for_each_entry_safe(pos, next, &x->task_list, task_list) {
list_for_each_entry(wq, &fence->wait.task_list, task_list) {
... where it's unclear (to me) what we are iterating, and during review it's
hard to tell whether it's trying to walk a wait-queue entry (which would be
a bug), while now it's written as:
list_for_each_entry_safe(pos, next, &x->head, entry) {
list_for_each_entry(wq, &fence->wait.head, entry) {
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
2017-06-20 13:06:46 +03:00
if ( list_empty ( & wq_entry - > entry ) ) {
2017-03-05 12:33:16 +03:00
if ( wq_entry - > flags & WQ_FLAG_EXCLUSIVE )
2017-03-05 13:10:18 +03:00
__add_wait_queue_entry_tail ( wq_head , wq_entry ) ;
2016-09-08 19:48:15 +03:00
else
2017-03-05 13:10:18 +03:00
__add_wait_queue ( wq_head , wq_entry ) ;
2016-09-08 19:48:15 +03:00
}
set_current_state ( state ) ;
2013-10-07 20:18:24 +04:00
}
2017-03-05 13:10:18 +03:00
spin_unlock_irqrestore ( & wq_head - > lock , flags ) ;
2013-10-07 20:18:24 +04:00
2016-09-08 19:48:15 +03:00
return ret ;
2013-10-07 20:18:24 +04:00
}
EXPORT_SYMBOL ( prepare_to_wait_event ) ;
2017-03-08 02:33:14 +03:00
/*
* Note ! These two wait functions are entered with the
* wait - queue lock held ( and interrupts off in the _irq
* case ) , so there is no race with testing the wakeup
* condition in the caller before they add the wait
* entry to the wake queue .
*/
2017-06-20 13:06:13 +03:00
int do_wait_intr ( wait_queue_head_t * wq , wait_queue_entry_t * wait )
2017-03-08 02:33:14 +03:00
{
sched/wait: Disambiguate wq_entry->task_list and wq_head->task_list naming
So I've noticed a number of instances where it was not obvious from the
code whether ->task_list was for a wait-queue head or a wait-queue entry.
Furthermore, there's a number of wait-queue users where the lists are
not for 'tasks' but other entities (poll tables, etc.), in which case
the 'task_list' name is actively confusing.
To clear this all up, name the wait-queue head and entry list structure
fields unambiguously:
struct wait_queue_head::task_list => ::head
struct wait_queue_entry::task_list => ::entry
For example, this code:
rqw->wait.task_list.next != &wait->task_list
... is was pretty unclear (to me) what it's doing, while now it's written this way:
rqw->wait.head.next != &wait->entry
... which makes it pretty clear that we are iterating a list until we see the head.
Other examples are:
list_for_each_entry_safe(pos, next, &x->task_list, task_list) {
list_for_each_entry(wq, &fence->wait.task_list, task_list) {
... where it's unclear (to me) what we are iterating, and during review it's
hard to tell whether it's trying to walk a wait-queue entry (which would be
a bug), while now it's written as:
list_for_each_entry_safe(pos, next, &x->head, entry) {
list_for_each_entry(wq, &fence->wait.head, entry) {
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
2017-06-20 13:06:46 +03:00
if ( likely ( list_empty ( & wait - > entry ) ) )
2017-06-20 13:06:13 +03:00
__add_wait_queue_entry_tail ( wq , wait ) ;
2017-03-08 02:33:14 +03:00
set_current_state ( TASK_INTERRUPTIBLE ) ;
if ( signal_pending ( current ) )
return - ERESTARTSYS ;
spin_unlock ( & wq - > lock ) ;
schedule ( ) ;
spin_lock ( & wq - > lock ) ;
sched: Clean up and harmonize the coding style of the scheduler code base
A good number of small style inconsistencies have accumulated
in the scheduler core, so do a pass over them to harmonize
all these details:
- fix speling in comments,
- use curly braces for multi-line statements,
- remove unnecessary parentheses from integer literals,
- capitalize consistently,
- remove stray newlines,
- add comments where necessary,
- remove invalid/unnecessary comments,
- align structure definitions and other data types vertically,
- add missing newlines for increased readability,
- fix vertical tabulation where it's misaligned,
- harmonize preprocessor conditional block labeling
and vertical alignment,
- remove line-breaks where they uglify the code,
- add newline after local variable definitions,
No change in functionality:
md5:
1191fa0a890cfa8132156d2959d7e9e2 built-in.o.before.asm
1191fa0a890cfa8132156d2959d7e9e2 built-in.o.after.asm
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
2018-03-03 16:01:12 +03:00
2017-03-08 02:33:14 +03:00
return 0 ;
}
EXPORT_SYMBOL ( do_wait_intr ) ;
2017-06-20 13:06:13 +03:00
/*
 * do_wait_intr_irq - perform one interruptible sleep step on @wq.
 *
 * Appends @wait to the tail of @wq when its list entry is not linked yet,
 * marks the current task TASK_INTERRUPTIBLE and, unless a signal is pending,
 * releases wq->lock (spin_unlock_irq), calls schedule() and re-takes the
 * lock (spin_lock_irq) before returning.
 *
 * Because the lock is released and re-acquired here, the caller is expected
 * to enter with wq->lock held.
 *
 * Returns 0 once schedule() has returned, or -ERESTARTSYS when a signal is
 * pending. On the -ERESTARTSYS path the lock is not touched, @wait stays
 * queued and the task state is still TASK_INTERRUPTIBLE; undoing that is
 * left to the caller.
 */
int do_wait_intr_irq(wait_queue_head_t *wq, wait_queue_entry_t *wait)
{
	/* Only enqueue when the entry is not already on a list. */
	if (likely(list_empty(&wait->entry)))
		__add_wait_queue_entry_tail(wq, wait);

	set_current_state(TASK_INTERRUPTIBLE);
	if (!signal_pending(current)) {
		/* Sleep with the queue lock dropped, then take it back. */
		spin_unlock_irq(&wq->lock);
		schedule();
		spin_lock_irq(&wq->lock);

		return 0;
	}

	return -ERESTARTSYS;
}
EXPORT_SYMBOL(do_wait_intr_irq);
2010-10-27 01:17:25 +04:00
/**
2009-02-05 02:12:14 +03:00
* finish_wait - clean up after waiting in a queue
2017-03-05 13:10:18 +03:00
* @ wq_head : waitqueue waited on
2017-03-05 12:33:16 +03:00
* @ wq_entry : wait descriptor
2009-02-05 02:12:14 +03:00
*
* Sets current thread back to running state and removes
* the wait descriptor from the given waitqueue if still
* queued .
*/
2017-03-05 13:10:18 +03:00
void finish_wait ( struct wait_queue_head * wq_head , struct wait_queue_entry * wq_entry )
2005-04-17 02:20:36 +04:00
{
unsigned long flags ;
__set_current_state ( TASK_RUNNING ) ;
/*
* We can check for list emptiness outside the lock
* IFF :
* - we use the " careful " check that verifies both
* the next and prev pointers , so that there cannot
* be any half - pending updates in progress on other
* CPU ' s that we haven ' t seen yet ( and that might
* still change the stack area .
* and
* - all other users take the lock ( ie we can only
* have _one_ other CPU that looks at or modifies
* the list ) .
*/
sched/wait: Disambiguate wq_entry->task_list and wq_head->task_list naming
So I've noticed a number of instances where it was not obvious from the
code whether ->task_list was for a wait-queue head or a wait-queue entry.
Furthermore, there's a number of wait-queue users where the lists are
not for 'tasks' but other entities (poll tables, etc.), in which case
the 'task_list' name is actively confusing.
To clear this all up, name the wait-queue head and entry list structure
fields unambiguously:
struct wait_queue_head::task_list => ::head
struct wait_queue_entry::task_list => ::entry
For example, this code:
rqw->wait.task_list.next != &wait->task_list
... is was pretty unclear (to me) what it's doing, while now it's written this way:
rqw->wait.head.next != &wait->entry
... which makes it pretty clear that we are iterating a list until we see the head.
Other examples are:
list_for_each_entry_safe(pos, next, &x->task_list, task_list) {
list_for_each_entry(wq, &fence->wait.task_list, task_list) {
... where it's unclear (to me) what we are iterating, and during review it's
hard to tell whether it's trying to walk a wait-queue entry (which would be
a bug), while now it's written as:
list_for_each_entry_safe(pos, next, &x->head, entry) {
list_for_each_entry(wq, &fence->wait.head, entry) {
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
2017-06-20 13:06:46 +03:00
if ( ! list_empty_careful ( & wq_entry - > entry ) ) {
2017-03-05 13:10:18 +03:00
spin_lock_irqsave ( & wq_head - > lock , flags ) ;
sched/wait: Disambiguate wq_entry->task_list and wq_head->task_list naming
So I've noticed a number of instances where it was not obvious from the
code whether ->task_list was for a wait-queue head or a wait-queue entry.
Furthermore, there's a number of wait-queue users where the lists are
not for 'tasks' but other entities (poll tables, etc.), in which case
the 'task_list' name is actively confusing.
To clear this all up, name the wait-queue head and entry list structure
fields unambiguously:
struct wait_queue_head::task_list => ::head
struct wait_queue_entry::task_list => ::entry
For example, this code:
rqw->wait.task_list.next != &wait->task_list
... is was pretty unclear (to me) what it's doing, while now it's written this way:
rqw->wait.head.next != &wait->entry
... which makes it pretty clear that we are iterating a list until we see the head.
Other examples are:
list_for_each_entry_safe(pos, next, &x->task_list, task_list) {
list_for_each_entry(wq, &fence->wait.task_list, task_list) {
... where it's unclear (to me) what we are iterating, and during review it's
hard to tell whether it's trying to walk a wait-queue entry (which would be
a bug), while now it's written as:
list_for_each_entry_safe(pos, next, &x->head, entry) {
list_for_each_entry(wq, &fence->wait.head, entry) {
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
2017-06-20 13:06:46 +03:00
list_del_init ( & wq_entry - > entry ) ;
2017-03-05 13:10:18 +03:00
spin_unlock_irqrestore ( & wq_head - > lock , flags ) ;
2005-04-17 02:20:36 +04:00
}
}
EXPORT_SYMBOL ( finish_wait ) ;
2017-03-05 12:33:16 +03:00
int autoremove_wake_function ( struct wait_queue_entry * wq_entry , unsigned mode , int sync , void * key )
2005-04-17 02:20:36 +04:00
{
2017-03-05 12:33:16 +03:00
int ret = default_wake_function ( wq_entry , mode , sync , key ) ;
2005-04-17 02:20:36 +04:00
if ( ret )
2020-07-23 22:33:41 +03:00
list_del_init_careful ( & wq_entry - > entry ) ;
sched: Clean up and harmonize the coding style of the scheduler code base
A good number of small style inconsistencies have accumulated
in the scheduler core, so do a pass over them to harmonize
all these details:
- fix speling in comments,
- use curly braces for multi-line statements,
- remove unnecessary parentheses from integer literals,
- capitalize consistently,
- remove stray newlines,
- add comments where necessary,
- remove invalid/unnecessary comments,
- align structure definitions and other data types vertically,
- add missing newlines for increased readability,
- fix vertical tabulation where it's misaligned,
- harmonize preprocessor conditional block labeling
and vertical alignment,
- remove line-breaks where they uglify the code,
- add newline after local variable definitions,
No change in functionality:
md5:
1191fa0a890cfa8132156d2959d7e9e2 built-in.o.before.asm
1191fa0a890cfa8132156d2959d7e9e2 built-in.o.after.asm
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
2018-03-03 16:01:12 +03:00
2005-04-17 02:20:36 +04:00
return ret ;
}
EXPORT_SYMBOL ( autoremove_wake_function ) ;
2014-10-31 13:57:30 +03:00
static inline bool is_kthread_should_stop ( void )
{
return ( current - > flags & PF_KTHREAD ) & & kthread_should_stop ( ) ;
}
2014-09-24 12:18:47 +04:00
/*
 * DEFINE_WAIT_FUNC(wait, woken_wake_function);
 *
 * add_wait_queue(&wq_head, &wait);
 * for (;;) {
 *     if (condition)
 *         break;
 *
 *     // in wait_woken()                       // in woken_wake_function()
 *
 *     p->state = mode;                         wq_entry->flags |= WQ_FLAG_WOKEN;
 *     smp_mb(); // A                           try_to_wake_up():
 *     if (!(wq_entry->flags & WQ_FLAG_WOKEN))     <full barrier>
 *         schedule()                              if (p->state & mode)
 *     p->state = TASK_RUNNING;                        p->state = TASK_RUNNING;
 *     wq_entry->flags &= ~WQ_FLAG_WOKEN;       ~~~~~~~~~~~~~~~~~~
 *     smp_mb(); // B                           condition = true;
 * }                                            smp_mb(); // C
 * remove_wait_queue(&wq_head, &wait);          wq_entry->flags |= WQ_FLAG_WOKEN;
 */
2017-03-05 12:33:16 +03:00
long wait_woken ( struct wait_queue_entry * wq_entry , unsigned mode , long timeout )
2014-09-24 12:18:47 +04:00
{
/*
2018-07-16 21:06:01 +03:00
* The below executes an smp_mb ( ) , which matches with the full barrier
* executed by the try_to_wake_up ( ) in woken_wake_function ( ) such that
* either we see the store to wq_entry - > flags in woken_wake_function ( )
* or woken_wake_function ( ) sees our store to current - > state .
2014-09-24 12:18:47 +04:00
*/
2018-07-16 21:06:01 +03:00
set_current_state ( mode ) ; /* A */
2017-03-05 12:33:16 +03:00
if ( ! ( wq_entry - > flags & WQ_FLAG_WOKEN ) & & ! is_kthread_should_stop ( ) )
2014-09-24 12:18:47 +04:00
timeout = schedule_timeout ( timeout ) ;
__set_current_state ( TASK_RUNNING ) ;
/*
2018-07-16 21:06:01 +03:00
* The below executes an smp_mb ( ) , which matches with the smp_mb ( ) ( C )
* in woken_wake_function ( ) such that either we see the wait condition
* being true or the store to wq_entry - > flags in woken_wake_function ( )
* follows ours in the coherence order .
2014-09-24 12:18:47 +04:00
*/
2017-03-05 12:33:16 +03:00
smp_store_mb ( wq_entry - > flags , wq_entry - > flags & ~ WQ_FLAG_WOKEN ) ; /* B */
2014-09-24 12:18:47 +04:00
return timeout ;
}
EXPORT_SYMBOL ( wait_woken ) ;
2017-03-05 12:33:16 +03:00
int woken_wake_function ( struct wait_queue_entry * wq_entry , unsigned mode , int sync , void * key )
2014-09-24 12:18:47 +04:00
{
2018-07-16 21:06:01 +03:00
/* Pairs with the smp_store_mb() in wait_woken(). */
smp_mb ( ) ; /* C */
2017-03-05 12:33:16 +03:00
wq_entry - > flags | = WQ_FLAG_WOKEN ;
2014-09-24 12:18:47 +04:00
2017-03-05 12:33:16 +03:00
return default_wake_function ( wq_entry , mode , sync , key ) ;
2014-09-24 12:18:47 +04:00
}
EXPORT_SYMBOL ( woken_wake_function ) ;