// SPDX-License-Identifier: GPL-2.0-only
#include <linux/atomic.h>
#include <linux/percpu.h>
#include <linux/wait.h>
#include <linux/lockdep.h>
#include <linux/percpu-rwsem.h>
#include <linux/rcupdate.h>
#include <linux/sched.h>
#include <linux/sched/task.h>
#include <linux/sched/debug.h>
#include <linux/errno.h>
#include <trace/events/lock.h>

int __percpu_init_rwsem(struct percpu_rw_semaphore *sem,
			const char *name, struct lock_class_key *key)
{
	sem->read_count = alloc_percpu(int);
	if (unlikely(!sem->read_count))
		return -ENOMEM;

	rcu_sync_init(&sem->rss);
	rcuwait_init(&sem->writer);
	init_waitqueue_head(&sem->waiters);
	atomic_set(&sem->block, 0);
#ifdef CONFIG_DEBUG_LOCK_ALLOC
	debug_check_no_locks_freed((void *)sem, sizeof(*sem));
	lockdep_init_map(&sem->dep_map, name, key, 0);
#endif
	return 0;
}
EXPORT_SYMBOL_GPL(__percpu_init_rwsem);

void percpu_free_rwsem(struct percpu_rw_semaphore *sem)
{
	/*
	 * XXX: temporary kludge. The error path in alloc_super()
	 * assumes that percpu_free_rwsem() is safe after kzalloc().
	 */
	if (!sem->read_count)
		return;

	rcu_sync_dtor(&sem->rss);
	free_percpu(sem->read_count);
	sem->read_count = NULL; /* catch use after free bugs */
}
EXPORT_SYMBOL_GPL(percpu_free_rwsem);
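
/*
 * Illustrative sketch, not part of this file: dynamic setup and teardown of
 * an embedded percpu_rw_semaphore via the percpu_init_rwsem() and
 * percpu_free_rwsem() wrappers from <linux/percpu-rwsem.h>. The demo_object
 * structure and its helpers are hypothetical.
 */
#if 0
struct demo_object {
	struct percpu_rw_semaphore rwsem;
	int state;
};

static int demo_object_init(struct demo_object *obj)
{
	/* Allocates the per-CPU read_count; can fail with -ENOMEM. */
	return percpu_init_rwsem(&obj->rwsem);
}

static void demo_object_destroy(struct demo_object *obj)
{
	/* Must not be called while the semaphore is held or waited on. */
	percpu_free_rwsem(&obj->rwsem);
}
#endif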
2012-12-18 04:01:32 +04:00
2019-10-31 14:34:23 +03:00
static bool __percpu_down_read_trylock ( struct percpu_rw_semaphore * sem )
2012-12-18 04:01:32 +04:00
{
2020-09-15 17:07:50 +03:00
this_cpu_inc ( * sem - > read_count ) ;
2019-10-30 22:17:51 +03:00
2016-07-14 21:08:46 +03:00
/*
* Due to having preemption disabled the decrement happens on
* the same CPU as the increment , avoiding the
* increment - on - one - CPU - and - decrement - on - another problem .
*
2019-10-30 22:30:41 +03:00
* If the reader misses the writer ' s assignment of sem - > block , then the
* writer is guaranteed to see the reader ' s increment .
2016-07-14 21:08:46 +03:00
*
* Conversely , any readers that increment their sem - > read_count after
2019-10-30 22:30:41 +03:00
* the writer looks are guaranteed to see the sem - > block value , which
* in turn means that they are guaranteed to immediately decrement
* their sem - > read_count , so that it doesn ' t matter that the writer
* missed them .
2016-07-14 21:08:46 +03:00
*/
2012-12-18 04:01:32 +04:00
2016-07-14 21:08:46 +03:00
smp_mb ( ) ; /* A matches D */
2012-12-18 04:01:32 +04:00
2016-07-14 21:08:46 +03:00
/*
2019-10-30 22:30:41 +03:00
* If ! sem - > block the critical section starts here , matched by the
2016-07-14 21:08:46 +03:00
* release in percpu_up_write ( ) .
*/
2019-10-30 22:30:41 +03:00
if ( likely ( ! atomic_read_acquire ( & sem - > block ) ) )
2019-10-30 22:12:37 +03:00
return true ;
2012-12-18 04:01:32 +04:00
2020-09-15 17:07:50 +03:00
this_cpu_dec ( * sem - > read_count ) ;
2019-10-31 14:34:23 +03:00
/* Prod writer to re-evaluate readers_active_check() */
rcuwait_wake_up ( & sem - > writer ) ;
return false ;
}

static inline bool __percpu_down_write_trylock(struct percpu_rw_semaphore *sem)
{
	if (atomic_read(&sem->block))
		return false;

	return atomic_xchg(&sem->block, 1) == 0;
}

static bool __percpu_rwsem_trylock(struct percpu_rw_semaphore *sem, bool reader)
{
	if (reader) {
		bool ret;

		preempt_disable();
		ret = __percpu_down_read_trylock(sem);
		preempt_enable();

		return ret;
	}
	return __percpu_down_write_trylock(sem);
}

/*
 * The return value of wait_queue_entry::func means:
 *
 *  <0 - error, wakeup is terminated and the error is returned
 *   0 - no wakeup, a next waiter is tried
 *  >0 - woken, if EXCLUSIVE, counted towards @nr_exclusive.
 *
 * We use EXCLUSIVE for both readers and writers to preserve FIFO order,
 * and play games with the return value to allow waking multiple readers.
 *
 * Specifically, we wake readers until we've woken a single writer, or until a
 * trylock fails.
 */
static int percpu_rwsem_wake_function(struct wait_queue_entry *wq_entry,
				      unsigned int mode, int wake_flags,
				      void *key)
{
	bool reader = wq_entry->flags & WQ_FLAG_CUSTOM;
	struct percpu_rw_semaphore *sem = key;
	struct task_struct *p;

	/* concurrent against percpu_down_write(), can get stolen */
	if (!__percpu_rwsem_trylock(sem, reader))
		return 1;

	p = get_task_struct(wq_entry->private);
	list_del_init(&wq_entry->entry);
	smp_store_release(&wq_entry->private, NULL);

	wake_up_process(p);
	put_task_struct(p);

	return !reader; /* wake (readers until) 1 writer */
}

static void percpu_rwsem_wait(struct percpu_rw_semaphore *sem, bool reader)
{
	DEFINE_WAIT_FUNC(wq_entry, percpu_rwsem_wake_function);
	bool wait;

	spin_lock_irq(&sem->waiters.lock);
	/*
	 * Serialize against the wakeup in percpu_up_write(), if we fail
	 * the trylock, the wakeup must see us on the list.
	 */
	wait = !__percpu_rwsem_trylock(sem, reader);
	if (wait) {
		wq_entry.flags |= WQ_FLAG_EXCLUSIVE | reader * WQ_FLAG_CUSTOM;
		__add_wait_queue_entry_tail(&sem->waiters, &wq_entry);
	}
	spin_unlock_irq(&sem->waiters.lock);

	while (wait) {
		set_current_state(TASK_UNINTERRUPTIBLE);
		if (!smp_load_acquire(&wq_entry.private))
			break;
		schedule();
	}
	__set_current_state(TASK_RUNNING);
}

bool __sched __percpu_down_read(struct percpu_rw_semaphore *sem, bool try)
{
	if (__percpu_down_read_trylock(sem))
		return true;

	if (try)
		return false;

	trace_contention_begin(sem, LCB_F_PERCPU | LCB_F_READ);
	preempt_enable();
	percpu_rwsem_wait(sem, /* .reader = */ true);
	preempt_disable();
	trace_contention_end(sem, 0);

	return true;
}
EXPORT_SYMBOL_GPL(__percpu_down_read);
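
/*
 * Illustrative reader-side usage, not part of this file: callers take the
 * lock through the percpu_down_read()/percpu_up_read() inlines in
 * <linux/percpu-rwsem.h>, which use the per-CPU fast path while no writer
 * is around and fall back to __percpu_down_read() above otherwise. The
 * demo_rwsem and demo_state objects are hypothetical.
 */
#if 0
static DEFINE_STATIC_PERCPU_RWSEM(demo_rwsem);
static int demo_state;

static int demo_read_state(void)
{
	int val;

	percpu_down_read(&demo_rwsem);
	val = demo_state;		/* read-side critical section */
	percpu_up_read(&demo_rwsem);

	return val;
}
#endif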

#define per_cpu_sum(var)						\
({									\
	typeof(var) __sum = 0;						\
	int cpu;							\
	compiletime_assert_atomic_type(__sum);				\
	for_each_possible_cpu(cpu)					\
		__sum += per_cpu(var, cpu);				\
	__sum;								\
})

bool percpu_is_read_locked(struct percpu_rw_semaphore *sem)
{
	return per_cpu_sum(*sem->read_count) != 0 && !atomic_read(&sem->block);
}
EXPORT_SYMBOL_GPL(percpu_is_read_locked);
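
/*
 * Illustrative only, not part of this file: a hypothetical debug helper
 * built on percpu_is_read_locked(), e.g. for sanity checks in code that
 * expects to run with the read side held. The result is a snapshot and is
 * meant for diagnostics, not for locking decisions.
 */
#if 0
static inline void demo_assert_read_locked(struct percpu_rw_semaphore *sem)
{
	WARN_ON_ONCE(!percpu_is_read_locked(sem));
}
#endif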

/*
 * Return true if the modular sum of the sem->read_count per-CPU variable is
 * zero.  If this sum is zero, then it is stable due to the fact that if any
 * newly arriving readers increment a given counter, they will immediately
 * decrement that same counter.
 *
 * Assumes sem->block is set.
 */
static bool readers_active_check(struct percpu_rw_semaphore *sem)
{
	if (per_cpu_sum(*sem->read_count) != 0)
		return false;

	/*
	 * If we observed the decrement, ensure we see the entire critical
	 * section.
	 */

	smp_mb(); /* C matches B */

	return true;
}

void __sched percpu_down_write(struct percpu_rw_semaphore *sem)
{
	bool contended = false;

	might_sleep();
	rwsem_acquire(&sem->dep_map, 0, 0, _RET_IP_);

	/* Notify readers to take the slow path. */
	rcu_sync_enter(&sem->rss);

	/*
	 * Try to set sem->block; this provides writer-writer exclusion.
	 * Having sem->block set makes new readers block.
	 */
	if (!__percpu_down_write_trylock(sem)) {
		trace_contention_begin(sem, LCB_F_PERCPU | LCB_F_WRITE);
		percpu_rwsem_wait(sem, /* .reader = */ false);
		contended = true;
	}

	/* smp_mb() implied by __percpu_down_write_trylock() on success -- D matches A */

	/*
	 * If they don't see our store of sem->block, then we are guaranteed to
	 * see their sem->read_count increment, and therefore will wait for
	 * them.
	 */

	/* Wait for all active readers to complete. */
	rcuwait_wait_event(&sem->writer, readers_active_check(sem),
			   TASK_UNINTERRUPTIBLE);

	if (contended)
		trace_contention_end(sem, 0);
}
EXPORT_SYMBOL_GPL(percpu_down_write);
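
/*
 * Illustrative writer-side usage, not part of this file, pairing
 * percpu_down_write() with percpu_up_write() below. demo_rwsem and
 * demo_state are the hypothetical objects from the reader sketch above.
 */
#if 0
static void demo_write_state(int new_val)
{
	percpu_down_write(&demo_rwsem);	/* blocks new readers, waits out old ones */
	demo_state = new_val;		/* writer-exclusive critical section */
	percpu_up_write(&demo_rwsem);	/* eventually restores the reader fast path */
}
#endif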

void percpu_up_write(struct percpu_rw_semaphore *sem)
{
	rwsem_release(&sem->dep_map, _RET_IP_);

	/*
	 * Signal the writer is done, no fast path yet.
	 *
	 * One reason that we cannot just immediately flip to readers_fast is
	 * that new readers might fail to see the results of this writer's
	 * critical section.
	 *
	 * Therefore we force it through the slow path which guarantees an
	 * acquire and thereby guarantees the critical section's consistency.
	 */
	atomic_set_release(&sem->block, 0);

	/*
	 * Prod any pending reader/writer to make progress.
	 */
	__wake_up(&sem->waiters, TASK_NORMAL, 1, sem);

	/*
	 * Once this completes (at least one RCU-sched grace period hence) the
	 * reader fast path will be available again.  Safe to use outside the
	 * exclusive write lock because it's counting.
	 */
	rcu_sync_exit(&sem->rss);
}
EXPORT_SYMBOL_GPL(percpu_up_write);