2013-06-01 02:26:45 +04:00
# define pr_fmt(fmt) "%s: " fmt "\n", __func__
# include <linux/kernel.h>
2014-09-24 21:31:49 +04:00
# include <linux/sched.h>
# include <linux/wait.h>
2013-06-01 02:26:45 +04:00
# include <linux/percpu-refcount.h>
/*
 * Initially, a percpu refcount is just a set of percpu counters. Initially, we
 * don't try to detect the ref hitting 0 - which means that get/put can just
 * increment or decrement the local counter. Note that the counter on a
 * particular cpu can (and will) wrap - this is fine, when we go to shutdown the
 * percpu counters will all sum to the correct value
 *
 * (More precisely: because modular arithmetic is commutative the sum of all the
 * percpu_count vars will be equal to what it would have been if all the gets
 * and puts were done to a single integer, even if some of the percpu integers
 * overflow or underflow).
 *
 * The real trick to implementing percpu refcounts is shutdown. We can't detect
 * the ref hitting 0 on every put - this would require global synchronization
 * and defeat the whole purpose of using percpu refs.
 *
 * What we do is require the user to keep track of the initial refcount; we know
 * the ref can't hit 0 before the user drops the initial ref, so as long as we
 * convert to non percpu mode before the initial ref is dropped everything
 * works.
 *
 * Converting to non percpu mode is done with some RCUish stuff in
 * percpu_ref_kill. Additionally, we need a bias value so that the
 * atomic_long_t can't hit 0 before we've added up all the percpu refs.
 */
/*
 * Bias added to ->count while in percpu mode: keeps the atomic counter
 * well away from zero until all the percpu counts have been folded in
 * during the switch to atomic mode.
 */
#define PERCPU_COUNT_BIAS	(1LU << (BITS_PER_LONG - 1))

/* serializes all mode-switching operations (atomic <-> percpu) */
static DEFINE_SPINLOCK(percpu_ref_switch_lock);
/* woken whenever an in-flight confirm_switch callback finishes */
static DECLARE_WAIT_QUEUE_HEAD(percpu_ref_switch_waitq);
2014-09-24 21:31:48 +04:00
static unsigned long __percpu * percpu_count_ptr ( struct percpu_ref * ref )
2014-06-28 16:10:13 +04:00
{
2014-09-24 21:31:48 +04:00
return ( unsigned long __percpu * )
2014-09-24 21:31:49 +04:00
( ref - > percpu_count_ptr & ~ __PERCPU_REF_ATOMIC_DEAD ) ;
2014-06-28 16:10:13 +04:00
}
2013-06-01 02:26:45 +04:00
/**
* percpu_ref_init - initialize a percpu refcount
2013-06-13 07:43:06 +04:00
* @ ref : percpu_ref to initialize
* @ release : function which will be called when refcount hits 0
2014-09-24 21:31:50 +04:00
* @ flags : PERCPU_REF_INIT_ * flags
2014-09-08 04:51:30 +04:00
* @ gfp : allocation mask to use
2013-06-01 02:26:45 +04:00
*
2014-09-24 21:31:50 +04:00
* Initializes @ ref . If @ flags is zero , @ ref starts in percpu mode with a
* refcount of 1 ; analagous to atomic_long_set ( ref , 1 ) . See the
* definitions of PERCPU_REF_INIT_ * flags for flag behaviors .
2013-06-01 02:26:45 +04:00
*
* Note that @ release must not sleep - it may potentially be called from RCU
* callback context by percpu_ref_kill ( ) .
*/
2014-09-08 04:51:30 +04:00
int percpu_ref_init ( struct percpu_ref * ref , percpu_ref_func_t * release ,
2014-09-24 21:31:50 +04:00
unsigned int flags , gfp_t gfp )
2013-06-01 02:26:45 +04:00
{
2014-09-24 21:31:49 +04:00
size_t align = max_t ( size_t , 1 < < __PERCPU_REF_FLAG_BITS ,
__alignof__ ( unsigned long ) ) ;
2014-09-24 21:31:50 +04:00
unsigned long start_count = 0 ;
2013-06-01 02:26:45 +04:00
2014-09-24 21:31:49 +04:00
ref - > percpu_count_ptr = ( unsigned long )
__alloc_percpu_gfp ( sizeof ( unsigned long ) , align , gfp ) ;
2014-09-24 21:31:48 +04:00
if ( ! ref - > percpu_count_ptr )
2013-06-01 02:26:45 +04:00
return - ENOMEM ;
2014-09-24 21:31:50 +04:00
ref - > force_atomic = flags & PERCPU_REF_INIT_ATOMIC ;
2014-09-24 21:31:50 +04:00
if ( flags & ( PERCPU_REF_INIT_ATOMIC | PERCPU_REF_INIT_DEAD ) )
ref - > percpu_count_ptr | = __PERCPU_REF_ATOMIC ;
else
start_count + = PERCPU_COUNT_BIAS ;
if ( flags & PERCPU_REF_INIT_DEAD )
ref - > percpu_count_ptr | = __PERCPU_REF_DEAD ;
else
start_count + + ;
atomic_long_set ( & ref - > count , start_count ) ;
2013-06-01 02:26:45 +04:00
ref - > release = release ;
return 0 ;
}
2013-10-17 00:47:01 +04:00
EXPORT_SYMBOL_GPL ( percpu_ref_init ) ;
2013-06-01 02:26:45 +04:00
2013-06-13 07:52:35 +04:00
/**
2014-06-28 16:10:14 +04:00
* percpu_ref_exit - undo percpu_ref_init ( )
* @ ref : percpu_ref to exit
2013-06-13 07:52:35 +04:00
*
2014-06-28 16:10:14 +04:00
* This function exits @ ref . The caller is responsible for ensuring that
* @ ref is no longer in active use . The usual places to invoke this
* function from are the @ ref - > release ( ) callback or in init failure path
* where percpu_ref_init ( ) succeeded but other parts of the initialization
* of the embedding object failed .
2013-06-13 07:52:35 +04:00
*/
2014-06-28 16:10:14 +04:00
void percpu_ref_exit ( struct percpu_ref * ref )
2013-06-13 07:52:35 +04:00
{
2014-09-24 21:31:48 +04:00
unsigned long __percpu * percpu_count = percpu_count_ptr ( ref ) ;
2013-06-13 07:52:35 +04:00
2014-09-24 21:31:48 +04:00
if ( percpu_count ) {
free_percpu ( percpu_count ) ;
2014-09-24 21:31:49 +04:00
ref - > percpu_count_ptr = __PERCPU_REF_ATOMIC_DEAD ;
2013-06-13 07:52:35 +04:00
}
}
2014-06-28 16:10:14 +04:00
EXPORT_SYMBOL_GPL ( percpu_ref_exit ) ;
2013-06-13 07:52:35 +04:00
2014-09-24 21:31:49 +04:00
static void percpu_ref_call_confirm_rcu ( struct rcu_head * rcu )
{
struct percpu_ref * ref = container_of ( rcu , struct percpu_ref , rcu ) ;
ref - > confirm_switch ( ref ) ;
ref - > confirm_switch = NULL ;
wake_up_all ( & percpu_ref_switch_waitq ) ;
/* drop ref from percpu_ref_switch_to_atomic() */
percpu_ref_put ( ref ) ;
}
static void percpu_ref_switch_to_atomic_rcu ( struct rcu_head * rcu )
2013-06-01 02:26:45 +04:00
{
struct percpu_ref * ref = container_of ( rcu , struct percpu_ref , rcu ) ;
2014-09-24 21:31:48 +04:00
unsigned long __percpu * percpu_count = percpu_count_ptr ( ref ) ;
2014-09-20 09:27:25 +04:00
unsigned long count = 0 ;
2013-06-01 02:26:45 +04:00
int cpu ;
for_each_possible_cpu ( cpu )
2014-09-24 21:31:48 +04:00
count + = * per_cpu_ptr ( percpu_count , cpu ) ;
2013-06-01 02:26:45 +04:00
2014-09-24 21:31:48 +04:00
pr_debug ( " global %ld percpu %ld " ,
2014-09-20 09:27:25 +04:00
atomic_long_read ( & ref - > count ) , ( long ) count ) ;
2013-06-01 02:26:45 +04:00
/*
* It ' s crucial that we sum the percpu counters _before_ adding the sum
* to & ref - > count ; since gets could be happening on one cpu while puts
* happen on another , adding a single cpu ' s count could cause
* @ ref - > count to hit 0 before we ' ve got a consistent value - but the
* sum of all the counts will be consistent and correct .
*
* Subtracting the bias value then has to happen _after_ adding count to
* & ref - > count ; we need the bias value to prevent & ref - > count from
* reaching 0 before we add the percpu counts . But doing it at the same
* time is equivalent and saves us atomic operations :
*/
2014-09-24 21:31:48 +04:00
atomic_long_add ( ( long ) count - PERCPU_COUNT_BIAS , & ref - > count ) ;
2013-06-01 02:26:45 +04:00
2014-09-20 09:27:25 +04:00
WARN_ONCE ( atomic_long_read ( & ref - > count ) < = 0 ,
2014-09-24 21:31:49 +04:00
" percpu ref (%pf) <= 0 (%ld) after switching to atomic " ,
2014-09-20 09:27:25 +04:00
ref - > release , atomic_long_read ( & ref - > count ) ) ;
2014-01-07 01:13:26 +04:00
2014-09-24 21:31:49 +04:00
/* @ref is viewed as dead on all CPUs, send out switch confirmation */
percpu_ref_call_confirm_rcu ( rcu ) ;
}
/* placeholder confirmation callback; used when the caller passes NULL */
static void percpu_ref_noop_confirm_switch(struct percpu_ref *ref)
{
}
/*
 * Begin switching @ref into atomic mode.  If already atomic, just invoke
 * @confirm_switch synchronously; otherwise schedule the percpu summation
 * via RCU so that all in-flight percpu gets/puts have finished first.
 */
static void __percpu_ref_switch_to_atomic(struct percpu_ref *ref,
					  percpu_ref_func_t *confirm_switch)
{
	if (ref->percpu_count_ptr & __PERCPU_REF_ATOMIC) {
		/* already in atomic mode - confirm immediately if asked */
		if (confirm_switch)
			confirm_switch(ref);
		return;
	}

	/* switching from percpu to atomic */
	ref->percpu_count_ptr |= __PERCPU_REF_ATOMIC;

	/*
	 * Non-NULL ->confirm_switch is used to indicate that switching is
	 * in progress.  Use noop one if unspecified.
	 */
	ref->confirm_switch = confirm_switch ?: percpu_ref_noop_confirm_switch;

	percpu_ref_get(ref);	/* put after confirmation */
	call_rcu_sched(&ref->rcu, percpu_ref_switch_to_atomic_rcu);
}
2014-09-24 21:31:48 +04:00
2014-09-24 21:31:49 +04:00
static void __percpu_ref_switch_to_percpu ( struct percpu_ref * ref )
2014-09-24 21:31:48 +04:00
{
2014-09-24 21:31:48 +04:00
unsigned long __percpu * percpu_count = percpu_count_ptr ( ref ) ;
2014-09-24 21:31:48 +04:00
int cpu ;
2014-09-24 21:31:48 +04:00
BUG_ON ( ! percpu_count ) ;
2014-09-24 21:31:48 +04:00
2014-09-24 21:31:49 +04:00
if ( ! ( ref - > percpu_count_ptr & __PERCPU_REF_ATOMIC ) )
return ;
atomic_long_add ( PERCPU_COUNT_BIAS , & ref - > count ) ;
2014-09-24 21:31:48 +04:00
/*
* Restore per - cpu operation . smp_store_release ( ) is paired with
2014-09-24 21:31:48 +04:00
* smp_read_barrier_depends ( ) in __ref_is_percpu ( ) and guarantees
* that the zeroing is visible to all percpu accesses which can see
2014-09-24 21:31:49 +04:00
* the following __PERCPU_REF_ATOMIC clearing .
2014-09-24 21:31:48 +04:00
*/
for_each_possible_cpu ( cpu )
2014-09-24 21:31:48 +04:00
* per_cpu_ptr ( percpu_count , cpu ) = 0 ;
2014-09-24 21:31:48 +04:00
2014-09-24 21:31:48 +04:00
smp_store_release ( & ref - > percpu_count_ptr ,
2014-09-24 21:31:49 +04:00
ref - > percpu_count_ptr & ~ __PERCPU_REF_ATOMIC ) ;
}
2015-09-30 00:47:19 +03:00
static void __percpu_ref_switch_mode ( struct percpu_ref * ref ,
percpu_ref_func_t * confirm_switch )
{
2015-09-30 00:47:20 +03:00
lockdep_assert_held ( & percpu_ref_switch_lock ) ;
2015-09-30 00:47:19 +03:00
/*
* If the previous ATOMIC switching hasn ' t finished yet , wait for
* its completion . If the caller ensures that ATOMIC switching
* isn ' t in progress , this function can be called from any context .
*/
2015-09-30 00:47:20 +03:00
wait_event_lock_irq ( percpu_ref_switch_waitq , ! ref - > confirm_switch ,
percpu_ref_switch_lock ) ;
2015-09-30 00:47:19 +03:00
if ( ref - > force_atomic | | ( ref - > percpu_count_ptr & __PERCPU_REF_DEAD ) )
__percpu_ref_switch_to_atomic ( ref , confirm_switch ) ;
else
__percpu_ref_switch_to_percpu ( ref ) ;
}
2015-09-30 00:47:17 +03:00
/**
* percpu_ref_switch_to_atomic - switch a percpu_ref to atomic mode
* @ ref : percpu_ref to switch to atomic mode
* @ confirm_switch : optional confirmation callback
*
* There ' s no reason to use this function for the usual reference counting .
* Use percpu_ref_kill [ _and_confirm ] ( ) .
*
* Schedule switching of @ ref to atomic mode . All its percpu counts will
* be collected to the main atomic counter . On completion , when all CPUs
* are guaraneed to be in atomic mode , @ confirm_switch , which may not
* block , is invoked . This function may be invoked concurrently with all
* the get / put operations and can safely be mixed with kill and reinit
* operations . Note that @ ref will stay in atomic mode across kill / reinit
* cycles until percpu_ref_switch_to_percpu ( ) is called .
*
2015-09-30 00:47:19 +03:00
* This function may block if @ ref is in the process of switching to atomic
* mode . If the caller ensures that @ ref is not in the process of
* switching to atomic mode , this function can be called from any context .
2015-09-30 00:47:17 +03:00
*/
void percpu_ref_switch_to_atomic ( struct percpu_ref * ref ,
percpu_ref_func_t * confirm_switch )
{
2015-09-30 00:47:20 +03:00
unsigned long flags ;
spin_lock_irqsave ( & percpu_ref_switch_lock , flags ) ;
2015-09-30 00:47:17 +03:00
ref - > force_atomic = true ;
2015-09-30 00:47:19 +03:00
__percpu_ref_switch_mode ( ref , confirm_switch ) ;
2015-09-30 00:47:20 +03:00
spin_unlock_irqrestore ( & percpu_ref_switch_lock , flags ) ;
2015-09-30 00:47:17 +03:00
}
2014-09-24 21:31:49 +04:00
/**
* percpu_ref_switch_to_percpu - switch a percpu_ref to percpu mode
* @ ref : percpu_ref to switch to percpu mode
*
* There ' s no reason to use this function for the usual reference counting .
* To re - use an expired ref , use percpu_ref_reinit ( ) .
*
* Switch @ ref to percpu mode . This function may be invoked concurrently
* with all the get / put operations and can safely be mixed with kill and
2014-09-24 21:31:50 +04:00
* reinit operations . This function reverses the sticky atomic state set
* by PERCPU_REF_INIT_ATOMIC or percpu_ref_switch_to_atomic ( ) . If @ ref is
* dying or dead , the actual switching takes place on the following
* percpu_ref_reinit ( ) .
2014-09-24 21:31:49 +04:00
*
2015-09-30 00:47:19 +03:00
* This function may block if @ ref is in the process of switching to atomic
* mode . If the caller ensures that @ ref is not in the process of
* switching to atomic mode , this function can be called from any context .
2014-09-24 21:31:49 +04:00
*/
void percpu_ref_switch_to_percpu ( struct percpu_ref * ref )
{
2015-09-30 00:47:20 +03:00
unsigned long flags ;
spin_lock_irqsave ( & percpu_ref_switch_lock , flags ) ;
2014-09-24 21:31:50 +04:00
ref - > force_atomic = false ;
2015-09-30 00:47:19 +03:00
__percpu_ref_switch_mode ( ref , NULL ) ;
2015-09-30 00:47:20 +03:00
spin_unlock_irqrestore ( & percpu_ref_switch_lock , flags ) ;
2014-09-24 21:31:48 +04:00
}
2014-09-24 21:31:49 +04:00
/**
* percpu_ref_kill_and_confirm - drop the initial ref and schedule confirmation
* @ ref : percpu_ref to kill
* @ confirm_kill : optional confirmation callback
*
* Equivalent to percpu_ref_kill ( ) but also schedules kill confirmation if
* @ confirm_kill is not NULL . @ confirm_kill , which may not block , will be
* called after @ ref is seen as dead from all CPUs at which point all
* further invocations of percpu_ref_tryget_live ( ) will fail . See
* percpu_ref_tryget_live ( ) for details .
*
* This function normally doesn ' t block and can be called from any context
2014-09-24 21:31:49 +04:00
* but it may block if @ confirm_kill is specified and @ ref is in the
2015-09-30 00:47:16 +03:00
* process of switching to atomic mode by percpu_ref_switch_to_atomic ( ) .
2014-09-24 21:31:49 +04:00
*/
void percpu_ref_kill_and_confirm ( struct percpu_ref * ref ,
percpu_ref_func_t * confirm_kill )
{
2015-09-30 00:47:20 +03:00
unsigned long flags ;
spin_lock_irqsave ( & percpu_ref_switch_lock , flags ) ;
2014-09-24 21:31:49 +04:00
WARN_ONCE ( ref - > percpu_count_ptr & __PERCPU_REF_DEAD ,
" %s called more than once on %pf! " , __func__ , ref - > release ) ;
ref - > percpu_count_ptr | = __PERCPU_REF_DEAD ;
2015-09-30 00:47:19 +03:00
__percpu_ref_switch_mode ( ref , confirm_kill ) ;
2014-09-24 21:31:49 +04:00
percpu_ref_put ( ref ) ;
2015-09-30 00:47:20 +03:00
spin_unlock_irqrestore ( & percpu_ref_switch_lock , flags ) ;
2014-09-24 21:31:49 +04:00
}
EXPORT_SYMBOL_GPL ( percpu_ref_kill_and_confirm ) ;
2014-09-24 21:31:49 +04:00
/**
* percpu_ref_reinit - re - initialize a percpu refcount
* @ ref : perpcu_ref to re - initialize
*
* Re - initialize @ ref so that it ' s in the same state as when it finished
2014-09-24 21:31:50 +04:00
* percpu_ref_init ( ) ignoring % PERCPU_REF_INIT_DEAD . @ ref must have been
* initialized successfully and reached 0 but not exited .
2014-09-24 21:31:49 +04:00
*
* Note that percpu_ref_tryget [ _live ] ( ) are safe to perform on @ ref while
* this function is in progress .
*/
void percpu_ref_reinit ( struct percpu_ref * ref )
{
2015-09-30 00:47:20 +03:00
unsigned long flags ;
spin_lock_irqsave ( & percpu_ref_switch_lock , flags ) ;
2014-09-24 21:31:49 +04:00
WARN_ON_ONCE ( ! percpu_ref_is_zero ( ref ) ) ;
ref - > percpu_count_ptr & = ~ __PERCPU_REF_DEAD ;
percpu_ref_get ( ref ) ;
2015-09-30 00:47:19 +03:00
__percpu_ref_switch_mode ( ref , NULL ) ;
2015-09-30 00:47:20 +03:00
spin_unlock_irqrestore ( & percpu_ref_switch_lock , flags ) ;
2014-09-24 21:31:49 +04:00
}
EXPORT_SYMBOL_GPL ( percpu_ref_reinit ) ;