2013-06-01 02:26:45 +04:00
# define pr_fmt(fmt) "%s: " fmt "\n", __func__
# include <linux/kernel.h>
2014-09-24 21:31:49 +04:00
# include <linux/sched.h>
# include <linux/wait.h>
2013-06-01 02:26:45 +04:00
# include <linux/percpu-refcount.h>
/*
* Initially , a percpu refcount is just a set of percpu counters . Initially , we
* don ' t try to detect the ref hitting 0 - which means that get / put can just
* increment or decrement the local counter . Note that the counter on a
* particular cpu can ( and will ) wrap - this is fine , when we go to shutdown the
* percpu counters will all sum to the correct value
*
* ( More precisely : because modular arithmetic is commutative the sum of all the
2014-09-24 21:31:48 +04:00
* percpu_count vars will be equal to what it would have been if all the gets
* and puts were done to a single integer , even if some of the percpu integers
2013-06-01 02:26:45 +04:00
* overflow or underflow ) .
*
* The real trick to implementing percpu refcounts is shutdown . We can ' t detect
* the ref hitting 0 on every put - this would require global synchronization
* and defeat the whole purpose of using percpu refs .
*
* What we do is require the user to keep track of the initial refcount ; we know
* the ref can ' t hit 0 before the user drops the initial ref , so as long as we
* convert to non percpu mode before the initial ref is dropped everything
* works .
*
* Converting to non percpu mode is done with some RCUish stuff in
2014-09-20 09:27:25 +04:00
* percpu_ref_kill . Additionally , we need a bias value so that the
* atomic_long_t can ' t hit 0 before we ' ve added up all the percpu refs .
2013-06-01 02:26:45 +04:00
*/
2014-09-24 21:31:48 +04:00
/*
 * Bias carried by the atomic count while a ref operates in percpu mode.  It
 * keeps the atomic count from reaching 0 before the percpu counts have been
 * added to it; it is subtracted again when switching to atomic mode.
 */
# define PERCPU_COUNT_BIAS (1LU << (BITS_PER_LONG - 1))
2013-06-01 02:26:45 +04:00
2014-09-24 21:31:49 +04:00
/*
 * Wait queue for callers that must wait until an in-flight mode switch has
 * cleared ->confirm_switch; woken from percpu_ref_call_confirm_rcu().
 */
static DECLARE_WAIT_QUEUE_HEAD ( percpu_ref_switch_waitq ) ;
2014-09-24 21:31:48 +04:00
/*
 * Return the percpu counter pointer stored in @ref->percpu_count_ptr with
 * the __PERCPU_REF_ATOMIC_DEAD flag bits masked off.
 */
static unsigned long __percpu *percpu_count_ptr(struct percpu_ref *ref)
{
	unsigned long addr = ref->percpu_count_ptr & ~__PERCPU_REF_ATOMIC_DEAD;

	return (unsigned long __percpu *)addr;
}
2013-06-01 02:26:45 +04:00
/**
* percpu_ref_init - initialize a percpu refcount
2013-06-13 07:43:06 +04:00
* @ ref : percpu_ref to initialize
* @ release : function which will be called when refcount hits 0
2014-09-24 21:31:50 +04:00
* @ flags : PERCPU_REF_INIT_ * flags
2014-09-08 04:51:30 +04:00
* @ gfp : allocation mask to use
2013-06-01 02:26:45 +04:00
*
2014-09-24 21:31:50 +04:00
* Initializes @ ref . If @ flags is zero , @ ref starts in percpu mode with a
* refcount of 1 ; analagous to atomic_long_set ( ref , 1 ) . See the
* definitions of PERCPU_REF_INIT_ * flags for flag behaviors .
2013-06-01 02:26:45 +04:00
*
* Note that @ release must not sleep - it may potentially be called from RCU
* callback context by percpu_ref_kill ( ) .
*/
2014-09-08 04:51:30 +04:00
int percpu_ref_init ( struct percpu_ref * ref , percpu_ref_func_t * release ,
2014-09-24 21:31:50 +04:00
unsigned int flags , gfp_t gfp )
2013-06-01 02:26:45 +04:00
{
2014-09-24 21:31:49 +04:00
size_t align = max_t ( size_t , 1 < < __PERCPU_REF_FLAG_BITS ,
__alignof__ ( unsigned long ) ) ;
2014-09-24 21:31:50 +04:00
unsigned long start_count = 0 ;
2013-06-01 02:26:45 +04:00
2014-09-24 21:31:49 +04:00
ref - > percpu_count_ptr = ( unsigned long )
__alloc_percpu_gfp ( sizeof ( unsigned long ) , align , gfp ) ;
2014-09-24 21:31:48 +04:00
if ( ! ref - > percpu_count_ptr )
2013-06-01 02:26:45 +04:00
return - ENOMEM ;
2014-09-24 21:31:50 +04:00
if ( flags & ( PERCPU_REF_INIT_ATOMIC | PERCPU_REF_INIT_DEAD ) )
ref - > percpu_count_ptr | = __PERCPU_REF_ATOMIC ;
else
start_count + = PERCPU_COUNT_BIAS ;
if ( flags & PERCPU_REF_INIT_DEAD )
ref - > percpu_count_ptr | = __PERCPU_REF_DEAD ;
else
start_count + + ;
atomic_long_set ( & ref - > count , start_count ) ;
2013-06-01 02:26:45 +04:00
ref - > release = release ;
return 0 ;
}
2013-10-17 00:47:01 +04:00
EXPORT_SYMBOL_GPL ( percpu_ref_init ) ;
2013-06-01 02:26:45 +04:00
2013-06-13 07:52:35 +04:00
/**
2014-06-28 16:10:14 +04:00
* percpu_ref_exit - undo percpu_ref_init ( )
* @ ref : percpu_ref to exit
2013-06-13 07:52:35 +04:00
*
2014-06-28 16:10:14 +04:00
* This function exits @ ref . The caller is responsible for ensuring that
* @ ref is no longer in active use . The usual places to invoke this
* function from are the @ ref - > release ( ) callback or in init failure path
* where percpu_ref_init ( ) succeeded but other parts of the initialization
* of the embedding object failed .
2013-06-13 07:52:35 +04:00
*/
2014-06-28 16:10:14 +04:00
void percpu_ref_exit ( struct percpu_ref * ref )
2013-06-13 07:52:35 +04:00
{
2014-09-24 21:31:48 +04:00
unsigned long __percpu * percpu_count = percpu_count_ptr ( ref ) ;
2013-06-13 07:52:35 +04:00
2014-09-24 21:31:48 +04:00
if ( percpu_count ) {
free_percpu ( percpu_count ) ;
2014-09-24 21:31:49 +04:00
ref - > percpu_count_ptr = __PERCPU_REF_ATOMIC_DEAD ;
2013-06-13 07:52:35 +04:00
}
}
2014-06-28 16:10:14 +04:00
EXPORT_SYMBOL_GPL ( percpu_ref_exit ) ;
2013-06-13 07:52:35 +04:00
2014-09-24 21:31:49 +04:00
static void percpu_ref_call_confirm_rcu ( struct rcu_head * rcu )
{
struct percpu_ref * ref = container_of ( rcu , struct percpu_ref , rcu ) ;
ref - > confirm_switch ( ref ) ;
ref - > confirm_switch = NULL ;
wake_up_all ( & percpu_ref_switch_waitq ) ;
/* drop ref from percpu_ref_switch_to_atomic() */
percpu_ref_put ( ref ) ;
}
static void percpu_ref_switch_to_atomic_rcu ( struct rcu_head * rcu )
2013-06-01 02:26:45 +04:00
{
struct percpu_ref * ref = container_of ( rcu , struct percpu_ref , rcu ) ;
2014-09-24 21:31:48 +04:00
unsigned long __percpu * percpu_count = percpu_count_ptr ( ref ) ;
2014-09-20 09:27:25 +04:00
unsigned long count = 0 ;
2013-06-01 02:26:45 +04:00
int cpu ;
for_each_possible_cpu ( cpu )
2014-09-24 21:31:48 +04:00
count + = * per_cpu_ptr ( percpu_count , cpu ) ;
2013-06-01 02:26:45 +04:00
2014-09-24 21:31:48 +04:00
pr_debug ( " global %ld percpu %ld " ,
2014-09-20 09:27:25 +04:00
atomic_long_read ( & ref - > count ) , ( long ) count ) ;
2013-06-01 02:26:45 +04:00
/*
* It ' s crucial that we sum the percpu counters _before_ adding the sum
* to & ref - > count ; since gets could be happening on one cpu while puts
* happen on another , adding a single cpu ' s count could cause
* @ ref - > count to hit 0 before we ' ve got a consistent value - but the
* sum of all the counts will be consistent and correct .
*
* Subtracting the bias value then has to happen _after_ adding count to
* & ref - > count ; we need the bias value to prevent & ref - > count from
* reaching 0 before we add the percpu counts . But doing it at the same
* time is equivalent and saves us atomic operations :
*/
2014-09-24 21:31:48 +04:00
atomic_long_add ( ( long ) count - PERCPU_COUNT_BIAS , & ref - > count ) ;
2013-06-01 02:26:45 +04:00
2014-09-20 09:27:25 +04:00
WARN_ONCE ( atomic_long_read ( & ref - > count ) < = 0 ,
2014-09-24 21:31:49 +04:00
" percpu ref (%pf) <= 0 (%ld) after switching to atomic " ,
2014-09-20 09:27:25 +04:00
ref - > release , atomic_long_read ( & ref - > count ) ) ;
2014-01-07 01:13:26 +04:00
2014-09-24 21:31:49 +04:00
/* @ref is viewed as dead on all CPUs, send out switch confirmation */
percpu_ref_call_confirm_rcu ( rcu ) ;
}
2013-06-14 06:23:53 +04:00
2014-09-24 21:31:49 +04:00
/* Stand-in ->confirm_switch callback for callers which didn't supply one. */
static void percpu_ref_noop_confirm_switch(struct percpu_ref *ref)
{
	/* intentionally empty */
}
/*
 * Start switching @ref to atomic mode if it isn't in it yet and arrange for
 * @confirm_switch, if any, to be called from an RCU callback afterwards.
 * If the ATOMIC flag is already set and @confirm_switch is NULL, this does
 * nothing.
 */
static void __percpu_ref_switch_to_atomic(struct percpu_ref *ref,
					  percpu_ref_func_t *confirm_switch)
{
	if (ref->percpu_count_ptr & __PERCPU_REF_ATOMIC) {
		/* already atomic (or getting there) and nobody to notify */
		if (!confirm_switch)
			return;

		/*
		 * Somebody already set ATOMIC.  Switching may still be in
		 * progress.  @confirm_switch must be invoked after the
		 * switching is complete and a full sched RCU grace period
		 * has passed.  Wait synchronously for the previous
		 * switching and schedule @confirm_switch invocation.
		 */
		wait_event(percpu_ref_switch_waitq, !ref->confirm_switch);

		ref->confirm_switch = confirm_switch;
		percpu_ref_get(ref);	/* put after confirmation */
		call_rcu_sched(&ref->rcu, percpu_ref_call_confirm_rcu);
		return;
	}

	/* switching from percpu to atomic */
	ref->percpu_count_ptr |= __PERCPU_REF_ATOMIC;

	/*
	 * Non-NULL ->confirm_switch is used to indicate that switching is
	 * in progress.  Use noop one if unspecified.
	 */
	WARN_ON_ONCE(ref->confirm_switch);
	ref->confirm_switch = confirm_switch ? confirm_switch :
					       percpu_ref_noop_confirm_switch;

	percpu_ref_get(ref);	/* put after confirmation */
	call_rcu_sched(&ref->rcu, percpu_ref_switch_to_atomic_rcu);
}
/**
2014-09-24 21:31:49 +04:00
* percpu_ref_switch_to_atomic - switch a percpu_ref to atomic mode
* @ ref : percpu_ref to switch to atomic mode
* @ confirm_switch : optional confirmation callback
2013-06-01 02:26:45 +04:00
*
2014-09-24 21:31:49 +04:00
* There ' s no reason to use this function for the usual reference counting .
* Use percpu_ref_kill [ _and_confirm ] ( ) .
*
* Schedule switching of @ ref to atomic mode . All its percpu counts will
* be collected to the main atomic counter . On completion , when all CPUs
* are guaraneed to be in atomic mode , @ confirm_switch , which may not
* block , is invoked . This function may be invoked concurrently with all
* the get / put operations and can safely be mixed with kill and reinit
* operations .
2013-06-01 02:26:45 +04:00
*
2014-09-24 21:31:49 +04:00
* This function normally doesn ' t block and can be called from any context
* but it may block if @ confirm_kill is specified and @ ref is already in
* the process of switching to atomic mode . In such cases , @ confirm_switch
* will be invoked after the switching is complete .
*
* Due to the way percpu_ref is implemented , @ confirm_switch will be called
* after at least one full sched RCU grace period has passed but this is an
* implementation detail and must not be depended upon .
2013-06-01 02:26:45 +04:00
*/
2014-09-24 21:31:49 +04:00
void percpu_ref_switch_to_atomic ( struct percpu_ref * ref ,
percpu_ref_func_t * confirm_switch )
2013-06-01 02:26:45 +04:00
{
2014-09-24 21:31:49 +04:00
__percpu_ref_switch_to_atomic ( ref , confirm_switch ) ;
2013-06-01 02:26:45 +04:00
}
2014-09-24 21:31:48 +04:00
2014-09-24 21:31:49 +04:00
static void __percpu_ref_switch_to_percpu ( struct percpu_ref * ref )
2014-09-24 21:31:48 +04:00
{
2014-09-24 21:31:48 +04:00
unsigned long __percpu * percpu_count = percpu_count_ptr ( ref ) ;
2014-09-24 21:31:48 +04:00
int cpu ;
2014-09-24 21:31:48 +04:00
BUG_ON ( ! percpu_count ) ;
2014-09-24 21:31:48 +04:00
2014-09-24 21:31:49 +04:00
if ( ! ( ref - > percpu_count_ptr & __PERCPU_REF_ATOMIC ) )
return ;
wait_event ( percpu_ref_switch_waitq , ! ref - > confirm_switch ) ;
atomic_long_add ( PERCPU_COUNT_BIAS , & ref - > count ) ;
2014-09-24 21:31:48 +04:00
/*
* Restore per - cpu operation . smp_store_release ( ) is paired with
2014-09-24 21:31:48 +04:00
* smp_read_barrier_depends ( ) in __ref_is_percpu ( ) and guarantees
* that the zeroing is visible to all percpu accesses which can see
2014-09-24 21:31:49 +04:00
* the following __PERCPU_REF_ATOMIC clearing .
2014-09-24 21:31:48 +04:00
*/
for_each_possible_cpu ( cpu )
2014-09-24 21:31:48 +04:00
* per_cpu_ptr ( percpu_count , cpu ) = 0 ;
2014-09-24 21:31:48 +04:00
2014-09-24 21:31:48 +04:00
smp_store_release ( & ref - > percpu_count_ptr ,
2014-09-24 21:31:49 +04:00
ref - > percpu_count_ptr & ~ __PERCPU_REF_ATOMIC ) ;
}
/**
* percpu_ref_switch_to_percpu - switch a percpu_ref to percpu mode
* @ ref : percpu_ref to switch to percpu mode
*
* There ' s no reason to use this function for the usual reference counting .
* To re - use an expired ref , use percpu_ref_reinit ( ) .
*
* Switch @ ref to percpu mode . This function may be invoked concurrently
* with all the get / put operations and can safely be mixed with kill and
* reinit operations .
*
* This function normally doesn ' t block and can be called from any context
* but it may block if @ ref is in the process of switching to atomic mode
* by percpu_ref_switch_atomic ( ) .
*/
void percpu_ref_switch_to_percpu ( struct percpu_ref * ref )
{
/* a dying or dead ref can't be switched to percpu mode w/o reinit */
if ( ! ( ref - > percpu_count_ptr & __PERCPU_REF_DEAD ) )
__percpu_ref_switch_to_percpu ( ref ) ;
2014-09-24 21:31:48 +04:00
}
2014-09-24 21:31:49 +04:00
/**
 * percpu_ref_kill_and_confirm - drop the initial ref and schedule confirmation
 * @ref: percpu_ref to kill
 * @confirm_kill: optional confirmation callback
 *
 * Equivalent to percpu_ref_kill() but also schedules kill confirmation if
 * @confirm_kill is not NULL.  @confirm_kill, which may not block, will be
 * called after @ref is seen as dead from all CPUs at which point all
 * further invocations of percpu_ref_tryget_live() will fail.  See
 * percpu_ref_tryget_live() for details.
 *
 * This function normally doesn't block and can be called from any context
 * but it may block if @confirm_kill is specified and @ref is in the
 * process of switching to atomic mode by percpu_ref_switch_to_atomic().
 *
 * Due to the way percpu_ref is implemented, @confirm_kill will be called
 * after at least one full sched RCU grace period has passed but this is an
 * implementation detail and must not be depended upon.
 */
void percpu_ref_kill_and_confirm(struct percpu_ref *ref,
				 percpu_ref_func_t *confirm_kill)
{
	WARN_ONCE(ref->percpu_count_ptr & __PERCPU_REF_DEAD,
		  " %s called more than once on %pf! ", __func__, ref->release);

	/* mark @ref dead before kicking off the switch to atomic mode */
	ref->percpu_count_ptr |= __PERCPU_REF_DEAD;
	__percpu_ref_switch_to_atomic(ref, confirm_kill);

	/* drop the initial ref */
	percpu_ref_put(ref);
}
EXPORT_SYMBOL_GPL(percpu_ref_kill_and_confirm);
2014-09-24 21:31:49 +04:00
/**
* percpu_ref_reinit - re - initialize a percpu refcount
* @ ref : perpcu_ref to re - initialize
*
* Re - initialize @ ref so that it ' s in the same state as when it finished
* percpu_ref_init ( ) . @ ref must have been initialized successfully and
* reached 0 but not exited .
*
* Note that percpu_ref_tryget [ _live ] ( ) are safe to perform on @ ref while
* this function is in progress .
*/
void percpu_ref_reinit ( struct percpu_ref * ref )
{
WARN_ON_ONCE ( ! percpu_ref_is_zero ( ref ) ) ;
ref - > percpu_count_ptr & = ~ __PERCPU_REF_DEAD ;
percpu_ref_get ( ref ) ;
__percpu_ref_switch_to_percpu ( ref ) ;
}
EXPORT_SYMBOL_GPL ( percpu_ref_reinit ) ;