/*
 * Fast batching percpu counters.
 */

#include <linux/percpu_counter.h>
#include <linux/notifier.h>
#include <linux/mutex.h>
#include <linux/init.h>
#include <linux/cpu.h>
#include <linux/module.h>
#include <linux/debugobjects.h>

#ifdef CONFIG_HOTPLUG_CPU
static LIST_HEAD(percpu_counters);
static DEFINE_SPINLOCK(percpu_counters_lock);
#endif

#ifdef CONFIG_DEBUG_OBJECTS_PERCPU_COUNTER

static struct debug_obj_descr percpu_counter_debug_descr;

static bool percpu_counter_fixup_free(void *addr, enum debug_obj_state state)
{
	struct percpu_counter *fbc = addr;

	switch (state) {
	case ODEBUG_STATE_ACTIVE:
		percpu_counter_destroy(fbc);
		debug_object_free(fbc, &percpu_counter_debug_descr);
		return true;
	default:
		return false;
	}
}

static struct debug_obj_descr percpu_counter_debug_descr = {
	.name		= "percpu_counter",
	.fixup_free	= percpu_counter_fixup_free,
};

static inline void debug_percpu_counter_activate(struct percpu_counter *fbc)
{
	debug_object_init(fbc, &percpu_counter_debug_descr);
	debug_object_activate(fbc, &percpu_counter_debug_descr);
}

static inline void debug_percpu_counter_deactivate(struct percpu_counter *fbc)
{
	debug_object_deactivate(fbc, &percpu_counter_debug_descr);
	debug_object_free(fbc, &percpu_counter_debug_descr);
}

#else	/* CONFIG_DEBUG_OBJECTS_PERCPU_COUNTER */
static inline void debug_percpu_counter_activate(struct percpu_counter *fbc)
{ }
static inline void debug_percpu_counter_deactivate(struct percpu_counter *fbc)
{ }
#endif	/* CONFIG_DEBUG_OBJECTS_PERCPU_COUNTER */

void percpu_counter_set(struct percpu_counter *fbc, s64 amount)
{
	int cpu;
	unsigned long flags;

	raw_spin_lock_irqsave(&fbc->lock, flags);
	for_each_possible_cpu(cpu) {
		s32 *pcount = per_cpu_ptr(fbc->counters, cpu);
		*pcount = 0;
	}
	fbc->count = amount;
	raw_spin_unlock_irqrestore(&fbc->lock, flags);
}
EXPORT_SYMBOL(percpu_counter_set);

/*
 * This function is both preempt and irq safe. The former is due to explicit
 * preemption disable. The latter is guaranteed by the fact that the slow path
 * is explicitly protected by an irq-safe spinlock, whereas the fast path uses
 * this_cpu_add(), which is irq-safe by definition. Hence there is no need to
 * muck with irq state before calling this one.
 */
void percpu_counter_add_batch(struct percpu_counter *fbc, s64 amount, s32 batch)
{
	s64 count;

	preempt_disable();
	count = __this_cpu_read(*fbc->counters) + amount;
	if (count >= batch || count <= -batch) {
		unsigned long flags;

		raw_spin_lock_irqsave(&fbc->lock, flags);
		fbc->count += count;
		__this_cpu_sub(*fbc->counters, count - amount);
		raw_spin_unlock_irqrestore(&fbc->lock, flags);
	} else {
		this_cpu_add(*fbc->counters, amount);
	}
	preempt_enable();
}
EXPORT_SYMBOL(percpu_counter_add_batch);
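
/*
 * Illustrative usage sketch (hypothetical caller, not part of this file):
 * a subsystem that counts events at a high rate can pass a larger batch so
 * that most updates stay on the local cpu.  The "nr_events" counter and the
 * batch of 64 below are made-up values for the example.
 *
 *	static struct percpu_counter nr_events;
 *
 *	static void note_event(void)
 *	{
 *		percpu_counter_add_batch(&nr_events, 1, 64);
 *	}
 *
 * With a batch of 64, fbc->lock is only taken roughly once per 64 local
 * events; the common case is a plain this_cpu_add() that never touches the
 * shared count.
 */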

/*
 * Add up all the per-cpu counts, return the result.  This is a more accurate
 * but much slower version of percpu_counter_read_positive().
 */
s64 __percpu_counter_sum(struct percpu_counter *fbc)
{
	s64 ret;
	int cpu;
	unsigned long flags;

	raw_spin_lock_irqsave(&fbc->lock, flags);
	ret = fbc->count;
	for_each_online_cpu(cpu) {
		s32 *pcount = per_cpu_ptr(fbc->counters, cpu);
		ret += *pcount;
	}
	raw_spin_unlock_irqrestore(&fbc->lock, flags);
	return ret;
}
EXPORT_SYMBOL(__percpu_counter_sum);
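
/*
 * Illustrative sketch (hypothetical callers, not part of this file): the
 * cheap percpu_counter_read() can be off by up to batch * num_online_cpus(),
 * so callers use it on hot paths and fall back to the full sum only when an
 * exact value is needed, e.g. when reporting statistics:
 *
 *	static s64 events_exact(struct percpu_counter *fbc)
 *	{
 *		return percpu_counter_sum(fbc);
 *	}
 *
 *	static bool events_roughly_over(struct percpu_counter *fbc, s64 limit)
 *	{
 *		return percpu_counter_read(fbc) > limit;
 *	}
 */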

int __percpu_counter_init(struct percpu_counter *fbc, s64 amount, gfp_t gfp,
			  struct lock_class_key *key)
{
	unsigned long flags __maybe_unused;

	raw_spin_lock_init(&fbc->lock);
	lockdep_set_class(&fbc->lock, key);
	fbc->count = amount;
	fbc->counters = alloc_percpu_gfp(s32, gfp);
	if (!fbc->counters)
		return -ENOMEM;

	debug_percpu_counter_activate(fbc);

#ifdef CONFIG_HOTPLUG_CPU
	INIT_LIST_HEAD(&fbc->list);
	spin_lock_irqsave(&percpu_counters_lock, flags);
	list_add(&fbc->list, &percpu_counters);
	spin_unlock_irqrestore(&percpu_counters_lock, flags);
#endif
	return 0;
}
EXPORT_SYMBOL(__percpu_counter_init);

void percpu_counter_destroy(struct percpu_counter *fbc)
{
	unsigned long flags __maybe_unused;

	if (!fbc->counters)
		return;

	debug_percpu_counter_deactivate(fbc);

#ifdef CONFIG_HOTPLUG_CPU
	spin_lock_irqsave(&percpu_counters_lock, flags);
	list_del(&fbc->list);
	spin_unlock_irqrestore(&percpu_counters_lock, flags);
#endif
	free_percpu(fbc->counters);
	fbc->counters = NULL;
}
EXPORT_SYMBOL(percpu_counter_destroy);

int percpu_counter_batch __read_mostly = 32;
EXPORT_SYMBOL(percpu_counter_batch);

static int compute_batch_value(unsigned int cpu)
{
	int nr = num_online_cpus();

	percpu_counter_batch = max(32, nr * 2);
	return 0;
}

static int percpu_counter_cpu_dead(unsigned int cpu)
{
#ifdef CONFIG_HOTPLUG_CPU
	struct percpu_counter *fbc;

	compute_batch_value(cpu);

	spin_lock_irq(&percpu_counters_lock);
	list_for_each_entry(fbc, &percpu_counters, list) {
		s32 *pcount;

		raw_spin_lock(&fbc->lock);
		pcount = per_cpu_ptr(fbc->counters, cpu);
		fbc->count += *pcount;
		*pcount = 0;
		raw_spin_unlock(&fbc->lock);
	}
	spin_unlock_irq(&percpu_counters_lock);
#endif
	return 0;
}

/*
 * Compare counter against given value.
 * Return 1 if greater, 0 if equal and -1 if less.
 */
int __percpu_counter_compare(struct percpu_counter *fbc, s64 rhs, s32 batch)
{
	s64 count;

	count = percpu_counter_read(fbc);
	/* Check to see if rough count will be sufficient for comparison */
	if (abs(count - rhs) > (batch * num_online_cpus())) {
		if (count > rhs)
			return 1;
		else
			return -1;
	}
	/* Need to use precise count */
	count = percpu_counter_sum(fbc);
	if (count > rhs)
		return 1;
	else if (count < rhs)
		return -1;
	else
		return 0;
}
EXPORT_SYMBOL(__percpu_counter_compare);
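
/*
 * Illustrative sketch (hypothetical caller, not part of this file): a limit
 * check that only pays for the full __percpu_counter_sum() when the rough
 * count is within batch * num_online_cpus() of the threshold:
 *
 *	static bool would_exceed(struct percpu_counter *used, s64 limit)
 *	{
 *		return __percpu_counter_compare(used, limit,
 *						percpu_counter_batch) >= 0;
 *	}
 */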

static int __init percpu_counter_startup(void)
{
	int ret;

	ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "lib/percpu_cnt:online",
				compute_batch_value, NULL);
	WARN_ON(ret < 0);
	ret = cpuhp_setup_state_nocalls(CPUHP_PERCPU_CNT_DEAD,
					"lib/percpu_cnt:dead", NULL,
					percpu_counter_cpu_dead);
	WARN_ON(ret < 0);
	return 0;
}
module_init(percpu_counter_startup);