2016-09-17 17:38:44 +03:00
/*
 * Copyright (C) 2016 Facebook
 * Copyright (C) 2013-2014 Jens Axboe
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <https://www.gnu.org/licenses/>.
 */
2017-02-03 11:57:00 +03:00
# include <linux/sched.h>
2016-09-17 11:28:25 +03:00
# include <linux/random.h>
2016-09-17 17:38:44 +03:00
# include <linux/sbitmap.h>
2017-01-26 01:32:13 +03:00
# include <linux/seq_file.h>
2016-09-17 17:38:44 +03:00
/*
 * sbitmap_init_node() - set up @sb to track @depth bits.
 * @sb:    bitmap to initialize.
 * @depth: total number of bits to track.
 * @shift: log2 of the number of bits kept in each word; a negative value
 *         asks this function to pick one itself.
 * @flags: allocation flags handed to kzalloc_node().
 * @node:  memory node handed to kzalloc_node().
 *
 * Returns 0 on success, -EINVAL if @shift asks for more than BITS_PER_LONG
 * bits per word, or -ENOMEM if the word array cannot be allocated.  A @depth
 * of zero succeeds and leaves sb->map set to NULL.
 */
int sbitmap_init_node(struct sbitmap *sb, unsigned int depth, int shift,
		      gfp_t flags, int node)
{
	unsigned int bits_per_word;
	unsigned int i;

	if (shift < 0) {
		shift = ilog2(BITS_PER_LONG);
		/*
		 * If the bitmap is small, shrink the number of bits per word so
		 * we spread over a few cachelines, at least. If less than 4
		 * bits, just forget about it, it's not going to work optimally
		 * anyway.
		 */
		if (depth >= 4) {
			while ((4U << shift) > depth)
				shift--;
		}
	}

	/*
	 * Validate the exponent before shifting: shifting a 32-bit value by
	 * 32 or more is undefined, so checking the shifted result afterwards
	 * cannot be relied on to reject an oversized @shift.
	 */
	if (shift > ilog2(BITS_PER_LONG))
		return -EINVAL;
	bits_per_word = 1U << shift;

	sb->shift = shift;
	sb->depth = depth;
	sb->map_nr = DIV_ROUND_UP(sb->depth, bits_per_word);

	if (depth == 0) {
		sb->map = NULL;
		return 0;
	}

	sb->map = kzalloc_node(sb->map_nr * sizeof(*sb->map), flags, node);
	if (!sb->map)
		return -ENOMEM;

	/* Hand out the bits word by word; only the last word may be partial. */
	for (i = 0; i < sb->map_nr; i++) {
		sb->map[i].depth = min(depth, bits_per_word);
		depth -= sb->map[i].depth;
	}
	return 0;
}
EXPORT_SYMBOL_GPL(sbitmap_init_node);
void sbitmap_resize ( struct sbitmap * sb , unsigned int depth )
{
unsigned int bits_per_word = 1U < < sb - > shift ;
unsigned int i ;
sb - > depth = depth ;
sb - > map_nr = DIV_ROUND_UP ( sb - > depth , bits_per_word ) ;
for ( i = 0 ; i < sb - > map_nr ; i + + ) {
sb - > map [ i ] . depth = min ( depth , bits_per_word ) ;
depth - = sb - > map [ i ] . depth ;
}
}
EXPORT_SYMBOL_GPL ( sbitmap_resize ) ;
2017-04-14 10:59:58 +03:00
/*
 * Claim one zero bit in @word, considering only the first @depth bits and
 * starting the search at bit @hint.
 *
 * If the search runs off the end and it began at a non-zero offset, it is
 * retried once from bit 0, but only when @wrap is set.
 *
 * Returns the index of the bit that was set, or -1 if no zero bit could be
 * claimed.
 */
static int __sbitmap_get_word(unsigned long *word, unsigned long depth,
			      unsigned int hint, bool wrap)
{
	unsigned int start = hint;
	int bit;

	for (;;) {
		bit = find_next_zero_bit(word, depth, hint);
		if (unlikely(bit >= depth)) {
			/*
			 * Nothing free from the current offset onwards.  Only
			 * rescan from 0 if we began at a non-zero offset, have
			 * not already fallen back to 0, and wrapping is allowed.
			 */
			if (!start || !hint || !wrap)
				return -1;
			start = 0;
			hint = 0;
			continue;
		}

		if (!test_and_set_bit_lock(bit, word))
			return bit;

		/* Someone else took that bit first; carry on after it. */
		hint = bit + 1;
		if (hint >= depth - 1)
			hint = 0;
	}
}
int sbitmap_get ( struct sbitmap * sb , unsigned int alloc_hint , bool round_robin )
{
unsigned int i , index ;
int nr = - 1 ;
index = SB_NR_TO_INDEX ( sb , alloc_hint ) ;
for ( i = 0 ; i < sb - > map_nr ; i + + ) {
2017-04-14 10:59:58 +03:00
nr = __sbitmap_get_word ( & sb - > map [ index ] . word ,
sb - > map [ index ] . depth ,
2016-09-17 17:38:44 +03:00
SB_NR_TO_BIT ( sb , alloc_hint ) ,
! round_robin ) ;
if ( nr ! = - 1 ) {
nr + = index < < sb - > shift ;
break ;
}
/* Jump to next index. */
index + + ;
alloc_hint = index < < sb - > shift ;
if ( index > = sb - > map_nr ) {
index = 0 ;
alloc_hint = 0 ;
}
}
return nr ;
}
EXPORT_SYMBOL_GPL ( sbitmap_get ) ;
2017-04-14 10:59:58 +03:00
int sbitmap_get_shallow ( struct sbitmap * sb , unsigned int alloc_hint ,
unsigned long shallow_depth )
{
unsigned int i , index ;
int nr = - 1 ;
index = SB_NR_TO_INDEX ( sb , alloc_hint ) ;
for ( i = 0 ; i < sb - > map_nr ; i + + ) {
nr = __sbitmap_get_word ( & sb - > map [ index ] . word ,
min ( sb - > map [ index ] . depth , shallow_depth ) ,
SB_NR_TO_BIT ( sb , alloc_hint ) , true ) ;
if ( nr ! = - 1 ) {
nr + = index < < sb - > shift ;
break ;
}
/* Jump to next index. */
index + + ;
alloc_hint = index < < sb - > shift ;
if ( index > = sb - > map_nr ) {
index = 0 ;
alloc_hint = 0 ;
}
}
return nr ;
}
EXPORT_SYMBOL_GPL ( sbitmap_get_shallow ) ;
2016-09-17 17:38:44 +03:00
bool sbitmap_any_bit_set ( const struct sbitmap * sb )
{
unsigned int i ;
for ( i = 0 ; i < sb - > map_nr ; i + + ) {
if ( sb - > map [ i ] . word )
return true ;
}
return false ;
}
EXPORT_SYMBOL_GPL ( sbitmap_any_bit_set ) ;
bool sbitmap_any_bit_clear ( const struct sbitmap * sb )
{
unsigned int i ;
for ( i = 0 ; i < sb - > map_nr ; i + + ) {
const struct sbitmap_word * word = & sb - > map [ i ] ;
unsigned long ret ;
ret = find_first_zero_bit ( & word - > word , word - > depth ) ;
if ( ret < word - > depth )
return true ;
}
return false ;
}
EXPORT_SYMBOL_GPL ( sbitmap_any_bit_clear ) ;
unsigned int sbitmap_weight ( const struct sbitmap * sb )
{
2016-09-19 16:34:08 +03:00
unsigned int i , weight = 0 ;
2016-09-17 17:38:44 +03:00
for ( i = 0 ; i < sb - > map_nr ; i + + ) {
const struct sbitmap_word * word = & sb - > map [ i ] ;
weight + = bitmap_weight ( & word - > word , word - > depth ) ;
}
return weight ;
}
EXPORT_SYMBOL_GPL ( sbitmap_weight ) ;
2017-01-26 01:32:13 +03:00
/*
 * sbitmap_show() - write a summary of @sb to the seq_file @m.
 *
 * Emits four "name=value" entries: the total depth, the number of bits
 * currently set (as counted by sbitmap_weight()), the number of bits per
 * word (1 << shift) and the number of words in the map.
 */
void sbitmap_show ( struct sbitmap * sb , struct seq_file * m )
{
seq_printf ( m , " depth=%u \n " , sb - > depth ) ;
seq_printf ( m , " busy=%u \n " , sbitmap_weight ( sb ) ) ;
seq_printf ( m , " bits_per_word=%u \n " , 1U < < sb - > shift ) ;
seq_printf ( m , " map_nr=%u \n " , sb - > map_nr ) ;
}
EXPORT_SYMBOL_GPL ( sbitmap_show ) ;
/*
 * Write one byte of a hex dump to @m.
 * @m:      seq_file receiving the output.
 * @offset: index of @byte within the dump; it drives the layout.
 * @byte:   value printed as two hex digits.
 */
static inline void emit_byte ( struct seq_file * m , unsigned int offset , u8 byte )
{
/* Every 16th byte starts a new row, prefixed with the offset in hex. */
if ( ( offset & 0xf ) = = 0 ) {
/* The very first row has no previous row to terminate. */
if ( offset ! = 0 )
seq_putc ( m , ' \n ' ) ;
seq_printf ( m , " %08x: " , offset ) ;
}
/* A separator is written before every even-numbered byte. */
if ( ( offset & 0x1 ) = = 0 )
seq_putc ( m , ' ' ) ;
seq_printf ( m , " %02x " , byte ) ;
}
/*
 * sbitmap_bitmap_show() - dump the bits of @sb to the seq_file @m as hex.
 *
 * The valid bits of every word (its first "depth" bits, lowest bit first)
 * are concatenated into a single bit stream, cut into bytes and printed
 * through emit_byte().  Because a word's depth need not be a multiple of 8,
 * a byte may be assembled from the tail of one word and the head of the next.
 */
void sbitmap_bitmap_show ( struct sbitmap * sb , struct seq_file * m )
{
/* Byte under construction and how many of its bits are filled so far. */
u8 byte = 0 ;
unsigned int byte_bits = 0 ;
/* Number of bytes emitted so far; also the offset passed to emit_byte(). */
unsigned int offset = 0 ;
int i ;
for ( i = 0 ; i < sb - > map_nr ; i + + ) {
/* Read the word and its depth once each, then work on the local copies. */
unsigned long word = READ_ONCE ( sb - > map [ i ] . word ) ;
unsigned int word_bits = READ_ONCE ( sb - > map [ i ] . depth ) ;
while ( word_bits > 0 ) {
/* Take as many bits as fit in the current byte and remain in the word. */
unsigned int bits = min ( 8 - byte_bits , word_bits ) ;
/* Place the low "bits" bits of the word above those already in the byte. */
byte | = ( word & ( BIT ( bits ) - 1 ) ) < < byte_bits ;
byte_bits + = bits ;
if ( byte_bits = = 8 ) {
emit_byte ( m , offset , byte ) ;
byte = 0 ;
byte_bits = 0 ;
offset + + ;
}
/* Drop the bits that were just consumed. */
word > > = bits ;
word_bits - = bits ;
}
}
/* Flush a trailing, partially filled byte. */
if ( byte_bits ) {
emit_byte ( m , offset , byte ) ;
offset + + ;
}
/* Terminate the last row, but only if something was printed at all. */
if ( offset )
seq_putc ( m , ' \n ' ) ;
}
EXPORT_SYMBOL_GPL ( sbitmap_bitmap_show ) ;
2018-05-10 03:16:31 +03:00
static unsigned int sbq_calc_wake_batch ( struct sbitmap_queue * sbq ,
unsigned int depth )
2016-09-17 17:38:44 +03:00
{
unsigned int wake_batch ;
2018-05-10 03:16:31 +03:00
unsigned int shallow_depth ;
2016-09-17 17:38:44 +03:00
/*
* For each batch , we wake up one queue . We need to make sure that our
2018-05-10 03:16:31 +03:00
* batch size is small enough that the full depth of the bitmap ,
* potentially limited by a shallow depth , is enough to wake up all of
* the queues .
*
* Each full word of the bitmap has bits_per_word bits , and there might
* be a partial word . There are depth / bits_per_word full words and
* depth % bits_per_word bits left over . In bitwise arithmetic :
*
* bits_per_word = 1 < < shift
* depth / bits_per_word = depth > > shift
* depth % bits_per_word = depth & ( ( 1 < < shift ) - 1 )
*
* Each word can be limited to sbq - > min_shallow_depth bits .
2016-09-17 17:38:44 +03:00
*/
2018-05-10 03:16:31 +03:00
shallow_depth = min ( 1U < < sbq - > sb . shift , sbq - > min_shallow_depth ) ;
depth = ( ( depth > > sbq - > sb . shift ) * shallow_depth +
min ( depth & ( ( 1U < < sbq - > sb . shift ) - 1 ) , shallow_depth ) ) ;
wake_batch = clamp_t ( unsigned int , depth / SBQ_WAIT_QUEUES , 1 ,
SBQ_WAKE_BATCH ) ;
2016-09-17 17:38:44 +03:00
return wake_batch ;
}
/*
 * sbitmap_queue_init_node() - set up @sbq: its bitmap, the per-cpu allocation
 * hints and the array of wait queues.
 * @sbq:         queue to initialize.
 * @depth:       number of bits in the underlying bitmap.
 * @shift:       passed through to sbitmap_init_node().
 * @round_robin: stored in @sbq; when false (and @depth is non-zero) every
 *               possible CPU gets a random initial hint below @depth.
 * @flags:       allocation flags.
 * @node:        memory node used for the bitmap and the wait queue array.
 *
 * Returns 0 on success, the error from sbitmap_init_node(), or -ENOMEM if the
 * hints or the wait queues cannot be allocated.  Anything allocated before a
 * failure is released again.
 */
int sbitmap_queue_init_node(struct sbitmap_queue *sbq, unsigned int depth,
			    int shift, bool round_robin, gfp_t flags, int node)
{
	int cpu;
	int q;
	int err;

	err = sbitmap_init_node(&sbq->sb, depth, shift, flags, node);
	if (err)
		return err;

	sbq->alloc_hint = alloc_percpu_gfp(unsigned int, flags);
	if (!sbq->alloc_hint) {
		err = -ENOMEM;
		goto free_bitmap;
	}

	if (depth && !round_robin) {
		for_each_possible_cpu(cpu)
			*per_cpu_ptr(sbq->alloc_hint, cpu) = prandom_u32() % depth;
	}

	/* No shallow limit yet, so the batch is derived from the full depth. */
	sbq->min_shallow_depth = UINT_MAX;
	sbq->wake_batch = sbq_calc_wake_batch(sbq, depth);
	atomic_set(&sbq->wake_index, 0);

	sbq->ws = kzalloc_node(SBQ_WAIT_QUEUES * sizeof(*sbq->ws), flags, node);
	if (!sbq->ws) {
		err = -ENOMEM;
		goto free_hint;
	}

	for (q = 0; q < SBQ_WAIT_QUEUES; q++) {
		init_waitqueue_head(&sbq->ws[q].wait);
		atomic_set(&sbq->ws[q].wait_cnt, sbq->wake_batch);
	}

	sbq->round_robin = round_robin;
	return 0;

free_hint:
	free_percpu(sbq->alloc_hint);
free_bitmap:
	sbitmap_free(&sbq->sb);
	return err;
}
EXPORT_SYMBOL_GPL(sbitmap_queue_init_node);
2018-05-10 03:16:31 +03:00
static void sbitmap_queue_update_wake_batch ( struct sbitmap_queue * sbq ,
unsigned int depth )
2016-09-17 17:38:44 +03:00
{
2018-05-10 03:16:31 +03:00
unsigned int wake_batch = sbq_calc_wake_batch ( sbq , depth ) ;
2017-01-18 22:55:22 +03:00
int i ;
if ( sbq - > wake_batch ! = wake_batch ) {
WRITE_ONCE ( sbq - > wake_batch , wake_batch ) ;
/*
* Pairs with the memory barrier in sbq_wake_up ( ) to ensure that
* the batch size is updated before the wait counts .
*/
smp_mb__before_atomic ( ) ;
for ( i = 0 ; i < SBQ_WAIT_QUEUES ; i + + )
atomic_set ( & sbq - > ws [ i ] . wait_cnt , 1 ) ;
}
2018-05-10 03:16:31 +03:00
}
void sbitmap_queue_resize ( struct sbitmap_queue * sbq , unsigned int depth )
{
sbitmap_queue_update_wake_batch ( sbq , depth ) ;
2016-09-17 17:38:44 +03:00
sbitmap_resize ( & sbq - > sb , depth ) ;
}
EXPORT_SYMBOL_GPL ( sbitmap_queue_resize ) ;
2016-09-17 11:28:24 +03:00
int __sbitmap_queue_get ( struct sbitmap_queue * sbq )
2016-09-17 11:28:23 +03:00
{
2016-09-17 11:28:26 +03:00
unsigned int hint , depth ;
2016-09-17 11:28:23 +03:00
int nr ;
hint = this_cpu_read ( * sbq - > alloc_hint ) ;
2016-09-17 11:28:26 +03:00
depth = READ_ONCE ( sbq - > sb . depth ) ;
if ( unlikely ( hint > = depth ) ) {
hint = depth ? prandom_u32 ( ) % depth : 0 ;
this_cpu_write ( * sbq - > alloc_hint , hint ) ;
}
2016-09-17 11:28:24 +03:00
nr = sbitmap_get ( & sbq - > sb , hint , sbq - > round_robin ) ;
2016-09-17 11:28:23 +03:00
if ( nr = = - 1 ) {
/* If the map is full, a hint won't do us much good. */
this_cpu_write ( * sbq - > alloc_hint , 0 ) ;
2016-09-17 11:28:24 +03:00
} else if ( nr = = hint | | unlikely ( sbq - > round_robin ) ) {
2016-09-17 11:28:23 +03:00
/* Only update the hint if we used it. */
hint = nr + 1 ;
2016-09-17 11:28:26 +03:00
if ( hint > = depth - 1 )
2016-09-17 11:28:23 +03:00
hint = 0 ;
this_cpu_write ( * sbq - > alloc_hint , hint ) ;
}
return nr ;
}
EXPORT_SYMBOL_GPL ( __sbitmap_queue_get ) ;
2017-04-14 10:59:58 +03:00
/*
 * __sbitmap_queue_get_shallow() - try to allocate a bit from @sbq while using
 * at most @shallow_depth bits of each word, starting from the current CPU's
 * allocation hint.
 *
 * Warns once if @shallow_depth is smaller than sbq->min_shallow_depth.
 *
 * Returns the allocated bit number, or -1 if nothing could be allocated.  The
 * per-cpu hint is updated along the way.
 */
int __sbitmap_queue_get_shallow(struct sbitmap_queue *sbq,
				unsigned int shallow_depth)
{
	unsigned int hint, depth, next;
	int nr;

	WARN_ON_ONCE(shallow_depth < sbq->min_shallow_depth);

	hint = this_cpu_read(*sbq->alloc_hint);
	depth = READ_ONCE(sbq->sb.depth);

	/* The stored hint is out of range for the current depth: pick a new one. */
	if (unlikely(hint >= depth)) {
		if (depth)
			hint = prandom_u32() % depth;
		else
			hint = 0;
		this_cpu_write(*sbq->alloc_hint, hint);
	}

	nr = sbitmap_get_shallow(&sbq->sb, hint, shallow_depth);
	if (nr == -1) {
		/* If the map is full, a hint won't do us much good. */
		this_cpu_write(*sbq->alloc_hint, 0);
		return nr;
	}

	/* Only update the hint if we used it. */
	if (nr == hint || unlikely(sbq->round_robin)) {
		next = nr + 1;
		if (next >= depth - 1)
			next = 0;
		this_cpu_write(*sbq->alloc_hint, next);
	}

	return nr;
}
EXPORT_SYMBOL_GPL(__sbitmap_queue_get_shallow);
2018-05-10 03:16:31 +03:00
void sbitmap_queue_min_shallow_depth ( struct sbitmap_queue * sbq ,
unsigned int min_shallow_depth )
{
sbq - > min_shallow_depth = min_shallow_depth ;
sbitmap_queue_update_wake_batch ( sbq , sbq - > sb . depth ) ;
}
EXPORT_SYMBOL_GPL ( sbitmap_queue_min_shallow_depth ) ;
2016-09-17 17:38:44 +03:00
static struct sbq_wait_state * sbq_wake_ptr ( struct sbitmap_queue * sbq )
{
int i , wake_index ;
wake_index = atomic_read ( & sbq - > wake_index ) ;
for ( i = 0 ; i < SBQ_WAIT_QUEUES ; i + + ) {
struct sbq_wait_state * ws = & sbq - > ws [ wake_index ] ;
if ( waitqueue_active ( & ws - > wait ) ) {
int o = atomic_read ( & sbq - > wake_index ) ;
if ( wake_index ! = o )
atomic_cmpxchg ( & sbq - > wake_index , o , wake_index ) ;
return ws ;
}
wake_index = sbq_index_inc ( wake_index ) ;
}
return NULL ;
}
static void sbq_wake_up ( struct sbitmap_queue * sbq )
{
struct sbq_wait_state * ws ;
2017-01-18 22:55:22 +03:00
unsigned int wake_batch ;
2016-09-17 17:38:44 +03:00
int wait_cnt ;
2017-01-18 22:55:21 +03:00
/*
* Pairs with the memory barrier in set_current_state ( ) to ensure the
* proper ordering of clear_bit ( ) / waitqueue_active ( ) in the waker and
2018-02-28 03:56:43 +03:00
* test_and_set_bit_lock ( ) / prepare_to_wait ( ) / finish_wait ( ) in the
* waiter . See the comment on waitqueue_active ( ) . This is __after_atomic
* because we just did clear_bit_unlock ( ) in the caller .
2017-01-18 22:55:21 +03:00
*/
smp_mb__after_atomic ( ) ;
2016-09-17 17:38:44 +03:00
ws = sbq_wake_ptr ( sbq ) ;
if ( ! ws )
return ;
wait_cnt = atomic_dec_return ( & ws - > wait_cnt ) ;
2017-01-18 22:55:22 +03:00
if ( wait_cnt < = 0 ) {
wake_batch = READ_ONCE ( sbq - > wake_batch ) ;
/*
* Pairs with the memory barrier in sbitmap_queue_resize ( ) to
* ensure that we see the batch size update before the wait
* count is reset .
*/
smp_mb__before_atomic ( ) ;
/*
* If there are concurrent callers to sbq_wake_up ( ) , the last
* one to decrement the wait count below zero will bump it back
* up . If there is a concurrent resize , the count reset will
* either cause the cmpxchg to fail or overwrite after the
* cmpxchg .
*/
atomic_cmpxchg ( & ws - > wait_cnt , wait_cnt , wait_cnt + wake_batch ) ;
2016-09-17 17:38:44 +03:00
sbq_index_atomic_inc ( & sbq - > wake_index ) ;
2017-11-14 20:24:58 +03:00
wake_up_nr ( & ws - > wait , wake_batch ) ;
2016-09-17 17:38:44 +03:00
}
}
2016-09-17 11:28:23 +03:00
void sbitmap_queue_clear ( struct sbitmap_queue * sbq , unsigned int nr ,
2016-09-17 11:28:24 +03:00
unsigned int cpu )
2016-09-17 17:38:44 +03:00
{
2018-02-28 03:56:43 +03:00
sbitmap_clear_bit_unlock ( & sbq - > sb , nr ) ;
2016-09-17 17:38:44 +03:00
sbq_wake_up ( sbq ) ;
2016-09-17 22:20:54 +03:00
if ( likely ( ! sbq - > round_robin & & nr < sbq - > sb . depth ) )
2016-09-17 11:28:23 +03:00
* per_cpu_ptr ( sbq - > alloc_hint , cpu ) = nr ;
2016-09-17 17:38:44 +03:00
}
EXPORT_SYMBOL_GPL ( sbitmap_queue_clear ) ;
void sbitmap_queue_wake_all ( struct sbitmap_queue * sbq )
{
int i , wake_index ;
/*
2017-01-18 22:55:21 +03:00
* Pairs with the memory barrier in set_current_state ( ) like in
* sbq_wake_up ( ) .
2016-09-17 17:38:44 +03:00
*/
smp_mb ( ) ;
wake_index = atomic_read ( & sbq - > wake_index ) ;
for ( i = 0 ; i < SBQ_WAIT_QUEUES ; i + + ) {
struct sbq_wait_state * ws = & sbq - > ws [ wake_index ] ;
if ( waitqueue_active ( & ws - > wait ) )
wake_up ( & ws - > wait ) ;
wake_index = sbq_index_inc ( wake_index ) ;
}
}
EXPORT_SYMBOL_GPL ( sbitmap_queue_wake_all ) ;
2017-01-26 01:32:13 +03:00
/*
 * sbitmap_queue_show() - write the state of @sbq to the seq_file @m.
 *
 * Output, in order: the summary printed by sbitmap_show() for the underlying
 * bitmap, the allocation hint of every possible CPU, the wake batch, the wake
 * index, one entry per wait queue (its wait count and whether it has
 * waiters), the round_robin flag and the minimum shallow depth.
 */
void sbitmap_queue_show ( struct sbitmap_queue * sbq , struct seq_file * m )
{
/* Tracks whether a separator is needed before the next per-cpu hint. */
bool first ;
int i ;
sbitmap_show ( & sbq - > sb , m ) ;
seq_puts ( m , " alloc_hint={ " ) ;
first = true ;
for_each_possible_cpu ( i ) {
if ( ! first )
seq_puts ( m , " , " ) ;
first = false ;
seq_printf ( m , " %u " , * per_cpu_ptr ( sbq - > alloc_hint , i ) ) ;
}
seq_puts ( m , " } \n " ) ;
seq_printf ( m , " wake_batch=%u \n " , sbq - > wake_batch ) ;
seq_printf ( m , " wake_index=%d \n " , atomic_read ( & sbq - > wake_index ) ) ;
seq_puts ( m , " ws={ \n " ) ;
/* One entry per wait queue: its wait count and whether anyone is waiting. */
for ( i = 0 ; i < SBQ_WAIT_QUEUES ; i + + ) {
struct sbq_wait_state * ws = & sbq - > ws [ i ] ;
seq_printf ( m , " \t {.wait_cnt=%d, .wait=%s}, \n " ,
atomic_read ( & ws - > wait_cnt ) ,
waitqueue_active ( & ws - > wait ) ? " active " : " inactive " ) ;
}
seq_puts ( m , " } \n " ) ;
seq_printf ( m , " round_robin=%d \n " , sbq - > round_robin ) ;
2018-05-10 03:16:31 +03:00
seq_printf ( m , " min_shallow_depth=%u \n " , sbq - > min_shallow_depth ) ;
2017-01-26 01:32:13 +03:00
}
EXPORT_SYMBOL_GPL ( sbitmap_queue_show ) ;