/*
 * Copyright (C) 2016 Facebook
 * Copyright (C) 2013-2014 Jens Axboe
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <https://www.gnu.org/licenses/>.
 */
#include <linux/random.h>
#include <linux/sbitmap.h>

int sbitmap_init_node(struct sbitmap *sb, unsigned int depth, int shift,
                      gfp_t flags, int node)
{
        unsigned int bits_per_word;
        unsigned int i;

        if (shift < 0) {
                shift = ilog2(BITS_PER_LONG);
                /*
                 * If the bitmap is small, shrink the number of bits per word so
                 * we spread over a few cachelines, at least. If less than 4
                 * bits, just forget about it, it's not going to work optimally
                 * anyway.
                 */
                if (depth >= 4) {
                        while ((4U << shift) > depth)
                                shift--;
                }
        }
        bits_per_word = 1U << shift;
        if (bits_per_word > BITS_PER_LONG)
                return -EINVAL;

        sb->shift = shift;
        sb->depth = depth;
        sb->map_nr = DIV_ROUND_UP(sb->depth, bits_per_word);

        if (depth == 0) {
                sb->map = NULL;
                return 0;
        }

        sb->map = kzalloc_node(sb->map_nr * sizeof(*sb->map), flags, node);
        if (!sb->map)
                return -ENOMEM;

        for (i = 0; i < sb->map_nr; i++) {
                sb->map[i].depth = min(depth, bits_per_word);
                depth -= sb->map[i].depth;
        }
        return 0;
}
EXPORT_SYMBOL_GPL(sbitmap_init_node);

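/*
 * Illustrative sketch only (not part of this file): a caller might set up a
 * 128-bit map, let the word size be chosen automatically by passing a
 * negative shift, claim one bit, and tear the map down again.  The call site
 * below is hypothetical.
 *
 *      struct sbitmap sb;
 *      int ret, nr;
 *
 *      ret = sbitmap_init_node(&sb, 128, -1, GFP_KERNEL, NUMA_NO_NODE);
 *      if (ret)
 *              return ret;
 *      nr = sbitmap_get(&sb, 0, false);        (returns -1 if the map is full)
 *      if (nr >= 0)
 *              sbitmap_clear_bit(&sb, nr);
 *      sbitmap_free(&sb);
 */
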
void sbitmap_resize(struct sbitmap *sb, unsigned int depth)
{
        unsigned int bits_per_word = 1U << sb->shift;
        unsigned int i;

        sb->depth = depth;
        sb->map_nr = DIV_ROUND_UP(sb->depth, bits_per_word);

        for (i = 0; i < sb->map_nr; i++) {
                sb->map[i].depth = min(depth, bits_per_word);
                depth -= sb->map[i].depth;
        }
}
EXPORT_SYMBOL_GPL(sbitmap_resize);

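/*
 * Find and claim a free bit in one word of the map.  The search starts at
 * @hint; if @wrap is true and the hints are non-zero, a failed search restarts
 * at most once from bit 0 so the whole word is covered.  Returns the bit
 * number on success or -1 if the word is fully set.
 */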
static int __sbitmap_get_word(struct sbitmap_word *word, unsigned int hint,
                              bool wrap)
{
        unsigned int orig_hint = hint;
        int nr;

        while (1) {
                nr = find_next_zero_bit(&word->word, word->depth, hint);
                if (unlikely(nr >= word->depth)) {
                        /*
                         * We started with an offset, and we didn't reset the
                         * offset to 0 in a failure case, so start from 0 to
                         * exhaust the map.
                         */
                        if (orig_hint && hint && wrap) {
                                hint = orig_hint = 0;
                                continue;
                        }
                        return -1;
                }

                if (!test_and_set_bit(nr, &word->word))
                        break;

                hint = nr + 1;
                if (hint >= word->depth - 1)
                        hint = 0;
        }

        return nr;
}

int sbitmap_get(struct sbitmap *sb, unsigned int alloc_hint, bool round_robin)
{
        unsigned int i, index;
        int nr = -1;

        index = SB_NR_TO_INDEX(sb, alloc_hint);

        for (i = 0; i < sb->map_nr; i++) {
                nr = __sbitmap_get_word(&sb->map[index],
                                        SB_NR_TO_BIT(sb, alloc_hint),
                                        !round_robin);
                if (nr != -1) {
                        nr += index << sb->shift;
                        break;
                }

                /* Jump to next index. */
                index++;
                alloc_hint = index << sb->shift;

                if (index >= sb->map_nr) {
                        index = 0;
                        alloc_hint = 0;
                }
        }

        return nr;
}
EXPORT_SYMBOL_GPL(sbitmap_get);

bool sbitmap_any_bit_set(const struct sbitmap *sb)
{
        unsigned int i;

        for (i = 0; i < sb->map_nr; i++) {
                if (sb->map[i].word)
                        return true;
        }
        return false;
}
EXPORT_SYMBOL_GPL(sbitmap_any_bit_set);

bool sbitmap_any_bit_clear(const struct sbitmap *sb)
{
        unsigned int i;

        for (i = 0; i < sb->map_nr; i++) {
                const struct sbitmap_word *word = &sb->map[i];
                unsigned long ret;

                ret = find_first_zero_bit(&word->word, word->depth);
                if (ret < word->depth)
                        return true;
        }
        return false;
}
EXPORT_SYMBOL_GPL(sbitmap_any_bit_clear);

unsigned int sbitmap_weight(const struct sbitmap *sb)
{
        unsigned int i, weight = 0;

        for (i = 0; i < sb->map_nr; i++) {
                const struct sbitmap_word *word = &sb->map[i];

                weight += bitmap_weight(&word->word, word->depth);
        }
        return weight;
}
EXPORT_SYMBOL_GPL(sbitmap_weight);

static unsigned int sbq_calc_wake_batch(unsigned int depth)
{
        unsigned int wake_batch;

        /*
         * For each batch, we wake up one queue. We need to make sure that our
         * batch size is small enough that the full depth of the bitmap is
         * enough to wake up all of the queues.
         */
        wake_batch = SBQ_WAKE_BATCH;
        if (wake_batch > depth / SBQ_WAIT_QUEUES)
                wake_batch = max(1U, depth / SBQ_WAIT_QUEUES);

        return wake_batch;
}

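/*
 * Worked example for sbq_calc_wake_batch() above, assuming the header's
 * defaults of SBQ_WAKE_BATCH == 8 and SBQ_WAIT_QUEUES == 8:
 *
 *      depth = 256: 256 / 8 = 32 >= 8, so the batch stays at 8.
 *      depth =  32:  32 / 8 =  4 <  8, so the batch shrinks to 4.
 *      depth =   4:   4 / 8 =  0,      so max(1U, 0) clamps the batch to 1.
 */
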
int sbitmap_queue_init_node(struct sbitmap_queue *sbq, unsigned int depth,
                            int shift, bool round_robin, gfp_t flags, int node)
{
        int ret;
        int i;

        ret = sbitmap_init_node(&sbq->sb, depth, shift, flags, node);
        if (ret)
                return ret;

        sbq->alloc_hint = alloc_percpu_gfp(unsigned int, flags);
        if (!sbq->alloc_hint) {
                sbitmap_free(&sbq->sb);
                return -ENOMEM;
        }

        if (depth && !round_robin) {
                for_each_possible_cpu(i)
                        *per_cpu_ptr(sbq->alloc_hint, i) = prandom_u32() % depth;
        }

        sbq->wake_batch = sbq_calc_wake_batch(depth);
        atomic_set(&sbq->wake_index, 0);

        sbq->ws = kzalloc_node(SBQ_WAIT_QUEUES * sizeof(*sbq->ws), flags, node);
        if (!sbq->ws) {
                free_percpu(sbq->alloc_hint);
                sbitmap_free(&sbq->sb);
                return -ENOMEM;
        }

        for (i = 0; i < SBQ_WAIT_QUEUES; i++) {
                init_waitqueue_head(&sbq->ws[i].wait);
                atomic_set(&sbq->ws[i].wait_cnt, sbq->wake_batch);
        }

        sbq->round_robin = round_robin;
        return 0;
}
EXPORT_SYMBOL_GPL(sbitmap_queue_init_node);

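/*
 * Illustrative sketch only: a hypothetical driver could set up a queue of 64
 * tags with round-robin allocation disabled and release it with
 * sbitmap_queue_free() when it is done.  Nothing below is required by this
 * file.
 *
 *      struct sbitmap_queue sbq;
 *      int ret;
 *
 *      ret = sbitmap_queue_init_node(&sbq, 64, -1, false, GFP_KERNEL,
 *                                    NUMA_NO_NODE);
 *      if (ret)
 *              return ret;
 *      ...
 *      sbitmap_queue_free(&sbq);
 */
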
void sbitmap_queue_resize(struct sbitmap_queue *sbq, unsigned int depth)
{
        unsigned int wake_batch = sbq_calc_wake_batch(depth);
        int i;

        if (sbq->wake_batch != wake_batch) {
                WRITE_ONCE(sbq->wake_batch, wake_batch);
                /*
                 * Pairs with the memory barrier in sbq_wake_up() to ensure that
                 * the batch size is updated before the wait counts.
                 */
                smp_mb__before_atomic();
                for (i = 0; i < SBQ_WAIT_QUEUES; i++)
                        atomic_set(&sbq->ws[i].wait_cnt, 1);
        }

        sbitmap_resize(&sbq->sb, depth);
}
EXPORT_SYMBOL_GPL(sbitmap_queue_resize);

int __sbitmap_queue_get(struct sbitmap_queue *sbq)
{
        unsigned int hint, depth;
        int nr;

        hint = this_cpu_read(*sbq->alloc_hint);
        depth = READ_ONCE(sbq->sb.depth);
        if (unlikely(hint >= depth)) {
                hint = depth ? prandom_u32() % depth : 0;
                this_cpu_write(*sbq->alloc_hint, hint);
        }
        nr = sbitmap_get(&sbq->sb, hint, sbq->round_robin);

        if (nr == -1) {
                /* If the map is full, a hint won't do us much good. */
                this_cpu_write(*sbq->alloc_hint, 0);
        } else if (nr == hint || unlikely(sbq->round_robin)) {
                /* Only update the hint if we used it. */
                hint = nr + 1;
                if (hint >= depth - 1)
                        hint = 0;
                this_cpu_write(*sbq->alloc_hint, hint);
        }

        return nr;
}
EXPORT_SYMBOL_GPL(__sbitmap_queue_get);

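/*
 * Pick the wait queue to wake next.  The scan starts at wake_index and skips
 * queues with no waiters, advancing wake_index to the queue that was found so
 * later wakeups do not rescan the same empty queues.  Returns NULL if no
 * queue has active waiters.
 */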
static struct sbq_wait_state *sbq_wake_ptr(struct sbitmap_queue *sbq)
{
        int i, wake_index;

        wake_index = atomic_read(&sbq->wake_index);
        for (i = 0; i < SBQ_WAIT_QUEUES; i++) {
                struct sbq_wait_state *ws = &sbq->ws[wake_index];

                if (waitqueue_active(&ws->wait)) {
                        int o = atomic_read(&sbq->wake_index);

                        if (wake_index != o)
                                atomic_cmpxchg(&sbq->wake_index, o, wake_index);
                        return ws;
                }

                wake_index = sbq_index_inc(wake_index);
        }

        return NULL;
}

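/*
 * Called after a bit has been cleared: charge the freed bit against the
 * chosen wait queue's batch counter and, once a whole batch has been freed,
 * refill the counter and wake that queue.
 */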
static void sbq_wake_up(struct sbitmap_queue *sbq)
{
        struct sbq_wait_state *ws;
        unsigned int wake_batch;
        int wait_cnt;

        /*
         * Pairs with the memory barrier in set_current_state() to ensure the
         * proper ordering of clear_bit()/waitqueue_active() in the waker and
         * test_and_set_bit()/prepare_to_wait()/finish_wait() in the waiter. See
         * the comment on waitqueue_active(). This is __after_atomic because we
         * just did clear_bit() in the caller.
         */
        smp_mb__after_atomic();

        ws = sbq_wake_ptr(sbq);
        if (!ws)
                return;

        wait_cnt = atomic_dec_return(&ws->wait_cnt);
        if (wait_cnt <= 0) {
                wake_batch = READ_ONCE(sbq->wake_batch);
                /*
                 * Pairs with the memory barrier in sbitmap_queue_resize() to
                 * ensure that we see the batch size update before the wait
                 * count is reset.
                 */
                smp_mb__before_atomic();
                /*
                 * If there are concurrent callers to sbq_wake_up(), the last
                 * one to decrement the wait count below zero will bump it back
                 * up. If there is a concurrent resize, the count reset will
                 * either cause the cmpxchg to fail or overwrite after the
                 * cmpxchg.
                 */
                atomic_cmpxchg(&ws->wait_cnt, wait_cnt, wait_cnt + wake_batch);
                sbq_index_atomic_inc(&sbq->wake_index);
                wake_up(&ws->wait);
        }
}

void sbitmap_queue_clear(struct sbitmap_queue *sbq, unsigned int nr,
                         unsigned int cpu)
{
        sbitmap_clear_bit(&sbq->sb, nr);
        sbq_wake_up(sbq);
        if (likely(!sbq->round_robin && nr < sbq->sb.depth))
                *per_cpu_ptr(sbq->alloc_hint, cpu) = nr;
}
EXPORT_SYMBOL_GPL(sbitmap_queue_clear);

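/*
 * Illustrative sketch only: a hypothetical allocate/release cycle built on
 * the queue API above.  A real caller would typically sleep on one of the
 * sbq->ws wait queues when -1 is returned rather than giving up, and
 * do_something_with() is a made-up helper.
 *
 *      int tag = __sbitmap_queue_get(&sbq);
 *
 *      if (tag >= 0) {
 *              do_something_with(tag);
 *              sbitmap_queue_clear(&sbq, tag, raw_smp_processor_id());
 *      }
 */
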
void sbitmap_queue_wake_all(struct sbitmap_queue *sbq)
{
        int i, wake_index;

        /*
         * Pairs with the memory barrier in set_current_state() like in
         * sbq_wake_up().
         */
        smp_mb();
        wake_index = atomic_read(&sbq->wake_index);
        for (i = 0; i < SBQ_WAIT_QUEUES; i++) {
                struct sbq_wait_state *ws = &sbq->ws[wake_index];

                if (waitqueue_active(&ws->wait))
                        wake_up(&ws->wait);

                wake_index = sbq_index_inc(wake_index);
        }
}
EXPORT_SYMBOL_GPL(sbitmap_queue_wake_all);