// SPDX-License-Identifier: GPL-2.0
# include "bcachefs.h"
# include "alloc.h"
# include "btree_iter.h"
# include "buckets.h"
# include "clock.h"
# include "disk_groups.h"
# include "extents.h"
# include "io.h"
# include "move.h"
# include "rebalance.h"
# include "super-io.h"
# include "trace.h"
# include <linux/freezer.h>
# include <linux/kthread.h>
# include <linux/sched/cputime.h>
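
/*
 * Decide whether a single extent pointer needs rebalancing: either it's a
 * non-cached pointer that isn't on the configured background target, or its
 * data isn't compressed with the configured background compression type.
 */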
static inline bool rebalance_ptr_pred(struct bch_fs *c,
				      const struct bch_extent_ptr *ptr,
				      struct bch_extent_crc_unpacked crc,
				      struct bch_io_opts *io_opts)
{
	if (io_opts->background_target &&
	    !bch2_dev_in_target(c, ptr->dev, io_opts->background_target) &&
	    !ptr->cached)
		return true;

	if (io_opts->background_compression &&
	    crc.compression_type !=
	    bch2_compression_opt_to_type[io_opts->background_compression])
		return true;

	return false;
}
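
/*
 * Account pending rebalance work for an extent: for each pointer that fails
 * the predicate, add its compressed size to the owning device's work counter,
 * and wake the rebalance thread when a counter goes from zero to nonzero.
 */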
void bch2_rebalance_add_key(struct bch_fs *c,
			    struct bkey_s_c k,
			    struct bch_io_opts *io_opts)
{
	const struct bch_extent_ptr *ptr;
	struct bch_extent_crc_unpacked crc;
	struct bkey_s_c_extent e;

	if (!bkey_extent_is_data(k.k))
		return;

	if (!io_opts->background_target &&
	    !io_opts->background_compression)
		return;

	e = bkey_s_c_to_extent(k);

	extent_for_each_ptr_crc(e, ptr, crc)
		if (rebalance_ptr_pred(c, ptr, crc, io_opts)) {
			struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);

			if (atomic64_add_return(crc.compressed_size,
						&ca->rebalance_work) ==
			    crc.compressed_size)
				rebalance_wakeup(c);
		}
}
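
/*
 * Account rebalance work that can't be attributed to a particular device,
 * again waking the rebalance thread on the zero to nonzero transition:
 */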
void bch2_rebalance_add_work(struct bch_fs *c, u64 sectors)
{
	if (atomic64_add_return(sectors, &c->rebalance.work_unknown_dev) ==
	    sectors)
		rebalance_wakeup(c);
}
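
/*
 * Predicate passed to bch2_move_data(): returns DATA_ADD_REPLICAS with the
 * background target if any pointer in the extent needs rebalancing, and
 * DATA_SKIP otherwise. Extents with no room for another pointer are skipped.
 */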
static enum data_cmd rebalance_pred(struct bch_fs *c, void *arg,
				    enum bkey_type type,
				    struct bkey_s_c_extent e,
				    struct bch_io_opts *io_opts,
				    struct data_opts *data_opts)
{
	const struct bch_extent_ptr *ptr;
	struct bch_extent_crc_unpacked crc;

	/* Make sure we have room to add a new pointer: */
	if (bkey_val_u64s(e.k) + BKEY_EXTENT_PTR_U64s_MAX >
	    BKEY_EXTENT_VAL_U64s_MAX)
		return DATA_SKIP;

	extent_for_each_ptr_crc(e, ptr, crc)
		if (rebalance_ptr_pred(c, ptr, crc, io_opts))
			goto found;

	return DATA_SKIP;
found:
	data_opts->target		= io_opts->background_target;
	data_opts->btree_insert_flags	= 0;
	return DATA_ADD_REPLICAS;
}
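
/*
 * Snapshot of pending rebalance work: the total across the filesystem, plus
 * the device with the highest proportion of pending work relative to its
 * capacity.
 */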
struct rebalance_work {
	int		dev_most_full_idx;
	unsigned	dev_most_full_percent;
	u64		dev_most_full_work;
	u64		dev_most_full_capacity;
	u64		total_work;
};
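
/*
 * Fold one device's pending work into the running totals, saturating rather
 * than overflowing, clamping work to the device's capacity, and tracking the
 * most-full device seen so far:
 */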
static void rebalance_work_accumulate(struct rebalance_work *w,
		u64 dev_work, u64 unknown_dev, u64 capacity, int idx)
{
	unsigned percent_full;
	u64 work = dev_work + unknown_dev;

	if (work < dev_work || work < unknown_dev)
		work = U64_MAX;
	work = min(work, capacity);

	percent_full = div64_u64(work * 100, capacity);
	if (percent_full >= w->dev_most_full_percent) {
		w->dev_most_full_idx		= idx;
		w->dev_most_full_percent	= percent_full;
		w->dev_most_full_work		= work;
		w->dev_most_full_capacity	= capacity;
	}

	if (w->total_work + dev_work >= w->total_work &&
	    w->total_work + dev_work >= dev_work)
		w->total_work += dev_work;
}
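
/*
 * Compute a snapshot of pending work across all online devices: work not
 * attributed to any particular device counts towards every device's
 * percent-full calculation, and is also accumulated once against the whole
 * filesystem's capacity, with idx -1.
 */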
static struct rebalance_work rebalance_work(struct bch_fs *c)
{
	struct bch_dev *ca;
	struct rebalance_work ret = { .dev_most_full_idx = -1 };
	u64 unknown_dev = atomic64_read(&c->rebalance.work_unknown_dev);
	unsigned i;

	for_each_online_member(ca, c, i)
		rebalance_work_accumulate(&ret,
			atomic64_read(&ca->rebalance_work),
			unknown_dev,
			bucket_to_sector(ca, ca->mi.nbuckets -
					 ca->mi.first_bucket),
			i);

	rebalance_work_accumulate(&ret,
		unknown_dev, 0, c->capacity, -1);

	return ret;
}
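
/*
 * Clear all pending work counters; called just before starting a new pass
 * of bch2_move_data():
 */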
static void rebalance_work_reset(struct bch_fs *c)
{
	struct bch_dev *ca;
	unsigned i;

	for_each_online_member(ca, c, i)
		atomic64_set(&ca->rebalance_work, 0);

	atomic64_set(&c->rebalance.work_unknown_dev, 0);
}
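
/* Current task's user + system cputime, in jiffies: */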
static unsigned long curr_cputime(void)
{
	u64 utime, stime;

	task_cputime_adjusted(current, &utime, &stime);
	return nsecs_to_jiffies(utime + stime);
}
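
/*
 * Main rebalance loop: sleep while there's no work, throttle cpu usage when
 * the fullest device is under 20% full, rescale the rate controller from the
 * previous and current fullest-device percentages, then walk the whole
 * keyspace with bch2_move_data(), moving or recompressing whatever
 * rebalance_pred() flags.
 */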
static int bch2_rebalance_thread(void *arg)
{
	struct bch_fs *c = arg;
	struct bch_fs_rebalance *r = &c->rebalance;
	struct io_clock *clock = &c->io_clock[WRITE];
	struct rebalance_work w, p;
	unsigned long start, prev_start;
	unsigned long prev_run_time, prev_run_cputime;
	unsigned long cputime, prev_cputime;
	unsigned long io_start;
	long throttle;

	set_freezable();

	io_start	= atomic_long_read(&clock->now);
	p		= rebalance_work(c);
	prev_start	= jiffies;
	prev_cputime	= curr_cputime();

	while (!kthread_wait_freezable(r->enabled)) {
		start			= jiffies;
		cputime			= curr_cputime();

		prev_run_time		= start - prev_start;
		prev_run_cputime	= cputime - prev_cputime;

		w = rebalance_work(c);
		BUG_ON(!w.dev_most_full_capacity);

		if (!w.total_work) {
			r->state = REBALANCE_WAITING;
			kthread_wait_freezable(rebalance_work(c).total_work);
			continue;
		}

		/*
		 * If there isn't much work to do, throttle cpu usage:
		 */
		throttle = prev_run_cputime * 100 /
			max(1U, w.dev_most_full_percent) -
			prev_run_time;

		if (w.dev_most_full_percent < 20 && throttle > 0) {
			r->state = REBALANCE_THROTTLED;
			r->throttled_until_iotime = io_start +
				div_u64(w.dev_most_full_capacity *
					(20 - w.dev_most_full_percent),
					50);
			r->throttled_until_cputime = start + throttle;

			bch2_kthread_io_clock_wait(clock,
					r->throttled_until_iotime,
					throttle);
			continue;
		}

		/* minimum 1 mb/sec: */
		r->pd.rate.rate =
			max_t(u64, 1 << 11,
			      r->pd.rate.rate *
			      max(p.dev_most_full_percent, 1U) /
			      max(w.dev_most_full_percent, 1U));

		io_start	= atomic_long_read(&clock->now);
		p		= w;
		prev_start	= start;
		prev_cputime	= cputime;

		r->state = REBALANCE_RUNNING;
		memset(&r->move_stats, 0, sizeof(r->move_stats));
		rebalance_work_reset(c);

		bch2_move_data(c,
			       /* ratelimiting disabled for now */
			       NULL, /* &r->pd.rate, */
			       writepoint_ptr(&c->rebalance_write_point),
			       POS_MIN, POS_MAX,
			       rebalance_pred, NULL,
			       &r->move_stats);
	}

	return 0;
}
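
/* Human-readable rebalance status, for sysfs: */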
ssize_t bch2_rebalance_work_show(struct bch_fs *c, char *buf)
{
	char *out = buf, *end = out + PAGE_SIZE;
	struct bch_fs_rebalance *r = &c->rebalance;
	struct rebalance_work w = rebalance_work(c);
	char h1[21], h2[21];

	bch2_hprint(h1, w.dev_most_full_work << 9);
	bch2_hprint(h2, w.dev_most_full_capacity << 9);
	out += scnprintf(out, end - out,
			 "fullest_dev (%i):\t%s/%s\n",
			 w.dev_most_full_idx, h1, h2);

	bch2_hprint(h1, w.total_work << 9);
	bch2_hprint(h2, c->capacity << 9);
	out += scnprintf(out, end - out,
			 "total work:\t\t%s/%s\n",
			 h1, h2);

	out += scnprintf(out, end - out,
			 "rate:\t\t\t%u\n",
			 r->pd.rate.rate);

	switch (r->state) {
	case REBALANCE_WAITING:
		out += scnprintf(out, end - out, "waiting\n");
		break;
	case REBALANCE_THROTTLED:
		bch2_hprint(h1,
			    (r->throttled_until_iotime -
			     atomic_long_read(&c->io_clock[WRITE].now)) << 9);
		out += scnprintf(out, end - out,
				 "throttled for %lu sec or %s io\n",
				 (r->throttled_until_cputime - jiffies) / HZ,
				 h1);
		break;
	case REBALANCE_RUNNING:
		out += scnprintf(out, end - out, "running\n");
		out += scnprintf(out, end - out, "pos %llu:%llu\n",
				 r->move_stats.iter.pos.inode,
				 r->move_stats.iter.pos.offset);
		break;
	}

	return out - buf;
}
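
/*
 * Tear down the rebalance thread: crank the rate limiter up and reset it so
 * the thread can't be stuck in a ratelimited sleep, clear the thread pointer,
 * and synchronize with rebalance_wakeup() before stopping the task.
 */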
void bch2_rebalance_stop(struct bch_fs *c)
{
	struct task_struct *p;

	c->rebalance.pd.rate.rate = UINT_MAX;
	bch2_ratelimit_reset(&c->rebalance.pd.rate);

	p = rcu_dereference_protected(c->rebalance.thread, 1);
	c->rebalance.thread = NULL;

	if (p) {
		/* for synchronizing with rebalance_wakeup() */
		synchronize_rcu();

		kthread_stop(p);
		put_task_struct(p);
	}
}
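
/* Start the rebalance thread, unless we're in nochanges mode: */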
int bch2_rebalance_start(struct bch_fs *c)
{
	struct task_struct *p;

	if (c->opts.nochanges)
		return 0;

	p = kthread_create(bch2_rebalance_thread, c, "bch_rebalance");
	if (IS_ERR(p))
		return PTR_ERR(p);

	get_task_struct(p);
	rcu_assign_pointer(c->rebalance.thread, p);
	wake_up_process(p);
	return 0;
}
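
/*
 * work_unknown_dev starts out at S64_MAX: until the counters have been reset
 * by a pass over the keyspace, the rebalance thread assumes there's pending
 * work of unknown size.
 */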
void bch2_fs_rebalance_init(struct bch_fs *c)
{
	bch2_pd_controller_init(&c->rebalance.pd);

	atomic64_set(&c->rebalance.work_unknown_dev, S64_MAX);
}