// SPDX-License-Identifier: GPL-2.0
# include "bcachefs.h"
2018-10-06 00:46:55 -04:00
# include "alloc_foreground.h"
2017-03-16 22:18:50 -08:00
# include "btree_iter.h"
# include "buckets.h"
# include "clock.h"
# include "disk_groups.h"
# include "extents.h"
# include "io.h"
# include "move.h"
# include "rebalance.h"
# include "super-io.h"
# include "trace.h"
# include <linux/freezer.h>
# include <linux/kthread.h>
# include <linux/sched/cputime.h>
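
/*
 * Decide whether a single extent pointer needs to be moved by background
 * rebalance: either it is a non-cached pointer to a device outside the
 * configured background_target, or its compression type doesn't match
 * background_compression.
 */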
static inline bool rebalance_ptr_pred(struct bch_fs *c,
				      struct extent_ptr_decoded p,
				      struct bch_io_opts *io_opts)
{
	if (io_opts->background_target &&
	    !bch2_dev_in_target(c, p.ptr.dev, io_opts->background_target) &&
	    !p.ptr.cached)
		return true;

	if (io_opts->background_compression &&
	    p.crc.compression_type !=
	    bch2_compression_opt_to_type[io_opts->background_compression])
		return true;

	return false;
}
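
/*
 * Account pending rebalance work for a key: for each pointer that
 * rebalance_ptr_pred() flags, add its compressed size to the owning device's
 * rebalance_work counter, and wake the rebalance thread when that counter
 * goes from zero to nonzero.
 */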
void bch2_rebalance_add_key(struct bch_fs *c,
			    struct bkey_s_c k,
			    struct bch_io_opts *io_opts)
{
	struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
	const union bch_extent_entry *entry;
	struct extent_ptr_decoded p;

	if (!io_opts->background_target &&
	    !io_opts->background_compression)
		return;

	bkey_for_each_ptr_decode(k.k, ptrs, p, entry)
		if (rebalance_ptr_pred(c, p, io_opts)) {
			struct bch_dev *ca = bch_dev_bkey_exists(c, p.ptr.dev);

			if (atomic64_add_return(p.crc.compressed_size,
						&ca->rebalance_work) ==
			    p.crc.compressed_size)
				rebalance_wakeup(c);
		}
}
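
/*
 * Account rebalance work that can't be attributed to a specific device.
 */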
void bch2_rebalance_add_work(struct bch_fs *c, u64 sectors)
{
	if (atomic64_add_return(sectors, &c->rebalance.work_unknown_dev) ==
	    sectors)
		rebalance_wakeup(c);
}
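
/*
 * Predicate passed to bch2_move_data(): move an extent if any of its pointers
 * needs rebalancing, or if it has fewer non-cached replicas than the
 * data_replicas option asks for.
 */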
static enum data_cmd rebalance_pred(struct bch_fs *c, void *arg,
				    struct bkey_s_c k,
				    struct bch_io_opts *io_opts,
				    struct data_opts *data_opts)
{
	struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
	const union bch_extent_entry *entry;
	struct extent_ptr_decoded p;
	unsigned nr_replicas = 0;

	bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
		nr_replicas += !p.ptr.cached;

		if (rebalance_ptr_pred(c, p, io_opts))
			goto found;
	}

	if (nr_replicas < io_opts->data_replicas)
		goto found;

	return DATA_SKIP;
found:
	data_opts->target		= io_opts->background_target;
	data_opts->btree_insert_flags	= 0;
	return DATA_ADD_REPLICAS;
}

struct rebalance_work {
	int		dev_most_full_idx;
	unsigned	dev_most_full_percent;
	u64		dev_most_full_work;
	u64		dev_most_full_capacity;
	u64		total_work;
};
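
/*
 * Fold one device's pending work (plus work not attributed to any device)
 * into the summary: track whichever device has the highest proportion of
 * pending work relative to its capacity, and accumulate total work with
 * overflow checks.
 */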
static void rebalance_work_accumulate(struct rebalance_work *w,
		u64 dev_work, u64 unknown_dev, u64 capacity, int idx)
{
	unsigned percent_full;
	u64 work = dev_work + unknown_dev;

	if (work < dev_work || work < unknown_dev)
		work = U64_MAX;
	work = min(work, capacity);

	percent_full = div64_u64(work * 100, capacity);

	if (percent_full >= w->dev_most_full_percent) {
		w->dev_most_full_idx		= idx;
		w->dev_most_full_percent	= percent_full;
		w->dev_most_full_work		= work;
		w->dev_most_full_capacity	= capacity;
	}

	if (w->total_work + dev_work >= w->total_work &&
	    w->total_work + dev_work >= dev_work)
		w->total_work += dev_work;
}
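
/*
 * Sum pending rebalance work across all online devices; work not attributed
 * to a specific device is counted against each device (it could be anywhere)
 * and once more against the filesystem as a whole (idx -1).
 */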
static struct rebalance_work rebalance_work(struct bch_fs *c)
{
	struct bch_dev *ca;
	struct rebalance_work ret = { .dev_most_full_idx = -1 };
	u64 unknown_dev = atomic64_read(&c->rebalance.work_unknown_dev);
	unsigned i;

	for_each_online_member(ca, c, i)
		rebalance_work_accumulate(&ret,
			atomic64_read(&ca->rebalance_work),
			unknown_dev,
			bucket_to_sector(ca, ca->mi.nbuckets -
					 ca->mi.first_bucket),
			i);

	rebalance_work_accumulate(&ret,
		unknown_dev, 0, c->capacity, -1);

	return ret;
}

static void rebalance_work_reset(struct bch_fs *c)
{
	struct bch_dev *ca;
	unsigned i;

	for_each_online_member(ca, c, i)
		atomic64_set(&ca->rebalance_work, 0);

	atomic64_set(&c->rebalance.work_unknown_dev, 0);
}

static unsigned long curr_cputime(void)
{
	u64 utime, stime;

	task_cputime_adjusted(current, &utime, &stime);
	return nsecs_to_jiffies(utime + stime);
}
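
/*
 * Main loop of the rebalance thread: compute how much work is pending, sleep
 * when there is none, throttle CPU usage while pending work is under 20% of
 * the busiest device's capacity, and otherwise scan the keyspace with
 * bch2_move_data(), using rebalance_pred() to pick extents to move.
 */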
static int bch2_rebalance_thread(void *arg)
{
	struct bch_fs *c = arg;
	struct bch_fs_rebalance *r = &c->rebalance;
	struct io_clock *clock = &c->io_clock[WRITE];
	struct rebalance_work w, p;
	unsigned long start, prev_start;
	unsigned long prev_run_time, prev_run_cputime;
	unsigned long cputime, prev_cputime;
	unsigned long io_start;
	long throttle;

	set_freezable();

	io_start	= atomic_long_read(&clock->now);
	p		= rebalance_work(c);
	prev_start	= jiffies;
	prev_cputime	= curr_cputime();

	while (!kthread_wait_freezable(r->enabled)) {
		start			= jiffies;
		cputime			= curr_cputime();

		prev_run_time		= start - prev_start;
		prev_run_cputime	= cputime - prev_cputime;

		w			= rebalance_work(c);
		BUG_ON(!w.dev_most_full_capacity);

		if (!w.total_work) {
			r->state = REBALANCE_WAITING;
			kthread_wait_freezable(rebalance_work(c).total_work);
			continue;
		}

		/*
		 * If there isn't much work to do, throttle cpu usage:
		 */
		throttle = prev_run_cputime * 100 /
			max(1U, w.dev_most_full_percent) -
			prev_run_time;

		if (w.dev_most_full_percent < 20 && throttle > 0) {
			r->state = REBALANCE_THROTTLED;
			r->throttled_until_iotime = io_start +
				div_u64(w.dev_most_full_capacity *
					(20 - w.dev_most_full_percent),
					50);
			r->throttled_until_cputime = start + throttle;

			bch2_kthread_io_clock_wait(clock,
				r->throttled_until_iotime,
				throttle);
			continue;
		}

		/* minimum 1 mb/sec: */
		r->pd.rate.rate =
			max_t(u64, 1 << 11,
			      r->pd.rate.rate *
			      max(p.dev_most_full_percent, 1U) /
			      max(w.dev_most_full_percent, 1U));

		io_start	= atomic_long_read(&clock->now);
		p		= w;
		prev_start	= start;
		prev_cputime	= cputime;

		r->state = REBALANCE_RUNNING;
		memset(&r->move_stats, 0, sizeof(r->move_stats));
		rebalance_work_reset(c);

		bch2_move_data(c,
			       /* ratelimiting disabled for now */
			       NULL, /* &r->pd.rate, */
			       writepoint_ptr(&c->rebalance_write_point),
			       POS_MIN, POS_MAX,
			       rebalance_pred, NULL,
			       &r->move_stats);
	}

	return 0;
}
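
/*
 * Format rebalance status (pending work, rate, and current state) into a text
 * buffer.
 */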
ssize_t bch2_rebalance_work_show(struct bch_fs *c, char *buf)
{
	struct printbuf out = _PBUF(buf, PAGE_SIZE);
	struct bch_fs_rebalance *r = &c->rebalance;
	struct rebalance_work w = rebalance_work(c);
	char h1[21], h2[21];

	bch2_hprint(&PBUF(h1), w.dev_most_full_work << 9);
	bch2_hprint(&PBUF(h2), w.dev_most_full_capacity << 9);
	pr_buf(&out, "fullest_dev (%i):\t%s/%s\n",
	       w.dev_most_full_idx, h1, h2);

	bch2_hprint(&PBUF(h1), w.total_work << 9);
	bch2_hprint(&PBUF(h2), c->capacity << 9);
	pr_buf(&out, "total work:\t\t%s/%s\n", h1, h2);

	pr_buf(&out, "rate:\t\t\t%u\n", r->pd.rate.rate);

	switch (r->state) {
	case REBALANCE_WAITING:
		pr_buf(&out, "waiting\n");
		break;
	case REBALANCE_THROTTLED:
		bch2_hprint(&PBUF(h1),
			    (r->throttled_until_iotime -
			     atomic_long_read(&c->io_clock[WRITE].now)) << 9);
		pr_buf(&out, "throttled for %lu sec or %s io\n",
		       (r->throttled_until_cputime - jiffies) / HZ,
		       h1);
		break;
	case REBALANCE_RUNNING:
		pr_buf(&out, "running\n");
		pr_buf(&out, "pos %llu:%llu\n",
		       r->move_stats.pos.inode,
		       r->move_stats.pos.offset);
		break;
	}

	return out.pos - buf;
}
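
/*
 * Stop the rebalance thread: unthrottle the rate limiter so nothing stays
 * blocked on it, clear the thread pointer (synchronizing with
 * rebalance_wakeup() via RCU), then stop the kthread.
 */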
void bch2_rebalance_stop(struct bch_fs *c)
{
	struct task_struct *p;

	c->rebalance.pd.rate.rate = UINT_MAX;
	bch2_ratelimit_reset(&c->rebalance.pd.rate);

	p = rcu_dereference_protected(c->rebalance.thread, 1);
	c->rebalance.thread = NULL;

	if (p) {
		/* for synchronizing with rebalance_wakeup() */
		synchronize_rcu();

		kthread_stop(p);
		put_task_struct(p);
	}
}
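
/*
 * Create and start the rebalance thread; a no-op when the filesystem is
 * mounted with nochanges.
 */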
int bch2_rebalance_start(struct bch_fs *c)
{
	struct task_struct *p;

	if (c->opts.nochanges)
		return 0;

	p = kthread_create(bch2_rebalance_thread, c, "bch_rebalance");
	if (IS_ERR(p))
		return PTR_ERR(p);

	get_task_struct(p);

	rcu_assign_pointer(c->rebalance.thread, p);
	wake_up_process(p);

	return 0;
}

void bch2_fs_rebalance_init(struct bch_fs *c)
{
	bch2_pd_controller_init(&c->rebalance.pd);

	atomic64_set(&c->rebalance.work_unknown_dev, S64_MAX);
}