// SPDX-License-Identifier: GPL-2.0

#include "bcachefs.h"
#include "alloc_foreground.h"
#include "btree_iter.h"
#include "buckets.h"
#include "clock.h"
#include "disk_groups.h"
#include "extents.h"
#include "io.h"
#include "move.h"
#include "rebalance.h"
#include "super-io.h"
#include "trace.h"

#include <linux/freezer.h>
#include <linux/kthread.h>
#include <linux/sched/cputime.h>

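/*
 * Background rebalance: finds extents that are compressed with the wrong
 * algorithm or that have replicas outside the background target, and feeds
 * them to the move path. Pending work is tracked in sectors, per device
 * where the source device is known and in work_unknown_dev otherwise.
 */
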
/*
 * Check if an extent should be moved:
 * returns -1 if it should not be moved, or
 * device of pointer that should be moved, if known, or INT_MAX if unknown
 */
static int __bch2_rebalance_pred(struct bch_fs *c,
				 struct bkey_s_c k,
				 struct bch_io_opts *io_opts)
{
	struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
	const union bch_extent_entry *entry;
	struct extent_ptr_decoded p;

	if (io_opts->background_compression &&
	    !bch2_bkey_is_incompressible(k))
		bkey_for_each_ptr_decode(k.k, ptrs, p, entry)
			if (!p.ptr.cached &&
			    p.crc.compression_type !=
			    bch2_compression_opt_to_type[io_opts->background_compression])
				return p.ptr.dev;

	if (io_opts->background_target)
		bkey_for_each_ptr_decode(k.k, ptrs, p, entry)
			if (!p.ptr.cached &&
			    !bch2_dev_in_target(c, p.ptr.dev, io_opts->background_target))
				return p.ptr.dev;

	return -1;
}

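/*
 * Account @k's size as pending rebalance work, against the device the data
 * would be moved off of when __bch2_rebalance_pred() can name one, and
 * against work_unknown_dev otherwise. The rebalance thread is only woken
 * when a counter goes from zero to nonzero. Callers live outside this file,
 * presumably in the extent update path.
 */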
void bch2_rebalance_add_key(struct bch_fs *c,
			    struct bkey_s_c k,
			    struct bch_io_opts *io_opts)
{
	atomic64_t *counter;
	int dev;

	dev = __bch2_rebalance_pred(c, k, io_opts);
	if (dev < 0)
		return;

	counter = dev < INT_MAX
		? &bch_dev_bkey_exists(c, dev)->rebalance_work
		: &c->rebalance.work_unknown_dev;

	if (atomic64_add_return(k.k->size, counter) == k.k->size)
		rebalance_wakeup(c);
}

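/*
 * Predicate handed to bch2_move_data() by the rebalance thread: keys flagged
 * by __bch2_rebalance_pred() are rewritten to the background target
 * (DATA_ADD_REPLICAS, single replica); everything else is skipped.
 */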
static enum data_cmd rebalance_pred(struct bch_fs *c, void *arg,
				    struct bkey_s_c k,
				    struct bch_io_opts *io_opts,
				    struct data_opts *data_opts)
{
	if (__bch2_rebalance_pred(c, k, io_opts) >= 0) {
		data_opts->target		= io_opts->background_target;
		data_opts->nr_replicas		= 1;
		data_opts->btree_insert_flags	= 0;
		return DATA_ADD_REPLICAS;
	} else {
		return DATA_SKIP;
	}
}

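/*
 * Add @sectors of rebalance work that can't be attributed to a particular
 * device; wakes the rebalance thread if the counter was previously zero.
 */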
void bch2_rebalance_add_work(struct bch_fs *c, u64 sectors)
{
	if (atomic64_add_return(sectors, &c->rebalance.work_unknown_dev) ==
	    sectors)
		rebalance_wakeup(c);
}

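/*
 * Snapshot of pending rebalance work: the device with the highest percentage
 * of pending work relative to its capacity, plus the filesystem-wide total.
 */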
struct rebalance_work {
	int		dev_most_full_idx;
	unsigned	dev_most_full_percent;
	u64		dev_most_full_work;
	u64		dev_most_full_capacity;
	u64		total_work;
};

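/*
 * Fold one device's pending work into the snapshot: unknown-device work is
 * pessimistically charged to every device, the addition is checked for
 * overflow, and the result is clamped to the device's capacity before the
 * percent-full figure is computed.
 */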
static void rebalance_work_accumulate(struct rebalance_work *w,
		u64 dev_work, u64 unknown_dev, u64 capacity, int idx)
{
	unsigned percent_full;
	u64 work = dev_work + unknown_dev;

	if (work < dev_work || work < unknown_dev)
		work = U64_MAX;
	work = min(work, capacity);

	percent_full = div64_u64(work * 100, capacity);

	if (percent_full >= w->dev_most_full_percent) {
		w->dev_most_full_idx		= idx;
		w->dev_most_full_percent	= percent_full;
		w->dev_most_full_work		= work;
		w->dev_most_full_capacity	= capacity;
	}

	if (w->total_work + dev_work >= w->total_work &&
	    w->total_work + dev_work >= dev_work)
		w->total_work += dev_work;
}

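/*
 * Build a rebalance_work snapshot across all online members; the
 * unknown-device counter is also accumulated once on its own, against the
 * whole filesystem's capacity, with index -1.
 */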
static struct rebalance_work rebalance_work(struct bch_fs *c)
{
	struct bch_dev *ca;
	struct rebalance_work ret = { .dev_most_full_idx = -1 };
	u64 unknown_dev = atomic64_read(&c->rebalance.work_unknown_dev);
	unsigned i;

	for_each_online_member(ca, c, i)
		rebalance_work_accumulate(&ret,
			atomic64_read(&ca->rebalance_work),
			unknown_dev,
			bucket_to_sector(ca, ca->mi.nbuckets -
					 ca->mi.first_bucket),
			i);

	rebalance_work_accumulate(&ret,
		unknown_dev, 0, c->capacity, -1);

	return ret;
}

static void rebalance_work_reset(struct bch_fs *c)
{
	struct bch_dev *ca;
	unsigned i;

	for_each_online_member(ca, c, i)
		atomic64_set(&ca->rebalance_work, 0);

	atomic64_set(&c->rebalance.work_unknown_dev, 0);
}

static unsigned long curr_cputime(void)
{
	u64 utime, stime;

	task_cputime_adjusted(current, &utime, &stime);

	return nsecs_to_jiffies(utime + stime);
}

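/*
 * Main rebalance loop: sleep while rebalance is disabled or there is no
 * pending work; otherwise compare cpu time used on the previous pass against
 * wall clock time and throttle against the write io clock while the fullest
 * device is under 20% full. The pd controller's rate is scaled by the ratio
 * of the previous to the current fullest-device percentage, floored at
 * 1 << 11 sectors/sec (1MiB/sec assuming 512-byte sectors, matching the
 * "minimum 1 mb/sec" comment below), and then bch2_move_data() is run over
 * every btree.
 */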
static int bch2_rebalance_thread(void *arg)
{
	struct bch_fs *c = arg;
	struct bch_fs_rebalance *r = &c->rebalance;
	struct io_clock *clock = &c->io_clock[WRITE];
	struct rebalance_work w, p;
	unsigned long start, prev_start;
	unsigned long prev_run_time, prev_run_cputime;
	unsigned long cputime, prev_cputime;
	u64 io_start;
	long throttle;

	set_freezable();

	io_start	= atomic64_read(&clock->now);
	p		= rebalance_work(c);
	prev_start	= jiffies;
	prev_cputime	= curr_cputime();

	while (!kthread_wait_freezable(r->enabled)) {
		cond_resched();

		start			= jiffies;
		cputime			= curr_cputime();

		prev_run_time		= start - prev_start;
		prev_run_cputime	= cputime - prev_cputime;

		w			= rebalance_work(c);
		BUG_ON(!w.dev_most_full_capacity);

		if (!w.total_work) {
			r->state = REBALANCE_WAITING;
			kthread_wait_freezable(rebalance_work(c).total_work);
			continue;
		}

		/*
		 * If there isn't much work to do, throttle cpu usage:
		 */
		throttle = prev_run_cputime * 100 /
			max(1U, w.dev_most_full_percent) -
			prev_run_time;

		if (w.dev_most_full_percent < 20 && throttle > 0) {
			r->throttled_until_iotime = io_start +
				div_u64(w.dev_most_full_capacity *
					(20 - w.dev_most_full_percent),
					50);

			if (atomic64_read(&clock->now) + clock->max_slop <
			    r->throttled_until_iotime) {
				r->throttled_until_cputime = start + throttle;
				r->state = REBALANCE_THROTTLED;

				bch2_kthread_io_clock_wait(clock,
					r->throttled_until_iotime,
					throttle);
				continue;
			}
		}

		/* minimum 1 mb/sec: */
		r->pd.rate.rate =
			max_t(u64, 1 << 11,
			      r->pd.rate.rate *
			      max(p.dev_most_full_percent, 1U) /
			      max(w.dev_most_full_percent, 1U));

		io_start	= atomic64_read(&clock->now);
		p		= w;
		prev_start	= start;
		prev_cputime	= cputime;

		r->state = REBALANCE_RUNNING;
		memset(&r->move_stats, 0, sizeof(r->move_stats));
		rebalance_work_reset(c);

		bch2_move_data(c,
			       0,		POS_MIN,
			       BTREE_ID_NR,	POS_MAX,
			       /* ratelimiting disabled for now */
			       NULL, /*  &r->pd.rate, */
			       writepoint_ptr(&c->rebalance_write_point),
			       rebalance_pred, NULL,
			       &r->move_stats);
	}

	return 0;
}

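/*
 * Human-readable summary of rebalance state: pending work on the fullest
 * device, filesystem-wide totals, the current rate, and whether the thread
 * is waiting, throttled or running. Emitted through a printbuf, presumably
 * for the sysfs interface.
 */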
void bch2_rebalance_work_to_text(struct printbuf *out, struct bch_fs *c)
{
	struct bch_fs_rebalance *r = &c->rebalance;
	struct rebalance_work w = rebalance_work(c);
	char h1[21], h2[21];

	bch2_hprint(&PBUF(h1), w.dev_most_full_work << 9);
	bch2_hprint(&PBUF(h2), w.dev_most_full_capacity << 9);
	pr_buf(out, "fullest_dev (%i):\t%s/%s\n",
	       w.dev_most_full_idx, h1, h2);

	bch2_hprint(&PBUF(h1), w.total_work << 9);
	bch2_hprint(&PBUF(h2), c->capacity << 9);
	pr_buf(out, "total work:\t\t%s/%s\n", h1, h2);

	pr_buf(out, "rate:\t\t\t%u\n", r->pd.rate.rate);

	switch (r->state) {
	case REBALANCE_WAITING:
		pr_buf(out, "waiting\n");
		break;
	case REBALANCE_THROTTLED:
		bch2_hprint(&PBUF(h1),
			    (r->throttled_until_iotime -
			     atomic64_read(&c->io_clock[WRITE].now)) << 9);
		pr_buf(out, "throttled for %lu sec or %s io\n",
		       (r->throttled_until_cputime - jiffies) / HZ,
		       h1);
		break;
	case REBALANCE_RUNNING:
		pr_buf(out, "running\n");
		pr_buf(out, "pos %llu:%llu\n",
		       r->move_stats.pos.inode,
		       r->move_stats.pos.offset);
		break;
	}
}

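/*
 * Stop the rebalance thread: max out the rate so a throttled wait exits
 * promptly, clear the thread pointer, then synchronize_rcu() so
 * rebalance_wakeup() can no longer observe the task before it is stopped and
 * the reference taken in bch2_rebalance_start() is dropped.
 */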
void bch2_rebalance_stop(struct bch_fs *c)
{
	struct task_struct *p;

	c->rebalance.pd.rate.rate = UINT_MAX;
	bch2_ratelimit_reset(&c->rebalance.pd.rate);

	p = rcu_dereference_protected(c->rebalance.thread, 1);
	c->rebalance.thread = NULL;

	if (p) {
		/* for synchronizing with rebalance_wakeup() */
		synchronize_rcu();

		kthread_stop(p);
		put_task_struct(p);
	}
}

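/*
 * Create and start the rebalance thread, taking a task reference so
 * bch2_rebalance_stop() can stop it safely later. A nochanges mount never
 * starts the thread.
 */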
int bch2_rebalance_start(struct bch_fs *c)
{
	struct task_struct *p;

	if (c->opts.nochanges)
		return 0;

	p = kthread_create(bch2_rebalance_thread, c, "bch-rebalance/%s", c->name);
	if (IS_ERR(p)) {
		bch_err(c, "error creating rebalance thread: %li", PTR_ERR(p));
		return PTR_ERR(p);
	}

	get_task_struct(p);
	rcu_assign_pointer(c->rebalance.thread, p);
	wake_up_process(p);

	return 0;
}

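/*
 * Early init: set up the pd controller. work_unknown_dev starts at S64_MAX,
 * presumably so rebalance assumes work may be pending until the counters are
 * recomputed.
 */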
void bch2_fs_rebalance_init(struct bch_fs *c)
{
	bch2_pd_controller_init(&c->rebalance.pd);

	atomic64_set(&c->rebalance.work_unknown_dev, S64_MAX);
}