// SPDX-License-Identifier: GPL-2.0
/*
 * Moving/copying garbage collector
 *
 * Copyright 2012 Google, Inc.
 */

#include "bcachefs.h"
#include "alloc_background.h"
#include "alloc_foreground.h"
#include "btree_iter.h"
#include "btree_update.h"
#include "btree_write_buffer.h"
#include "buckets.h"
#include "clock.h"
#include "disk_groups.h"
#include "errcode.h"
#include "error.h"
#include "extents.h"
#include "eytzinger.h"
#include "io.h"
#include "keylist.h"
#include "lru.h"
#include "move.h"
#include "movinggc.h"
#include "super-io.h"
#include "trace.h"

#include <linux/bsearch.h>
#include <linux/freezer.h>
#include <linux/kthread.h>
#include <linux/math64.h>
#include <linux/sched/task.h>
#include <linux/sort.h>
#include <linux/wait.h>
struct buckets_in_flight {
	struct rhashtable		table;
	struct move_bucket_in_flight	*first;
	struct move_bucket_in_flight	*last;
	size_t				nr;
	size_t				sectors;
};

static const struct rhashtable_params bch_move_bucket_params = {
	.head_offset	= offsetof(struct move_bucket_in_flight, hash),
	.key_offset	= offsetof(struct move_bucket_in_flight, bucket.k),
	.key_len	= sizeof(struct move_bucket_key),
};
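/*
 * Track a bucket whose data is being evacuated: allocate a tracking entry,
 * insert it into the hash table (so the same bucket isn't picked twice), and
 * append it to the in-flight list. Returns an ERR_PTR() on allocation failure
 * or if the bucket is already being tracked (-EEXIST).
 */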
static struct move_bucket_in_flight *
move_bucket_in_flight_add(struct buckets_in_flight *list, struct move_bucket b)
{
	struct move_bucket_in_flight *new = kzalloc(sizeof(*new), GFP_KERNEL);
	int ret;

	if (!new)
		return ERR_PTR(-ENOMEM);

	new->bucket = b;

	ret = rhashtable_lookup_insert_fast(&list->table, &new->hash,
					    bch_move_bucket_params);
	if (ret) {
		kfree(new);
		return ERR_PTR(ret);
	}

	if (!list->first)
		list->first = new;
	else
		list->last->next = new;

	list->last = new;
	list->nr++;
	list->sectors += b.sectors;
	return new;
}
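/*
 * Decide whether bucket @b is a copygc candidate: skip buckets currently open
 * for writes, look up the (cached) alloc key, record the bucket's generation
 * and dirty sector count, and report it movable if it holds movable data and
 * its fragmentation LRU time is nonzero and at or before @time.
 */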
static int bch2_bucket_is_movable(struct btree_trans *trans,
				  struct move_bucket *b, u64 time)
{
	struct btree_iter iter;
	struct bkey_s_c k;
	struct bch_alloc_v4 _a;
	const struct bch_alloc_v4 *a;
	int ret;

	if (bch2_bucket_is_open(trans->c,
				b->k.bucket.inode,
				b->k.bucket.offset))
		return 0;

	k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_alloc,
			       b->k.bucket, BTREE_ITER_CACHED);
	ret = bkey_err(k);
	if (ret)
		return ret;

	a = bch2_alloc_to_v4(k, &_a);
	b->k.gen	= a->gen;
	b->sectors	= a->dirty_sectors;

	ret = data_type_movable(a->data_type) &&
		a->fragmentation_lru &&
		a->fragmentation_lru <= time;

	bch2_trans_iter_exit(trans, &iter);
	return ret;
}
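/*
 * Reap completed entries from the front of the in-flight list. If @flush is
 * set, wait for each entry's outstanding moves to finish; otherwise stop at
 * the first entry that still has I/O in flight. The btree transaction is
 * unlocked before returning.
 */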
static void move_buckets_wait(struct btree_trans *trans,
			      struct moving_context *ctxt,
			      struct buckets_in_flight *list,
			      bool flush)
{
	struct move_bucket_in_flight *i;
	int ret;

	while ((i = list->first)) {
		if (flush)
			move_ctxt_wait_event(ctxt, trans, !atomic_read(&i->count));

		if (atomic_read(&i->count))
			break;

		list->first = i->next;
		if (!list->first)
			list->last = NULL;

		list->nr--;
		list->sectors -= i->bucket.sectors;

		ret = rhashtable_remove_fast(&list->table, &i->hash,
					     bch_move_bucket_params);
		BUG_ON(ret);
		kfree(i);
	}

	bch2_trans_unlock(trans);
}
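/* Is this bucket (keyed by position and generation) already being evacuated? */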
static bool bucket_in_flight(struct buckets_in_flight *list,
			     struct move_bucket_key k)
{
	return rhashtable_lookup_fast(&list->table, &k, bch_move_bucket_params);
}

typedef DARRAY(struct move_bucket) move_buckets;
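/*
 * Collect the next batch of copygc candidates: reap completed in-flight moves,
 * flush the btree write buffer so the fragmentation LRU is current, then walk
 * the LRU btree accumulating up to nr_to_get buckets that are movable and not
 * already in flight.
 */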
static int bch2_copygc_get_buckets(struct btree_trans *trans,
			struct moving_context *ctxt,
			struct buckets_in_flight *buckets_in_flight,
			move_buckets *buckets)
{
	struct bch_fs *c = trans->c;
	struct btree_iter iter;
	struct bkey_s_c k;
	size_t nr_to_get = max(16UL, buckets_in_flight->nr / 4);
	size_t saw = 0, in_flight = 0, not_movable = 0, sectors = 0;
	int ret;

	move_buckets_wait(trans, ctxt, buckets_in_flight, false);

	ret = bch2_btree_write_buffer_flush(trans);
	if (bch2_fs_fatal_err_on(ret, c, "%s: error %s from bch2_btree_write_buffer_flush()",
				 __func__, bch2_err_str(ret)))
		return ret;

	ret = for_each_btree_key2_upto(trans, iter, BTREE_ID_lru,
				  lru_pos(BCH_LRU_FRAGMENTATION_START, 0, 0),
				  lru_pos(BCH_LRU_FRAGMENTATION_START, U64_MAX, LRU_TIME_MAX),
				  0, k, ({
		struct move_bucket b = { .k.bucket = u64_to_bucket(k.k->p.offset) };
		int ret = 0;

		saw++;

		if (!bch2_bucket_is_movable(trans, &b, lru_pos_time(k.k->p)))
			not_movable++;
		else if (bucket_in_flight(buckets_in_flight, b.k))
			in_flight++;
		else {
			ret = darray_push(buckets, b) ?: buckets->nr >= nr_to_get;
			if (ret >= 0)
				sectors += b.sectors;
		}
		ret;
	}));

	pr_debug("have: %zu (%zu) saw %zu in flight %zu not movable %zu got %zu (%zu)/%zu buckets ret %i",
		 buckets_in_flight->nr, buckets_in_flight->sectors,
		 saw, in_flight, not_movable, buckets->nr, sectors, nr_to_get, ret);

	return ret < 0 ? ret : 0;
}
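/*
 * One copygc pass: grab a batch of fragmented buckets, mark each one as in
 * flight, and evacuate its live data. ENOENT from the LRU walk just means
 * there was nothing (left) to do.
 */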
noinline
static int bch2_copygc(struct btree_trans *trans,
		       struct moving_context *ctxt,
		       struct buckets_in_flight *buckets_in_flight)
{
	struct bch_fs *c = trans->c;
	struct data_update_opts data_opts = {
		.btree_insert_flags = BTREE_INSERT_USE_RESERVE|JOURNAL_WATERMARK_copygc,
	};
	move_buckets buckets = { 0 };
	struct move_bucket_in_flight *f;
	struct move_bucket *i;
	u64 moved = atomic64_read(&ctxt->stats->sectors_moved);
	int ret = 0;

	ret = bch2_copygc_get_buckets(trans, ctxt, buckets_in_flight, &buckets);
	if (ret)
		goto err;

	darray_for_each(buckets, i) {
		if (unlikely(freezing(current)))
			break;

		f = move_bucket_in_flight_add(buckets_in_flight, *i);
		ret = PTR_ERR_OR_ZERO(f);
		if (ret == -EEXIST) /* rare race: copygc_get_buckets returned same bucket more than once */
			continue;
		if (ret == -ENOMEM) { /* flush IO, continue later */
			ret = 0;
			break;
		}

		ret = __bch2_evacuate_bucket(trans, ctxt, f, f->bucket.k.bucket,
					     f->bucket.k.gen, data_opts);
		if (ret)
			goto err;
	}
err:
	darray_exit(&buckets);

	/* no entries in LRU btree found, or got to end: */
	if (bch2_err_matches(ret, ENOENT))
		ret = 0;

	if (ret < 0 && !bch2_err_matches(ret, EROFS))
		bch_err(c, "error from bch2_move_data() in copygc: %s", bch2_err_str(ret));

	moved = atomic64_read(&ctxt->stats->sectors_moved) - moved;
	trace_and_count(c, copygc, c, moved, 0, 0, 0);
	return ret;
}
/*
 * Copygc runs when the amount of fragmented data is above some arbitrary
 * threshold:
 *
 * The threshold at the limit - when the device is full - is the amount of space
 * we reserved in bch2_recalc_capacity; we can't have more than that amount of
 * disk space stranded due to fragmentation and store everything we have
 * promised to store.
 *
 * But we don't want to be running copygc unnecessarily when the device still
 * has plenty of free space - rather, we want copygc to smoothly run every so
 * often and continually reduce the amount of fragmented space as the device
 * fills up. So, we increase the threshold by half the current free space.
 */
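/*
 * Illustrative numbers (not taken from the code): on a device with roughly
 * 100GB of available bucket space, fragmented_allowed below works out to
 * about 50GB; if only 10GB of movable data is currently fragmented, the
 * copygc thread sleeps until around 40GB more has been written (per the
 * write io_clock) before re-evaluating, rather than running immediately.
 */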
unsigned long bch2_copygc_wait_amount(struct bch_fs *c)
{
	struct bch_dev *ca;
	unsigned dev_idx;
	s64 wait = S64_MAX, fragmented_allowed, fragmented;
	unsigned i;

	for_each_rw_member(ca, c, dev_idx) {
		struct bch_dev_usage usage = bch2_dev_usage_read(ca);

		fragmented_allowed = ((__dev_buckets_available(ca, usage, RESERVE_stripe) *
				       ca->mi.bucket_size) >> 1);
		fragmented = 0;

		for (i = 0; i < BCH_DATA_NR; i++)
			if (data_type_movable(i))
				fragmented += usage.d[i].fragmented;

		wait = min(wait, max(0LL, fragmented_allowed - fragmented));
	}

	return wait;
}
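/*
 * Report copygc wait state in human-readable form: how much longer we're
 * waiting, how long we've been waiting, and the freshly computed wait amount
 * (converted from sectors to bytes).
 */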
void bch2_copygc_wait_to_text(struct printbuf *out, struct bch_fs *c)
{
	prt_printf(out, "Currently waiting for: ");
	prt_human_readable_u64(out, max(0LL, c->copygc_wait -
					atomic64_read(&c->io_clock[WRITE].now)) << 9);
	prt_newline(out);

	prt_printf(out, "Currently waiting since: ");
	prt_human_readable_u64(out, max(0LL,
					atomic64_read(&c->io_clock[WRITE].now) -
					c->copygc_wait_at) << 9);
	prt_newline(out);

	prt_printf(out, "Currently calculated wait: ");
	prt_human_readable_u64(out, bch2_copygc_wait_amount(c));
	prt_newline(out);
}
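/*
 * Main copygc loop: while fragmentation is below the threshold, sleep on the
 * write io_clock (re-evaluating after enough new writes); once the threshold
 * is crossed, run copygc passes. Also honours the copy_gc_enabled switch and
 * the freezer.
 */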
static int bch2_copygc_thread(void *arg)
{
	struct bch_fs *c = arg;
	struct btree_trans trans;
	struct moving_context ctxt;
	struct bch_move_stats move_stats;
	struct io_clock *clock = &c->io_clock[WRITE];
	struct buckets_in_flight move_buckets;
	u64 last, wait;
	int ret = 0;

	memset(&move_buckets, 0, sizeof(move_buckets));

	ret = rhashtable_init(&move_buckets.table, &bch_move_bucket_params);
	if (ret) {
		bch_err(c, "error allocating copygc buckets in flight: %s",
			bch2_err_str(ret));
		return ret;
	}

	set_freezable();
	bch2_trans_init(&trans, c, 0, 0);

	bch2_move_stats_init(&move_stats, "copygc");
	bch2_moving_ctxt_init(&ctxt, c, NULL, &move_stats,
			      writepoint_ptr(&c->copygc_write_point),
			      false);

	while (!ret && !kthread_should_stop()) {
		bch2_trans_unlock(&trans);
		cond_resched();

		if (!c->copy_gc_enabled) {
			move_buckets_wait(&trans, &ctxt, &move_buckets, true);
			kthread_wait_freezable(c->copy_gc_enabled);
		}

		if (unlikely(freezing(current))) {
			move_buckets_wait(&trans, &ctxt, &move_buckets, true);
			__refrigerator(false);
			continue;
		}

		last = atomic64_read(&clock->now);
		wait = bch2_copygc_wait_amount(c);

		if (wait > clock->max_slop) {
			c->copygc_wait_at = last;
			c->copygc_wait = last + wait;
			move_buckets_wait(&trans, &ctxt, &move_buckets, true);
			trace_and_count(c, copygc_wait, c, wait, last + wait);
			bch2_kthread_io_clock_wait(clock, last + wait,
						   MAX_SCHEDULE_TIMEOUT);
			continue;
		}

		c->copygc_wait = 0;

		c->copygc_running = true;
		ret = bch2_copygc(&trans, &ctxt, &move_buckets);
		c->copygc_running = false;

		wake_up(&c->copygc_running_wq);
	}

	move_buckets_wait(&trans, &ctxt, &move_buckets, true);
	bch2_trans_exit(&trans);
	bch2_moving_ctxt_exit(&ctxt);

	return 0;
}
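/* Stop the copygc thread, if one is running, and drop our reference to it. */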
void bch2_copygc_stop(struct bch_fs *c)
{
	if (c->copygc_thread) {
		kthread_stop(c->copygc_thread);
		put_task_struct(c->copygc_thread);
	}
	c->copygc_thread = NULL;
}
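/*
 * Start the copygc thread if one isn't already running. A no-op in nochanges
 * mode; kthread creation failure is reported and returned to the caller.
 */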
int bch2_copygc_start(struct bch_fs *c)
{
	struct task_struct *t;
	int ret;

	if (c->copygc_thread)
		return 0;

	if (c->opts.nochanges)
		return 0;

	if (bch2_fs_init_fault("copygc_start"))
		return -ENOMEM;

	t = kthread_create(bch2_copygc_thread, c, "bch-copygc/%s", c->name);
	ret = PTR_ERR_OR_ZERO(t);
	if (ret) {
		bch_err(c, "error creating copygc thread: %s", bch2_err_str(ret));
		return ret;
	}

	get_task_struct(t);

	c->copygc_thread = t;
	wake_up_process(c->copygc_thread);

	return 0;
}
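/* Early init of copygc state; the thread itself is started by bch2_copygc_start(). */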
void bch2_fs_copygc_init(struct bch_fs *c)
{
	init_waitqueue_head(&c->copygc_running_wq);
	c->copygc_running = false;
}