// SPDX-License-Identifier: GPL-2.0
/*
 * Moving/copying garbage collector
 *
 * Copyright 2012 Google, Inc.
 */

#include "bcachefs.h"
#include "alloc_background.h"
#include "alloc_foreground.h"
#include "btree_iter.h"
#include "btree_update.h"
#include "btree_write_buffer.h"
#include "buckets.h"
#include "clock.h"
#include "disk_groups.h"
#include "errcode.h"
#include "error.h"
#include "extents.h"
#include "eytzinger.h"
#include "io.h"
#include "keylist.h"
#include "lru.h"
#include "move.h"
#include "movinggc.h"
#include "super-io.h"
#include "trace.h"

#include <linux/bsearch.h>
#include <linux/freezer.h>
#include <linux/kthread.h>
#include <linux/math64.h>
#include <linux/sched/task.h>
#include <linux/sort.h>
#include <linux/wait.h>
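
/*
 * Check whether a bucket is a candidate for copygc: it must not be open for
 * writes, its data type must be movable, and its position in the
 * fragmentation LRU must be at or before @time. The bucket's current
 * generation is returned via @gen. Returns a negative error code, or true if
 * the bucket should be evacuated.
 */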
static int bch2_bucket_is_movable(struct btree_trans *trans,
				  struct bpos bucket, u64 time, u8 *gen)
{
	struct btree_iter iter;
	struct bkey_s_c k;
	struct bch_alloc_v4 _a;
	const struct bch_alloc_v4 *a;
	int ret;

	if (bch2_bucket_is_open(trans->c, bucket.inode, bucket.offset))
		return 0;

	bch2_trans_iter_init(trans, &iter, BTREE_ID_alloc, bucket, BTREE_ITER_CACHED);
	k = bch2_btree_iter_peek_slot(&iter);
	ret = bkey_err(k);
	bch2_trans_iter_exit(trans, &iter);
	if (ret)
		return ret;

	a = bch2_alloc_to_v4(k, &_a);
	*gen = a->gen;
	ret = data_type_movable(a->data_type) &&
		a->fragmentation_lru &&
		a->fragmentation_lru <= time;

	if (ret) {
		struct printbuf buf = PRINTBUF;

		bch2_bkey_val_to_text(&buf, trans->c, k);
		pr_debug("%s", buf.buf);
		printbuf_exit(&buf);
	}

	return ret;
}

typedef FIFO(struct move_bucket_in_flight) move_buckets_in_flight;

struct move_bucket {
	struct bpos		bucket;
	u8			gen;
};

typedef DARRAY(struct move_bucket) move_buckets;

static int move_bucket_cmp(const void *_l, const void *_r)
{
	const struct move_bucket *l = _l;
	const struct move_bucket *r = _r;

	return bkey_cmp(l->bucket, r->bucket);
}

static bool bucket_in_flight(move_buckets *buckets_sorted, struct move_bucket b)
{
	return bsearch(&b,
		       buckets_sorted->data,
		       buckets_sorted->nr,
		       sizeof(buckets_sorted->data[0]),
		       move_bucket_cmp) != NULL;
}
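
/*
 * Wait for in-flight bucket evacuations to complete until no more than @nr
 * remain in the FIFO, popping completed entries off the front and optionally
 * verifying that each bucket was fully evacuated. Leaves the btree_trans
 * unlocked on return.
 */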
static void move_buckets_wait(struct btree_trans *trans,
			      struct moving_context *ctxt,
			      move_buckets_in_flight *buckets_in_flight,
			      size_t nr, bool verify_evacuated)
{
	while (!fifo_empty(buckets_in_flight)) {
		struct move_bucket_in_flight *i = &fifo_peek_front(buckets_in_flight);

		if (fifo_used(buckets_in_flight) > nr)
			move_ctxt_wait_event(ctxt, trans, !atomic_read(&i->count));

		if (atomic_read(&i->count))
			break;

		/*
		 * moving_ctxt_exit calls bch2_write as it flushes pending
		 * reads, which inits another btree_trans; this one must be
		 * unlocked:
		 */
		if (verify_evacuated)
			bch2_verify_bucket_evacuated(trans, i->bucket, i->gen);
		buckets_in_flight->front++;
	}

	bch2_trans_unlock(trans);
}
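
/*
 * Scan the fragmentation LRU btree for the next batch of buckets to evacuate,
 * skipping buckets that already have moves in flight. Candidates are appended
 * to @buckets; we stop once at least 16 buckets (or a quarter of the
 * in-flight FIFO's current usage, whichever is larger) have been collected.
 */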
static int bch2_copygc_get_buckets(struct btree_trans *trans,
				   struct moving_context *ctxt,
				   move_buckets_in_flight *buckets_in_flight,
				   move_buckets *buckets)
{
	struct btree_iter iter;
	move_buckets buckets_sorted = { 0 };
	struct move_bucket_in_flight *i;
	struct bkey_s_c k;
	size_t fifo_iter, nr_to_get;
	int ret;

	move_buckets_wait(trans, ctxt, buckets_in_flight, buckets_in_flight->size / 2, true);

	nr_to_get = max(16UL, fifo_used(buckets_in_flight) / 4);

	fifo_for_each_entry_ptr(i, buckets_in_flight, fifo_iter) {
		ret = darray_push(&buckets_sorted, ((struct move_bucket) { i->bucket, i->gen }));
		if (ret) {
			bch_err(trans->c, "error allocating move_buckets_sorted");
			goto err;
		}
	}

	sort(buckets_sorted.data,
	     buckets_sorted.nr,
	     sizeof(buckets_sorted.data[0]),
	     move_bucket_cmp,
	     NULL);

	ret = for_each_btree_key2_upto(trans, iter, BTREE_ID_lru,
				  lru_pos(BCH_LRU_FRAGMENTATION_START, 0, 0),
				  lru_pos(BCH_LRU_FRAGMENTATION_START, U64_MAX, LRU_TIME_MAX),
				  0, k, ({
		struct move_bucket b = { .bucket = u64_to_bucket(k.k->p.offset) };
		int ret = 0;

		if (!bucket_in_flight(&buckets_sorted, b) &&
		    bch2_bucket_is_movable(trans, b.bucket, lru_pos_time(k.k->p), &b.gen))
			ret = darray_push(buckets, b) ?: buckets->nr >= nr_to_get;

		ret;
	}));
err:
	darray_exit(&buckets_sorted);

	return ret < 0 ? ret : 0;
}
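
/*
 * Run one copygc pass: flush the btree write buffer so the fragmentation LRU
 * is up to date, collect a batch of candidate buckets, and evacuate each one,
 * tracking it in @buckets_in_flight until its moves complete.
 */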
static int bch2_copygc(struct btree_trans *trans,
		       struct moving_context *ctxt,
		       move_buckets_in_flight *buckets_in_flight)
{
	struct bch_fs *c = trans->c;
	struct data_update_opts data_opts = {
		.btree_insert_flags = BTREE_INSERT_USE_RESERVE|JOURNAL_WATERMARK_copygc,
	};
	move_buckets buckets = { 0 };
	struct move_bucket_in_flight *f;
	struct move_bucket *i;
	u64 moved = atomic64_read(&ctxt->stats->sectors_moved);
	int ret = 0;

	ret = bch2_btree_write_buffer_flush(trans);
	if (bch2_fs_fatal_err_on(ret, c, "%s: error %s from bch2_btree_write_buffer_flush()",
				 __func__, bch2_err_str(ret)))
		return ret;

	ret = bch2_copygc_get_buckets(trans, ctxt, buckets_in_flight, &buckets);
	if (ret)
		goto err;

	darray_for_each(buckets, i) {
		if (unlikely(freezing(current)))
			break;

		f = fifo_push_ref(buckets_in_flight);
		f->bucket	= i->bucket;
		f->gen		= i->gen;
		atomic_set(&f->count, 0);

		ret = __bch2_evacuate_bucket(trans, ctxt, f, f->bucket, f->gen, data_opts);
		if (ret)
			goto err;
	}
err:
	darray_exit(&buckets);

	/* no entries in LRU btree found, or got to end: */
	if (ret == -ENOENT)
		ret = 0;

	if (ret < 0 && !bch2_err_matches(ret, EROFS))
		bch_err(c, "error from bch2_move_data() in copygc: %s", bch2_err_str(ret));

	moved = atomic64_read(&ctxt->stats->sectors_moved) - moved;
	trace_and_count(c, copygc, c, moved, 0, 0, 0);
	return ret;
}

/*
 * Copygc runs when the amount of fragmented data is above some arbitrary
 * threshold:
 *
 * The threshold at the limit - when the device is full - is the amount of space
 * we reserved in bch2_recalc_capacity; we can't have more than that amount of
 * disk space stranded due to fragmentation and store everything we have
 * promised to store.
 *
 * But we don't want to be running copygc unnecessarily when the device still
 * has plenty of free space - rather, we want copygc to smoothly run every so
 * often and continually reduce the amount of fragmented space as the device
 * fills up. So, we increase the threshold by half the current free space.
 */
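/*
 * Illustrative example (made-up numbers): with 1024 available buckets of 512
 * sectors each, fragmented_allowed is (1024 * 512) / 2 = 262144 sectors; if
 * 100000 sectors on that device are currently fragmented, copygc waits until
 * roughly another 162144 sectors have been written on the write I/O clock.
 * The wait is the minimum over all rw members.
 */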
unsigned long bch2_copygc_wait_amount(struct bch_fs *c)
{
	struct bch_dev *ca;
	unsigned dev_idx;
	s64 wait = S64_MAX, fragmented_allowed, fragmented;
	unsigned i;

	for_each_rw_member(ca, c, dev_idx) {
		struct bch_dev_usage usage = bch2_dev_usage_read(ca);

		fragmented_allowed = ((__dev_buckets_available(ca, usage, RESERVE_stripe) *
				       ca->mi.bucket_size) >> 1);
		fragmented = 0;

		for (i = 0; i < BCH_DATA_NR; i++)
			if (data_type_movable(i))
				fragmented += usage.d[i].fragmented;

		wait = min(wait, max(0LL, fragmented_allowed - fragmented));
	}

	return wait;
}

void bch2_copygc_wait_to_text(struct printbuf *out, struct bch_fs *c)
{
	prt_printf(out, "Currently waiting for:     ");
	prt_human_readable_u64(out, max(0LL, c->copygc_wait -
					atomic64_read(&c->io_clock[WRITE].now)) << 9);
	prt_newline(out);

	prt_printf(out, "Currently waiting since:   ");
	prt_human_readable_u64(out, max(0LL,
					atomic64_read(&c->io_clock[WRITE].now) -
					c->copygc_wait_at) << 9);
	prt_newline(out);

	prt_printf(out, "Currently calculated wait: ");
	prt_human_readable_u64(out, bch2_copygc_wait_amount(c));
	prt_newline(out);
}
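
/*
 * Copygc main loop: sleep on the write I/O clock until enough data has been
 * written that fragmentation may exceed the allowed threshold, then run a
 * copygc pass; in-flight evacuations are drained before every sleep.
 */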
static int bch2_copygc_thread(void *arg)
{
	struct bch_fs *c = arg;
	struct btree_trans trans;
	struct moving_context ctxt;
	struct bch_move_stats move_stats;
	struct io_clock *clock = &c->io_clock[WRITE];
	move_buckets_in_flight move_buckets;
	u64 last, wait;
	int ret = 0;

	if (!init_fifo(&move_buckets, 1 << 14, GFP_KERNEL)) {
		bch_err(c, "error allocating copygc buckets in flight");
		return -ENOMEM;
	}

	set_freezable();
	bch2_trans_init(&trans, c, 0, 0);

	bch2_move_stats_init(&move_stats, "copygc");
	bch2_moving_ctxt_init(&ctxt, c, NULL, &move_stats,
			      writepoint_ptr(&c->copygc_write_point),
			      false);

	while (!ret && !kthread_should_stop()) {
		bch2_trans_unlock(&trans);
		cond_resched();

		if (!c->copy_gc_enabled) {
			move_buckets_wait(&trans, &ctxt, &move_buckets, 0, true);
			kthread_wait_freezable(c->copy_gc_enabled);
		}

		if (unlikely(freezing(current))) {
			move_buckets_wait(&trans, &ctxt, &move_buckets, 0, true);
			__refrigerator(false);
			continue;
		}

		last = atomic64_read(&clock->now);
		wait = bch2_copygc_wait_amount(c);

		if (wait > clock->max_slop) {
			c->copygc_wait_at = last;
			c->copygc_wait = last + wait;

			move_buckets_wait(&trans, &ctxt, &move_buckets, 0, true);
			trace_and_count(c, copygc_wait, c, wait, last + wait);
			bch2_kthread_io_clock_wait(clock, last + wait,
						   MAX_SCHEDULE_TIMEOUT);
			continue;
		}

		c->copygc_wait = 0;

		c->copygc_running = true;
		ret = bch2_copygc(&trans, &ctxt, &move_buckets);
		c->copygc_running = false;

		wake_up(&c->copygc_running_wq);
	}

	bch2_trans_exit(&trans);
	bch2_moving_ctxt_exit(&ctxt);
	free_fifo(&move_buckets);

	return 0;
}
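
/* Stop the copygc thread, if it is running: */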
void bch2_copygc_stop(struct bch_fs *c)
{
	if (c->copygc_thread) {
		kthread_stop(c->copygc_thread);
		put_task_struct(c->copygc_thread);
	}
	c->copygc_thread = NULL;
}
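
/* Start the copygc thread; a no-op if it's already running or nochanges is set: */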
int bch2_copygc_start(struct bch_fs *c)
{
	struct task_struct *t;
	int ret;

	if (c->copygc_thread)
		return 0;

	if (c->opts.nochanges)
		return 0;

	if (bch2_fs_init_fault("copygc_start"))
		return -ENOMEM;

	t = kthread_create(bch2_copygc_thread, c, "bch-copygc/%s", c->name);
	ret = PTR_ERR_OR_ZERO(t);
	if (ret) {
		bch_err(c, "error creating copygc thread: %s", bch2_err_str(ret));
		return ret;
	}

	get_task_struct(t);

	c->copygc_thread = t;
	wake_up_process(c->copygc_thread);

	return 0;
}

void bch2_fs_copygc_init(struct bch_fs *c)
{
	init_waitqueue_head(&c->copygc_running_wq);
	c->copygc_running = false;
}