// SPDX-License-Identifier: GPL-2.0

#include "bcachefs.h"
#include "alloc_background.h"
#include "alloc_foreground.h"
#include "backpointers.h"
#include "bkey_buf.h"
#include "btree_gc.h"
#include "btree_update.h"
#include "btree_update_interior.h"
#include "btree_write_buffer.h"
#include "disk_groups.h"
#include "ec.h"
#include "errcode.h"
#include "error.h"
#include "inode.h"
#include "io_read.h"
#include "io_write.h"
#include "journal_reclaim.h"
#include "keylist.h"
#include "move.h"
#include "replicas.h"
#include "snapshot.h"
#include "super-io.h"
#include "trace.h"

#include <linux/ioprio.h>
#include <linux/kthread.h>

const char * const bch2_data_ops_strs[] = {
#define x(t, n, ...) [n] = #t,
	BCH_DATA_OPS()
#undef x
	NULL
};

static void trace_move_extent2(struct bch_fs *c, struct bkey_s_c k)
{
	if (trace_move_extent_enabled()) {
		struct printbuf buf = PRINTBUF;

		bch2_bkey_val_to_text(&buf, c, k);
		trace_move_extent(c, buf.buf);
		printbuf_exit(&buf);
	}
}

static void trace_move_extent_read2(struct bch_fs *c, struct bkey_s_c k)
{
	if (trace_move_extent_read_enabled()) {
		struct printbuf buf = PRINTBUF;

		bch2_bkey_val_to_text(&buf, c, k);
		trace_move_extent_read(c, buf.buf);
		printbuf_exit(&buf);
	}
}

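/*
 * Tracks a single extent being moved: the extent is read into the inline bio
 * vecs via @rbio, and once the read completes the data is rewritten via the
 * data_update in @write.
 */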
struct moving_io {
	struct list_head		read_list;
	struct list_head		io_list;
	struct move_bucket_in_flight	*b;
	struct closure			cl;
	bool				read_completed;

	unsigned			read_sectors;
	unsigned			write_sectors;

	struct bch_read_bio		rbio;

	struct data_update		write;
	/* Must be last since it is variable size */
	struct bio_vec			bi_inline_vecs[];
};

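/*
 * Tear down a moving_io: drop the in-flight bucket count, if any, release the
 * data update, unlink it from the context's io list, and wake up anyone
 * waiting on the context.
 */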
static void move_free(struct moving_io *io)
{
	struct moving_context *ctxt = io->write.ctxt;

	if (io->b)
		atomic_dec(&io->b->count);

	bch2_data_update_exit(&io->write);

	mutex_lock(&ctxt->lock);
	list_del(&io->io_list);
	wake_up(&ctxt->wait);
	mutex_unlock(&ctxt->lock);

	kfree(io);
}

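/* Write completion: record any error, drop the write accounting, free the io. */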
static void move_write_done(struct bch_write_op *op)
{
	struct moving_io *io = container_of(op, struct moving_io, write.op);
	struct moving_context *ctxt = io->write.ctxt;

	if (io->write.op.error)
		ctxt->write_error = true;

	atomic_sub(io->write_sectors, &io->write.ctxt->write_sectors);
	atomic_dec(&io->write.ctxt->write_ios);
	move_free(io);
	closure_put(&ctxt->cl);
}

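/*
 * Start the write half of a move; if the read failed or hit a hole there is
 * nothing to write, and the io is freed immediately.
 */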
static void move_write(struct moving_io *io)
{
	if (unlikely(io->rbio.bio.bi_status || io->rbio.hole)) {
		move_free(io);
		return;
	}

	closure_get(&io->write.ctxt->cl);
	atomic_add(io->write_sectors, &io->write.ctxt->write_sectors);
	atomic_inc(&io->write.ctxt->write_ios);

	bch2_data_update_read_done(&io->write, io->rbio.pick.crc);
}

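/*
 * Reads may complete out of order, but writes are issued in read-submission
 * order: only the head of the reads list is returned, and only once its read
 * has completed.
 */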
struct moving_io *bch2_moving_ctxt_next_pending_write(struct moving_context *ctxt)
{
	struct moving_io *io =
		list_first_entry_or_null(&ctxt->reads, struct moving_io, read_list);

	return io && io->read_completed ? io : NULL;
}

static void move_read_endio(struct bio *bio)
{
	struct moving_io *io = container_of(bio, struct moving_io, rbio.bio);
	struct moving_context *ctxt = io->write.ctxt;

	atomic_sub(io->read_sectors, &ctxt->read_sectors);
	atomic_dec(&ctxt->read_ios);
	io->read_completed = true;

	wake_up(&ctxt->wait);
	closure_put(&ctxt->cl);
}

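/* Issue writes for reads at the head of the list that have completed: */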
void bch2_moving_ctxt_do_pending_writes(struct moving_context *ctxt)
{
	struct moving_io *io;

	while ((io = bch2_moving_ctxt_next_pending_write(ctxt))) {
		bch2_trans_unlock_long(ctxt->trans);
		list_del(&io->read_list);
		move_write(io);
	}
}

void bch2_move_ctxt_wait_for_io(struct moving_context *ctxt)
{
	unsigned sectors_pending = atomic_read(&ctxt->write_sectors);

	move_ctxt_wait_event(ctxt,
		!atomic_read(&ctxt->write_sectors) ||
		atomic_read(&ctxt->write_sectors) != sectors_pending);
}

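/* Wait for all in-flight reads to complete, then for all writes to finish: */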
void bch2_moving_ctxt_flush_all(struct moving_context *ctxt)
{
	move_ctxt_wait_event(ctxt, list_empty(&ctxt->reads));
	bch2_trans_unlock_long(ctxt->trans);
	closure_sync(&ctxt->cl);
}

void bch2_moving_ctxt_exit(struct moving_context *ctxt)
{
	struct bch_fs *c = ctxt->trans->c;

	bch2_moving_ctxt_flush_all(ctxt);

	EBUG_ON(atomic_read(&ctxt->write_sectors));
	EBUG_ON(atomic_read(&ctxt->write_ios));
	EBUG_ON(atomic_read(&ctxt->read_sectors));
	EBUG_ON(atomic_read(&ctxt->read_ios));

	mutex_lock(&c->moving_context_lock);
	list_del(&ctxt->list);
	mutex_unlock(&c->moving_context_lock);

	bch2_trans_put(ctxt->trans);
	memset(ctxt, 0, sizeof(*ctxt));
}

void bch2_moving_ctxt_init(struct moving_context *ctxt,
			   struct bch_fs *c,
			   struct bch_ratelimit *rate,
			   struct bch_move_stats *stats,
			   struct write_point_specifier wp,
			   bool wait_on_copygc)
{
	memset(ctxt, 0, sizeof(*ctxt));

	ctxt->trans	= bch2_trans_get(c);
	ctxt->fn	= (void *) _RET_IP_;
	ctxt->rate	= rate;
	ctxt->stats	= stats;
	ctxt->wp	= wp;
	ctxt->wait_on_copygc = wait_on_copygc;

	closure_init_stack(&ctxt->cl);

	mutex_init(&ctxt->lock);
	INIT_LIST_HEAD(&ctxt->reads);
	INIT_LIST_HEAD(&ctxt->ios);
	init_waitqueue_head(&ctxt->wait);

	mutex_lock(&c->moving_context_lock);
	list_add(&ctxt->list, &c->moving_context_list);
	mutex_unlock(&c->moving_context_lock);
}

void bch2_move_stats_exit(struct bch_move_stats *stats, struct bch_fs *c)
{
	trace_move_data(c, stats);
}

void bch2_move_stats_init(struct bch_move_stats *stats, const char *name)
{
	memset(stats, 0, sizeof(*stats));

	stats->data_type = BCH_DATA_user;
	scnprintf(stats->name, sizeof(stats->name), "%s", name);
}

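/*
 * Move (rewrite) a single extent: allocate a moving_io tracking both halves
 * of the operation and issue the read; the write is issued later, from
 * bch2_moving_ctxt_do_pending_writes(), once the read has completed.
 */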
int bch2_move_extent(struct moving_context *ctxt,
		     struct move_bucket_in_flight *bucket_in_flight,
		     struct btree_iter *iter,
		     struct bkey_s_c k,
		     struct bch_io_opts io_opts,
		     struct data_update_opts data_opts)
{
	struct btree_trans *trans = ctxt->trans;
	struct bch_fs *c = trans->c;
	struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
	struct moving_io *io;
	const union bch_extent_entry *entry;
	struct extent_ptr_decoded p;
	unsigned sectors = k.k->size, pages;
	int ret = -ENOMEM;

	if (ctxt->stats)
		ctxt->stats->pos = BBPOS(iter->btree_id, iter->pos);
	trace_move_extent2(c, k);

	bch2_data_update_opts_normalize(k, &data_opts);

	if (!data_opts.rewrite_ptrs &&
	    !data_opts.extra_replicas) {
		if (data_opts.kill_ptrs)
			return bch2_extent_drop_ptrs(trans, iter, k, data_opts);
		return 0;
	}

	/*
	 * Before memory allocations & taking nocow locks in
	 * bch2_data_update_init():
	 */
	bch2_trans_unlock(trans);

	/* write path might have to decompress data: */
	bkey_for_each_ptr_decode(k.k, ptrs, p, entry)
		sectors = max_t(unsigned, sectors, p.crc.uncompressed_size);

	pages = DIV_ROUND_UP(sectors, PAGE_SECTORS);
	io = kzalloc(sizeof(struct moving_io) +
		     sizeof(struct bio_vec) * pages, GFP_KERNEL);
	if (!io)
		goto err;

	INIT_LIST_HEAD(&io->io_list);
	io->write.ctxt		= ctxt;
	io->read_sectors	= k.k->size;
	io->write_sectors	= k.k->size;

	bio_init(&io->write.op.wbio.bio, NULL, io->bi_inline_vecs, pages, 0);
	bio_set_prio(&io->write.op.wbio.bio,
		     IOPRIO_PRIO_VALUE(IOPRIO_CLASS_IDLE, 0));

	if (bch2_bio_alloc_pages(&io->write.op.wbio.bio, sectors << 9,
				 GFP_KERNEL))
		goto err_free;

	io->rbio.c		= c;
	io->rbio.opts		= io_opts;
	bio_init(&io->rbio.bio, NULL, io->bi_inline_vecs, pages, 0);
	io->rbio.bio.bi_vcnt = pages;
	bio_set_prio(&io->rbio.bio, IOPRIO_PRIO_VALUE(IOPRIO_CLASS_IDLE, 0));
	io->rbio.bio.bi_iter.bi_size	= sectors << 9;

	io->rbio.bio.bi_opf		= REQ_OP_READ;
	io->rbio.bio.bi_iter.bi_sector	= bkey_start_offset(k.k);
	io->rbio.bio.bi_end_io		= move_read_endio;

	ret = bch2_data_update_init(trans, iter, ctxt, &io->write, ctxt->wp,
				    io_opts, data_opts, iter->btree_id, k);
	if (ret)
		goto err_free_pages;

	io->write.op.end_io = move_write_done;

	if (ctxt->rate)
		bch2_ratelimit_increment(ctxt->rate, k.k->size);

	if (ctxt->stats) {
		atomic64_inc(&ctxt->stats->keys_moved);
		atomic64_add(k.k->size, &ctxt->stats->sectors_moved);
	}

	if (bucket_in_flight) {
		io->b = bucket_in_flight;
		atomic_inc(&io->b->count);
	}

	this_cpu_add(c->counters[BCH_COUNTER_io_move], k.k->size);
	this_cpu_add(c->counters[BCH_COUNTER_move_extent_read], k.k->size);
	trace_move_extent_read2(c, k);

	mutex_lock(&ctxt->lock);
	atomic_add(io->read_sectors, &ctxt->read_sectors);
	atomic_inc(&ctxt->read_ios);

	list_add_tail(&io->read_list, &ctxt->reads);
	list_add_tail(&io->io_list, &ctxt->ios);
	mutex_unlock(&ctxt->lock);

	/*
	 * dropped by move_read_endio() - guards against use after free of
	 * ctxt when doing wakeup
	 */
	closure_get(&ctxt->cl);
	bch2_read_extent(trans, &io->rbio,
			 bkey_start_pos(k.k),
			 iter->btree_id, k, 0,
			 BCH_READ_NODECODE|
			 BCH_READ_LAST_FRAGMENT);
	return 0;
err_free_pages:
	bio_free_pages(&io->write.op.wbio.bio);
err_free:
	kfree(io);
err:
	if (ret == -BCH_ERR_data_update_done)
		return 0;

	if (bch2_err_matches(ret, EROFS) ||
	    bch2_err_matches(ret, BCH_ERR_transaction_restart))
		return ret;

	count_event(c, move_extent_start_fail);

	if (trace_move_extent_start_fail_enabled()) {
		struct printbuf buf = PRINTBUF;

		bch2_bkey_val_to_text(&buf, c, k);
		prt_str(&buf, ": ");
		prt_str(&buf, bch2_err_str(ret));
		trace_move_extent_start_fail(c, buf.buf);
		printbuf_exit(&buf);
	}
	return ret;
}

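/*
 * Look up the io options governing an extent. An extent in a snapshots btree
 * may be visible in multiple snapshots, so the options for every inode
 * version at this inode number are cached in @io_opts; the entry whose
 * snapshot is an ancestor of the extent's snapshot is returned.
 */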
struct bch_io_opts *bch2_move_get_io_opts(struct btree_trans *trans,
					  struct per_snapshot_io_opts *io_opts,
					  struct bkey_s_c extent_k)
{
	struct bch_fs *c = trans->c;
	u32 restart_count = trans->restart_count;
	int ret = 0;

	if (io_opts->cur_inum != extent_k.k->p.inode) {
		io_opts->d.nr = 0;

		ret = for_each_btree_key(trans, iter, BTREE_ID_inodes, POS(0, extent_k.k->p.inode),
					 BTREE_ITER_ALL_SNAPSHOTS, k, ({
			if (k.k->p.offset != extent_k.k->p.inode)
				break;

			if (!bkey_is_inode(k.k))
				continue;

			struct bch_inode_unpacked inode;
			BUG_ON(bch2_inode_unpack(k, &inode));

			struct snapshot_io_opts_entry e = { .snapshot = k.k->p.snapshot };
			bch2_inode_opts_get(&e.io_opts, trans->c, &inode);

			darray_push(&io_opts->d, e);
		}));
		io_opts->cur_inum = extent_k.k->p.inode;
	}

	ret = ret ?: trans_was_restarted(trans, restart_count);
	if (ret)
		return ERR_PTR(ret);

	if (extent_k.k->p.snapshot)
		darray_for_each(io_opts->d, i)
			if (bch2_snapshot_is_ancestor(c, extent_k.k->p.snapshot, i->snapshot))
				return &i->io_opts;

	return &io_opts->fs_io_opts;
}

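/* Single-extent version: look up the io options from the extent's inode. */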
int bch2_move_get_io_opts_one(struct btree_trans *trans,
			      struct bch_io_opts *io_opts,
			      struct bkey_s_c extent_k)
{
	struct btree_iter iter;
	struct bkey_s_c k;
	int ret;

	/* reflink btree? */
	if (!extent_k.k->p.inode) {
		*io_opts = bch2_opts_to_inode_opts(trans->c->opts);
		return 0;
	}

	k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_inodes,
			       SPOS(0, extent_k.k->p.inode, extent_k.k->p.snapshot),
			       BTREE_ITER_CACHED);
	ret = bkey_err(k);
	if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
		return ret;

	if (!ret && bkey_is_inode(k.k)) {
		struct bch_inode_unpacked inode;

		bch2_inode_unpack(k, &inode);
		bch2_inode_opts_get(io_opts, trans->c, &inode);
	} else {
		*io_opts = bch2_opts_to_inode_opts(trans->c->opts);
	}

	bch2_trans_iter_exit(trans, &iter);
	return 0;
}

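/*
 * Throttle move IO: optionally wait for copygc to finish, honor the context's
 * ratelimit, and cap the number of IOs and sectors in flight. Returns nonzero
 * if the calling kthread should exit.
 */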
int bch2_move_ratelimit(struct moving_context *ctxt)
{
	struct bch_fs *c = ctxt->trans->c;
	bool is_kthread = current->flags & PF_KTHREAD;
	u64 delay;

	if (ctxt->wait_on_copygc && c->copygc_running) {
		bch2_moving_ctxt_flush_all(ctxt);
		wait_event_killable(c->copygc_running_wq,
				    !c->copygc_running ||
				    (is_kthread && kthread_should_stop()));
	}

	do {
		delay = ctxt->rate ? bch2_ratelimit_delay(ctxt->rate) : 0;

		if (is_kthread && kthread_should_stop())
			return 1;

		if (delay)
			move_ctxt_wait_event_timeout(ctxt,
						     freezing(current) ||
						     (is_kthread && kthread_should_stop()),
						     delay);

		if (unlikely(freezing(current))) {
			bch2_moving_ctxt_flush_all(ctxt);
			try_to_freeze();
		}
	} while (delay);

	/*
	 * XXX: these limits really ought to be per device, SSDs and hard drives
	 * will want different limits
	 */
	move_ctxt_wait_event(ctxt,
		atomic_read(&ctxt->write_sectors) < c->opts.move_bytes_in_flight >> 9 &&
		atomic_read(&ctxt->read_sectors) < c->opts.move_bytes_in_flight >> 9 &&
		atomic_read(&ctxt->write_ios) < c->opts.move_ios_in_flight &&
		atomic_read(&ctxt->read_ios) < c->opts.move_ios_in_flight);

	return 0;
}

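/*
 * Walk extents in a single btree within [start, end), applying @pred to
 * decide what gets moved and with what options.
 */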
static int bch2_move_data_btree(struct moving_context *ctxt,
				struct bpos start,
				struct bpos end,
				move_pred_fn pred, void *arg,
				enum btree_id btree_id)
{
	struct btree_trans *trans = ctxt->trans;
	struct bch_fs *c = trans->c;
	struct per_snapshot_io_opts snapshot_io_opts;
	struct bch_io_opts *io_opts;
	struct bkey_buf sk;
	struct btree_iter iter;
	struct bkey_s_c k;
	struct data_update_opts data_opts;
	int ret = 0, ret2;

	per_snapshot_io_opts_init(&snapshot_io_opts, c);
	bch2_bkey_buf_init(&sk);

	if (ctxt->stats) {
		ctxt->stats->data_type	= BCH_DATA_user;
		ctxt->stats->pos	= BBPOS(btree_id, start);
	}

	bch2_trans_iter_init(trans, &iter, btree_id, start,
			     BTREE_ITER_PREFETCH|
			     BTREE_ITER_ALL_SNAPSHOTS);

	if (ctxt->rate)
		bch2_ratelimit_reset(ctxt->rate);

	while (!bch2_move_ratelimit(ctxt)) {
		bch2_trans_begin(trans);

		k = bch2_btree_iter_peek(&iter);
		if (!k.k)
			break;

		ret = bkey_err(k);
		if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
			continue;
		if (ret)
			break;

		if (bkey_ge(bkey_start_pos(k.k), end))
			break;

		if (ctxt->stats)
			ctxt->stats->pos = BBPOS(iter.btree_id, iter.pos);

		if (!bkey_extent_is_direct_data(k.k))
			goto next_nondata;

		io_opts = bch2_move_get_io_opts(trans, &snapshot_io_opts, k);
		ret = PTR_ERR_OR_ZERO(io_opts);
		if (ret)
			continue;

		memset(&data_opts, 0, sizeof(data_opts));
		if (!pred(c, arg, k, io_opts, &data_opts))
			goto next;

		/*
		 * The iterator gets unlocked by __bch2_read_extent - need to
		 * save a copy of @k elsewhere:
		 */
		bch2_bkey_buf_reassemble(&sk, c, k);
		k = bkey_i_to_s_c(sk.k);

		ret2 = bch2_move_extent(ctxt, NULL, &iter, k, *io_opts, data_opts);
		if (ret2) {
			if (bch2_err_matches(ret2, BCH_ERR_transaction_restart))
				continue;

			if (ret2 == -ENOMEM) {
				/* memory allocation failure, wait for some IO to finish */
				bch2_move_ctxt_wait_for_io(ctxt);
				continue;
			}

			/* XXX signal failure */
			goto next;
		}
next:
		if (ctxt->stats)
			atomic64_add(k.k->size, &ctxt->stats->sectors_seen);
next_nondata:
		bch2_btree_iter_advance(&iter);
	}

	bch2_trans_iter_exit(trans, &iter);
	bch2_bkey_buf_exit(&sk, c);
	per_snapshot_io_opts_exit(&snapshot_io_opts);

	return ret;
}

int __bch2_move_data(struct moving_context *ctxt,
		     struct bbpos start,
		     struct bbpos end,
		     move_pred_fn pred, void *arg)
{
	struct bch_fs *c = ctxt->trans->c;
	enum btree_id id;
	int ret = 0;

	for (id = start.btree;
	     id <= min_t(unsigned, end.btree, btree_id_nr_alive(c) - 1);
	     id++) {
		ctxt->stats->pos = BBPOS(id, POS_MIN);

		if (!btree_type_has_ptrs(id) ||
		    !bch2_btree_id_root(c, id)->b)
			continue;

		ret = bch2_move_data_btree(ctxt,
					   id == start.btree ? start.pos : POS_MIN,
					   id == end.btree   ? end.pos   : POS_MAX,
					   pred, arg, id);
		if (ret)
			break;
	}

	return ret;
}

int bch2_move_data(struct bch_fs *c,
		   struct bbpos start,
		   struct bbpos end,
		   struct bch_ratelimit *rate,
		   struct bch_move_stats *stats,
		   struct write_point_specifier wp,
		   bool wait_on_copygc,
		   move_pred_fn pred, void *arg)
{
	struct moving_context ctxt;
	int ret;

	bch2_moving_ctxt_init(&ctxt, c, rate, stats, wp, wait_on_copygc);
	ret = __bch2_move_data(&ctxt, start, end, pred, arg);
	bch2_moving_ctxt_exit(&ctxt);

	return ret;
}

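/*
 * Evacuate a bucket by walking its backpointers: extents are moved with
 * bch2_move_extent(), while btree node pointers are handled by rewriting the
 * node in place.
 */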
int bch2_evacuate_bucket(struct moving_context *ctxt,
			 struct move_bucket_in_flight *bucket_in_flight,
			 struct bpos bucket, int gen,
			 struct data_update_opts _data_opts)
{
	struct btree_trans *trans = ctxt->trans;
	struct bch_fs *c = trans->c;
	bool is_kthread = current->flags & PF_KTHREAD;
	struct bch_io_opts io_opts = bch2_opts_to_inode_opts(c->opts);
	struct btree_iter iter;
	struct bkey_buf sk;
	struct bch_backpointer bp;
	struct bch_alloc_v4 a_convert;
	const struct bch_alloc_v4 *a;
	struct bkey_s_c k;
	struct data_update_opts data_opts;
	unsigned dirty_sectors, bucket_size;
	u64 fragmentation;
	struct bpos bp_pos = POS_MIN;
	int ret = 0;

	trace_bucket_evacuate(c, &bucket);

	bch2_bkey_buf_init(&sk);

	/*
	 * We're not run in a context that handles transaction restarts:
	 */
	bch2_trans_begin(trans);

	bch2_trans_iter_init(trans, &iter, BTREE_ID_alloc,
			     bucket, BTREE_ITER_CACHED);
	ret = lockrestart_do(trans,
			bkey_err(k = bch2_btree_iter_peek_slot(&iter)));
	bch2_trans_iter_exit(trans, &iter);

	bch_err_msg(c, ret, "looking up alloc key");
	if (ret)
		goto err;

	a = bch2_alloc_to_v4(k, &a_convert);
	dirty_sectors = bch2_bucket_sectors_dirty(*a);
	bucket_size = bch_dev_bkey_exists(c, bucket.inode)->mi.bucket_size;
	fragmentation = a->fragmentation_lru;

	ret = bch2_btree_write_buffer_tryflush(trans);
	bch_err_msg(c, ret, "flushing btree write buffer");
	if (ret)
		goto err;

	while (!(ret = bch2_move_ratelimit(ctxt))) {
		if (is_kthread && kthread_should_stop())
			break;

		bch2_trans_begin(trans);

		ret = bch2_get_next_backpointer(trans, bucket, gen,
						&bp_pos, &bp,
						BTREE_ITER_CACHED);
		if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
			continue;
		if (ret)
			goto err;
		if (bkey_eq(bp_pos, POS_MAX))
			break;

		if (!bp.level) {
			k = bch2_backpointer_get_key(trans, &iter, bp_pos, bp, 0);
			ret = bkey_err(k);
			if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
				continue;
			if (ret)
				goto err;
			if (!k.k)
				goto next;

			bch2_bkey_buf_reassemble(&sk, c, k);
			k = bkey_i_to_s_c(sk.k);

			ret = bch2_move_get_io_opts_one(trans, &io_opts, k);
			if (ret) {
				bch2_trans_iter_exit(trans, &iter);
				continue;
			}

			data_opts = _data_opts;
			data_opts.target	= io_opts.background_target;
			data_opts.rewrite_ptrs	= 0;

			unsigned i = 0;
			bkey_for_each_ptr(bch2_bkey_ptrs_c(k), ptr) {
				if (ptr->dev == bucket.inode) {
					data_opts.rewrite_ptrs |= 1U << i;
					if (ptr->cached) {
						bch2_trans_iter_exit(trans, &iter);
						goto next;
					}
				}
				i++;
			}

			ret = bch2_move_extent(ctxt, bucket_in_flight,
					       &iter, k, io_opts, data_opts);
			bch2_trans_iter_exit(trans, &iter);

			if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
				continue;
			if (ret == -ENOMEM) {
				/* memory allocation failure, wait for some IO to finish */
				bch2_move_ctxt_wait_for_io(ctxt);
				continue;
			}
			if (ret)
				goto err;

			if (ctxt->stats)
				atomic64_add(k.k->size, &ctxt->stats->sectors_seen);
		} else {
			struct btree *b;

			b = bch2_backpointer_get_node(trans, &iter, bp_pos, bp);
			ret = PTR_ERR_OR_ZERO(b);
			if (ret == -BCH_ERR_backpointer_to_overwritten_btree_node)
				continue;
			if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
				continue;
			if (ret)
				goto err;
			if (!b)
				goto next;

			ret = bch2_btree_node_rewrite(trans, &iter, b, 0);
			bch2_trans_iter_exit(trans, &iter);

			if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
				continue;
			if (ret)
				goto err;

			if (ctxt->rate)
				bch2_ratelimit_increment(ctxt->rate,
							 c->opts.btree_node_size >> 9);
			if (ctxt->stats) {
				atomic64_add(c->opts.btree_node_size >> 9, &ctxt->stats->sectors_seen);
				atomic64_add(c->opts.btree_node_size >> 9, &ctxt->stats->sectors_moved);
			}
		}
next:
		bp_pos = bpos_nosnap_successor(bp_pos);
	}

	trace_evacuate_bucket(c, &bucket, dirty_sectors, bucket_size, fragmentation, ret);
err:
	bch2_bkey_buf_exit(&sk, c);
	return ret;
}

typedef bool (*move_btree_pred)(struct bch_fs *, void *,
				struct btree *, struct bch_io_opts *,
				struct data_update_opts *);

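/*
 * Walk btree nodes themselves, rather than the keys they contain, rewriting
 * every node within [start, end] that @pred accepts:
 */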
static int bch2_move_btree(struct bch_fs *c,
			   struct bbpos start,
			   struct bbpos end,
			   move_btree_pred pred, void *arg,
			   struct bch_move_stats *stats)
{
	bool kthread = (current->flags & PF_KTHREAD) != 0;
	struct bch_io_opts io_opts = bch2_opts_to_inode_opts(c->opts);
	struct moving_context ctxt;
	struct btree_trans *trans;
	struct btree_iter iter;
	struct btree *b;
	enum btree_id btree;
	struct data_update_opts data_opts;
	int ret = 0;

	bch2_moving_ctxt_init(&ctxt, c, NULL, stats,
			      writepoint_ptr(&c->btree_write_point),
			      true);
	trans = ctxt.trans;

	stats->data_type = BCH_DATA_btree;

	for (btree = start.btree;
	     btree <= min_t(unsigned, end.btree, btree_id_nr_alive(c) - 1);
	     btree++) {
		stats->pos = BBPOS(btree, POS_MIN);

		if (!bch2_btree_id_root(c, btree)->b)
			continue;

		bch2_trans_node_iter_init(trans, &iter, btree, POS_MIN, 0, 0,
					  BTREE_ITER_PREFETCH);
retry:
		ret = 0;
		while (bch2_trans_begin(trans),
		       (b = bch2_btree_iter_peek_node(&iter)) &&
		       !(ret = PTR_ERR_OR_ZERO(b))) {
			if (kthread && kthread_should_stop())
				break;

			if ((cmp_int(btree, end.btree) ?:
			     bpos_cmp(b->key.k.p, end.pos)) > 0)
				break;

			stats->pos = BBPOS(iter.btree_id, iter.pos);

			if (!pred(c, arg, b, &io_opts, &data_opts))
				goto next;

			ret = bch2_btree_node_rewrite(trans, &iter, b, 0) ?: ret;
			if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
				continue;
			if (ret)
				break;
next:
			bch2_btree_iter_next_node(&iter);
		}
		if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
			goto retry;

		bch2_trans_iter_exit(trans, &iter);

		if (kthread && kthread_should_stop())
			break;
	}

	bch_err_fn(c, ret);
	bch2_moving_ctxt_exit(&ctxt);
	bch2_btree_interior_updates_flush(c);

	return ret;
}

static bool rereplicate_pred(struct bch_fs *c, void *arg,
			     struct bkey_s_c k,
			     struct bch_io_opts *io_opts,
			     struct data_update_opts *data_opts)
{
	unsigned nr_good = bch2_bkey_durability(c, k);
	unsigned replicas = bkey_is_btree_ptr(k.k)
		? c->opts.metadata_replicas
		: io_opts->data_replicas;

	if (!nr_good || nr_good >= replicas)
		return false;

	data_opts->target		= 0;
	data_opts->extra_replicas	= replicas - nr_good;
	data_opts->btree_insert_flags	= 0;
	return true;
}

static bool migrate_pred(struct bch_fs *c, void *arg,
			 struct bkey_s_c k,
			 struct bch_io_opts *io_opts,
			 struct data_update_opts *data_opts)
{
	struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
	struct bch_ioctl_data *op = arg;
	unsigned i = 0;

	data_opts->rewrite_ptrs		= 0;
	data_opts->target		= 0;
	data_opts->extra_replicas	= 0;
	data_opts->btree_insert_flags	= 0;

	bkey_for_each_ptr(ptrs, ptr) {
		if (ptr->dev == op->migrate.dev)
			data_opts->rewrite_ptrs |= 1U << i;
		i++;
	}

	return data_opts->rewrite_ptrs != 0;
}

static bool rereplicate_btree_pred(struct bch_fs *c, void *arg,
				   struct btree *b,
				   struct bch_io_opts *io_opts,
				   struct data_update_opts *data_opts)
{
	return rereplicate_pred(c, arg, bkey_i_to_s_c(&b->key), io_opts, data_opts);
}

static bool migrate_btree_pred(struct bch_fs *c, void *arg,
			       struct btree *b,
			       struct bch_io_opts *io_opts,
			       struct data_update_opts *data_opts)
{
	return migrate_pred(c, arg, bkey_i_to_s_c(&b->key), io_opts, data_opts);
}

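/*
 * Returns true if this format can produce packed keys that overflow when
 * unpacked - i.e. field_offset plus the maximum packed field value wraps -
 * in which case the node needs to be rewritten with the current format.
 */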
static bool bformat_needs_redo(struct bkey_format *f)
{
	unsigned i;

	for (i = 0; i < f->nr_fields; i++) {
		unsigned unpacked_bits = bch2_bkey_format_current.bits_per_field[i];
		u64 unpacked_mask = ~((~0ULL << 1) << (unpacked_bits - 1));
		u64 field_offset = le64_to_cpu(f->field_offset[i]);

		if (f->bits_per_field[i] > unpacked_bits)
			return true;

		if ((f->bits_per_field[i] == unpacked_bits) && field_offset)
			return true;

		if (((field_offset + ((1ULL << f->bits_per_field[i]) - 1)) &
		     unpacked_mask) <
		    field_offset)
			return true;
	}

	return false;
}

static bool rewrite_old_nodes_pred(struct bch_fs *c, void *arg,
				   struct btree *b,
				   struct bch_io_opts *io_opts,
				   struct data_update_opts *data_opts)
{
	if (b->version_ondisk != c->sb.version ||
	    btree_node_need_rewrite(b) ||
	    bformat_needs_redo(&b->format)) {
		data_opts->target		= 0;
		data_opts->extra_replicas	= 0;
		data_opts->btree_insert_flags	= 0;
		return true;
	}

	return false;
}

int bch2_scan_old_btree_nodes(struct bch_fs *c, struct bch_move_stats *stats)
{
	int ret;

	ret = bch2_move_btree(c,
			      BBPOS_MIN,
			      BBPOS_MAX,
			      rewrite_old_nodes_pred, c, stats);
	if (!ret) {
		mutex_lock(&c->sb_lock);
		c->disk_sb.sb->compat[0] |= cpu_to_le64(1ULL << BCH_COMPAT_extents_above_btree_updates_done);
		c->disk_sb.sb->compat[0] |= cpu_to_le64(1ULL << BCH_COMPAT_bformat_overflow_done);
		c->disk_sb.sb->version_min = c->disk_sb.sb->version;
		bch2_write_super(c);
		mutex_unlock(&c->sb_lock);
	}

	bch_err_fn(c, ret);
	return ret;
}

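/*
 * Mark pointers for removal one at a time, as long as the durability that
 * remains after dropping them still satisfies the required replica count:
 */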
static bool drop_extra_replicas_pred(struct bch_fs *c, void *arg,
				     struct bkey_s_c k,
				     struct bch_io_opts *io_opts,
				     struct data_update_opts *data_opts)
{
	unsigned durability = bch2_bkey_durability(c, k);
	unsigned replicas = bkey_is_btree_ptr(k.k)
		? c->opts.metadata_replicas
		: io_opts->data_replicas;
	const union bch_extent_entry *entry;
	struct extent_ptr_decoded p;
	unsigned i = 0;

	bkey_for_each_ptr_decode(k.k, bch2_bkey_ptrs_c(k), p, entry) {
		unsigned d = bch2_extent_ptr_durability(c, &p);

		if (d && durability - d >= replicas) {
			data_opts->kill_ptrs |= BIT(i);
			durability -= d;
		}

		i++;
	}

	return data_opts->kill_ptrs != 0;
}

static bool drop_extra_replicas_btree_pred(struct bch_fs *c, void *arg,
					   struct btree *b,
					   struct bch_io_opts *io_opts,
					   struct data_update_opts *data_opts)
{
	return drop_extra_replicas_pred(c, arg, bkey_i_to_s_c(&b->key), io_opts, data_opts);
}

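/*
 * Dispatch a data job (rereplicate, migrate, etc.) over the given key range,
 * flushing journal pins and running replicas GC as required:
 */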
int bch2_data_job(struct bch_fs *c,
		  struct bch_move_stats *stats,
		  struct bch_ioctl_data op)
{
	struct bbpos start	= BBPOS(op.start_btree, op.start_pos);
	struct bbpos end	= BBPOS(op.end_btree, op.end_pos);
	int ret = 0;

	if (op.op >= BCH_DATA_OP_NR)
		return -EINVAL;

	bch2_move_stats_init(stats, bch2_data_ops_strs[op.op]);

	switch (op.op) {
	case BCH_DATA_OP_rereplicate:
		stats->data_type = BCH_DATA_journal;
		ret = bch2_journal_flush_device_pins(&c->journal, -1);
		ret = bch2_move_btree(c, start, end,
				      rereplicate_btree_pred, c, stats) ?: ret;
		ret = bch2_move_data(c, start, end,
				     NULL,
				     stats,
				     writepoint_hashed((unsigned long) current),
				     true,
				     rereplicate_pred, c) ?: ret;
		ret = bch2_replicas_gc2(c) ?: ret;
		break;
	case BCH_DATA_OP_migrate:
		if (op.migrate.dev >= c->sb.nr_devices)
			return -EINVAL;

		stats->data_type = BCH_DATA_journal;
		ret = bch2_journal_flush_device_pins(&c->journal, op.migrate.dev);
		ret = bch2_move_btree(c, start, end,
				      migrate_btree_pred, &op, stats) ?: ret;
		ret = bch2_move_data(c, start, end,
				     NULL,
				     stats,
				     writepoint_hashed((unsigned long) current),
				     true,
				     migrate_pred, &op) ?: ret;
		ret = bch2_replicas_gc2(c) ?: ret;
		break;
	case BCH_DATA_OP_rewrite_old_nodes:
		ret = bch2_scan_old_btree_nodes(c, stats);
		break;
	case BCH_DATA_OP_drop_extra_replicas:
		ret = bch2_move_btree(c, start, end,
				      drop_extra_replicas_btree_pred, c, stats) ?: ret;
		ret = bch2_move_data(c, start, end, NULL, stats,
				     writepoint_hashed((unsigned long) current),
				     true,
				     drop_extra_replicas_pred, c) ?: ret;
		ret = bch2_replicas_gc2(c) ?: ret;
		break;
	default:
		ret = -EINVAL;
	}

	bch2_move_stats_exit(stats, c);
	return ret;
}

void bch2_move_stats_to_text(struct printbuf *out, struct bch_move_stats *stats)
{
	prt_printf(out, "%s: data type=", stats->name);
	bch2_prt_data_type(out, stats->data_type);
	prt_str(out, " pos=");
	bch2_bbpos_to_text(out, stats->pos);
	prt_newline(out);
	printbuf_indent_add(out, 2);

	prt_str(out, "keys moved: ");
	prt_u64(out, atomic64_read(&stats->keys_moved));
	prt_newline(out);

	prt_str(out, "keys raced: ");
	prt_u64(out, atomic64_read(&stats->keys_raced));
	prt_newline(out);

	prt_str(out, "bytes seen: ");
	prt_human_readable_u64(out, atomic64_read(&stats->sectors_seen) << 9);
	prt_newline(out);

	prt_str(out, "bytes moved: ");
	prt_human_readable_u64(out, atomic64_read(&stats->sectors_moved) << 9);
	prt_newline(out);

	prt_str(out, "bytes raced: ");
	prt_human_readable_u64(out, atomic64_read(&stats->sectors_raced) << 9);
	prt_newline(out);

	printbuf_indent_sub(out, 2);
}

static void bch2_moving_ctxt_to_text(struct printbuf *out, struct bch_fs *c, struct moving_context *ctxt)
{
	struct moving_io *io;

	bch2_move_stats_to_text(out, ctxt->stats);
	printbuf_indent_add(out, 2);

	prt_printf(out, "reads: ios %u/%u sectors %u/%u",
		   atomic_read(&ctxt->read_ios),
		   c->opts.move_ios_in_flight,
		   atomic_read(&ctxt->read_sectors),
		   c->opts.move_bytes_in_flight >> 9);
	prt_newline(out);

	prt_printf(out, "writes: ios %u/%u sectors %u/%u",
		   atomic_read(&ctxt->write_ios),
		   c->opts.move_ios_in_flight,
		   atomic_read(&ctxt->write_sectors),
		   c->opts.move_bytes_in_flight >> 9);
	prt_newline(out);

	printbuf_indent_add(out, 2);

	mutex_lock(&ctxt->lock);
	list_for_each_entry(io, &ctxt->ios, io_list)
		bch2_write_op_to_text(out, &io->write.op);
	mutex_unlock(&ctxt->lock);

	printbuf_indent_sub(out, 4);
}

void bch2_fs_moving_ctxts_to_text(struct printbuf *out, struct bch_fs *c)
{
	struct moving_context *ctxt;

	mutex_lock(&c->moving_context_lock);
	list_for_each_entry(ctxt, &c->moving_context_list, list)
		bch2_moving_ctxt_to_text(out, c, ctxt);
	mutex_unlock(&c->moving_context_lock);
}

void bch2_fs_move_init(struct bch_fs *c)
{
	INIT_LIST_HEAD(&c->moving_context_list);
	mutex_init(&c->moving_context_lock);
}