// SPDX-License-Identifier: GPL-2.0

#include "bcachefs.h"
#include "alloc_background.h"
#include "alloc_foreground.h"
#include "backpointers.h"
#include "bkey_buf.h"
#include "btree_gc.h"
#include "btree_update.h"
#include "btree_update_interior.h"
#include "btree_write_buffer.h"
#include "disk_groups.h"
#include "ec.h"
#include "errcode.h"
#include "error.h"
#include "inode.h"
#include "io.h"
#include "journal_reclaim.h"
#include "keylist.h"
#include "move.h"
#include "replicas.h"
#include "super-io.h"
#include "trace.h"

#include <linux/ioprio.h>
#include <linux/kthread.h>
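
/*
 * Tracepoint wrappers: the bkey is only rendered to text when the
 * corresponding tracepoint is actually enabled, so the printbuf work is
 * skipped on the fast path.
 */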
static void trace_move_extent2(struct bch_fs *c, struct bkey_s_c k)
{
	if (trace_move_extent_enabled()) {
		struct printbuf buf = PRINTBUF;

		bch2_bkey_val_to_text(&buf, c, k);
		trace_move_extent(c, buf.buf);
		printbuf_exit(&buf);
	}
}

static void trace_move_extent_read2(struct bch_fs *c, struct bkey_s_c k)
{
	if (trace_move_extent_read_enabled()) {
		struct printbuf buf = PRINTBUF;

		bch2_bkey_val_to_text(&buf, c, k);
		trace_move_extent_read(c, buf.buf);
		printbuf_exit(&buf);
	}
}

static void trace_move_extent_alloc_mem_fail2(struct bch_fs *c, struct bkey_s_c k)
{
	if (trace_move_extent_alloc_mem_fail_enabled()) {
		struct printbuf buf = PRINTBUF;

		bch2_bkey_val_to_text(&buf, c, k);
		trace_move_extent_alloc_mem_fail(c, buf.buf);
		printbuf_exit(&buf);
	}
}

static void progress_list_add(struct bch_fs *c, struct bch_move_stats *stats)
{
	mutex_lock(&c->data_progress_lock);
	list_add(&stats->list, &c->data_progress_list);
	mutex_unlock(&c->data_progress_lock);
}

static void progress_list_del(struct bch_fs *c, struct bch_move_stats *stats)
{
	mutex_lock(&c->data_progress_lock);
	list_del(&stats->list);
	mutex_unlock(&c->data_progress_lock);
}
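
/*
 * A moving_io tracks one extent in flight: the read bio, the data_update for
 * the write side, and the list heads linking it into the moving_context's
 * reads and ios lists.
 */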
struct moving_io {
	struct list_head		read_list;
	struct list_head		io_list;
	struct move_bucket_in_flight	*b;
	struct closure			cl;
	bool				read_completed;

	unsigned			read_sectors;
	unsigned			write_sectors;

	struct bch_read_bio		rbio;

	struct data_update		write;
	/* Must be last since it is variable size */
	struct bio_vec			bi_inline_vecs[0];
};

static void move_free(struct moving_io *io)
{
	struct moving_context *ctxt = io->write.ctxt;

	if (io->b)
		atomic_dec(&io->b->count);

	bch2_data_update_exit(&io->write);

	mutex_lock(&ctxt->lock);
	list_del(&io->io_list);
	wake_up(&ctxt->wait);
	mutex_unlock(&ctxt->lock);

	kfree(io);
}

static void move_write_done(struct bch_write_op *op)
{
	struct moving_io *io = container_of(op, struct moving_io, write.op);
	struct moving_context *ctxt = io->write.ctxt;

	if (io->write.op.error)
		ctxt->write_error = true;

	atomic_sub(io->write_sectors, &io->write.ctxt->write_sectors);
	atomic_dec(&io->write.ctxt->write_ios);
	move_free(io);
	closure_put(&ctxt->cl);
}

static void move_write(struct moving_io *io)
{
	if (unlikely(io->rbio.bio.bi_status || io->rbio.hole)) {
		move_free(io);
		return;
	}

	closure_get(&io->write.ctxt->cl);
	atomic_add(io->write_sectors, &io->write.ctxt->write_sectors);
	atomic_inc(&io->write.ctxt->write_ios);

	bch2_data_update_read_done(&io->write, io->rbio.pick.crc);
}

struct moving_io *bch2_moving_ctxt_next_pending_write(struct moving_context *ctxt)
{
	struct moving_io *io =
		list_first_entry_or_null(&ctxt->reads, struct moving_io, read_list);

	return io && io->read_completed ? io : NULL;
}

static void move_read_endio(struct bio *bio)
{
	struct moving_io *io = container_of(bio, struct moving_io, rbio.bio);
	struct moving_context *ctxt = io->write.ctxt;

	atomic_sub(io->read_sectors, &ctxt->read_sectors);
	atomic_dec(&ctxt->read_ios);
	io->read_completed = true;

	wake_up(&ctxt->wait);
	closure_put(&ctxt->cl);
}

void bch2_moving_ctxt_do_pending_writes(struct moving_context *ctxt,
					struct btree_trans *trans)
{
	struct moving_io *io;

	if (trans)
		bch2_trans_unlock(trans);

	while ((io = bch2_moving_ctxt_next_pending_write(ctxt))) {
		list_del(&io->read_list);
		move_write(io);
	}
}

static void bch2_move_ctxt_wait_for_io(struct moving_context *ctxt,
				       struct btree_trans *trans)
{
	unsigned sectors_pending = atomic_read(&ctxt->write_sectors);

	move_ctxt_wait_event(ctxt, trans,
		!atomic_read(&ctxt->write_sectors) ||
		atomic_read(&ctxt->write_sectors) != sectors_pending);
}
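
/*
 * moving_context lifetime: init registers the context on the filesystem's
 * moving_context_list (and, if stats were supplied, on the data progress
 * list); exit waits for all outstanding reads and writes to drain before
 * unregistering.
 */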
void bch2_moving_ctxt_exit(struct moving_context *ctxt)
{
	struct bch_fs *c = ctxt->c;

	move_ctxt_wait_event(ctxt, NULL, list_empty(&ctxt->reads));
	closure_sync(&ctxt->cl);

	EBUG_ON(atomic_read(&ctxt->write_sectors));
	EBUG_ON(atomic_read(&ctxt->write_ios));
	EBUG_ON(atomic_read(&ctxt->read_sectors));
	EBUG_ON(atomic_read(&ctxt->read_ios));

	if (ctxt->stats) {
		progress_list_del(c, ctxt->stats);
		trace_move_data(c,
				atomic64_read(&ctxt->stats->sectors_moved),
				atomic64_read(&ctxt->stats->keys_moved));
	}

	mutex_lock(&c->moving_context_lock);
	list_del(&ctxt->list);
	mutex_unlock(&c->moving_context_lock);
}

void bch2_moving_ctxt_init(struct moving_context *ctxt,
			   struct bch_fs *c,
			   struct bch_ratelimit *rate,
			   struct bch_move_stats *stats,
			   struct write_point_specifier wp,
			   bool wait_on_copygc)
{
	memset(ctxt, 0, sizeof(*ctxt));

	ctxt->c			= c;
	ctxt->fn		= (void *) _RET_IP_;
	ctxt->rate		= rate;
	ctxt->stats		= stats;
	ctxt->wp		= wp;
	ctxt->wait_on_copygc	= wait_on_copygc;

	closure_init_stack(&ctxt->cl);

	mutex_init(&ctxt->lock);
	INIT_LIST_HEAD(&ctxt->reads);
	INIT_LIST_HEAD(&ctxt->ios);
	init_waitqueue_head(&ctxt->wait);

	mutex_lock(&c->moving_context_lock);
	list_add(&ctxt->list, &c->moving_context_list);
	mutex_unlock(&c->moving_context_lock);

	if (stats) {
		progress_list_add(c, stats);
		stats->data_type = BCH_DATA_user;
	}
}

void bch2_move_stats_init(struct bch_move_stats *stats, char *name)
{
	memset(stats, 0, sizeof(*stats));
	scnprintf(stats->name, sizeof(stats->name), "%s", name);
}
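
/*
 * Drop the pointers selected by data_opts.kill_ptrs (a bitmask of pointer
 * indexes) from @k and update the key in place, without going through the
 * full data move path:
 */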
static int bch2_extent_drop_ptrs(struct btree_trans *trans,
				 struct btree_iter *iter,
				 struct bkey_s_c k,
				 struct data_update_opts data_opts)
{
	struct bch_fs *c = trans->c;
	struct bkey_i *n;
	int ret;

	n = bch2_bkey_make_mut_noupdate(trans, k);
	ret = PTR_ERR_OR_ZERO(n);
	if (ret)
		return ret;

	while (data_opts.kill_ptrs) {
		unsigned i = 0, drop = __fls(data_opts.kill_ptrs);
		struct bch_extent_ptr *ptr;

		bch2_bkey_drop_ptrs(bkey_i_to_s(n), ptr, i++ == drop);
		data_opts.kill_ptrs ^= 1U << drop;
	}

	/*
	 * If the new extent no longer has any pointers, bch2_extent_normalize()
	 * will do the appropriate thing with it (turning it into a
	 * KEY_TYPE_error key, or just a discard if it was a cached extent)
	 */
	bch2_extent_normalize(c, bkey_i_to_s(n));

	/*
	 * Since we're not inserting through an extent iterator
	 * (BTREE_ITER_ALL_SNAPSHOTS iterators aren't extent iterators),
	 * we aren't using the extent overwrite path to delete, we're
	 * just using the normal key deletion path:
	 */
	if (bkey_deleted(&n->k))
		n->k.size = 0;

	return bch2_trans_relock(trans) ?:
		bch2_trans_update(trans, iter, n, BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE) ?:
		bch2_trans_commit(trans, NULL, NULL, BTREE_INSERT_NOFAIL);
}
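
/*
 * Read half of the move path: allocate a moving_io, issue the read with
 * BCH_READ_NODECODE, and queue it on the context; the write side is kicked
 * off from bch2_moving_ctxt_do_pending_writes() once the read completes.
 */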
static int bch2_move_extent(struct btree_trans *trans,
			    struct btree_iter *iter,
			    struct moving_context *ctxt,
			    struct move_bucket_in_flight *bucket_in_flight,
			    struct bch_io_opts io_opts,
			    enum btree_id btree_id,
			    struct bkey_s_c k,
			    struct data_update_opts data_opts)
{
	struct bch_fs *c = trans->c;
	struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
	struct moving_io *io;
	const union bch_extent_entry *entry;
	struct extent_ptr_decoded p;
	unsigned sectors = k.k->size, pages;
	int ret = -ENOMEM;

	trace_move_extent2(c, k);

	bch2_data_update_opts_normalize(k, &data_opts);

	if (!data_opts.rewrite_ptrs &&
	    !data_opts.extra_replicas) {
		if (data_opts.kill_ptrs)
			return bch2_extent_drop_ptrs(trans, iter, k, data_opts);
		return 0;
	}
	/*
	 * Before memory allocations & taking nocow locks in
	 * bch2_data_update_init():
	 */
	bch2_trans_unlock(trans);

	/* write path might have to decompress data: */
	bkey_for_each_ptr_decode(k.k, ptrs, p, entry)
		sectors = max_t(unsigned, sectors, p.crc.uncompressed_size);

	pages = DIV_ROUND_UP(sectors, PAGE_SECTORS);
	io = kzalloc(sizeof(struct moving_io) +
		     sizeof(struct bio_vec) * pages, GFP_KERNEL);
	if (!io)
		goto err;

	INIT_LIST_HEAD(&io->io_list);
	io->write.ctxt		= ctxt;
	io->read_sectors	= k.k->size;
	io->write_sectors	= k.k->size;

	bio_init(&io->write.op.wbio.bio, NULL, io->bi_inline_vecs, pages, 0);
	bio_set_prio(&io->write.op.wbio.bio,
		     IOPRIO_PRIO_VALUE(IOPRIO_CLASS_IDLE, 0));

	if (bch2_bio_alloc_pages(&io->write.op.wbio.bio, sectors << 9,
				 GFP_KERNEL))
		goto err_free;

	io->rbio.c		= c;
	io->rbio.opts		= io_opts;
	bio_init(&io->rbio.bio, NULL, io->bi_inline_vecs, pages, 0);
	io->rbio.bio.bi_vcnt = pages;
	bio_set_prio(&io->rbio.bio, IOPRIO_PRIO_VALUE(IOPRIO_CLASS_IDLE, 0));
	io->rbio.bio.bi_iter.bi_size	= sectors << 9;
	io->rbio.bio.bi_opf		= REQ_OP_READ;
	io->rbio.bio.bi_iter.bi_sector	= bkey_start_offset(k.k);
	io->rbio.bio.bi_end_io		= move_read_endio;

	ret = bch2_data_update_init(trans, ctxt, &io->write, ctxt->wp,
				    io_opts, data_opts, btree_id, k);
	if (ret && ret != -BCH_ERR_unwritten_extent_update)
		goto err_free_pages;

	if (ret == -BCH_ERR_unwritten_extent_update) {
		bch2_update_unwritten_extent(trans, &io->write);
		move_free(io);
		return 0;
	}

	BUG_ON(ret);

	io->write.ctxt = ctxt;
	io->write.op.end_io = move_write_done;

	if (ctxt->stats) {
		atomic64_inc(&ctxt->stats->keys_moved);
		atomic64_add(k.k->size, &ctxt->stats->sectors_moved);
	}

	if (bucket_in_flight) {
		io->b = bucket_in_flight;
		atomic_inc(&io->b->count);
	}

	this_cpu_add(c->counters[BCH_COUNTER_io_move], k.k->size);
	this_cpu_add(c->counters[BCH_COUNTER_move_extent_read], k.k->size);
	trace_move_extent_read2(c, k);

	mutex_lock(&ctxt->lock);
	atomic_add(io->read_sectors, &ctxt->read_sectors);
	atomic_inc(&ctxt->read_ios);

	list_add_tail(&io->read_list, &ctxt->reads);
	list_add_tail(&io->io_list, &ctxt->ios);
	mutex_unlock(&ctxt->lock);

	/*
	 * dropped by move_read_endio() - guards against use after free of
	 * ctxt when doing wakeup
	 */
	closure_get(&ctxt->cl);
	bch2_read_extent(trans, &io->rbio,
			 bkey_start_pos(k.k),
			 btree_id, k, 0,
			 BCH_READ_NODECODE|
			 BCH_READ_LAST_FRAGMENT);
	return 0;
err_free_pages:
	bio_free_pages(&io->write.op.wbio.bio);
err_free:
	kfree(io);
err:
	this_cpu_inc(c->counters[BCH_COUNTER_move_extent_alloc_mem_fail]);
	trace_move_extent_alloc_mem_fail2(c, k);
	return ret;
}

static int lookup_inode(struct btree_trans *trans, struct bpos pos,
			struct bch_inode_unpacked *inode)
{
	struct btree_iter iter;
	struct bkey_s_c k;
	int ret;

	bch2_trans_iter_init(trans, &iter, BTREE_ID_inodes, pos,
			     BTREE_ITER_ALL_SNAPSHOTS);
	k = bch2_btree_iter_peek(&iter);
	ret = bkey_err(k);
	if (ret)
		goto err;

	if (!k.k || !bkey_eq(k.k->p, pos)) {
		ret = -BCH_ERR_ENOENT_inode;
		goto err;
	}

	ret = bkey_is_inode(k.k) ? 0 : -EIO;
	if (ret)
		goto err;

	ret = bch2_inode_unpack(k, inode);
	if (ret)
		goto err;
err:
	bch2_trans_iter_exit(trans, &iter);
	return ret;
}
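
/*
 * Throttle the move path: optionally wait for copygc, honour the ratelimit if
 * one was given, and cap the number of in-flight read/write sectors and IOs.
 * Returns nonzero if the kthread was asked to stop.
 */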
static int move_ratelimit(struct btree_trans *trans,
			  struct moving_context *ctxt)
{
	struct bch_fs *c = trans->c;
	u64 delay;

	if (ctxt->wait_on_copygc) {
		bch2_trans_unlock(trans);
		wait_event_killable(c->copygc_running_wq,
				    !c->copygc_running ||
				    kthread_should_stop());
	}

	do {
		delay = ctxt->rate ? bch2_ratelimit_delay(ctxt->rate) : 0;

		if (delay) {
			bch2_trans_unlock(trans);
			set_current_state(TASK_INTERRUPTIBLE);
		}

		if ((current->flags & PF_KTHREAD) && kthread_should_stop()) {
			__set_current_state(TASK_RUNNING);
			return 1;
		}

		if (delay)
			schedule_timeout(delay);

		if (unlikely(freezing(current))) {
			move_ctxt_wait_event(ctxt, trans, list_empty(&ctxt->reads));
			try_to_freeze();
		}
	} while (delay);

	/*
	 * XXX: these limits really ought to be per device, SSDs and hard drives
	 * will want different limits
	 */
	move_ctxt_wait_event(ctxt, trans,
		atomic_read(&ctxt->write_sectors) < c->opts.move_bytes_in_flight >> 9 &&
		atomic_read(&ctxt->read_sectors) < c->opts.move_bytes_in_flight >> 9 &&
		atomic_read(&ctxt->write_ios) < c->opts.move_ios_in_flight &&
		atomic_read(&ctxt->read_ios) < c->opts.move_ios_in_flight);

	return 0;
}

static int move_get_io_opts(struct btree_trans *trans,
			    struct bch_io_opts *io_opts,
			    struct bkey_s_c k, u64 *cur_inum)
{
	struct bch_inode_unpacked inode;
	int ret;

	if (*cur_inum == k.k->p.inode)
		return 0;

	ret = lookup_inode(trans,
			   SPOS(0, k.k->p.inode, k.k->p.snapshot),
			   &inode);
	if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
		return ret;

	if (!ret)
		bch2_inode_opts_get(io_opts, trans->c, &inode);
	else
		*io_opts = bch2_opts_to_inode_opts(trans->c->opts);
	*cur_inum = k.k->p.inode;
	return 0;
}
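
/*
 * Walk extents from @start until @end within @btree_id, calling @pred on each
 * key to decide whether (and how) it should be moved:
 */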
static int __bch2_move_data(struct moving_context *ctxt,
			    struct bpos start,
			    struct bpos end,
			    move_pred_fn pred, void *arg,
			    enum btree_id btree_id)
{
	struct bch_fs *c = ctxt->c;
	struct bch_io_opts io_opts = bch2_opts_to_inode_opts(c->opts);
	struct bkey_buf sk;
	struct btree_trans trans;
	struct btree_iter iter;
	struct bkey_s_c k;
	struct data_update_opts data_opts;
	u64 cur_inum = U64_MAX;
	int ret = 0, ret2;

	bch2_bkey_buf_init(&sk);
	bch2_trans_init(&trans, c, 0, 0);

	if (ctxt->stats) {
		ctxt->stats->data_type	= BCH_DATA_user;
		ctxt->stats->btree_id	= btree_id;
		ctxt->stats->pos	= start;
	}

	bch2_trans_iter_init(&trans, &iter, btree_id, start,
			     BTREE_ITER_PREFETCH|
			     BTREE_ITER_ALL_SNAPSHOTS);

	if (ctxt->rate)
		bch2_ratelimit_reset(ctxt->rate);

	while (!move_ratelimit(&trans, ctxt)) {
		bch2_trans_begin(&trans);

		k = bch2_btree_iter_peek(&iter);
		if (!k.k)
			break;

		ret = bkey_err(k);
		if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
			continue;
		if (ret)
			break;

		if (bkey_ge(bkey_start_pos(k.k), end))
			break;

		if (ctxt->stats)
			ctxt->stats->pos = iter.pos;

		if (!bkey_extent_is_direct_data(k.k))
			goto next_nondata;

		ret = move_get_io_opts(&trans, &io_opts, k, &cur_inum);
		if (ret)
			continue;

		memset(&data_opts, 0, sizeof(data_opts));
		if (!pred(c, arg, k, &io_opts, &data_opts))
			goto next;

		/*
		 * The iterator gets unlocked by __bch2_read_extent - need to
		 * save a copy of @k elsewhere:
		 */
		bch2_bkey_buf_reassemble(&sk, c, k);
		k = bkey_i_to_s_c(sk.k);

		ret2 = bch2_move_extent(&trans, &iter, ctxt, NULL,
					io_opts, btree_id, k, data_opts);
		if (ret2) {
			if (bch2_err_matches(ret2, BCH_ERR_transaction_restart))
				continue;

			if (ret2 == -ENOMEM) {
				/* memory allocation failure, wait for some IO to finish */
				bch2_move_ctxt_wait_for_io(ctxt, &trans);
				continue;
			}

			/* XXX signal failure */
			goto next;
		}

		if (ctxt->rate)
			bch2_ratelimit_increment(ctxt->rate, k.k->size);
next:
		if (ctxt->stats)
			atomic64_add(k.k->size, &ctxt->stats->sectors_seen);
next_nondata:
		bch2_btree_iter_advance(&iter);
	}

	bch2_trans_iter_exit(&trans, &iter);
	bch2_trans_exit(&trans);
	bch2_bkey_buf_exit(&sk, c);

	return ret;
}

int bch2_move_data(struct bch_fs *c,
		   enum btree_id start_btree_id, struct bpos start_pos,
		   enum btree_id end_btree_id,   struct bpos end_pos,
		   struct bch_ratelimit *rate,
		   struct bch_move_stats *stats,
		   struct write_point_specifier wp,
		   bool wait_on_copygc,
		   move_pred_fn pred, void *arg)
{
	struct moving_context ctxt;
	enum btree_id id;
	int ret;

	bch2_moving_ctxt_init(&ctxt, c, rate, stats, wp, wait_on_copygc);

	for (id = start_btree_id;
	     id <= min_t(unsigned, end_btree_id, BTREE_ID_NR - 1);
	     id++) {
		stats->btree_id = id;

		if (id != BTREE_ID_extents &&
		    id != BTREE_ID_reflink)
			continue;

		ret = __bch2_move_data(&ctxt,
				       id == start_btree_id ? start_pos : POS_MIN,
				       id == end_btree_id   ? end_pos   : POS_MAX,
				       pred, arg, id);
		if (ret)
			break;
	}

	bch2_moving_ctxt_exit(&ctxt);

	return ret;
}
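
/*
 * Evacuate a single bucket: walk its backpointers and move the extents (and
 * rewrite the btree nodes) that still point into it.
 */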
int __bch2_evacuate_bucket(struct btree_trans *trans,
			   struct moving_context *ctxt,
			   struct move_bucket_in_flight *bucket_in_flight,
			   struct bpos bucket, int gen,
			   struct data_update_opts _data_opts)
{
	struct bch_fs *c = ctxt->c;
	struct bch_io_opts io_opts = bch2_opts_to_inode_opts(c->opts);
	struct btree_iter iter;
	struct bkey_buf sk;
	struct bch_backpointer bp;
	struct bch_alloc_v4 a_convert;
	const struct bch_alloc_v4 *a;
	struct bkey_s_c k;
	struct data_update_opts data_opts;
	unsigned dirty_sectors, bucket_size;
	u64 fragmentation;
	u64 cur_inum = U64_MAX;
	struct bpos bp_pos = POS_MIN;
	int ret = 0;

	trace_bucket_evacuate(c, &bucket);

	bch2_bkey_buf_init(&sk);

	/*
	 * We're not run in a context that handles transaction restarts:
	 */
	bch2_trans_begin(trans);

	bch2_trans_iter_init(trans, &iter, BTREE_ID_alloc,
			     bucket, BTREE_ITER_CACHED);
	ret = lockrestart_do(trans,
			bkey_err(k = bch2_btree_iter_peek_slot(&iter)));
	bch2_trans_iter_exit(trans, &iter);

	if (ret) {
		bch_err(c, "%s: error looking up alloc key: %s", __func__, bch2_err_str(ret));
		goto err;
	}

	a = bch2_alloc_to_v4(k, &a_convert);
	dirty_sectors = a->dirty_sectors;
	bucket_size = bch_dev_bkey_exists(c, bucket.inode)->mi.bucket_size;
	fragmentation = a->fragmentation_lru;

	ret = bch2_btree_write_buffer_flush(trans);
	if (ret) {
		bch_err(c, "%s: error flushing btree write buffer: %s", __func__, bch2_err_str(ret));
		goto err;
	}

	while (!(ret = move_ratelimit(trans, ctxt))) {
		bch2_trans_begin(trans);

		ret = bch2_get_next_backpointer(trans, bucket, gen,
						&bp_pos, &bp,
						BTREE_ITER_CACHED);
		if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
			continue;
		if (ret)
			goto err;
		if (bkey_eq(bp_pos, POS_MAX))
			break;

		if (!bp.level) {
			const struct bch_extent_ptr *ptr;
			struct bkey_s_c k;
			unsigned i = 0;

			k = bch2_backpointer_get_key(trans, &iter, bp_pos, bp, 0);
			ret = bkey_err(k);
			if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
				continue;
			if (ret)
				goto err;
			if (!k.k)
				goto next;

			bch2_bkey_buf_reassemble(&sk, c, k);
			k = bkey_i_to_s_c(sk.k);

			ret = move_get_io_opts(trans, &io_opts, k, &cur_inum);
			if (ret) {
				bch2_trans_iter_exit(trans, &iter);
				continue;
			}

			data_opts = _data_opts;
			data_opts.target	= io_opts.background_target;
			data_opts.rewrite_ptrs	= 0;

			bkey_for_each_ptr(bch2_bkey_ptrs_c(k), ptr) {
				if (ptr->dev == bucket.inode) {
					data_opts.rewrite_ptrs |= 1U << i;
					if (ptr->cached) {
						bch2_trans_iter_exit(trans, &iter);
						goto next;
					}
				}
				i++;
			}

			ret = bch2_move_extent(trans, &iter, ctxt,
					bucket_in_flight,
					io_opts, bp.btree_id, k, data_opts);
			bch2_trans_iter_exit(trans, &iter);

			if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
				continue;
			if (ret == -ENOMEM) {
				/* memory allocation failure, wait for some IO to finish */
				bch2_move_ctxt_wait_for_io(ctxt, trans);
				continue;
			}
			if (ret)
				goto err;

			if (ctxt->rate)
				bch2_ratelimit_increment(ctxt->rate, k.k->size);
			if (ctxt->stats)
				atomic64_add(k.k->size, &ctxt->stats->sectors_seen);
		} else {
			struct btree *b;

			b = bch2_backpointer_get_node(trans, &iter, bp_pos, bp);
			ret = PTR_ERR_OR_ZERO(b);
			if (ret == -BCH_ERR_backpointer_to_overwritten_btree_node)
				continue;
			if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
				continue;
			if (ret)
				goto err;
			if (!b)
				goto next;

			ret = bch2_btree_node_rewrite(trans, &iter, b, 0);
			bch2_trans_iter_exit(trans, &iter);

			if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
				continue;
			if (ret)
				goto err;

			if (ctxt->rate)
				bch2_ratelimit_increment(ctxt->rate,
							 c->opts.btree_node_size >> 9);
			if (ctxt->stats) {
				atomic64_add(c->opts.btree_node_size >> 9, &ctxt->stats->sectors_seen);
				atomic64_add(c->opts.btree_node_size >> 9, &ctxt->stats->sectors_moved);
			}
		}
next:
		bp_pos = bpos_nosnap_successor(bp_pos);
	}

	trace_evacuate_bucket(c, &bucket, dirty_sectors, bucket_size, fragmentation, ret);
err:
	bch2_bkey_buf_exit(&sk, c);
	return ret;
}

int bch2_evacuate_bucket(struct bch_fs *c,
			 struct bpos bucket, int gen,
			 struct data_update_opts data_opts,
			 struct bch_ratelimit *rate,
			 struct bch_move_stats *stats,
			 struct write_point_specifier wp,
			 bool wait_on_copygc)
{
	struct btree_trans trans;
	struct moving_context ctxt;
	int ret;

	bch2_trans_init(&trans, c, 0, 0);
	bch2_moving_ctxt_init(&ctxt, c, rate, stats, wp, wait_on_copygc);
	ret = __bch2_evacuate_bucket(&trans, &ctxt, NULL, bucket, gen, data_opts);
	bch2_moving_ctxt_exit(&ctxt);
	bch2_trans_exit(&trans);

	return ret;
}
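
/*
 * Btree node counterpart of the data move path: walk btree nodes and rewrite
 * the ones selected by the predicate.
 */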
typedef bool (*move_btree_pred)(struct bch_fs *, void *,
				struct btree *, struct bch_io_opts *,
				struct data_update_opts *);
2017-03-17 09:18:50 +03:00
static int bch2_move_btree ( struct bch_fs * c ,
2021-03-15 02:01:14 +03:00
enum btree_id start_btree_id , struct bpos start_pos ,
enum btree_id end_btree_id , struct bpos end_pos ,
move_btree_pred pred , void * arg ,
2017-03-17 09:18:50 +03:00
struct bch_move_stats * stats )
{
2021-03-15 02:01:14 +03:00
bool kthread = ( current - > flags & PF_KTHREAD ) ! = 0 ;
2017-03-17 09:18:50 +03:00
struct bch_io_opts io_opts = bch2_opts_to_inode_opts ( c - > opts ) ;
2019-03-25 22:10:15 +03:00
struct btree_trans trans ;
2021-08-30 22:18:31 +03:00
struct btree_iter iter ;
2017-03-17 09:18:50 +03:00
struct btree * b ;
2021-03-15 02:01:14 +03:00
enum btree_id id ;
2022-06-14 02:17:45 +03:00
struct data_update_opts data_opts ;
2017-03-17 09:18:50 +03:00
int ret = 0 ;
2019-05-15 17:54:43 +03:00
bch2_trans_init ( & trans , c , 0 , 0 ) ;
2021-07-23 22:57:19 +03:00
progress_list_add ( c , stats ) ;
2019-03-25 22:10:15 +03:00
2020-07-10 01:28:11 +03:00
stats - > data_type = BCH_DATA_btree ;
2017-03-17 09:18:50 +03:00
2021-03-15 02:01:14 +03:00
for ( id = start_btree_id ;
id < = min_t ( unsigned , end_btree_id , BTREE_ID_NR - 1 ) ;
id + + ) {
2019-03-25 22:10:15 +03:00
stats - > btree_id = id ;
2021-10-08 01:08:01 +03:00
bch2_trans_node_iter_init ( & trans , & iter , id , POS_MIN , 0 , 0 ,
BTREE_ITER_PREFETCH ) ;
2021-10-19 21:20:50 +03:00
retry :
2021-10-19 22:11:45 +03:00
ret = 0 ;
2021-10-08 01:08:01 +03:00
while ( bch2_trans_begin ( & trans ) ,
2021-10-19 21:20:50 +03:00
( b = bch2_btree_iter_peek_node ( & iter ) ) & &
! ( ret = PTR_ERR_OR_ZERO ( b ) ) ) {
2021-03-15 02:01:14 +03:00
if ( kthread & & kthread_should_stop ( ) )
2021-04-21 03:21:39 +03:00
break ;
2021-03-15 02:01:14 +03:00
if ( ( cmp_int ( id , end_btree_id ) ? :
2021-07-06 05:02:07 +03:00
bpos_cmp ( b - > key . k . p , end_pos ) ) > 0 )
2021-03-15 02:01:14 +03:00
break ;
2021-08-30 22:18:31 +03:00
stats - > pos = iter . pos ;
2019-03-25 22:10:15 +03:00
2022-06-14 02:17:45 +03:00
if ( ! pred ( c , arg , b , & io_opts , & data_opts ) )
2017-03-17 09:18:50 +03:00
goto next ;
2021-10-24 23:59:33 +03:00
ret = bch2_btree_node_rewrite ( & trans , & iter , b , 0 ) ? : ret ;
2022-07-18 06:06:38 +03:00
if ( bch2_err_matches ( ret , BCH_ERR_transaction_restart ) )
2021-10-24 23:59:33 +03:00
continue ;
if ( ret )
break ;
2017-03-17 09:18:50 +03:00
next :
2021-10-08 01:08:01 +03:00
bch2_btree_iter_next_node ( & iter ) ;
2017-03-17 09:18:50 +03:00
}
2022-07-18 06:06:38 +03:00
if ( bch2_err_matches ( ret , BCH_ERR_transaction_restart ) )
2021-10-19 21:20:50 +03:00
goto retry ;
2021-08-30 22:18:31 +03:00
bch2_trans_iter_exit ( & trans , & iter ) ;
2017-03-17 09:18:50 +03:00
2021-04-21 03:21:39 +03:00
if ( kthread & & kthread_should_stop ( ) )
break ;
2017-03-17 09:18:50 +03:00
}
2021-04-21 03:21:39 +03:00
2019-03-25 22:10:15 +03:00
bch2_trans_exit ( & trans ) ;
2021-04-05 04:57:35 +03:00
if ( ret )
2022-07-19 02:42:58 +03:00
bch_err ( c , " error in %s(): %s " , __func__ , bch2_err_str ( ret ) ) ;
2021-04-05 04:57:35 +03:00
2022-04-18 00:30:49 +03:00
bch2_btree_interior_updates_flush ( c ) ;
2021-10-25 00:00:33 +03:00
2021-07-23 22:57:19 +03:00
progress_list_del ( c , stats ) ;
2017-03-17 09:18:50 +03:00
return ret ;
}
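
/* Predicates used by the data job paths below: */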
static bool rereplicate_pred(struct bch_fs *c, void *arg,
			     struct bkey_s_c k,
			     struct bch_io_opts *io_opts,
			     struct data_update_opts *data_opts)
{
	unsigned nr_good = bch2_bkey_durability(c, k);
	unsigned replicas = bkey_is_btree_ptr(k.k)
		? c->opts.metadata_replicas
		: io_opts->data_replicas;

	if (!nr_good || nr_good >= replicas)
		return false;

	data_opts->target		= 0;
	data_opts->extra_replicas	= replicas - nr_good;
	data_opts->btree_insert_flags	= 0;
	return true;
}

static bool migrate_pred(struct bch_fs *c, void *arg,
			 struct bkey_s_c k,
			 struct bch_io_opts *io_opts,
			 struct data_update_opts *data_opts)
{
	struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
	const struct bch_extent_ptr *ptr;
	struct bch_ioctl_data *op = arg;
	unsigned i = 0;

	data_opts->rewrite_ptrs		= 0;
	data_opts->target		= 0;
	data_opts->extra_replicas	= 0;
	data_opts->btree_insert_flags	= 0;

	bkey_for_each_ptr(ptrs, ptr) {
		if (ptr->dev == op->migrate.dev)
			data_opts->rewrite_ptrs |= 1U << i;
		i++;
	}

	return data_opts->rewrite_ptrs != 0;
}

static bool rereplicate_btree_pred(struct bch_fs *c, void *arg,
				   struct btree *b,
				   struct bch_io_opts *io_opts,
				   struct data_update_opts *data_opts)
{
	return rereplicate_pred(c, arg, bkey_i_to_s_c(&b->key), io_opts, data_opts);
}

static bool migrate_btree_pred(struct bch_fs *c, void *arg,
			       struct btree *b,
			       struct bch_io_opts *io_opts,
			       struct data_update_opts *data_opts)
{
	return migrate_pred(c, arg, bkey_i_to_s_c(&b->key), io_opts, data_opts);
}
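
/*
 * Detect bkey formats that can pack keys the current unpacked format can't
 * represent - i.e. btree nodes that need rewriting before
 * BCH_COMPAT_bformat_overflow_done can be set:
 */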
static bool bformat_needs_redo(struct bkey_format *f)
{
	unsigned i;

	for (i = 0; i < f->nr_fields; i++) {
		unsigned unpacked_bits = bch2_bkey_format_current.bits_per_field[i];
		u64 unpacked_mask = ~((~0ULL << 1) << (unpacked_bits - 1));
		u64 field_offset = le64_to_cpu(f->field_offset[i]);

		if (f->bits_per_field[i] > unpacked_bits)
			return true;

		if ((f->bits_per_field[i] == unpacked_bits) && field_offset)
			return true;

		if (((field_offset + ((1ULL << f->bits_per_field[i]) - 1)) &
		     unpacked_mask) <
		    field_offset)
			return true;
	}

	return false;
}

static bool rewrite_old_nodes_pred(struct bch_fs *c, void *arg,
				   struct btree *b,
				   struct bch_io_opts *io_opts,
				   struct data_update_opts *data_opts)
{
	if (b->version_ondisk != c->sb.version ||
	    btree_node_need_rewrite(b) ||
	    bformat_needs_redo(&b->format)) {
		data_opts->target		= 0;
		data_opts->extra_replicas	= 0;
		data_opts->btree_insert_flags	= 0;
		return true;
	}

	return false;
}

int bch2_scan_old_btree_nodes(struct bch_fs *c, struct bch_move_stats *stats)
{
	int ret;

	ret = bch2_move_btree(c,
			      0,		POS_MIN,
			      BTREE_ID_NR,	SPOS_MAX,
			      rewrite_old_nodes_pred, c, stats);
	if (!ret) {
		mutex_lock(&c->sb_lock);
		c->disk_sb.sb->compat[0] |= cpu_to_le64(1ULL << BCH_COMPAT_extents_above_btree_updates_done);
		c->disk_sb.sb->compat[0] |= cpu_to_le64(1ULL << BCH_COMPAT_bformat_overflow_done);
		c->disk_sb.sb->version_min = c->disk_sb.sb->version;
		bch2_write_super(c);
		mutex_unlock(&c->sb_lock);
	}

	return ret;
}
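
/*
 * Entry point for data job ioctls (struct bch_ioctl_data): dispatch on op.op
 * to the rereplicate, migrate or rewrite-old-nodes paths above.
 */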
int bch2_data_job(struct bch_fs *c,
		  struct bch_move_stats *stats,
		  struct bch_ioctl_data op)
{
	int ret = 0;

	switch (op.op) {
	case BCH_DATA_OP_REREPLICATE:
		bch2_move_stats_init(stats, "rereplicate");
		stats->data_type = BCH_DATA_journal;
		ret = bch2_journal_flush_device_pins(&c->journal, -1);

		ret = bch2_move_btree(c,
				      op.start_btree,	op.start_pos,
				      op.end_btree,	op.end_pos,
				      rereplicate_btree_pred, c, stats) ?: ret;
		ret = bch2_replicas_gc2(c) ?: ret;

		ret = bch2_move_data(c,
				     op.start_btree,	op.start_pos,
				     op.end_btree,	op.end_pos,
				     NULL,
				     stats,
				     writepoint_hashed((unsigned long) current),
				     true,
				     rereplicate_pred, c) ?: ret;
		ret = bch2_replicas_gc2(c) ?: ret;
		break;
	case BCH_DATA_OP_MIGRATE:
		if (op.migrate.dev >= c->sb.nr_devices)
			return -EINVAL;

		bch2_move_stats_init(stats, "migrate");
		stats->data_type = BCH_DATA_journal;
		ret = bch2_journal_flush_device_pins(&c->journal, op.migrate.dev);

		ret = bch2_move_btree(c,
				      op.start_btree,	op.start_pos,
				      op.end_btree,	op.end_pos,
				      migrate_btree_pred, &op, stats) ?: ret;
		ret = bch2_replicas_gc2(c) ?: ret;

		ret = bch2_move_data(c,
				     op.start_btree,	op.start_pos,
				     op.end_btree,	op.end_pos,
				     NULL,
				     stats,
				     writepoint_hashed((unsigned long) current),
				     true,
				     migrate_pred, &op) ?: ret;
		ret = bch2_replicas_gc2(c) ?: ret;
		break;
	case BCH_DATA_OP_REWRITE_OLD_NODES:
		bch2_move_stats_init(stats, "rewrite_old_nodes");
		ret = bch2_scan_old_btree_nodes(c, stats);
		break;
	default:
		ret = -EINVAL;
	}

	return ret;
}

void bch2_data_jobs_to_text(struct printbuf *out, struct bch_fs *c)
{
	struct bch_move_stats *stats;

	mutex_lock(&c->data_progress_lock);
	list_for_each_entry(stats, &c->data_progress_list, list) {
		prt_printf(out, "%s: data type %s btree_id %s position: ",
			   stats->name,
			   bch2_data_types[stats->data_type],
			   bch2_btree_ids[stats->btree_id]);
		bch2_bpos_to_text(out, stats->pos);
		prt_printf(out, "%s", "\n");
	}
	mutex_unlock(&c->data_progress_lock);
}

static void bch2_moving_ctxt_to_text(struct printbuf *out, struct moving_context *ctxt)
{
	struct moving_io *io;

	prt_printf(out, "%ps:", ctxt->fn);
	prt_newline(out);
	printbuf_indent_add(out, 2);

	prt_printf(out, "reads: %u sectors %u",
		   atomic_read(&ctxt->read_ios),
		   atomic_read(&ctxt->read_sectors));
	prt_newline(out);

	prt_printf(out, "writes: %u sectors %u",
		   atomic_read(&ctxt->write_ios),
		   atomic_read(&ctxt->write_sectors));
	prt_newline(out);
	printbuf_indent_add(out, 2);

	mutex_lock(&ctxt->lock);
	list_for_each_entry(io, &ctxt->ios, io_list) {
		bch2_write_op_to_text(out, &io->write.op);
	}
	mutex_unlock(&ctxt->lock);

	printbuf_indent_sub(out, 4);
}

void bch2_fs_moving_ctxts_to_text(struct printbuf *out, struct bch_fs *c)
{
	struct moving_context *ctxt;

	mutex_lock(&c->moving_context_lock);
	list_for_each_entry(ctxt, &c->moving_context_list, list)
		bch2_moving_ctxt_to_text(out, ctxt);
	mutex_unlock(&c->moving_context_lock);
}

void bch2_fs_move_init(struct bch_fs *c)
{
	INIT_LIST_HEAD(&c->moving_context_list);
	mutex_init(&c->moving_context_lock);

	INIT_LIST_HEAD(&c->data_progress_list);
	mutex_init(&c->data_progress_lock);
}