// SPDX-License-Identifier: GPL-2.0

#include "bcachefs.h"
#include "alloc_background.h"
#include "alloc_foreground.h"
#include "backpointers.h"
#include "bkey_buf.h"
#include "btree_gc.h"
#include "btree_update.h"
#include "btree_update_interior.h"
#include "btree_write_buffer.h"
#include "disk_groups.h"
#include "ec.h"
#include "errcode.h"
#include "error.h"
#include "inode.h"
#include "io.h"
#include "journal_reclaim.h"
#include "keylist.h"
#include "move.h"
#include "replicas.h"
#include "super-io.h"
#include "trace.h"

#include <linux/ioprio.h>
#include <linux/kthread.h>

static void progress_list_add(struct bch_fs *c, struct bch_move_stats *stats)
{
	mutex_lock(&c->data_progress_lock);
	list_add(&stats->list, &c->data_progress_list);
	mutex_unlock(&c->data_progress_lock);
}

static void progress_list_del(struct bch_fs *c, struct bch_move_stats *stats)
{
	mutex_lock(&c->data_progress_lock);
	list_del(&stats->list);
	mutex_unlock(&c->data_progress_lock);
}

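/*
 * One in-flight extent move: the read half (rbio) is issued first, and once it
 * completes the write half (a data_update) rewrites the extent.
 * bi_inline_vecs is variable size and must remain the last member.
 */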
struct moving_io {
	struct list_head	list;
	struct closure		cl;
	bool			read_completed;

	unsigned		read_sectors;
	unsigned		write_sectors;

	struct bch_read_bio	rbio;

	struct data_update	write;
	/* Must be last since it is variable size */
	struct bio_vec		bi_inline_vecs[0];
};

static void move_free(struct moving_io *io)
{
	struct moving_context *ctxt = io->write.ctxt;
	struct bch_fs *c = ctxt->c;

	bch2_data_update_exit(&io->write);
	wake_up(&ctxt->wait);
	bch2_write_ref_put(c, BCH_WRITE_REF_move);
	kfree(io);
}

static void move_write_done(struct bch_write_op *op)
{
	struct moving_io *io = container_of(op, struct moving_io, write.op);
	struct moving_context *ctxt = io->write.ctxt;

	if (io->write.op.error)
		ctxt->write_error = true;

	atomic_sub(io->write_sectors, &io->write.ctxt->write_sectors);
	move_free(io);
	closure_put(&ctxt->cl);
}

static void move_write(struct moving_io *io)
{
	if (unlikely(io->rbio.bio.bi_status || io->rbio.hole)) {
		move_free(io);
		return;
	}

	closure_get(&io->write.ctxt->cl);
	atomic_add(io->write_sectors, &io->write.ctxt->write_sectors);

	bch2_data_update_read_done(&io->write, io->rbio.pick.crc);
}

struct moving_io *bch2_moving_ctxt_next_pending_write(struct moving_context *ctxt)
{
	struct moving_io *io =
		list_first_entry_or_null(&ctxt->reads, struct moving_io, list);

	return io && io->read_completed ? io : NULL;
}

static void move_read_endio(struct bio *bio)
{
	struct moving_io *io = container_of(bio, struct moving_io, rbio.bio);
	struct moving_context *ctxt = io->write.ctxt;

	atomic_sub(io->read_sectors, &ctxt->read_sectors);
	io->read_completed = true;

	wake_up(&ctxt->wait);
	closure_put(&ctxt->cl);
}

void bch2_moving_ctxt_do_pending_writes(struct moving_context *ctxt,
					struct btree_trans *trans)
{
	struct moving_io *io;

	if (trans)
		bch2_trans_unlock(trans);

	while ((io = bch2_moving_ctxt_next_pending_write(ctxt))) {
		list_del(&io->list);
		move_write(io);
	}
}

static void bch2_move_ctxt_wait_for_io(struct moving_context *ctxt,
				       struct btree_trans *trans)
{
	unsigned sectors_pending = atomic_read(&ctxt->write_sectors);

	move_ctxt_wait_event(ctxt, trans,
		!atomic_read(&ctxt->write_sectors) ||
		atomic_read(&ctxt->write_sectors) != sectors_pending);
}

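/*
 * A moving_context ties together a batch of moves: it tracks in-flight read
 * and write sectors, optional rate limiting, and per-operation statistics.
 * Exiting waits for outstanding reads and writes to drain.
 */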
void bch2_moving_ctxt_exit(struct moving_context *ctxt)
{
	move_ctxt_wait_event(ctxt, NULL, list_empty(&ctxt->reads));
	closure_sync(&ctxt->cl);

	EBUG_ON(atomic_read(&ctxt->write_sectors));

	if (ctxt->stats) {
		progress_list_del(ctxt->c, ctxt->stats);

		trace_move_data(ctxt->c,
				atomic64_read(&ctxt->stats->sectors_moved),
				atomic64_read(&ctxt->stats->keys_moved));
	}
}

void bch2_moving_ctxt_init(struct moving_context *ctxt,
			   struct bch_fs *c,
			   struct bch_ratelimit *rate,
			   struct bch_move_stats *stats,
			   struct write_point_specifier wp,
			   bool wait_on_copygc)
{
	memset(ctxt, 0, sizeof(*ctxt));

	ctxt->c			= c;
	ctxt->rate		= rate;
	ctxt->stats		= stats;
	ctxt->wp		= wp;
	ctxt->wait_on_copygc	= wait_on_copygc;

	closure_init_stack(&ctxt->cl);
	INIT_LIST_HEAD(&ctxt->reads);
	init_waitqueue_head(&ctxt->wait);

	if (stats) {
		progress_list_add(c, stats);
		stats->data_type = BCH_DATA_user;
	}
}

void bch2_move_stats_init(struct bch_move_stats *stats, char *name)
{
	memset(stats, 0, sizeof(*stats));
	scnprintf(stats->name, sizeof(stats->name), "%s", name);
}

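/*
 * Drop the pointers flagged in data_opts.kill_ptrs from @k and commit the
 * stripped-down key, for callers that only want pointers removed without
 * rewriting any data.
 */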
static int bch2_extent_drop_ptrs(struct btree_trans *trans,
				 struct btree_iter *iter,
				 struct bkey_s_c k,
				 struct data_update_opts data_opts)
{
	struct bch_fs *c = trans->c;
	struct bkey_i *n;
	int ret;

	n = bch2_bkey_make_mut(trans, k);
	ret = PTR_ERR_OR_ZERO(n);
	if (ret)
		return ret;

	while (data_opts.kill_ptrs) {
		unsigned i = 0, drop = __fls(data_opts.kill_ptrs);
		struct bch_extent_ptr *ptr;

		bch2_bkey_drop_ptrs(bkey_i_to_s(n), ptr, i++ == drop);
		data_opts.kill_ptrs ^= 1U << drop;
	}

	/*
	 * If the new extent no longer has any pointers, bch2_extent_normalize()
	 * will do the appropriate thing with it (turning it into a
	 * KEY_TYPE_error key, or just a discard if it was a cached extent)
	 */
	bch2_extent_normalize(c, bkey_i_to_s(n));

	/*
	 * Since we're not inserting through an extent iterator
	 * (BTREE_ITER_ALL_SNAPSHOTS iterators aren't extent iterators),
	 * we aren't using the extent overwrite path to delete, we're
	 * just using the normal key deletion path:
	 */
	if (bkey_deleted(&n->k))
		n->k.size = 0;

	return bch2_trans_update(trans, iter, n, BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE) ?:
		bch2_trans_commit(trans, NULL, NULL, BTREE_INSERT_NOFAIL);
}

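/*
 * Kick off the move of a single extent: allocate a moving_io, issue the read,
 * and queue it on ctxt->reads; the rewrite happens asynchronously from the
 * read completion path. Returns 0 once the read has been issued, or once the
 * key has been handled without IO (pointers dropped, unwritten extent
 * updated).
 */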
static int bch2_move_extent(struct btree_trans *trans,
			    struct btree_iter *iter,
			    struct moving_context *ctxt,
			    struct bch_io_opts io_opts,
			    enum btree_id btree_id,
			    struct bkey_s_c k,
			    struct data_update_opts data_opts)
{
	struct bch_fs *c = trans->c;
	struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
	struct moving_io *io;
	const union bch_extent_entry *entry;
	struct extent_ptr_decoded p;
	unsigned sectors = k.k->size, pages;
	int ret = -ENOMEM;

	bch2_data_update_opts_normalize(k, &data_opts);

	if (!data_opts.rewrite_ptrs &&
	    !data_opts.extra_replicas) {
		if (data_opts.kill_ptrs)
			return bch2_extent_drop_ptrs(trans, iter, k, data_opts);
		return 0;
	}

	if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_move))
		return -BCH_ERR_erofs_no_writes;

	/*
	 * Before memory allocations & taking nocow locks in
	 * bch2_data_update_init():
	 */
	bch2_trans_unlock(trans);

	/* write path might have to decompress data: */
	bkey_for_each_ptr_decode(k.k, ptrs, p, entry)
		sectors = max_t(unsigned, sectors, p.crc.uncompressed_size);

	pages = DIV_ROUND_UP(sectors, PAGE_SECTORS);
	io = kzalloc(sizeof(struct moving_io) +
		     sizeof(struct bio_vec) * pages, GFP_KERNEL);
	if (!io)
		goto err;

	io->write.ctxt		= ctxt;
	io->read_sectors	= k.k->size;
	io->write_sectors	= k.k->size;

	bio_init(&io->write.op.wbio.bio, NULL, io->bi_inline_vecs, pages, 0);
	bio_set_prio(&io->write.op.wbio.bio,
		     IOPRIO_PRIO_VALUE(IOPRIO_CLASS_IDLE, 0));

	if (bch2_bio_alloc_pages(&io->write.op.wbio.bio, sectors << 9,
				 GFP_KERNEL))
		goto err_free;

	io->rbio.c		= c;
	io->rbio.opts		= io_opts;
	bio_init(&io->rbio.bio, NULL, io->bi_inline_vecs, pages, 0);
	io->rbio.bio.bi_vcnt = pages;
	bio_set_prio(&io->rbio.bio, IOPRIO_PRIO_VALUE(IOPRIO_CLASS_IDLE, 0));
	io->rbio.bio.bi_iter.bi_size	= sectors << 9;

	io->rbio.bio.bi_opf		= REQ_OP_READ;
	io->rbio.bio.bi_iter.bi_sector	= bkey_start_offset(k.k);
	io->rbio.bio.bi_end_io		= move_read_endio;

	ret = bch2_data_update_init(trans, ctxt, &io->write, ctxt->wp,
				    io_opts, data_opts, btree_id, k);
	if (ret && ret != -BCH_ERR_unwritten_extent_update)
		goto err_free_pages;

	io->write.ctxt = ctxt;
	io->write.op.end_io = move_write_done;

	atomic64_inc(&ctxt->stats->keys_moved);
	atomic64_add(k.k->size, &ctxt->stats->sectors_moved);

	if (ret == -BCH_ERR_unwritten_extent_update) {
		bch2_update_unwritten_extent(trans, &io->write);
		move_free(io);
		return 0;
	}

	BUG_ON(ret);

	this_cpu_add(c->counters[BCH_COUNTER_io_move], k.k->size);
	this_cpu_add(c->counters[BCH_COUNTER_move_extent_read], k.k->size);
	trace_move_extent_read(k.k);

	atomic_add(io->read_sectors, &ctxt->read_sectors);
	list_add_tail(&io->list, &ctxt->reads);

	/*
	 * dropped by move_read_endio() - guards against use after free of
	 * ctxt when doing wakeup
	 */
	closure_get(&ctxt->cl);
	bch2_read_extent(trans, &io->rbio,
			 bkey_start_pos(k.k),
			 btree_id, k, 0,
			 BCH_READ_NODECODE|
			 BCH_READ_LAST_FRAGMENT);
	return 0;
err_free_pages:
	bio_free_pages(&io->write.op.wbio.bio);
err_free:
	kfree(io);
err:
	bch2_write_ref_put(c, BCH_WRITE_REF_move);
	trace_and_count(c, move_extent_alloc_mem_fail, k.k);
	return ret;
}

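/* Look up and unpack the inode at @pos, in any snapshot. */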
static int lookup_inode(struct btree_trans *trans, struct bpos pos,
			struct bch_inode_unpacked *inode)
{
	struct btree_iter iter;
	struct bkey_s_c k;
	int ret;

	bch2_trans_iter_init(trans, &iter, BTREE_ID_inodes, pos,
			     BTREE_ITER_ALL_SNAPSHOTS);
	k = bch2_btree_iter_peek(&iter);
	ret = bkey_err(k);
	if (ret)
		goto err;

	if (!k.k || !bkey_eq(k.k->p, pos)) {
		ret = -ENOENT;
		goto err;
	}

	ret = bkey_is_inode(k.k) ? 0 : -EIO;
	if (ret)
		goto err;

	ret = bch2_inode_unpack(k, inode);
	if (ret)
		goto err;
err:
	bch2_trans_iter_exit(trans, &iter);
	return ret;
}

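/*
 * Throttle the move path: optionally wait for copygc, honour the caller's
 * rate limit, and wait until in-flight read and write sectors drop below
 * move_bytes_in_flight. Returns nonzero if the calling kthread should stop.
 */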
static int move_ratelimit(struct btree_trans *trans,
			  struct moving_context *ctxt)
{
	struct bch_fs *c = trans->c;
	u64 delay;

	if (ctxt->wait_on_copygc) {
		bch2_trans_unlock(trans);
		wait_event_killable(c->copygc_running_wq,
				    !c->copygc_running ||
				    kthread_should_stop());
	}

	do {
		delay = ctxt->rate ? bch2_ratelimit_delay(ctxt->rate) : 0;

		if (delay) {
			bch2_trans_unlock(trans);
			set_current_state(TASK_INTERRUPTIBLE);
		}

		if ((current->flags & PF_KTHREAD) && kthread_should_stop()) {
			__set_current_state(TASK_RUNNING);
			return 1;
		}

		if (delay)
			schedule_timeout(delay);

		if (unlikely(freezing(current))) {
			move_ctxt_wait_event(ctxt, trans, list_empty(&ctxt->reads));
			try_to_freeze();
		}
	} while (delay);

	move_ctxt_wait_event(ctxt, trans,
		atomic_read(&ctxt->write_sectors) <
		c->opts.move_bytes_in_flight >> 9);

	move_ctxt_wait_event(ctxt, trans,
		atomic_read(&ctxt->read_sectors) <
		c->opts.move_bytes_in_flight >> 9);

	return 0;
}

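/*
 * Refresh @io_opts from the inode that owns @k, caching the last inode number
 * in @cur_inum so consecutive extents from the same inode cost one lookup.
 */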
static int move_get_io_opts(struct btree_trans *trans,
			    struct bch_io_opts *io_opts,
			    struct bkey_s_c k, u64 *cur_inum)
{
	struct bch_inode_unpacked inode;
	int ret;

	if (*cur_inum == k.k->p.inode)
		return 0;

	ret = lookup_inode(trans,
			   SPOS(0, k.k->p.inode, k.k->p.snapshot),
			   &inode);
	if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
		return ret;

	if (!ret)
		bch2_inode_opts_get(io_opts, trans->c, &inode);
	else
		*io_opts = bch2_opts_to_inode_opts(trans->c->opts);
	*cur_inum = k.k->p.inode;
	return 0;
}

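/*
 * Walk one btree from @start to @end, calling @pred on each extent to decide
 * whether and how it should be rewritten, and issuing the moves that pass the
 * predicate.
 */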
static int __bch2_move_data(struct moving_context *ctxt,
			    struct bpos start,
			    struct bpos end,
			    move_pred_fn pred, void *arg,
			    enum btree_id btree_id)
{
	struct bch_fs *c = ctxt->c;
	struct bch_io_opts io_opts = bch2_opts_to_inode_opts(c->opts);
	struct bkey_buf sk;
	struct btree_trans trans;
	struct btree_iter iter;
	struct bkey_s_c k;
	struct data_update_opts data_opts;
	u64 cur_inum = U64_MAX;
	int ret = 0, ret2;

	bch2_bkey_buf_init(&sk);
	bch2_trans_init(&trans, c, 0, 0);

	ctxt->stats->data_type	= BCH_DATA_user;
	ctxt->stats->btree_id	= btree_id;
	ctxt->stats->pos	= start;

	bch2_trans_iter_init(&trans, &iter, btree_id, start,
			     BTREE_ITER_PREFETCH|
			     BTREE_ITER_ALL_SNAPSHOTS);

	if (ctxt->rate)
		bch2_ratelimit_reset(ctxt->rate);

	while (!move_ratelimit(&trans, ctxt)) {
		bch2_trans_begin(&trans);

		k = bch2_btree_iter_peek(&iter);
		if (!k.k)
			break;

		ret = bkey_err(k);
		if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
			continue;
		if (ret)
			break;

		if (bkey_ge(bkey_start_pos(k.k), end))
			break;

		ctxt->stats->pos = iter.pos;

		if (!bkey_extent_is_direct_data(k.k))
			goto next_nondata;

		ret = move_get_io_opts(&trans, &io_opts, k, &cur_inum);
		if (ret)
			continue;

		memset(&data_opts, 0, sizeof(data_opts));
		if (!pred(c, arg, k, &io_opts, &data_opts))
			goto next;

		/*
		 * The iterator gets unlocked by __bch2_read_extent - need to
		 * save a copy of @k elsewhere:
		 */
		bch2_bkey_buf_reassemble(&sk, c, k);
		k = bkey_i_to_s_c(sk.k);

		bch2_trans_unlock(&trans);

		ret2 = bch2_move_extent(&trans, &iter, ctxt, io_opts,
					btree_id, k, data_opts);
		if (ret2) {
			if (bch2_err_matches(ret2, BCH_ERR_transaction_restart))
				continue;

			if (ret2 == -ENOMEM) {
				/* memory allocation failure, wait for some IO to finish */
				bch2_move_ctxt_wait_for_io(ctxt, &trans);
				continue;
			}

			/* XXX signal failure */
			goto next;
		}

		if (ctxt->rate)
			bch2_ratelimit_increment(ctxt->rate, k.k->size);
next:
		atomic64_add(k.k->size, &ctxt->stats->sectors_seen);
next_nondata:
		bch2_btree_iter_advance(&iter);
	}

	bch2_trans_iter_exit(&trans, &iter);
	bch2_trans_exit(&trans);
	bch2_bkey_buf_exit(&sk, c);

	return ret;
}

int bch2_move_data(struct bch_fs *c,
		   enum btree_id start_btree_id, struct bpos start_pos,
		   enum btree_id end_btree_id,   struct bpos end_pos,
		   struct bch_ratelimit *rate,
		   struct bch_move_stats *stats,
		   struct write_point_specifier wp,
		   bool wait_on_copygc,
		   move_pred_fn pred, void *arg)
{
	struct moving_context ctxt;
	enum btree_id id;
	int ret;

	bch2_moving_ctxt_init(&ctxt, c, rate, stats, wp, wait_on_copygc);

	for (id = start_btree_id;
	     id <= min_t(unsigned, end_btree_id, BTREE_ID_NR - 1);
	     id++) {
		stats->btree_id = id;

		if (id != BTREE_ID_extents &&
		    id != BTREE_ID_reflink)
			continue;

		ret = __bch2_move_data(&ctxt,
				       id == start_btree_id ? start_pos : POS_MIN,
				       id == end_btree_id   ? end_pos   : POS_MAX,
				       pred, arg, id);
		if (ret)
			break;
	}

	bch2_moving_ctxt_exit(&ctxt);

	return ret;
}

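/*
 * Debug check run after evacuating a bucket: if the bucket still shows dirty
 * btree data at the expected generation (after flushing interior btree
 * updates), walk its backpointers and log whatever was left behind.
 */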
static int verify_bucket_evacuated(struct btree_trans *trans, struct bpos bucket, int gen)
{
	struct bch_fs *c = trans->c;
	struct btree_iter iter;
	struct bkey_s_c k;
	struct printbuf buf = PRINTBUF;
	struct bch_backpointer bp;
	u64 bp_offset = 0;
	int ret;

	bch2_trans_iter_init(trans, &iter, BTREE_ID_alloc,
			     bucket, BTREE_ITER_CACHED);
again:
	k = bch2_btree_iter_peek_slot(&iter);
	ret = bkey_err(k);

	if (!ret && k.k->type == KEY_TYPE_alloc_v4) {
		struct bkey_s_c_alloc_v4 a = bkey_s_c_to_alloc_v4(k);

		if (a.v->gen == gen &&
		    a.v->dirty_sectors) {
			if (a.v->data_type == BCH_DATA_btree) {
				bch2_trans_unlock(trans);
				if (bch2_btree_interior_updates_flush(c))
					goto again;
				goto failed_to_evacuate;
			}
		}
	}

	bch2_trans_iter_exit(trans, &iter);
	return ret;
failed_to_evacuate:
	bch2_trans_iter_exit(trans, &iter);

	prt_printf(&buf, bch2_log_msg(c, "failed to evacuate bucket "));
	bch2_bkey_val_to_text(&buf, c, k);

	while (1) {
		bch2_trans_begin(trans);

		ret = bch2_get_next_backpointer(trans, bucket, gen,
						&bp_offset, &bp,
						BTREE_ITER_CACHED);
		if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
			continue;
		if (ret)
			break;
		if (bp_offset == U64_MAX)
			break;

		k = bch2_backpointer_get_key(trans, &iter,
					     bucket, bp_offset, bp);
		ret = bkey_err(k);
		if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
			continue;
		if (ret)
			break;
		if (!k.k)
			continue;
		prt_newline(&buf);
		bch2_bkey_val_to_text(&buf, c, k);
		bch2_trans_iter_exit(trans, &iter);
	}

	bch2_print_string_as_lines(KERN_ERR, buf.buf);
	printbuf_exit(&buf);
	return 0;
}

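/*
 * Move everything still referencing @bucket somewhere else by walking the
 * bucket's backpointers: extents go through the normal data update path,
 * btree nodes through bch2_btree_node_rewrite().
 */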
int __bch2_evacuate_bucket(struct moving_context *ctxt,
			   struct bpos bucket, int gen,
			   struct data_update_opts _data_opts)
{
	struct bch_fs *c = ctxt->c;
	struct bch_io_opts io_opts = bch2_opts_to_inode_opts(c->opts);
	struct btree_trans trans;
	struct btree_iter iter;
	struct bkey_buf sk;
	struct bch_backpointer bp;
	struct bch_alloc_v4 a_convert;
	const struct bch_alloc_v4 *a;
	struct bkey_s_c k;
	struct data_update_opts data_opts;
	unsigned dirty_sectors, bucket_size;
	u64 bp_offset = 0, cur_inum = U64_MAX;
	int ret = 0;

	bch2_bkey_buf_init(&sk);
	bch2_trans_init(&trans, c, 0, 0);

	bch2_trans_iter_init(&trans, &iter, BTREE_ID_alloc,
			     bucket, BTREE_ITER_CACHED);
	ret = lockrestart_do(&trans,
			bkey_err(k = bch2_btree_iter_peek_slot(&iter)));
	bch2_trans_iter_exit(&trans, &iter);

	if (ret) {
		bch_err(c, "%s: error looking up alloc key: %s", __func__, bch2_err_str(ret));
		goto err;
	}

	a = bch2_alloc_to_v4(k, &a_convert);
	dirty_sectors = a->dirty_sectors;
	bucket_size = bch_dev_bkey_exists(c, bucket.inode)->mi.bucket_size;

	ret = bch2_btree_write_buffer_flush(&trans);
	if (ret) {
		bch_err(c, "%s: error flushing btree write buffer: %s", __func__, bch2_err_str(ret));
		goto err;
	}

	while (!(ret = move_ratelimit(&trans, ctxt))) {
		bch2_trans_begin(&trans);

		ret = bch2_get_next_backpointer(&trans, bucket, gen,
						&bp_offset, &bp,
						BTREE_ITER_CACHED);
		if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
			continue;
		if (ret)
			goto err;
		if (bp_offset == U64_MAX)
			break;

		if (!bp.level) {
			const struct bch_extent_ptr *ptr;
			struct bkey_s_c k;
			unsigned i = 0;

			k = bch2_backpointer_get_key(&trans, &iter,
						bucket, bp_offset, bp);
			ret = bkey_err(k);
			if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
				continue;
			if (ret)
				goto err;
			if (!k.k)
				goto next;

			bch2_bkey_buf_reassemble(&sk, c, k);
			k = bkey_i_to_s_c(sk.k);

			ret = move_get_io_opts(&trans, &io_opts, k, &cur_inum);
			if (ret) {
				bch2_trans_iter_exit(&trans, &iter);
				continue;
			}

			data_opts = _data_opts;
			data_opts.target	= io_opts.background_target;
			data_opts.rewrite_ptrs	= 0;

			bkey_for_each_ptr(bch2_bkey_ptrs_c(k), ptr) {
				if (ptr->dev == bucket.inode)
					data_opts.rewrite_ptrs |= 1U << i;
				i++;
			}

			ret = bch2_move_extent(&trans, &iter, ctxt, io_opts,
					       bp.btree_id, k, data_opts);
			bch2_trans_iter_exit(&trans, &iter);

			if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
				continue;
			if (ret == -ENOMEM) {
				/* memory allocation failure, wait for some IO to finish */
				bch2_move_ctxt_wait_for_io(ctxt, &trans);
				continue;
			}
			if (ret)
				goto err;

			if (ctxt->rate)
				bch2_ratelimit_increment(ctxt->rate, k.k->size);
			atomic64_add(k.k->size, &ctxt->stats->sectors_seen);
		} else {
			struct btree *b;

			b = bch2_backpointer_get_node(&trans, &iter,
						bucket, bp_offset, bp);
			ret = PTR_ERR_OR_ZERO(b);
			if (ret == -BCH_ERR_backpointer_to_overwritten_btree_node)
				continue;
			if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
				continue;
			if (ret)
				goto err;
			if (!b)
				goto next;

			ret = bch2_btree_node_rewrite(&trans, &iter, b, 0);
			bch2_trans_iter_exit(&trans, &iter);

			if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
				continue;
			if (ret)
				goto err;

			if (ctxt->rate)
				bch2_ratelimit_increment(ctxt->rate,
							 c->opts.btree_node_size >> 9);
			atomic64_add(c->opts.btree_node_size >> 9, &ctxt->stats->sectors_seen);
			atomic64_add(c->opts.btree_node_size >> 9, &ctxt->stats->sectors_moved);
		}
next:
		bp_offset++;
	}

	trace_evacuate_bucket(c, &bucket, dirty_sectors, bucket_size, ret);

	if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG) && gen >= 0) {
		bch2_trans_unlock(&trans);
		move_ctxt_wait_event(ctxt, NULL, list_empty(&ctxt->reads));
		closure_sync(&ctxt->cl);
		if (!ctxt->write_error)
			lockrestart_do(&trans, verify_bucket_evacuated(&trans, bucket, gen));
	}
err:
	bch2_trans_exit(&trans);
	bch2_bkey_buf_exit(&sk, c);
	return ret;
}

int bch2_evacuate_bucket(struct bch_fs *c,
			 struct bpos bucket, int gen,
			 struct data_update_opts data_opts,
			 struct bch_ratelimit *rate,
			 struct bch_move_stats *stats,
			 struct write_point_specifier wp,
			 bool wait_on_copygc)
{
	struct moving_context ctxt;
	int ret;

	bch2_moving_ctxt_init(&ctxt, c, rate, stats, wp, wait_on_copygc);
	ret = __bch2_evacuate_bucket(&ctxt, bucket, gen, data_opts);
	bch2_moving_ctxt_exit(&ctxt);

	return ret;
}

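/*
 * Btree-node analogue of bch2_move_data(): walk whole nodes in the given
 * range and rewrite the ones @pred selects.
 */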
typedef bool (*move_btree_pred)(struct bch_fs *, void *,
				struct btree *, struct bch_io_opts *,
				struct data_update_opts *);

static int bch2_move_btree(struct bch_fs *c,
			   enum btree_id start_btree_id, struct bpos start_pos,
			   enum btree_id end_btree_id,   struct bpos end_pos,
			   move_btree_pred pred, void *arg,
			   struct bch_move_stats *stats)
{
	bool kthread = (current->flags & PF_KTHREAD) != 0;
	struct bch_io_opts io_opts = bch2_opts_to_inode_opts(c->opts);
	struct btree_trans trans;
	struct btree_iter iter;
	struct btree *b;
	enum btree_id id;
	struct data_update_opts data_opts;
	int ret = 0;

	bch2_trans_init(&trans, c, 0, 0);
	progress_list_add(c, stats);

	stats->data_type = BCH_DATA_btree;

	for (id = start_btree_id;
	     id <= min_t(unsigned, end_btree_id, BTREE_ID_NR - 1);
	     id++) {
		stats->btree_id = id;

		bch2_trans_node_iter_init(&trans, &iter, id, POS_MIN, 0, 0,
					  BTREE_ITER_PREFETCH);
retry:
		ret = 0;
		while (bch2_trans_begin(&trans),
		       (b = bch2_btree_iter_peek_node(&iter)) &&
		       !(ret = PTR_ERR_OR_ZERO(b))) {
			if (kthread && kthread_should_stop())
				break;

			if ((cmp_int(id, end_btree_id) ?:
			     bpos_cmp(b->key.k.p, end_pos)) > 0)
				break;

			stats->pos = iter.pos;

			if (!pred(c, arg, b, &io_opts, &data_opts))
				goto next;

			ret = bch2_btree_node_rewrite(&trans, &iter, b, 0) ?: ret;
			if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
				continue;
			if (ret)
				break;
next:
			bch2_btree_iter_next_node(&iter);
		}
		if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
			goto retry;

		bch2_trans_iter_exit(&trans, &iter);

		if (kthread && kthread_should_stop())
			break;
	}

	bch2_trans_exit(&trans);

	if (ret)
		bch_err(c, "error in %s(): %s", __func__, bch2_err_str(ret));

	bch2_btree_interior_updates_flush(c);

	progress_list_del(c, stats);
	return ret;
}

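/*
 * Predicate for BCH_DATA_OP_REREPLICATE: rewrite any key whose durability is
 * below the configured replica count, requesting the missing copies.
 */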
static bool rereplicate_pred(struct bch_fs *c, void *arg,
			     struct bkey_s_c k,
			     struct bch_io_opts *io_opts,
			     struct data_update_opts *data_opts)
{
	unsigned nr_good = bch2_bkey_durability(c, k);
	unsigned replicas = bkey_is_btree_ptr(k.k)
		? c->opts.metadata_replicas
		: io_opts->data_replicas;

	if (!nr_good || nr_good >= replicas)
		return false;

	data_opts->target		= 0;
	data_opts->extra_replicas	= replicas - nr_good;
	data_opts->btree_insert_flags	= 0;
	return true;
}

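/*
 * Predicate for BCH_DATA_OP_MIGRATE: flag every pointer on the device being
 * evacuated for rewrite; keys with no such pointer are skipped.
 */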
static bool migrate_pred(struct bch_fs *c, void *arg,
			 struct bkey_s_c k,
			 struct bch_io_opts *io_opts,
			 struct data_update_opts *data_opts)
{
	struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
	const struct bch_extent_ptr *ptr;
	struct bch_ioctl_data *op = arg;
	unsigned i = 0;

	data_opts->rewrite_ptrs		= 0;
	data_opts->target		= 0;
	data_opts->extra_replicas	= 0;
	data_opts->btree_insert_flags	= 0;

	bkey_for_each_ptr(ptrs, ptr) {
		if (ptr->dev == op->migrate.dev)
			data_opts->rewrite_ptrs |= 1U << i;
		i++;
	}

	return data_opts->rewrite_ptrs != 0;
}

static bool rereplicate_btree_pred(struct bch_fs *c, void *arg,
				   struct btree *b,
				   struct bch_io_opts *io_opts,
				   struct data_update_opts *data_opts)
{
	return rereplicate_pred(c, arg, bkey_i_to_s_c(&b->key), io_opts, data_opts);
}

static bool migrate_btree_pred(struct bch_fs *c, void *arg,
			       struct btree *b,
			       struct bch_io_opts *io_opts,
			       struct data_update_opts *data_opts)
{
	return migrate_pred(c, arg, bkey_i_to_s_c(&b->key), io_opts, data_opts);
}

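/*
 * Old bkey formats could encode values that no longer fit the current
 * unpacked representation; nodes written with such a format need to be
 * rewritten.
 */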
static bool bformat_needs_redo(struct bkey_format *f)
{
	unsigned i;

	for (i = 0; i < f->nr_fields; i++) {
		unsigned unpacked_bits = bch2_bkey_format_current.bits_per_field[i];
		u64 unpacked_mask = ~((~0ULL << 1) << (unpacked_bits - 1));
		u64 field_offset = le64_to_cpu(f->field_offset[i]);

		if (f->bits_per_field[i] > unpacked_bits)
			return true;

		if ((f->bits_per_field[i] == unpacked_bits) && field_offset)
			return true;

		if (((field_offset + ((1ULL << f->bits_per_field[i]) - 1)) &
		     unpacked_mask) <
		    field_offset)
			return true;
	}

	return false;
}

static bool rewrite_old_nodes_pred(struct bch_fs *c, void *arg,
				   struct btree *b,
				   struct bch_io_opts *io_opts,
				   struct data_update_opts *data_opts)
{
	if (b->version_ondisk != c->sb.version ||
	    btree_node_need_rewrite(b) ||
	    bformat_needs_redo(&b->format)) {
		data_opts->target		= 0;
		data_opts->extra_replicas	= 0;
		data_opts->btree_insert_flags	= 0;
		return true;
	}

	return false;
}

int bch2_scan_old_btree_nodes(struct bch_fs *c, struct bch_move_stats *stats)
{
	int ret;

	ret = bch2_move_btree(c,
			      0,		POS_MIN,
			      BTREE_ID_NR,	SPOS_MAX,
			      rewrite_old_nodes_pred, c, stats);
	if (!ret) {
		mutex_lock(&c->sb_lock);
		c->disk_sb.sb->compat[0] |= cpu_to_le64(1ULL << BCH_COMPAT_extents_above_btree_updates_done);
		c->disk_sb.sb->compat[0] |= cpu_to_le64(1ULL << BCH_COMPAT_bformat_overflow_done);
		c->disk_sb.sb->version_min = c->disk_sb.sb->version;
		bch2_write_super(c);
		mutex_unlock(&c->sb_lock);
	}

	return ret;
}

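/*
 * Dispatch a data job (rereplicate, migrate, rewrite old nodes) over the
 * requested range of btrees, updating @stats as it goes.
 */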
int bch2_data_job(struct bch_fs *c,
		  struct bch_move_stats *stats,
		  struct bch_ioctl_data op)
{
	int ret = 0;

	switch (op.op) {
	case BCH_DATA_OP_REREPLICATE:
		bch2_move_stats_init(stats, "rereplicate");
		stats->data_type = BCH_DATA_journal;
		ret = bch2_journal_flush_device_pins(&c->journal, -1);

		ret = bch2_move_btree(c,
				      op.start_btree,	op.start_pos,
				      op.end_btree,	op.end_pos,
				      rereplicate_btree_pred, c, stats) ?: ret;
		ret = bch2_replicas_gc2(c) ?: ret;

		ret = bch2_move_data(c,
				     op.start_btree,	op.start_pos,
				     op.end_btree,	op.end_pos,
				     NULL,
				     stats,
				     writepoint_hashed((unsigned long) current),
				     true,
				     rereplicate_pred, c) ?: ret;
		ret = bch2_replicas_gc2(c) ?: ret;
		break;
	case BCH_DATA_OP_MIGRATE:
		if (op.migrate.dev >= c->sb.nr_devices)
			return -EINVAL;

		bch2_move_stats_init(stats, "migrate");
		stats->data_type = BCH_DATA_journal;
		ret = bch2_journal_flush_device_pins(&c->journal, op.migrate.dev);

		ret = bch2_move_btree(c,
				      op.start_btree,	op.start_pos,
				      op.end_btree,	op.end_pos,
				      migrate_btree_pred, &op, stats) ?: ret;
		ret = bch2_replicas_gc2(c) ?: ret;

		ret = bch2_move_data(c,
				     op.start_btree,	op.start_pos,
				     op.end_btree,	op.end_pos,
				     NULL,
				     stats,
				     writepoint_hashed((unsigned long) current),
				     true,
				     migrate_pred, &op) ?: ret;
		ret = bch2_replicas_gc2(c) ?: ret;
		break;
	case BCH_DATA_OP_REWRITE_OLD_NODES:
		bch2_move_stats_init(stats, "rewrite_old_nodes");
		ret = bch2_scan_old_btree_nodes(c, stats);
		break;
	default:
		ret = -EINVAL;
	}

	return ret;
}