// SPDX-License-Identifier: GPL-2.0
/*
 * io_misc.c - fallocate, fpunch, truncate:
 */

#include "bcachefs.h"
#include "alloc_foreground.h"
#include "bkey_buf.h"
#include "btree_update.h"
#include "buckets.h"
#include "clock.h"
#include "error.h"
#include "extents.h"
#include "extent_update.h"
#include "inode.h"
#include "io_misc.h"
#include "io_write.h"
#include "logged_ops.h"
# include "rebalance.h"
2023-09-10 18:05:17 -04:00
# include "subvolume.h"

/* Overwrites whatever was present with zeroes: */
int bch2_extent_fallocate(struct btree_trans *trans,
			  subvol_inum inum,
			  struct btree_iter *iter,
			  u64 sectors,
			  struct bch_io_opts opts,
			  s64 *i_sectors_delta,
			  struct write_point_specifier write_point)
{
	struct bch_fs *c = trans->c;
	struct disk_reservation disk_res = { 0 };
	struct closure cl;
	struct open_buckets open_buckets = { 0 };
	struct bkey_s_c k;
	struct bkey_buf old, new;
	unsigned sectors_allocated = 0, new_replicas;
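	/*
	 * Unwritten extents need nocow mode and an on-disk format version
	 * that knows about them; otherwise we fall back to reservations:
	 */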
	bool unwritten = opts.nocow &&
	    c->sb.version >= bcachefs_metadata_version_unwritten_extents;
	int ret;

	bch2_bkey_buf_init(&old);
	bch2_bkey_buf_init(&new);
	closure_init_stack(&cl);

	k = bch2_btree_iter_peek_slot(iter);
	ret = bkey_err(k);
	if (ret)
		return ret;
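
	/* Don't fallocate past the end of the key (or hole) at iter->pos: */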
	sectors = min_t(u64, sectors, k.k->p.offset - iter->pos.offset);

	new_replicas = max(0, (int) opts.data_replicas -
			   (int) bch2_bkey_nr_ptrs_fully_allocated(k));

	/*
	 * Get a disk reservation before (in the nocow case) calling
	 * into the allocator:
	 */
	ret = bch2_disk_reservation_get(c, &disk_res, sectors, new_replicas, 0);
	if (unlikely(ret))
		goto err_noprint;

	bch2_bkey_buf_reassemble(&old, c, k);

	if (!unwritten) {
		struct bkey_i_reservation *reservation;

		bch2_bkey_buf_realloc(&new, c, sizeof(*reservation) / sizeof(u64));
		reservation = bkey_reservation_init(new.k);
		reservation->k.p = iter->pos;
		bch2_key_resize(&reservation->k, sectors);
		reservation->v.nr_replicas = opts.data_replicas;
	} else {
		struct bkey_i_extent *e;
		struct bch_devs_list devs_have;
		struct write_point *wp;

		devs_have.nr = 0;

		bch2_bkey_buf_realloc(&new, c, BKEY_EXTENT_U64s_MAX);

		e = bkey_extent_init(new.k);
		e->k.p = iter->pos;

		ret = bch2_alloc_sectors_start_trans(trans,
				opts.foreground_target,
				false,
				write_point,
				&devs_have,
				opts.data_replicas,
				opts.data_replicas,
				BCH_WATERMARK_normal, 0, &cl, &wp);
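		/*
		 * If the allocation couldn't complete without blocking, it
		 * queued @cl; turn that into a transaction restart, and wait
		 * on the closure (with the transaction unlocked) below:
		 */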
		if (bch2_err_matches(ret, BCH_ERR_operation_blocked))
			ret = -BCH_ERR_transaction_restart_nested;
		if (ret)
			goto err;

		sectors = min_t(u64, sectors, wp->sectors_free);
		sectors_allocated = sectors;

		bch2_key_resize(&e->k, sectors);

		bch2_open_bucket_get(c, wp, &open_buckets);
		bch2_alloc_sectors_append_ptrs(c, wp, &e->k_i, sectors, false);
		bch2_alloc_sectors_done(c, wp);

		extent_for_each_ptr(extent_i_to_s(e), ptr)
			ptr->unwritten = true;
	}

	ret = bch2_extent_update(trans, inum, iter, new.k, &disk_res,
				 0, i_sectors_delta, true);
err:
	if (!ret && sectors_allocated)
		bch2_increment_clock(c, sectors_allocated, WRITE);
	if (should_print_err(ret))
		bch_err_inum_offset_ratelimited(c,
			inum.inum,
			iter->pos.offset << 9,
			"%s(): error: %s", __func__, bch2_err_str(ret));
err_noprint:
	bch2_open_buckets_put(c, &open_buckets);
	bch2_disk_reservation_put(c, &disk_res);
	bch2_bkey_buf_exit(&new, c);
	bch2_bkey_buf_exit(&old, c);
	if (closure_nr_remaining(&cl) != 1) {
		bch2_trans_unlock(trans);
		closure_sync(&cl);
	}

	return ret;
}

/*
 * Returns -BCH_ERR_transaction_restart if we had to drop locks:
 */
int bch2_fpunch_at(struct btree_trans *trans, struct btree_iter *iter,
		   subvol_inum inum, u64 end,
		   s64 *i_sectors_delta)
{
	struct bch_fs *c = trans->c;
	unsigned max_sectors = KEY_SIZE_MAX & (~0 << c->block_bits);
	struct bpos end_pos = POS(inum.inum, end);
	struct bkey_s_c k;
	int ret = 0, ret2 = 0;
	u32 snapshot;
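
	/*
	 * If an iteration restarts, remember that in @ret2: even on success
	 * the caller needs to know we dropped locks (see comment above).
	 */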
	while (!ret ||
	       bch2_err_matches(ret, BCH_ERR_transaction_restart)) {
		struct disk_reservation disk_res =
			bch2_disk_reservation_init(c, 0);
		struct bkey_i delete;

		if (ret)
			ret2 = ret;

		bch2_trans_begin(trans);

		ret = bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot);
		if (ret)
			continue;

		bch2_btree_iter_set_snapshot(iter, snapshot);

		/*
		 * peek_upto() doesn't have ideal semantics for extents:
		 */
		k = bch2_btree_iter_peek_upto(iter, end_pos);
		if (!k.k)
			break;

		ret = bkey_err(k);
		if (ret)
			continue;

		bkey_init(&delete.k);
		delete.k.p = iter->pos;

		/* create the biggest key we can */
		bch2_key_resize(&delete.k, max_sectors);
		bch2_cut_back(end_pos, &delete);

		ret = bch2_extent_update(trans, inum, iter, &delete,
					 &disk_res, 0, i_sectors_delta, false);
		bch2_disk_reservation_put(c, &disk_res);
	}

	return ret ?: ret2;
}

int bch2_fpunch(struct bch_fs *c, subvol_inum inum, u64 start, u64 end,
		s64 *i_sectors_delta)
{
	struct btree_trans *trans = bch2_trans_get(c);
	struct btree_iter iter;
	int ret;

	bch2_trans_iter_init(trans, &iter, BTREE_ID_extents,
			     POS(inum.inum, start),
			     BTREE_ITER_INTENT);

	ret = bch2_fpunch_at(trans, &iter, inum, end, i_sectors_delta);

	bch2_trans_iter_exit(trans, &iter);
	bch2_trans_put(trans);
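
	/*
	 * bch2_fpunch_at() returns a transaction restart to say it dropped
	 * locks; at this level that's not an error:
	 */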
	if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
		ret = 0;

	return ret;
}

/* truncate: */

void bch2_logged_op_truncate_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c k)
{
	struct bkey_s_c_logged_op_truncate op = bkey_s_c_to_logged_op_truncate(k);

	prt_printf(out, "subvol=%u", le32_to_cpu(op.v->subvol));
	prt_printf(out, " inum=%llu", le64_to_cpu(op.v->inum));
	prt_printf(out, " new_i_size=%llu", le64_to_cpu(op.v->new_i_size));
}

static int truncate_set_isize(struct btree_trans *trans,
			      subvol_inum inum,
			      u64 new_i_size)
{
	struct btree_iter iter = { NULL };
	struct bch_inode_unpacked inode_u;
	int ret;

	ret = bch2_inode_peek(trans, &iter, &inode_u, inum, BTREE_ITER_INTENT) ?:
	      (inode_u.bi_size = new_i_size, 0) ?:
	      bch2_inode_write(trans, &iter, &inode_u);

	bch2_trans_iter_exit(trans, &iter);
	return ret;
}

static int __bch2_resume_logged_op_truncate(struct btree_trans *trans,
					    struct bkey_i *op_k,
					    u64 *i_sectors_delta)
{
	struct bch_fs *c = trans->c;
	struct btree_iter fpunch_iter;
	struct bkey_i_logged_op_truncate *op = bkey_i_to_logged_op_truncate(op_k);
	subvol_inum inum = { le32_to_cpu(op->v.subvol), le64_to_cpu(op->v.inum) };
	u64 new_i_size = le64_to_cpu(op->v.new_i_size);
	int ret;

	ret = commit_do(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
			truncate_set_isize(trans, inum, new_i_size));
	if (ret)
		goto err;
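
	/*
	 * Punch from the first block past the new EOF to the end of the
	 * file; any partial block at the new EOF is left as is:
	 */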
	bch2_trans_iter_init(trans, &fpunch_iter, BTREE_ID_extents,
			     POS(inum.inum, round_up(new_i_size, block_bytes(c)) >> 9),
			     BTREE_ITER_INTENT);
	ret = bch2_fpunch_at(trans, &fpunch_iter, inum, U64_MAX, i_sectors_delta);
	bch2_trans_iter_exit(trans, &fpunch_iter);

	if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
		ret = 0;
err:
	bch2_logged_op_finish(trans, op_k);
	return ret;
}

int bch2_resume_logged_op_truncate(struct btree_trans *trans, struct bkey_i *op_k)
{
	return __bch2_resume_logged_op_truncate(trans, op_k, NULL);
}

int bch2_truncate(struct bch_fs *c, subvol_inum inum, u64 new_i_size, u64 *i_sectors_delta)
{
	struct bkey_i_logged_op_truncate op;

	bkey_logged_op_truncate_init(&op.k_i);
	op.v.subvol = cpu_to_le32(inum.subvol);
	op.v.inum = cpu_to_le64(inum.inum);
	op.v.new_i_size = cpu_to_le64(new_i_size);

	/*
	 * Logged ops aren't atomic w.r.t. snapshot creation: creating a
	 * snapshot while they're in progress, then crashing, will result in
	 * the resume only proceeding in one of the snapshots
	 */
	down_read(&c->snapshot_create_lock);
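	/*
	 * bch2_logged_op_start() persists the op, so if we crash partway
	 * through, recovery resumes it via bch2_resume_logged_op_truncate():
	 */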
	int ret = bch2_trans_run(c,
		bch2_logged_op_start(trans, &op.k_i) ?:
		__bch2_resume_logged_op_truncate(trans, &op.k_i, i_sectors_delta));
	up_read(&c->snapshot_create_lock);

	return ret;
}

/* finsert/fcollapse: */

void bch2_logged_op_finsert_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c k)
{
	struct bkey_s_c_logged_op_finsert op = bkey_s_c_to_logged_op_finsert(k);

	prt_printf(out, "subvol=%u", le32_to_cpu(op.v->subvol));
	prt_printf(out, " inum=%llu", le64_to_cpu(op.v->inum));
	prt_printf(out, " dst_offset=%lli", le64_to_cpu(op.v->dst_offset));
	prt_printf(out, " src_offset=%llu", le64_to_cpu(op.v->src_offset));
}

static int adjust_i_size(struct btree_trans *trans, subvol_inum inum, u64 offset, s64 len)
{
	struct btree_iter iter;
	struct bch_inode_unpacked inode_u;
	int ret;
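
	/* @offset and @len are in sectors; bi_size is in bytes: */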
offset < < = 9 ;
len < < = 9 ;
ret = bch2_inode_peek ( trans , & iter , & inode_u , inum , BTREE_ITER_INTENT ) ;
if ( ret )
return ret ;
if ( len > 0 ) {
if ( MAX_LFS_FILESIZE - inode_u . bi_size < len ) {
ret = - EFBIG ;
goto err ;
}
if ( offset > = inode_u . bi_size ) {
ret = - EINVAL ;
goto err ;
}
}
inode_u . bi_size + = len ;
inode_u . bi_mtime = inode_u . bi_ctime = bch2_current_time ( trans - > c ) ;
ret = bch2_inode_write ( trans , & iter , & inode_u ) ;
err :
bch2_trans_iter_exit ( trans , & iter ) ;
return ret ;
}

static int __bch2_resume_logged_op_finsert(struct btree_trans *trans,
					   struct bkey_i *op_k,
					   u64 *i_sectors_delta)
{
	struct bch_fs *c = trans->c;
	struct btree_iter iter;
	struct bkey_i_logged_op_finsert *op = bkey_i_to_logged_op_finsert(op_k);
	subvol_inum inum = { le32_to_cpu(op->v.subvol), le64_to_cpu(op->v.inum) };
	struct bch_io_opts opts;
	u64 dst_offset = le64_to_cpu(op->v.dst_offset);
	u64 src_offset = le64_to_cpu(op->v.src_offset);
	s64 shift = dst_offset - src_offset;
	u64 len = abs(shift);
	u64 pos = le64_to_cpu(op->v.pos);
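	/* shift > 0 means an finsert, shift < 0 an fcollapse: */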
	bool insert = shift > 0;
	int ret = 0;

	ret = bch2_inum_opts_get(trans, inum, &opts);
	if (ret)
		return ret;

	bch2_trans_iter_init(trans, &iter, BTREE_ID_extents,
			     POS(inum.inum, 0),
			     BTREE_ITER_INTENT);
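
	/*
	 * The op is a little state machine, persisted to the logged ops
	 * btree: on resume we re-enter at whatever state was last committed,
	 * and each case falls through to the next state.
	 */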
	switch (op->v.state) {
	case LOGGED_OP_FINSERT_start:
		op->v.state = LOGGED_OP_FINSERT_shift_extents;

		if (insert) {
			ret = commit_do(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
					adjust_i_size(trans, inum, src_offset, len) ?:
					bch2_logged_op_update(trans, &op->k_i));
			if (ret)
				goto err;
		} else {
			bch2_btree_iter_set_pos(&iter, POS(inum.inum, src_offset));

			ret = bch2_fpunch_at(trans, &iter, inum, src_offset + len, i_sectors_delta);
			if (ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart))
				goto err;

			ret = commit_do(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
					bch2_logged_op_update(trans, &op->k_i));
		}

		fallthrough;
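	/*
	 * Shift extents one at a time: inserts walk backwards (from the end
	 * of the file towards src_offset) so an extent is never copied over
	 * one that hasn't moved yet; collapses walk forwards. op->v.pos
	 * records progress for crash resume.
	 */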
	case LOGGED_OP_FINSERT_shift_extents:
		while (1) {
			struct disk_reservation disk_res =
				bch2_disk_reservation_init(c, 0);
			struct bkey_i delete, *copy;
			struct bkey_s_c k;
			struct bpos src_pos = POS(inum.inum, src_offset);
			u32 snapshot;

			bch2_trans_begin(trans);

			ret = bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot);
			if (ret)
				goto btree_err;

			bch2_btree_iter_set_snapshot(&iter, snapshot);
			bch2_btree_iter_set_pos(&iter, SPOS(inum.inum, pos, snapshot));

			k = insert
				? bch2_btree_iter_peek_prev(&iter)
				: bch2_btree_iter_peek_upto(&iter, POS(inum.inum, U64_MAX));
			if ((ret = bkey_err(k)))
				goto btree_err;

			if (!k.k ||
			    k.k->p.inode != inum.inum ||
			    bkey_le(k.k->p, POS(inum.inum, src_offset)))
				break;

			copy = bch2_bkey_make_mut_noupdate(trans, k);
			if ((ret = PTR_ERR_OR_ZERO(copy)))
				goto btree_err;

			if (insert &&
			    bkey_lt(bkey_start_pos(k.k), src_pos)) {
				bch2_cut_front(src_pos, copy);

				/* Splitting compressed extent? */
				bch2_disk_reservation_add(c, &disk_res,
						copy->k.size *
						bch2_bkey_nr_ptrs_allocated(bkey_i_to_s_c(copy)),
						BCH_DISK_RESERVATION_NOFAIL);
			}

			bkey_init(&delete.k);
			delete.k.p = copy->k.p;
			delete.k.p.snapshot = snapshot;
			delete.k.size = copy->k.size;

			copy->k.p.offset += shift;
			copy->k.p.snapshot = snapshot;

			op->v.pos = cpu_to_le64(insert ? bkey_start_offset(&delete.k) : delete.k.p.offset);

			ret = bch2_bkey_set_needs_rebalance(c, copy, &opts) ?:
			      bch2_btree_insert_trans(trans, BTREE_ID_extents, &delete, 0) ?:
			      bch2_btree_insert_trans(trans, BTREE_ID_extents, copy, 0) ?:
			      bch2_logged_op_update(trans, &op->k_i) ?:
			      bch2_trans_commit(trans, &disk_res, NULL, BCH_TRANS_COMMIT_no_enospc);
btree_err:
			bch2_disk_reservation_put(c, &disk_res);

			if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
				continue;
			if (ret)
				goto err;

			pos = le64_to_cpu(op->v.pos);
		}

		op->v.state = LOGGED_OP_FINSERT_finish;

		if (!insert) {
			ret = commit_do(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
					adjust_i_size(trans, inum, src_offset, shift) ?:
					bch2_logged_op_update(trans, &op->k_i));
		} else {
			/* We need an inode update to update bi_journal_seq for fsync: */
			ret = commit_do(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
					adjust_i_size(trans, inum, 0, 0) ?:
					bch2_logged_op_update(trans, &op->k_i));
		}

		break;
	case LOGGED_OP_FINSERT_finish:
		break;
	}
err:
	bch2_logged_op_finish(trans, op_k);
	bch2_trans_iter_exit(trans, &iter);
	return ret;
}

int bch2_resume_logged_op_finsert(struct btree_trans *trans, struct bkey_i *op_k)
{
	return __bch2_resume_logged_op_finsert(trans, op_k, NULL);
}

int bch2_fcollapse_finsert(struct bch_fs *c, subvol_inum inum,
			   u64 offset, u64 len, bool insert,
			   s64 *i_sectors_delta)
{
	struct bkey_i_logged_op_finsert op;
	s64 shift = insert ? len : -len;

	bkey_logged_op_finsert_init(&op.k_i);
	op.v.subvol = cpu_to_le32(inum.subvol);
	op.v.inum = cpu_to_le64(inum.inum);
	op.v.dst_offset = cpu_to_le64(offset + shift);
	op.v.src_offset = cpu_to_le64(offset);
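
	/*
	 * Initial scan position: an insert walks backwards from the end of
	 * the file, a collapse walks forwards from @offset:
	 */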
	op.v.pos = cpu_to_le64(insert ? U64_MAX : offset);

	/*
	 * Logged ops aren't atomic w.r.t. snapshot creation: creating a
	 * snapshot while they're in progress, then crashing, will result in
	 * the resume only proceeding in one of the snapshots
	 */
	down_read(&c->snapshot_create_lock);
	int ret = bch2_trans_run(c,
		bch2_logged_op_start(trans, &op.k_i) ?:
		__bch2_resume_logged_op_finsert(trans, &op.k_i, i_sectors_delta));
	up_read(&c->snapshot_create_lock);

	return ret;
}