// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2010 Kent Overstreet <kent.overstreet@gmail.com>
 * Copyright (C) 2014 Datera Inc.
 */

#include "bcachefs.h"
#include "alloc_background.h"
#include "alloc_foreground.h"
#include "backpointers.h"
#include "bkey_methods.h"
#include "bkey_buf.h"
#include "btree_journal_iter.h"
#include "btree_key_cache.h"
#include "btree_locking.h"
#include "btree_node_scan.h"
#include "btree_update_interior.h"
#include "btree_io.h"
#include "btree_gc.h"
#include "buckets.h"
#include "clock.h"
#include "debug.h"
#include "ec.h"
#include "error.h"
#include "extents.h"
#include "journal.h"
#include "keylist.h"
#include "move.h"
#include "recovery_passes.h"
#include "reflink.h"
#include "replicas.h"
#include "super-io.h"
#include "trace.h"

#include <linux/slab.h>
#include <linux/bitops.h>
#include <linux/freezer.h>
#include <linux/kthread.h>
#include <linux/preempt.h>
#include <linux/rcupdate.h>
#include <linux/sched/task.h>

#define DROP_THIS_NODE		10
#define DROP_PREV_NODE		11
#define DID_FILL_FROM_SCAN	12

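/*
 * Cast a const key reference to a mutable one without copying: the GC path
 * only reads the key, but the trigger interface below takes a struct bkey_s.
 */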
static struct bkey_s unsafe_bkey_s_c_to_s(struct bkey_s_c k)
{
	return (struct bkey_s) {{{
		(struct bkey *) k.k,
		(struct bch_val *) k.v
	}}};
}

static inline void __gc_pos_set(struct bch_fs *c, struct gc_pos new_pos)
{
	preempt_disable();
	write_seqcount_begin(&c->gc_pos_lock);
	c->gc_pos = new_pos;
	write_seqcount_end(&c->gc_pos_lock);
	preempt_enable();
}

static inline void gc_pos_set(struct bch_fs *c, struct gc_pos new_pos)
{
	BUG_ON(gc_pos_cmp(new_pos, c->gc_pos) < 0);
	__gc_pos_set(c, new_pos);
}

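/*
 * Topology repair helpers: rewrite a node's key as btree_ptr_v2 so that
 * min_key/max_key can be adjusted, recording the new key in the journal keys.
 */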
static void btree_ptr_to_v2(struct btree *b, struct bkey_i_btree_ptr_v2 *dst)
{
	switch (b->key.k.type) {
	case KEY_TYPE_btree_ptr: {
		struct bkey_i_btree_ptr *src = bkey_i_to_btree_ptr(&b->key);

		dst->k.p		= src->k.p;
		dst->v.mem_ptr		= 0;
		dst->v.seq		= b->data->keys.seq;
		dst->v.sectors_written	= 0;
		dst->v.flags		= 0;
		dst->v.min_key		= b->data->min_key;
		set_bkey_val_bytes(&dst->k, sizeof(dst->v) + bkey_val_bytes(&src->k));
		memcpy(dst->v.start, src->v.start, bkey_val_bytes(&src->k));
		break;
	}
	case KEY_TYPE_btree_ptr_v2:
		bkey_copy(&dst->k_i, &b->key);
		break;
	default:
		BUG();
	}
}

static int set_node_min(struct bch_fs *c, struct btree *b, struct bpos new_min)
{
	struct bkey_i_btree_ptr_v2 *new;
	int ret;

	if (c->opts.verbose) {
		struct printbuf buf = PRINTBUF;

		bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key));
		prt_str(&buf, " -> ");
		bch2_bpos_to_text(&buf, new_min);

		bch_info(c, "%s(): %s", __func__, buf.buf);
		printbuf_exit(&buf);
	}

	new = kmalloc_array(BKEY_BTREE_PTR_U64s_MAX, sizeof(u64), GFP_KERNEL);
	if (!new)
		return -BCH_ERR_ENOMEM_gc_repair_key;

	btree_ptr_to_v2(b, new);
	b->data->min_key	= new_min;
	new->v.min_key		= new_min;
	SET_BTREE_PTR_RANGE_UPDATED(&new->v, true);

	ret = bch2_journal_key_insert_take(c, b->c.btree_id, b->c.level + 1, &new->k_i);
	if (ret) {
		kfree(new);
		return ret;
	}

	bch2_btree_node_drop_keys_outside_node(b);

	bkey_copy(&b->key, &new->k_i);
	return 0;
}

static int set_node_max(struct bch_fs *c, struct btree *b, struct bpos new_max)
{
	struct bkey_i_btree_ptr_v2 *new;
	int ret;

	if (c->opts.verbose) {
		struct printbuf buf = PRINTBUF;

		bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key));
		prt_str(&buf, " -> ");
		bch2_bpos_to_text(&buf, new_max);

		bch_info(c, "%s(): %s", __func__, buf.buf);
		printbuf_exit(&buf);
	}

	ret = bch2_journal_key_delete(c, b->c.btree_id, b->c.level + 1, b->key.k.p);
	if (ret)
		return ret;

	new = kmalloc_array(BKEY_BTREE_PTR_U64s_MAX, sizeof(u64), GFP_KERNEL);
	if (!new)
		return -BCH_ERR_ENOMEM_gc_repair_key;

	btree_ptr_to_v2(b, new);
	b->data->max_key	= new_max;
	new->k.p		= new_max;
	SET_BTREE_PTR_RANGE_UPDATED(&new->v, true);

	ret = bch2_journal_key_insert_take(c, b->c.btree_id, b->c.level + 1, &new->k_i);
	if (ret) {
		kfree(new);
		return ret;
	}

	bch2_btree_node_drop_keys_outside_node(b);

	mutex_lock(&c->btree_cache.lock);
	bch2_btree_node_hash_remove(&c->btree_cache, b);

	bkey_copy(&b->key, &new->k_i);
	ret = __bch2_btree_node_hash_insert(&c->btree_cache, b);
	BUG_ON(ret);
	mutex_unlock(&c->btree_cache.lock);
	return 0;
}

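/*
 * Check that @cur starts exactly where @prev ended (or at the parent's
 * min_key): gaps may be filled from nodes found by scanning, overlaps are
 * resolved by adjusting or dropping nodes.
 */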
static int btree_check_node_boundaries(struct bch_fs *c, struct btree *b,
				       struct btree *prev, struct btree *cur,
				       struct bpos *pulled_from_scan)
{
	struct bpos expected_start = !prev
		? b->data->min_key
		: bpos_successor(prev->key.k.p);
	struct printbuf buf = PRINTBUF;
	int ret = 0;

	BUG_ON(b->key.k.type == KEY_TYPE_btree_ptr_v2 &&
	       !bpos_eq(bkey_i_to_btree_ptr_v2(&b->key)->v.min_key,
			b->data->min_key));

	if (bpos_eq(expected_start, cur->data->min_key))
		return 0;

	prt_printf(&buf, "at btree %s level %u:\nparent: ",
		   bch2_btree_id_str(b->c.btree_id), b->c.level);
	bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key));

	if (prev) {
		prt_printf(&buf, "\nprev: ");
		bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&prev->key));
	}

	prt_str(&buf, "\nnext: ");
	bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&cur->key));

	if (bpos_lt(expected_start, cur->data->min_key)) {	/* gap */
		if (b->c.level == 1 &&
		    bpos_lt(*pulled_from_scan, cur->data->min_key)) {
			ret = bch2_get_scanned_nodes(c, b->c.btree_id, 0,
						     expected_start,
						     bpos_predecessor(cur->data->min_key));
			if (ret)
				goto err;

			*pulled_from_scan = cur->data->min_key;
			ret = DID_FILL_FROM_SCAN;
		} else {
			if (mustfix_fsck_err(c, btree_node_topology_bad_min_key,
					     "btree node with incorrect min_key%s", buf.buf))
				ret = set_node_min(c, cur, expected_start);
		}
	} else {						/* overlap */
		if (prev && BTREE_NODE_SEQ(cur->data) > BTREE_NODE_SEQ(prev->data)) { /* cur overwrites prev */
			if (bpos_ge(prev->data->min_key, cur->data->min_key)) { /* fully? */
				if (mustfix_fsck_err(c, btree_node_topology_overwritten_by_next_node,
						     "btree node overwritten by next node%s", buf.buf))
					ret = DROP_PREV_NODE;
			} else {
				if (mustfix_fsck_err(c, btree_node_topology_bad_max_key,
						     "btree node with incorrect max_key%s", buf.buf))
					ret = set_node_max(c, prev,
							   bpos_predecessor(cur->data->min_key));
			}
		} else {
			if (bpos_ge(expected_start, cur->data->max_key)) { /* fully? */
				if (mustfix_fsck_err(c, btree_node_topology_overwritten_by_prev_node,
						     "btree node overwritten by prev node%s", buf.buf))
					ret = DROP_THIS_NODE;
			} else {
				if (mustfix_fsck_err(c, btree_node_topology_bad_min_key,
						     "btree node with incorrect min_key%s", buf.buf))
					ret = set_node_min(c, cur, expected_start);
			}
		}
	}
err:
fsck_err:
	printbuf_exit(&buf);
	return ret;
}

static int btree_repair_node_end(struct bch_fs *c, struct btree *b,
				 struct btree *child, struct bpos *pulled_from_scan)
{
	struct printbuf buf = PRINTBUF;
	int ret = 0;

	if (bpos_eq(child->key.k.p, b->key.k.p))
		return 0;

	prt_printf(&buf, "at btree %s level %u:\nparent: ",
		   bch2_btree_id_str(b->c.btree_id), b->c.level);
	bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key));

	prt_str(&buf, "\nchild: ");
	bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&child->key));

	if (mustfix_fsck_err(c, btree_node_topology_bad_max_key,
			     "btree node with incorrect max_key%s", buf.buf)) {
		if (b->c.level == 1 &&
		    bpos_lt(*pulled_from_scan, b->key.k.p)) {
			ret = bch2_get_scanned_nodes(c, b->c.btree_id, 0,
						     bpos_successor(child->key.k.p), b->key.k.p);
			if (ret)
				goto err;

			*pulled_from_scan = b->key.k.p;
			ret = DID_FILL_FROM_SCAN;
		} else {
			ret = set_node_max(c, child, b->key.k.p);
		}
	}
err:
fsck_err:
	printbuf_exit(&buf);
	return ret;
}

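/*
 * Walk an interior node's children, evicting unreadable nodes, repairing
 * boundaries and recursing; returns DROP_THIS_NODE if the node ends up with
 * no usable children.
 */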
static int bch2_btree_repair_topology_recurse(struct btree_trans *trans, struct btree *b,
					      struct bpos *pulled_from_scan)
{
	struct bch_fs *c = trans->c;
	struct btree_and_journal_iter iter;
	struct bkey_s_c k;
	struct bkey_buf prev_k, cur_k;
	struct btree *prev = NULL, *cur = NULL;
	bool have_child, new_pass = false;
	struct printbuf buf = PRINTBUF;
	int ret = 0;

	if (!b->c.level)
		return 0;

	bch2_bkey_buf_init(&prev_k);
	bch2_bkey_buf_init(&cur_k);
again:
	cur = prev = NULL;
	have_child = new_pass = false;
	bch2_btree_and_journal_iter_init_node_iter(trans, &iter, b);
	iter.prefetch = true;

	while ((k = bch2_btree_and_journal_iter_peek(&iter)).k) {
		BUG_ON(bpos_lt(k.k->p, b->data->min_key));
		BUG_ON(bpos_gt(k.k->p, b->data->max_key));

		bch2_btree_and_journal_iter_advance(&iter);
		bch2_bkey_buf_reassemble(&cur_k, c, k);

		cur = bch2_btree_node_get_noiter(trans, cur_k.k,
						 b->c.btree_id, b->c.level - 1,
						 false);
		ret = PTR_ERR_OR_ZERO(cur);

		printbuf_reset(&buf);
		bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(cur_k.k));

		if (mustfix_fsck_err_on(bch2_err_matches(ret, EIO), c,
				btree_node_unreadable,
				"Topology repair: unreadable btree node at btree %s level %u:\n"
				"%s",
				bch2_btree_id_str(b->c.btree_id),
				b->c.level - 1,
				buf.buf)) {
			bch2_btree_node_evict(trans, cur_k.k);
			cur = NULL;
			ret = bch2_journal_key_delete(c, b->c.btree_id,
						      b->c.level, cur_k.k->k.p);
			if (ret)
				break;

			if (!btree_id_is_alloc(b->c.btree_id)) {
				ret = bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_scan_for_btree_nodes);
				if (ret)
					break;
			}
			continue;
		}

		bch_err_msg(c, ret, "getting btree node");
		if (ret)
			break;

		if (bch2_btree_node_is_stale(c, cur)) {
			bch_info(c, "btree node %s older than nodes found by scanning", buf.buf);
			six_unlock_read(&cur->c.lock);
			bch2_btree_node_evict(trans, cur_k.k);
			ret = bch2_journal_key_delete(c, b->c.btree_id,
						      b->c.level, cur_k.k->k.p);
			cur = NULL;
			if (ret)
				break;
			continue;
		}

		ret = btree_check_node_boundaries(c, b, prev, cur, pulled_from_scan);
		if (ret == DID_FILL_FROM_SCAN) {
			new_pass = true;
			ret = 0;
		}

		if (ret == DROP_THIS_NODE) {
			six_unlock_read(&cur->c.lock);
			bch2_btree_node_evict(trans, cur_k.k);
			ret = bch2_journal_key_delete(c, b->c.btree_id,
						      b->c.level, cur_k.k->k.p);
			cur = NULL;
			if (ret)
				break;
			continue;
		}

		if (prev)
			six_unlock_read(&prev->c.lock);
		prev = NULL;

		if (ret == DROP_PREV_NODE) {
			bch_info(c, "dropped prev node");
			bch2_btree_node_evict(trans, prev_k.k);
			ret = bch2_journal_key_delete(c, b->c.btree_id,
						      b->c.level, prev_k.k->k.p);
			if (ret)
				break;

			bch2_btree_and_journal_iter_exit(&iter);
			goto again;
		} else if (ret)
			break;

		prev = cur;
		cur = NULL;
		bch2_bkey_buf_copy(&prev_k, c, cur_k.k);
	}

	if (!ret && !IS_ERR_OR_NULL(prev)) {
		BUG_ON(cur);
		ret = btree_repair_node_end(c, b, prev, pulled_from_scan);
		if (ret == DID_FILL_FROM_SCAN) {
			new_pass = true;
			ret = 0;
		}
	}

	if (!IS_ERR_OR_NULL(prev))
		six_unlock_read(&prev->c.lock);
	prev = NULL;
	if (!IS_ERR_OR_NULL(cur))
		six_unlock_read(&cur->c.lock);
	cur = NULL;

	if (ret)
		goto err;

	bch2_btree_and_journal_iter_exit(&iter);

	if (new_pass)
		goto again;

	bch2_btree_and_journal_iter_init_node_iter(trans, &iter, b);
	iter.prefetch = true;

	while ((k = bch2_btree_and_journal_iter_peek(&iter)).k) {
		bch2_bkey_buf_reassemble(&cur_k, c, k);
		bch2_btree_and_journal_iter_advance(&iter);

		cur = bch2_btree_node_get_noiter(trans, cur_k.k,
						 b->c.btree_id, b->c.level - 1,
						 false);
		ret = PTR_ERR_OR_ZERO(cur);

		bch_err_msg(c, ret, "getting btree node");
		if (ret)
			goto err;

		ret = bch2_btree_repair_topology_recurse(trans, cur, pulled_from_scan);
		six_unlock_read(&cur->c.lock);
		cur = NULL;

		if (ret == DROP_THIS_NODE) {
			bch2_btree_node_evict(trans, cur_k.k);
			ret = bch2_journal_key_delete(c, b->c.btree_id,
						      b->c.level, cur_k.k->k.p);
			new_pass = true;
		}

		if (ret)
			goto err;

		have_child = true;
	}

	printbuf_reset(&buf);
	bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key));

	if (mustfix_fsck_err_on(!have_child, c,
			btree_node_topology_interior_node_empty,
			"empty interior btree node at btree %s level %u\n"
			"%s",
			bch2_btree_id_str(b->c.btree_id),
			b->c.level, buf.buf))
		ret = DROP_THIS_NODE;
err:
fsck_err:
	if (!IS_ERR_OR_NULL(prev))
		six_unlock_read(&prev->c.lock);
	if (!IS_ERR_OR_NULL(cur))
		six_unlock_read(&cur->c.lock);

	bch2_btree_and_journal_iter_exit(&iter);

	if (!ret && new_pass)
		goto again;

	BUG_ON(!ret && bch2_btree_node_check_topology(trans, b));

	bch2_bkey_buf_exit(&prev_k, c);
	bch2_bkey_buf_exit(&cur_k, c);
	printbuf_exit(&buf);
	return ret;
}

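/*
 * Check and repair topology for every btree root, reconstructing roots from
 * the btree node scan pass when they're unreadable.
 */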
int bch2_check_topology(struct bch_fs *c)
{
	struct btree_trans *trans = bch2_trans_get(c);
	struct bpos pulled_from_scan = POS_MIN;
	int ret = 0;

	for (unsigned i = 0; i < btree_id_nr_alive(c) && !ret; i++) {
		struct btree_root *r = bch2_btree_id_root(c, i);
		bool reconstructed_root = false;

		if (r->error) {
			ret = bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_scan_for_btree_nodes);
			if (ret)
				break;
reconstruct_root:
			bch_info(c, "btree root %s unreadable, must recover from scan", bch2_btree_id_str(i));

			r->alive = false;
			r->error = 0;

			if (!bch2_btree_has_scanned_nodes(c, i)) {
				mustfix_fsck_err(c, btree_root_unreadable_and_scan_found_nothing,
						 "no nodes found for btree %s, continue?", bch2_btree_id_str(i));
				bch2_btree_root_alloc_fake_trans(trans, i, 0);
			} else {
				bch2_btree_root_alloc_fake_trans(trans, i, 1);
				bch2_shoot_down_journal_keys(c, i, 1, BTREE_MAX_DEPTH, POS_MIN, SPOS_MAX);
				ret = bch2_get_scanned_nodes(c, i, 0, POS_MIN, SPOS_MAX);
				if (ret)
					break;
			}

			reconstructed_root = true;
		}

		struct btree *b = r->b;

		btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_read);
		ret = bch2_btree_repair_topology_recurse(trans, b, &pulled_from_scan);
		six_unlock_read(&b->c.lock);

		if (ret == DROP_THIS_NODE) {
			bch2_btree_node_hash_remove(&c->btree_cache, b);
			mutex_lock(&c->btree_cache.lock);
			list_move(&b->list, &c->btree_cache.freeable);
			mutex_unlock(&c->btree_cache.lock);

			r->b = NULL;

			if (!reconstructed_root)
				goto reconstruct_root;

			bch_err(c, "empty btree root %s", bch2_btree_id_str(i));
			bch2_btree_root_alloc_fake_trans(trans, i, 0);
			r->alive = false;
			ret = 0;
		}
	}
fsck_err:
	bch2_trans_put(trans);
	return ret;
}

/* marking of btree keys/nodes: */

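/*
 * Mark a single key for GC: run the check_repair trigger, commit and restart
 * if that queued any updates, then run the GC trigger to update GC's copy of
 * the accounting.
 */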
static int bch2_gc_mark_key(struct btree_trans *trans, enum btree_id btree_id,
			    unsigned level, struct btree **prev,
			    struct btree_iter *iter, struct bkey_s_c k,
			    bool initial)
{
	struct bch_fs *c = trans->c;

	if (iter) {
		struct btree_path *path = btree_iter_path(trans, iter);
		struct btree *b = path_l(path)->b;

		if (*prev != b) {
			int ret = bch2_btree_node_check_topology(trans, b);
			if (ret)
				return ret;
		}
		*prev = b;
	}

	struct bkey deleted = KEY(0, 0, 0);
	struct bkey_s_c old = (struct bkey_s_c) { &deleted, NULL };
	struct printbuf buf = PRINTBUF;
	int ret = 0;

	deleted.p = k.k->p;

	if (initial) {
		BUG_ON(bch2_journal_seq_verify &&
		       k.k->version.lo > atomic64_read(&c->journal.seq));

		if (fsck_err_on(k.k->version.lo > atomic64_read(&c->key_version), c,
				bkey_version_in_future,
				"key version number higher than recorded: %llu > %llu",
				k.k->version.lo,
				atomic64_read(&c->key_version)))
			atomic64_set(&c->key_version, k.k->version.lo);
	}

	if (mustfix_fsck_err_on(level && !bch2_dev_btree_bitmap_marked(c, k),
				c, btree_bitmap_not_marked,
				"btree ptr not marked in member info btree allocated bitmap\n%s",
				(bch2_bkey_val_to_text(&buf, c, k),
				 buf.buf))) {
		mutex_lock(&c->sb_lock);
		bch2_dev_btree_bitmap_mark(c, k);
		bch2_write_super(c);
		mutex_unlock(&c->sb_lock);
	}

	/*
	 * We require a commit before key_trigger() because
	 * key_trigger(BTREE_TRIGGER_GC) is not idempotent; we'll calculate the
	 * wrong result if we run it multiple times.
	 */
	unsigned flags = !iter ? BTREE_TRIGGER_is_root : 0;

	ret = bch2_key_trigger(trans, btree_id, level, old, unsafe_bkey_s_c_to_s(k),
			       BTREE_TRIGGER_check_repair|flags);
	if (ret)
		goto out;

	if (trans->nr_updates) {
		ret = bch2_trans_commit(trans, NULL, NULL, 0) ?:
			-BCH_ERR_transaction_restart_nested;
		goto out;
	}

	ret = bch2_key_trigger(trans, btree_id, level, old, unsafe_bkey_s_c_to_s(k),
			       BTREE_TRIGGER_gc|flags);
out:
fsck_err:
	printbuf_exit(&buf);
	bch_err_fn(c, ret);
	return ret;
}

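/* Walk one btree, marking the root and then each level from the top down. */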
static int bch2_gc_btree(struct btree_trans *trans, enum btree_id btree, bool initial)
{
	struct bch_fs *c = trans->c;
	int level = 0, target_depth = btree_node_type_needs_gc(__btree_node_type(0, btree)) ? 0 : 1;
	int ret = 0;

	/* We need to make sure every leaf node is readable before going RW */
	if (initial)
		target_depth = 0;

	/* root */
	mutex_lock(&c->btree_root_lock);
	struct btree *b = bch2_btree_id_root(c, btree)->b;
	if (!btree_node_fake(b)) {
		gc_pos_set(c, gc_pos_btree(btree, b->c.level + 1, SPOS_MAX));
		ret = lockrestart_do(trans,
			bch2_gc_mark_key(trans, b->c.btree_id, b->c.level + 1,
					 NULL, NULL, bkey_i_to_s_c(&b->key), initial));
		level = b->c.level;
	}
	mutex_unlock(&c->btree_root_lock);

	if (ret)
		return ret;

	for (; level >= target_depth; --level) {
		struct btree *prev = NULL;
		struct btree_iter iter;
		bch2_trans_node_iter_init(trans, &iter, btree, POS_MIN, 0, level,
					  BTREE_ITER_prefetch);

		ret = for_each_btree_key_continue(trans, iter, 0, k, ({
			gc_pos_set(c, gc_pos_btree(btree, level, k.k->p));
			bch2_gc_mark_key(trans, btree, level, &prev, &iter, k, initial);
		}));
		if (ret)
			break;
	}

	return ret;
}

static inline int btree_id_gc_phase_cmp(enum btree_id l, enum btree_id r)
{
	return (int) btree_id_to_gc_phase(l) -
		(int) btree_id_to_gc_phase(r);
}

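/*
 * Walk all btrees in GC phase order; a read error here kicks off the
 * topology repair pass.
 */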
static int bch2_gc_btrees(struct bch_fs *c)
{
	struct btree_trans *trans = bch2_trans_get(c);
	enum btree_id ids[BTREE_ID_NR];
	unsigned i;
	int ret = 0;

	for (i = 0; i < BTREE_ID_NR; i++)
		ids[i] = i;
	bubble_sort(ids, BTREE_ID_NR, btree_id_gc_phase_cmp);

	for (i = 0; i < btree_id_nr_alive(c) && !ret; i++) {
		unsigned btree = i < BTREE_ID_NR ? ids[i] : i;

		if (IS_ERR_OR_NULL(bch2_btree_id_root(c, btree)->b))
			continue;

		ret = bch2_gc_btree(trans, btree, true);

		if (mustfix_fsck_err_on(bch2_err_matches(ret, EIO),
					c, btree_node_read_error,
					"btree node read error for %s",
					bch2_btree_id_str(btree)))
			ret = bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_check_topology);
	}
fsck_err:
	bch2_trans_put(trans);
	bch_err_fn(c, ret);
	return ret;
}

static int bch2_mark_superblocks(struct bch_fs *c)
{
	mutex_lock(&c->sb_lock);
	gc_pos_set(c, gc_phase(GC_PHASE_SB));

	int ret = bch2_trans_mark_dev_sbs_flags(c, BTREE_TRIGGER_gc);
	mutex_unlock(&c->sb_lock);
	return ret;
}

static void bch2_gc_free(struct bch_fs *c)
{
	genradix_free(&c->reflink_gc_table);
	genradix_free(&c->gc_stripes);

	for_each_member_device(c, ca) {
		kvfree(rcu_dereference_protected(ca->buckets_gc, 1));
		ca->buckets_gc = NULL;

		free_percpu(ca->usage_gc);
		ca->usage_gc = NULL;
	}

	free_percpu(c->usage_gc);
	c->usage_gc = NULL;
}

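/*
 * Compare the usage totals GC just computed against the in-memory totals,
 * reporting and fixing any mismatch.
 */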
static int bch2_gc_done(struct bch_fs *c)
{
	struct bch_dev *ca = NULL;
	struct printbuf buf = PRINTBUF;
	unsigned i;
	int ret = 0;

	percpu_down_write(&c->mark_lock);

#define copy_field(_err, _f, _msg, ...)						\
	if (fsck_err_on(dst->_f != src->_f, c, _err,				\
			_msg ": got %llu, should be %llu", ##__VA_ARGS__,	\
			dst->_f, src->_f))					\
		dst->_f = src->_f
#define copy_dev_field(_err, _f, _msg, ...)					\
	copy_field(_err, _f, "dev %u has wrong " _msg, ca->dev_idx, ##__VA_ARGS__)
#define copy_fs_field(_err, _f, _msg, ...)					\
	copy_field(_err, _f, "fs has wrong " _msg, ##__VA_ARGS__)

	for (i = 0; i < ARRAY_SIZE(c->usage); i++)
		bch2_fs_usage_acc_to_base(c, i);

	__for_each_member_device(c, ca) {
		struct bch_dev_usage *dst = ca->usage_base;
		struct bch_dev_usage *src = (void *)
			bch2_acc_percpu_u64s((u64 __percpu *) ca->usage_gc,
					     dev_usage_u64s());

		for (i = 0; i < BCH_DATA_NR; i++) {
			copy_dev_field(dev_usage_buckets_wrong,
				       d[i].buckets,	"%s buckets", bch2_data_type_str(i));
			copy_dev_field(dev_usage_sectors_wrong,
				       d[i].sectors,	"%s sectors", bch2_data_type_str(i));
			copy_dev_field(dev_usage_fragmented_wrong,
				       d[i].fragmented,	"%s fragmented", bch2_data_type_str(i));
		}
	}

	{
		unsigned nr = fs_usage_u64s(c);
		struct bch_fs_usage *dst = c->usage_base;
		struct bch_fs_usage *src = (void *)
			bch2_acc_percpu_u64s((u64 __percpu *) c->usage_gc, nr);

		copy_fs_field(fs_usage_hidden_wrong,
			      b.hidden,		"hidden");
		copy_fs_field(fs_usage_btree_wrong,
			      b.btree,		"btree");

		copy_fs_field(fs_usage_data_wrong,
			      b.data,		"data");
		copy_fs_field(fs_usage_cached_wrong,
			      b.cached,		"cached");
		copy_fs_field(fs_usage_reserved_wrong,
			      b.reserved,	"reserved");
		copy_fs_field(fs_usage_nr_inodes_wrong,
			      b.nr_inodes,	"nr_inodes");

		for (i = 0; i < BCH_REPLICAS_MAX; i++)
			copy_fs_field(fs_usage_persistent_reserved_wrong,
				      persistent_reserved[i],
				      "persistent_reserved[%i]", i);

		for (i = 0; i < c->replicas.nr; i++) {
			struct bch_replicas_entry_v1 *e =
				cpu_replicas_entry(&c->replicas, i);

			printbuf_reset(&buf);
			bch2_replicas_entry_to_text(&buf, e);

			copy_fs_field(fs_usage_replicas_wrong,
				      replicas[i], "%s", buf.buf);
		}
	}

#undef copy_fs_field
#undef copy_dev_field
#undef copy_stripe_field
#undef copy_field
fsck_err:
	bch2_dev_put(ca);
	bch_err_fn(c, ret);
	percpu_up_write(&c->mark_lock);
	printbuf_exit(&buf);
	return ret;
}

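/* Allocate the per-cpu "gc" copies of filesystem and device usage. */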
static int bch2_gc_start(struct bch_fs *c)
{
	BUG_ON(c->usage_gc);

	c->usage_gc = __alloc_percpu_gfp(fs_usage_u64s(c) * sizeof(u64),
					 sizeof(u64), GFP_KERNEL);
	if (!c->usage_gc) {
		bch_err(c, "error allocating c->usage_gc");
		return -BCH_ERR_ENOMEM_gc_start;
	}

	for_each_member_device(c, ca) {
		BUG_ON(ca->usage_gc);

		ca->usage_gc = alloc_percpu(struct bch_dev_usage);
		if (!ca->usage_gc) {
			bch_err(c, "error allocating ca->usage_gc");
			bch2_dev_put(ca);
			return -BCH_ERR_ENOMEM_gc_start;
		}

		this_cpu_write(ca->usage_gc->d[BCH_DATA_free].buckets,
			       ca->mi.nbuckets - ca->mi.first_bucket);
	}

	return 0;
}

/* returns true if not equal */
static inline bool bch2_alloc_v4_cmp(struct bch_alloc_v4 l,
				     struct bch_alloc_v4 r)
{
	return  l.gen != r.gen				||
		l.oldest_gen != r.oldest_gen		||
		l.data_type != r.data_type		||
		l.dirty_sectors	!= r.dirty_sectors	||
		l.cached_sectors != r.cached_sectors	||
		l.stripe_redundancy != r.stripe_redundancy ||
		l.stripe != r.stripe;
}

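/*
 * Fix up one alloc key from GC's view of the bucket; superblock/journal
 * buckets on offline devices keep their existing counts.
 */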
static int bch2_alloc_write_key(struct btree_trans *trans,
				struct btree_iter *iter,
				struct bch_dev *ca,
				struct bkey_s_c k)
{
	struct bch_fs *c = trans->c;
	struct bkey_i_alloc_v4 *a;
	struct bch_alloc_v4 old_gc, gc, old_convert, new;
	const struct bch_alloc_v4 *old;
	int ret;

	old = bch2_alloc_to_v4(k, &old_convert);
	gc = new = *old;

	percpu_down_read(&c->mark_lock);
	__bucket_m_to_alloc(&gc, *gc_bucket(ca, iter->pos.offset));

	old_gc = gc;

	if ((old->data_type == BCH_DATA_sb ||
	     old->data_type == BCH_DATA_journal) &&
	    !bch2_dev_is_online(ca)) {
		gc.data_type = old->data_type;
		gc.dirty_sectors = old->dirty_sectors;
	}

	/*
	 * gc.data_type doesn't yet include need_discard & need_gc_gen states -
	 * fix that here:
	 */
	alloc_data_type_set(&gc, gc.data_type);

	if (gc.data_type != old_gc.data_type ||
	    gc.dirty_sectors != old_gc.dirty_sectors)
		bch2_dev_usage_update(c, ca, &old_gc, &gc, 0, true);
	percpu_up_read(&c->mark_lock);

	if (fsck_err_on(new.data_type != gc.data_type, c,
			alloc_key_data_type_wrong,
			"bucket %llu:%llu gen %u has wrong data_type"
			": got %s, should be %s",
			iter->pos.inode, iter->pos.offset,
			gc.gen,
			bch2_data_type_str(new.data_type),
			bch2_data_type_str(gc.data_type)))
		new.data_type = gc.data_type;

#define copy_bucket_field(_errtype, _f)						\
	if (fsck_err_on(new._f != gc._f, c, _errtype,				\
			"bucket %llu:%llu gen %u data type %s has wrong " #_f	\
			": got %u, should be %u",				\
			iter->pos.inode, iter->pos.offset,			\
			gc.gen,							\
			bch2_data_type_str(gc.data_type),			\
			new._f, gc._f))						\
		new._f = gc._f;							\

	copy_bucket_field(alloc_key_gen_wrong,
			  gen);
	copy_bucket_field(alloc_key_dirty_sectors_wrong,
			  dirty_sectors);
	copy_bucket_field(alloc_key_cached_sectors_wrong,
			  cached_sectors);
	copy_bucket_field(alloc_key_stripe_wrong,
			  stripe);
	copy_bucket_field(alloc_key_stripe_redundancy_wrong,
			  stripe_redundancy);
#undef copy_bucket_field

	if (!bch2_alloc_v4_cmp(*old, new))
		return 0;

	a = bch2_alloc_to_v4_mut(trans, k);
	ret = PTR_ERR_OR_ZERO(a);
	if (ret)
		return ret;

	a->v = new;

	/*
	 * The trigger normally makes sure this is set, but we're not running
	 * triggers:
	 */
	if (a->v.data_type == BCH_DATA_cached && !a->v.io_time[READ])
		a->v.io_time[READ] = max_t(u64, 1, atomic64_read(&c->io_clock[READ].now));

	ret = bch2_trans_update(trans, iter, &a->k_i, BTREE_TRIGGER_norun);
fsck_err:
	return ret;
}

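/* Write out repaired alloc keys for every bucket on every member device. */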
static int bch2_gc_alloc_done(struct bch_fs *c)
{
	int ret = 0;

	for_each_member_device(c, ca) {
		ret = bch2_trans_run(c,
			for_each_btree_key_upto_commit(trans, iter, BTREE_ID_alloc,
					POS(ca->dev_idx, ca->mi.first_bucket),
					POS(ca->dev_idx, ca->mi.nbuckets - 1),
					BTREE_ITER_slots|BTREE_ITER_prefetch, k,
					NULL, NULL, BCH_TRANS_COMMIT_lazy_rw,
				bch2_alloc_write_key(trans, &iter, ca, k)));
		if (ret) {
			bch2_dev_put(ca);
			break;
		}
	}

	bch_err_fn(c, ret);
	return ret;
}

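/*
 * Allocate the shadow bucket arrays GC marks into, seeding bucket gens from
 * the alloc btree.
 */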
static int bch2_gc_alloc_start(struct bch_fs *c)
{
	for_each_member_device(c, ca) {
		struct bucket_array *buckets = kvmalloc(sizeof(struct bucket_array) +
				ca->mi.nbuckets * sizeof(struct bucket),
				GFP_KERNEL|__GFP_ZERO);
		if (!buckets) {
			bch2_dev_put(ca);
			bch_err(c, "error allocating ca->buckets[gc]");
			return -BCH_ERR_ENOMEM_gc_alloc_start;
		}

		buckets->first_bucket	= ca->mi.first_bucket;
		buckets->nbuckets	= ca->mi.nbuckets;
		rcu_assign_pointer(ca->buckets_gc, buckets);
	}

	struct bch_dev *ca = NULL;
	int ret = bch2_trans_run(c,
		for_each_btree_key(trans, iter, BTREE_ID_alloc, POS_MIN,
				   BTREE_ITER_prefetch, k, ({
			ca = bch2_dev_iterate(c, ca, k.k->p.inode);
			if (!ca) {
				bch2_btree_iter_set_pos(&iter, POS(k.k->p.inode + 1, 0));
				continue;
			}

			struct bch_alloc_v4 a_convert;
			const struct bch_alloc_v4 *a = bch2_alloc_to_v4(k, &a_convert);

			struct bucket *g = gc_bucket(ca, k.k->p.offset);
			g->gen_valid	= 1;
			g->gen		= a->gen;
			0;
		})));
	bch2_dev_put(ca);
	bch_err_fn(c, ret);
	return ret;
}

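/* Reflink GC: fix refcounts that don't match what GC counted. */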
static int bch2_gc_write_reflink_key(struct btree_trans *trans,
				     struct btree_iter *iter,
				     struct bkey_s_c k,
				     size_t *idx)
{
	struct bch_fs *c = trans->c;
	const __le64 *refcount = bkey_refcount_c(k);
	struct printbuf buf = PRINTBUF;
	struct reflink_gc *r;
	int ret = 0;

	if (!refcount)
		return 0;

	while ((r = genradix_ptr(&c->reflink_gc_table, *idx)) &&
	       r->offset < k.k->p.offset)
		++*idx;

	if (!r ||
	    r->offset != k.k->p.offset ||
	    r->size != k.k->size) {
		bch_err(c, "unexpected inconsistency walking reflink table at gc finish");
		return -EINVAL;
	}

	if (fsck_err_on(r->refcount != le64_to_cpu(*refcount), c,
			reflink_v_refcount_wrong,
			"reflink key has wrong refcount:\n"
			"%s\n"
			"should be %u",
			(bch2_bkey_val_to_text(&buf, c, k), buf.buf),
			r->refcount)) {
		struct bkey_i *new = bch2_bkey_make_mut_noupdate(trans, k);
		ret = PTR_ERR_OR_ZERO(new);
		if (ret)
			goto out;

		if (!r->refcount)
			new->k.type = KEY_TYPE_deleted;
		else
			*bkey_refcount(bkey_i_to_s(new)) = cpu_to_le64(r->refcount);

		ret = bch2_trans_update(trans, iter, new, 0);
	}
out:
fsck_err:
	printbuf_exit(&buf);
	return ret;
}

static int bch2_gc_reflink_done(struct bch_fs *c)
{
	size_t idx = 0;

	int ret = bch2_trans_run(c,
		for_each_btree_key_commit(trans, iter,
				BTREE_ID_reflink, POS_MIN,
				BTREE_ITER_prefetch, k,
				NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
			bch2_gc_write_reflink_key(trans, &iter, k, &idx)));
	c->reflink_gc_nr = 0;
	return ret;
}

static int bch2_gc_reflink_start(struct bch_fs *c)
{
	c->reflink_gc_nr = 0;

	int ret = bch2_trans_run(c,
		for_each_btree_key(trans, iter, BTREE_ID_reflink, POS_MIN,
				   BTREE_ITER_prefetch, k, ({
			const __le64 *refcount = bkey_refcount_c(k);

			if (!refcount)
				continue;

			struct reflink_gc *r = genradix_ptr_alloc(&c->reflink_gc_table,
							c->reflink_gc_nr++, GFP_KERNEL);
			if (!r) {
				ret = -BCH_ERR_ENOMEM_gc_reflink_start;
				break;
			}

			r->offset	= k.k->p.offset;
			r->size		= k.k->size;
			r->refcount	= 0;
			0;
		})));

	bch_err_fn(c, ret);
	return ret;
}

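/* Stripe GC: fix per-block sector counts that don't match what GC counted. */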
static int bch2_gc_write_stripes_key(struct btree_trans *trans,
				     struct btree_iter *iter,
				     struct bkey_s_c k)
{
	struct bch_fs *c = trans->c;
	struct printbuf buf = PRINTBUF;
	const struct bch_stripe *s;
	struct gc_stripe *m;
	bool bad = false;
	unsigned i;
	int ret = 0;

	if (k.k->type != KEY_TYPE_stripe)
		return 0;

	s = bkey_s_c_to_stripe(k).v;
	m = genradix_ptr(&c->gc_stripes, k.k->p.offset);

	for (i = 0; i < s->nr_blocks; i++) {
		u32 old = stripe_blockcount_get(s, i);
		u32 new = (m ? m->block_sectors[i] : 0);

		if (old != new) {
			prt_printf(&buf, "stripe block %u has wrong sector count: got %u, should be %u\n",
				   i, old, new);
			bad = true;
		}
	}

	if (bad)
		bch2_bkey_val_to_text(&buf, c, k);

	if (fsck_err_on(bad, c, stripe_sector_count_wrong,
			"%s", buf.buf)) {
		struct bkey_i_stripe *new;

		new = bch2_trans_kmalloc(trans, bkey_bytes(k.k));
		ret = PTR_ERR_OR_ZERO(new);
		if (ret)
			return ret;

		bkey_reassemble(&new->k_i, k);

		for (i = 0; i < new->v.nr_blocks; i++)
			stripe_blockcount_set(&new->v, i, m ? m->block_sectors[i] : 0);

		ret = bch2_trans_update(trans, iter, &new->k_i, 0);
	}
fsck_err:
	printbuf_exit(&buf);
	return ret;
}

static int bch2_gc_stripes_done(struct bch_fs *c)
{
	return bch2_trans_run(c,
		for_each_btree_key_commit(trans, iter,
				BTREE_ID_stripes, POS_MIN,
				BTREE_ITER_prefetch, k,
				NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
			bch2_gc_write_stripes_key(trans, &iter, k)));
}

/**
 * bch2_check_allocations - walk all references to buckets, and recompute them:
 *
 * @c:			filesystem object
 *
 * Returns: 0 on success, or standard errcode on failure
 *
 * Order matters here:
 *  - Concurrent GC relies on the fact that we have a total ordering for
 *    everything that GC walks - see  gc_will_visit_node(),
 *    gc_will_visit_root()
 *
 *  - also, references move around in the course of index updates and
 *    various other crap: everything needs to agree on the ordering
 *    references are allowed to move around in - e.g., we're allowed to
 *    start with a reference owned by an open_bucket (the allocator) and
 *    move it to the btree, but not the reverse.
 *
 *    This is necessary to ensure that gc doesn't miss references that
 *    move around - if references move backwards in the ordering GC
 *    uses, GC could skip past them
 */
int bch2_check_allocations(struct bch_fs *c)
{
	int ret;

	lockdep_assert_held(&c->state_lock);

	down_write(&c->gc_lock);

	bch2_btree_interior_updates_flush(c);

	ret   = bch2_gc_start(c) ?:
		bch2_gc_alloc_start(c) ?:
		bch2_gc_reflink_start(c);
	if (ret)
		goto out;

	gc_pos_set(c, gc_phase(GC_PHASE_START));

	ret = bch2_mark_superblocks(c);
	BUG_ON(ret);

	ret = bch2_gc_btrees(c);
	if (ret)
		goto out;

	c->gc_count++;

	bch2_journal_block(&c->journal);
out:
	ret   = bch2_gc_alloc_done(c) ?:
		bch2_gc_done(c) ?:
		bch2_gc_stripes_done(c) ?:
		bch2_gc_reflink_done(c);

	bch2_journal_unblock(&c->journal);

	percpu_down_write(&c->mark_lock);
	/* Indicates that gc is no longer in progress: */
	__gc_pos_set(c, gc_phase(GC_PHASE_NOT_RUNNING));

	bch2_gc_free(c);
	percpu_up_write(&c->mark_lock);

	up_write(&c->gc_lock);

	/*
	 * At startup, allocations can happen directly instead of via the
	 * allocator thread - issue wakeup in case they blocked on gc_lock:
	 */
	closure_wake_up(&c->freelist_wait);

	bch_err_fn(c, ret);
	return ret;
}

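/*
 * If any pointer in this key is too far behind its bucket's current gen,
 * drop the stale pointers; otherwise note the oldest gen still referenced.
 */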
static int gc_btree_gens_key(struct btree_trans *trans,
			     struct btree_iter *iter,
			     struct bkey_s_c k)
{
	struct bch_fs *c = trans->c;
	struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
	struct bkey_i *u;
	int ret;

	if (unlikely(test_bit(BCH_FS_going_ro, &c->flags)))
		return -EROFS;

	percpu_down_read(&c->mark_lock);
	rcu_read_lock();
	bkey_for_each_ptr(ptrs, ptr) {
		struct bch_dev *ca = bch2_dev_rcu(c, ptr->dev);
		if (!ca)
			continue;

		if (dev_ptr_stale(ca, ptr) > 16) {
			rcu_read_unlock();
			percpu_up_read(&c->mark_lock);
			goto update;
		}
	}

	bkey_for_each_ptr(ptrs, ptr) {
		struct bch_dev *ca = bch2_dev_rcu(c, ptr->dev);
		if (!ca)
			continue;

		u8 *gen = &ca->oldest_gen[PTR_BUCKET_NR(ca, ptr)];
		if (gen_after(*gen, ptr->gen))
			*gen = ptr->gen;
	}
	rcu_read_unlock();
	percpu_up_read(&c->mark_lock);
	return 0;
update:
	u = bch2_bkey_make_mut(trans, iter, &k, 0);
	ret = PTR_ERR_OR_ZERO(u);
	if (ret)
		return ret;

	bch2_extent_normalize(c, bkey_i_to_s(u));
	return 0;
}

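/* Write the new oldest_gen to the alloc key if it changed. */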
static int bch2_alloc_write_oldest_gen(struct btree_trans *trans, struct bch_dev *ca,
				       struct btree_iter *iter, struct bkey_s_c k)
{
	struct bch_alloc_v4 a_convert;
	const struct bch_alloc_v4 *a = bch2_alloc_to_v4(k, &a_convert);
	struct bkey_i_alloc_v4 *a_mut;
	int ret;

	if (a->oldest_gen == ca->oldest_gen[iter->pos.offset])
		return 0;

	a_mut = bch2_alloc_to_v4_mut(trans, k);
	ret = PTR_ERR_OR_ZERO(a_mut);
	if (ret)
		return ret;

	a_mut->v.oldest_gen = ca->oldest_gen[iter->pos.offset];
	alloc_data_type_set(&a_mut->v, a_mut->v.data_type);

	return bch2_trans_update(trans, iter, &a_mut->k_i, 0);
}

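/*
 * Walk all pointers to find the oldest gen still in use for each bucket, and
 * write the result back to the alloc btree; this is what lets bucket gens
 * eventually be reused.
 */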
int bch2_gc_gens(struct bch_fs *c)
{
	u64 b, start_time = local_clock();
	int ret;

	/*
	 * Ideally we would be using state_lock and not gc_lock here, but that
	 * introduces a deadlock in the RO path - we currently take the state
	 * lock at the start of going RO, thus the gc thread may get stuck:
	 */
	if (!mutex_trylock(&c->gc_gens_lock))
		return 0;

	trace_and_count(c, gc_gens_start, c);
	down_read(&c->gc_lock);

	for_each_member_device(c, ca) {
		struct bucket_gens *gens = bucket_gens(ca);

		BUG_ON(ca->oldest_gen);

		ca->oldest_gen = kvmalloc(gens->nbuckets, GFP_KERNEL);
		if (!ca->oldest_gen) {
			bch2_dev_put(ca);
			ret = -BCH_ERR_ENOMEM_gc_gens;
			goto err;
		}

		for (b = gens->first_bucket;
		     b < gens->nbuckets; b++)
			ca->oldest_gen[b] = gens->b[b];
	}

	for (unsigned i = 0; i < BTREE_ID_NR; i++)
		if (btree_type_has_ptrs(i)) {
			c->gc_gens_btree = i;
			c->gc_gens_pos = POS_MIN;

			ret = bch2_trans_run(c,
				for_each_btree_key_commit(trans, iter, i,
						POS_MIN,
						BTREE_ITER_prefetch|BTREE_ITER_all_snapshots,
						k,
						NULL, NULL,
						BCH_TRANS_COMMIT_no_enospc,
					gc_btree_gens_key(trans, &iter, k)));
			if (ret)
				goto err;
		}

	struct bch_dev *ca = NULL;
	ret = bch2_trans_run(c,
		for_each_btree_key_commit(trans, iter, BTREE_ID_alloc,
				POS_MIN,
				BTREE_ITER_prefetch,
				k,
				NULL, NULL,
				BCH_TRANS_COMMIT_no_enospc, ({
			ca = bch2_dev_iterate(c, ca, k.k->p.inode);
			if (!ca) {
				bch2_btree_iter_set_pos(&iter, POS(k.k->p.inode + 1, 0));
				continue;
			}
			bch2_alloc_write_oldest_gen(trans, ca, &iter, k);
		})));
	bch2_dev_put(ca);

	if (ret)
		goto err;

	c->gc_gens_btree	= 0;
	c->gc_gens_pos		= POS_MIN;

	c->gc_count++;

	bch2_time_stats_update(&c->times[BCH_TIME_btree_gc], start_time);
	trace_and_count(c, gc_gens_end, c);
err:
	for_each_member_device(c, ca) {
		kvfree(ca->oldest_gen);
		ca->oldest_gen = NULL;
	}

	up_read(&c->gc_lock);
	mutex_unlock(&c->gc_gens_lock);
	if (!bch2_err_matches(ret, EROFS))
		bch_err_fn(c, ret);
	return ret;
}

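/* Async wrapper so gen GC can be kicked off from contexts that can't block. */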
static void bch2_gc_gens_work(struct work_struct *work)
{
	struct bch_fs *c = container_of(work, struct bch_fs, gc_gens_work);
	bch2_gc_gens(c);
	bch2_write_ref_put(c, BCH_WRITE_REF_gc_gens);
}

void bch2_gc_gens_async(struct bch_fs *c)
{
	if (bch2_write_ref_tryget(c, BCH_WRITE_REF_gc_gens) &&
	    !queue_work(c->write_ref_wq, &c->gc_gens_work))
		bch2_write_ref_put(c, BCH_WRITE_REF_gc_gens);
}

void bch2_fs_gc_init(struct bch_fs *c)
{
	seqcount_init(&c->gc_pos_lock);

	INIT_WORK(&c->gc_gens_work, bch2_gc_gens_work);
}