// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2010 Kent Overstreet <kent.overstreet@gmail.com>
 * Copyright (C) 2014 Datera Inc.
 */

#include "bcachefs.h"
#include "alloc_background.h"
#include "alloc_foreground.h"
#include "bkey_methods.h"
#include "bkey_buf.h"
#include "btree_key_cache.h"
#include "btree_locking.h"
#include "btree_update_interior.h"
#include "btree_io.h"
#include "btree_gc.h"
#include "buckets.h"
#include "clock.h"
#include "debug.h"
#include "ec.h"
#include "error.h"
#include "extents.h"
#include "journal.h"
#include "keylist.h"
#include "move.h"
#include "recovery.h"
#include "reflink.h"
#include "replicas.h"
#include "super-io.h"
#include "trace.h"

#include <linux/slab.h>
#include <linux/bitops.h>
#include <linux/freezer.h>
#include <linux/kthread.h>
#include <linux/preempt.h>
#include <linux/rcupdate.h>
#include <linux/sched/task.h>
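
/*
 * Return codes used by the topology repair pass below to tell the caller to
 * drop the node currently being checked, or the one before it.
 */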
#define DROP_THIS_NODE		10
#define DROP_PREV_NODE		11

static inline void __gc_pos_set(struct bch_fs *c, struct gc_pos new_pos)
{
	preempt_disable();
	write_seqcount_begin(&c->gc_pos_lock);
	c->gc_pos = new_pos;
	write_seqcount_end(&c->gc_pos_lock);
	preempt_enable();
}
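
/*
 * The GC position is only allowed to move forwards: gc_pos_set() asserts
 * that the new position compares strictly greater than the old one before
 * publishing it under gc_pos_lock.
 */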

static inline void gc_pos_set(struct bch_fs *c, struct gc_pos new_pos)
{
	BUG_ON(gc_pos_cmp(new_pos, c->gc_pos) <= 0);
	__gc_pos_set(c, new_pos);
}

/*
 * Missing: if an interior btree node is empty, we need to do something -
 * perhaps just kill it
 */
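
/*
 * Verify that a child node's range is consistent with its parent: its
 * min_key must immediately follow the previous child (or equal the parent's
 * min_key for the first child), and the last child must end at the parent's
 * max_key. On mismatch we either flag the error or request a topology
 * repair pass.
 */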

static int bch2_gc_check_topology(struct bch_fs *c,
				  struct btree *b,
				  struct bkey_buf *prev,
				  struct bkey_buf cur,
				  bool is_last)
{
	struct bpos node_start	= b->data->min_key;
	struct bpos node_end	= b->data->max_key;
	struct bpos expected_start = bkey_deleted(&prev->k->k)
		? node_start
		: bpos_successor(prev->k->k.p);
	struct printbuf buf1 = PRINTBUF, buf2 = PRINTBUF;
	int ret = 0;

	if (cur.k->k.type == KEY_TYPE_btree_ptr_v2) {
		struct bkey_i_btree_ptr_v2 *bp = bkey_i_to_btree_ptr_v2(cur.k);

		if (bpos_cmp(expected_start, bp->v.min_key)) {
			bch2_topology_error(c);

			if (bkey_deleted(&prev->k->k)) {
				prt_printf(&buf1, "start of node: ");
				bch2_bpos_to_text(&buf1, node_start);
			} else {
				bch2_bkey_val_to_text(&buf1, c, bkey_i_to_s_c(prev->k));
			}
			bch2_bkey_val_to_text(&buf2, c, bkey_i_to_s_c(cur.k));

			if (__fsck_err(c,
				       FSCK_CAN_FIX|
				       FSCK_CAN_IGNORE|
				       FSCK_NO_RATELIMIT,
				       "btree node with incorrect min_key at btree %s level %u:\n"
				       "  prev %s\n"
				       "  cur %s",
				       bch2_btree_ids[b->c.btree_id], b->c.level,
				       buf1.buf, buf2.buf) &&
			    !test_bit(BCH_FS_TOPOLOGY_REPAIR_DONE, &c->flags)) {
				bch_info(c, "Halting mark and sweep to start topology repair pass");
				ret = -BCH_ERR_need_topology_repair;
				goto err;
			} else {
				set_bit(BCH_FS_INITIAL_GC_UNFIXED, &c->flags);
			}
		}
	}

	if (is_last && bpos_cmp(cur.k->k.p, node_end)) {
		bch2_topology_error(c);

		printbuf_reset(&buf1);
		printbuf_reset(&buf2);

		bch2_bkey_val_to_text(&buf1, c, bkey_i_to_s_c(cur.k));
		bch2_bpos_to_text(&buf2, node_end);

		if (__fsck_err(c,
			       FSCK_CAN_FIX|
			       FSCK_CAN_IGNORE|
			       FSCK_NO_RATELIMIT,
			       "btree node with incorrect max_key at btree %s level %u:\n"
			       "  %s\n"
			       "  expected %s",
			       bch2_btree_ids[b->c.btree_id], b->c.level,
			       buf1.buf, buf2.buf) &&
		    !test_bit(BCH_FS_TOPOLOGY_REPAIR_DONE, &c->flags)) {
			bch_info(c, "Halting mark and sweep to start topology repair pass");
			ret = -BCH_ERR_need_topology_repair;
			goto err;
		} else {
			set_bit(BCH_FS_INITIAL_GC_UNFIXED, &c->flags);
		}
	}

	bch2_bkey_buf_copy(prev, c, cur.k);
err:
fsck_err:
	printbuf_exit(&buf2);
	printbuf_exit(&buf1);
	return ret;
}
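
/*
 * Build a btree_ptr_v2 copy of a node's key, so that min_key and the other
 * v2-only fields can be rewritten when repairing node boundaries.
 */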
static void btree_ptr_to_v2(struct btree *b, struct bkey_i_btree_ptr_v2 *dst)
{
	switch (b->key.k.type) {
	case KEY_TYPE_btree_ptr: {
		struct bkey_i_btree_ptr *src = bkey_i_to_btree_ptr(&b->key);

		dst->k.p		= src->k.p;
		dst->v.mem_ptr		= 0;
		dst->v.seq		= b->data->keys.seq;
		dst->v.sectors_written	= 0;
		dst->v.flags		= 0;
		dst->v.min_key		= b->data->min_key;
		set_bkey_val_bytes(&dst->k, sizeof(dst->v) + bkey_val_bytes(&src->k));
		memcpy(dst->v.start, src->v.start, bkey_val_bytes(&src->k));
		break;
	}
	case KEY_TYPE_btree_ptr_v2:
		bkey_copy(&dst->k_i, &b->key);
		break;
	default:
		BUG();
	}
}

static void bch2_btree_node_update_key_early(struct btree_trans *trans,
					     enum btree_id btree, unsigned level,
					     struct bkey_s_c old, struct bkey_i *new)
{
	struct bch_fs *c = trans->c;
	struct btree *b;
	struct bkey_buf tmp;
	int ret;

	bch2_bkey_buf_init(&tmp);
	bch2_bkey_buf_reassemble(&tmp, c, old);

	b = bch2_btree_node_get_noiter(trans, tmp.k, btree, level, true);
	if (!IS_ERR_OR_NULL(b)) {
		mutex_lock(&c->btree_cache.lock);

		bch2_btree_node_hash_remove(&c->btree_cache, b);

		bkey_copy(&b->key, new);
		ret = __bch2_btree_node_hash_insert(&c->btree_cache, b);
		BUG_ON(ret);

		mutex_unlock(&c->btree_cache.lock);
		six_unlock_read(&b->c.lock);
	}

	bch2_bkey_buf_exit(&tmp, c);
}

static int set_node_min(struct bch_fs *c, struct btree *b, struct bpos new_min)
{
	struct bkey_i_btree_ptr_v2 *new;
	int ret;

	new = kmalloc(BKEY_BTREE_PTR_U64s_MAX * sizeof(u64), GFP_KERNEL);
	if (!new)
		return -ENOMEM;

	btree_ptr_to_v2(b, new);
	b->data->min_key	= new_min;
	new->v.min_key		= new_min;
	SET_BTREE_PTR_RANGE_UPDATED(&new->v, true);

	ret = bch2_journal_key_insert_take(c, b->c.btree_id, b->c.level + 1, &new->k_i);
	if (ret) {
		kfree(new);
		return ret;
	}

	bch2_btree_node_drop_keys_outside_node(b);

	bkey_copy(&b->key, &new->k_i);
	return 0;
}

static int set_node_max(struct bch_fs *c, struct btree *b, struct bpos new_max)
{
	struct bkey_i_btree_ptr_v2 *new;
	int ret;

	ret = bch2_journal_key_delete(c, b->c.btree_id, b->c.level + 1, b->key.k.p);
	if (ret)
		return ret;

	new = kmalloc(BKEY_BTREE_PTR_U64s_MAX * sizeof(u64), GFP_KERNEL);
	if (!new)
		return -ENOMEM;

	btree_ptr_to_v2(b, new);
	b->data->max_key	= new_max;
	new->k.p		= new_max;
	SET_BTREE_PTR_RANGE_UPDATED(&new->v, true);

	ret = bch2_journal_key_insert_take(c, b->c.btree_id, b->c.level + 1, &new->k_i);
	if (ret) {
		kfree(new);
		return ret;
	}

	bch2_btree_node_drop_keys_outside_node(b);

	mutex_lock(&c->btree_cache.lock);
	bch2_btree_node_hash_remove(&c->btree_cache, b);

	bkey_copy(&b->key, &new->k_i);
	ret = __bch2_btree_node_hash_insert(&c->btree_cache, b);
	BUG_ON(ret);
	mutex_unlock(&c->btree_cache.lock);
	return 0;
}
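
/*
 * Fix up overlapping or discontiguous ranges between two sibling nodes:
 * depending on which node is newer (BTREE_NODE_SEQ), either adjust prev's
 * max_key/cur's min_key, or return DROP_PREV_NODE/DROP_THIS_NODE to have
 * the caller discard one of them entirely.
 */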

static int btree_repair_node_boundaries(struct bch_fs *c, struct btree *b,
					struct btree *prev, struct btree *cur)
{
	struct bpos expected_start = !prev
		? b->data->min_key
		: bpos_successor(prev->key.k.p);
	struct printbuf buf1 = PRINTBUF, buf2 = PRINTBUF;
	int ret = 0;

	if (!prev) {
		prt_printf(&buf1, "start of node: ");
		bch2_bpos_to_text(&buf1, b->data->min_key);
	} else {
		bch2_bkey_val_to_text(&buf1, c, bkey_i_to_s_c(&prev->key));
	}

	bch2_bkey_val_to_text(&buf2, c, bkey_i_to_s_c(&cur->key));

	if (prev &&
	    bpos_cmp(expected_start, cur->data->min_key) > 0 &&
	    BTREE_NODE_SEQ(cur->data) > BTREE_NODE_SEQ(prev->data)) {
		/* cur overwrites prev: */

		if (mustfix_fsck_err_on(bpos_cmp(prev->data->min_key,
						 cur->data->min_key) >= 0, c,
				"btree node overwritten by next node at btree %s level %u:\n"
				"  node %s\n"
				"  next %s",
				bch2_btree_ids[b->c.btree_id], b->c.level,
				buf1.buf, buf2.buf)) {
			ret = DROP_PREV_NODE;
			goto out;
		}

		if (mustfix_fsck_err_on(bpos_cmp(prev->key.k.p,
						 bpos_predecessor(cur->data->min_key)), c,
				"btree node with incorrect max_key at btree %s level %u:\n"
				"  node %s\n"
				"  next %s",
				bch2_btree_ids[b->c.btree_id], b->c.level,
				buf1.buf, buf2.buf))
			ret = set_node_max(c, prev,
					   bpos_predecessor(cur->data->min_key));
	} else {
		/* prev overwrites cur: */

		if (mustfix_fsck_err_on(bpos_cmp(expected_start,
						 cur->data->max_key) >= 0, c,
				"btree node overwritten by prev node at btree %s level %u:\n"
				"  prev %s\n"
				"  node %s",
				bch2_btree_ids[b->c.btree_id], b->c.level,
				buf1.buf, buf2.buf)) {
			ret = DROP_THIS_NODE;
			goto out;
		}

		if (mustfix_fsck_err_on(bpos_cmp(expected_start, cur->data->min_key), c,
				"btree node with incorrect min_key at btree %s level %u:\n"
				"  prev %s\n"
				"  node %s",
				bch2_btree_ids[b->c.btree_id], b->c.level,
				buf1.buf, buf2.buf))
			ret = set_node_min(c, cur, expected_start);
	}
out:
fsck_err:
	printbuf_exit(&buf2);
	printbuf_exit(&buf1);
	return ret;
}

static int btree_repair_node_end(struct bch_fs *c, struct btree *b,
				 struct btree *child)
{
	struct printbuf buf1 = PRINTBUF, buf2 = PRINTBUF;
	int ret = 0;

	bch2_bkey_val_to_text(&buf1, c, bkey_i_to_s_c(&child->key));
	bch2_bpos_to_text(&buf2, b->key.k.p);

	if (mustfix_fsck_err_on(bpos_cmp(child->key.k.p, b->key.k.p), c,
			"btree node with incorrect max_key at btree %s level %u:\n"
			"  %s\n"
			"  expected %s",
			bch2_btree_ids[b->c.btree_id], b->c.level,
			buf1.buf, buf2.buf)) {
		ret = set_node_max(c, child, b->key.k.p);
		if (ret)
			goto err;
	}
err:
fsck_err:
	printbuf_exit(&buf2);
	printbuf_exit(&buf1);
	return ret;
}
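
/*
 * Walk an interior node's children twice: the first pass repairs child
 * boundaries and evicts unreadable children, the second recurses into each
 * child. If every child ended up dropped, DROP_THIS_NODE is returned so the
 * now-empty interior node is removed as well.
 */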

static int bch2_btree_repair_topology_recurse(struct btree_trans *trans, struct btree *b)
{
	struct bch_fs *c = trans->c;
	struct btree_and_journal_iter iter;
	struct bkey_s_c k;
	struct bkey_buf prev_k, cur_k;
	struct btree *prev = NULL, *cur = NULL;
	bool have_child, dropped_children = false;
	struct printbuf buf = PRINTBUF;
	int ret = 0;

	if (!b->c.level)
		return 0;
again:
	prev = NULL;
	have_child = dropped_children = false;
	bch2_bkey_buf_init(&prev_k);
	bch2_bkey_buf_init(&cur_k);
	bch2_btree_and_journal_iter_init_node_iter(&iter, c, b);

	while ((k = bch2_btree_and_journal_iter_peek(&iter)).k) {
		BUG_ON(bpos_cmp(k.k->p, b->data->min_key) < 0);
		BUG_ON(bpos_cmp(k.k->p, b->data->max_key) > 0);

		bch2_btree_and_journal_iter_advance(&iter);
		bch2_bkey_buf_reassemble(&cur_k, c, k);

		cur = bch2_btree_node_get_noiter(trans, cur_k.k,
					b->c.btree_id, b->c.level - 1,
					false);
		ret = PTR_ERR_OR_ZERO(cur);

		printbuf_reset(&buf);
		bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(cur_k.k));

		if (mustfix_fsck_err_on(ret == -EIO, c,
				"Topology repair: unreadable btree node at btree %s level %u:\n"
				"  %s",
				bch2_btree_ids[b->c.btree_id],
				b->c.level - 1,
				buf.buf)) {
			bch2_btree_node_evict(trans, cur_k.k);
			ret = bch2_journal_key_delete(c, b->c.btree_id,
						      b->c.level, cur_k.k->k.p);
			cur = NULL;
			if (ret)
				break;
			continue;
		}

		if (ret) {
			bch_err(c, "%s: error getting btree node: %s",
				__func__, bch2_err_str(ret));
			break;
		}

		ret = btree_repair_node_boundaries(c, b, prev, cur);

		if (ret == DROP_THIS_NODE) {
			six_unlock_read(&cur->c.lock);
			bch2_btree_node_evict(trans, cur_k.k);
			ret = bch2_journal_key_delete(c, b->c.btree_id,
						      b->c.level, cur_k.k->k.p);
			cur = NULL;
			if (ret)
				break;
			continue;
		}

		if (prev)
			six_unlock_read(&prev->c.lock);
		prev = NULL;

		if (ret == DROP_PREV_NODE) {
			bch2_btree_node_evict(trans, prev_k.k);
			ret = bch2_journal_key_delete(c, b->c.btree_id,
						      b->c.level, prev_k.k->k.p);
			if (ret)
				break;

			bch2_btree_and_journal_iter_exit(&iter);
			bch2_bkey_buf_exit(&prev_k, c);
			bch2_bkey_buf_exit(&cur_k, c);
			goto again;
		} else if (ret)
			break;

		prev = cur;
		cur = NULL;
		bch2_bkey_buf_copy(&prev_k, c, cur_k.k);
	}

	if (!ret && !IS_ERR_OR_NULL(prev)) {
		BUG_ON(cur);
		ret = btree_repair_node_end(c, b, prev);
	}

	if (!IS_ERR_OR_NULL(prev))
		six_unlock_read(&prev->c.lock);
	prev = NULL;
	if (!IS_ERR_OR_NULL(cur))
		six_unlock_read(&cur->c.lock);
	cur = NULL;

	if (ret)
		goto err;

	bch2_btree_and_journal_iter_exit(&iter);
	bch2_btree_and_journal_iter_init_node_iter(&iter, c, b);

	while ((k = bch2_btree_and_journal_iter_peek(&iter)).k) {
		bch2_bkey_buf_reassemble(&cur_k, c, k);
		bch2_btree_and_journal_iter_advance(&iter);

		cur = bch2_btree_node_get_noiter(trans, cur_k.k,
					b->c.btree_id, b->c.level - 1,
					false);
		ret = PTR_ERR_OR_ZERO(cur);

		if (ret) {
			bch_err(c, "%s: error getting btree node: %s",
				__func__, bch2_err_str(ret));
			goto err;
		}

		ret = bch2_btree_repair_topology_recurse(trans, cur);
		six_unlock_read(&cur->c.lock);
		cur = NULL;

		if (ret == DROP_THIS_NODE) {
			bch2_btree_node_evict(trans, cur_k.k);
			ret = bch2_journal_key_delete(c, b->c.btree_id,
						      b->c.level, cur_k.k->k.p);
			dropped_children = true;
		}

		if (ret)
			goto err;

		have_child = true;
	}

	printbuf_reset(&buf);
	bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key));

	if (mustfix_fsck_err_on(!have_child, c,
			"empty interior btree node at btree %s level %u\n"
			"  %s",
			bch2_btree_ids[b->c.btree_id],
			b->c.level, buf.buf))
		ret = DROP_THIS_NODE;
err:
fsck_err:
	if (!IS_ERR_OR_NULL(prev))
		six_unlock_read(&prev->c.lock);
	if (!IS_ERR_OR_NULL(cur))
		six_unlock_read(&cur->c.lock);

	bch2_btree_and_journal_iter_exit(&iter);
	bch2_bkey_buf_exit(&prev_k, c);
	bch2_bkey_buf_exit(&cur_k, c);

	if (!ret && dropped_children)
		goto again;

	printbuf_exit(&buf);
	return ret;
}

static int bch2_repair_topology(struct bch_fs *c)
{
	struct btree_trans trans;
	struct btree *b;
	unsigned i;
	int ret = 0;

	bch2_trans_init(&trans, c, 0, 0);

	for (i = 0; i < BTREE_ID_NR && !ret; i++) {
		b = c->btree_roots[i].b;
		if (btree_node_fake(b))
			continue;

		btree_node_lock_nopath_nofail(&trans, &b->c, SIX_LOCK_read);
		ret = bch2_btree_repair_topology_recurse(&trans, b);
		six_unlock_read(&b->c.lock);

		if (ret == DROP_THIS_NODE) {
			bch_err(c, "empty btree root - repair unimplemented");
			ret = -BCH_ERR_fsck_repair_unimplemented;
		}
	}

	bch2_trans_exit(&trans);

	return ret;
}
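
/*
 * Cross check a key's pointers against the bucket state gc has computed:
 * where the bucket information is clearly stale we fix it up in place;
 * otherwise bad pointers (wrong gen, mismatched stripe) are dropped by
 * rewriting the key through the journal. Btree node pointers are never
 * dropped, only their gens are updated.
 */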

static int bch2_check_fix_ptrs(struct btree_trans *trans, enum btree_id btree_id,
			       unsigned level, bool is_root,
			       struct bkey_s_c *k)
{
	struct bch_fs *c = trans->c;
	struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(*k);
	const union bch_extent_entry *entry;
	struct extent_ptr_decoded p = { 0 };
	bool do_update = false;
	struct printbuf buf = PRINTBUF;
	int ret = 0;

	/*
	 * XXX
	 * use check_bucket_ref here
	 */
	bkey_for_each_ptr_decode(k->k, ptrs, p, entry) {
		struct bch_dev *ca = bch_dev_bkey_exists(c, p.ptr.dev);
		struct bucket *g = PTR_GC_BUCKET(ca, &p.ptr);
		enum bch_data_type data_type = bch2_bkey_ptr_data_type(*k, &entry->ptr);

		if (c->opts.reconstruct_alloc ||
		    fsck_err_on(!g->gen_valid, c,
				"bucket %u:%zu data type %s ptr gen %u missing in alloc btree\n"
				"while marking %s",
				p.ptr.dev, PTR_BUCKET_NR(ca, &p.ptr),
				bch2_data_types[ptr_data_type(k->k, &p.ptr)],
				p.ptr.gen,
				(printbuf_reset(&buf),
				 bch2_bkey_val_to_text(&buf, c, *k), buf.buf))) {
			if (!p.ptr.cached) {
				g->gen_valid		= true;
				g->gen			= p.ptr.gen;
			} else {
				do_update = true;
			}
		}

		if (fsck_err_on(gen_cmp(p.ptr.gen, g->gen) > 0, c,
				"bucket %u:%zu data type %s ptr gen in the future: %u > %u\n"
				"while marking %s",
				p.ptr.dev, PTR_BUCKET_NR(ca, &p.ptr),
				bch2_data_types[ptr_data_type(k->k, &p.ptr)],
				p.ptr.gen, g->gen,
				(printbuf_reset(&buf),
				 bch2_bkey_val_to_text(&buf, c, *k), buf.buf))) {
			if (!p.ptr.cached) {
				g->gen_valid		= true;
				g->gen			= p.ptr.gen;
				g->data_type		= 0;
				g->dirty_sectors	= 0;
				g->cached_sectors	= 0;
				set_bit(BCH_FS_NEED_ANOTHER_GC, &c->flags);
			} else {
				do_update = true;
			}
		}

		if (fsck_err_on(gen_cmp(g->gen, p.ptr.gen) > BUCKET_GC_GEN_MAX, c,
				"bucket %u:%zu gen %u data type %s: ptr gen %u too stale\n"
				"while marking %s",
				p.ptr.dev, PTR_BUCKET_NR(ca, &p.ptr), g->gen,
				bch2_data_types[ptr_data_type(k->k, &p.ptr)],
				p.ptr.gen,
				(printbuf_reset(&buf),
				 bch2_bkey_val_to_text(&buf, c, *k), buf.buf)))
			do_update = true;

		if (fsck_err_on(!p.ptr.cached &&
				gen_cmp(p.ptr.gen, g->gen) < 0, c,
				"bucket %u:%zu data type %s stale dirty ptr: %u < %u\n"
				"while marking %s",
				p.ptr.dev, PTR_BUCKET_NR(ca, &p.ptr),
				bch2_data_types[ptr_data_type(k->k, &p.ptr)],
				p.ptr.gen, g->gen,
				(printbuf_reset(&buf),
				 bch2_bkey_val_to_text(&buf, c, *k), buf.buf)))
			do_update = true;

		if (data_type != BCH_DATA_btree && p.ptr.gen != g->gen)
			continue;

		if (fsck_err_on(g->data_type &&
				g->data_type != data_type, c,
				"bucket %u:%zu different types of data in same bucket: %s, %s\n"
				"while marking %s",
				p.ptr.dev, PTR_BUCKET_NR(ca, &p.ptr),
				bch2_data_types[g->data_type],
				bch2_data_types[data_type],
				(printbuf_reset(&buf),
				 bch2_bkey_val_to_text(&buf, c, *k), buf.buf))) {
			if (data_type == BCH_DATA_btree) {
				g->data_type	= data_type;
				set_bit(BCH_FS_NEED_ANOTHER_GC, &c->flags);
			} else {
				do_update = true;
			}
		}

		if (p.has_ec) {
			struct gc_stripe *m = genradix_ptr(&c->gc_stripes, p.ec.idx);

			if (fsck_err_on(!m || !m->alive, c,
					"pointer to nonexistent stripe %llu\n"
					"while marking %s",
					(u64) p.ec.idx,
					(printbuf_reset(&buf),
					 bch2_bkey_val_to_text(&buf, c, *k), buf.buf)))
				do_update = true;

			if (fsck_err_on(!bch2_ptr_matches_stripe_m(m, p), c,
					"pointer does not match stripe %llu\n"
					"while marking %s",
					(u64) p.ec.idx,
					(printbuf_reset(&buf),
					 bch2_bkey_val_to_text(&buf, c, *k), buf.buf)))
				do_update = true;
		}
	}

	if (do_update) {
		struct bkey_ptrs ptrs;
		union bch_extent_entry *entry;
		struct bch_extent_ptr *ptr;
		struct bkey_i *new;

		if (is_root) {
			bch_err(c, "cannot update btree roots yet");
			ret = -EINVAL;
			goto err;
		}

		new = kmalloc(bkey_bytes(k->k), GFP_KERNEL);
		if (!new) {
			bch_err(c, "%s: error allocating new key", __func__);
			ret = -ENOMEM;
			goto err;
		}

		bkey_reassemble(new, *k);

		if (level) {
			/*
			 * We don't want to drop btree node pointers - if the
			 * btree node isn't there anymore, the read path will
			 * sort it out:
			 */
			ptrs = bch2_bkey_ptrs(bkey_i_to_s(new));
			bkey_for_each_ptr(ptrs, ptr) {
				struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
				struct bucket *g = PTR_GC_BUCKET(ca, ptr);

				ptr->gen = g->gen;
			}
		} else {
			bch2_bkey_drop_ptrs(bkey_i_to_s(new), ptr, ({
				struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
				struct bucket *g = PTR_GC_BUCKET(ca, ptr);
				enum bch_data_type data_type = bch2_bkey_ptr_data_type(*k, ptr);

				(ptr->cached &&
				 (!g->gen_valid || gen_cmp(ptr->gen, g->gen) > 0)) ||
				(!ptr->cached &&
				 gen_cmp(ptr->gen, g->gen) < 0) ||
				gen_cmp(g->gen, ptr->gen) > BUCKET_GC_GEN_MAX ||
				(g->data_type &&
				 g->data_type != data_type);
			}));
again:
			ptrs = bch2_bkey_ptrs(bkey_i_to_s(new));
			bkey_extent_entry_for_each(ptrs, entry) {
				if (extent_entry_type(entry) == BCH_EXTENT_ENTRY_stripe_ptr) {
					struct gc_stripe *m = genradix_ptr(&c->gc_stripes,
									entry->stripe_ptr.idx);
					union bch_extent_entry *next_ptr;

					bkey_extent_entry_for_each_from(ptrs, next_ptr, entry)
						if (extent_entry_type(next_ptr) == BCH_EXTENT_ENTRY_ptr)
							goto found;
					next_ptr = NULL;
found:
					if (!next_ptr) {
						bch_err(c, "aieee, found stripe ptr with no data ptr");
						continue;
					}

					if (!m || !m->alive ||
					    !__bch2_ptr_matches_stripe(&m->ptrs[entry->stripe_ptr.block],
								       &next_ptr->ptr,
								       m->sectors)) {
						bch2_bkey_extent_entry_drop(new, entry);
						goto again;
					}
				}
			}
		}

		ret = bch2_journal_key_insert_take(c, btree_id, level, new);
		if (ret) {
			kfree(new);
			goto err;
		}

		if (level)
			bch2_btree_node_update_key_early(trans, btree_id, level - 1, *k, new);

		if (c->opts.verbose) {
			printbuf_reset(&buf);
			bch2_bkey_val_to_text(&buf, c, *k);
			bch_info(c, "updated %s", buf.buf);

			printbuf_reset(&buf);
			bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(new));
			bch_info(c, "new key %s", buf.buf);
		}

		*k = bkey_i_to_s_c(new);
	}
err:
fsck_err:
	printbuf_exit(&buf);
	return ret;
}

/* marking of btree keys/nodes: */
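
/*
 * Mark a single key for gc accounting; on the initial pass this also runs
 * bch2_check_fix_ptrs() and records the highest key version number seen.
 */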

static int bch2_gc_mark_key(struct btree_trans *trans, enum btree_id btree_id,
			    unsigned level, bool is_root,
			    struct bkey_s_c *k,
			    bool initial)
{
	struct bch_fs *c = trans->c;
	struct bkey deleted = KEY(0, 0, 0);
	struct bkey_s_c old = (struct bkey_s_c) { &deleted, NULL };
	unsigned flags =
		BTREE_TRIGGER_GC|
		(initial ? BTREE_TRIGGER_NOATOMIC : 0);
	int ret = 0;

	deleted.p = k->k->p;

	if (initial) {
		BUG_ON(bch2_journal_seq_verify &&
		       k->k->version.lo > atomic64_read(&c->journal.seq));

		ret = bch2_check_fix_ptrs(trans, btree_id, level, is_root, k);
		if (ret)
			goto err;

		if (fsck_err_on(k->k->version.lo > atomic64_read(&c->key_version), c,
				"key version number higher than recorded: %llu > %llu",
				k->k->version.lo,
				atomic64_read(&c->key_version)))
			atomic64_set(&c->key_version, k->k->version.lo);
	}

	ret = commit_do(trans, NULL, NULL, 0,
			bch2_mark_key(trans, old, *k, flags));
fsck_err:
err:
	if (ret)
		bch_err(c, "error from %s(): %s", __func__, bch2_err_str(ret));
	return ret;
}

static int btree_gc_mark_node(struct btree_trans *trans, struct btree *b, bool initial)
{
	struct bch_fs *c = trans->c;
	struct btree_node_iter iter;
	struct bkey unpacked;
	struct bkey_s_c k;
	struct bkey_buf prev, cur;
	int ret = 0;

	if (!btree_node_type_needs_gc(btree_node_type(b)))
		return 0;

	bch2_btree_node_iter_init_from_start(&iter, b);
	bch2_bkey_buf_init(&prev);
	bch2_bkey_buf_init(&cur);
	bkey_init(&prev.k->k);

	while ((k = bch2_btree_node_iter_peek_unpack(&iter, b, &unpacked)).k) {
		ret = bch2_gc_mark_key(trans, b->c.btree_id, b->c.level, false,
				       &k, initial);
		if (ret)
			break;

		bch2_btree_node_iter_advance(&iter, b);

		if (b->c.level) {
			bch2_bkey_buf_reassemble(&cur, c, k);

			ret = bch2_gc_check_topology(c, b, &prev, cur,
					bch2_btree_node_iter_end(&iter));
			if (ret)
				break;
		}
	}

	bch2_bkey_buf_exit(&cur, c);
	bch2_bkey_buf_exit(&prev, c);
	return ret;
}
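
/*
 * Walk one btree, marking every node (and on leaf nodes every key) and
 * finally the root; gc_pos is advanced as we go so that concurrent updates
 * can tell whether a given bucket has already been seen by this pass.
 */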

static int bch2_gc_btree(struct btree_trans *trans, enum btree_id btree_id,
			 bool initial, bool metadata_only)
{
	struct bch_fs *c = trans->c;
	struct btree_iter iter;
	struct btree *b;
	unsigned depth = metadata_only ? 1 : 0;
	int ret = 0;

	gc_pos_set(c, gc_pos_btree(btree_id, POS_MIN, 0));

	__for_each_btree_node(trans, iter, btree_id, POS_MIN,
			      0, depth, BTREE_ITER_PREFETCH, b, ret) {
		bch2_verify_btree_nr_keys(b);

		gc_pos_set(c, gc_pos_btree_node(b));

		ret = btree_gc_mark_node(trans, b, initial);
		if (ret)
			break;
	}
	bch2_trans_iter_exit(trans, &iter);

	if (ret)
		return ret;

	mutex_lock(&c->btree_root_lock);
	b = c->btree_roots[btree_id].b;
	if (!btree_node_fake(b)) {
		struct bkey_s_c k = bkey_i_to_s_c(&b->key);

		ret = bch2_gc_mark_key(trans, b->c.btree_id, b->c.level,
				       true, &k, initial);
	}
	gc_pos_set(c, gc_pos_btree_root(b->c.btree_id));
	mutex_unlock(&c->btree_root_lock);

	return ret;
}

static int bch2_gc_btree_init_recurse(struct btree_trans *trans, struct btree *b,
				      unsigned target_depth)
{
	struct bch_fs *c = trans->c;
	struct btree_and_journal_iter iter;
	struct bkey_s_c k;
	struct bkey_buf cur, prev;
	struct printbuf buf = PRINTBUF;
	int ret = 0;

	bch2_btree_and_journal_iter_init_node_iter(&iter, c, b);
	bch2_bkey_buf_init(&prev);
	bch2_bkey_buf_init(&cur);
	bkey_init(&prev.k->k);

	while ((k = bch2_btree_and_journal_iter_peek(&iter)).k) {
		BUG_ON(bpos_cmp(k.k->p, b->data->min_key) < 0);
		BUG_ON(bpos_cmp(k.k->p, b->data->max_key) > 0);

		ret = bch2_gc_mark_key(trans, b->c.btree_id, b->c.level,
				       false, &k, true);
		if (ret) {
			bch_err(c, "%s: error from bch2_gc_mark_key: %s",
				__func__, bch2_err_str(ret));
			goto fsck_err;
		}

		if (b->c.level) {
			bch2_bkey_buf_reassemble(&cur, c, k);
			k = bkey_i_to_s_c(cur.k);

			bch2_btree_and_journal_iter_advance(&iter);

			ret = bch2_gc_check_topology(c, b,
					&prev, cur,
					!bch2_btree_and_journal_iter_peek(&iter).k);
			if (ret)
				goto fsck_err;
		} else {
			bch2_btree_and_journal_iter_advance(&iter);
		}
	}

	if (b->c.level > target_depth) {
		bch2_btree_and_journal_iter_exit(&iter);
		bch2_btree_and_journal_iter_init_node_iter(&iter, c, b);

		while ((k = bch2_btree_and_journal_iter_peek(&iter)).k) {
			struct btree *child;

			bch2_bkey_buf_reassemble(&cur, c, k);
			bch2_btree_and_journal_iter_advance(&iter);

			child = bch2_btree_node_get_noiter(trans, cur.k,
						b->c.btree_id, b->c.level - 1,
						false);
			ret = PTR_ERR_OR_ZERO(child);

			if (ret == -EIO) {
				bch2_topology_error(c);

				if (__fsck_err(c,
					  FSCK_CAN_FIX|
					  FSCK_CAN_IGNORE|
					  FSCK_NO_RATELIMIT,
					  "Unreadable btree node at btree %s level %u:\n"
					  "  %s",
					  bch2_btree_ids[b->c.btree_id],
					  b->c.level - 1,
					  (printbuf_reset(&buf),
					   bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(cur.k)), buf.buf)) &&
				    !test_bit(BCH_FS_TOPOLOGY_REPAIR_DONE, &c->flags)) {
					ret = -BCH_ERR_need_topology_repair;
					bch_info(c, "Halting mark and sweep to start topology repair pass");
					goto fsck_err;
				} else {
					/* Continue marking when opted to not
					 * fix the error: */
					ret = 0;
					set_bit(BCH_FS_INITIAL_GC_UNFIXED, &c->flags);
					continue;
				}
			} else if (ret) {
				bch_err(c, "%s: error getting btree node: %s",
					__func__, bch2_err_str(ret));
				break;
			}

			ret = bch2_gc_btree_init_recurse(trans, child,
							 target_depth);
			six_unlock_read(&child->c.lock);

			if (ret)
				break;
		}
	}
fsck_err:
	bch2_bkey_buf_exit(&cur, c);
	bch2_bkey_buf_exit(&prev, c);
	bch2_btree_and_journal_iter_exit(&iter);
	printbuf_exit(&buf);
	return ret;
}

static int bch2_gc_btree_init(struct btree_trans *trans,
			      enum btree_id btree_id,
			      bool metadata_only)
{
	struct bch_fs *c = trans->c;
	struct btree *b;
	unsigned target_depth = metadata_only ? 1 : 0;
	struct printbuf buf = PRINTBUF;
	int ret = 0;

	b = c->btree_roots[btree_id].b;

	if (btree_node_fake(b))
		return 0;

	six_lock_read(&b->c.lock, NULL, NULL);
	printbuf_reset(&buf);
	bch2_bpos_to_text(&buf, b->data->min_key);
	if (mustfix_fsck_err_on(bpos_cmp(b->data->min_key, POS_MIN), c,
			"btree root with incorrect min_key: %s", buf.buf)) {
		bch_err(c, "repair unimplemented");
		ret = -BCH_ERR_fsck_repair_unimplemented;
		goto fsck_err;
	}

	printbuf_reset(&buf);
	bch2_bpos_to_text(&buf, b->data->max_key);
	if (mustfix_fsck_err_on(bpos_cmp(b->data->max_key, SPOS_MAX), c,
			"btree root with incorrect max_key: %s", buf.buf)) {
		bch_err(c, "repair unimplemented");
		ret = -BCH_ERR_fsck_repair_unimplemented;
		goto fsck_err;
	}

	if (b->c.level >= target_depth)
		ret = bch2_gc_btree_init_recurse(trans, b, target_depth);

	if (!ret) {
		struct bkey_s_c k = bkey_i_to_s_c(&b->key);

		ret = bch2_gc_mark_key(trans, b->c.btree_id, b->c.level, true,
				       &k, true);
	}
fsck_err:
	six_unlock_read(&b->c.lock);

	if (ret < 0)
		bch_err(c, "error from %s(): %s", __func__, bch2_err_str(ret));
	printbuf_exit(&buf);
	return ret;
}

static inline int btree_id_gc_phase_cmp(enum btree_id l, enum btree_id r)
{
	return  (int) btree_id_to_gc_phase(l) -
		(int) btree_id_to_gc_phase(r);
}
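
/*
 * Mark all btrees; they're visited in gc-phase order rather than btree id
 * order, so that the traversal order matches how gc_pos compares btree
 * phases.
 */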

static int bch2_gc_btrees(struct bch_fs *c, bool initial, bool metadata_only)
{
	struct btree_trans trans;
	enum btree_id ids[BTREE_ID_NR];
	unsigned i;
	int ret = 0;

	bch2_trans_init(&trans, c, 0, 0);

	for (i = 0; i < BTREE_ID_NR; i++)
		ids[i] = i;
	bubble_sort(ids, BTREE_ID_NR, btree_id_gc_phase_cmp);

	for (i = 0; i < BTREE_ID_NR && !ret; i++)
		ret = initial
			? bch2_gc_btree_init(&trans, ids[i], metadata_only)
			: bch2_gc_btree(&trans, ids[i], initial, metadata_only);

	if (ret < 0)
		bch_err(c, "error from %s(): %s", __func__, bch2_err_str(ret));

	bch2_trans_exit(&trans);
	return ret;
}

static void mark_metadata_sectors(struct bch_fs *c, struct bch_dev *ca,
				  u64 start, u64 end,
				  enum bch_data_type type,
				  unsigned flags)
{
	u64 b = sector_to_bucket(ca, start);

	do {
		unsigned sectors =
			min_t(u64, bucket_to_sector(ca, b + 1), end) - start;

		bch2_mark_metadata_bucket(c, ca, b, type, sectors,
					  gc_phase(GC_PHASE_SB), flags);
		b++;
		start += sectors;
	} while (start < end);
}

static void bch2_mark_dev_superblock(struct bch_fs *c, struct bch_dev *ca,
				     unsigned flags)
{
	struct bch_sb_layout *layout = &ca->disk_sb.sb->layout;
	unsigned i;
	u64 b;

	for (i = 0; i < layout->nr_superblocks; i++) {
		u64 offset = le64_to_cpu(layout->sb_offset[i]);

		if (offset == BCH_SB_SECTOR)
			mark_metadata_sectors(c, ca, 0, BCH_SB_SECTOR,
					      BCH_DATA_sb, flags);

		mark_metadata_sectors(c, ca, offset,
				      offset + (1 << layout->sb_max_size_bits),
				      BCH_DATA_sb, flags);
	}

	for (i = 0; i < ca->journal.nr; i++) {
		b = ca->journal.buckets[i];
		bch2_mark_metadata_bucket(c, ca, b, BCH_DATA_journal,
					  ca->mi.bucket_size,
					  gc_phase(GC_PHASE_SB), flags);
	}
}

static void bch2_mark_superblocks(struct bch_fs *c)
{
	struct bch_dev *ca;
	unsigned i;

	mutex_lock(&c->sb_lock);
	gc_pos_set(c, gc_phase(GC_PHASE_SB));

	for_each_online_member(ca, c, i)
		bch2_mark_dev_superblock(c, ca, BTREE_TRIGGER_GC);
	mutex_unlock(&c->sb_lock);
}

#if 0
/* Also see bch2_pending_btree_node_free_insert_done() */
static void bch2_mark_pending_btree_node_frees(struct bch_fs *c)
{
	struct btree_update *as;
	struct pending_btree_node_free *d;

	mutex_lock(&c->btree_interior_update_lock);
	gc_pos_set(c, gc_phase(GC_PHASE_PENDING_DELETE));

	for_each_pending_btree_node_free(c, as, d)
		if (d->index_update_done)
			bch2_mark_key(c, bkey_i_to_s_c(&d->key), BTREE_TRIGGER_GC);

	mutex_unlock(&c->btree_interior_update_lock);
}
#endif

static void bch2_gc_free(struct bch_fs *c)
{
	struct bch_dev *ca;
	unsigned i;

	genradix_free(&c->reflink_gc_table);
	genradix_free(&c->gc_stripes);

	for_each_member_device(ca, c, i) {
		kvpfree(rcu_dereference_protected(ca->buckets_gc, 1),
			sizeof(struct bucket_array) +
			ca->mi.nbuckets * sizeof(struct bucket));
		ca->buckets_gc = NULL;

		free_percpu(ca->usage_gc);
		ca->usage_gc = NULL;
	}

	free_percpu(c->usage_gc);
	c->usage_gc = NULL;
}
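
/*
 * Compare the usage counters recomputed by gc against the live ones and,
 * after prompting via fsck_err(), overwrite the live counters with the gc
 * results.
 */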

static int bch2_gc_done(struct bch_fs *c,
			bool initial, bool metadata_only)
{
	struct bch_dev *ca = NULL;
	struct printbuf buf = PRINTBUF;
	bool verify = !metadata_only &&
		!c->opts.reconstruct_alloc &&
		(!initial || (c->sb.compat & (1ULL << BCH_COMPAT_alloc_info)));
	unsigned i, dev;
	int ret = 0;

	percpu_down_write(&c->mark_lock);

#define copy_field(_f, _msg, ...)					\
	if (dst->_f != src->_f &&					\
	    (!verify ||							\
	     fsck_err(c, _msg ": got %llu, should be %llu"		\
		      , ##__VA_ARGS__, dst->_f, src->_f)))		\
		dst->_f = src->_f
#define copy_stripe_field(_f, _msg, ...)				\
	if (dst->_f != src->_f &&					\
	    (!verify ||							\
	     fsck_err(c, "stripe %zu has wrong "_msg			\
		      ": got %u, should be %u",				\
		      iter.pos, ##__VA_ARGS__,				\
		      dst->_f, src->_f)))				\
		dst->_f = src->_f
#define copy_dev_field(_f, _msg, ...)					\
	copy_field(_f, "dev %u has wrong " _msg, dev, ##__VA_ARGS__)
#define copy_fs_field(_f, _msg, ...)					\
	copy_field(_f, "fs has wrong " _msg, ##__VA_ARGS__)

	for (i = 0; i < ARRAY_SIZE(c->usage); i++)
		bch2_fs_usage_acc_to_base(c, i);

	for_each_member_device(ca, c, dev) {
		struct bch_dev_usage *dst = ca->usage_base;
		struct bch_dev_usage *src = (void *)
			bch2_acc_percpu_u64s((void *) ca->usage_gc,
					     dev_usage_u64s());

		copy_dev_field(buckets_ec,		"buckets_ec");

		for (i = 0; i < BCH_DATA_NR; i++) {
			copy_dev_field(d[i].buckets,	"%s buckets", bch2_data_types[i]);
			copy_dev_field(d[i].sectors,	"%s sectors", bch2_data_types[i]);
			copy_dev_field(d[i].fragmented,	"%s fragmented", bch2_data_types[i]);
		}
	};

	{
		unsigned nr = fs_usage_u64s(c);
		struct bch_fs_usage *dst = c->usage_base;
		struct bch_fs_usage *src = (void *)
			bch2_acc_percpu_u64s((void *) c->usage_gc, nr);

		copy_fs_field(hidden,		"hidden");
		copy_fs_field(btree,		"btree");

		if (!metadata_only) {
			copy_fs_field(data,	"data");
			copy_fs_field(cached,	"cached");
			copy_fs_field(reserved,	"reserved");
			copy_fs_field(nr_inodes,"nr_inodes");

			for (i = 0; i < BCH_REPLICAS_MAX; i++)
				copy_fs_field(persistent_reserved[i],
					      "persistent_reserved[%i]", i);
		}

		for (i = 0; i < c->replicas.nr; i++) {
			struct bch_replicas_entry *e =
				cpu_replicas_entry(&c->replicas, i);

			if (metadata_only &&
			    (e->data_type == BCH_DATA_user ||
			     e->data_type == BCH_DATA_cached))
				continue;

			printbuf_reset(&buf);
			bch2_replicas_entry_to_text(&buf, e);

			copy_fs_field(replicas[i], "%s", buf.buf);
		}
	}

#undef copy_fs_field
#undef copy_dev_field
#undef copy_stripe_field
#undef copy_field
fsck_err:
	if (ca)
		percpu_ref_put(&ca->ref);
	if (ret)
		bch_err(c, "error from %s(): %s", __func__, bch2_err_str(ret));

	percpu_up_write(&c->mark_lock);
	printbuf_exit(&buf);
	return ret;
}
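
/*
 * Allocate the per device and filesystem wide gc copies of the usage
 * counters that the mark and sweep pass accumulates into.
 */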

static int bch2_gc_start(struct bch_fs *c,
			 bool metadata_only)
{
	struct bch_dev *ca = NULL;
	unsigned i;

	BUG_ON(c->usage_gc);

	c->usage_gc = __alloc_percpu_gfp(fs_usage_u64s(c) * sizeof(u64),
					 sizeof(u64), GFP_KERNEL);
	if (!c->usage_gc) {
		bch_err(c, "error allocating c->usage_gc");
		return -ENOMEM;
	}

	for_each_member_device(ca, c, i) {
		BUG_ON(ca->buckets_gc);
		BUG_ON(ca->usage_gc);

		ca->usage_gc = alloc_percpu(struct bch_dev_usage);
		if (!ca->usage_gc) {
			bch_err(c, "error allocating ca->usage_gc");
			percpu_ref_put(&ca->ref);
			return -ENOMEM;
		}

		this_cpu_write(ca->usage_gc->d[BCH_DATA_free].buckets,
			       ca->mi.nbuckets - ca->mi.first_bucket);
	}

	return 0;
}

/* returns true if not equal */
static inline bool bch2_alloc_v4_cmp(struct bch_alloc_v4 l,
				     struct bch_alloc_v4 r)
{
	return  l.gen != r.gen				||
		l.oldest_gen != r.oldest_gen		||
		l.data_type != r.data_type		||
		l.dirty_sectors	!= r.dirty_sectors	||
		l.cached_sectors != r.cached_sectors	||
		l.stripe_redundancy != r.stripe_redundancy ||
		l.stripe != r.stripe;
}
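
/*
 * Reconcile one alloc key with the bucket state gc computed: fields that
 * differ are (after an fsck prompt) copied from the gc bucket into a new
 * alloc_v4 key and queued as a btree update. Returns 1 to stop iteration
 * once we're past the device's last bucket.
 */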
static int bch2_alloc_write_key(struct btree_trans *trans,
				struct btree_iter *iter,
				struct bkey_s_c k,
				bool metadata_only)
{
	struct bch_fs *c = trans->c;
	struct bch_dev *ca = bch_dev_bkey_exists(c, iter->pos.inode);
	struct bucket gc, *b;
	struct bkey_i_alloc_v4 *a;
	struct bch_alloc_v4 old, new;
	enum bch_data_type type;
	int ret;

	if (bkey_cmp(iter->pos, POS(ca->dev_idx, ca->mi.nbuckets)) >= 0)
		return 1;

	bch2_alloc_to_v4(k, &old);
	new = old;

	percpu_down_read(&c->mark_lock);
	b = gc_bucket(ca, iter->pos.offset);

	/*
	 * b->data_type doesn't yet include need_discard & need_gc_gen states -
	 * fix that here:
	 */
	type = __alloc_data_type(b->dirty_sectors,
				 b->cached_sectors,
				 b->stripe,
				 old,
				 b->data_type);
	if (b->data_type != type) {
		struct bch_dev_usage *u;

		preempt_disable();
		u = this_cpu_ptr(ca->usage_gc);
		u->d[b->data_type].buckets--;
		b->data_type = type;
		u->d[b->data_type].buckets++;
		preempt_enable();
	}

	gc = *b;
	percpu_up_read(&c->mark_lock);

	if (metadata_only &&
	    gc.data_type != BCH_DATA_sb &&
	    gc.data_type != BCH_DATA_journal &&
	    gc.data_type != BCH_DATA_btree)
		return 0;

	if (gen_after(old.gen, gc.gen))
		return 0;

#define copy_bucket_field(_f)						\
	if (c->opts.reconstruct_alloc ||				\
	    fsck_err_on(new._f != gc._f, c,				\
			"bucket %llu:%llu gen %u data type %s has wrong " #_f	\
			": got %u, should be %u",			\
			iter->pos.inode, iter->pos.offset,		\
			gc.gen,						\
			bch2_data_types[gc.data_type],			\
			new._f, gc._f))					\
		new._f = gc._f;

	copy_bucket_field(gen);
	copy_bucket_field(data_type);
	copy_bucket_field(dirty_sectors);
	copy_bucket_field(cached_sectors);
	copy_bucket_field(stripe_redundancy);
	copy_bucket_field(stripe);
#undef copy_bucket_field

	if (!bch2_alloc_v4_cmp(old, new))
		return 0;

	a = bch2_alloc_to_v4_mut(trans, k);
	ret = PTR_ERR_OR_ZERO(a);
	if (ret)
		return ret;

	a->v = new;

	/*
	 * The trigger normally makes sure this is set, but we're not running
	 * triggers:
	 */
	if (a->v.data_type == BCH_DATA_cached && !a->v.io_time[READ])
		a->v.io_time[READ] = max_t(u64, 1, atomic64_read(&c->io_clock[READ].now));

	ret = bch2_trans_update(trans, iter, &a->k_i, BTREE_TRIGGER_NORUN);
fsck_err:
	return ret;
}
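
/*
 * After the mark-and-sweep pass, walk every device's alloc keys and write back
 * any alloc info GC found to be stale:
 */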
static int bch2_gc_alloc_done(struct bch_fs *c, bool metadata_only)
{
	struct btree_trans trans;
	struct btree_iter iter;
	struct bkey_s_c k;
	struct bch_dev *ca;
	unsigned i;
	int ret = 0;

	bch2_trans_init(&trans, c, 0, 0);

	for_each_member_device(ca, c, i) {
		ret = for_each_btree_key_commit(&trans, iter, BTREE_ID_alloc,
				POS(ca->dev_idx, ca->mi.first_bucket),
				BTREE_ITER_SLOTS|BTREE_ITER_PREFETCH, k,
				NULL, NULL, BTREE_INSERT_LAZY_RW,
			bch2_alloc_write_key(&trans, &iter, k, metadata_only));

		if (ret < 0) {
			bch_err(c, "error writing alloc info: %s", bch2_err_str(ret));
			percpu_ref_put(&ca->ref);
			break;
		}
	}

	bch2_trans_exit(&trans);
	return ret < 0 ? ret : 0;
}
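
/*
 * Allocate the per-device shadow bucket arrays that GC marks into, and seed
 * them with the gens currently in the alloc btree:
 */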
static int bch2_gc_alloc_start(struct bch_fs *c, bool metadata_only)
{
	struct bch_dev *ca;
	struct btree_trans trans;
	struct btree_iter iter;
	struct bkey_s_c k;
	struct bucket *g;
	struct bch_alloc_v4 a;
	unsigned i;
	int ret;

	for_each_member_device(ca, c, i) {
		struct bucket_array *buckets = kvpmalloc(sizeof(struct bucket_array) +
				ca->mi.nbuckets * sizeof(struct bucket),
				GFP_KERNEL|__GFP_ZERO);
		if (!buckets) {
			percpu_ref_put(&ca->ref);
			bch_err(c, "error allocating ca->buckets[gc]");
			return -ENOMEM;
		}

		buckets->first_bucket	= ca->mi.first_bucket;
		buckets->nbuckets	= ca->mi.nbuckets;
		rcu_assign_pointer(ca->buckets_gc, buckets);
	}

	bch2_trans_init(&trans, c, 0, 0);

	for_each_btree_key(&trans, iter, BTREE_ID_alloc, POS_MIN,
			   BTREE_ITER_PREFETCH, k, ret) {
		ca = bch_dev_bkey_exists(c, k.k->p.inode);
		g = gc_bucket(ca, k.k->p.offset);

		bch2_alloc_to_v4(k, &a);

		g->gen_valid	= 1;
		g->gen		= a.gen;

		if (metadata_only &&
		    (a.data_type == BCH_DATA_user ||
		     a.data_type == BCH_DATA_cached ||
		     a.data_type == BCH_DATA_parity)) {
			g->data_type		= a.data_type;
			g->dirty_sectors	= a.dirty_sectors;
			g->cached_sectors	= a.cached_sectors;
			g->stripe		= a.stripe;
			g->stripe_redundancy	= a.stripe_redundancy;
		}
	}
	bch2_trans_iter_exit(&trans, &iter);

	bch2_trans_exit(&trans);

	if (ret)
		bch_err(c, "error reading alloc info at gc start: %s", bch2_err_str(ret));

	return ret;
}

static void bch2_gc_alloc_reset(struct bch_fs *c, bool metadata_only)
{
	struct bch_dev *ca;
	unsigned i;

	for_each_member_device(ca, c, i) {
		struct bucket_array *buckets = gc_bucket_array(ca);
		struct bucket *g;

		for_each_bucket(g, buckets) {
			if (metadata_only &&
			    (g->data_type == BCH_DATA_user ||
			     g->data_type == BCH_DATA_cached ||
			     g->data_type == BCH_DATA_parity))
				continue;
			g->data_type		= 0;
			g->dirty_sectors	= 0;
			g->cached_sectors	= 0;
		}
	}
}
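
/*
 * Check a single reflink key against the refcount GC accumulated in
 * c->reflink_gc_table; if the stored refcount is wrong, rewrite the key (or
 * delete it if the new refcount is zero):
 */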
static int bch2_gc_write_reflink_key(struct btree_trans *trans,
				     struct btree_iter *iter,
				     struct bkey_s_c k,
				     size_t *idx)
{
	struct bch_fs *c = trans->c;
	const __le64 *refcount = bkey_refcount_c(k);
	struct printbuf buf = PRINTBUF;
	struct reflink_gc *r;
	int ret = 0;

	if (!refcount)
		return 0;

	while ((r = genradix_ptr(&c->reflink_gc_table, *idx)) &&
	       r->offset < k.k->p.offset)
		++*idx;

	if (!r ||
	    r->offset != k.k->p.offset ||
	    r->size != k.k->size) {
		bch_err(c, "unexpected inconsistency walking reflink table at gc finish");
		return -EINVAL;
	}

	if (fsck_err_on(r->refcount != le64_to_cpu(*refcount), c,
			"reflink key has wrong refcount:\n"
			"  %s\n"
			"  should be %u",
			(bch2_bkey_val_to_text(&buf, c, k), buf.buf),
			r->refcount)) {
		struct bkey_i *new;

		new = bch2_trans_kmalloc(trans, bkey_bytes(k.k));
		ret = PTR_ERR_OR_ZERO(new);
		if (ret)
			return ret;

		bkey_reassemble(new, k);

		if (!r->refcount)
			new->k.type = KEY_TYPE_deleted;
		else
			*bkey_refcount(new) = cpu_to_le64(r->refcount);

		ret = bch2_trans_update(trans, iter, new, 0);
	}
fsck_err:
	printbuf_exit(&buf);
	return ret;
}
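
/* At the end of GC, walk the reflink btree and repair any bad refcounts: */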
static int bch2_gc_reflink_done(struct bch_fs *c, bool metadata_only)
{
	struct btree_trans trans;
	struct btree_iter iter;
	struct bkey_s_c k;
	size_t idx = 0;
	int ret = 0;

	if (metadata_only)
		return 0;

	bch2_trans_init(&trans, c, 0, 0);

	ret = for_each_btree_key_commit(&trans, iter,
			BTREE_ID_reflink, POS_MIN,
			BTREE_ITER_PREFETCH, k,
			NULL, NULL, BTREE_INSERT_NOFAIL,
		bch2_gc_write_reflink_key(&trans, &iter, k, &idx));

	c->reflink_gc_nr = 0;

	bch2_trans_exit(&trans);
	return ret;
}

static int bch2_gc_reflink_start(struct bch_fs *c,
				 bool metadata_only)
{
	struct btree_trans trans;
	struct btree_iter iter;
	struct bkey_s_c k;
	struct reflink_gc *r;
	int ret = 0;

	if (metadata_only)
		return 0;

	bch2_trans_init(&trans, c, 0, 0);
	c->reflink_gc_nr = 0;

	for_each_btree_key(&trans, iter, BTREE_ID_reflink, POS_MIN,
			   BTREE_ITER_PREFETCH, k, ret) {
		const __le64 *refcount = bkey_refcount_c(k);

		if (!refcount)
			continue;

		r = genradix_ptr_alloc(&c->reflink_gc_table, c->reflink_gc_nr++,
				       GFP_KERNEL);
		if (!r) {
			ret = -ENOMEM;
			break;
		}

		r->offset	= k.k->p.offset;
		r->size		= k.k->size;
		r->refcount	= 0;
	}
	bch2_trans_iter_exit(&trans, &iter);

	bch2_trans_exit(&trans);
	return ret;
}

static void bch2_gc_reflink_reset(struct bch_fs *c, bool metadata_only)
{
	struct genradix_iter iter;
	struct reflink_gc *r;

	genradix_for_each(&c->reflink_gc_table, iter, r)
		r->refcount = 0;
}
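
/*
 * Check a single stripe key's per-block sector counts against what GC
 * accumulated in c->gc_stripes, and rewrite the key if they disagree:
 */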
static int bch2_gc_write_stripes_key(struct btree_trans *trans,
				     struct btree_iter *iter,
				     struct bkey_s_c k)
{
	struct bch_fs *c = trans->c;
	struct printbuf buf = PRINTBUF;
	const struct bch_stripe *s;
	struct gc_stripe *m;
	unsigned i;
	int ret = 0;

	if (k.k->type != KEY_TYPE_stripe)
		return 0;

	s = bkey_s_c_to_stripe(k).v;
	m = genradix_ptr(&c->gc_stripes, k.k->p.offset);

	for (i = 0; i < s->nr_blocks; i++)
		if (stripe_blockcount_get(s, i) != (m ? m->block_sectors[i] : 0))
			goto inconsistent;
	return 0;
inconsistent:
	if (fsck_err_on(true, c,
			"stripe has wrong block sector count %u:\n"
			"  %s\n"
			"  should be %u", i,
			(printbuf_reset(&buf),
			 bch2_bkey_val_to_text(&buf, c, k), buf.buf),
			m ? m->block_sectors[i] : 0)) {
		struct bkey_i_stripe *new;

		new = bch2_trans_kmalloc(trans, bkey_bytes(k.k));
		ret = PTR_ERR_OR_ZERO(new);
		if (ret)
			return ret;

		bkey_reassemble(&new->k_i, k);

		for (i = 0; i < new->v.nr_blocks; i++)
			stripe_blockcount_set(&new->v, i, m ? m->block_sectors[i] : 0);

		ret = bch2_trans_update(trans, iter, &new->k_i, 0);
	}
fsck_err:
	printbuf_exit(&buf);
	return ret;
}
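
/* At the end of GC, walk the stripes btree and repair any bad sector counts: */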
static int bch2_gc_stripes_done(struct bch_fs *c, bool metadata_only)
{
	struct btree_trans trans;
	struct btree_iter iter;
	struct bkey_s_c k;
	int ret = 0;

	if (metadata_only)
		return 0;

	bch2_trans_init(&trans, c, 0, 0);

	ret = for_each_btree_key_commit(&trans, iter,
			BTREE_ID_stripes, POS_MIN,
			BTREE_ITER_PREFETCH, k,
			NULL, NULL, BTREE_INSERT_NOFAIL,
		bch2_gc_write_stripes_key(&trans, &iter, k));

	bch2_trans_exit(&trans);
	return ret;
}

static void bch2_gc_stripes_reset(struct bch_fs *c, bool metadata_only)
{
	genradix_free(&c->gc_stripes);
}

/**
 * bch2_gc - walk _all_ references to buckets, and recompute them:
 *
 * Order matters here:
 *  - Concurrent GC relies on the fact that we have a total ordering for
 *    everything that GC walks - see gc_will_visit_node(),
 *    gc_will_visit_root()
 *
 *  - also, references move around in the course of index updates and
 *    various other crap: everything needs to agree on the ordering
 *    references are allowed to move around in - e.g., we're allowed to
 *    start with a reference owned by an open_bucket (the allocator) and
 *    move it to the btree, but not the reverse.
 *
 *    This is necessary to ensure that gc doesn't miss references that
 *    move around - if references move backwards in the ordering GC
 *    uses, GC could skip past them
 */
int bch2_gc(struct bch_fs *c, bool initial, bool metadata_only)
{
	unsigned iter = 0;
	int ret;

	lockdep_assert_held(&c->state_lock);

	down_write(&c->gc_lock);

	bch2_btree_interior_updates_flush(c);

	ret   = bch2_gc_start(c, metadata_only) ?:
		bch2_gc_alloc_start(c, metadata_only) ?:
		bch2_gc_reflink_start(c, metadata_only);
	if (ret)
		goto out;
again:
	gc_pos_set(c, gc_phase(GC_PHASE_START));

	bch2_mark_superblocks(c);

	if (BCH_SB_HAS_TOPOLOGY_ERRORS(c->disk_sb.sb) &&
	    !test_bit(BCH_FS_INITIAL_GC_DONE, &c->flags) &&
	    c->opts.fix_errors != FSCK_OPT_NO) {
		bch_info(c, "Starting topology repair pass");
		ret = bch2_repair_topology(c);
		if (ret)
			goto out;
		bch_info(c, "Topology repair pass done");

		set_bit(BCH_FS_TOPOLOGY_REPAIR_DONE, &c->flags);
	}

	ret = bch2_gc_btrees(c, initial, metadata_only);

	if (ret == -BCH_ERR_need_topology_repair &&
	    !test_bit(BCH_FS_TOPOLOGY_REPAIR_DONE, &c->flags) &&
	    !test_bit(BCH_FS_INITIAL_GC_DONE, &c->flags)) {
		set_bit(BCH_FS_NEED_ANOTHER_GC, &c->flags);
		SET_BCH_SB_HAS_TOPOLOGY_ERRORS(c->disk_sb.sb, true);
		ret = 0;
	}

	if (ret == -BCH_ERR_need_topology_repair)
		ret = -BCH_ERR_fsck_errors_not_fixed;

	if (ret)
		goto out;

#if 0
	bch2_mark_pending_btree_node_frees(c);
#endif
	c->gc_count++;

	if (test_bit(BCH_FS_NEED_ANOTHER_GC, &c->flags) ||
	    (!iter && bch2_test_restart_gc)) {
		if (iter++ > 2) {
			bch_info(c, "Unable to fix bucket gens, looping");
			ret = -EINVAL;
			goto out;
		}

		/*
		 * XXX: make sure gens we fixed got saved
		 */
		bch_info(c, "Second GC pass needed, restarting:");
		clear_bit(BCH_FS_NEED_ANOTHER_GC, &c->flags);
		__gc_pos_set(c, gc_phase(GC_PHASE_NOT_RUNNING));

		bch2_gc_stripes_reset(c, metadata_only);
		bch2_gc_alloc_reset(c, metadata_only);
		bch2_gc_reflink_reset(c, metadata_only);

		/* flush fsck errors, reset counters */
		bch2_flush_fsck_errs(c);
		goto again;
	}
out:
	if (!ret) {
		bch2_journal_block(&c->journal);

		ret   = bch2_gc_stripes_done(c, metadata_only) ?:
			bch2_gc_reflink_done(c, metadata_only) ?:
			bch2_gc_alloc_done(c, metadata_only) ?:
			bch2_gc_done(c, initial, metadata_only);

		bch2_journal_unblock(&c->journal);
	}

	percpu_down_write(&c->mark_lock);

	/* Indicates that gc is no longer in progress: */
	__gc_pos_set(c, gc_phase(GC_PHASE_NOT_RUNNING));

	bch2_gc_free(c);
	percpu_up_write(&c->mark_lock);

	up_write(&c->gc_lock);

	/*
	 * At startup, allocations can happen directly instead of via the
	 * allocator thread - issue wakeup in case they blocked on gc_lock:
	 */
	closure_wake_up(&c->freelist_wait);

	return ret;
}
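
/*
 * For every pointer in a key, note the oldest gen still referencing that
 * bucket in ca->oldest_gen; if any pointer is more than 16 gens stale,
 * renormalize and rewrite the key:
 */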
static int gc_btree_gens_key(struct btree_trans *trans,
			     struct btree_iter *iter,
			     struct bkey_s_c k)
{
	struct bch_fs *c = trans->c;
	struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
	const struct bch_extent_ptr *ptr;
	struct bkey_i *u;
	int ret;

	percpu_down_read(&c->mark_lock);
	bkey_for_each_ptr(ptrs, ptr) {
		struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);

		if (ptr_stale(ca, ptr) > 16) {
			percpu_up_read(&c->mark_lock);
			goto update;
		}
	}

	bkey_for_each_ptr(ptrs, ptr) {
		struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
		u8 *gen = &ca->oldest_gen[PTR_BUCKET_NR(ca, ptr)];

		if (gen_after(*gen, ptr->gen))
			*gen = ptr->gen;
	}
	percpu_up_read(&c->mark_lock);
	return 0;
update:
	u = bch2_trans_kmalloc(trans, bkey_bytes(k.k));
	ret = PTR_ERR_OR_ZERO(u);
	if (ret)
		return ret;

	bkey_reassemble(u, k);

	bch2_extent_normalize(c, bkey_i_to_s(u));
	return bch2_trans_update(trans, iter, u, 0);
}
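
/*
 * Write the oldest_gen that bch2_gc_gens() computed for a bucket back into its
 * alloc key, if it changed:
 */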
static int bch2_alloc_write_oldest_gen(struct btree_trans *trans, struct btree_iter *iter,
				       struct bkey_s_c k)
{
	struct bch_dev *ca = bch_dev_bkey_exists(trans->c, iter->pos.inode);
	struct bch_alloc_v4 a;
	struct bkey_i_alloc_v4 *a_mut;
	int ret;

	bch2_alloc_to_v4(k, &a);

	if (a.oldest_gen == ca->oldest_gen[iter->pos.offset])
		return 0;

	a_mut = bch2_alloc_to_v4_mut(trans, k);
	ret = PTR_ERR_OR_ZERO(a_mut);
	if (ret)
		return ret;

	a_mut->v.oldest_gen = ca->oldest_gen[iter->pos.offset];
	a_mut->v.data_type = alloc_data_type(a_mut->v, a_mut->v.data_type);

	return bch2_trans_update(trans, iter, &a_mut->k_i, 0);
}
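
/*
 * bch2_gc_gens(): the lightweight gen GC pass - walk every btree that has
 * pointers, record the oldest gen still referenced for each bucket, then
 * write the results back to the alloc btree:
 */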
int bch2_gc_gens(struct bch_fs *c)
{
	struct btree_trans trans;
	struct btree_iter iter;
	struct bkey_s_c k;
	struct bch_dev *ca;
	u64 b, start_time = local_clock();
	unsigned i;
	int ret;

	/*
	 * Ideally we would be using state_lock and not gc_lock here, but that
	 * introduces a deadlock in the RO path - we currently take the state
	 * lock at the start of going RO, thus the gc thread may get stuck:
	 */
	if (!mutex_trylock(&c->gc_gens_lock))
		return 0;

	trace_and_count(c, gc_gens_start, c);
	down_read(&c->gc_lock);
	bch2_trans_init(&trans, c, 0, 0);

	for_each_member_device(ca, c, i) {
		struct bucket_gens *gens;

		BUG_ON(ca->oldest_gen);

		ca->oldest_gen = kvmalloc(ca->mi.nbuckets, GFP_KERNEL);
		if (!ca->oldest_gen) {
			percpu_ref_put(&ca->ref);
			ret = -ENOMEM;
			goto err;
		}

		gens = bucket_gens(ca);

		for (b = gens->first_bucket;
		     b < gens->nbuckets; b++)
			ca->oldest_gen[b] = gens->b[b];
	}

	for (i = 0; i < BTREE_ID_NR; i++)
		if (btree_type_has_ptrs(i)) {
			struct btree_iter iter;
			struct bkey_s_c k;

			c->gc_gens_btree = i;
			c->gc_gens_pos = POS_MIN;
			ret = for_each_btree_key_commit(&trans, iter, i,
					POS_MIN,
					BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS,
					k,
					NULL, NULL,
					BTREE_INSERT_NOFAIL,
				gc_btree_gens_key(&trans, &iter, k));
			if (ret) {
				bch_err(c, "error recalculating oldest_gen: %s", bch2_err_str(ret));
				goto err;
			}
		}

	ret = for_each_btree_key_commit(&trans, iter, BTREE_ID_alloc,
			POS_MIN,
			BTREE_ITER_PREFETCH,
			k,
			NULL, NULL,
			BTREE_INSERT_NOFAIL,
		bch2_alloc_write_oldest_gen(&trans, &iter, k));
	if (ret) {
		bch_err(c, "error writing oldest_gen: %s", bch2_err_str(ret));
		goto err;
	}

	c->gc_gens_btree	= 0;
	c->gc_gens_pos		= POS_MIN;

	c->gc_count++;

	bch2_time_stats_update(&c->times[BCH_TIME_btree_gc], start_time);
	trace_and_count(c, gc_gens_end, c);
err:
	for_each_member_device(ca, c, i) {
		kvfree(ca->oldest_gen);
		ca->oldest_gen = NULL;
	}

	bch2_trans_exit(&trans);
	up_read(&c->gc_lock);
	mutex_unlock(&c->gc_gens_lock);
	return ret;
}
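
/*
 * Background GC thread: sleeps until it's kicked or (when btree_gc_periodic is
 * set) until enough writes have happened per the write IO clock, then runs the
 * gen GC pass:
 */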
static int bch2_gc_thread(void *arg)
{
	struct bch_fs *c = arg;
	struct io_clock *clock = &c->io_clock[WRITE];
	unsigned long last = atomic64_read(&clock->now);
	unsigned last_kick = atomic_read(&c->kick_gc);
	int ret;

	set_freezable();

	while (1) {
		while (1) {
			set_current_state(TASK_INTERRUPTIBLE);

			if (kthread_should_stop()) {
				__set_current_state(TASK_RUNNING);
				return 0;
			}

			if (atomic_read(&c->kick_gc) != last_kick)
				break;

			if (c->btree_gc_periodic) {
				unsigned long next = last + c->capacity / 16;

				if (atomic64_read(&clock->now) >= next)
					break;

				bch2_io_clock_schedule_timeout(clock, next);
			} else {
				schedule();
			}

			try_to_freeze();
		}
		__set_current_state(TASK_RUNNING);

		last = atomic64_read(&clock->now);
		last_kick = atomic_read(&c->kick_gc);

		/*
		 * Full gc is currently incompatible with btree key cache:
		 */
#if 0
		ret = bch2_gc(c, false, false);
#else
		ret = bch2_gc_gens(c);
#endif
		if (ret < 0)
			bch_err(c, "btree gc failed: %s", bch2_err_str(ret));

		debug_check_no_locks_held();
	}

	return 0;
}

void bch2_gc_thread_stop(struct bch_fs *c)
{
	struct task_struct *p;

	p = c->gc_thread;
	c->gc_thread = NULL;

	if (p) {
		kthread_stop(p);
		put_task_struct(p);
	}
}

int bch2_gc_thread_start(struct bch_fs *c)
{
	struct task_struct *p;

	if (c->gc_thread)
		return 0;

	p = kthread_create(bch2_gc_thread, c, "bch-gc/%s", c->name);
	if (IS_ERR(p)) {
		bch_err(c, "error creating gc thread: %s", bch2_err_str(PTR_ERR(p)));
		return PTR_ERR(p);
	}

	get_task_struct(p);
	c->gc_thread = p;
	wake_up_process(p);
	return 0;
}