2021-11-28 21:42:05 +03:00
/* SPDX-License-Identifier: GPL-2.0 */
# ifndef _BCACHEFS_ERRCODE_H
# define _BCACHEFS_ERRCODE_H
2022-09-19 00:10:33 +03:00
/*
 * BCH_ERRCODES() - x-macro table of bcachefs private error codes.
 *
 * Each entry has the form x(class, err). A user defines x() before expanding
 * BCH_ERRCODES() and undefines it afterwards; enum bch_errcode in this header
 * expands every entry to a BCH_ERR_<err> enumerator.
 *
 * The class argument is ENOSPC, 0, or the BCH_ERR_<err> enumerator of another
 * entry in this table. How the class is consumed is not visible in this
 * header - presumably by __bch2_err_matches()/__bch2_err_class(); confirm
 * against their definitions.
 *
 * Entry order determines the numeric value of each generated enumerator, so
 * reordering or inserting entries renumbers the codes that follow.
 */
# define BCH_ERRCODES() \
x ( ENOSPC , ENOSPC_disk_reservation ) \
x ( ENOSPC , ENOSPC_bucket_alloc ) \
x ( ENOSPC , ENOSPC_disk_label_add ) \
x ( ENOSPC , ENOSPC_stripe_create ) \
x ( ENOSPC , ENOSPC_stripe_reuse ) \
x ( ENOSPC , ENOSPC_inode_create ) \
x ( ENOSPC , ENOSPC_str_hash_create ) \
x ( ENOSPC , ENOSPC_snapshot_create ) \
x ( ENOSPC , ENOSPC_subvolume_create ) \
x ( ENOSPC , ENOSPC_sb ) \
x ( ENOSPC , ENOSPC_sb_journal ) \
x ( ENOSPC , ENOSPC_sb_quota ) \
x ( ENOSPC , ENOSPC_sb_replicas ) \
x ( ENOSPC , ENOSPC_sb_members ) \
2022-09-18 22:43:50 +03:00
x ( 0 , open_buckets_empty ) \
x ( 0 , freelist_empty ) /* named as the class of no_buckets_found below */ \
x ( BCH_ERR_freelist_empty , no_buckets_found ) \
x ( 0 , insufficient_devices ) \
x ( 0 , transaction_restart ) /* named as the class of the transaction_restart_* entries below */ \
x ( BCH_ERR_transaction_restart , transaction_restart_fault_inject ) \
x ( BCH_ERR_transaction_restart , transaction_restart_relock ) \
x ( BCH_ERR_transaction_restart , transaction_restart_relock_path ) \
x ( BCH_ERR_transaction_restart , transaction_restart_relock_path_intent ) \
x ( BCH_ERR_transaction_restart , transaction_restart_relock_after_fill ) \
x ( BCH_ERR_transaction_restart , transaction_restart_too_many_iters ) \
x ( BCH_ERR_transaction_restart , transaction_restart_lock_node_reused ) \
x ( BCH_ERR_transaction_restart , transaction_restart_fill_relock ) \
x ( BCH_ERR_transaction_restart , transaction_restart_fill_mem_alloc_fail ) \
x ( BCH_ERR_transaction_restart , transaction_restart_mem_realloced ) \
x ( BCH_ERR_transaction_restart , transaction_restart_in_traverse_all ) \
x ( BCH_ERR_transaction_restart , transaction_restart_would_deadlock ) \
x ( BCH_ERR_transaction_restart , transaction_restart_would_deadlock_write ) \
bcachefs: Deadlock cycle detector
We've outgrown our own deadlock avoidance strategy.
The btree iterator API provides an interface where the user doesn't need
to concern themselves with lock ordering - different btree iterators can
be traversed in any order. Without special care, this will lead to
deadlocks.
Our previous strategy was to define a lock ordering internally, and
whenever we attempt to take a lock and trylock() fails, we'd check if
the current btree transaction is holding any locks that cause a lock
ordering violation. If so, we'd issue a transaction restart, and then
bch2_trans_begin() would re-traverse all previously used iterators, but
in the correct order.
That approach had some issues, though.
- Sometimes we'd issue transaction restarts unnecessarily, when no
deadlock would have actually occurred. Lock ordering restarts have
become our primary cause of transaction restarts, on some workloads
totalling 20% of actual transaction commits.
- To avoid deadlock or livelock, we'd often have to take intent locks
when we only wanted a read lock: with the lock ordering approach, it
is actually illegal to hold _any_ read lock while blocking on an intent
lock, and this has been causing us unnecessary lock contention.
- It was getting fragile - the various lock ordering rules are not
trivial, and we'd been seeing occasional livelock issues related to
this machinery.
So, since bcachefs is already a relational database masquerading as a
filesystem, we're stealing the next traditional database technique and
switching to a cycle detector for avoiding deadlocks.
When we block taking a btree lock, after adding ourself to the waitlist
but before sleeping, we do a DFS of btree transactions waiting on other
btree transactions, starting with the current transaction and walking
our held locks, and transactions blocking on our held locks.
If we find a cycle, we emit a transaction restart. Occasionally (e.g.
the btree split path) we can not allow the lock() operation to fail, so
if necessary we'll tell another transaction that it has to fail.
Result: trans_restart_would_deadlock events are reduced by a factor of
10 to 100, and we'll be able to delete a whole bunch of grotty, fragile
code.
Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
2022-08-22 20:23:47 +03:00
x ( BCH_ERR_transaction_restart , transaction_restart_deadlock_recursion_limit ) /* NOTE(review): presumably raised by the deadlock cycle detector - confirm at call sites */ \
2022-09-18 22:43:50 +03:00
x ( BCH_ERR_transaction_restart , transaction_restart_upgrade ) \
x ( BCH_ERR_transaction_restart , transaction_restart_key_cache_upgrade ) \
x ( BCH_ERR_transaction_restart , transaction_restart_key_cache_fill ) \
x ( BCH_ERR_transaction_restart , transaction_restart_key_cache_raced ) \
x ( BCH_ERR_transaction_restart , transaction_restart_key_cache_realloced ) \
x ( BCH_ERR_transaction_restart , transaction_restart_journal_preres_get ) \
x ( BCH_ERR_transaction_restart , transaction_restart_nested ) \
x ( 0 , no_btree_node ) /* named as the class of the no_btree_node_* entries below */ \
x ( BCH_ERR_no_btree_node , no_btree_node_relock ) \
x ( BCH_ERR_no_btree_node , no_btree_node_upgrade ) \
x ( BCH_ERR_no_btree_node , no_btree_node_drop ) \
x ( BCH_ERR_no_btree_node , no_btree_node_lock_root ) \
x ( BCH_ERR_no_btree_node , no_btree_node_up ) \
x ( BCH_ERR_no_btree_node , no_btree_node_down ) \
x ( BCH_ERR_no_btree_node , no_btree_node_init ) \
x ( BCH_ERR_no_btree_node , no_btree_node_cached ) \
x ( 0 , lock_fail_root_changed ) \
x ( 0 , journal_reclaim_would_deadlock ) \
x ( 0 , fsck ) /* named as the class of the fsck_* entries below */ \
x ( BCH_ERR_fsck , fsck_fix ) \
x ( BCH_ERR_fsck , fsck_ignore ) \
x ( BCH_ERR_fsck , fsck_errors_not_fixed ) \
x ( BCH_ERR_fsck , fsck_repair_unimplemented ) \
x ( BCH_ERR_fsck , fsck_repair_impossible ) \
x ( 0 , need_snapshot_cleanup ) \
x ( 0 , need_topology_repair ) /* final entry: no line continuation */
2022-07-18 05:31:21 +03:00
/*
 * enum bch_errcode - numeric values for the entries of BCH_ERRCODES().
 *
 * BCH_ERR_START is 2048, so the generated enumerators count up from 2049 in
 * table order. BCH_ERR_MAX follows the last generated enumerator.
 */
enum bch_errcode {
BCH_ERR_START = 2048 ,
/* Emit one BCH_ERR_<err> enumerator per table entry; the class argument is not used here. */
# define x(class, err) BCH_ERR_##err,
BCH_ERRCODES ( )
# undef x
BCH_ERR_MAX
2021-11-28 21:42:05 +03:00
} ;
2022-07-18 05:31:21 +03:00
/*
 * bch2_err_str - declared here, defined elsewhere.
 * @err: error code
 *
 * NOTE(review): presumably returns a printable name for @err - confirm
 * against the definition.
 */
const char *bch2_err_str(int err);
/* Out-of-line matcher, defined elsewhere; parameters mirror the inline wrapper. */
bool __bch2_err_matches(int err, int class);

/*
 * _bch2_err_matches - test @err against @class.
 * @err:   error code to test
 * @class: value forwarded as the second argument of __bch2_err_matches()
 *
 * Returns false when @err is 0, without calling __bch2_err_matches();
 * otherwise returns the result of __bch2_err_matches(@err, @class).
 */
static inline bool _bch2_err_matches(int err, int class)
{
	/* Logical AND: short-circuits, so the out-of-line call is skipped for err == 0. */
	return err && __bch2_err_matches(err, class);
}
/*
 * bch2_err_matches - check @_err against @_class.
 *
 * BUILD_BUG_ON() is handed !__builtin_constant_p(_class), so a @_class that
 * the compiler cannot treat as a constant is meant to break the build. The
 * statement expression evaluates to the result of
 * _bch2_err_matches(@_err, @_class).
 */
#define bch2_err_matches(_err, _class)				\
({								\
	BUILD_BUG_ON(!__builtin_constant_p(_class));		\
	_bch2_err_matches(_err, _class);			\
})
2022-09-18 22:43:50 +03:00
/* Out-of-line helper, defined elsewhere. */
int __bch2_err_class(int err);

/*
 * bch2_err_class - pass non-negative values through, translate negative ones.
 * @err: value to translate
 *
 * A non-negative @err is returned unchanged. A negative @err is handed to
 * __bch2_err_class() (implicitly converted from long to int for the call)
 * and that function's result is returned.
 *
 * NOTE(review): presumably maps a private BCH_ERR_* code to a standard
 * errno - confirm against the definition of __bch2_err_class().
 */
static inline long bch2_err_class(long err)
{
	return err < 0 ? __bch2_err_class(err) : err;
}
2021-11-28 21:42:05 +03:00
# endif /* _BCACHEFS_ERRCODE_H */