More bcachefs bugfixes for 6.7:

- Fix a deadlock in the data move path with nocow locks (vs. update in
    place writes); when trylock failed we were incorrectly waiting for in
    flight ios to flush.
  - Fix reporting of NFS file handle length
  - Fix early error path in bch2_fs_alloc() - list head wasn't being
    initialized early enough
  - Make sure correct (hardware accelerated) crc modules get loaded
  - Fix a rare overflow in the btree split path, when the packed bkey
    format grows and all the keys have no value (LRU btree).
  - Fix error handling in the sector allocator
    This was causing writes to spuriously fail in multidevice setups, and
    another bug meant that the errors weren't being logged, only reported
    via fsync.
 -----BEGIN PGP SIGNATURE-----
 
 iQIzBAABCgAdFiEEKnAFLkS8Qha+jvQrE6szbY3KbnYFAmWCXs8ACgkQE6szbY3K
 bnaNcQ//bECexJp/gNYJg1kZDEeLrYoe5rAjVEE46hWM+PuXV8NqzJqw+lqhmnzL
 x/gX6vT/daGU2TMvxHo6Utk2dcmR18iYK3O1DK+No0m/U4riuB8sYNU2jw3QLYGc
 onZKg5fFqmuM5riuKDEsLnkTiDE/PRQ++YuFKcp9ejG57sSXszvPymAZVPC3pT17
 tGD0ejpVqjmp0ztKuCnFoknLhf8YxOIBwF2nHgtsVOKqBAmutmQcNPnuTdpYu5TO
 Bdc24DIWJqfjyGqO9SxlpcOYBp5dDK2PeP9FJ7UL6aDj3swP2DChKGZr0OW7h3jH
 wDFt7rR392Hcc5PEBJMU0CDdVj4Y5B6M88PUlUlNGKXgaX/epMKZTqSzepx2pqT6
 zjn+wN8Y/092NuEPeIDbAXny+LmxHGf4BPRvraruertLD/sPtW+p1qA0OB5+9leK
 SrfR/RMqSlPLEAgxbQ+AOtCaODgPODpLV3zpOdZU+NtWfXcs7sAxNcrEGk7pjnjn
 1YQn+LSHXGovURhJsDMT/ht8UOm9ryx1aCzRA344wPtyvswis9xy+GnGhsrwhjcu
 5TKvu9B3Lg3IONoMSegtInWJK2FiO9oOAuf75vjSaID+lAEiRNfxofU8dQ9DV3hx
 GHWz8D1tHobPVRtUJg6geC+3td06dFyKSD9cBPoNO5T23dgxQNQ=
 =y4Cl
 -----END PGP SIGNATURE-----

Merge tag 'bcachefs-2023-12-19' of https://evilpiepirate.org/git/bcachefs

Pull more bcachefs fixes from Kent Overstreet:

 - Fix a deadlock in the data move path with nocow locks (vs. update in
   place writes); when trylock failed we were incorrectly waiting for in
   flight ios to flush.

 - Fix reporting of NFS file handle length

 - Fix early error path in bch2_fs_alloc() - list head wasn't being
   initialized early enough

 - Make sure correct (hardware accelerated) crc modules get loaded

 - Fix a rare overflow in the btree split path, when the packed bkey
   format grows and all the keys have no value (LRU btree).

 - Fix error handling in the sector allocator

   This was causing writes to spuriously fail in multidevice setups, and
   another bug meant that the errors weren't being logged, only reported
   via fsync.

* tag 'bcachefs-2023-12-19' of https://evilpiepirate.org/git/bcachefs:
  bcachefs: Fix bch2_alloc_sectors_start_trans() error handling
  bcachefs; guard against overflow in btree node split
  bcachefs: btree_node_u64s_with_format() takes nr keys
  bcachefs: print explicit recovery pass message only once
  bcachefs: improve modprobe support by providing softdeps
  bcachefs: fix invalid memory access in bch2_fs_alloc() error path
  bcachefs: Fix determining required file handle length
  bcachefs: Fix nocow locks deadlock
This commit is contained in:
Linus Torvalds 2023-12-20 11:24:28 -08:00
commit 74d8fc2b86
9 changed files with 70 additions and 28 deletions

View File

@ -1374,8 +1374,17 @@ retry:
goto alloc_done;
/* Don't retry from all devices if we're out of open buckets: */
if (bch2_err_matches(ret, BCH_ERR_open_buckets_empty))
goto allocate_blocking;
if (bch2_err_matches(ret, BCH_ERR_open_buckets_empty)) {
int ret = open_bucket_add_buckets(trans, &ptrs, wp, devs_have,
target, erasure_code,
nr_replicas, &nr_effective,
&have_cache, watermark,
flags, cl);
if (!ret ||
bch2_err_matches(ret, BCH_ERR_transaction_restart) ||
bch2_err_matches(ret, BCH_ERR_open_buckets_empty))
goto alloc_done;
}
/*
* Only try to allocate cache (durability = 0 devices) from the
@ -1389,7 +1398,6 @@ retry:
&have_cache, watermark,
flags, cl);
} else {
allocate_blocking:
ret = open_bucket_add_buckets(trans, &ptrs, wp, devs_have,
target, erasure_code,
nr_replicas, &nr_effective,

View File

@ -3214,10 +3214,9 @@ void bch2_fs_btree_iter_exit(struct bch_fs *c)
mempool_exit(&c->btree_trans_pool);
}
int bch2_fs_btree_iter_init(struct bch_fs *c)
void bch2_fs_btree_iter_init_early(struct bch_fs *c)
{
struct btree_transaction_stats *s;
int ret;
for (s = c->btree_transaction_stats;
s < c->btree_transaction_stats + ARRAY_SIZE(c->btree_transaction_stats);
@ -3228,6 +3227,11 @@ int bch2_fs_btree_iter_init(struct bch_fs *c)
INIT_LIST_HEAD(&c->btree_trans_list);
seqmutex_init(&c->btree_trans_lock);
}
int bch2_fs_btree_iter_init(struct bch_fs *c)
{
int ret;
c->btree_trans_bufs = alloc_percpu(struct btree_trans_buf);
if (!c->btree_trans_bufs)

View File

@ -938,6 +938,7 @@ unsigned bch2_trans_get_fn_idx(const char *);
void bch2_btree_trans_to_text(struct printbuf *, struct btree_trans *);
void bch2_fs_btree_iter_exit(struct bch_fs *);
void bch2_fs_btree_iter_init_early(struct bch_fs *);
int bch2_fs_btree_iter_init(struct bch_fs *);
#endif /* _BCACHEFS_BTREE_ITER_H */

View File

@ -99,7 +99,7 @@ static void btree_node_interior_verify(struct bch_fs *c, struct btree *b)
/* Calculate ideal packed bkey format for new btree nodes: */
void __bch2_btree_calc_format(struct bkey_format_state *s, struct btree *b)
static void __bch2_btree_calc_format(struct bkey_format_state *s, struct btree *b)
{
struct bkey_packed *k;
struct bset_tree *t;
@ -125,21 +125,20 @@ static struct bkey_format bch2_btree_calc_format(struct btree *b)
return bch2_bkey_format_done(&s);
}
static size_t btree_node_u64s_with_format(struct btree *b,
static size_t btree_node_u64s_with_format(struct btree_nr_keys nr,
struct bkey_format *old_f,
struct bkey_format *new_f)
{
struct bkey_format *old_f = &b->format;
/* stupid integer promotion rules */
ssize_t delta =
(((int) new_f->key_u64s - old_f->key_u64s) *
(int) b->nr.packed_keys) +
(int) nr.packed_keys) +
(((int) new_f->key_u64s - BKEY_U64s) *
(int) b->nr.unpacked_keys);
(int) nr.unpacked_keys);
BUG_ON(delta + b->nr.live_u64s < 0);
BUG_ON(delta + nr.live_u64s < 0);
return b->nr.live_u64s + delta;
return nr.live_u64s + delta;
}
/**
@ -147,16 +146,18 @@ static size_t btree_node_u64s_with_format(struct btree *b,
*
* @c: filesystem handle
* @b: btree node to rewrite
* @nr: number of keys for new node (i.e. b->nr)
* @new_f: bkey format to translate keys to
*
* Returns: true if all re-packed keys will be able to fit in a new node.
*
* Assumes all keys will successfully pack with the new format.
*/
bool bch2_btree_node_format_fits(struct bch_fs *c, struct btree *b,
static bool bch2_btree_node_format_fits(struct bch_fs *c, struct btree *b,
struct btree_nr_keys nr,
struct bkey_format *new_f)
{
size_t u64s = btree_node_u64s_with_format(b, new_f);
size_t u64s = btree_node_u64s_with_format(nr, &b->format, new_f);
return __vstruct_bytes(struct btree_node, u64s) < btree_bytes(c);
}
@ -391,7 +392,7 @@ static struct btree *bch2_btree_node_alloc_replacement(struct btree_update *as,
* The keys might expand with the new format - if they wouldn't fit in
* the btree node anymore, use the old format for now:
*/
if (!bch2_btree_node_format_fits(as->c, b, &format))
if (!bch2_btree_node_format_fits(as->c, b, b->nr, &format))
format = b->format;
SET_BTREE_NODE_SEQ(n->data, BTREE_NODE_SEQ(b->data) + 1);
@ -1345,8 +1346,11 @@ static void __btree_split_node(struct btree_update *as,
struct bkey_packed *out[2];
struct bkey uk;
unsigned u64s, n1_u64s = (b->nr.live_u64s * 3) / 5;
struct { unsigned nr_keys, val_u64s; } nr_keys[2];
int i;
memset(&nr_keys, 0, sizeof(nr_keys));
for (i = 0; i < 2; i++) {
BUG_ON(n[i]->nsets != 1);
@ -1368,6 +1372,9 @@ static void __btree_split_node(struct btree_update *as,
if (!i)
n1_pos = uk.p;
bch2_bkey_format_add_key(&format[i], &uk);
nr_keys[i].nr_keys++;
nr_keys[i].val_u64s += bkeyp_val_u64s(&b->format, k);
}
btree_set_min(n[0], b->data->min_key);
@ -1380,6 +1387,12 @@ static void __btree_split_node(struct btree_update *as,
bch2_bkey_format_add_pos(&format[i], n[i]->data->max_key);
n[i]->data->format = bch2_bkey_format_done(&format[i]);
unsigned u64s = nr_keys[i].nr_keys * n[i]->data->format.key_u64s +
nr_keys[i].val_u64s;
if (__vstruct_bytes(struct btree_node, u64s) > btree_bytes(as->c))
n[i]->data->format = b->format;
btree_node_set_format(n[i], n[i]->data->format);
}
@ -1822,8 +1835,8 @@ int __bch2_foreground_maybe_merge(struct btree_trans *trans,
bch2_bkey_format_add_pos(&new_s, next->data->max_key);
new_f = bch2_bkey_format_done(&new_s);
sib_u64s = btree_node_u64s_with_format(b, &new_f) +
btree_node_u64s_with_format(m, &new_f);
sib_u64s = btree_node_u64s_with_format(b->nr, &b->format, &new_f) +
btree_node_u64s_with_format(m->nr, &m->format, &new_f);
if (sib_u64s > BTREE_FOREGROUND_MERGE_HYSTERESIS(c)) {
sib_u64s -= BTREE_FOREGROUND_MERGE_HYSTERESIS(c);

View File

@ -6,10 +6,6 @@
#include "btree_locking.h"
#include "btree_update.h"
void __bch2_btree_calc_format(struct bkey_format_state *, struct btree *);
bool bch2_btree_node_format_fits(struct bch_fs *c, struct btree *,
struct bkey_format *);
#define BTREE_UPDATE_NODES_MAX ((BTREE_MAX_DEPTH - 2) * 2 + GC_MERGE_NODES)
#define BTREE_UPDATE_JOURNAL_RES (BTREE_UPDATE_NODES_MAX * (BKEY_BTREE_PTR_U64s_MAX + 1))

View File

@ -560,7 +560,8 @@ int bch2_data_update_init(struct btree_trans *trans,
move_ctxt_wait_event(ctxt,
(locked = bch2_bucket_nocow_trylock(&c->nocow_locks,
PTR_BUCKET_POS(c, &p.ptr), 0)) ||
!atomic_read(&ctxt->read_sectors));
(!atomic_read(&ctxt->read_sectors) &&
!atomic_read(&ctxt->write_sectors)));
if (!locked)
bch2_bucket_nocow_lock(&c->nocow_locks,

View File

@ -1143,24 +1143,33 @@ static int bch2_encode_fh(struct inode *vinode, u32 *fh, int *len,
{
struct bch_inode_info *inode = to_bch_ei(vinode);
struct bch_inode_info *dir = to_bch_ei(vdir);
if (*len < sizeof(struct bcachefs_fid_with_parent) / sizeof(u32))
return FILEID_INVALID;
int min_len;
if (!S_ISDIR(inode->v.i_mode) && dir) {
struct bcachefs_fid_with_parent *fid = (void *) fh;
min_len = sizeof(*fid) / sizeof(u32);
if (*len < min_len) {
*len = min_len;
return FILEID_INVALID;
}
fid->fid = bch2_inode_to_fid(inode);
fid->dir = bch2_inode_to_fid(dir);
*len = sizeof(*fid) / sizeof(u32);
*len = min_len;
return FILEID_BCACHEFS_WITH_PARENT;
} else {
struct bcachefs_fid *fid = (void *) fh;
min_len = sizeof(*fid) / sizeof(u32);
if (*len < min_len) {
*len = min_len;
return FILEID_INVALID;
}
*fid = bch2_inode_to_fid(inode);
*len = sizeof(*fid) / sizeof(u32);
*len = min_len;
return FILEID_BCACHEFS_WITHOUT_PARENT;
}
}

View File

@ -10,6 +10,9 @@ extern const char * const bch2_recovery_passes[];
static inline int bch2_run_explicit_recovery_pass(struct bch_fs *c,
enum bch_recovery_pass pass)
{
if (c->recovery_passes_explicit & BIT_ULL(pass))
return 0;
bch_info(c, "running explicit recovery pass %s (%u), currently at %s (%u)",
bch2_recovery_passes[pass], pass,
bch2_recovery_passes[c->curr_recovery_pass], c->curr_recovery_pass);

View File

@ -72,6 +72,12 @@
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Kent Overstreet <kent.overstreet@gmail.com>");
MODULE_DESCRIPTION("bcachefs filesystem");
MODULE_SOFTDEP("pre: crc32c");
MODULE_SOFTDEP("pre: crc64");
MODULE_SOFTDEP("pre: sha256");
MODULE_SOFTDEP("pre: chacha20");
MODULE_SOFTDEP("pre: poly1305");
MODULE_SOFTDEP("pre: xxhash");
#define KTYPE(type) \
static const struct attribute_group type ## _group = { \
@ -714,6 +720,7 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
bch2_fs_copygc_init(c);
bch2_fs_btree_key_cache_init_early(&c->btree_key_cache);
bch2_fs_btree_iter_init_early(c);
bch2_fs_btree_interior_update_init_early(c);
bch2_fs_allocator_background_init(c);
bch2_fs_allocator_foreground_init(c);