diff --git a/fs/bcachefs/bset.c b/fs/bcachefs/bset.c index 797deaf0ad2e..2894666bb77e 100644 --- a/fs/bcachefs/bset.c +++ b/fs/bcachefs/bset.c @@ -301,44 +301,6 @@ struct rw_aux_tree { struct bpos k; }; -/* - * BSET_CACHELINE was originally intended to match the hardware cacheline size - - * it used to be 64, but I realized the lookup code would touch slightly less - * memory if it was 128. - * - * It definites the number of bytes (in struct bset) per struct bkey_float in - * the auxiliar search tree - when we're done searching the bset_float tree we - * have this many bytes left that we do a linear search over. - * - * Since (after level 5) every level of the bset_tree is on a new cacheline, - * we're touching one fewer cacheline in the bset tree in exchange for one more - * cacheline in the linear search - but the linear search might stop before it - * gets to the second cacheline. - */ - -#define BSET_CACHELINE 128 - -/* Space required for the btree node keys */ -static inline size_t btree_keys_bytes(struct btree *b) -{ - return PAGE_SIZE << b->page_order; -} - -static inline size_t btree_keys_cachelines(struct btree *b) -{ - return btree_keys_bytes(b) / BSET_CACHELINE; -} - -static inline size_t btree_aux_data_bytes(struct btree *b) -{ - return btree_keys_cachelines(b) * 8; -} - -static inline size_t btree_aux_data_u64s(struct btree *b) -{ - return btree_aux_data_bytes(b) / sizeof(u64); -} - static unsigned bset_aux_tree_buf_end(const struct bset_tree *t) { BUG_ON(t->aux_data_offset == U16_MAX); @@ -414,24 +376,6 @@ static void bset_aux_tree_verify(struct btree *b) #endif } -/* Memory allocation */ - -void bch2_btree_keys_free(struct btree *b) -{ - kvfree(b->aux_data); - b->aux_data = NULL; -} - -int bch2_btree_keys_alloc(struct btree *b, unsigned page_order, gfp_t gfp) -{ - b->page_order = page_order; - b->aux_data = kvmalloc(btree_aux_data_bytes(b), gfp); - if (!b->aux_data) - return -ENOMEM; - - return 0; -} - void bch2_btree_keys_init(struct btree *b, bool 
*expensive_debug_checks) { unsigned i; diff --git a/fs/bcachefs/bset.h b/fs/bcachefs/bset.h index a2e5e3ee68db..88f242191408 100644 --- a/fs/bcachefs/bset.h +++ b/fs/bcachefs/bset.h @@ -184,6 +184,38 @@ static inline enum bset_aux_tree_type bset_aux_tree_type(const struct bset_tree } } +/* + * BSET_CACHELINE was originally intended to match the hardware cacheline size - + * it used to be 64, but I realized the lookup code would touch slightly less + * memory if it was 128. + * + * It defines the number of bytes (in struct bset) per struct bkey_float in + * the auxiliary search tree - when we're done searching the bset_float tree we + * have this many bytes left that we do a linear search over. + * + * Since (after level 5) every level of the bset_tree is on a new cacheline, + * we're touching one fewer cacheline in the bset tree in exchange for one more + * cacheline in the linear search - but the linear search might stop before it + * gets to the second cacheline. + */ + +#define BSET_CACHELINE 128 + +static inline size_t btree_keys_cachelines(struct btree *b) +{ + return (1U << b->byte_order) / BSET_CACHELINE; +} + +static inline size_t btree_aux_data_bytes(struct btree *b) +{ + return btree_keys_cachelines(b) * 8; +} + +static inline size_t btree_aux_data_u64s(struct btree *b) +{ + return btree_aux_data_bytes(b) / sizeof(u64); +} + typedef void (*compiled_unpack_fn)(struct bkey *, const struct bkey_packed *); static inline void @@ -334,8 +366,6 @@ static inline struct bset *bset_next_set(struct btree *b, return ((void *) i) + round_up(vstruct_bytes(i), block_bytes); } -void bch2_btree_keys_free(struct btree *); -int bch2_btree_keys_alloc(struct btree *, unsigned, gfp_t); void bch2_btree_keys_init(struct btree *, bool *); void bch2_bset_init_first(struct btree *, struct bset *); diff --git a/fs/bcachefs/btree_cache.c b/fs/bcachefs/btree_cache.c index 6280110ba32b..829bff37df8d 100644 --- a/fs/bcachefs/btree_cache.c +++ b/fs/bcachefs/btree_cache.c @@ -44,7 +44,8 
@@ static void __btree_node_data_free(struct bch_fs *c, struct btree *b) kvpfree(b->data, btree_bytes(c)); b->data = NULL; - bch2_btree_keys_free(b); + kvfree(b->aux_data); + b->aux_data = NULL; } static void btree_node_data_free(struct bch_fs *c, struct btree *b) @@ -72,7 +73,7 @@ static const struct rhashtable_params bch_btree_cache_params = { .obj_cmpfn = bch2_btree_cache_cmp_fn, }; -static int __btree_node_data_alloc(struct bch_fs *c, struct btree *b, gfp_t gfp) +static int btree_node_data_alloc(struct bch_fs *c, struct btree *b, gfp_t gfp) { BUG_ON(b->data || b->aux_data); @@ -80,7 +81,8 @@ static int __btree_node_data_alloc(struct bch_fs *c, struct btree *b, gfp_t gfp) if (!b->data) return -ENOMEM; - if (bch2_btree_keys_alloc(b, btree_page_order(c), gfp)) { + b->aux_data = kvmalloc(btree_aux_data_bytes(b), gfp); + if (!b->aux_data) { kvpfree(b->data, btree_bytes(c)); b->data = NULL; return -ENOMEM; @@ -89,21 +91,9 @@ static int __btree_node_data_alloc(struct bch_fs *c, struct btree *b, gfp_t gfp) return 0; } -static void btree_node_data_alloc(struct bch_fs *c, struct btree *b, gfp_t gfp) +static struct btree *__btree_node_mem_alloc(struct bch_fs *c) { - struct btree_cache *bc = &c->btree_cache; - - if (!__btree_node_data_alloc(c, b, gfp)) { - bc->used++; - list_move(&b->list, &bc->freeable); - } else { - list_move(&b->list, &bc->freed); - } -} - -static struct btree *btree_node_mem_alloc(struct bch_fs *c, gfp_t gfp) -{ - struct btree *b = kzalloc(sizeof(struct btree), gfp); + struct btree *b = kzalloc(sizeof(struct btree), GFP_KERNEL); if (!b) return NULL; @@ -112,9 +102,25 @@ static struct btree *btree_node_mem_alloc(struct bch_fs *c, gfp_t gfp) lockdep_set_novalidate_class(&b->c.lock); INIT_LIST_HEAD(&b->list); INIT_LIST_HEAD(&b->write_blocked); + b->byte_order = ilog2(btree_bytes(c)); + return b; +} - btree_node_data_alloc(c, b, gfp); - return b->data ? 
b : NULL; +static struct btree *btree_node_mem_alloc(struct bch_fs *c) +{ + struct btree_cache *bc = &c->btree_cache; + struct btree *b = __btree_node_mem_alloc(c); + if (!b) + return NULL; + + if (btree_node_data_alloc(c, b, GFP_KERNEL)) { + kfree(b); + return NULL; + } + + bc->used++; + list_add(&b->list, &bc->freeable); + return b; } /* Btree in memory cache - hash table */ @@ -405,7 +411,7 @@ int bch2_fs_btree_cache_init(struct bch_fs *c) bch2_recalc_btree_reserve(c); for (i = 0; i < bc->reserve; i++) - if (!btree_node_mem_alloc(c, GFP_KERNEL)) { + if (!btree_node_mem_alloc(c)) { ret = -ENOMEM; goto out; } @@ -421,7 +427,7 @@ int bch2_fs_btree_cache_init(struct bch_fs *c) goto out; } - c->verify_data = btree_node_mem_alloc(c, GFP_KERNEL); + c->verify_data = btree_node_mem_alloc(c); if (!c->verify_data) { ret = -ENOMEM; goto out; @@ -553,21 +559,16 @@ got_node: mutex_unlock(&bc->lock); if (!b) { - b = kzalloc(sizeof(struct btree), GFP_KERNEL); + b = __btree_node_mem_alloc(c); if (!b) goto err; - bkey_btree_ptr_init(&b->key); - six_lock_init(&b->c.lock); - INIT_LIST_HEAD(&b->list); - INIT_LIST_HEAD(&b->write_blocked); - BUG_ON(!six_trylock_intent(&b->c.lock)); BUG_ON(!six_trylock_write(&b->c.lock)); } if (!b->data) { - if (__btree_node_data_alloc(c, b, __GFP_NOWARN|GFP_KERNEL)) + if (btree_node_data_alloc(c, b, __GFP_NOWARN|GFP_KERNEL)) goto err; mutex_lock(&bc->lock); diff --git a/fs/bcachefs/btree_cache.h b/fs/bcachefs/btree_cache.h index 2160012c734f..d0d3a85bb8be 100644 --- a/fs/bcachefs/btree_cache.h +++ b/fs/bcachefs/btree_cache.h @@ -79,14 +79,9 @@ static inline size_t btree_max_u64s(struct bch_fs *c) return (btree_bytes(c) - sizeof(struct btree_node)) / sizeof(u64); } -static inline size_t btree_page_order(struct bch_fs *c) -{ - return get_order(btree_bytes(c)); -} - static inline size_t btree_pages(struct bch_fs *c) { - return 1 << btree_page_order(c); + return btree_bytes(c) / PAGE_SIZE; } static inline unsigned btree_blocks(struct bch_fs *c) diff --git 
a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c index c8870a15a44f..f80b93a54c08 100644 --- a/fs/bcachefs/btree_io.c +++ b/fs/bcachefs/btree_io.c @@ -57,25 +57,25 @@ static void set_needs_whiteout(struct bset *i, int v) k->needs_whiteout = v; } -static void btree_bounce_free(struct bch_fs *c, unsigned order, +static void btree_bounce_free(struct bch_fs *c, size_t size, bool used_mempool, void *p) { if (used_mempool) mempool_free(p, &c->btree_bounce_pool); else - vpfree(p, PAGE_SIZE << order); + vpfree(p, size); } -static void *btree_bounce_alloc(struct bch_fs *c, unsigned order, +static void *btree_bounce_alloc(struct bch_fs *c, size_t size, bool *used_mempool) { unsigned flags = memalloc_nofs_save(); void *p; - BUG_ON(order > btree_page_order(c)); + BUG_ON(size > btree_bytes(c)); *used_mempool = false; - p = (void *) __get_free_pages(__GFP_NOWARN|GFP_NOWAIT, order); + p = vpmalloc(size, __GFP_NOWARN|GFP_NOWAIT); if (!p) { *used_mempool = true; p = mempool_alloc(&c->btree_bounce_pool, GFP_NOIO); @@ -125,16 +125,14 @@ static void bch2_sort_whiteouts(struct bch_fs *c, struct btree *b) { struct bkey_packed *new_whiteouts, **ptrs, **ptrs_end, *k; bool used_mempool = false; - unsigned order; + size_t bytes = b->whiteout_u64s * sizeof(u64); if (!b->whiteout_u64s) return; - order = get_order(b->whiteout_u64s * sizeof(u64)); + new_whiteouts = btree_bounce_alloc(c, bytes, &used_mempool); - new_whiteouts = btree_bounce_alloc(c, order, &used_mempool); - - ptrs = ptrs_end = ((void *) new_whiteouts + (PAGE_SIZE << order)); + ptrs = ptrs_end = ((void *) new_whiteouts + bytes); for (k = unwritten_whiteouts_start(c, b); k != unwritten_whiteouts_end(c, b); @@ -158,7 +156,7 @@ static void bch2_sort_whiteouts(struct bch_fs *c, struct btree *b) memcpy_u64s(unwritten_whiteouts_start(c, b), new_whiteouts, b->whiteout_u64s); - btree_bounce_free(c, order, used_mempool, new_whiteouts); + btree_bounce_free(c, bytes, used_mempool, new_whiteouts); } static bool should_compact_bset(struct 
btree *b, struct bset_tree *t, @@ -187,7 +185,7 @@ static bool bch2_compact_extent_whiteouts(struct bch_fs *c, struct bkey_packed *whiteouts = NULL; struct bkey_packed *u_start, *u_pos; struct sort_iter sort_iter; - unsigned order, whiteout_u64s = 0, u64s; + unsigned bytes, whiteout_u64s = 0, u64s; bool used_mempool, compacting = false; BUG_ON(!btree_node_is_extents(b)); @@ -204,9 +202,9 @@ static bool bch2_compact_extent_whiteouts(struct bch_fs *c, sort_iter_init(&sort_iter, b); whiteout_u64s += b->whiteout_u64s; - order = get_order(whiteout_u64s * sizeof(u64)); + bytes = whiteout_u64s * sizeof(u64); - whiteouts = btree_bounce_alloc(c, order, &used_mempool); + whiteouts = btree_bounce_alloc(c, bytes, &used_mempool); u_start = u_pos = whiteouts; memcpy_u64s(u_pos, unwritten_whiteouts_start(c, b), @@ -306,7 +304,7 @@ static bool bch2_compact_extent_whiteouts(struct bch_fs *c, unwritten_whiteouts_end(c, b), true); - btree_bounce_free(c, order, used_mempool, whiteouts); + btree_bounce_free(c, bytes, used_mempool, whiteouts); bch2_btree_build_aux_trees(b); @@ -401,7 +399,7 @@ static void btree_node_sort(struct bch_fs *c, struct btree *b, struct bset *start_bset = bset(b, &b->set[start_idx]); bool used_mempool = false; u64 start_time, seq = 0; - unsigned i, u64s = 0, order, shift = end_idx - start_idx - 1; + unsigned i, u64s = 0, bytes, shift = end_idx - start_idx - 1; bool sorting_entire_node = start_idx == 0 && end_idx == b->nsets; @@ -416,11 +414,11 @@ static void btree_node_sort(struct bch_fs *c, struct btree *b, btree_bkey_last(b, t)); } - order = sorting_entire_node - ? btree_page_order(c) - : get_order(__vstruct_bytes(struct btree_node, u64s)); + bytes = sorting_entire_node + ? 
btree_bytes(c) + : __vstruct_bytes(struct btree_node, u64s); - out = btree_bounce_alloc(c, order, &used_mempool); + out = btree_bounce_alloc(c, bytes, &used_mempool); start_time = local_clock(); @@ -435,7 +433,7 @@ static void btree_node_sort(struct bch_fs *c, struct btree *b, out->keys.u64s = cpu_to_le16(u64s); - BUG_ON(vstruct_end(&out->keys) > (void *) out + (PAGE_SIZE << order)); + BUG_ON(vstruct_end(&out->keys) > (void *) out + bytes); if (sorting_entire_node) bch2_time_stats_update(&c->times[BCH_TIME_btree_node_sort], @@ -449,7 +447,7 @@ static void btree_node_sort(struct bch_fs *c, struct btree *b, if (sorting_entire_node) { unsigned u64s = le16_to_cpu(out->keys.u64s); - BUG_ON(order != btree_page_order(c)); + BUG_ON(bytes != btree_bytes(c)); /* * Our temporary buffer is the same size as the btree node's @@ -484,7 +482,7 @@ static void btree_node_sort(struct bch_fs *c, struct btree *b, set_btree_bset_end(b, &b->set[start_idx]); bch2_bset_set_no_aux_tree(b, &b->set[start_idx]); - btree_bounce_free(c, order, used_mempool, out); + btree_bounce_free(c, bytes, used_mempool, out); bch2_verify_btree_nr_keys(b); } @@ -1043,7 +1041,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct btree *b, bool have_retry BTREE_ERR_WANT_RETRY, c, b, NULL, "found bset signature after last bset"); - sorted = btree_bounce_alloc(c, btree_page_order(c), &used_mempool); + sorted = btree_bounce_alloc(c, btree_bytes(c), &used_mempool); sorted->keys.u64s = 0; set_btree_bset(b, b->set, &b->data->keys); @@ -1061,7 +1059,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct btree *b, bool have_retry BUG_ON(b->nr.live_u64s != u64s); - btree_bounce_free(c, btree_page_order(c), used_mempool, sorted); + btree_bounce_free(c, btree_bytes(c), used_mempool, sorted); i = &b->data->keys; for (k = i->start; k != vstruct_last(i);) { @@ -1403,7 +1401,7 @@ static void btree_node_write_work(struct work_struct *work) struct btree *b = wbio->wbio.bio.bi_private; btree_bounce_free(c, - 
wbio->wbio.order, + wbio->bytes, wbio->wbio.used_mempool, wbio->data); @@ -1486,7 +1484,7 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b, struct bch_extent_ptr *ptr; struct sort_iter sort_iter; struct nonce nonce; - unsigned bytes_to_write, sectors_to_write, order, bytes, u64s; + unsigned bytes_to_write, sectors_to_write, bytes, u64s; u64 seq = 0; bool used_mempool; unsigned long old, new; @@ -1556,8 +1554,7 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b, seq = max(seq, le64_to_cpu(i->journal_seq)); } - order = get_order(bytes); - data = btree_bounce_alloc(c, order, &used_mempool); + data = btree_bounce_alloc(c, bytes, &used_mempool); if (!b->written) { bn = data; @@ -1671,7 +1668,7 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b, struct btree_write_bio, wbio.bio); wbio_init(&wbio->wbio.bio); wbio->data = data; - wbio->wbio.order = order; + wbio->bytes = bytes; wbio->wbio.used_mempool = used_mempool; wbio->wbio.bio.bi_end_io = btree_node_write_endio; wbio->wbio.bio.bi_private = b; @@ -1707,7 +1704,7 @@ err: set_btree_node_noevict(b); b->written += sectors_to_write; nowrite: - btree_bounce_free(c, order, used_mempool, data); + btree_bounce_free(c, bytes, used_mempool, data); btree_node_write_done(c, b); } diff --git a/fs/bcachefs/btree_io.h b/fs/bcachefs/btree_io.h index f3d7ec749b61..db013dc28eec 100644 --- a/fs/bcachefs/btree_io.h +++ b/fs/bcachefs/btree_io.h @@ -23,8 +23,9 @@ struct btree_read_bio { }; struct btree_write_bio { - void *data; struct work_struct work; + void *data; + unsigned bytes; struct bch_write_bio wbio; }; diff --git a/fs/bcachefs/btree_types.h b/fs/bcachefs/btree_types.h index dd272318fba1..297cf26ca13e 100644 --- a/fs/bcachefs/btree_types.h +++ b/fs/bcachefs/btree_types.h @@ -94,7 +94,7 @@ struct btree { struct btree_nr_keys nr; u16 sib_u64s[2]; u16 whiteout_u64s; - u8 page_order; + u8 byte_order; u8 unpack_fn_len; /* diff --git a/fs/bcachefs/io_types.h b/fs/bcachefs/io_types.h index 
692af6dd6031..65969eeac253 100644 --- a/fs/bcachefs/io_types.h +++ b/fs/bcachefs/io_types.h @@ -79,7 +79,6 @@ struct bch_write_bio { u64 submit_time; struct bch_devs_list failed; - u8 order; u8 dev; unsigned split:1,