bcachefs: Mark superblocks transactionally

More work towards getting rid of the in-memory struct bucket: this patch
adds code for marking superblock and journal buckets via the btree, and
uses it in the device add and journal resize paths.

Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
This commit is contained in:
Kent Overstreet 2021-01-22 17:56:34 -05:00 committed by Kent Overstreet
parent 9afc6652d1
commit bfcf840ddf
6 changed files with 214 additions and 50 deletions

View File

@ -323,48 +323,36 @@ err:
return ret;
}
int bch2_dev_alloc_write(struct bch_fs *c, struct bch_dev *ca, unsigned flags)
int bch2_alloc_write(struct bch_fs *c, unsigned flags)
{
struct btree_trans trans;
struct btree_iter *iter;
u64 first_bucket = ca->mi.first_bucket;
u64 nbuckets = ca->mi.nbuckets;
int ret = 0;
bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
iter = bch2_trans_get_iter(&trans, BTREE_ID_ALLOC,
POS(ca->dev_idx, first_bucket),
BTREE_ITER_SLOTS|BTREE_ITER_INTENT);
while (iter->pos.offset < nbuckets) {
bch2_trans_cond_resched(&trans);
ret = bch2_alloc_write_key(&trans, iter, flags);
if (ret)
break;
bch2_btree_iter_next_slot(iter);
}
bch2_trans_exit(&trans);
return ret;
}
int bch2_alloc_write(struct bch_fs *c, unsigned flags)
{
struct bch_dev *ca;
unsigned i;
int ret = 0;
bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
iter = bch2_trans_get_iter(&trans, BTREE_ID_ALLOC, POS_MIN,
BTREE_ITER_SLOTS|BTREE_ITER_INTENT);
for_each_member_device(ca, c, i) {
bch2_dev_alloc_write(c, ca, flags);
if (ret) {
percpu_ref_put(&ca->io_ref);
break;
bch2_btree_iter_set_pos(iter,
POS(ca->dev_idx, ca->mi.first_bucket));
while (iter->pos.offset < ca->mi.nbuckets) {
bch2_trans_cond_resched(&trans);
ret = bch2_alloc_write_key(&trans, iter, flags);
if (ret) {
percpu_ref_put(&ca->io_ref);
goto err;
}
bch2_btree_iter_next_slot(iter);
}
}
err:
bch2_trans_exit(&trans);
return ret;
}

View File

@ -98,7 +98,6 @@ void bch2_dev_allocator_quiesce(struct bch_fs *, struct bch_dev *);
void bch2_dev_allocator_stop(struct bch_dev *);
int bch2_dev_allocator_start(struct bch_dev *);
int bch2_dev_alloc_write(struct bch_fs *, struct bch_dev *, unsigned);
int bch2_alloc_write(struct bch_fs *, unsigned);
void bch2_fs_allocator_background_init(struct bch_fs *);

View File

@ -2060,6 +2060,168 @@ int bch2_trans_mark_update(struct btree_trans *trans,
return ret;
}
/*
 * Queue an alloc btree update that marks bucket @b of device @ca as holding
 * @sectors sectors of metadata of type @type.
 *
 * The update is only staged on @trans with bch2_trans_update(); nothing is
 * committed here.
 *
 * The bucket's dirty sector count is *set* to @sectors, not incremented, so
 * marking a bucket that is already marked identically is a no-op.
 *
 * Returns 0 on success, -EIO if the bucket already holds a different data
 * type or @sectors does not fit in one bucket, or the error returned by
 * bch2_trans_kmalloc() / bch2_trans_start_alloc_update().
 */
static int __bch2_trans_mark_metadata_bucket(struct btree_trans *trans,
					     struct bch_dev *ca, size_t b,
					     enum bch_data_type type,
					     unsigned sectors)
{
	struct bch_fs *c = trans->c;
	struct btree_iter *iter;
	struct bkey_alloc_unpacked u;
	struct bkey_i_alloc *a;
	struct bch_extent_ptr ptr = {
		.dev	= ca->dev_idx,
		.offset	= bucket_to_sector(ca, b),
	};
	int ret = 0;

	/* Allocate the new key before looking up the existing alloc info. */
	a = bch2_trans_kmalloc(trans, BKEY_ALLOC_U64s_MAX * sizeof(u64));
	ret = PTR_ERR_OR_ZERO(a);
	if (ret)
		return ret;

	ret = bch2_trans_start_alloc_update(trans, &iter, &ptr, &u);
	if (ret)
		return ret;

	if (u.data_type && u.data_type != type) {
		bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK,
			"bucket %llu:%llu gen %u different types of data in same bucket: %s, %s\n"
			"while marking %s",
			iter->pos.inode, iter->pos.offset, u.gen,
			bch2_data_types[u.data_type],
			bch2_data_types[type],
			bch2_data_types[type]);
		ret = -EIO;
		goto out;
	}

	/*
	 * Already marked exactly as requested: nothing to write.  This has to
	 * come before the size check so that re-marking a bucket (e.g. a
	 * journal bucket, which is marked with a full bucket's worth of
	 * sectors) is not misreported as an overflow.
	 */
	if (u.data_type == type &&
	    u.dirty_sectors == sectors)
		goto out;

	/*
	 * dirty_sectors is overwritten below rather than added to, so the
	 * value that must fit in the bucket is @sectors itself.
	 */
	if (sectors > ca->mi.bucket_size) {
		bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK,
			"bucket %llu:%llu gen %u data type %s sector count overflow: %u > %u\n"
			"while marking %s",
			iter->pos.inode, iter->pos.offset, u.gen,
			bch2_data_types[u.data_type ?: type],
			sectors, ca->mi.bucket_size,
			bch2_data_types[type]);
		ret = -EIO;
		goto out;
	}

	u.data_type	= type;
	u.dirty_sectors	= sectors;

	bkey_alloc_init(&a->k_i);
	a->k.p = iter->pos;
	bch2_alloc_pack(a, u);
	bch2_trans_update(trans, iter, &a->k_i, 0);
out:
	bch2_trans_iter_put(trans, iter);
	return ret;
}
/*
 * Mark bucket @b of device @ca as containing @sectors sectors of metadata of
 * type @type, via the btree.
 *
 * Wraps __bch2_trans_mark_metadata_bucket() in __bch2_trans_do() on the
 * caller's transaction, passing @res through as the disk reservation.
 *
 * Returns whatever __bch2_trans_do() yields (0 or a negative error code).
 */
int bch2_trans_mark_metadata_bucket(struct btree_trans *trans,
				    struct disk_reservation *res,
				    struct bch_dev *ca, size_t b,
				    enum bch_data_type type,
				    unsigned sectors)
{
	/*
	 * Forward the caller's @type and @sectors: superblock buckets are
	 * marked through this helper too, so these must not be hard-coded to
	 * the journal type / a full bucket.
	 */
	return __bch2_trans_do(trans, res, NULL, 0,
			__bch2_trans_mark_metadata_bucket(trans, ca, b,
							  type, sectors));
}
/*
 * Account the sector range [@start, @end) on device @ca as metadata of type
 * @type, one bucket at a time.
 *
 * Sectors are accumulated in (*@bucket, *@bucket_sectors), which the caller
 * carries across calls.  When the range moves on to a different bucket, the
 * previously accumulated bucket (if it has any sectors) is marked with
 * bch2_trans_mark_metadata_bucket() and the accumulator is restarted.  The
 * last bucket touched is left in the accumulator for the caller to flush.
 *
 * Returns 0 on success or the error from bch2_trans_mark_metadata_bucket().
 */
static int bch2_trans_mark_metadata_sectors(struct btree_trans *trans,
					    struct disk_reservation *res,
					    struct bch_dev *ca,
					    u64 start, u64 end,
					    enum bch_data_type type,
					    u64 *bucket, unsigned *bucket_sectors)
{
	do {
		u64 b = sector_to_bucket(ca, start);
		/* Portion of the range that lies within bucket b. */
		unsigned sectors =
			min_t(u64, bucket_to_sector(ca, b + 1), end) - start;

		if (b != *bucket) {
			if (*bucket_sectors) {
				/*
				 * Scoped to the only place it is assigned, so
				 * it can never be read uninitialized.
				 */
				int ret = bch2_trans_mark_metadata_bucket(trans, res, ca,
						*bucket, type, *bucket_sectors);
				if (ret)
					return ret;
			}

			*bucket		= b;
			*bucket_sectors	= 0;
		}

		*bucket_sectors += sectors;
		start += sectors;
	} while (start < end);

	return 0;
}
/*
 * Mark every superblock copy and every journal bucket of device @ca through
 * the btree.
 *
 * Superblock ranges are taken from the device's superblock layout and fed to
 * bch2_trans_mark_metadata_sectors(), which accumulates per-bucket sector
 * counts; the final partially accumulated bucket is flushed afterwards.
 * Journal buckets are then each marked as a full bucket of journal data.
 *
 * Returns 0 on success or the first error encountered.
 */
static int __bch2_trans_mark_dev_sb(struct btree_trans *trans,
				    struct disk_reservation *res,
				    struct bch_dev *ca)
{
	struct bch_sb_layout *layout = &ca->disk_sb.sb->layout;
	u64 cur_bucket = 0;
	unsigned cur_sectors = 0;
	unsigned idx;
	int err = 0;

	for (idx = 0; idx < layout->nr_superblocks; idx++) {
		u64 sb_start = le64_to_cpu(layout->sb_offset[idx]);
		u64 sb_end = sb_start + (1 << layout->sb_max_size_bits);

		/*
		 * A copy located at BCH_SB_SECTOR also gets the sectors in
		 * front of it, [0, BCH_SB_SECTOR), marked as superblock data.
		 */
		if (sb_start == BCH_SB_SECTOR) {
			err = bch2_trans_mark_metadata_sectors(trans, res, ca,
						0, BCH_SB_SECTOR,
						BCH_DATA_sb,
						&cur_bucket, &cur_sectors);
			if (err)
				return err;
		}

		err = bch2_trans_mark_metadata_sectors(trans, res, ca,
						sb_start, sb_end,
						BCH_DATA_sb,
						&cur_bucket, &cur_sectors);
		if (err)
			return err;
	}

	/* Flush whatever is still sitting in the per-bucket accumulator. */
	if (cur_sectors)
		err = bch2_trans_mark_metadata_bucket(trans, res, ca,
						cur_bucket, BCH_DATA_sb,
						cur_sectors);

	/* Stops at the first failure, including one from the flush above. */
	for (idx = 0; !err && idx < ca->journal.nr; idx++)
		err = bch2_trans_mark_metadata_bucket(trans, res, ca,
						ca->journal.buckets[idx],
						BCH_DATA_journal,
						ca->mi.bucket_size);

	return err;
}
/*
 * Mark the superblock and journal buckets of device @ca in filesystem @c via
 * the btree, by running __bch2_trans_mark_dev_sb() under bch2_trans_do().
 *
 * @res is handed both to bch2_trans_do() and to the inner helper.
 *
 * Returns the result of bch2_trans_do().
 */
int bch2_trans_mark_dev_sb(struct bch_fs *c,
			   struct disk_reservation *res,
			   struct bch_dev *ca)
{
	int ret;

	/*
	 * `trans` is not declared in this function; it is presumably supplied
	 * by the bch2_trans_do() macro.
	 */
	ret = bch2_trans_do(c, res, NULL, 0,
			__bch2_trans_mark_dev_sb(&trans, res, ca));

	return ret;
}
/* Disk reservations: */
#define SECTORS_CACHE 1024

View File

@ -259,6 +259,12 @@ int bch2_trans_mark_update(struct btree_trans *, struct btree_iter *iter,
struct bkey_i *insert, unsigned);
void bch2_trans_fs_usage_apply(struct btree_trans *, struct bch_fs_usage_online *);
/*
 * Mark one metadata bucket through the btree, using the caller's transaction.
 * Arguments: transaction, disk reservation, device, bucket number, data type,
 * sector count.
 */
int bch2_trans_mark_metadata_bucket(struct btree_trans *,
struct disk_reservation *, struct bch_dev *,
size_t, enum bch_data_type, unsigned);
/*
 * Mark a device's superblock copies and journal buckets through the btree.
 * Arguments: filesystem, disk reservation, device.
 */
int bch2_trans_mark_dev_sb(struct bch_fs *, struct disk_reservation *,
struct bch_dev *);
/* disk reservations: */
static inline void bch2_disk_reservation_put(struct bch_fs *c,

View File

@ -9,6 +9,7 @@
#include "alloc_foreground.h"
#include "bkey_methods.h"
#include "btree_gc.h"
#include "btree_update.h"
#include "buckets.h"
#include "journal.h"
#include "journal_io.h"
@ -823,18 +824,28 @@ static int __bch2_set_nr_journal_buckets(struct bch_dev *ca, unsigned nr,
if (pos <= ja->cur_idx)
ja->cur_idx = (ja->cur_idx + 1) % ja->nr;
bch2_mark_metadata_bucket(c, ca, bucket, BCH_DATA_journal,
ca->mi.bucket_size,
gc_phase(GC_PHASE_SB),
0);
if (!c || new_fs)
bch2_mark_metadata_bucket(c, ca, bucket, BCH_DATA_journal,
ca->mi.bucket_size,
gc_phase(GC_PHASE_SB),
0);
if (c) {
spin_unlock(&c->journal.lock);
percpu_up_read(&c->mark_lock);
}
if (c && !new_fs)
ret = bch2_trans_do(c, NULL, NULL, BTREE_INSERT_NOFAIL,
bch2_trans_mark_metadata_bucket(&trans, NULL, ca,
bucket, BCH_DATA_journal,
ca->mi.bucket_size));
if (!new_fs)
bch2_open_bucket_put(c, ob);
if (ret)
goto err;
}
err:
bch2_sb_resize_journal(&ca->disk_sb,

View File

@ -1220,13 +1220,6 @@ static int bch2_dev_attach_bdev(struct bch_fs *c, struct bch_sb_handle *sb)
if (ret)
return ret;
if (test_bit(BCH_FS_ALLOC_READ_DONE, &c->flags) &&
!percpu_u64_get(&ca->usage[0]->d[BCH_DATA_sb].buckets)) {
mutex_lock(&c->sb_lock);
bch2_mark_dev_superblock(ca->fs, ca, 0);
mutex_unlock(&c->sb_lock);
}
bch2_dev_sysfs_online(c, ca);
if (c->sb.nr_devices == 1)
@ -1600,7 +1593,7 @@ int bch2_dev_add(struct bch_fs *c, const char *path)
* allocate the journal, reset all the marks, then remark after we
* attach...
*/
bch2_mark_dev_superblock(ca->fs, ca, 0);
bch2_mark_dev_superblock(NULL, ca, 0);
err = "journal alloc failed";
ret = bch2_dev_journal_alloc(ca);
@ -1659,15 +1652,13 @@ have_slot:
ca->disk_sb.sb->dev_idx = dev_idx;
bch2_dev_attach(c, ca, dev_idx);
bch2_mark_dev_superblock(c, ca, 0);
bch2_write_super(c);
mutex_unlock(&c->sb_lock);
err = "alloc write failed";
ret = bch2_dev_alloc_write(c, ca, 0);
err = "error marking superblock";
ret = bch2_trans_mark_dev_sb(c, NULL, ca);
if (ret)
goto err;
goto err_late;
if (ca->mi.state == BCH_MEMBER_STATE_RW) {
err = __bch2_dev_read_write(c, ca);
@ -1688,6 +1679,7 @@ err:
bch_err(c, "Unable to add device: %s", err);
return ret;
err_late:
up_write(&c->state_lock);
bch_err(c, "Error going rw after adding device: %s", err);
return -EINVAL;
}
@ -1723,6 +1715,12 @@ int bch2_dev_online(struct bch_fs *c, const char *path)
}
ca = bch_dev_locked(c, dev_idx);
if (bch2_trans_mark_dev_sb(c, NULL, ca)) {
err = "bch2_trans_mark_dev_sb() error";
goto err;
}
if (ca->mi.state == BCH_MEMBER_STATE_RW) {
err = __bch2_dev_read_write(c, ca);
if (err)