More bcachefs bugfixes for 6.7
Bigger/user visible fixes:

 - bcache & bcachefs were broken with CFI enabled; patch for closures to
   fix type punning

 - mark erasure coding as extra-experimental; there are incompatible
   disk space accounting changes coming for erasure coding, and I'm
   still seeing checksum errors in some tests

 - several fixes for durability-related issues (durability is a device
   specific setting where we can tell bcachefs that data on a given
   device should be counted as replicated x times)

 - a fix for a rare livelock when a btree node merge then updates a
   parent node that is almost full

 - fix a race in the device removal path, where dropping a pointer in a
   btree node to a device would be clobbered by an in flight btree write
   updating the btree node key on completion

 - fix one SRCU lock hold time warning in the btree gc code - there's
   still a bunch more of these to fix

 - fix a rare race where we'd start copygc before initializing the "are
   we rw" percpu refcount; copygc would think we were already ro and die
   immediately

https://evilpiepirate.org/~testdashboard/ci?branch=bcachefs-for-upstream

-----BEGIN PGP SIGNATURE-----

iQIzBAABCgAdFiEEKnAFLkS8Qha+jvQrE6szbY3KbnYFAmVnoHoACgkQE6szbY3K
bnbzLBAApVEg3kB3XDCHYw+8AxLbzkuKbuV8FR/w+ULYAmRKbnM5e4pM4UJzwVJ9
vzBS9KUT4mVNpA5zl7FWmqh5AiJkhbPgb/BijtQiS+gz1ofZ8uCW/DjzWZpaTaT9
0zz9auiKwzJbBmLXC2lWC28MUPjFNXxlP2pfQPqhpKqlGKBC893hKeJ0Veb6dM1R
DqkctoWtSQzsNpEaXiQpKBNoNUIlYcFX1XXHn+XpPpWNe80SpMfVNCs2qPkMByu/
V/QULE9cHI7RTu7oyFY80+9xQDeXDDYZgvtpD7hqNPcyyoix+r/DVz1mZe41XF2B
bvaJhfcdWePctmiuEXJVXT4HSkwwzC6EKHwi7fejGY56hOvsrEAxNzTEIPRNw5st
ZkZlxASwFqkiJ3ehy+KRngLX2GZSbJsU4aM5ViQJKtz4rBzGyyf0LmMucdxAoDH5
zLzsAYaA6FkIZ5e5ZNdTDj7/TMnKWXlU9vTttqIpb8s7qSy+3ejk5NuGitJihZ4R
LAaCTs1JIsItLP47Ko0ZvmKV6CHlmt+Ht8OBqu73BWJ8vsBTQ8JMK4mGt60bwHvm
LdEMtp3C3FmXFc06zhKoGgjrletZYO6G4mFBPnQqh1brfFXM1prVg3ftDTqBWkMI
iAz2chiVc8k0qxoSAqylCYFaGzgiBKzw6YMtqPRmZgfLcq/sJ34=
=vN+y
-----END PGP SIGNATURE-----

Merge tag 'bcachefs-2023-11-29' of https://evilpiepirate.org/git/bcachefs

Pull more bcachefs bugfixes from Kent Overstreet:

 - bcache & bcachefs were broken with CFI enabled; patch for closures to
   fix type punning

 - mark erasure coding as extra-experimental; there are incompatible
   disk space accounting changes coming for erasure coding, and I'm
   still seeing checksum errors in some tests

 - several fixes for durability-related issues (durability is a device
   specific setting where we can tell bcachefs that data on a given
   device should be counted as replicated x times)

 - a fix for a rare livelock when a btree node merge then updates a
   parent node that is almost full

 - fix a race in the device removal path, where dropping a pointer in a
   btree node to a device would be clobbered by an in flight btree write
   updating the btree node key on completion

 - fix one SRCU lock hold time warning in the btree gc code - there's
   still a bunch more of these to fix

 - fix a rare race where we'd start copygc before initializing the "are
   we rw" percpu refcount; copygc would think we were already ro and die
   immediately

* tag 'bcachefs-2023-11-29' of https://evilpiepirate.org/git/bcachefs: (23 commits)
  bcachefs: Extra kthread_should_stop() calls for copygc
  bcachefs: Convert gc_alloc_start() to for_each_btree_key2()
  bcachefs: Fix race between btree writes and metadata drop
  bcachefs: move journal seq assertion
  bcachefs: -EROFS doesn't count as move_extent_start_fail
  bcachefs: trace_move_extent_start_fail() now includes errcode
  bcachefs: Fix split_race livelock
  bcachefs: Fix bucket data type for stripe buckets
  bcachefs: Add missing validation for jset_entry_data_usage
  bcachefs: Fix zstd compress workspace size
  bcachefs: bpos is misaligned on big endian
  bcachefs: Fix ec + durability calculation
  bcachefs: Data update path won't accidentaly grow replicas
  bcachefs: deallocate_extra_replicas()
  bcachefs: Proper refcounting for journal_keys
  bcachefs: preserve device path as device name
  bcachefs: Fix an endianness conversion
  bcachefs: Start gc, copygc, rebalance threads after initing writes ref
  bcachefs: Don't stop copygc thread on device resize
  bcachefs: Make sure bch2_move_ratelimit() also waits for move_ops
  ...
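The first item above is the headline change in the diff that follows: bcache and bcachefs closure callbacks were declared as taking a struct closure *, but were also invoked through the workqueue machinery as a work_func_t, and that kind of type punning trips kernel CFI. The conversion gives every callback the work_func_t prototype and recovers the closure (and its containing object) inside. The sketch below shows roughly what the new CLOSURE_CALLBACK()/closure_type() helpers expand to and how one callback from this diff changes; the real helper definitions live outside this diff (in the closure header), so treat the exact macro bodies here as an approximation.

/* Rough sketch of the helpers; approximate, not copied from the tree. */
#define CLOSURE_CALLBACK(name)	void name(struct work_struct *ws)

#define closure_type(name, _type, member)				\
	struct closure *cl = container_of(ws, struct closure, work);	\
	_type *name = container_of(cl, _type, member)

/* Before: the callback takes a struct closure *, yet is also called as a
 * work_func_t through cl->work - an indirect-call type mismatch under CFI: */
static void btree_node_write_unlock(struct closure *cl)
{
	struct btree *b = container_of(cl, struct btree, io);

	up(&b->io_mutex);
}

/* After: the prototype really is work_func_t, and direct callers now pass
 * &cl->work instead of cl, as the hunks below do: */
static CLOSURE_CALLBACK(btree_node_write_unlock)
{
	closure_type(b, struct btree, io);

	up(&b->io_mutex);
}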
commit e6861be452
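Several of the commits above ("Fix ec + durability calculation", "deallocate_extra_replicas()", "Data update path won't accidentaly grow replicas") hinge on how an extent's effective replica count is computed from per-device durability. The gist, as the bch2_extent_ptr_durability() hunk further down settles it: a cached pointer contributes nothing, a pointer on a failed device contributes nothing, and any other pointer contributes its device's durability plus the stripe redundancy if it participates in erasure coding. The snippet below is only an illustrative stand-in with simplified, made-up types (ptr_info, ptr_durability, extent_durability), not bcachefs code.

/* Illustrative only - simplified stand-ins for the bcachefs types. */
struct ptr_info {
	bool		cached;		/* cached copies don't add durability */
	bool		failed_dev;	/* device marked failed */
	bool		has_ec;		/* pointer is part of an EC stripe */
	unsigned	ec_redundancy;	/* stripe redundancy, if has_ec */
	unsigned	dev_durability;	/* per-device durability setting */
};

static unsigned ptr_durability(const struct ptr_info *p)
{
	if (p->cached || p->failed_dev)
		return 0;

	return p->dev_durability + (p->has_ec ? p->ec_redundancy : 0);
}

/* An extent's effective replication is the sum over its pointers; the data
 * update path compares this against the requested number of replicas to
 * decide whether extra replicas must be allocated or dropped: */
static unsigned extent_durability(const struct ptr_info *ptrs, unsigned nr)
{
	unsigned i, sum = 0;

	for (i = 0; i < nr; i++)
		sum += ptr_durability(&ptrs[i]);

	return sum;
}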
@@ -293,16 +293,16 @@ static void btree_complete_write(struct btree *b, struct btree_write *w)
w->journal = NULL;
}

static void btree_node_write_unlock(struct closure *cl)
static CLOSURE_CALLBACK(btree_node_write_unlock)
{
struct btree *b = container_of(cl, struct btree, io);
closure_type(b, struct btree, io);

up(&b->io_mutex);
}

static void __btree_node_write_done(struct closure *cl)
static CLOSURE_CALLBACK(__btree_node_write_done)
{
struct btree *b = container_of(cl, struct btree, io);
closure_type(b, struct btree, io);
struct btree_write *w = btree_prev_write(b);

bch_bbio_free(b->bio, b->c);
@@ -315,12 +315,12 @@ static void __btree_node_write_done(struct closure *cl)
closure_return_with_destructor(cl, btree_node_write_unlock);
}

static void btree_node_write_done(struct closure *cl)
static CLOSURE_CALLBACK(btree_node_write_done)
{
struct btree *b = container_of(cl, struct btree, io);
closure_type(b, struct btree, io);

bio_free_pages(b->bio);
__btree_node_write_done(cl);
__btree_node_write_done(&cl->work);
}

static void btree_node_write_endio(struct bio *bio)

@@ -723,11 +723,11 @@ static void journal_write_endio(struct bio *bio)
closure_put(&w->c->journal.io);
}

static void journal_write(struct closure *cl);
static CLOSURE_CALLBACK(journal_write);

static void journal_write_done(struct closure *cl)
static CLOSURE_CALLBACK(journal_write_done)
{
struct journal *j = container_of(cl, struct journal, io);
closure_type(j, struct journal, io);
struct journal_write *w = (j->cur == j->w)
? &j->w[1]
: &j->w[0];
@@ -736,19 +736,19 @@ static void journal_write_done(struct closure *cl)
continue_at_nobarrier(cl, journal_write, bch_journal_wq);
}

static void journal_write_unlock(struct closure *cl)
static CLOSURE_CALLBACK(journal_write_unlock)
__releases(&c->journal.lock)
{
struct cache_set *c = container_of(cl, struct cache_set, journal.io);
closure_type(c, struct cache_set, journal.io);

c->journal.io_in_flight = 0;
spin_unlock(&c->journal.lock);
}

static void journal_write_unlocked(struct closure *cl)
static CLOSURE_CALLBACK(journal_write_unlocked)
__releases(c->journal.lock)
{
struct cache_set *c = container_of(cl, struct cache_set, journal.io);
closure_type(c, struct cache_set, journal.io);
struct cache *ca = c->cache;
struct journal_write *w = c->journal.cur;
struct bkey *k = &c->journal.key;
@@ -823,12 +823,12 @@ static void journal_write_unlocked(struct closure *cl)
continue_at(cl, journal_write_done, NULL);
}

static void journal_write(struct closure *cl)
static CLOSURE_CALLBACK(journal_write)
{
struct cache_set *c = container_of(cl, struct cache_set, journal.io);
closure_type(c, struct cache_set, journal.io);

spin_lock(&c->journal.lock);
journal_write_unlocked(cl);
journal_write_unlocked(&cl->work);
}

static void journal_try_write(struct cache_set *c)
@ -35,16 +35,16 @@ static bool moving_pred(struct keybuf *buf, struct bkey *k)
|
||||
|
||||
/* Moving GC - IO loop */
|
||||
|
||||
static void moving_io_destructor(struct closure *cl)
|
||||
static CLOSURE_CALLBACK(moving_io_destructor)
|
||||
{
|
||||
struct moving_io *io = container_of(cl, struct moving_io, cl);
|
||||
closure_type(io, struct moving_io, cl);
|
||||
|
||||
kfree(io);
|
||||
}
|
||||
|
||||
static void write_moving_finish(struct closure *cl)
|
||||
static CLOSURE_CALLBACK(write_moving_finish)
|
||||
{
|
||||
struct moving_io *io = container_of(cl, struct moving_io, cl);
|
||||
closure_type(io, struct moving_io, cl);
|
||||
struct bio *bio = &io->bio.bio;
|
||||
|
||||
bio_free_pages(bio);
|
||||
@ -89,9 +89,9 @@ static void moving_init(struct moving_io *io)
|
||||
bch_bio_map(bio, NULL);
|
||||
}
|
||||
|
||||
static void write_moving(struct closure *cl)
|
||||
static CLOSURE_CALLBACK(write_moving)
|
||||
{
|
||||
struct moving_io *io = container_of(cl, struct moving_io, cl);
|
||||
closure_type(io, struct moving_io, cl);
|
||||
struct data_insert_op *op = &io->op;
|
||||
|
||||
if (!op->status) {
|
||||
@ -113,9 +113,9 @@ static void write_moving(struct closure *cl)
|
||||
continue_at(cl, write_moving_finish, op->wq);
|
||||
}
|
||||
|
||||
static void read_moving_submit(struct closure *cl)
|
||||
static CLOSURE_CALLBACK(read_moving_submit)
|
||||
{
|
||||
struct moving_io *io = container_of(cl, struct moving_io, cl);
|
||||
closure_type(io, struct moving_io, cl);
|
||||
struct bio *bio = &io->bio.bio;
|
||||
|
||||
bch_submit_bbio(bio, io->op.c, &io->w->key, 0);
|
||||
|
@ -25,7 +25,7 @@
|
||||
|
||||
struct kmem_cache *bch_search_cache;
|
||||
|
||||
static void bch_data_insert_start(struct closure *cl);
|
||||
static CLOSURE_CALLBACK(bch_data_insert_start);
|
||||
|
||||
static unsigned int cache_mode(struct cached_dev *dc)
|
||||
{
|
||||
@ -55,9 +55,9 @@ static void bio_csum(struct bio *bio, struct bkey *k)
|
||||
|
||||
/* Insert data into cache */
|
||||
|
||||
static void bch_data_insert_keys(struct closure *cl)
|
||||
static CLOSURE_CALLBACK(bch_data_insert_keys)
|
||||
{
|
||||
struct data_insert_op *op = container_of(cl, struct data_insert_op, cl);
|
||||
closure_type(op, struct data_insert_op, cl);
|
||||
atomic_t *journal_ref = NULL;
|
||||
struct bkey *replace_key = op->replace ? &op->replace_key : NULL;
|
||||
int ret;
|
||||
@ -136,9 +136,9 @@ out:
|
||||
continue_at(cl, bch_data_insert_keys, op->wq);
|
||||
}
|
||||
|
||||
static void bch_data_insert_error(struct closure *cl)
|
||||
static CLOSURE_CALLBACK(bch_data_insert_error)
|
||||
{
|
||||
struct data_insert_op *op = container_of(cl, struct data_insert_op, cl);
|
||||
closure_type(op, struct data_insert_op, cl);
|
||||
|
||||
/*
|
||||
* Our data write just errored, which means we've got a bunch of keys to
|
||||
@ -163,7 +163,7 @@ static void bch_data_insert_error(struct closure *cl)
|
||||
|
||||
op->insert_keys.top = dst;
|
||||
|
||||
bch_data_insert_keys(cl);
|
||||
bch_data_insert_keys(&cl->work);
|
||||
}
|
||||
|
||||
static void bch_data_insert_endio(struct bio *bio)
|
||||
@ -184,9 +184,9 @@ static void bch_data_insert_endio(struct bio *bio)
|
||||
bch_bbio_endio(op->c, bio, bio->bi_status, "writing data to cache");
|
||||
}
|
||||
|
||||
static void bch_data_insert_start(struct closure *cl)
|
||||
static CLOSURE_CALLBACK(bch_data_insert_start)
|
||||
{
|
||||
struct data_insert_op *op = container_of(cl, struct data_insert_op, cl);
|
||||
closure_type(op, struct data_insert_op, cl);
|
||||
struct bio *bio = op->bio, *n;
|
||||
|
||||
if (op->bypass)
|
||||
@ -305,16 +305,16 @@ err:
|
||||
* If op->bypass is true, instead of inserting the data it invalidates the
|
||||
* region of the cache represented by op->bio and op->inode.
|
||||
*/
|
||||
void bch_data_insert(struct closure *cl)
|
||||
CLOSURE_CALLBACK(bch_data_insert)
|
||||
{
|
||||
struct data_insert_op *op = container_of(cl, struct data_insert_op, cl);
|
||||
closure_type(op, struct data_insert_op, cl);
|
||||
|
||||
trace_bcache_write(op->c, op->inode, op->bio,
|
||||
op->writeback, op->bypass);
|
||||
|
||||
bch_keylist_init(&op->insert_keys);
|
||||
bio_get(op->bio);
|
||||
bch_data_insert_start(cl);
|
||||
bch_data_insert_start(&cl->work);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -575,9 +575,9 @@ static int cache_lookup_fn(struct btree_op *op, struct btree *b, struct bkey *k)
|
||||
return n == bio ? MAP_DONE : MAP_CONTINUE;
|
||||
}
|
||||
|
||||
static void cache_lookup(struct closure *cl)
|
||||
static CLOSURE_CALLBACK(cache_lookup)
|
||||
{
|
||||
struct search *s = container_of(cl, struct search, iop.cl);
|
||||
closure_type(s, struct search, iop.cl);
|
||||
struct bio *bio = &s->bio.bio;
|
||||
struct cached_dev *dc;
|
||||
int ret;
|
||||
@ -698,9 +698,9 @@ static void do_bio_hook(struct search *s,
|
||||
bio_cnt_set(bio, 3);
|
||||
}
|
||||
|
||||
static void search_free(struct closure *cl)
|
||||
static CLOSURE_CALLBACK(search_free)
|
||||
{
|
||||
struct search *s = container_of(cl, struct search, cl);
|
||||
closure_type(s, struct search, cl);
|
||||
|
||||
atomic_dec(&s->iop.c->search_inflight);
|
||||
|
||||
@ -749,20 +749,20 @@ static inline struct search *search_alloc(struct bio *bio,
|
||||
|
||||
/* Cached devices */
|
||||
|
||||
static void cached_dev_bio_complete(struct closure *cl)
|
||||
static CLOSURE_CALLBACK(cached_dev_bio_complete)
|
||||
{
|
||||
struct search *s = container_of(cl, struct search, cl);
|
||||
closure_type(s, struct search, cl);
|
||||
struct cached_dev *dc = container_of(s->d, struct cached_dev, disk);
|
||||
|
||||
cached_dev_put(dc);
|
||||
search_free(cl);
|
||||
search_free(&cl->work);
|
||||
}
|
||||
|
||||
/* Process reads */
|
||||
|
||||
static void cached_dev_read_error_done(struct closure *cl)
|
||||
static CLOSURE_CALLBACK(cached_dev_read_error_done)
|
||||
{
|
||||
struct search *s = container_of(cl, struct search, cl);
|
||||
closure_type(s, struct search, cl);
|
||||
|
||||
if (s->iop.replace_collision)
|
||||
bch_mark_cache_miss_collision(s->iop.c, s->d);
|
||||
@ -770,12 +770,12 @@ static void cached_dev_read_error_done(struct closure *cl)
|
||||
if (s->iop.bio)
|
||||
bio_free_pages(s->iop.bio);
|
||||
|
||||
cached_dev_bio_complete(cl);
|
||||
cached_dev_bio_complete(&cl->work);
|
||||
}
|
||||
|
||||
static void cached_dev_read_error(struct closure *cl)
|
||||
static CLOSURE_CALLBACK(cached_dev_read_error)
|
||||
{
|
||||
struct search *s = container_of(cl, struct search, cl);
|
||||
closure_type(s, struct search, cl);
|
||||
struct bio *bio = &s->bio.bio;
|
||||
|
||||
/*
|
||||
@ -801,9 +801,9 @@ static void cached_dev_read_error(struct closure *cl)
|
||||
continue_at(cl, cached_dev_read_error_done, NULL);
|
||||
}
|
||||
|
||||
static void cached_dev_cache_miss_done(struct closure *cl)
|
||||
static CLOSURE_CALLBACK(cached_dev_cache_miss_done)
|
||||
{
|
||||
struct search *s = container_of(cl, struct search, cl);
|
||||
closure_type(s, struct search, cl);
|
||||
struct bcache_device *d = s->d;
|
||||
|
||||
if (s->iop.replace_collision)
|
||||
@ -812,13 +812,13 @@ static void cached_dev_cache_miss_done(struct closure *cl)
|
||||
if (s->iop.bio)
|
||||
bio_free_pages(s->iop.bio);
|
||||
|
||||
cached_dev_bio_complete(cl);
|
||||
cached_dev_bio_complete(&cl->work);
|
||||
closure_put(&d->cl);
|
||||
}
|
||||
|
||||
static void cached_dev_read_done(struct closure *cl)
|
||||
static CLOSURE_CALLBACK(cached_dev_read_done)
|
||||
{
|
||||
struct search *s = container_of(cl, struct search, cl);
|
||||
closure_type(s, struct search, cl);
|
||||
struct cached_dev *dc = container_of(s->d, struct cached_dev, disk);
|
||||
|
||||
/*
|
||||
@ -858,9 +858,9 @@ static void cached_dev_read_done(struct closure *cl)
|
||||
continue_at(cl, cached_dev_cache_miss_done, NULL);
|
||||
}
|
||||
|
||||
static void cached_dev_read_done_bh(struct closure *cl)
|
||||
static CLOSURE_CALLBACK(cached_dev_read_done_bh)
|
||||
{
|
||||
struct search *s = container_of(cl, struct search, cl);
|
||||
closure_type(s, struct search, cl);
|
||||
struct cached_dev *dc = container_of(s->d, struct cached_dev, disk);
|
||||
|
||||
bch_mark_cache_accounting(s->iop.c, s->d,
|
||||
@ -955,13 +955,13 @@ static void cached_dev_read(struct cached_dev *dc, struct search *s)
|
||||
|
||||
/* Process writes */
|
||||
|
||||
static void cached_dev_write_complete(struct closure *cl)
|
||||
static CLOSURE_CALLBACK(cached_dev_write_complete)
|
||||
{
|
||||
struct search *s = container_of(cl, struct search, cl);
|
||||
closure_type(s, struct search, cl);
|
||||
struct cached_dev *dc = container_of(s->d, struct cached_dev, disk);
|
||||
|
||||
up_read_non_owner(&dc->writeback_lock);
|
||||
cached_dev_bio_complete(cl);
|
||||
cached_dev_bio_complete(&cl->work);
|
||||
}
|
||||
|
||||
static void cached_dev_write(struct cached_dev *dc, struct search *s)
|
||||
@ -1048,9 +1048,9 @@ insert_data:
|
||||
continue_at(cl, cached_dev_write_complete, NULL);
|
||||
}
|
||||
|
||||
static void cached_dev_nodata(struct closure *cl)
|
||||
static CLOSURE_CALLBACK(cached_dev_nodata)
|
||||
{
|
||||
struct search *s = container_of(cl, struct search, cl);
|
||||
closure_type(s, struct search, cl);
|
||||
struct bio *bio = &s->bio.bio;
|
||||
|
||||
if (s->iop.flush_journal)
|
||||
@ -1265,9 +1265,9 @@ static int flash_dev_cache_miss(struct btree *b, struct search *s,
|
||||
return MAP_CONTINUE;
|
||||
}
|
||||
|
||||
static void flash_dev_nodata(struct closure *cl)
|
||||
static CLOSURE_CALLBACK(flash_dev_nodata)
|
||||
{
|
||||
struct search *s = container_of(cl, struct search, cl);
|
||||
closure_type(s, struct search, cl);
|
||||
|
||||
if (s->iop.flush_journal)
|
||||
bch_journal_meta(s->iop.c, cl);
|
||||
|
@ -34,7 +34,7 @@ struct data_insert_op {
|
||||
};
|
||||
|
||||
unsigned int bch_get_congested(const struct cache_set *c);
|
||||
void bch_data_insert(struct closure *cl);
|
||||
CLOSURE_CALLBACK(bch_data_insert);
|
||||
|
||||
void bch_cached_dev_request_init(struct cached_dev *dc);
|
||||
void cached_dev_submit_bio(struct bio *bio);
|
||||
|
@ -327,9 +327,9 @@ static void __write_super(struct cache_sb *sb, struct cache_sb_disk *out,
|
||||
submit_bio(bio);
|
||||
}
|
||||
|
||||
static void bch_write_bdev_super_unlock(struct closure *cl)
|
||||
static CLOSURE_CALLBACK(bch_write_bdev_super_unlock)
|
||||
{
|
||||
struct cached_dev *dc = container_of(cl, struct cached_dev, sb_write);
|
||||
closure_type(dc, struct cached_dev, sb_write);
|
||||
|
||||
up(&dc->sb_write_mutex);
|
||||
}
|
||||
@ -363,9 +363,9 @@ static void write_super_endio(struct bio *bio)
|
||||
closure_put(&ca->set->sb_write);
|
||||
}
|
||||
|
||||
static void bcache_write_super_unlock(struct closure *cl)
|
||||
static CLOSURE_CALLBACK(bcache_write_super_unlock)
|
||||
{
|
||||
struct cache_set *c = container_of(cl, struct cache_set, sb_write);
|
||||
closure_type(c, struct cache_set, sb_write);
|
||||
|
||||
up(&c->sb_write_mutex);
|
||||
}
|
||||
@ -407,9 +407,9 @@ static void uuid_endio(struct bio *bio)
|
||||
closure_put(cl);
|
||||
}
|
||||
|
||||
static void uuid_io_unlock(struct closure *cl)
|
||||
static CLOSURE_CALLBACK(uuid_io_unlock)
|
||||
{
|
||||
struct cache_set *c = container_of(cl, struct cache_set, uuid_write);
|
||||
closure_type(c, struct cache_set, uuid_write);
|
||||
|
||||
up(&c->uuid_write_mutex);
|
||||
}
|
||||
@ -1344,9 +1344,9 @@ void bch_cached_dev_release(struct kobject *kobj)
|
||||
module_put(THIS_MODULE);
|
||||
}
|
||||
|
||||
static void cached_dev_free(struct closure *cl)
|
||||
static CLOSURE_CALLBACK(cached_dev_free)
|
||||
{
|
||||
struct cached_dev *dc = container_of(cl, struct cached_dev, disk.cl);
|
||||
closure_type(dc, struct cached_dev, disk.cl);
|
||||
|
||||
if (test_and_clear_bit(BCACHE_DEV_WB_RUNNING, &dc->disk.flags))
|
||||
cancel_writeback_rate_update_dwork(dc);
|
||||
@ -1378,9 +1378,9 @@ static void cached_dev_free(struct closure *cl)
|
||||
kobject_put(&dc->disk.kobj);
|
||||
}
|
||||
|
||||
static void cached_dev_flush(struct closure *cl)
|
||||
static CLOSURE_CALLBACK(cached_dev_flush)
|
||||
{
|
||||
struct cached_dev *dc = container_of(cl, struct cached_dev, disk.cl);
|
||||
closure_type(dc, struct cached_dev, disk.cl);
|
||||
struct bcache_device *d = &dc->disk;
|
||||
|
||||
mutex_lock(&bch_register_lock);
|
||||
@ -1499,9 +1499,9 @@ void bch_flash_dev_release(struct kobject *kobj)
|
||||
kfree(d);
|
||||
}
|
||||
|
||||
static void flash_dev_free(struct closure *cl)
|
||||
static CLOSURE_CALLBACK(flash_dev_free)
|
||||
{
|
||||
struct bcache_device *d = container_of(cl, struct bcache_device, cl);
|
||||
closure_type(d, struct bcache_device, cl);
|
||||
|
||||
mutex_lock(&bch_register_lock);
|
||||
atomic_long_sub(bcache_dev_sectors_dirty(d),
|
||||
@ -1512,9 +1512,9 @@ static void flash_dev_free(struct closure *cl)
|
||||
kobject_put(&d->kobj);
|
||||
}
|
||||
|
||||
static void flash_dev_flush(struct closure *cl)
|
||||
static CLOSURE_CALLBACK(flash_dev_flush)
|
||||
{
|
||||
struct bcache_device *d = container_of(cl, struct bcache_device, cl);
|
||||
closure_type(d, struct bcache_device, cl);
|
||||
|
||||
mutex_lock(&bch_register_lock);
|
||||
bcache_device_unlink(d);
|
||||
@ -1670,9 +1670,9 @@ void bch_cache_set_release(struct kobject *kobj)
|
||||
module_put(THIS_MODULE);
|
||||
}
|
||||
|
||||
static void cache_set_free(struct closure *cl)
|
||||
static CLOSURE_CALLBACK(cache_set_free)
|
||||
{
|
||||
struct cache_set *c = container_of(cl, struct cache_set, cl);
|
||||
closure_type(c, struct cache_set, cl);
|
||||
struct cache *ca;
|
||||
|
||||
debugfs_remove(c->debug);
|
||||
@ -1711,9 +1711,9 @@ static void cache_set_free(struct closure *cl)
|
||||
kobject_put(&c->kobj);
|
||||
}
|
||||
|
||||
static void cache_set_flush(struct closure *cl)
|
||||
static CLOSURE_CALLBACK(cache_set_flush)
|
||||
{
|
||||
struct cache_set *c = container_of(cl, struct cache_set, caching);
|
||||
closure_type(c, struct cache_set, caching);
|
||||
struct cache *ca = c->cache;
|
||||
struct btree *b;
|
||||
|
||||
@ -1808,9 +1808,9 @@ static void conditional_stop_bcache_device(struct cache_set *c,
|
||||
}
|
||||
}
|
||||
|
||||
static void __cache_set_unregister(struct closure *cl)
|
||||
static CLOSURE_CALLBACK(__cache_set_unregister)
|
||||
{
|
||||
struct cache_set *c = container_of(cl, struct cache_set, caching);
|
||||
closure_type(c, struct cache_set, caching);
|
||||
struct cached_dev *dc;
|
||||
struct bcache_device *d;
|
||||
size_t i;
|
||||
|
@ -341,16 +341,16 @@ static void dirty_init(struct keybuf_key *w)
|
||||
bch_bio_map(bio, NULL);
|
||||
}
|
||||
|
||||
static void dirty_io_destructor(struct closure *cl)
|
||||
static CLOSURE_CALLBACK(dirty_io_destructor)
|
||||
{
|
||||
struct dirty_io *io = container_of(cl, struct dirty_io, cl);
|
||||
closure_type(io, struct dirty_io, cl);
|
||||
|
||||
kfree(io);
|
||||
}
|
||||
|
||||
static void write_dirty_finish(struct closure *cl)
|
||||
static CLOSURE_CALLBACK(write_dirty_finish)
|
||||
{
|
||||
struct dirty_io *io = container_of(cl, struct dirty_io, cl);
|
||||
closure_type(io, struct dirty_io, cl);
|
||||
struct keybuf_key *w = io->bio.bi_private;
|
||||
struct cached_dev *dc = io->dc;
|
||||
|
||||
@ -400,9 +400,9 @@ static void dirty_endio(struct bio *bio)
|
||||
closure_put(&io->cl);
|
||||
}
|
||||
|
||||
static void write_dirty(struct closure *cl)
|
||||
static CLOSURE_CALLBACK(write_dirty)
|
||||
{
|
||||
struct dirty_io *io = container_of(cl, struct dirty_io, cl);
|
||||
closure_type(io, struct dirty_io, cl);
|
||||
struct keybuf_key *w = io->bio.bi_private;
|
||||
struct cached_dev *dc = io->dc;
|
||||
|
||||
@ -462,9 +462,9 @@ static void read_dirty_endio(struct bio *bio)
|
||||
dirty_endio(bio);
|
||||
}
|
||||
|
||||
static void read_dirty_submit(struct closure *cl)
|
||||
static CLOSURE_CALLBACK(read_dirty_submit)
|
||||
{
|
||||
struct dirty_io *io = container_of(cl, struct dirty_io, cl);
|
||||
closure_type(io, struct dirty_io, cl);
|
||||
|
||||
closure_bio_submit(io->dc->disk.c, &io->bio, cl);
|
||||
|
||||
|
@@ -33,6 +33,18 @@ config BCACHEFS_QUOTA
depends on BCACHEFS_FS
select QUOTACTL

config BCACHEFS_ERASURE_CODING
bool "bcachefs erasure coding (RAID5/6) support (EXPERIMENTAL)"
depends on BCACHEFS_FS
select QUOTACTL
help
This enables the "erasure_code" filesysystem and inode option, which
organizes data into reed-solomon stripes instead of ordinary
replication.

WARNING: this feature is still undergoing on disk format changes, and
should only be enabled for testing purposes.

config BCACHEFS_POSIX_ACL
bool "bcachefs POSIX ACL support"
depends on BCACHEFS_FS
@ -1297,6 +1297,30 @@ out:
|
||||
return wp;
|
||||
}
|
||||
|
||||
static noinline void
|
||||
deallocate_extra_replicas(struct bch_fs *c,
|
||||
struct open_buckets *ptrs,
|
||||
struct open_buckets *ptrs_no_use,
|
||||
unsigned extra_replicas)
|
||||
{
|
||||
struct open_buckets ptrs2 = { 0 };
|
||||
struct open_bucket *ob;
|
||||
unsigned i;
|
||||
|
||||
open_bucket_for_each(c, ptrs, ob, i) {
|
||||
unsigned d = bch_dev_bkey_exists(c, ob->dev)->mi.durability;
|
||||
|
||||
if (d && d <= extra_replicas) {
|
||||
extra_replicas -= d;
|
||||
ob_push(c, ptrs_no_use, ob);
|
||||
} else {
|
||||
ob_push(c, &ptrs2, ob);
|
||||
}
|
||||
}
|
||||
|
||||
*ptrs = ptrs2;
|
||||
}
|
||||
|
||||
/*
|
||||
* Get us an open_bucket we can allocate from, return with it locked:
|
||||
*/
|
||||
@ -1321,6 +1345,9 @@ int bch2_alloc_sectors_start_trans(struct btree_trans *trans,
|
||||
int ret;
|
||||
int i;
|
||||
|
||||
if (!IS_ENABLED(CONFIG_BCACHEFS_ERASURE_CODING))
|
||||
erasure_code = false;
|
||||
|
||||
BUG_ON(flags & BCH_WRITE_ONLY_SPECIFIED_DEVS);
|
||||
|
||||
BUG_ON(!nr_replicas || !nr_replicas_required);
|
||||
@ -1382,6 +1409,9 @@ alloc_done:
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
if (nr_effective > nr_replicas)
|
||||
deallocate_extra_replicas(c, &ptrs, &wp->ptrs, nr_effective - nr_replicas);
|
||||
|
||||
/* Free buckets we didn't use: */
|
||||
open_bucket_for_each(c, &wp->ptrs, ob, i)
|
||||
open_bucket_free_unused(c, ob);
|
||||
|
@ -638,6 +638,8 @@ struct journal_keys {
|
||||
size_t gap;
|
||||
size_t nr;
|
||||
size_t size;
|
||||
atomic_t ref;
|
||||
bool initial_ref_held;
|
||||
};
|
||||
|
||||
struct btree_trans_buf {
|
||||
@ -929,7 +931,7 @@ struct bch_fs {
|
||||
mempool_t compression_bounce[2];
|
||||
mempool_t compress_workspace[BCH_COMPRESSION_TYPE_NR];
|
||||
mempool_t decompress_workspace;
|
||||
ZSTD_parameters zstd_params;
|
||||
size_t zstd_workspace_size;
|
||||
|
||||
struct crypto_shash *sha256;
|
||||
struct crypto_sync_skcipher *chacha20;
|
||||
|
@ -151,7 +151,11 @@ struct bpos {
|
||||
#else
|
||||
#error edit for your odd byteorder.
|
||||
#endif
|
||||
} __packed __aligned(4);
|
||||
} __packed
|
||||
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
|
||||
__aligned(4)
|
||||
#endif
|
||||
;
|
||||
|
||||
#define KEY_INODE_MAX ((__u64)~0ULL)
|
||||
#define KEY_OFFSET_MAX ((__u64)~0ULL)
|
||||
@ -1528,7 +1532,7 @@ struct bch_sb_field_disk_groups {
|
||||
x(move_extent_write, 36) \
|
||||
x(move_extent_finish, 37) \
|
||||
x(move_extent_fail, 38) \
|
||||
x(move_extent_alloc_mem_fail, 39) \
|
||||
x(move_extent_start_fail, 39) \
|
||||
x(copygc, 40) \
|
||||
x(copygc_wait, 41) \
|
||||
x(gc_gens_end, 42) \
|
||||
|
@ -1541,8 +1541,8 @@ static int bch2_gc_alloc_start(struct bch_fs *c, bool metadata_only)
|
||||
rcu_assign_pointer(ca->buckets_gc, buckets);
|
||||
}
|
||||
|
||||
for_each_btree_key(trans, iter, BTREE_ID_alloc, POS_MIN,
|
||||
BTREE_ITER_PREFETCH, k, ret) {
|
||||
ret = for_each_btree_key2(trans, iter, BTREE_ID_alloc, POS_MIN,
|
||||
BTREE_ITER_PREFETCH, k, ({
|
||||
ca = bch_dev_bkey_exists(c, k.k->p.inode);
|
||||
g = gc_bucket(ca, k.k->p.offset);
|
||||
|
||||
@ -1561,8 +1561,9 @@ static int bch2_gc_alloc_start(struct bch_fs *c, bool metadata_only)
|
||||
g->stripe = a->stripe;
|
||||
g->stripe_redundancy = a->stripe_redundancy;
|
||||
}
|
||||
}
|
||||
bch2_trans_iter_exit(trans, &iter);
|
||||
|
||||
0;
|
||||
}));
|
||||
err:
|
||||
bch2_trans_put(trans);
|
||||
if (ret)
|
||||
|
@ -1358,10 +1358,9 @@ static bool btree_node_has_extra_bsets(struct bch_fs *c, unsigned offset, void *
|
||||
return offset;
|
||||
}
|
||||
|
||||
static void btree_node_read_all_replicas_done(struct closure *cl)
|
||||
static CLOSURE_CALLBACK(btree_node_read_all_replicas_done)
|
||||
{
|
||||
struct btree_node_read_all *ra =
|
||||
container_of(cl, struct btree_node_read_all, cl);
|
||||
closure_type(ra, struct btree_node_read_all, cl);
|
||||
struct bch_fs *c = ra->c;
|
||||
struct btree *b = ra->b;
|
||||
struct printbuf buf = PRINTBUF;
|
||||
@ -1567,7 +1566,7 @@ static int btree_node_read_all_replicas(struct bch_fs *c, struct btree *b, bool
|
||||
|
||||
if (sync) {
|
||||
closure_sync(&ra->cl);
|
||||
btree_node_read_all_replicas_done(&ra->cl);
|
||||
btree_node_read_all_replicas_done(&ra->cl.work);
|
||||
} else {
|
||||
continue_at(&ra->cl, btree_node_read_all_replicas_done,
|
||||
c->io_complete_wq);
|
||||
|
@ -2981,7 +2981,8 @@ struct btree_trans *__bch2_trans_get(struct bch_fs *c, unsigned fn_idx)
|
||||
trans->fn_idx = fn_idx;
|
||||
trans->locking_wait.task = current;
|
||||
trans->journal_replay_not_finished =
|
||||
!test_bit(JOURNAL_REPLAY_DONE, &c->journal.flags);
|
||||
unlikely(!test_bit(JOURNAL_REPLAY_DONE, &c->journal.flags)) &&
|
||||
atomic_inc_not_zero(&c->journal_keys.ref);
|
||||
closure_init_stack(&trans->ref);
|
||||
|
||||
s = btree_trans_stats(trans);
|
||||
@ -3098,6 +3099,9 @@ void bch2_trans_put(struct btree_trans *trans)
|
||||
kfree(trans->fs_usage_deltas);
|
||||
}
|
||||
|
||||
if (unlikely(trans->journal_replay_not_finished))
|
||||
bch2_journal_keys_put(c);
|
||||
|
||||
if (trans->mem_bytes == BTREE_TRANS_MEM_MAX)
|
||||
mempool_free(trans->mem, &c->btree_trans_mem_pool);
|
||||
else
|
||||
|
@ -80,6 +80,8 @@ struct bkey_i *bch2_journal_keys_peek_upto(struct bch_fs *c, enum btree_id btree
|
||||
struct journal_keys *keys = &c->journal_keys;
|
||||
unsigned iters = 0;
|
||||
struct journal_key *k;
|
||||
|
||||
BUG_ON(*idx > keys->nr);
|
||||
search:
|
||||
if (!*idx)
|
||||
*idx = __bch2_journal_key_search(keys, btree_id, level, pos);
|
||||
@ -189,10 +191,12 @@ int bch2_journal_key_insert_take(struct bch_fs *c, enum btree_id id,
|
||||
/* Since @keys was full, there was no gap: */
|
||||
memcpy(new_keys.d, keys->d, sizeof(keys->d[0]) * keys->nr);
|
||||
kvfree(keys->d);
|
||||
*keys = new_keys;
|
||||
keys->d = new_keys.d;
|
||||
keys->nr = new_keys.nr;
|
||||
keys->size = new_keys.size;
|
||||
|
||||
/* And now the gap is at the end: */
|
||||
keys->gap = keys->nr;
|
||||
keys->gap = keys->nr;
|
||||
}
|
||||
|
||||
journal_iters_move_gap(c, keys->gap, idx);
|
||||
@ -415,10 +419,16 @@ static int journal_sort_key_cmp(const void *_l, const void *_r)
|
||||
cmp_int(l->journal_offset, r->journal_offset);
|
||||
}
|
||||
|
||||
void bch2_journal_keys_free(struct journal_keys *keys)
|
||||
void bch2_journal_keys_put(struct bch_fs *c)
|
||||
{
|
||||
struct journal_keys *keys = &c->journal_keys;
|
||||
struct journal_key *i;
|
||||
|
||||
BUG_ON(atomic_read(&keys->ref) <= 0);
|
||||
|
||||
if (!atomic_dec_and_test(&keys->ref))
|
||||
return;
|
||||
|
||||
move_gap(keys->d, keys->nr, keys->size, keys->gap, keys->nr);
|
||||
keys->gap = keys->nr;
|
||||
|
||||
@ -429,6 +439,8 @@ void bch2_journal_keys_free(struct journal_keys *keys)
|
||||
kvfree(keys->d);
|
||||
keys->d = NULL;
|
||||
keys->nr = keys->gap = keys->size = 0;
|
||||
|
||||
bch2_journal_entries_free(c);
|
||||
}
|
||||
|
||||
static void __journal_keys_sort(struct journal_keys *keys)
|
||||
|
@ -49,7 +49,15 @@ void bch2_btree_and_journal_iter_init_node_iter(struct btree_and_journal_iter *,
|
||||
struct bch_fs *,
|
||||
struct btree *);
|
||||
|
||||
void bch2_journal_keys_free(struct journal_keys *);
|
||||
void bch2_journal_keys_put(struct bch_fs *);
|
||||
|
||||
static inline void bch2_journal_keys_put_initial(struct bch_fs *c)
|
||||
{
|
||||
if (c->journal_keys.initial_ref_held)
|
||||
bch2_journal_keys_put(c);
|
||||
c->journal_keys.initial_ref_held = false;
|
||||
}
|
||||
|
||||
void bch2_journal_entries_free(struct bch_fs *);
|
||||
|
||||
int bch2_journal_keys_sort(struct bch_fs *);
|
||||
|
@ -778,9 +778,9 @@ static void btree_interior_update_work(struct work_struct *work)
|
||||
}
|
||||
}
|
||||
|
||||
static void btree_update_set_nodes_written(struct closure *cl)
|
||||
static CLOSURE_CALLBACK(btree_update_set_nodes_written)
|
||||
{
|
||||
struct btree_update *as = container_of(cl, struct btree_update, cl);
|
||||
closure_type(as, struct btree_update, cl);
|
||||
struct bch_fs *c = as->c;
|
||||
|
||||
mutex_lock(&c->btree_interior_update_lock);
|
||||
@ -1071,8 +1071,12 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path,
|
||||
break;
|
||||
}
|
||||
|
||||
/*
|
||||
* Always check for space for two keys, even if we won't have to
|
||||
* split at prior level - it might have been a merge instead:
|
||||
*/
|
||||
if (bch2_btree_node_insert_fits(c, path->l[update_level].b,
|
||||
BKEY_BTREE_PTR_U64s_MAX * (1 + split)))
|
||||
BKEY_BTREE_PTR_U64s_MAX * 2))
|
||||
break;
|
||||
|
||||
split = path->l[update_level].b->nr.live_u64s > BTREE_SPLIT_THRESHOLD(c);
|
||||
@ -2266,6 +2270,10 @@ int bch2_btree_node_update_key_get_iter(struct btree_trans *trans,
|
||||
|
||||
BUG_ON(!btree_node_hashed(b));
|
||||
|
||||
struct bch_extent_ptr *ptr;
|
||||
bch2_bkey_drop_ptrs(bkey_i_to_s(new_key), ptr,
|
||||
!bch2_bkey_has_device(bkey_i_to_s(&b->key), ptr->dev));
|
||||
|
||||
ret = bch2_btree_node_update_key(trans, &iter, b, new_key,
|
||||
commit_flags, skip_triggers);
|
||||
out:
|
||||
|
@ -854,8 +854,12 @@ static int __mark_pointer(struct btree_trans *trans,
|
||||
return ret;
|
||||
|
||||
*dst_sectors += sectors;
|
||||
*bucket_data_type = *dirty_sectors || *cached_sectors
|
||||
? ptr_data_type : 0;
|
||||
|
||||
if (!*dirty_sectors && !*cached_sectors)
|
||||
*bucket_data_type = 0;
|
||||
else if (*bucket_data_type != BCH_DATA_stripe)
|
||||
*bucket_data_type = ptr_data_type;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -2091,8 +2095,6 @@ int bch2_dev_buckets_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets)
|
||||
bucket_gens->first_bucket = ca->mi.first_bucket;
|
||||
bucket_gens->nbuckets = nbuckets;
|
||||
|
||||
bch2_copygc_stop(c);
|
||||
|
||||
if (resize) {
|
||||
down_write(&c->gc_lock);
|
||||
down_write(&ca->bucket_lock);
|
||||
|
@ -354,8 +354,7 @@ static int attempt_compress(struct bch_fs *c,
|
||||
*/
|
||||
unsigned level = min((compression.level * 3) / 2, zstd_max_clevel());
|
||||
ZSTD_parameters params = zstd_get_params(level, c->opts.encoded_extent_max);
|
||||
ZSTD_CCtx *ctx = zstd_init_cctx(workspace,
|
||||
zstd_cctx_workspace_bound(¶ms.cParams));
|
||||
ZSTD_CCtx *ctx = zstd_init_cctx(workspace, c->zstd_workspace_size);
|
||||
|
||||
/*
|
||||
* ZSTD requires that when we decompress we pass in the exact
|
||||
@ -371,7 +370,7 @@ static int attempt_compress(struct bch_fs *c,
|
||||
size_t len = zstd_compress_cctx(ctx,
|
||||
dst + 4, dst_len - 4 - 7,
|
||||
src, src_len,
|
||||
&c->zstd_params);
|
||||
¶ms);
|
||||
if (zstd_is_error(len))
|
||||
return 0;
|
||||
|
||||
@ -572,6 +571,13 @@ static int __bch2_fs_compress_init(struct bch_fs *c, u64 features)
|
||||
size_t decompress_workspace_size = 0;
|
||||
ZSTD_parameters params = zstd_get_params(zstd_max_clevel(),
|
||||
c->opts.encoded_extent_max);
|
||||
|
||||
/*
|
||||
* ZSTD is lying: if we allocate the size of the workspace it says it
|
||||
* requires, it returns memory allocation errors
|
||||
*/
|
||||
c->zstd_workspace_size = zstd_cctx_workspace_bound(¶ms.cParams);
|
||||
|
||||
struct {
|
||||
unsigned feature;
|
||||
enum bch_compression_type type;
|
||||
@ -585,13 +591,11 @@ static int __bch2_fs_compress_init(struct bch_fs *c, u64 features)
|
||||
zlib_deflate_workspacesize(MAX_WBITS, DEF_MEM_LEVEL),
|
||||
zlib_inflate_workspacesize(), },
|
||||
{ BCH_FEATURE_zstd, BCH_COMPRESSION_TYPE_zstd,
|
||||
zstd_cctx_workspace_bound(¶ms.cParams),
|
||||
c->zstd_workspace_size,
|
||||
zstd_dctx_workspace_bound() },
|
||||
}, *i;
|
||||
bool have_compressed = false;
|
||||
|
||||
c->zstd_params = params;
|
||||
|
||||
for (i = compression_types;
|
||||
i < compression_types + ARRAY_SIZE(compression_types);
|
||||
i++)
|
||||
|
@ -356,7 +356,7 @@ void bch2_data_update_exit(struct data_update *update)
|
||||
bch2_bio_free_pages_pool(c, &update->op.wbio.bio);
|
||||
}
|
||||
|
||||
void bch2_update_unwritten_extent(struct btree_trans *trans,
|
||||
static void bch2_update_unwritten_extent(struct btree_trans *trans,
|
||||
struct data_update *update)
|
||||
{
|
||||
struct bch_fs *c = update->op.c;
|
||||
@ -436,7 +436,51 @@ void bch2_update_unwritten_extent(struct btree_trans *trans,
|
||||
}
|
||||
}
|
||||
|
||||
int bch2_extent_drop_ptrs(struct btree_trans *trans,
|
||||
struct btree_iter *iter,
|
||||
struct bkey_s_c k,
|
||||
struct data_update_opts data_opts)
|
||||
{
|
||||
struct bch_fs *c = trans->c;
|
||||
struct bkey_i *n;
|
||||
int ret;
|
||||
|
||||
n = bch2_bkey_make_mut_noupdate(trans, k);
|
||||
ret = PTR_ERR_OR_ZERO(n);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
while (data_opts.kill_ptrs) {
|
||||
unsigned i = 0, drop = __fls(data_opts.kill_ptrs);
|
||||
struct bch_extent_ptr *ptr;
|
||||
|
||||
bch2_bkey_drop_ptrs(bkey_i_to_s(n), ptr, i++ == drop);
|
||||
data_opts.kill_ptrs ^= 1U << drop;
|
||||
}
|
||||
|
||||
/*
|
||||
* If the new extent no longer has any pointers, bch2_extent_normalize()
|
||||
* will do the appropriate thing with it (turning it into a
|
||||
* KEY_TYPE_error key, or just a discard if it was a cached extent)
|
||||
*/
|
||||
bch2_extent_normalize(c, bkey_i_to_s(n));
|
||||
|
||||
/*
|
||||
* Since we're not inserting through an extent iterator
|
||||
* (BTREE_ITER_ALL_SNAPSHOTS iterators aren't extent iterators),
|
||||
* we aren't using the extent overwrite path to delete, we're
|
||||
* just using the normal key deletion path:
|
||||
*/
|
||||
if (bkey_deleted(&n->k))
|
||||
n->k.size = 0;
|
||||
|
||||
return bch2_trans_relock(trans) ?:
|
||||
bch2_trans_update(trans, iter, n, BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE) ?:
|
||||
bch2_trans_commit(trans, NULL, NULL, BTREE_INSERT_NOFAIL);
|
||||
}
|
||||
|
||||
int bch2_data_update_init(struct btree_trans *trans,
|
||||
struct btree_iter *iter,
|
||||
struct moving_context *ctxt,
|
||||
struct data_update *m,
|
||||
struct write_point_specifier wp,
|
||||
@ -452,7 +496,7 @@ int bch2_data_update_init(struct btree_trans *trans,
|
||||
const struct bch_extent_ptr *ptr;
|
||||
unsigned i, reserve_sectors = k.k->size * data_opts.extra_replicas;
|
||||
unsigned ptrs_locked = 0;
|
||||
int ret;
|
||||
int ret = 0;
|
||||
|
||||
bch2_bkey_buf_init(&m->k);
|
||||
bch2_bkey_buf_reassemble(&m->k, c, k);
|
||||
@ -478,6 +522,8 @@ int bch2_data_update_init(struct btree_trans *trans,
|
||||
bkey_for_each_ptr(ptrs, ptr)
|
||||
percpu_ref_get(&bch_dev_bkey_exists(c, ptr->dev)->ref);
|
||||
|
||||
unsigned durability_have = 0, durability_removing = 0;
|
||||
|
||||
i = 0;
|
||||
bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
|
||||
bool locked;
|
||||
@ -489,8 +535,11 @@ int bch2_data_update_init(struct btree_trans *trans,
|
||||
reserve_sectors += k.k->size;
|
||||
|
||||
m->op.nr_replicas += bch2_extent_ptr_desired_durability(c, &p);
|
||||
} else if (!p.ptr.cached) {
|
||||
durability_removing += bch2_extent_ptr_desired_durability(c, &p);
|
||||
} else if (!p.ptr.cached &&
|
||||
!((1U << i) & m->data_opts.kill_ptrs)) {
|
||||
bch2_dev_list_add_dev(&m->op.devs_have, p.ptr.dev);
|
||||
durability_have += bch2_extent_ptr_durability(c, &p);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -529,6 +578,29 @@ int bch2_data_update_init(struct btree_trans *trans,
|
||||
i++;
|
||||
}
|
||||
|
||||
/*
|
||||
* If current extent durability is less than io_opts.data_replicas,
|
||||
* we're not trying to rereplicate the extent up to data_replicas here -
|
||||
* unless extra_replicas was specified
|
||||
*
|
||||
* Increasing replication is an explicit operation triggered by
|
||||
* rereplicate, currently, so that users don't get an unexpected -ENOSPC
|
||||
*/
|
||||
if (durability_have >= io_opts.data_replicas) {
|
||||
m->data_opts.kill_ptrs |= m->data_opts.rewrite_ptrs;
|
||||
m->data_opts.rewrite_ptrs = 0;
|
||||
/* if iter == NULL, it's just a promote */
|
||||
if (iter)
|
||||
ret = bch2_extent_drop_ptrs(trans, iter, k, data_opts);
|
||||
goto done;
|
||||
}
|
||||
|
||||
m->op.nr_replicas = min(durability_removing, io_opts.data_replicas - durability_have) +
|
||||
m->data_opts.extra_replicas;
|
||||
m->op.nr_replicas_required = m->op.nr_replicas;
|
||||
|
||||
BUG_ON(!m->op.nr_replicas);
|
||||
|
||||
if (reserve_sectors) {
|
||||
ret = bch2_disk_reservation_add(c, &m->op.res, reserve_sectors,
|
||||
m->data_opts.extra_replicas
|
||||
@ -538,14 +610,11 @@ int bch2_data_update_init(struct btree_trans *trans,
|
||||
goto err;
|
||||
}
|
||||
|
||||
m->op.nr_replicas += m->data_opts.extra_replicas;
|
||||
m->op.nr_replicas_required = m->op.nr_replicas;
|
||||
if (bkey_extent_is_unwritten(k)) {
|
||||
bch2_update_unwritten_extent(trans, m);
|
||||
goto done;
|
||||
}
|
||||
|
||||
BUG_ON(!m->op.nr_replicas);
|
||||
|
||||
/* Special handling required: */
|
||||
if (bkey_extent_is_unwritten(k))
|
||||
return -BCH_ERR_unwritten_extent_update;
|
||||
return 0;
|
||||
err:
|
||||
i = 0;
|
||||
@ -560,6 +629,9 @@ err:
|
||||
bch2_bkey_buf_exit(&m->k, c);
|
||||
bch2_bio_free_pages_pool(c, &m->op.wbio.bio);
|
||||
return ret;
|
||||
done:
|
||||
bch2_data_update_exit(m);
|
||||
return ret ?: -BCH_ERR_data_update_done;
|
||||
}
|
||||
|
||||
void bch2_data_update_opts_normalize(struct bkey_s_c k, struct data_update_opts *opts)
|
||||
|
@ -32,9 +32,14 @@ int bch2_data_update_index_update(struct bch_write_op *);
|
||||
void bch2_data_update_read_done(struct data_update *,
|
||||
struct bch_extent_crc_unpacked);
|
||||
|
||||
int bch2_extent_drop_ptrs(struct btree_trans *,
|
||||
struct btree_iter *,
|
||||
struct bkey_s_c,
|
||||
struct data_update_opts);
|
||||
|
||||
void bch2_data_update_exit(struct data_update *);
|
||||
void bch2_update_unwritten_extent(struct btree_trans *, struct data_update *);
|
||||
int bch2_data_update_init(struct btree_trans *, struct moving_context *,
|
||||
int bch2_data_update_init(struct btree_trans *, struct btree_iter *,
|
||||
struct moving_context *,
|
||||
struct data_update *,
|
||||
struct write_point_specifier,
|
||||
struct bch_io_opts, struct data_update_opts,
|
||||
|
@ -162,7 +162,7 @@
|
||||
x(BCH_ERR_fsck, fsck_repair_unimplemented) \
|
||||
x(BCH_ERR_fsck, fsck_repair_impossible) \
|
||||
x(0, restart_recovery) \
|
||||
x(0, unwritten_extent_update) \
|
||||
x(0, data_update_done) \
|
||||
x(EINVAL, device_state_not_allowed) \
|
||||
x(EINVAL, member_info_missing) \
|
||||
x(EINVAL, mismatched_block_size) \
|
||||
@ -210,6 +210,7 @@
|
||||
x(BCH_ERR_invalid_sb, invalid_sb_members) \
|
||||
x(BCH_ERR_invalid_sb, invalid_sb_disk_groups) \
|
||||
x(BCH_ERR_invalid_sb, invalid_sb_replicas) \
|
||||
x(BCH_ERR_invalid_sb, invalid_replicas_entry) \
|
||||
x(BCH_ERR_invalid_sb, invalid_sb_journal) \
|
||||
x(BCH_ERR_invalid_sb, invalid_sb_journal_seq_blacklist) \
|
||||
x(BCH_ERR_invalid_sb, invalid_sb_crypt) \
|
||||
|
@ -649,37 +649,31 @@ unsigned bch2_bkey_replicas(struct bch_fs *c, struct bkey_s_c k)
|
||||
return replicas;
|
||||
}
|
||||
|
||||
unsigned bch2_extent_ptr_desired_durability(struct bch_fs *c, struct extent_ptr_decoded *p)
|
||||
static inline unsigned __extent_ptr_durability(struct bch_dev *ca, struct extent_ptr_decoded *p)
|
||||
{
|
||||
struct bch_dev *ca;
|
||||
|
||||
if (p->ptr.cached)
|
||||
return 0;
|
||||
|
||||
ca = bch_dev_bkey_exists(c, p->ptr.dev);
|
||||
return p->has_ec
|
||||
? p->ec.redundancy + 1
|
||||
: ca->mi.durability;
|
||||
}
|
||||
|
||||
return ca->mi.durability +
|
||||
(p->has_ec
|
||||
? p->ec.redundancy
|
||||
: 0);
|
||||
unsigned bch2_extent_ptr_desired_durability(struct bch_fs *c, struct extent_ptr_decoded *p)
|
||||
{
|
||||
struct bch_dev *ca = bch_dev_bkey_exists(c, p->ptr.dev);
|
||||
|
||||
return __extent_ptr_durability(ca, p);
|
||||
}
|
||||
|
||||
unsigned bch2_extent_ptr_durability(struct bch_fs *c, struct extent_ptr_decoded *p)
|
||||
{
|
||||
struct bch_dev *ca;
|
||||
|
||||
if (p->ptr.cached)
|
||||
return 0;
|
||||
|
||||
ca = bch_dev_bkey_exists(c, p->ptr.dev);
|
||||
struct bch_dev *ca = bch_dev_bkey_exists(c, p->ptr.dev);
|
||||
|
||||
if (ca->mi.state == BCH_MEMBER_STATE_failed)
|
||||
return 0;
|
||||
|
||||
return ca->mi.durability +
|
||||
(p->has_ec
|
||||
? p->ec.redundancy
|
||||
: 0);
|
||||
return __extent_ptr_durability(ca, p);
|
||||
}
|
||||
|
||||
unsigned bch2_bkey_durability(struct bch_fs *c, struct bkey_s_c k)
|
||||
|
@ -35,9 +35,9 @@ static void bio_check_or_release(struct bio *bio, bool check_dirty)
|
||||
}
|
||||
}
|
||||
|
||||
static void bch2_dio_read_complete(struct closure *cl)
|
||||
static CLOSURE_CALLBACK(bch2_dio_read_complete)
|
||||
{
|
||||
struct dio_read *dio = container_of(cl, struct dio_read, cl);
|
||||
closure_type(dio, struct dio_read, cl);
|
||||
|
||||
dio->req->ki_complete(dio->req, dio->ret);
|
||||
bio_check_or_release(&dio->rbio.bio, dio->should_dirty);
|
||||
@ -325,9 +325,9 @@ static noinline int bch2_dio_write_copy_iov(struct dio_write *dio)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void bch2_dio_write_flush_done(struct closure *cl)
|
||||
static CLOSURE_CALLBACK(bch2_dio_write_flush_done)
|
||||
{
|
||||
struct dio_write *dio = container_of(cl, struct dio_write, op.cl);
|
||||
closure_type(dio, struct dio_write, op.cl);
|
||||
struct bch_fs *c = dio->op.c;
|
||||
|
||||
closure_debug_destroy(cl);
|
||||
|
@ -1667,8 +1667,7 @@ static int bch2_show_devname(struct seq_file *seq, struct dentry *root)
|
||||
if (!first)
|
||||
seq_putc(seq, ':');
|
||||
first = false;
|
||||
seq_puts(seq, "/dev/");
|
||||
seq_puts(seq, ca->name);
|
||||
seq_puts(seq, ca->disk_sb.sb_name);
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
@ -209,7 +209,7 @@ static struct promote_op *__promote_alloc(struct btree_trans *trans,
|
||||
bio = &op->write.op.wbio.bio;
|
||||
bio_init(bio, NULL, bio->bi_inline_vecs, pages, 0);
|
||||
|
||||
ret = bch2_data_update_init(trans, NULL, &op->write,
|
||||
ret = bch2_data_update_init(trans, NULL, NULL, &op->write,
|
||||
writepoint_hashed((unsigned long) current),
|
||||
opts,
|
||||
(struct data_update_opts) {
|
||||
|
@ -580,9 +580,9 @@ static inline void wp_update_state(struct write_point *wp, bool running)
|
||||
__wp_update_state(wp, state);
|
||||
}
|
||||
|
||||
static void bch2_write_index(struct closure *cl)
|
||||
static CLOSURE_CALLBACK(bch2_write_index)
|
||||
{
|
||||
struct bch_write_op *op = container_of(cl, struct bch_write_op, cl);
|
||||
closure_type(op, struct bch_write_op, cl);
|
||||
struct write_point *wp = op->wp;
|
||||
struct workqueue_struct *wq = index_update_wq(op);
|
||||
unsigned long flags;
|
||||
@ -1208,9 +1208,9 @@ static void __bch2_nocow_write_done(struct bch_write_op *op)
|
||||
bch2_nocow_write_convert_unwritten(op);
|
||||
}
|
||||
|
||||
static void bch2_nocow_write_done(struct closure *cl)
|
||||
static CLOSURE_CALLBACK(bch2_nocow_write_done)
|
||||
{
|
||||
struct bch_write_op *op = container_of(cl, struct bch_write_op, cl);
|
||||
closure_type(op, struct bch_write_op, cl);
|
||||
|
||||
__bch2_nocow_write_done(op);
|
||||
bch2_write_done(cl);
|
||||
@ -1363,7 +1363,7 @@ err:
|
||||
op->insert_keys.top = op->insert_keys.keys;
|
||||
} else if (op->flags & BCH_WRITE_SYNC) {
|
||||
closure_sync(&op->cl);
|
||||
bch2_nocow_write_done(&op->cl);
|
||||
bch2_nocow_write_done(&op->cl.work);
|
||||
} else {
|
||||
/*
|
||||
* XXX
|
||||
@ -1566,9 +1566,9 @@ err:
|
||||
* If op->discard is true, instead of inserting the data it invalidates the
|
||||
* region of the cache represented by op->bio and op->inode.
|
||||
*/
|
||||
void bch2_write(struct closure *cl)
|
||||
CLOSURE_CALLBACK(bch2_write)
|
||||
{
|
||||
struct bch_write_op *op = container_of(cl, struct bch_write_op, cl);
|
||||
closure_type(op, struct bch_write_op, cl);
|
||||
struct bio *bio = &op->wbio.bio;
|
||||
struct bch_fs *c = op->c;
|
||||
unsigned data_len;
|
||||
|
@ -90,8 +90,7 @@ static inline void bch2_write_op_init(struct bch_write_op *op, struct bch_fs *c,
|
||||
op->devs_need_flush = NULL;
|
||||
}
|
||||
|
||||
void bch2_write(struct closure *);
|
||||
|
||||
CLOSURE_CALLBACK(bch2_write);
|
||||
void bch2_write_point_do_index_updates(struct work_struct *);
|
||||
|
||||
static inline struct bch_write_bio *wbio_init(struct bio *bio)
|
||||
|
@ -321,6 +321,8 @@ static int journal_entry_open(struct journal *j)
|
||||
atomic64_inc(&j->seq);
|
||||
journal_pin_list_init(fifo_push_ref(&j->pin), 1);
|
||||
|
||||
BUG_ON(j->pin.back - 1 != atomic64_read(&j->seq));
|
||||
|
||||
BUG_ON(j->buf + (journal_cur_seq(j) & JOURNAL_BUF_MASK) != buf);
|
||||
|
||||
bkey_extent_init(&buf->key);
|
||||
|
@ -136,9 +136,7 @@ static inline u64 journal_last_seq(struct journal *j)
|
||||
|
||||
static inline u64 journal_cur_seq(struct journal *j)
|
||||
{
|
||||
EBUG_ON(j->pin.back - 1 != atomic64_read(&j->seq));
|
||||
|
||||
return j->pin.back - 1;
|
||||
return atomic64_read(&j->seq);
|
||||
}
|
||||
|
||||
static inline u64 journal_last_unwritten_seq(struct journal *j)
|
||||
|
@ -547,6 +547,7 @@ static int journal_entry_data_usage_validate(struct bch_fs *c,
|
||||
struct jset_entry_data_usage *u =
|
||||
container_of(entry, struct jset_entry_data_usage, entry);
|
||||
unsigned bytes = jset_u64s(le16_to_cpu(entry->u64s)) * sizeof(u64);
|
||||
struct printbuf err = PRINTBUF;
|
||||
int ret = 0;
|
||||
|
||||
if (journal_entry_err_on(bytes < sizeof(*u) ||
|
||||
@ -555,10 +556,19 @@ static int journal_entry_data_usage_validate(struct bch_fs *c,
|
||||
journal_entry_data_usage_bad_size,
|
||||
"invalid journal entry usage: bad size")) {
|
||||
journal_entry_null_range(entry, vstruct_next(entry));
|
||||
return ret;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (journal_entry_err_on(bch2_replicas_entry_validate(&u->r, c->disk_sb.sb, &err),
|
||||
c, version, jset, entry,
|
||||
journal_entry_data_usage_bad_size,
|
||||
"invalid journal entry usage: %s", err.buf)) {
|
||||
journal_entry_null_range(entry, vstruct_next(entry));
|
||||
goto out;
|
||||
}
|
||||
out:
|
||||
fsck_err:
|
||||
printbuf_exit(&err);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@ -1025,10 +1035,9 @@ next_block:
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void bch2_journal_read_device(struct closure *cl)
|
||||
static CLOSURE_CALLBACK(bch2_journal_read_device)
|
||||
{
|
||||
struct journal_device *ja =
|
||||
container_of(cl, struct journal_device, read);
|
||||
closure_type(ja, struct journal_device, read);
|
||||
struct bch_dev *ca = container_of(ja, struct bch_dev, journal);
|
||||
struct bch_fs *c = ca->fs;
|
||||
struct journal_list *jlist =
|
||||
@ -1520,9 +1529,9 @@ static inline struct journal_buf *journal_last_unwritten_buf(struct journal *j)
|
||||
return j->buf + (journal_last_unwritten_seq(j) & JOURNAL_BUF_MASK);
|
||||
}
|
||||
|
||||
static void journal_write_done(struct closure *cl)
|
||||
static CLOSURE_CALLBACK(journal_write_done)
|
||||
{
|
||||
struct journal *j = container_of(cl, struct journal, io);
|
||||
closure_type(j, struct journal, io);
|
||||
struct bch_fs *c = container_of(j, struct bch_fs, journal);
|
||||
struct journal_buf *w = journal_last_unwritten_buf(j);
|
||||
struct bch_replicas_padded replicas;
|
||||
@ -1638,9 +1647,9 @@ static void journal_write_endio(struct bio *bio)
|
||||
percpu_ref_put(&ca->io_ref);
|
||||
}
|
||||
|
||||
static void do_journal_write(struct closure *cl)
|
||||
static CLOSURE_CALLBACK(do_journal_write)
|
||||
{
|
||||
struct journal *j = container_of(cl, struct journal, io);
|
||||
closure_type(j, struct journal, io);
|
||||
struct bch_fs *c = container_of(j, struct bch_fs, journal);
|
||||
struct bch_dev *ca;
|
||||
struct journal_buf *w = journal_last_unwritten_buf(j);
|
||||
@ -1850,9 +1859,9 @@ static int bch2_journal_write_pick_flush(struct journal *j, struct journal_buf *
|
||||
return 0;
|
||||
}
|
||||
|
||||
void bch2_journal_write(struct closure *cl)
|
||||
CLOSURE_CALLBACK(bch2_journal_write)
|
||||
{
|
||||
struct journal *j = container_of(cl, struct journal, io);
|
||||
closure_type(j, struct journal, io);
|
||||
struct bch_fs *c = container_of(j, struct bch_fs, journal);
|
||||
struct bch_dev *ca;
|
||||
struct journal_buf *w = journal_last_unwritten_buf(j);
|
||||
|
@ -60,6 +60,6 @@ void bch2_journal_ptrs_to_text(struct printbuf *, struct bch_fs *,
|
||||
|
||||
int bch2_journal_read(struct bch_fs *, u64 *, u64 *, u64 *);
|
||||
|
||||
void bch2_journal_write(struct closure *);
|
||||
CLOSURE_CALLBACK(bch2_journal_write);
|
||||
|
||||
#endif /* _BCACHEFS_JOURNAL_IO_H */
|
||||
|
@ -49,17 +49,6 @@ static void trace_move_extent_read2(struct bch_fs *c, struct bkey_s_c k)
|
||||
}
|
||||
}
|
||||
|
||||
static void trace_move_extent_alloc_mem_fail2(struct bch_fs *c, struct bkey_s_c k)
|
||||
{
|
||||
if (trace_move_extent_alloc_mem_fail_enabled()) {
|
||||
struct printbuf buf = PRINTBUF;
|
||||
|
||||
bch2_bkey_val_to_text(&buf, c, k);
|
||||
trace_move_extent_alloc_mem_fail(c, buf.buf);
|
||||
printbuf_exit(&buf);
|
||||
}
|
||||
}
|
||||
|
||||
struct moving_io {
|
||||
struct list_head read_list;
|
||||
struct list_head io_list;
|
||||
@ -163,12 +152,18 @@ void bch2_move_ctxt_wait_for_io(struct moving_context *ctxt)
|
||||
atomic_read(&ctxt->write_sectors) != sectors_pending);
|
||||
}
|
||||
|
||||
static void bch2_moving_ctxt_flush_all(struct moving_context *ctxt)
|
||||
{
|
||||
move_ctxt_wait_event(ctxt, list_empty(&ctxt->reads));
|
||||
bch2_trans_unlock_long(ctxt->trans);
|
||||
closure_sync(&ctxt->cl);
|
||||
}
|
||||
|
||||
void bch2_moving_ctxt_exit(struct moving_context *ctxt)
|
||||
{
|
||||
struct bch_fs *c = ctxt->trans->c;
|
||||
|
||||
move_ctxt_wait_event(ctxt, list_empty(&ctxt->reads));
|
||||
closure_sync(&ctxt->cl);
|
||||
bch2_moving_ctxt_flush_all(ctxt);
|
||||
|
||||
EBUG_ON(atomic_read(&ctxt->write_sectors));
|
||||
EBUG_ON(atomic_read(&ctxt->write_ios));
|
||||
@ -223,49 +218,6 @@ void bch2_move_stats_init(struct bch_move_stats *stats, char *name)
|
||||
scnprintf(stats->name, sizeof(stats->name), "%s", name);
|
||||
}
|
||||
|
||||
static int bch2_extent_drop_ptrs(struct btree_trans *trans,
|
||||
struct btree_iter *iter,
|
||||
struct bkey_s_c k,
|
||||
struct data_update_opts data_opts)
|
||||
{
|
||||
struct bch_fs *c = trans->c;
|
||||
struct bkey_i *n;
|
||||
int ret;
|
||||
|
||||
n = bch2_bkey_make_mut_noupdate(trans, k);
|
||||
ret = PTR_ERR_OR_ZERO(n);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
while (data_opts.kill_ptrs) {
|
||||
unsigned i = 0, drop = __fls(data_opts.kill_ptrs);
|
||||
struct bch_extent_ptr *ptr;
|
||||
|
||||
bch2_bkey_drop_ptrs(bkey_i_to_s(n), ptr, i++ == drop);
|
||||
data_opts.kill_ptrs ^= 1U << drop;
|
||||
}
|
||||
|
||||
/*
|
||||
* If the new extent no longer has any pointers, bch2_extent_normalize()
|
||||
* will do the appropriate thing with it (turning it into a
|
||||
* KEY_TYPE_error key, or just a discard if it was a cached extent)
|
||||
*/
|
||||
bch2_extent_normalize(c, bkey_i_to_s(n));
|
||||
|
||||
/*
|
||||
* Since we're not inserting through an extent iterator
|
||||
* (BTREE_ITER_ALL_SNAPSHOTS iterators aren't extent iterators),
|
||||
* we aren't using the extent overwrite path to delete, we're
|
||||
* just using the normal key deletion path:
|
||||
*/
|
||||
if (bkey_deleted(&n->k))
|
||||
n->k.size = 0;
|
||||
|
||||
return bch2_trans_relock(trans) ?:
|
||||
bch2_trans_update(trans, iter, n, BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE) ?:
|
||||
bch2_trans_commit(trans, NULL, NULL, BTREE_INSERT_NOFAIL);
|
||||
}
|
||||
|
int bch2_move_extent(struct moving_context *ctxt,
struct move_bucket_in_flight *bucket_in_flight,
struct btree_iter *iter,
@@ -335,19 +287,11 @@ int bch2_move_extent(struct moving_context *ctxt,
io->rbio.bio.bi_iter.bi_sector = bkey_start_offset(k.k);
io->rbio.bio.bi_end_io = move_read_endio;

ret = bch2_data_update_init(trans, ctxt, &io->write, ctxt->wp,
ret = bch2_data_update_init(trans, iter, ctxt, &io->write, ctxt->wp,
io_opts, data_opts, iter->btree_id, k);
if (ret && ret != -BCH_ERR_unwritten_extent_update)
if (ret)
goto err_free_pages;

if (ret == -BCH_ERR_unwritten_extent_update) {
bch2_update_unwritten_extent(trans, &io->write);
move_free(io);
return 0;
}

BUG_ON(ret);

io->write.op.end_io = move_write_done;

if (ctxt->rate)
@@ -391,8 +335,23 @@ err_free_pages:
err_free:
kfree(io);
err:
this_cpu_inc(c->counters[BCH_COUNTER_move_extent_alloc_mem_fail]);
trace_move_extent_alloc_mem_fail2(c, k);
if (ret == -BCH_ERR_data_update_done)
return 0;

if (bch2_err_matches(ret, EROFS) ||
bch2_err_matches(ret, BCH_ERR_transaction_restart))
return ret;

this_cpu_inc(c->counters[BCH_COUNTER_move_extent_start_fail]);
if (trace_move_extent_start_fail_enabled()) {
struct printbuf buf = PRINTBUF;

bch2_bkey_val_to_text(&buf, c, k);
prt_str(&buf, ": ");
prt_str(&buf, bch2_err_str(ret));
trace_move_extent_start_fail(c, buf.buf);
printbuf_exit(&buf);
}
return ret;
}

@@ -482,37 +441,30 @@ int bch2_move_get_io_opts_one(struct btree_trans *trans,
int bch2_move_ratelimit(struct moving_context *ctxt)
{
struct bch_fs *c = ctxt->trans->c;
bool is_kthread = current->flags & PF_KTHREAD;
u64 delay;

if (ctxt->wait_on_copygc && !c->copygc_running) {
bch2_trans_unlock_long(ctxt->trans);
if (ctxt->wait_on_copygc && c->copygc_running) {
bch2_moving_ctxt_flush_all(ctxt);
wait_event_killable(c->copygc_running_wq,
!c->copygc_running ||
kthread_should_stop());
(is_kthread && kthread_should_stop()));
}

do {
delay = ctxt->rate ? bch2_ratelimit_delay(ctxt->rate) : 0;


if (delay) {
if (delay > HZ / 10)
bch2_trans_unlock_long(ctxt->trans);
else
bch2_trans_unlock(ctxt->trans);
set_current_state(TASK_INTERRUPTIBLE);
}

if ((current->flags & PF_KTHREAD) && kthread_should_stop()) {
__set_current_state(TASK_RUNNING);
if (is_kthread && kthread_should_stop())
return 1;
}

if (delay)
schedule_timeout(delay);
move_ctxt_wait_event_timeout(ctxt,
freezing(current) ||
(is_kthread && kthread_should_stop()),
delay);

if (unlikely(freezing(current))) {
move_ctxt_wait_event(ctxt, list_empty(&ctxt->reads));
bch2_moving_ctxt_flush_all(ctxt);
try_to_freeze();
}
} while (delay);
@@ -683,6 +635,7 @@ int __bch2_evacuate_bucket(struct moving_context *ctxt,
{
struct btree_trans *trans = ctxt->trans;
struct bch_fs *c = trans->c;
bool is_kthread = current->flags & PF_KTHREAD;
struct bch_io_opts io_opts = bch2_opts_to_inode_opts(c->opts);
struct btree_iter iter;
struct bkey_buf sk;
@@ -728,6 +681,9 @@ int __bch2_evacuate_bucket(struct moving_context *ctxt,
}

while (!(ret = bch2_move_ratelimit(ctxt))) {
if (is_kthread && kthread_should_stop())
break;

bch2_trans_begin(trans);

ret = bch2_get_next_backpointer(trans, bucket, gen,

@@ -38,6 +38,25 @@ struct moving_context {
wait_queue_head_t wait;
};

#define move_ctxt_wait_event_timeout(_ctxt, _cond, _timeout) \
({ \
int _ret = 0; \
while (true) { \
bool cond_finished = false; \
bch2_moving_ctxt_do_pending_writes(_ctxt); \
\
if (_cond) \
break; \
bch2_trans_unlock_long((_ctxt)->trans); \
_ret = __wait_event_timeout((_ctxt)->wait, \
bch2_moving_ctxt_next_pending_write(_ctxt) || \
(cond_finished = (_cond)), _timeout); \
if (_ret || ( cond_finished)) \
break; \
} \
_ret; \
})

#define move_ctxt_wait_event(_ctxt, _cond) \
do { \
bool cond_finished = false; \

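The move_ctxt_wait_event_timeout() macro above keeps flushing pending writes while it waits, and stops when either the condition becomes true or the timeout runs out. A loose userspace analogue of that shape, as a GNU C statement-expression macro (flush_pending_work() and the millisecond budget are invented for the example):

#include <stdbool.h>
#include <stdio.h>
#include <unistd.h>

static void flush_pending_work(void)
{
	/* stand-in for bch2_moving_ctxt_do_pending_writes() */
}

#define wait_cond_timeout(_cond, _timeout_ms)				\
({									\
	long _left = (_timeout_ms);					\
	bool _done = false;						\
	while (true) {							\
		flush_pending_work();					\
		if ((_done = (_cond)) || _left-- <= 0)			\
			break;						\
		usleep(1000);	/* wait ~1ms before re-checking */	\
	}								\
	_done;								\
})

int main(void)
{
	int progress = 0;

	/* the condition becomes true on the tenth re-check */
	bool ok = wait_cond_timeout(++progress >= 10, 50);
	printf("finished: %d after %d checks\n", ok, progress);
	return 0;
}

The property it shares with the real macro is that the condition is re-evaluated after every wakeup, so a condition that flips while the caller sleeps is not missed.
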
@@ -207,7 +207,7 @@ static int bch2_copygc(struct moving_context *ctxt,
goto err;

darray_for_each(buckets, i) {
if (unlikely(freezing(current)))
if (kthread_should_stop() || freezing(current))
break;

f = move_bucket_in_flight_add(buckets_in_flight, *i);

@@ -167,6 +167,8 @@ static int bch2_journal_replay(struct bch_fs *c)
goto err;
}

BUG_ON(!atomic_read(&keys->ref));

for (i = 0; i < keys->nr; i++) {
k = keys_sorted[i];

@@ -188,6 +190,9 @@ static int bch2_journal_replay(struct bch_fs *c)
}
}

if (!c->opts.keep_journal)
bch2_journal_keys_put_initial(c);

replay_now_at(j, j->replay_journal_seq_end);
j->replay_journal_seq = 0;

@@ -909,10 +914,8 @@ out:
bch2_flush_fsck_errs(c);

if (!c->opts.keep_journal &&
test_bit(JOURNAL_REPLAY_DONE, &c->journal.flags)) {
bch2_journal_keys_free(&c->journal_keys);
bch2_journal_entries_free(c);
}
test_bit(JOURNAL_REPLAY_DONE, &c->journal.flags))
bch2_journal_keys_put_initial(c);
kfree(clean);

if (!ret && test_bit(BCH_FS_NEED_DELETE_DEAD_SNAPSHOTS, &c->flags)) {

@@ -68,6 +68,33 @@ void bch2_replicas_entry_to_text(struct printbuf *out,
prt_printf(out, "]");
}

int bch2_replicas_entry_validate(struct bch_replicas_entry *r,
struct bch_sb *sb,
struct printbuf *err)
{
if (!r->nr_devs) {
prt_printf(err, "no devices in entry ");
goto bad;
}

if (r->nr_required > 1 &&
r->nr_required >= r->nr_devs) {
prt_printf(err, "bad nr_required in entry ");
goto bad;
}

for (unsigned i = 0; i < r->nr_devs; i++)
if (!bch2_dev_exists(sb, r->devs[i])) {
prt_printf(err, "invalid device %u in entry ", r->devs[i]);
goto bad;
}

return 0;
bad:
bch2_replicas_entry_to_text(err, r);
return -BCH_ERR_invalid_replicas_entry;
}

void bch2_cpu_replicas_to_text(struct printbuf *out,
struct bch_replicas_cpu *r)
{
@@ -163,7 +190,8 @@ void bch2_devlist_to_replicas(struct bch_replicas_entry *e,
}

static struct bch_replicas_cpu
cpu_replicas_add_entry(struct bch_replicas_cpu *old,
cpu_replicas_add_entry(struct bch_fs *c,
struct bch_replicas_cpu *old,
struct bch_replicas_entry *new_entry)
{
unsigned i;
@@ -173,6 +201,9 @@ cpu_replicas_add_entry(struct bch_replicas_cpu *old,
replicas_entry_bytes(new_entry)),
};

for (i = 0; i < new_entry->nr_devs; i++)
BUG_ON(!bch2_dev_exists2(c, new_entry->devs[i]));

BUG_ON(!new_entry->data_type);
verify_replicas_entry(new_entry);

@@ -382,7 +413,7 @@ static int bch2_mark_replicas_slowpath(struct bch_fs *c,

if (c->replicas_gc.entries &&
!__replicas_has_entry(&c->replicas_gc, new_entry)) {
new_gc = cpu_replicas_add_entry(&c->replicas_gc, new_entry);
new_gc = cpu_replicas_add_entry(c, &c->replicas_gc, new_entry);
if (!new_gc.entries) {
ret = -BCH_ERR_ENOMEM_cpu_replicas;
goto err;
@@ -390,7 +421,7 @@ static int bch2_mark_replicas_slowpath(struct bch_fs *c,
}

if (!__replicas_has_entry(&c->replicas, new_entry)) {
new_r = cpu_replicas_add_entry(&c->replicas, new_entry);
new_r = cpu_replicas_add_entry(c, &c->replicas, new_entry);
if (!new_r.entries) {
ret = -BCH_ERR_ENOMEM_cpu_replicas;
goto err;
@@ -598,7 +629,7 @@ int bch2_replicas_set_usage(struct bch_fs *c,
if (idx < 0) {
struct bch_replicas_cpu n;

n = cpu_replicas_add_entry(&c->replicas, r);
n = cpu_replicas_add_entry(c, &c->replicas, r);
if (!n.entries)
return -BCH_ERR_ENOMEM_cpu_replicas;

@@ -797,7 +828,7 @@ static int bch2_cpu_replicas_validate(struct bch_replicas_cpu *cpu_r,
struct bch_sb *sb,
struct printbuf *err)
{
unsigned i, j;
unsigned i;

sort_cmp_size(cpu_r->entries,
cpu_r->nr,
@@ -808,31 +839,9 @@ static int bch2_cpu_replicas_validate(struct bch_replicas_cpu *cpu_r,
struct bch_replicas_entry *e =
cpu_replicas_entry(cpu_r, i);

if (e->data_type >= BCH_DATA_NR) {
prt_printf(err, "invalid data type in entry ");
bch2_replicas_entry_to_text(err, e);
return -BCH_ERR_invalid_sb_replicas;
}

if (!e->nr_devs) {
prt_printf(err, "no devices in entry ");
bch2_replicas_entry_to_text(err, e);
return -BCH_ERR_invalid_sb_replicas;
}

if (e->nr_required > 1 &&
e->nr_required >= e->nr_devs) {
prt_printf(err, "bad nr_required in entry ");
bch2_replicas_entry_to_text(err, e);
return -BCH_ERR_invalid_sb_replicas;
}

for (j = 0; j < e->nr_devs; j++)
if (!bch2_dev_exists(sb, e->devs[j])) {
prt_printf(err, "invalid device %u in entry ", e->devs[j]);
bch2_replicas_entry_to_text(err, e);
return -BCH_ERR_invalid_sb_replicas;
}
int ret = bch2_replicas_entry_validate(e, sb, err);
if (ret)
return ret;

if (i + 1 < cpu_r->nr) {
struct bch_replicas_entry *n =

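One effect of the new bch2_replicas_entry_validate() is that superblock validation and the runtime mark path now share a single set of checks instead of open-coding them twice (the old inline checks removed from bch2_cpu_replicas_validate() above). A bare-bones sketch of that factoring, with an invented entry type rather than the bcachefs structs:

#include <stdio.h>

struct entry {
	unsigned nr_devs;
	unsigned nr_required;
};

/* one helper, so every caller reports the same errors the same way */
static int entry_validate(const struct entry *e, char *err, unsigned len)
{
	if (!e->nr_devs) {
		snprintf(err, len, "no devices in entry");
		return -1;
	}
	if (e->nr_required > 1 && e->nr_required >= e->nr_devs) {
		snprintf(err, len, "bad nr_required in entry");
		return -1;
	}
	return 0;
}

int main(void)
{
	char err[80];
	struct entry from_superblock = { .nr_devs = 0, .nr_required = 1 };
	struct entry from_mark_path  = { .nr_devs = 2, .nr_required = 1 };

	if (entry_validate(&from_superblock, err, sizeof(err)))	/* sb validation caller */
		printf("superblock: %s\n", err);
	if (entry_validate(&from_mark_path, err, sizeof(err)))		/* runtime caller */
		printf("mark path: %s\n", err);
	return 0;
}
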
@@ -9,6 +9,8 @@
void bch2_replicas_entry_sort(struct bch_replicas_entry *);
void bch2_replicas_entry_to_text(struct printbuf *,
struct bch_replicas_entry *);
int bch2_replicas_entry_validate(struct bch_replicas_entry *,
struct bch_sb *, struct printbuf *);
void bch2_cpu_replicas_to_text(struct printbuf *, struct bch_replicas_cpu *);

static inline struct bch_replicas_entry *

@@ -959,7 +959,7 @@ static int bch2_snapshot_node_delete(struct btree_trans *trans, u32 id)
parent_id, id))
goto err;

parent->v.children[i] = le32_to_cpu(child_id);
parent->v.children[i] = cpu_to_le32(child_id);

normalize_snapshot_child_pointers(&parent->v);
}

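The snapshot fix above swaps the conversion direction: child_id is a CPU-order value being written into a little-endian on-disk field, so it needs cpu_to_le32(), not le32_to_cpu(). A tiny standalone illustration of the convention (hand-rolled helpers standing in for the kernel's; the struct is invented):

#include <stdint.h>
#include <stdio.h>

/* minimal stand-ins for the kernel's cpu_to_le32()/le32_to_cpu() */
static uint32_t cpu_to_le32(uint32_t v)
{
#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
	return __builtin_bswap32(v);
#else
	return v;
#endif
}
static uint32_t le32_to_cpu(uint32_t v) { return cpu_to_le32(v); }

struct on_disk { uint32_t child_le; };	/* stored little-endian on disk */

int main(void)
{
	struct on_disk d;
	uint32_t child_id = 42;

	d.child_le = cpu_to_le32(child_id);		/* store: CPU order -> LE */
	printf("%u\n", le32_to_cpu(d.child_le));	/* load: LE -> CPU order */
	return 0;
}

On little-endian hosts both conversions are the identity, which is why a swapped direction like this is easy to miss in testing.
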
@@ -166,6 +166,7 @@ void bch2_free_super(struct bch_sb_handle *sb)
if (!IS_ERR_OR_NULL(sb->bdev))
blkdev_put(sb->bdev, sb->holder);
kfree(sb->holder);
kfree(sb->sb_name);

kfree(sb->sb);
memset(sb, 0, sizeof(*sb));
@@ -675,6 +676,10 @@ retry:
if (!sb->holder)
return -ENOMEM;

sb->sb_name = kstrdup(path, GFP_KERNEL);
if (!sb->sb_name)
return -ENOMEM;

#ifndef __KERNEL__
if (opt_get(*opts, direct_io) == false)
sb->mode |= BLK_OPEN_BUFFERED;

@@ -423,6 +423,18 @@ static int __bch2_fs_read_write(struct bch_fs *c, bool early)
bch2_dev_allocator_add(c, ca);
bch2_recalc_capacity(c);

set_bit(BCH_FS_RW, &c->flags);
set_bit(BCH_FS_WAS_RW, &c->flags);

#ifndef BCH_WRITE_REF_DEBUG
percpu_ref_reinit(&c->writes);
#else
for (i = 0; i < BCH_WRITE_REF_NR; i++) {
BUG_ON(atomic_long_read(&c->writes[i]));
atomic_long_inc(&c->writes[i]);
}
#endif

ret = bch2_gc_thread_start(c);
if (ret) {
bch_err(c, "error starting gc thread");
@@ -439,24 +451,16 @@ static int __bch2_fs_read_write(struct bch_fs *c, bool early)
goto err;
}

#ifndef BCH_WRITE_REF_DEBUG
percpu_ref_reinit(&c->writes);
#else
for (i = 0; i < BCH_WRITE_REF_NR; i++) {
BUG_ON(atomic_long_read(&c->writes[i]));
atomic_long_inc(&c->writes[i]);
}
#endif
set_bit(BCH_FS_RW, &c->flags);
set_bit(BCH_FS_WAS_RW, &c->flags);

bch2_do_discards(c);
bch2_do_invalidates(c);
bch2_do_stripe_deletes(c);
bch2_do_pending_node_rewrites(c);
return 0;
err:
__bch2_fs_read_only(c);
if (test_bit(BCH_FS_RW, &c->flags))
bch2_fs_read_only(c);
else
__bch2_fs_read_only(c);
return ret;
}

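In the two __bch2_fs_read_write() hunks above, the RW flags and the writes ref are brought up before bch2_gc_thread_start() and the other helpers run, rather than after, so anything started from here that immediately checks them sees the filesystem as read-write. A stripped-down userspace analogue of that start-order concern (pthreads, invented names; build with -pthread):

#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static atomic_bool fs_rw;	/* stand-in for the BCH_FS_RW bit */

static void *worker(void *arg)
{
	(void)arg;
	/* a worker that tests the flag the moment it starts */
	printf(atomic_load(&fs_rw) ? "worker: rw, carrying on\n"
				   : "worker: looks ro, bailing out\n");
	return NULL;
}

int main(void)
{
	pthread_t t;

	atomic_store(&fs_rw, true);		/* publish "we are rw" first... */
	pthread_create(&t, NULL, worker, NULL);	/* ...then start things that check it */
	pthread_join(t, NULL);
	return 0;
}
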
@@ -504,8 +508,8 @@ static void __bch2_fs_free(struct bch_fs *c)
bch2_io_clock_exit(&c->io_clock[WRITE]);
bch2_io_clock_exit(&c->io_clock[READ]);
bch2_fs_compress_exit(c);
bch2_journal_keys_free(&c->journal_keys);
bch2_journal_entries_free(c);
bch2_journal_keys_put_initial(c);
BUG_ON(atomic_read(&c->journal_keys.ref));
bch2_fs_btree_write_buffer_exit(c);
percpu_free_rwsem(&c->mark_lock);
free_percpu(c->online_reserved);
@@ -702,6 +706,8 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)

init_rwsem(&c->gc_lock);
mutex_init(&c->gc_gens_lock);
atomic_set(&c->journal_keys.ref, 1);
c->journal_keys.initial_ref_held = true;

for (i = 0; i < BCH_TIME_STAT_NR; i++)
bch2_time_stats_init(&c->times[i]);

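bch2_fs_alloc() above now creates journal_keys holding one initial reference (ref = 1, initial_ref_held = true); bch2_journal_keys_put_initial() drops that reference, and teardown asserts the count has reached zero. A bare-bones sketch of the initial-reference idiom (plain C11 atomics, invented names; the guard flag is assumed to make a second put harmless, as the two call sites in the diff suggest):

#include <assert.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

struct keys {
	atomic_int ref;
	bool initial_ref_held;
};

static void keys_put_initial(struct keys *k)
{
	if (k->initial_ref_held) {		/* guard: only the first call drops the ref */
		k->initial_ref_held = false;
		atomic_fetch_sub(&k->ref, 1);
	}
}

int main(void)
{
	struct keys k = { .initial_ref_held = true };

	atomic_store(&k.ref, 1);	/* born holding the initial reference */
	keys_put_initial(&k);		/* dropped when the keys are no longer needed */
	keys_put_initial(&k);		/* second call is a no-op */
	assert(atomic_load(&k.ref) == 0);	/* mirrors the BUG_ON() in __bch2_fs_free() */
	printf("ref is 0 at teardown\n");
	return 0;
}
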
@@ -5,6 +5,7 @@
struct bch_sb_handle {
struct bch_sb *sb;
struct block_device *bdev;
char *sb_name;
struct bio *bio;
void *holder;
size_t buffer_size;

@@ -754,9 +754,9 @@ TRACE_EVENT(move_extent_fail,
TP_printk("%d:%d %s", MAJOR(__entry->dev), MINOR(__entry->dev), __get_str(msg))
);

DEFINE_EVENT(bkey, move_extent_alloc_mem_fail,
TP_PROTO(struct bch_fs *c, const char *k),
TP_ARGS(c, k)
DEFINE_EVENT(bkey, move_extent_start_fail,
TP_PROTO(struct bch_fs *c, const char *str),
TP_ARGS(c, str)
);

TRACE_EVENT(move_data,

@@ -104,7 +104,7 @@

struct closure;
struct closure_syncer;
typedef void (closure_fn) (struct closure *);
typedef void (closure_fn) (struct work_struct *);
extern struct dentry *bcache_debug;

struct closure_waitlist {
@@ -254,7 +254,7 @@ static inline void closure_queue(struct closure *cl)
INIT_WORK(&cl->work, cl->work.func);
BUG_ON(!queue_work(wq, &cl->work));
} else
cl->fn(cl);
cl->fn(&cl->work);
}

/**
@@ -309,6 +309,11 @@ static inline void closure_wake_up(struct closure_waitlist *list)
__closure_wake_up(list);
}

#define CLOSURE_CALLBACK(name) void name(struct work_struct *ws)
#define closure_type(name, type, member) \
struct closure *cl = container_of(ws, struct closure, work); \
type *name = container_of(cl, type, member)

/**
* continue_at - jump to another function with barrier
*

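With CLOSURE_CALLBACK() and closure_type() above, every closure callback has the same signature as a workqueue function (it takes a struct work_struct *) and recovers its own type from the embedded work item, so the function pointer stored in cl->fn always matches the type it is called through. A self-contained userspace sketch of that discipline, with minimal stand-in structs and offsetof() arithmetic in place of the kernel's container_of():

#include <stddef.h>
#include <stdio.h>

struct work_struct { int pending; };
struct closure { struct work_struct work; };

/* same shape as the macros in the hunk above */
#define CLOSURE_CALLBACK(name) void name(struct work_struct *ws)
#define closure_type(name, type, member)				\
	struct closure *cl = (struct closure *)				\
		((char *)ws - offsetof(struct closure, work));		\
	type *name = (type *)((char *)cl - offsetof(type, member))

struct my_op {
	struct closure cl;
	int value;
};

static CLOSURE_CALLBACK(my_op_done)
{
	closure_type(op, struct my_op, cl);	/* ws -> closure -> struct my_op */

	printf("op done, value %d\n", op->value);
}

int main(void)
{
	struct my_op op = { .value = 7 };
	void (*fn)(struct work_struct *) = my_op_done;	/* one callback type everywhere */

	fn(&op.cl.work);
	return 0;
}
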
@@ -36,7 +36,7 @@ static inline void closure_put_after_sub(struct closure *cl, int flags)
closure_debug_destroy(cl);

if (destructor)
destructor(cl);
destructor(&cl->work);

if (parent)
closure_put(parent);
@@ -108,8 +108,9 @@ struct closure_syncer {
int done;
};

static void closure_sync_fn(struct closure *cl)
static CLOSURE_CALLBACK(closure_sync_fn)
{
struct closure *cl = container_of(ws, struct closure, work);
struct closure_syncer *s = cl->s;
struct task_struct *p;
