bcachefs: Rewrite journal_seq_blacklist machinery

Blacklisted journal sequence numbers are now stored in the superblock,
not the journal. This greatly simplifies the code, and more importantly
the new implementation no longer requires visiting every btree node
before starting the journal - instead, we unconditionally blacklist the
next 4 journal sequence numbers after an unclean shutdown.

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>

parent ece254b258
commit 1dd7f9d98d
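To make the new scheme concrete, here is a minimal user-space sketch of the idea (not the kernel code; names simplified): blacklist entries are half-open [start, end) ranges kept in the superblock, and recovery from an unclean shutdown appends a range covering the next 4 sequence numbers before the journal starts.

    #include <stdint.h>
    #include <stdio.h>

    struct bl_entry { uint64_t start, end; };   /* half-open: [start, end) */

    int main(void)
    {
        uint64_t journal_seq = 100;             /* example: last seq read + 1 */

        /* unclean shutdown: blacklist the next 4 seqs unconditionally */
        struct bl_entry e = { journal_seq, journal_seq + 4 };
        journal_seq += 4;                       /* journal resumes at 104 */

        printf("blacklisted [%llu,%llu), resuming at %llu\n",
               (unsigned long long)e.start, (unsigned long long)e.end,
               (unsigned long long)journal_seq);
        return 0;
    }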
@@ -185,6 +185,7 @@
 #include <linux/closure.h>
 #include <linux/kobject.h>
+#include <linux/list.h>
 #include <linux/math64.h>
 #include <linux/mutex.h>
 #include <linux/percpu-refcount.h>
 #include <linux/percpu-rwsem.h>
@@ -486,6 +487,7 @@ enum {
     BCH_FS_RW,

     /* shutdown: */
+    BCH_FS_STOPPING,
     BCH_FS_EMERGENCY_RO,
     BCH_FS_WRITE_DISABLE_COMPLETE,

@@ -511,6 +513,15 @@ struct bch_fs_pcpu {
     u64 sectors_available;
 };

+struct journal_seq_blacklist_table {
+    size_t nr;
+    struct journal_seq_blacklist_table_entry {
+        u64 start;
+        u64 end;
+        bool dirty;
+    } entries[0];
+};
+
 struct bch_fs {
     struct closure cl;

@@ -646,6 +657,11 @@ struct bch_fs {

     struct io_clock io_clock[2];

+    /* JOURNAL SEQ BLACKLIST */
+    struct journal_seq_blacklist_table *
+        journal_seq_blacklist_table;
+    struct work_struct journal_seq_blacklist_gc_work;
+
     /* ALLOCATOR */
     spinlock_t freelist_lock;
     struct closure_waitlist freelist_wait;
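The lookup table and its entries are a single allocation, sized from the entry count at build time - the entries[0] member is the pre-C99 spelling of a flexible array. A sketch of the pattern (user-space, calloc standing in for kzalloc; names hypothetical):

    #include <stdint.h>
    #include <stdlib.h>

    struct table {
        size_t nr;
        struct entry { uint64_t start, end; } entries[];  /* C99 spelling */
    };

    static struct table *table_alloc(size_t nr)
    {
        /* one allocation covers the header and all nr entries */
        struct table *t = calloc(1, sizeof(*t) + nr * sizeof(t->entries[0]));
        if (t)
            t->nr = nr;
        return t;
    }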
@@ -909,7 +909,8 @@ struct bch_sb_field {
     x(quota,            4)  \
     x(disk_groups,      5)  \
     x(clean,            6)  \
-    x(replicas,         7)
+    x(replicas,         7)  \
+    x(journal_seq_blacklist, 8)

 enum bch_sb_field_type {
 #define x(f, nr)    BCH_SB_FIELD_##f = nr,
@@ -1124,6 +1125,20 @@ struct bch_sb_field_clean {
     };
 };

+struct journal_seq_blacklist_entry {
+    __le64 start;
+    __le64 end;
+};
+
+struct bch_sb_field_journal_seq_blacklist {
+    struct bch_sb_field field;
+
+    union {
+        struct journal_seq_blacklist_entry start[0];
+        __u64 _data[0];
+    };
+};
+
 /* Superblock: */

 /*
@@ -1279,6 +1294,7 @@ enum bch_sb_features {
     BCH_FEATURE_ZSTD = 2,
     BCH_FEATURE_ATOMIC_NLINK = 3, /* should have gone under compat */
     BCH_FEATURE_EC = 4,
+    BCH_FEATURE_JOURNAL_SEQ_BLACKLIST_V3 = 5,
     BCH_FEATURE_NR,
 };
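The field list above uses the x-macro pattern: one list of (name, number) pairs expands into the enum and any per-field tables, so adding journal_seq_blacklist in a single place wires it up everywhere. A self-contained sketch (field list abbreviated, names hypothetical):

    #define SB_FIELDS()             \
        x(journal,               0) \
        x(replicas,              7) \
        x(journal_seq_blacklist, 8)

    enum sb_field_type {
    #define x(f, nr) FIELD_##f = nr,
        SB_FIELDS()
    #undef x
        FIELD_NR
    };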
@@ -770,7 +770,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct btree *b, bool have_retry
     struct btree_node *sorted;
     struct bkey_packed *k;
     struct bset *i;
-    bool used_mempool;
+    bool used_mempool, blacklisted;
     unsigned u64s;
     int ret, retry_read = 0, write = READ;

@@ -844,20 +844,15 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct btree *b, bool have_retry

         b->written += sectors;

-        ret = bch2_journal_seq_should_ignore(c, le64_to_cpu(i->journal_seq), b);
-        if (ret < 0) {
-            btree_err(BTREE_ERR_FATAL, c, b, i,
-                  "insufficient memory");
-            goto err;
-        }
+        blacklisted = bch2_journal_seq_is_blacklisted(c,
+                        le64_to_cpu(i->journal_seq),
+                        true);

-        if (ret) {
-            btree_err_on(first,
-                     BTREE_ERR_FIXABLE, c, b, i,
-                     "first btree node bset has blacklisted journal seq");
-            if (!first)
-                continue;
-        }
+        btree_err_on(blacklisted && first,
+                 BTREE_ERR_FIXABLE, c, b, i,
+                 "first btree node bset has blacklisted journal seq");
+        if (blacklisted && !first)
+            continue;

         bch2_btree_node_iter_large_push(iter, b,
                         i->start,
@@ -930,7 +925,6 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct btree *b, bool have_retry
 out:
     mempool_free(iter, &c->fill_iter);
     return retry_read;
-err:
 fsck_err:
     if (ret == BTREE_RETRY_READ) {
         retry_read = 1;
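Note the dirty=true argument: reading a btree node that still references a blacklisted sequence number marks the table entry dirty, which is what later tells the gc work the entry cannot yet be dropped from the superblock. A compilable sketch of that contract (simplified types, linear scan instead of the kernel's eytzinger search):

    #include <stdbool.h>
    #include <stddef.h>
    #include <stdint.h>

    struct bl { uint64_t start, end; bool dirty; };

    static bool seq_blacklisted(struct bl *tbl, size_t nr, uint64_t seq,
                                bool dirty)
    {
        for (size_t i = 0; i < nr; i++)
            if (seq >= tbl[i].start && seq < tbl[i].end) {
                if (dirty)          /* still referenced by some btree node */
                    tbl[i].dirty = true;
                return true;
            }
        return false;
    }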
@@ -1156,6 +1156,8 @@ struct btree *bch2_btree_iter_next_node(struct btree_iter *iter, unsigned depth)
     if (!btree_iter_node(iter, iter->level))
         return NULL;

+    bch2_trans_cond_resched(iter->trans);
+
     btree_iter_up(iter);

     if (!bch2_btree_node_relock(iter, iter->level))
@@ -4,8 +4,6 @@

 #include "opts.h"

-#include <linux/math64.h>
-
 extern const char * const bch2_inode_opts[];

 const char *bch2_inode_invalid(const struct bch_fs *, struct bkey_s_c);
@@ -988,27 +988,57 @@ void bch2_fs_journal_stop(struct journal *j)
     cancel_delayed_work_sync(&j->reclaim_work);
 }

-void bch2_fs_journal_start(struct journal *j)
+int bch2_fs_journal_start(struct journal *j, u64 cur_seq,
+              struct list_head *journal_entries)
 {
     struct bch_fs *c = container_of(j, struct bch_fs, journal);
-    struct journal_seq_blacklist *bl;
-    u64 blacklist = 0;
+    struct journal_entry_pin_list *p;
+    struct journal_replay *i;
+    u64 last_seq = cur_seq, nr, seq;

-    list_for_each_entry(bl, &j->seq_blacklist, list)
-        blacklist = max(blacklist, bl->end);
+    if (!list_empty(journal_entries))
+        last_seq = le64_to_cpu(list_last_entry(journal_entries,
+                               struct journal_replay,
+                               list)->j.last_seq);
+
+    nr = cur_seq - last_seq;
+
+    if (nr + 1 > j->pin.size) {
+        free_fifo(&j->pin);
+        init_fifo(&j->pin, roundup_pow_of_two(nr + 1), GFP_KERNEL);
+        if (!j->pin.data) {
+            bch_err(c, "error reallocating journal fifo (%llu open entries)", nr);
+            return -ENOMEM;
+        }
+    }
+
+    j->last_seq_ondisk = last_seq;
+    j->pin.front = last_seq;
+    j->pin.back = cur_seq;
+    atomic64_set(&j->seq, cur_seq - 1);
+
+    fifo_for_each_entry_ptr(p, &j->pin, seq) {
+        INIT_LIST_HEAD(&p->list);
+        INIT_LIST_HEAD(&p->flushed);
+        atomic_set(&p->count, 0);
+        p->devs.nr = 0;
+    }
+
+    list_for_each_entry(i, journal_entries, list) {
+        seq = le64_to_cpu(i->j.seq);
+
+        BUG_ON(seq < last_seq || seq >= cur_seq);
+
+        p = journal_seq_pin(j, seq);
+
+        atomic_set(&p->count, 1);
+        p->devs = i->devs;
+    }

     spin_lock(&j->lock);

     set_bit(JOURNAL_STARTED, &j->flags);

-    while (journal_cur_seq(j) < blacklist)
-        journal_pin_new_entry(j, 0);
-
     /*
      * __journal_entry_close() only inits the next journal entry when it
      * closes an open journal entry - the very first journal entry gets
      * initialized here:
      */
     journal_pin_new_entry(j, 1);
     bch2_journal_buf_init(j);

@@ -1017,12 +1047,7 @@ void bch2_fs_journal_start(struct journal *j)
     bch2_journal_space_available(j);
     spin_unlock(&j->lock);

-    /*
-     * Adding entries to the next journal entry before allocating space on
-     * disk for the next journal entry - this is ok, because these entries
-     * only have to go down with the next journal entry we write:
-     */
-    bch2_journal_seq_blacklist_write(j);
+    return 0;
 }

 /* init/exit: */
@@ -1090,8 +1115,6 @@ int bch2_fs_journal_init(struct journal *j)
     INIT_DELAYED_WORK(&j->write_work, journal_write_work);
     INIT_DELAYED_WORK(&j->reclaim_work, bch2_journal_reclaim_work);
     init_waitqueue_head(&j->pin_flush_wait);
-    mutex_init(&j->blacklist_lock);
-    INIT_LIST_HEAD(&j->seq_blacklist);
     mutex_init(&j->reclaim_lock);
     mutex_init(&j->discard_lock);
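The pin FIFO needs a slot for every sequence number in [last_seq, cur_seq], hence nr + 1 entries, and init_fifo wants a power-of-two size. A small worked example of that arithmetic (user-space stand-ins for the kernel helpers; values hypothetical):

    #include <stdint.h>
    #include <stdio.h>

    static uint64_t roundup_pow_of_two(uint64_t n)
    {
        uint64_t r = 1;
        while (r < n)
            r <<= 1;
        return r;
    }

    int main(void)
    {
        uint64_t last_seq = 100, cur_seq = 108;  /* example values */
        uint64_t nr = cur_seq - last_seq;        /* 8 open entries */

        printf("fifo size: %llu\n",
               (unsigned long long)roundup_pow_of_two(nr + 1));  /* -> 16 */
        return 0;
    }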
@@ -472,8 +472,10 @@ int bch2_set_nr_journal_buckets(struct bch_fs *, struct bch_dev *,
 int bch2_dev_journal_alloc(struct bch_dev *);

 void bch2_dev_journal_stop(struct journal *, struct bch_dev *);

 void bch2_fs_journal_stop(struct journal *);
-void bch2_fs_journal_start(struct journal *);
+int bch2_fs_journal_start(struct journal *, u64, struct list_head *);

 void bch2_dev_journal_exit(struct bch_dev *);
 int bch2_dev_journal_init(struct bch_dev *, struct bch_sb *);
 void bch2_fs_journal_exit(struct journal *);
@@ -10,7 +10,6 @@
 #include "journal.h"
 #include "journal_io.h"
 #include "journal_reclaim.h"
-#include "journal_seq_blacklist.h"
 #include "replicas.h"
 #include "trace.h"

@@ -655,45 +654,11 @@ void bch2_journal_entries_free(struct list_head *list)
     }
 }

-int bch2_journal_set_seq(struct bch_fs *c, u64 last_seq, u64 end_seq)
-{
-    struct journal *j = &c->journal;
-    struct journal_entry_pin_list *p;
-    u64 seq, nr = end_seq - last_seq + 1;
-
-    if (nr > j->pin.size) {
-        free_fifo(&j->pin);
-        init_fifo(&j->pin, roundup_pow_of_two(nr), GFP_KERNEL);
-        if (!j->pin.data) {
-            bch_err(c, "error reallocating journal fifo (%llu open entries)", nr);
-            return -ENOMEM;
-        }
-    }
-
-    atomic64_set(&j->seq, end_seq);
-    j->last_seq_ondisk = last_seq;
-
-    j->pin.front = last_seq;
-    j->pin.back = end_seq + 1;
-
-    fifo_for_each_entry_ptr(p, &j->pin, seq) {
-        INIT_LIST_HEAD(&p->list);
-        INIT_LIST_HEAD(&p->flushed);
-        atomic_set(&p->count, 0);
-        p->devs.nr = 0;
-    }
-
-    return 0;
-}
-
 int bch2_journal_read(struct bch_fs *c, struct list_head *list)
 {
-    struct journal *j = &c->journal;
     struct journal_list jlist;
     struct journal_replay *i;
-    struct journal_entry_pin_list *p;
     struct bch_dev *ca;
-    u64 cur_seq, end_seq;
     unsigned iter;
     size_t keys = 0, entries = 0;
     bool degraded = false;
@@ -725,17 +690,12 @@ int bch2_journal_read(struct bch_fs *c, struct list_head *list)
     if (jlist.ret)
         return jlist.ret;

-    if (list_empty(list)){
-        bch_err(c, "no journal entries found");
-        return BCH_FSCK_REPAIR_IMPOSSIBLE;
-    }
-
     list_for_each_entry(i, list, list) {
         struct jset_entry *entry;
         struct bkey_i *k, *_n;
         struct bch_replicas_padded replicas;
         char buf[80];

-        bch2_devlist_to_replicas(&replicas.e, BCH_DATA_JOURNAL, i->devs);
-
         ret = jset_validate_entries(c, &i->j, READ);
         if (ret)
             goto fsck_err;
@@ -745,6 +705,8 @@ int bch2_journal_read(struct bch_fs *c, struct list_head *list)
          * the devices - this is wrong:
          */

+        bch2_devlist_to_replicas(&replicas.e, BCH_DATA_JOURNAL, i->devs);
+
         if (!degraded &&
             (test_bit(BCH_FS_REBUILD_REPLICAS, &c->flags) ||
              fsck_err_on(!bch2_replicas_marked(c, &replicas.e, false), c,
@@ -755,68 +717,18 @@ int bch2_journal_read(struct bch_fs *c, struct list_head *list)
             if (ret)
                 return ret;
         }
     }

-    i = list_last_entry(list, struct journal_replay, list);
-
-    ret = bch2_journal_set_seq(c,
-                   le64_to_cpu(i->j.last_seq),
-                   le64_to_cpu(i->j.seq));
-    if (ret)
-        return ret;
-
-    mutex_lock(&j->blacklist_lock);
-
-    list_for_each_entry(i, list, list) {
-        p = journal_seq_pin(j, le64_to_cpu(i->j.seq));
-
-        atomic_set(&p->count, 1);
-        p->devs = i->devs;
-
-        if (bch2_journal_seq_blacklist_read(j, i)) {
-            mutex_unlock(&j->blacklist_lock);
-            return -ENOMEM;
-        }
-    }
-
-    mutex_unlock(&j->blacklist_lock);
-
-    cur_seq = journal_last_seq(j);
-    end_seq = le64_to_cpu(list_last_entry(list,
-                struct journal_replay, list)->j.seq);
-
-    list_for_each_entry(i, list, list) {
-        struct jset_entry *entry;
-        struct bkey_i *k, *_n;
-        bool blacklisted;
-
-        mutex_lock(&j->blacklist_lock);
-        while (cur_seq < le64_to_cpu(i->j.seq) &&
-               bch2_journal_seq_blacklist_find(j, cur_seq))
-            cur_seq++;
-
-        blacklisted = bch2_journal_seq_blacklist_find(j,
-                              le64_to_cpu(i->j.seq));
-        mutex_unlock(&j->blacklist_lock);
-
-        fsck_err_on(blacklisted, c,
-                "found blacklisted journal entry %llu",
-                le64_to_cpu(i->j.seq));
-
-        fsck_err_on(le64_to_cpu(i->j.seq) != cur_seq, c,
-                "journal entries %llu-%llu missing! (replaying %llu-%llu)",
-                cur_seq, le64_to_cpu(i->j.seq) - 1,
-                journal_last_seq(j), end_seq);
-
-        cur_seq = le64_to_cpu(i->j.seq) + 1;
-
         for_each_jset_key(k, _n, entry, &i->j)
             keys++;
         entries++;
     }

-    bch_info(c, "journal read done, %zu keys in %zu entries, seq %llu",
-         keys, entries, journal_cur_seq(j));
+    if (!list_empty(list)) {
+        i = list_last_entry(list, struct journal_replay, list);
+
+        bch_info(c, "journal read done, %zu keys in %zu entries, seq %llu",
+             keys, entries, le64_to_cpu(i->j.seq));
+    }
 fsck_err:
     return ret;
 }
@@ -35,7 +35,6 @@ static inline struct jset_entry *__jset_entry_type_next(struct jset *jset,
     for_each_jset_entry_type(entry, jset, BCH_JSET_ENTRY_btree_keys) \
         vstruct_for_each_safe(entry, k, _n)

-int bch2_journal_set_seq(struct bch_fs *c, u64, u64);
 int bch2_journal_read(struct bch_fs *, struct list_head *);
 void bch2_journal_entries_free(struct list_head *);
 int bch2_journal_replay(struct bch_fs *, struct list_head *);
@@ -1,13 +1,10 @@
 // SPDX-License-Identifier: GPL-2.0

 #include "bcachefs.h"
-#include "btree_update.h"
-#include "btree_update_interior.h"
-#include "error.h"
-#include "journal.h"
-#include "journal_io.h"
-#include "journal_reclaim.h"
+#include "btree_iter.h"
+#include "eytzinger.h"
 #include "journal_seq_blacklist.h"
+#include "super-io.h"

 /*
  * journal_seq_blacklist machinery:
@@ -37,327 +34,285 @@
  * record that it was blacklisted so that a) on recovery we don't think we have
  * missing journal entries and b) so that the btree code continues to ignore
  * that bset, until that btree node is rewritten.
- *
- * Blacklisted journal sequence numbers are themselves recorded as entries in
- * the journal.
  */

-/*
- * Called when journal needs to evict a blacklist entry to reclaim space: find
- * any btree nodes that refer to the blacklist journal sequence numbers, and
- * rewrite them:
- */
-static void journal_seq_blacklist_flush(struct journal *j,
-                    struct journal_entry_pin *pin, u64 seq)
-{
-    struct bch_fs *c =
-        container_of(j, struct bch_fs, journal);
-    struct journal_seq_blacklist *bl =
-        container_of(pin, struct journal_seq_blacklist, pin);
-    struct blacklisted_node n;
-    struct closure cl;
-    unsigned i;
-    int ret;
-
-    closure_init_stack(&cl);
-
-    for (i = 0;; i++) {
-        struct btree_trans trans;
-        struct btree_iter *iter;
-        struct btree *b;
-
-        bch2_trans_init(&trans, c);
-
-        mutex_lock(&j->blacklist_lock);
-        if (i >= bl->nr_entries) {
-            mutex_unlock(&j->blacklist_lock);
-            break;
-        }
-        n = bl->entries[i];
-        mutex_unlock(&j->blacklist_lock);
-
-        iter = bch2_trans_get_node_iter(&trans, n.btree_id, n.pos,
-                        0, 0, 0);
-
-        b = bch2_btree_iter_peek_node(iter);
-
-        /* The node might have already been rewritten: */
-
-        if (b->data->keys.seq == n.seq) {
-            ret = bch2_btree_node_rewrite(c, iter, n.seq, 0);
-            if (ret) {
-                bch2_trans_exit(&trans);
-                bch2_fs_fatal_error(c,
-                    "error %i rewriting btree node with blacklisted journal seq",
-                    ret);
-                bch2_journal_halt(j);
-                return;
-            }
-        }
-
-        bch2_trans_exit(&trans);
-    }
-
-    for (i = 0;; i++) {
-        struct btree_update *as;
-        struct pending_btree_node_free *d;
-
-        mutex_lock(&j->blacklist_lock);
-        if (i >= bl->nr_entries) {
-            mutex_unlock(&j->blacklist_lock);
-            break;
-        }
-        n = bl->entries[i];
-        mutex_unlock(&j->blacklist_lock);
-redo_wait:
-        mutex_lock(&c->btree_interior_update_lock);
-
-        /*
-         * Is the node on the list of pending interior node updates -
-         * being freed? If so, wait for that to finish:
-         */
-        for_each_pending_btree_node_free(c, as, d)
-            if (n.seq == d->seq &&
-                n.btree_id == d->btree_id &&
-                !d->level &&
-                !bkey_cmp(n.pos, d->key.k.p)) {
-                closure_wait(&as->wait, &cl);
-                mutex_unlock(&c->btree_interior_update_lock);
-                closure_sync(&cl);
-                goto redo_wait;
-            }
-
-        mutex_unlock(&c->btree_interior_update_lock);
-    }
-
-    mutex_lock(&j->blacklist_lock);
-
-    bch2_journal_pin_drop(j, &bl->pin);
-    list_del(&bl->list);
-    kfree(bl->entries);
-    kfree(bl);
-
-    mutex_unlock(&j->blacklist_lock);
-}
-
-/*
- * Determine if a particular sequence number is blacklisted - if so, return
- * blacklist entry:
- */
-struct journal_seq_blacklist *
-bch2_journal_seq_blacklist_find(struct journal *j, u64 seq)
-{
-    struct journal_seq_blacklist *bl;
-
-    lockdep_assert_held(&j->blacklist_lock);
-
-    list_for_each_entry(bl, &j->seq_blacklist, list)
-        if (seq >= bl->start && seq <= bl->end)
-            return bl;
-}
+static unsigned
+blacklist_nr_entries(struct bch_sb_field_journal_seq_blacklist *bl)
+{
+    return bl
+        ? ((vstruct_end(&bl->field) - (void *) &bl->start[0]) /
+           sizeof(struct journal_seq_blacklist_entry))
+        : 0;
+}

+static unsigned sb_blacklist_u64s(unsigned nr)
+{
+    struct bch_sb_field_journal_seq_blacklist *bl;
+
+    return (sizeof(*bl) + sizeof(bl->start[0]) * nr) / sizeof(u64);
+}
+
+static struct bch_sb_field_journal_seq_blacklist *
+blacklist_entry_try_merge(struct bch_fs *c,
+              struct bch_sb_field_journal_seq_blacklist *bl,
+              unsigned i)
+{
+    unsigned nr = blacklist_nr_entries(bl);
+
+    if (le64_to_cpu(bl->start[i].end) >=
+        le64_to_cpu(bl->start[i + 1].start)) {
+        bl->start[i].end = bl->start[i + 1].end;
+        --nr;
+        memmove(&bl->start[i],
+            &bl->start[i + 1],
+            sizeof(bl->start[0]) * (nr - i));
+
+        bl = bch2_sb_resize_journal_seq_blacklist(&c->disk_sb,
+                            sb_blacklist_u64s(nr));
+        BUG_ON(!bl);
+    }
+
+    return bl;
+}
+
+int bch2_journal_seq_blacklist_add(struct bch_fs *c, u64 start, u64 end)
+{
+    struct bch_sb_field_journal_seq_blacklist *bl;
+    unsigned i, nr;
+    int ret = 0;
+
+    mutex_lock(&c->sb_lock);
+    bl = bch2_sb_get_journal_seq_blacklist(c->disk_sb.sb);
+    nr = blacklist_nr_entries(bl);
+
+    if (bl) {
+        for (i = 0; i < nr; i++) {
+            struct journal_seq_blacklist_entry *e =
+                bl->start + i;
+
+            if (start == le64_to_cpu(e->start) &&
+                end == le64_to_cpu(e->end))
+                goto out;
+
+            if (start <= le64_to_cpu(e->start) &&
+                end >= le64_to_cpu(e->end)) {
+                e->start = cpu_to_le64(start);
+                e->end = cpu_to_le64(end);
+
+                if (i + 1 < nr)
+                    bl = blacklist_entry_try_merge(c,
+                                bl, i);
+                if (i)
+                    bl = blacklist_entry_try_merge(c,
+                                bl, i - 1);
+                goto out_write_sb;
+            }
+        }
+    }
+
+    bl = bch2_sb_resize_journal_seq_blacklist(&c->disk_sb,
+                    sb_blacklist_u64s(nr + 1));
+    if (!bl) {
+        ret = -ENOMEM;
+        goto out;
+    }
+
+    bl->start[nr].start = cpu_to_le64(start);
+    bl->start[nr].end = cpu_to_le64(end);
+out_write_sb:
+    c->disk_sb.sb->features[0] |=
+        1ULL << BCH_FEATURE_JOURNAL_SEQ_BLACKLIST_V3;
+
+    ret = bch2_write_super(c);
+out:
+    mutex_unlock(&c->sb_lock);
+
+    return ret;
+}
+
+static int journal_seq_blacklist_table_cmp(const void *_l,
+                       const void *_r, size_t size)
+{
+    const struct journal_seq_blacklist_table_entry *l = _l;
+    const struct journal_seq_blacklist_table_entry *r = _r;
+
+    return (l->start > r->start) - (l->start < r->start);
+}
+
+bool bch2_journal_seq_is_blacklisted(struct bch_fs *c, u64 seq,
+                     bool dirty)
+{
+    struct journal_seq_blacklist_table *t = c->journal_seq_blacklist_table;
+    struct journal_seq_blacklist_table_entry search = { .start = seq };
+    int idx;
+
+    if (!t)
+        return false;
+
+    idx = eytzinger0_find_le(t->entries, t->nr,
+                 sizeof(t->entries[0]),
+                 journal_seq_blacklist_table_cmp,
+                 &search);
+    if (idx < 0)
+        return false;
+
+    BUG_ON(t->entries[idx].start > seq);
+
+    if (seq >= t->entries[idx].end)
+        return false;
+
+    if (dirty)
+        t->entries[idx].dirty = true;
+    return true;
+}
+
+int bch2_blacklist_table_initialize(struct bch_fs *c)
+{
+    struct bch_sb_field_journal_seq_blacklist *bl =
+        bch2_sb_get_journal_seq_blacklist(c->disk_sb.sb);
+    struct journal_seq_blacklist_table *t;
+    unsigned i, nr = blacklist_nr_entries(bl);
+
+    BUG_ON(c->journal_seq_blacklist_table);
+
+    if (!bl)
+        return 0;
+
+    t = kzalloc(sizeof(*t) + sizeof(t->entries[0]) * nr,
+            GFP_KERNEL);
+    if (!t)
+        return -ENOMEM;
+
+    t->nr = nr;
+
+    for (i = 0; i < nr; i++) {
+        t->entries[i].start = le64_to_cpu(bl->start[i].start);
+        t->entries[i].end = le64_to_cpu(bl->start[i].end);
+    }
+
+    eytzinger0_sort(t->entries,
+            t->nr,
+            sizeof(t->entries[0]),
+            journal_seq_blacklist_table_cmp,
+            NULL);
+
+    c->journal_seq_blacklist_table = t;
+    return 0;
+}
+
+static const char *
+bch2_sb_journal_seq_blacklist_validate(struct bch_sb *sb,
+                       struct bch_sb_field *f)
+{
+    struct bch_sb_field_journal_seq_blacklist *bl =
+        field_to_type(f, journal_seq_blacklist);
+    struct journal_seq_blacklist_entry *i;
+    unsigned nr = blacklist_nr_entries(bl);
+
+    for (i = bl->start; i < bl->start + nr; i++) {
+        if (le64_to_cpu(i->start) >=
+            le64_to_cpu(i->end))
+            return "entry start >= end";
+
+        if (i + 1 < bl->start + nr &&
+            le64_to_cpu(i[0].end) >
+            le64_to_cpu(i[1].start))
+            return "entries out of order";
+    }
+
+    return NULL;
+}

-/*
- * Allocate a new, in memory blacklist entry:
- */
-static struct journal_seq_blacklist *
-bch2_journal_seq_blacklisted_new(struct journal *j, u64 start, u64 end)
-{
-    struct journal_seq_blacklist *bl;
-
-    lockdep_assert_held(&j->blacklist_lock);
-
-    /*
-     * When we start the journal, bch2_journal_start() will skip over @seq:
-     */
-
-    bl = kzalloc(sizeof(*bl), GFP_KERNEL);
-    if (!bl)
-        return NULL;
-
-    bl->start = start;
-    bl->end = end;
-
-    list_add_tail(&bl->list, &j->seq_blacklist);
-    return bl;
-}
+static void bch2_sb_journal_seq_blacklist_to_text(struct printbuf *out,
+                          struct bch_sb *sb,
+                          struct bch_sb_field *f)
+{
+    struct bch_sb_field_journal_seq_blacklist *bl =
+        field_to_type(f, journal_seq_blacklist);
+    struct journal_seq_blacklist_entry *i;
+    unsigned nr = blacklist_nr_entries(bl);
+
+    for (i = bl->start; i < bl->start + nr; i++) {
+        if (i != bl->start)
+            pr_buf(out, " ");
+
+        pr_buf(out, "%llu-%llu",
+               le64_to_cpu(i->start),
+               le64_to_cpu(i->end));
+    }
+}

-/*
- * Returns true if @seq is newer than the most recent journal entry that got
- * written, and data corresponding to @seq should be ignored - also marks @seq
- * as blacklisted so that on future restarts the corresponding data will still
- * be ignored:
- */
-int bch2_journal_seq_should_ignore(struct bch_fs *c, u64 seq, struct btree *b)
-{
-    struct journal *j = &c->journal;
-    struct journal_seq_blacklist *bl = NULL;
-    struct blacklisted_node *n;
-    u64 journal_seq;
-    int ret = 0;
-
-    if (!seq)
-        return 0;
-
-    spin_lock(&j->lock);
-    journal_seq = journal_cur_seq(j);
-    spin_unlock(&j->lock);
-
-    /* Interier updates aren't journalled: */
-    BUG_ON(b->level);
-    BUG_ON(seq > journal_seq && test_bit(BCH_FS_INITIAL_GC_DONE, &c->flags));
-
-    /*
-     * Decrease this back to j->seq + 2 when we next rev the on disk format:
-     * increasing it temporarily to work around bug in old kernels
-     */
-    fsck_err_on(seq > journal_seq + 4, c,
-            "bset journal seq too far in the future: %llu > %llu",
-            seq, journal_seq);
-
-    if (seq <= journal_seq &&
-        list_empty_careful(&j->seq_blacklist))
-        return 0;
-
-    mutex_lock(&j->blacklist_lock);
-
-    if (seq <= journal_seq) {
-        bl = bch2_journal_seq_blacklist_find(j, seq);
-        if (!bl)
-            goto out;
-    } else {
-        bch_verbose(c, "btree node %u:%llu:%llu has future journal sequence number %llu, blacklisting",
-                b->btree_id, b->key.k.p.inode, b->key.k.p.offset, seq);
-
-        if (!j->new_blacklist) {
-            j->new_blacklist = bch2_journal_seq_blacklisted_new(j,
-                        journal_seq + 1,
-                        journal_seq + 1);
-            if (!j->new_blacklist) {
-                ret = -ENOMEM;
-                goto out;
-            }
-        }
-        bl = j->new_blacklist;
-        bl->end = max(bl->end, seq);
-    }
-
-    for (n = bl->entries; n < bl->entries + bl->nr_entries; n++)
-        if (b->data->keys.seq == n->seq &&
-            b->btree_id == n->btree_id &&
-            !bkey_cmp(b->key.k.p, n->pos))
-            goto found_entry;
-
-    if (!bl->nr_entries ||
-        is_power_of_2(bl->nr_entries)) {
-        n = krealloc(bl->entries,
-                 max_t(size_t, bl->nr_entries * 2, 8) * sizeof(*n),
-                 GFP_KERNEL);
-        if (!n) {
-            ret = -ENOMEM;
-            goto out;
-        }
-        bl->entries = n;
-    }
-
-    bl->entries[bl->nr_entries++] = (struct blacklisted_node) {
-        .seq        = b->data->keys.seq,
-        .btree_id   = b->btree_id,
-        .pos        = b->key.k.p,
-    };
-found_entry:
-    ret = 1;
-out:
-fsck_err:
-    mutex_unlock(&j->blacklist_lock);
-    return ret;
-}
-
-static int __bch2_journal_seq_blacklist_read(struct journal *j,
-                         struct journal_replay *i,
-                         u64 start, u64 end)
-{
-    struct bch_fs *c = container_of(j, struct bch_fs, journal);
-    struct journal_seq_blacklist *bl;
-
-    bch_verbose(c, "blacklisting existing journal seq %llu-%llu",
-            start, end);
-
-    bl = bch2_journal_seq_blacklisted_new(j, start, end);
-    if (!bl)
-        return -ENOMEM;
-
-    bch2_journal_pin_add(j, le64_to_cpu(i->j.seq), &bl->pin,
-                 journal_seq_blacklist_flush);
-    return 0;
-}
-
-/*
- * After reading the journal, find existing journal seq blacklist entries and
- * read them into memory:
- */
-int bch2_journal_seq_blacklist_read(struct journal *j,
-                    struct journal_replay *i)
-{
-    struct jset_entry *entry;
-    int ret = 0;
-
-    vstruct_for_each(&i->j, entry) {
-        switch (entry->type) {
-        case BCH_JSET_ENTRY_blacklist: {
-            struct jset_entry_blacklist *bl_entry =
-                container_of(entry, struct jset_entry_blacklist, entry);
-
-            ret = __bch2_journal_seq_blacklist_read(j, i,
-                    le64_to_cpu(bl_entry->seq),
-                    le64_to_cpu(bl_entry->seq));
-            break;
-        }
-        case BCH_JSET_ENTRY_blacklist_v2: {
-            struct jset_entry_blacklist_v2 *bl_entry =
-                container_of(entry, struct jset_entry_blacklist_v2, entry);
-
-            ret = __bch2_journal_seq_blacklist_read(j, i,
-                    le64_to_cpu(bl_entry->start),
-                    le64_to_cpu(bl_entry->end));
-            break;
-        }
-        }
-
-        if (ret)
-            break;
-    }
-
-    return ret;
-}
-
-/*
- * After reading the journal and walking the btree, we might have new journal
- * sequence numbers to blacklist - add entries to the next journal entry to be
- * written:
- */
-void bch2_journal_seq_blacklist_write(struct journal *j)
-{
-    struct journal_seq_blacklist *bl = j->new_blacklist;
-    struct jset_entry_blacklist_v2 *bl_entry;
-    struct jset_entry *entry;
-
-    if (!bl)
-        return;
-
-    entry = bch2_journal_add_entry_noreservation(journal_cur_buf(j),
-            (sizeof(*bl_entry) - sizeof(*entry)) / sizeof(u64));
-
-    bl_entry = container_of(entry, struct jset_entry_blacklist_v2, entry);
-    bl_entry->entry.type    = BCH_JSET_ENTRY_blacklist_v2;
-    bl_entry->start         = cpu_to_le64(bl->start);
-    bl_entry->end           = cpu_to_le64(bl->end);
-
-    bch2_journal_pin_add(j,
-                 journal_cur_seq(j),
-                 &bl->pin,
-                 journal_seq_blacklist_flush);
-
-    j->new_blacklist = NULL;
-}
+const struct bch_sb_field_ops bch_sb_field_ops_journal_seq_blacklist = {
+    .validate   = bch2_sb_journal_seq_blacklist_validate,
+    .to_text    = bch2_sb_journal_seq_blacklist_to_text
+};
+
+void bch2_blacklist_entries_gc(struct work_struct *work)
+{
+    struct bch_fs *c = container_of(work, struct bch_fs,
+                    journal_seq_blacklist_gc_work);
+    struct journal_seq_blacklist_table *t;
+    struct bch_sb_field_journal_seq_blacklist *bl;
+    struct journal_seq_blacklist_entry *src, *dst;
+    struct btree_trans trans;
+    unsigned i, nr, new_nr;
+    int ret;
+
+    bch2_trans_init(&trans, c);
+
+    for (i = 0; i < BTREE_ID_NR; i++) {
+        struct btree_iter *iter;
+        struct btree *b;
+
+        for_each_btree_node(&trans, iter, i, POS_MIN,
+                    BTREE_ITER_PREFETCH, b)
+            if (test_bit(BCH_FS_STOPPING, &c->flags)) {
+                bch2_trans_exit(&trans);
+                return;
+            }
+        bch2_trans_iter_free(&trans, iter);
+    }
+
+    ret = bch2_trans_exit(&trans);
+    if (ret)
+        return;
+
+    mutex_lock(&c->sb_lock);
+    bl = bch2_sb_get_journal_seq_blacklist(c->disk_sb.sb);
+    if (!bl)
+        goto out;
+
+    nr = blacklist_nr_entries(bl);
+    dst = bl->start;
+
+    t = c->journal_seq_blacklist_table;
+    BUG_ON(nr != t->nr);
+
+    for (src = bl->start, i = eytzinger0_first(t->nr);
+         src < bl->start + nr;
+         src++, i = eytzinger0_next(i, nr)) {
+        BUG_ON(t->entries[i].start != le64_to_cpu(src->start));
+        BUG_ON(t->entries[i].end   != le64_to_cpu(src->end));
+
+        if (t->entries[i].dirty)
+            *dst++ = *src;
+    }
+
+    new_nr = dst - bl->start;
+
+    bch_info(c, "nr blacklist entries was %u, now %u", nr, new_nr);
+
+    if (new_nr != nr) {
+        bl = bch2_sb_resize_journal_seq_blacklist(&c->disk_sb,
+                new_nr ? sb_blacklist_u64s(new_nr) : 0);
+        BUG_ON(new_nr && !bl);
+
+        if (!new_nr)
+            c->disk_sb.sb->features[0] &=
+                ~(1ULL << BCH_FEATURE_JOURNAL_SEQ_BLACKLIST_V3);
+
+        bch2_write_super(c);
+    }
+out:
+    mutex_unlock(&c->sb_lock);
+}
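The table is searched with eytzinger0_find_le(): entries are stored in the BFS order of a complete binary tree (children of slot i live at 2i+1 and 2i+2), which keeps the hot upper levels packed together in cache. A simplified sketch of such a search over plain u64 keys (not the kernel helper, which is generic over element size and comparator):

    #include <stddef.h>
    #include <stdint.h>

    /* returns the eytzinger index of the greatest element <= key, or -1 */
    static ptrdiff_t eytzinger0_find_le_u64(const uint64_t *a, size_t nr,
                                            uint64_t key)
    {
        size_t i = 0;
        ptrdiff_t best = -1;

        while (i < nr) {
            if (a[i] <= key) {
                best = (ptrdiff_t)i;
                i = 2 * i + 2;  /* go right: maybe a larger element <= key */
            } else {
                i = 2 * i + 1;  /* go left */
            }
        }
        return best;
    }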
@@ -2,13 +2,12 @@
 #ifndef _BCACHEFS_JOURNAL_SEQ_BLACKLIST_H
 #define _BCACHEFS_JOURNAL_SEQ_BLACKLIST_H

-struct journal_replay;
+bool bch2_journal_seq_is_blacklisted(struct bch_fs *, u64, bool);
+int bch2_journal_seq_blacklist_add(struct bch_fs *c, u64, u64);
+int bch2_blacklist_table_initialize(struct bch_fs *);

-struct journal_seq_blacklist *
-bch2_journal_seq_blacklist_find(struct journal *, u64);
-int bch2_journal_seq_should_ignore(struct bch_fs *, u64, struct btree *);
-int bch2_journal_seq_blacklist_read(struct journal *,
-                    struct journal_replay *);
-void bch2_journal_seq_blacklist_write(struct journal *);
+extern const struct bch_sb_field_ops bch_sb_field_ops_journal_seq_blacklist;
+
+void bch2_blacklist_entries_gc(struct work_struct *);

 #endif /* _BCACHEFS_JOURNAL_SEQ_BLACKLIST_H */
@@ -54,24 +54,6 @@ struct journal_entry_pin {
     u64 seq;
 };

-/* corresponds to a btree node with a blacklisted bset: */
-struct blacklisted_node {
-    __le64 seq;
-    enum btree_id btree_id;
-    struct bpos pos;
-};
-
-struct journal_seq_blacklist {
-    struct list_head list;
-    u64 start;
-    u64 end;
-
-    struct journal_entry_pin pin;
-
-    struct blacklisted_node *entries;
-    size_t nr_entries;
-};
-
 struct journal_res {
     bool ref;
     u8 idx;
@@ -222,10 +204,6 @@ struct journal {

     u64 replay_journal_seq;

-    struct mutex blacklist_lock;
-    struct list_head seq_blacklist;
-    struct journal_seq_blacklist *new_blacklist;
-
     struct write_point wp;
     spinlock_t err_lock;
@@ -12,6 +12,7 @@
 #include "error.h"
 #include "fsck.h"
 #include "journal_io.h"
+#include "journal_seq_blacklist.h"
 #include "quota.h"
 #include "recovery.h"
 #include "replicas.h"
@@ -99,18 +100,49 @@ fsck_err:
     return ret;
 }

+static int
+verify_journal_entries_not_blacklisted_or_missing(struct bch_fs *c,
+                          struct list_head *journal)
+{
+    struct journal_replay *i =
+        list_last_entry(journal, struct journal_replay, list);
+    u64 start_seq = le64_to_cpu(i->j.last_seq);
+    u64 end_seq = le64_to_cpu(i->j.seq);
+    u64 seq = start_seq;
+    int ret = 0;
+
+    list_for_each_entry(i, journal, list) {
+        fsck_err_on(seq != le64_to_cpu(i->j.seq), c,
+            "journal entries %llu-%llu missing! (replaying %llu-%llu)",
+            seq, le64_to_cpu(i->j.seq) - 1,
+            start_seq, end_seq);
+
+        seq = le64_to_cpu(i->j.seq);
+
+        fsck_err_on(bch2_journal_seq_is_blacklisted(c, seq, false), c,
+                "found blacklisted journal entry %llu", seq);
+
+        do {
+            seq++;
+        } while (bch2_journal_seq_is_blacklisted(c, seq, false));
+    }
+fsck_err:
+    return ret;
+}
+
 static struct bch_sb_field_clean *read_superblock_clean(struct bch_fs *c)
 {
     struct bch_sb_field_clean *clean, *sb_clean;
-
-    if (!c->sb.clean)
-        return NULL;
+    int ret;

     mutex_lock(&c->sb_lock);
     sb_clean = bch2_sb_get_clean(c->disk_sb.sb);
-    if (!sb_clean) {
+
+    if (fsck_err_on(!sb_clean, c,
+            "superblock marked clean but clean section not present")) {
+        SET_BCH_SB_CLEAN(c->disk_sb.sb, false);
+        c->sb.clean = false;
         mutex_unlock(&c->sb_lock);
-        bch_err(c, "superblock marked clean but clean section not present");
         return NULL;
     }

@@ -128,6 +160,9 @@ static struct bch_sb_field_clean *read_superblock_clean(struct bch_fs *c)
     mutex_unlock(&c->sb_lock);

     return clean;
+fsck_err:
+    mutex_unlock(&c->sb_lock);
+    return ERR_PTR(ret);
 }

 static int journal_replay_entry_early(struct bch_fs *c,
@@ -179,14 +214,32 @@ static int journal_replay_entry_early(struct bch_fs *c,
                le64_to_cpu(u->v));
         break;
     }
+    case BCH_JSET_ENTRY_blacklist: {
+        struct jset_entry_blacklist *bl_entry =
+            container_of(entry, struct jset_entry_blacklist, entry);
+
+        ret = bch2_journal_seq_blacklist_add(c,
+                le64_to_cpu(bl_entry->seq),
+                le64_to_cpu(bl_entry->seq) + 1);
+        break;
+    }
+    case BCH_JSET_ENTRY_blacklist_v2: {
+        struct jset_entry_blacklist_v2 *bl_entry =
+            container_of(entry, struct jset_entry_blacklist_v2, entry);
+
+        ret = bch2_journal_seq_blacklist_add(c,
+                le64_to_cpu(bl_entry->start),
+                le64_to_cpu(bl_entry->end) + 1);
+        break;
+    }
     }

     return ret;
 }

-static int load_journal_metadata(struct bch_fs *c,
-                 struct bch_sb_field_clean *clean,
-                 struct list_head *journal)
+static int journal_replay_early(struct bch_fs *c,
+                struct bch_sb_field_clean *clean,
+                struct list_head *journal)
 {
     struct jset_entry *entry;
     int ret;
@@ -300,37 +353,76 @@ static bool journal_empty(struct list_head *journal)
 int bch2_fs_recovery(struct bch_fs *c)
 {
     const char *err = "cannot allocate memory";
-    struct bch_sb_field_clean *clean;
+    struct bch_sb_field_clean *clean = NULL;
+    u64 journal_seq;
     LIST_HEAD(journal);
     int ret;

-    clean = read_superblock_clean(c);
-    if (clean)
+    if (c->sb.clean)
+        clean = read_superblock_clean(c);
+    ret = PTR_ERR_OR_ZERO(clean);
+    if (ret)
+        goto err;
+
+    if (c->sb.clean)
         bch_info(c, "recovering from clean shutdown, journal seq %llu",
              le64_to_cpu(clean->journal_seq));

-    if (!clean || c->opts.fsck) {
+    if (!c->replicas.entries) {
+        bch_info(c, "building replicas info");
+        set_bit(BCH_FS_REBUILD_REPLICAS, &c->flags);
+    }
+
+    if (!c->sb.clean || c->opts.fsck) {
+        struct jset *j;
+
         ret = bch2_journal_read(c, &journal);
         if (ret)
             goto err;

-        ret = verify_superblock_clean(c, &clean,
-                 &list_last_entry(&journal, struct journal_replay,
-                          list)->j);
+        fsck_err_on(c->sb.clean && !journal_empty(&journal), c,
+                "filesystem marked clean but journal not empty");
+
+        if (!c->sb.clean && list_empty(&journal)){
+            bch_err(c, "no journal entries found");
+            ret = BCH_FSCK_REPAIR_IMPOSSIBLE;
+            goto err;
+        }
+
+        j = &list_last_entry(&journal, struct journal_replay, list)->j;
+
+        ret = verify_superblock_clean(c, &clean, j);
+        if (ret)
+            goto err;
+
+        journal_seq = le64_to_cpu(j->seq) + 1;
     } else {
-        ret = bch2_journal_set_seq(c,
-                   le64_to_cpu(clean->journal_seq),
-                   le64_to_cpu(clean->journal_seq));
-        if (ret)
-            goto err;
+        journal_seq = le64_to_cpu(clean->journal_seq) + 1;
     }

-    fsck_err_on(clean && !journal_empty(&journal), c,
-            "filesystem marked clean but journal not empty");
+    ret = journal_replay_early(c, clean, &journal);
+    if (ret)
+        goto err;

-    ret = load_journal_metadata(c, clean, &journal);
+    if (!c->sb.clean) {
+        ret = bch2_journal_seq_blacklist_add(c,
+                journal_seq,
+                journal_seq + 4);
+        if (ret) {
+            bch_err(c, "error creating new journal seq blacklist entry");
+            goto err;
+        }
+
+        journal_seq += 4;
+    }
+
+    ret = bch2_blacklist_table_initialize(c);
+
+    ret = verify_journal_entries_not_blacklisted_or_missing(c, &journal);
+    if (ret)
+        goto err;
+
+    ret = bch2_fs_journal_start(&c->journal, journal_seq, &journal);
     if (ret)
         goto err;

@@ -351,11 +443,6 @@ int bch2_fs_recovery(struct bch_fs *c)

     set_bit(BCH_FS_ALLOC_READ_DONE, &c->flags);

-    if (!c->replicas.entries) {
-        bch_info(c, "building replicas info");
-        set_bit(BCH_FS_REBUILD_REPLICAS, &c->flags);
-    }
-
     if (c->opts.fsck ||
         !(c->sb.compat & (1ULL << BCH_COMPAT_FEAT_ALLOC_INFO)) ||
         test_bit(BCH_FS_REBUILD_REPLICAS, &c->flags)) {
@@ -377,13 +464,6 @@ int bch2_fs_recovery(struct bch_fs *c)
     if (c->sb.encryption_type && !c->sb.clean)
         atomic64_add(1 << 16, &c->key_version);

-    /*
-     * bch2_fs_journal_start() can't happen sooner, or btree_gc_finish()
-     * will give spurious errors about oldest_gen > bucket_gen -
-     * this is a hack but oh well.
-     */
-    bch2_fs_journal_start(&c->journal);
-
     if (c->opts.noreplay)
         goto out;

@@ -424,6 +504,10 @@ int bch2_fs_recovery(struct bch_fs *c)
         SET_BCH_SB_HAS_ERRORS(c->disk_sb.sb, 0);
     }
     mutex_unlock(&c->sb_lock);

+    if (c->journal_seq_blacklist_table &&
+        c->journal_seq_blacklist_table->nr > 128)
+        queue_work(system_long_wq, &c->journal_seq_blacklist_gc_work);
 out:
     bch2_journal_entries_free(&journal);
     kfree(clean);
@@ -472,7 +556,7 @@ int bch2_fs_initialize(struct bch_fs *c)
      * journal_res_get() will crash if called before this has
      * set up the journal.pin FIFO and journal.cur pointer:
      */
-    bch2_fs_journal_start(&c->journal);
+    bch2_fs_journal_start(&c->journal, 1, &journal);
     bch2_journal_set_replay_done(&c->journal);

     err = "error going read write";
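The verification pass walks the journal entries in sequence order: a gap is only an error if the missing seqs are not blacklisted. A small worked example of that expected-seq walk (user-space, hypothetical numbers):

    #include <stdbool.h>
    #include <stddef.h>
    #include <stdint.h>
    #include <stdio.h>

    static bool blacklisted(uint64_t seq)
    {
        return seq >= 7 && seq < 9;     /* pretend [7,9) is blacklisted */
    }

    int main(void)
    {
        uint64_t entries[] = { 5, 6, 9, 10 };   /* seqs found in the journal */
        uint64_t expect = entries[0];

        for (size_t i = 0; i < sizeof(entries) / sizeof(entries[0]); i++) {
            if (entries[i] != expect)
                printf("journal entries %llu-%llu missing!\n",
                       (unsigned long long)expect,
                       (unsigned long long)(entries[i] - 1));
            expect = entries[i] + 1;
            while (blacklisted(expect))  /* blacklisted seqs are expected gaps */
                expect++;
        }
        return 0;   /* prints nothing: seqs 7 and 8 are blacklisted, not missing */
    }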
@@ -7,6 +7,7 @@
 #include "error.h"
 #include "io.h"
 #include "journal.h"
+#include "journal_seq_blacklist.h"
 #include "replicas.h"
 #include "quota.h"
 #include "super-io.h"
@@ -30,6 +30,7 @@
 #include "io.h"
 #include "journal.h"
 #include "journal_reclaim.h"
+#include "journal_seq_blacklist.h"
 #include "move.h"
 #include "migrate.h"
 #include "movinggc.h"
@@ -468,6 +469,7 @@ static void bch2_fs_free(struct bch_fs *c)
     kfree(c->replicas.entries);
     kfree(c->replicas_gc.entries);
     kfree(rcu_dereference_protected(c->disk_groups, 1));
+    kfree(c->journal_seq_blacklist_table);

     if (c->journal_reclaim_wq)
         destroy_workqueue(c->journal_reclaim_wq);
@@ -496,6 +498,10 @@ void bch2_fs_stop(struct bch_fs *c)

     bch_verbose(c, "shutting down");

+    set_bit(BCH_FS_STOPPING, &c->flags);
+
+    cancel_work_sync(&c->journal_seq_blacklist_gc_work);
+
     for_each_member_device(ca, c, i)
         if (ca->kobj.state_in_sysfs &&
             ca->disk_sb.bdev)
@@ -631,6 +637,9 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
     spin_lock_init(&c->btree_write_error_lock);
     INIT_WORK(&c->btree_write_error_work, bch2_btree_write_error_work);

+    INIT_WORK(&c->journal_seq_blacklist_gc_work,
+          bch2_blacklist_entries_gc);
+
     INIT_LIST_HEAD(&c->fsck_errors);
     mutex_init(&c->fsck_error_lock);