Merge branch 'foreign/zhaolei/reada' into for-chris-4.6
This commit is contained in:
commit
ff7db6e05a
@ -1825,6 +1825,9 @@ struct btrfs_fs_info {
|
|||||||
spinlock_t reada_lock;
|
spinlock_t reada_lock;
|
||||||
struct radix_tree_root reada_tree;
|
struct radix_tree_root reada_tree;
|
||||||
|
|
||||||
|
/* readahead works cnt */
|
||||||
|
atomic_t reada_works_cnt;
|
||||||
|
|
||||||
/* Extent buffer radix tree */
|
/* Extent buffer radix tree */
|
||||||
spinlock_t buffer_lock;
|
spinlock_t buffer_lock;
|
||||||
struct radix_tree_root buffer_radix;
|
struct radix_tree_root buffer_radix;
|
||||||
@ -4563,8 +4566,8 @@ struct reada_control *btrfs_reada_add(struct btrfs_root *root,
|
|||||||
struct btrfs_key *start, struct btrfs_key *end);
|
struct btrfs_key *start, struct btrfs_key *end);
|
||||||
int btrfs_reada_wait(void *handle);
|
int btrfs_reada_wait(void *handle);
|
||||||
void btrfs_reada_detach(void *handle);
|
void btrfs_reada_detach(void *handle);
|
||||||
int btree_readahead_hook(struct btrfs_root *root, struct extent_buffer *eb,
|
int btree_readahead_hook(struct btrfs_fs_info *fs_info,
|
||||||
u64 start, int err);
|
struct extent_buffer *eb, u64 start, int err);
|
||||||
|
|
||||||
static inline int is_fstree(u64 rootid)
|
static inline int is_fstree(u64 rootid)
|
||||||
{
|
{
|
||||||
|
@ -612,6 +612,7 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio,
|
|||||||
int found_level;
|
int found_level;
|
||||||
struct extent_buffer *eb;
|
struct extent_buffer *eb;
|
||||||
struct btrfs_root *root = BTRFS_I(page->mapping->host)->root;
|
struct btrfs_root *root = BTRFS_I(page->mapping->host)->root;
|
||||||
|
struct btrfs_fs_info *fs_info = root->fs_info;
|
||||||
int ret = 0;
|
int ret = 0;
|
||||||
int reads_done;
|
int reads_done;
|
||||||
|
|
||||||
@ -637,20 +638,20 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio,
|
|||||||
|
|
||||||
found_start = btrfs_header_bytenr(eb);
|
found_start = btrfs_header_bytenr(eb);
|
||||||
if (found_start != eb->start) {
|
if (found_start != eb->start) {
|
||||||
btrfs_err_rl(eb->fs_info, "bad tree block start %llu %llu",
|
btrfs_err_rl(fs_info, "bad tree block start %llu %llu",
|
||||||
found_start, eb->start);
|
found_start, eb->start);
|
||||||
ret = -EIO;
|
ret = -EIO;
|
||||||
goto err;
|
goto err;
|
||||||
}
|
}
|
||||||
if (check_tree_block_fsid(root->fs_info, eb)) {
|
if (check_tree_block_fsid(fs_info, eb)) {
|
||||||
btrfs_err_rl(eb->fs_info, "bad fsid on block %llu",
|
btrfs_err_rl(fs_info, "bad fsid on block %llu",
|
||||||
eb->start);
|
eb->start);
|
||||||
ret = -EIO;
|
ret = -EIO;
|
||||||
goto err;
|
goto err;
|
||||||
}
|
}
|
||||||
found_level = btrfs_header_level(eb);
|
found_level = btrfs_header_level(eb);
|
||||||
if (found_level >= BTRFS_MAX_LEVEL) {
|
if (found_level >= BTRFS_MAX_LEVEL) {
|
||||||
btrfs_err(root->fs_info, "bad tree block level %d",
|
btrfs_err(fs_info, "bad tree block level %d",
|
||||||
(int)btrfs_header_level(eb));
|
(int)btrfs_header_level(eb));
|
||||||
ret = -EIO;
|
ret = -EIO;
|
||||||
goto err;
|
goto err;
|
||||||
@ -659,7 +660,7 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio,
|
|||||||
btrfs_set_buffer_lockdep_class(btrfs_header_owner(eb),
|
btrfs_set_buffer_lockdep_class(btrfs_header_owner(eb),
|
||||||
eb, found_level);
|
eb, found_level);
|
||||||
|
|
||||||
ret = csum_tree_block(root->fs_info, eb, 1);
|
ret = csum_tree_block(fs_info, eb, 1);
|
||||||
if (ret) {
|
if (ret) {
|
||||||
ret = -EIO;
|
ret = -EIO;
|
||||||
goto err;
|
goto err;
|
||||||
@ -680,7 +681,7 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio,
|
|||||||
err:
|
err:
|
||||||
if (reads_done &&
|
if (reads_done &&
|
||||||
test_and_clear_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags))
|
test_and_clear_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags))
|
||||||
btree_readahead_hook(root, eb, eb->start, ret);
|
btree_readahead_hook(fs_info, eb, eb->start, ret);
|
||||||
|
|
||||||
if (ret) {
|
if (ret) {
|
||||||
/*
|
/*
|
||||||
@ -699,14 +700,13 @@ out:
|
|||||||
static int btree_io_failed_hook(struct page *page, int failed_mirror)
|
static int btree_io_failed_hook(struct page *page, int failed_mirror)
|
||||||
{
|
{
|
||||||
struct extent_buffer *eb;
|
struct extent_buffer *eb;
|
||||||
struct btrfs_root *root = BTRFS_I(page->mapping->host)->root;
|
|
||||||
|
|
||||||
eb = (struct extent_buffer *)page->private;
|
eb = (struct extent_buffer *)page->private;
|
||||||
set_bit(EXTENT_BUFFER_READ_ERR, &eb->bflags);
|
set_bit(EXTENT_BUFFER_READ_ERR, &eb->bflags);
|
||||||
eb->read_mirror = failed_mirror;
|
eb->read_mirror = failed_mirror;
|
||||||
atomic_dec(&eb->io_pages);
|
atomic_dec(&eb->io_pages);
|
||||||
if (test_and_clear_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags))
|
if (test_and_clear_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags))
|
||||||
btree_readahead_hook(root, eb, eb->start, -EIO);
|
btree_readahead_hook(eb->fs_info, eb, eb->start, -EIO);
|
||||||
return -EIO; /* we fixed nothing */
|
return -EIO; /* we fixed nothing */
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -2604,6 +2604,7 @@ int open_ctree(struct super_block *sb,
|
|||||||
atomic_set(&fs_info->nr_async_bios, 0);
|
atomic_set(&fs_info->nr_async_bios, 0);
|
||||||
atomic_set(&fs_info->defrag_running, 0);
|
atomic_set(&fs_info->defrag_running, 0);
|
||||||
atomic_set(&fs_info->qgroup_op_seq, 0);
|
atomic_set(&fs_info->qgroup_op_seq, 0);
|
||||||
|
atomic_set(&fs_info->reada_works_cnt, 0);
|
||||||
atomic64_set(&fs_info->tree_mod_seq, 0);
|
atomic64_set(&fs_info->tree_mod_seq, 0);
|
||||||
fs_info->sb = sb;
|
fs_info->sb = sb;
|
||||||
fs_info->max_inline = BTRFS_DEFAULT_MAX_INLINE;
|
fs_info->max_inline = BTRFS_DEFAULT_MAX_INLINE;
|
||||||
|
218
fs/btrfs/reada.c
218
fs/btrfs/reada.c
@ -72,7 +72,7 @@ struct reada_extent {
|
|||||||
spinlock_t lock;
|
spinlock_t lock;
|
||||||
struct reada_zone *zones[BTRFS_MAX_MIRRORS];
|
struct reada_zone *zones[BTRFS_MAX_MIRRORS];
|
||||||
int nzones;
|
int nzones;
|
||||||
struct btrfs_device *scheduled_for;
|
int scheduled;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct reada_zone {
|
struct reada_zone {
|
||||||
@ -101,11 +101,12 @@ static void reada_start_machine(struct btrfs_fs_info *fs_info);
|
|||||||
static void __reada_start_machine(struct btrfs_fs_info *fs_info);
|
static void __reada_start_machine(struct btrfs_fs_info *fs_info);
|
||||||
|
|
||||||
static int reada_add_block(struct reada_control *rc, u64 logical,
|
static int reada_add_block(struct reada_control *rc, u64 logical,
|
||||||
struct btrfs_key *top, int level, u64 generation);
|
struct btrfs_key *top, u64 generation);
|
||||||
|
|
||||||
/* recurses */
|
/* recurses */
|
||||||
/* in case of err, eb might be NULL */
|
/* in case of err, eb might be NULL */
|
||||||
static int __readahead_hook(struct btrfs_root *root, struct extent_buffer *eb,
|
static void __readahead_hook(struct btrfs_fs_info *fs_info,
|
||||||
|
struct reada_extent *re, struct extent_buffer *eb,
|
||||||
u64 start, int err)
|
u64 start, int err)
|
||||||
{
|
{
|
||||||
int level = 0;
|
int level = 0;
|
||||||
@ -113,55 +114,40 @@ static int __readahead_hook(struct btrfs_root *root, struct extent_buffer *eb,
|
|||||||
int i;
|
int i;
|
||||||
u64 bytenr;
|
u64 bytenr;
|
||||||
u64 generation;
|
u64 generation;
|
||||||
struct reada_extent *re;
|
|
||||||
struct btrfs_fs_info *fs_info = root->fs_info;
|
|
||||||
struct list_head list;
|
struct list_head list;
|
||||||
unsigned long index = start >> PAGE_CACHE_SHIFT;
|
|
||||||
struct btrfs_device *for_dev;
|
|
||||||
|
|
||||||
if (eb)
|
if (eb)
|
||||||
level = btrfs_header_level(eb);
|
level = btrfs_header_level(eb);
|
||||||
|
|
||||||
/* find extent */
|
|
||||||
spin_lock(&fs_info->reada_lock);
|
|
||||||
re = radix_tree_lookup(&fs_info->reada_tree, index);
|
|
||||||
if (re)
|
|
||||||
re->refcnt++;
|
|
||||||
spin_unlock(&fs_info->reada_lock);
|
|
||||||
|
|
||||||
if (!re)
|
|
||||||
return -1;
|
|
||||||
|
|
||||||
spin_lock(&re->lock);
|
spin_lock(&re->lock);
|
||||||
/*
|
/*
|
||||||
* just take the full list from the extent. afterwards we
|
* just take the full list from the extent. afterwards we
|
||||||
* don't need the lock anymore
|
* don't need the lock anymore
|
||||||
*/
|
*/
|
||||||
list_replace_init(&re->extctl, &list);
|
list_replace_init(&re->extctl, &list);
|
||||||
for_dev = re->scheduled_for;
|
re->scheduled = 0;
|
||||||
re->scheduled_for = NULL;
|
|
||||||
spin_unlock(&re->lock);
|
spin_unlock(&re->lock);
|
||||||
|
|
||||||
if (err == 0) {
|
|
||||||
nritems = level ? btrfs_header_nritems(eb) : 0;
|
|
||||||
generation = btrfs_header_generation(eb);
|
|
||||||
/*
|
|
||||||
* FIXME: currently we just set nritems to 0 if this is a leaf,
|
|
||||||
* effectively ignoring the content. In a next step we could
|
|
||||||
* trigger more readahead depending from the content, e.g.
|
|
||||||
* fetch the checksums for the extents in the leaf.
|
|
||||||
*/
|
|
||||||
} else {
|
|
||||||
/*
|
/*
|
||||||
* this is the error case, the extent buffer has not been
|
* this is the error case, the extent buffer has not been
|
||||||
* read correctly. We won't access anything from it and
|
* read correctly. We won't access anything from it and
|
||||||
* just cleanup our data structures. Effectively this will
|
* just cleanup our data structures. Effectively this will
|
||||||
* cut the branch below this node from read ahead.
|
* cut the branch below this node from read ahead.
|
||||||
*/
|
*/
|
||||||
nritems = 0;
|
if (err)
|
||||||
generation = 0;
|
goto cleanup;
|
||||||
}
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* FIXME: currently we just set nritems to 0 if this is a leaf,
|
||||||
|
* effectively ignoring the content. In a next step we could
|
||||||
|
* trigger more readahead depending from the content, e.g.
|
||||||
|
* fetch the checksums for the extents in the leaf.
|
||||||
|
*/
|
||||||
|
if (!level)
|
||||||
|
goto cleanup;
|
||||||
|
|
||||||
|
nritems = btrfs_header_nritems(eb);
|
||||||
|
generation = btrfs_header_generation(eb);
|
||||||
for (i = 0; i < nritems; i++) {
|
for (i = 0; i < nritems; i++) {
|
||||||
struct reada_extctl *rec;
|
struct reada_extctl *rec;
|
||||||
u64 n_gen;
|
u64 n_gen;
|
||||||
@ -188,7 +174,7 @@ static int __readahead_hook(struct btrfs_root *root, struct extent_buffer *eb,
|
|||||||
*/
|
*/
|
||||||
#ifdef DEBUG
|
#ifdef DEBUG
|
||||||
if (rec->generation != generation) {
|
if (rec->generation != generation) {
|
||||||
btrfs_debug(root->fs_info,
|
btrfs_debug(fs_info,
|
||||||
"generation mismatch for (%llu,%d,%llu) %llu != %llu",
|
"generation mismatch for (%llu,%d,%llu) %llu != %llu",
|
||||||
key.objectid, key.type, key.offset,
|
key.objectid, key.type, key.offset,
|
||||||
rec->generation, generation);
|
rec->generation, generation);
|
||||||
@ -197,10 +183,11 @@ static int __readahead_hook(struct btrfs_root *root, struct extent_buffer *eb,
|
|||||||
if (rec->generation == generation &&
|
if (rec->generation == generation &&
|
||||||
btrfs_comp_cpu_keys(&key, &rc->key_end) < 0 &&
|
btrfs_comp_cpu_keys(&key, &rc->key_end) < 0 &&
|
||||||
btrfs_comp_cpu_keys(&next_key, &rc->key_start) > 0)
|
btrfs_comp_cpu_keys(&next_key, &rc->key_start) > 0)
|
||||||
reada_add_block(rc, bytenr, &next_key,
|
reada_add_block(rc, bytenr, &next_key, n_gen);
|
||||||
level - 1, n_gen);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
cleanup:
|
||||||
/*
|
/*
|
||||||
* free extctl records
|
* free extctl records
|
||||||
*/
|
*/
|
||||||
@ -222,26 +209,37 @@ static int __readahead_hook(struct btrfs_root *root, struct extent_buffer *eb,
|
|||||||
|
|
||||||
reada_extent_put(fs_info, re); /* one ref for each entry */
|
reada_extent_put(fs_info, re); /* one ref for each entry */
|
||||||
}
|
}
|
||||||
reada_extent_put(fs_info, re); /* our ref */
|
|
||||||
if (for_dev)
|
|
||||||
atomic_dec(&for_dev->reada_in_flight);
|
|
||||||
|
|
||||||
return 0;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* start is passed separately in case eb in NULL, which may be the case with
|
* start is passed separately in case eb in NULL, which may be the case with
|
||||||
* failed I/O
|
* failed I/O
|
||||||
*/
|
*/
|
||||||
int btree_readahead_hook(struct btrfs_root *root, struct extent_buffer *eb,
|
int btree_readahead_hook(struct btrfs_fs_info *fs_info,
|
||||||
u64 start, int err)
|
struct extent_buffer *eb, u64 start, int err)
|
||||||
{
|
{
|
||||||
int ret;
|
int ret = 0;
|
||||||
|
struct reada_extent *re;
|
||||||
|
|
||||||
ret = __readahead_hook(root, eb, start, err);
|
/* find extent */
|
||||||
|
spin_lock(&fs_info->reada_lock);
|
||||||
|
re = radix_tree_lookup(&fs_info->reada_tree,
|
||||||
|
start >> PAGE_CACHE_SHIFT);
|
||||||
|
if (re)
|
||||||
|
re->refcnt++;
|
||||||
|
spin_unlock(&fs_info->reada_lock);
|
||||||
|
if (!re) {
|
||||||
|
ret = -1;
|
||||||
|
goto start_machine;
|
||||||
|
}
|
||||||
|
|
||||||
reada_start_machine(root->fs_info);
|
__readahead_hook(fs_info, re, eb, start, err);
|
||||||
|
reada_extent_put(fs_info, re); /* our ref */
|
||||||
|
|
||||||
|
start_machine:
|
||||||
|
reada_start_machine(fs_info);
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -260,18 +258,14 @@ static struct reada_zone *reada_find_zone(struct btrfs_fs_info *fs_info,
|
|||||||
spin_lock(&fs_info->reada_lock);
|
spin_lock(&fs_info->reada_lock);
|
||||||
ret = radix_tree_gang_lookup(&dev->reada_zones, (void **)&zone,
|
ret = radix_tree_gang_lookup(&dev->reada_zones, (void **)&zone,
|
||||||
logical >> PAGE_CACHE_SHIFT, 1);
|
logical >> PAGE_CACHE_SHIFT, 1);
|
||||||
if (ret == 1)
|
if (ret == 1 && logical >= zone->start && logical <= zone->end) {
|
||||||
kref_get(&zone->refcnt);
|
kref_get(&zone->refcnt);
|
||||||
spin_unlock(&fs_info->reada_lock);
|
spin_unlock(&fs_info->reada_lock);
|
||||||
|
|
||||||
if (ret == 1) {
|
|
||||||
if (logical >= zone->start && logical < zone->end)
|
|
||||||
return zone;
|
return zone;
|
||||||
spin_lock(&fs_info->reada_lock);
|
|
||||||
kref_put(&zone->refcnt, reada_zone_release);
|
|
||||||
spin_unlock(&fs_info->reada_lock);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
spin_unlock(&fs_info->reada_lock);
|
||||||
|
|
||||||
cache = btrfs_lookup_block_group(fs_info, logical);
|
cache = btrfs_lookup_block_group(fs_info, logical);
|
||||||
if (!cache)
|
if (!cache)
|
||||||
return NULL;
|
return NULL;
|
||||||
@ -307,8 +301,10 @@ static struct reada_zone *reada_find_zone(struct btrfs_fs_info *fs_info,
|
|||||||
kfree(zone);
|
kfree(zone);
|
||||||
ret = radix_tree_gang_lookup(&dev->reada_zones, (void **)&zone,
|
ret = radix_tree_gang_lookup(&dev->reada_zones, (void **)&zone,
|
||||||
logical >> PAGE_CACHE_SHIFT, 1);
|
logical >> PAGE_CACHE_SHIFT, 1);
|
||||||
if (ret == 1)
|
if (ret == 1 && logical >= zone->start && logical <= zone->end)
|
||||||
kref_get(&zone->refcnt);
|
kref_get(&zone->refcnt);
|
||||||
|
else
|
||||||
|
zone = NULL;
|
||||||
}
|
}
|
||||||
spin_unlock(&fs_info->reada_lock);
|
spin_unlock(&fs_info->reada_lock);
|
||||||
|
|
||||||
@ -317,7 +313,7 @@ static struct reada_zone *reada_find_zone(struct btrfs_fs_info *fs_info,
|
|||||||
|
|
||||||
static struct reada_extent *reada_find_extent(struct btrfs_root *root,
|
static struct reada_extent *reada_find_extent(struct btrfs_root *root,
|
||||||
u64 logical,
|
u64 logical,
|
||||||
struct btrfs_key *top, int level)
|
struct btrfs_key *top)
|
||||||
{
|
{
|
||||||
int ret;
|
int ret;
|
||||||
struct reada_extent *re = NULL;
|
struct reada_extent *re = NULL;
|
||||||
@ -330,9 +326,9 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root,
|
|||||||
u64 length;
|
u64 length;
|
||||||
int real_stripes;
|
int real_stripes;
|
||||||
int nzones = 0;
|
int nzones = 0;
|
||||||
int i;
|
|
||||||
unsigned long index = logical >> PAGE_CACHE_SHIFT;
|
unsigned long index = logical >> PAGE_CACHE_SHIFT;
|
||||||
int dev_replace_is_ongoing;
|
int dev_replace_is_ongoing;
|
||||||
|
int have_zone = 0;
|
||||||
|
|
||||||
spin_lock(&fs_info->reada_lock);
|
spin_lock(&fs_info->reada_lock);
|
||||||
re = radix_tree_lookup(&fs_info->reada_tree, index);
|
re = radix_tree_lookup(&fs_info->reada_tree, index);
|
||||||
@ -375,11 +371,16 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root,
|
|||||||
struct reada_zone *zone;
|
struct reada_zone *zone;
|
||||||
|
|
||||||
dev = bbio->stripes[nzones].dev;
|
dev = bbio->stripes[nzones].dev;
|
||||||
|
|
||||||
|
/* cannot read ahead on missing device. */
|
||||||
|
if (!dev->bdev)
|
||||||
|
continue;
|
||||||
|
|
||||||
zone = reada_find_zone(fs_info, dev, logical, bbio);
|
zone = reada_find_zone(fs_info, dev, logical, bbio);
|
||||||
if (!zone)
|
if (!zone)
|
||||||
break;
|
continue;
|
||||||
|
|
||||||
re->zones[nzones] = zone;
|
re->zones[re->nzones++] = zone;
|
||||||
spin_lock(&zone->lock);
|
spin_lock(&zone->lock);
|
||||||
if (!zone->elems)
|
if (!zone->elems)
|
||||||
kref_get(&zone->refcnt);
|
kref_get(&zone->refcnt);
|
||||||
@ -389,8 +390,7 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root,
|
|||||||
kref_put(&zone->refcnt, reada_zone_release);
|
kref_put(&zone->refcnt, reada_zone_release);
|
||||||
spin_unlock(&fs_info->reada_lock);
|
spin_unlock(&fs_info->reada_lock);
|
||||||
}
|
}
|
||||||
re->nzones = nzones;
|
if (re->nzones == 0) {
|
||||||
if (nzones == 0) {
|
|
||||||
/* not a single zone found, error and out */
|
/* not a single zone found, error and out */
|
||||||
goto error;
|
goto error;
|
||||||
}
|
}
|
||||||
@ -415,8 +415,9 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root,
|
|||||||
prev_dev = NULL;
|
prev_dev = NULL;
|
||||||
dev_replace_is_ongoing = btrfs_dev_replace_is_ongoing(
|
dev_replace_is_ongoing = btrfs_dev_replace_is_ongoing(
|
||||||
&fs_info->dev_replace);
|
&fs_info->dev_replace);
|
||||||
for (i = 0; i < nzones; ++i) {
|
for (nzones = 0; nzones < re->nzones; ++nzones) {
|
||||||
dev = bbio->stripes[i].dev;
|
dev = re->zones[nzones]->device;
|
||||||
|
|
||||||
if (dev == prev_dev) {
|
if (dev == prev_dev) {
|
||||||
/*
|
/*
|
||||||
* in case of DUP, just add the first zone. As both
|
* in case of DUP, just add the first zone. As both
|
||||||
@ -427,15 +428,9 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root,
|
|||||||
*/
|
*/
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
if (!dev->bdev) {
|
if (!dev->bdev)
|
||||||
/*
|
|
||||||
* cannot read ahead on missing device, but for RAID5/6,
|
|
||||||
* REQ_GET_READ_MIRRORS return 1. So don't skip missing
|
|
||||||
* device for such case.
|
|
||||||
*/
|
|
||||||
if (nzones > 1)
|
|
||||||
continue;
|
continue;
|
||||||
}
|
|
||||||
if (dev_replace_is_ongoing &&
|
if (dev_replace_is_ongoing &&
|
||||||
dev == fs_info->dev_replace.tgtdev) {
|
dev == fs_info->dev_replace.tgtdev) {
|
||||||
/*
|
/*
|
||||||
@ -447,8 +442,8 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root,
|
|||||||
prev_dev = dev;
|
prev_dev = dev;
|
||||||
ret = radix_tree_insert(&dev->reada_extents, index, re);
|
ret = radix_tree_insert(&dev->reada_extents, index, re);
|
||||||
if (ret) {
|
if (ret) {
|
||||||
while (--i >= 0) {
|
while (--nzones >= 0) {
|
||||||
dev = bbio->stripes[i].dev;
|
dev = re->zones[nzones]->device;
|
||||||
BUG_ON(dev == NULL);
|
BUG_ON(dev == NULL);
|
||||||
/* ignore whether the entry was inserted */
|
/* ignore whether the entry was inserted */
|
||||||
radix_tree_delete(&dev->reada_extents, index);
|
radix_tree_delete(&dev->reada_extents, index);
|
||||||
@ -459,18 +454,21 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root,
|
|||||||
btrfs_dev_replace_unlock(&fs_info->dev_replace);
|
btrfs_dev_replace_unlock(&fs_info->dev_replace);
|
||||||
goto error;
|
goto error;
|
||||||
}
|
}
|
||||||
|
have_zone = 1;
|
||||||
}
|
}
|
||||||
spin_unlock(&fs_info->reada_lock);
|
spin_unlock(&fs_info->reada_lock);
|
||||||
btrfs_dev_replace_unlock(&fs_info->dev_replace);
|
btrfs_dev_replace_unlock(&fs_info->dev_replace);
|
||||||
|
|
||||||
|
if (!have_zone)
|
||||||
|
goto error;
|
||||||
|
|
||||||
btrfs_put_bbio(bbio);
|
btrfs_put_bbio(bbio);
|
||||||
return re;
|
return re;
|
||||||
|
|
||||||
error:
|
error:
|
||||||
while (nzones) {
|
for (nzones = 0; nzones < re->nzones; ++nzones) {
|
||||||
struct reada_zone *zone;
|
struct reada_zone *zone;
|
||||||
|
|
||||||
--nzones;
|
|
||||||
zone = re->zones[nzones];
|
zone = re->zones[nzones];
|
||||||
kref_get(&zone->refcnt);
|
kref_get(&zone->refcnt);
|
||||||
spin_lock(&zone->lock);
|
spin_lock(&zone->lock);
|
||||||
@ -531,8 +529,6 @@ static void reada_extent_put(struct btrfs_fs_info *fs_info,
|
|||||||
kref_put(&zone->refcnt, reada_zone_release);
|
kref_put(&zone->refcnt, reada_zone_release);
|
||||||
spin_unlock(&fs_info->reada_lock);
|
spin_unlock(&fs_info->reada_lock);
|
||||||
}
|
}
|
||||||
if (re->scheduled_for)
|
|
||||||
atomic_dec(&re->scheduled_for->reada_in_flight);
|
|
||||||
|
|
||||||
kfree(re);
|
kfree(re);
|
||||||
}
|
}
|
||||||
@ -556,13 +552,13 @@ static void reada_control_release(struct kref *kref)
|
|||||||
}
|
}
|
||||||
|
|
||||||
static int reada_add_block(struct reada_control *rc, u64 logical,
|
static int reada_add_block(struct reada_control *rc, u64 logical,
|
||||||
struct btrfs_key *top, int level, u64 generation)
|
struct btrfs_key *top, u64 generation)
|
||||||
{
|
{
|
||||||
struct btrfs_root *root = rc->root;
|
struct btrfs_root *root = rc->root;
|
||||||
struct reada_extent *re;
|
struct reada_extent *re;
|
||||||
struct reada_extctl *rec;
|
struct reada_extctl *rec;
|
||||||
|
|
||||||
re = reada_find_extent(root, logical, top, level); /* takes one ref */
|
re = reada_find_extent(root, logical, top); /* takes one ref */
|
||||||
if (!re)
|
if (!re)
|
||||||
return -1;
|
return -1;
|
||||||
|
|
||||||
@ -662,7 +658,6 @@ static int reada_start_machine_dev(struct btrfs_fs_info *fs_info,
|
|||||||
u64 logical;
|
u64 logical;
|
||||||
int ret;
|
int ret;
|
||||||
int i;
|
int i;
|
||||||
int need_kick = 0;
|
|
||||||
|
|
||||||
spin_lock(&fs_info->reada_lock);
|
spin_lock(&fs_info->reada_lock);
|
||||||
if (dev->reada_curr_zone == NULL) {
|
if (dev->reada_curr_zone == NULL) {
|
||||||
@ -679,7 +674,7 @@ static int reada_start_machine_dev(struct btrfs_fs_info *fs_info,
|
|||||||
*/
|
*/
|
||||||
ret = radix_tree_gang_lookup(&dev->reada_extents, (void **)&re,
|
ret = radix_tree_gang_lookup(&dev->reada_extents, (void **)&re,
|
||||||
dev->reada_next >> PAGE_CACHE_SHIFT, 1);
|
dev->reada_next >> PAGE_CACHE_SHIFT, 1);
|
||||||
if (ret == 0 || re->logical >= dev->reada_curr_zone->end) {
|
if (ret == 0 || re->logical > dev->reada_curr_zone->end) {
|
||||||
ret = reada_pick_zone(dev);
|
ret = reada_pick_zone(dev);
|
||||||
if (!ret) {
|
if (!ret) {
|
||||||
spin_unlock(&fs_info->reada_lock);
|
spin_unlock(&fs_info->reada_lock);
|
||||||
@ -698,6 +693,15 @@ static int reada_start_machine_dev(struct btrfs_fs_info *fs_info,
|
|||||||
|
|
||||||
spin_unlock(&fs_info->reada_lock);
|
spin_unlock(&fs_info->reada_lock);
|
||||||
|
|
||||||
|
spin_lock(&re->lock);
|
||||||
|
if (re->scheduled || list_empty(&re->extctl)) {
|
||||||
|
spin_unlock(&re->lock);
|
||||||
|
reada_extent_put(fs_info, re);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
re->scheduled = 1;
|
||||||
|
spin_unlock(&re->lock);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* find mirror num
|
* find mirror num
|
||||||
*/
|
*/
|
||||||
@ -709,29 +713,20 @@ static int reada_start_machine_dev(struct btrfs_fs_info *fs_info,
|
|||||||
}
|
}
|
||||||
logical = re->logical;
|
logical = re->logical;
|
||||||
|
|
||||||
spin_lock(&re->lock);
|
|
||||||
if (re->scheduled_for == NULL) {
|
|
||||||
re->scheduled_for = dev;
|
|
||||||
need_kick = 1;
|
|
||||||
}
|
|
||||||
spin_unlock(&re->lock);
|
|
||||||
|
|
||||||
reada_extent_put(fs_info, re);
|
|
||||||
|
|
||||||
if (!need_kick)
|
|
||||||
return 0;
|
|
||||||
|
|
||||||
atomic_inc(&dev->reada_in_flight);
|
atomic_inc(&dev->reada_in_flight);
|
||||||
ret = reada_tree_block_flagged(fs_info->extent_root, logical,
|
ret = reada_tree_block_flagged(fs_info->extent_root, logical,
|
||||||
mirror_num, &eb);
|
mirror_num, &eb);
|
||||||
if (ret)
|
if (ret)
|
||||||
__readahead_hook(fs_info->extent_root, NULL, logical, ret);
|
__readahead_hook(fs_info, re, NULL, logical, ret);
|
||||||
else if (eb)
|
else if (eb)
|
||||||
__readahead_hook(fs_info->extent_root, eb, eb->start, ret);
|
__readahead_hook(fs_info, re, eb, eb->start, ret);
|
||||||
|
|
||||||
if (eb)
|
if (eb)
|
||||||
free_extent_buffer(eb);
|
free_extent_buffer(eb);
|
||||||
|
|
||||||
|
atomic_dec(&dev->reada_in_flight);
|
||||||
|
reada_extent_put(fs_info, re);
|
||||||
|
|
||||||
return 1;
|
return 1;
|
||||||
|
|
||||||
}
|
}
|
||||||
@ -752,6 +747,8 @@ static void reada_start_machine_worker(struct btrfs_work *work)
|
|||||||
set_task_ioprio(current, BTRFS_IOPRIO_READA);
|
set_task_ioprio(current, BTRFS_IOPRIO_READA);
|
||||||
__reada_start_machine(fs_info);
|
__reada_start_machine(fs_info);
|
||||||
set_task_ioprio(current, old_ioprio);
|
set_task_ioprio(current, old_ioprio);
|
||||||
|
|
||||||
|
atomic_dec(&fs_info->reada_works_cnt);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void __reada_start_machine(struct btrfs_fs_info *fs_info)
|
static void __reada_start_machine(struct btrfs_fs_info *fs_info)
|
||||||
@ -783,8 +780,12 @@ static void __reada_start_machine(struct btrfs_fs_info *fs_info)
|
|||||||
* enqueue to workers to finish it. This will distribute the load to
|
* enqueue to workers to finish it. This will distribute the load to
|
||||||
* the cores.
|
* the cores.
|
||||||
*/
|
*/
|
||||||
for (i = 0; i < 2; ++i)
|
for (i = 0; i < 2; ++i) {
|
||||||
reada_start_machine(fs_info);
|
reada_start_machine(fs_info);
|
||||||
|
if (atomic_read(&fs_info->reada_works_cnt) >
|
||||||
|
BTRFS_MAX_MIRRORS * 2)
|
||||||
|
break;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void reada_start_machine(struct btrfs_fs_info *fs_info)
|
static void reada_start_machine(struct btrfs_fs_info *fs_info)
|
||||||
@ -801,6 +802,7 @@ static void reada_start_machine(struct btrfs_fs_info *fs_info)
|
|||||||
rmw->fs_info = fs_info;
|
rmw->fs_info = fs_info;
|
||||||
|
|
||||||
btrfs_queue_work(fs_info->readahead_workers, &rmw->work);
|
btrfs_queue_work(fs_info->readahead_workers, &rmw->work);
|
||||||
|
atomic_inc(&fs_info->reada_works_cnt);
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef DEBUG
|
#ifdef DEBUG
|
||||||
@ -848,10 +850,9 @@ static void dump_devs(struct btrfs_fs_info *fs_info, int all)
|
|||||||
if (ret == 0)
|
if (ret == 0)
|
||||||
break;
|
break;
|
||||||
printk(KERN_DEBUG
|
printk(KERN_DEBUG
|
||||||
" re: logical %llu size %u empty %d for %lld",
|
" re: logical %llu size %u empty %d scheduled %d",
|
||||||
re->logical, fs_info->tree_root->nodesize,
|
re->logical, fs_info->tree_root->nodesize,
|
||||||
list_empty(&re->extctl), re->scheduled_for ?
|
list_empty(&re->extctl), re->scheduled);
|
||||||
re->scheduled_for->devid : -1);
|
|
||||||
|
|
||||||
for (i = 0; i < re->nzones; ++i) {
|
for (i = 0; i < re->nzones; ++i) {
|
||||||
printk(KERN_CONT " zone %llu-%llu devs",
|
printk(KERN_CONT " zone %llu-%llu devs",
|
||||||
@ -878,19 +879,14 @@ static void dump_devs(struct btrfs_fs_info *fs_info, int all)
|
|||||||
index, 1);
|
index, 1);
|
||||||
if (ret == 0)
|
if (ret == 0)
|
||||||
break;
|
break;
|
||||||
if (!re->scheduled_for) {
|
if (!re->scheduled) {
|
||||||
index = (re->logical >> PAGE_CACHE_SHIFT) + 1;
|
index = (re->logical >> PAGE_CACHE_SHIFT) + 1;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
printk(KERN_DEBUG
|
printk(KERN_DEBUG
|
||||||
"re: logical %llu size %u list empty %d for %lld",
|
"re: logical %llu size %u list empty %d scheduled %d",
|
||||||
re->logical, fs_info->tree_root->nodesize,
|
re->logical, fs_info->tree_root->nodesize,
|
||||||
list_empty(&re->extctl),
|
list_empty(&re->extctl), re->scheduled);
|
||||||
re->scheduled_for ? re->scheduled_for->devid : -1);
|
|
||||||
for (i = 0; i < re->nzones; ++i) {
|
|
||||||
printk(KERN_CONT " zone %llu-%llu devs",
|
|
||||||
re->zones[i]->start,
|
|
||||||
re->zones[i]->end);
|
|
||||||
for (i = 0; i < re->nzones; ++i) {
|
for (i = 0; i < re->nzones; ++i) {
|
||||||
printk(KERN_CONT " zone %llu-%llu devs",
|
printk(KERN_CONT " zone %llu-%llu devs",
|
||||||
re->zones[i]->start,
|
re->zones[i]->start,
|
||||||
@ -900,7 +896,6 @@ static void dump_devs(struct btrfs_fs_info *fs_info, int all)
|
|||||||
re->zones[i]->devs[j]->devid);
|
re->zones[i]->devs[j]->devid);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
printk(KERN_CONT "\n");
|
printk(KERN_CONT "\n");
|
||||||
index = (re->logical >> PAGE_CACHE_SHIFT) + 1;
|
index = (re->logical >> PAGE_CACHE_SHIFT) + 1;
|
||||||
}
|
}
|
||||||
@ -917,7 +912,6 @@ struct reada_control *btrfs_reada_add(struct btrfs_root *root,
|
|||||||
struct reada_control *rc;
|
struct reada_control *rc;
|
||||||
u64 start;
|
u64 start;
|
||||||
u64 generation;
|
u64 generation;
|
||||||
int level;
|
|
||||||
int ret;
|
int ret;
|
||||||
struct extent_buffer *node;
|
struct extent_buffer *node;
|
||||||
static struct btrfs_key max_key = {
|
static struct btrfs_key max_key = {
|
||||||
@ -940,11 +934,10 @@ struct reada_control *btrfs_reada_add(struct btrfs_root *root,
|
|||||||
|
|
||||||
node = btrfs_root_node(root);
|
node = btrfs_root_node(root);
|
||||||
start = node->start;
|
start = node->start;
|
||||||
level = btrfs_header_level(node);
|
|
||||||
generation = btrfs_header_generation(node);
|
generation = btrfs_header_generation(node);
|
||||||
free_extent_buffer(node);
|
free_extent_buffer(node);
|
||||||
|
|
||||||
ret = reada_add_block(rc, start, &max_key, level, generation);
|
ret = reada_add_block(rc, start, &max_key, generation);
|
||||||
if (ret) {
|
if (ret) {
|
||||||
kfree(rc);
|
kfree(rc);
|
||||||
return ERR_PTR(ret);
|
return ERR_PTR(ret);
|
||||||
@ -959,8 +952,11 @@ struct reada_control *btrfs_reada_add(struct btrfs_root *root,
|
|||||||
int btrfs_reada_wait(void *handle)
|
int btrfs_reada_wait(void *handle)
|
||||||
{
|
{
|
||||||
struct reada_control *rc = handle;
|
struct reada_control *rc = handle;
|
||||||
|
struct btrfs_fs_info *fs_info = rc->root->fs_info;
|
||||||
|
|
||||||
while (atomic_read(&rc->elems)) {
|
while (atomic_read(&rc->elems)) {
|
||||||
|
if (!atomic_read(&fs_info->reada_works_cnt))
|
||||||
|
reada_start_machine(fs_info);
|
||||||
wait_event_timeout(rc->wait, atomic_read(&rc->elems) == 0,
|
wait_event_timeout(rc->wait, atomic_read(&rc->elems) == 0,
|
||||||
5 * HZ);
|
5 * HZ);
|
||||||
dump_devs(rc->root->fs_info,
|
dump_devs(rc->root->fs_info,
|
||||||
@ -977,9 +973,13 @@ int btrfs_reada_wait(void *handle)
|
|||||||
int btrfs_reada_wait(void *handle)
|
int btrfs_reada_wait(void *handle)
|
||||||
{
|
{
|
||||||
struct reada_control *rc = handle;
|
struct reada_control *rc = handle;
|
||||||
|
struct btrfs_fs_info *fs_info = rc->root->fs_info;
|
||||||
|
|
||||||
while (atomic_read(&rc->elems)) {
|
while (atomic_read(&rc->elems)) {
|
||||||
wait_event(rc->wait, atomic_read(&rc->elems) == 0);
|
if (!atomic_read(&fs_info->reada_works_cnt))
|
||||||
|
reada_start_machine(fs_info);
|
||||||
|
wait_event_timeout(rc->wait, atomic_read(&rc->elems) == 0,
|
||||||
|
(HZ + 9) / 10);
|
||||||
}
|
}
|
||||||
|
|
||||||
kref_put(&rc->refcnt, reada_control_release);
|
kref_put(&rc->refcnt, reada_control_release);
|
||||||
|
Loading…
x
Reference in New Issue
Block a user