Btrfs: Link block groups of different raid types
The size of reserved space is stored in space_info. If block groups of different raid types are linked to separate space_info, changing allocation profile will corrupt reserved space accounting. Signed-off-by: Yan Zheng <zheng.yan@oracle.com> Signed-off-by: Chris Mason <chris.mason@oracle.com>
This commit is contained in:
parent
e40152ee1e
commit
b742bb82f1
@ -663,6 +663,7 @@ struct btrfs_csum_item {
|
||||
#define BTRFS_BLOCK_GROUP_RAID1 (1 << 4)
|
||||
#define BTRFS_BLOCK_GROUP_DUP (1 << 5)
|
||||
#define BTRFS_BLOCK_GROUP_RAID10 (1 << 6)
|
||||
#define BTRFS_NR_RAID_TYPES 5
|
||||
|
||||
struct btrfs_block_group_item {
|
||||
__le64 used;
|
||||
@ -674,7 +675,8 @@ struct btrfs_space_info {
|
||||
u64 flags;
|
||||
|
||||
u64 total_bytes; /* total bytes in the space */
|
||||
u64 bytes_used; /* total bytes used on disk */
|
||||
u64 bytes_used; /* total bytes used,
|
||||
this doesn't take mirrors into account */
|
||||
u64 bytes_pinned; /* total bytes pinned, will be freed when the
|
||||
transaction finishes */
|
||||
u64 bytes_reserved; /* total bytes the allocator has reserved for
|
||||
@ -687,6 +689,7 @@ struct btrfs_space_info {
|
||||
delalloc/allocations */
|
||||
u64 bytes_delalloc; /* number of bytes currently reserved for
|
||||
delayed allocation */
|
||||
u64 disk_used; /* total bytes used on disk */
|
||||
|
||||
int full; /* indicates that we cannot allocate any more
|
||||
chunks for this space */
|
||||
@ -704,7 +707,7 @@ struct btrfs_space_info {
|
||||
int flushing;
|
||||
|
||||
/* for block groups in our same type */
|
||||
struct list_head block_groups;
|
||||
struct list_head block_groups[BTRFS_NR_RAID_TYPES];
|
||||
spinlock_t lock;
|
||||
struct rw_semaphore groups_sem;
|
||||
atomic_t caching_threads;
|
||||
|
@ -507,6 +507,9 @@ static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info,
|
||||
struct list_head *head = &info->space_info;
|
||||
struct btrfs_space_info *found;
|
||||
|
||||
flags &= BTRFS_BLOCK_GROUP_DATA | BTRFS_BLOCK_GROUP_SYSTEM |
|
||||
BTRFS_BLOCK_GROUP_METADATA;
|
||||
|
||||
rcu_read_lock();
|
||||
list_for_each_entry_rcu(found, head, list) {
|
||||
if (found->flags == flags) {
|
||||
@ -2660,12 +2663,21 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags,
|
||||
struct btrfs_space_info **space_info)
|
||||
{
|
||||
struct btrfs_space_info *found;
|
||||
int i;
|
||||
int factor;
|
||||
|
||||
if (flags & (BTRFS_BLOCK_GROUP_DUP | BTRFS_BLOCK_GROUP_RAID1 |
|
||||
BTRFS_BLOCK_GROUP_RAID10))
|
||||
factor = 2;
|
||||
else
|
||||
factor = 1;
|
||||
|
||||
found = __find_space_info(info, flags);
|
||||
if (found) {
|
||||
spin_lock(&found->lock);
|
||||
found->total_bytes += total_bytes;
|
||||
found->bytes_used += bytes_used;
|
||||
found->disk_used += bytes_used * factor;
|
||||
found->full = 0;
|
||||
spin_unlock(&found->lock);
|
||||
*space_info = found;
|
||||
@ -2675,14 +2687,18 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags,
|
||||
if (!found)
|
||||
return -ENOMEM;
|
||||
|
||||
INIT_LIST_HEAD(&found->block_groups);
|
||||
for (i = 0; i < BTRFS_NR_RAID_TYPES; i++)
|
||||
INIT_LIST_HEAD(&found->block_groups[i]);
|
||||
init_rwsem(&found->groups_sem);
|
||||
init_waitqueue_head(&found->flush_wait);
|
||||
init_waitqueue_head(&found->allocate_wait);
|
||||
spin_lock_init(&found->lock);
|
||||
found->flags = flags;
|
||||
found->flags = flags & (BTRFS_BLOCK_GROUP_DATA |
|
||||
BTRFS_BLOCK_GROUP_SYSTEM |
|
||||
BTRFS_BLOCK_GROUP_METADATA);
|
||||
found->total_bytes = total_bytes;
|
||||
found->bytes_used = bytes_used;
|
||||
found->disk_used = bytes_used * factor;
|
||||
found->bytes_pinned = 0;
|
||||
found->bytes_reserved = 0;
|
||||
found->bytes_readonly = 0;
|
||||
@ -2752,26 +2768,32 @@ u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags)
|
||||
return flags;
|
||||
}
|
||||
|
||||
static u64 btrfs_get_alloc_profile(struct btrfs_root *root, u64 data)
|
||||
static u64 get_alloc_profile(struct btrfs_root *root, u64 flags)
|
||||
{
|
||||
struct btrfs_fs_info *info = root->fs_info;
|
||||
u64 alloc_profile;
|
||||
if (flags & BTRFS_BLOCK_GROUP_DATA)
|
||||
flags |= root->fs_info->avail_data_alloc_bits &
|
||||
root->fs_info->data_alloc_profile;
|
||||
else if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
|
||||
flags |= root->fs_info->avail_system_alloc_bits &
|
||||
root->fs_info->system_alloc_profile;
|
||||
else if (flags & BTRFS_BLOCK_GROUP_METADATA)
|
||||
flags |= root->fs_info->avail_metadata_alloc_bits &
|
||||
root->fs_info->metadata_alloc_profile;
|
||||
return btrfs_reduce_alloc_profile(root, flags);
|
||||
}
|
||||
|
||||
if (data) {
|
||||
alloc_profile = info->avail_data_alloc_bits &
|
||||
info->data_alloc_profile;
|
||||
data = BTRFS_BLOCK_GROUP_DATA | alloc_profile;
|
||||
} else if (root == root->fs_info->chunk_root) {
|
||||
alloc_profile = info->avail_system_alloc_bits &
|
||||
info->system_alloc_profile;
|
||||
data = BTRFS_BLOCK_GROUP_SYSTEM | alloc_profile;
|
||||
} else {
|
||||
alloc_profile = info->avail_metadata_alloc_bits &
|
||||
info->metadata_alloc_profile;
|
||||
data = BTRFS_BLOCK_GROUP_METADATA | alloc_profile;
|
||||
}
|
||||
static u64 btrfs_get_alloc_profile(struct btrfs_root *root, int data)
|
||||
{
|
||||
u64 flags;
|
||||
|
||||
return btrfs_reduce_alloc_profile(root, data);
|
||||
if (data)
|
||||
flags = BTRFS_BLOCK_GROUP_DATA;
|
||||
else if (root == root->fs_info->chunk_root)
|
||||
flags = BTRFS_BLOCK_GROUP_SYSTEM;
|
||||
else
|
||||
flags = BTRFS_BLOCK_GROUP_METADATA;
|
||||
|
||||
return get_alloc_profile(root, flags);
|
||||
}
|
||||
|
||||
void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *inode)
|
||||
@ -3468,6 +3490,7 @@ static int update_block_group(struct btrfs_trans_handle *trans,
|
||||
{
|
||||
struct btrfs_block_group_cache *cache;
|
||||
struct btrfs_fs_info *info = root->fs_info;
|
||||
int factor;
|
||||
u64 total = num_bytes;
|
||||
u64 old_val;
|
||||
u64 byte_in_group;
|
||||
@ -3486,6 +3509,12 @@ static int update_block_group(struct btrfs_trans_handle *trans,
|
||||
cache = btrfs_lookup_block_group(info, bytenr);
|
||||
if (!cache)
|
||||
return -1;
|
||||
if (cache->flags & (BTRFS_BLOCK_GROUP_DUP |
|
||||
BTRFS_BLOCK_GROUP_RAID1 |
|
||||
BTRFS_BLOCK_GROUP_RAID10))
|
||||
factor = 2;
|
||||
else
|
||||
factor = 1;
|
||||
byte_in_group = bytenr - cache->key.objectid;
|
||||
WARN_ON(byte_in_group > cache->key.offset);
|
||||
|
||||
@ -3498,18 +3527,20 @@ static int update_block_group(struct btrfs_trans_handle *trans,
|
||||
old_val += num_bytes;
|
||||
btrfs_set_block_group_used(&cache->item, old_val);
|
||||
cache->reserved -= num_bytes;
|
||||
cache->space_info->bytes_used += num_bytes;
|
||||
cache->space_info->bytes_reserved -= num_bytes;
|
||||
cache->space_info->bytes_used += num_bytes;
|
||||
cache->space_info->disk_used += num_bytes * factor;
|
||||
if (cache->ro)
|
||||
cache->space_info->bytes_readonly -= num_bytes;
|
||||
spin_unlock(&cache->lock);
|
||||
spin_unlock(&cache->space_info->lock);
|
||||
} else {
|
||||
old_val -= num_bytes;
|
||||
btrfs_set_block_group_used(&cache->item, old_val);
|
||||
cache->space_info->bytes_used -= num_bytes;
|
||||
cache->space_info->disk_used -= num_bytes * factor;
|
||||
if (cache->ro)
|
||||
cache->space_info->bytes_readonly += num_bytes;
|
||||
btrfs_set_block_group_used(&cache->item, old_val);
|
||||
spin_unlock(&cache->lock);
|
||||
spin_unlock(&cache->space_info->lock);
|
||||
if (mark_free) {
|
||||
@ -4134,6 +4165,22 @@ wait_block_group_cache_done(struct btrfs_block_group_cache *cache)
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Translate the RAID profile bits in cache->flags into a small integer
 * index.  The bits are tested in a fixed order and the first match wins:
 *
 *   BTRFS_BLOCK_GROUP_RAID10 -> 0
 *   BTRFS_BLOCK_GROUP_RAID1  -> 1
 *   BTRFS_BLOCK_GROUP_DUP    -> 2
 *   BTRFS_BLOCK_GROUP_RAID0  -> 3
 *   none of the above        -> 4
 *
 * The result is always in the range 0..4.
 */
static int get_block_group_index(struct btrfs_block_group_cache *cache)
|
||||
{
|
||||
int index;
|
||||
if (cache->flags & BTRFS_BLOCK_GROUP_RAID10)
|
||||
index = 0;
|
||||
else if (cache->flags & BTRFS_BLOCK_GROUP_RAID1)
|
||||
index = 1;
|
||||
else if (cache->flags & BTRFS_BLOCK_GROUP_DUP)
|
||||
index = 2;
|
||||
else if (cache->flags & BTRFS_BLOCK_GROUP_RAID0)
|
||||
index = 3;
|
||||
else
|
||||
index = 4;
|
||||
return index;
|
||||
}
|
||||
|
||||
enum btrfs_loop_type {
|
||||
LOOP_FIND_IDEAL = 0,
|
||||
LOOP_CACHING_NOWAIT = 1,
|
||||
@ -4167,6 +4214,7 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
|
||||
int done_chunk_alloc = 0;
|
||||
struct btrfs_space_info *space_info;
|
||||
int last_ptr_loop = 0;
|
||||
int index = 0;
|
||||
int loop = 0;
|
||||
bool found_uncached_bg = false;
|
||||
bool failed_cluster_refill = false;
|
||||
@ -4237,6 +4285,7 @@ ideal_cache:
|
||||
btrfs_put_block_group(block_group);
|
||||
up_read(&space_info->groups_sem);
|
||||
} else {
|
||||
index = get_block_group_index(block_group);
|
||||
goto have_block_group;
|
||||
}
|
||||
} else if (block_group) {
|
||||
@ -4245,7 +4294,8 @@ ideal_cache:
|
||||
}
|
||||
search:
|
||||
down_read(&space_info->groups_sem);
|
||||
list_for_each_entry(block_group, &space_info->block_groups, list) {
|
||||
list_for_each_entry(block_group, &space_info->block_groups[index],
|
||||
list) {
|
||||
u64 offset;
|
||||
int cached;
|
||||
|
||||
@ -4468,10 +4518,14 @@ checks:
|
||||
loop:
|
||||
failed_cluster_refill = false;
|
||||
failed_alloc = false;
|
||||
BUG_ON(index != get_block_group_index(block_group));
|
||||
btrfs_put_block_group(block_group);
|
||||
}
|
||||
up_read(&space_info->groups_sem);
|
||||
|
||||
if (!ins->objectid && ++index < BTRFS_NR_RAID_TYPES)
|
||||
goto search;
|
||||
|
||||
/* LOOP_FIND_IDEAL, only search caching/cached bg's, and don't wait for
|
||||
* them to make caching progress. Also
|
||||
* determine the best possible bg to cache
|
||||
@ -4485,6 +4539,7 @@ loop:
|
||||
if (!ins->objectid && loop < LOOP_NO_EMPTY_SIZE &&
|
||||
(found_uncached_bg || empty_size || empty_cluster ||
|
||||
allowed_chunk_alloc)) {
|
||||
index = 0;
|
||||
if (loop == LOOP_FIND_IDEAL && found_uncached_bg) {
|
||||
found_uncached_bg = false;
|
||||
loop++;
|
||||
@ -4567,6 +4622,7 @@ static void dump_space_info(struct btrfs_space_info *info, u64 bytes,
|
||||
int dump_block_groups)
|
||||
{
|
||||
struct btrfs_block_group_cache *cache;
|
||||
int index = 0;
|
||||
|
||||
spin_lock(&info->lock);
|
||||
printk(KERN_INFO "space_info has %llu free, is %sfull\n",
|
||||
@ -4591,7 +4647,8 @@ static void dump_space_info(struct btrfs_space_info *info, u64 bytes,
|
||||
return;
|
||||
|
||||
down_read(&info->groups_sem);
|
||||
list_for_each_entry(cache, &info->block_groups, list) {
|
||||
again:
|
||||
list_for_each_entry(cache, &info->block_groups[index], list) {
|
||||
spin_lock(&cache->lock);
|
||||
printk(KERN_INFO "block group %llu has %llu bytes, %llu used "
|
||||
"%llu pinned %llu reserved\n",
|
||||
@ -4603,6 +4660,8 @@ static void dump_space_info(struct btrfs_space_info *info, u64 bytes,
|
||||
btrfs_dump_free_space(cache, bytes);
|
||||
spin_unlock(&cache->lock);
|
||||
}
|
||||
if (++index < BTRFS_NR_RAID_TYPES)
|
||||
goto again;
|
||||
up_read(&info->groups_sem);
|
||||
}
|
||||
|
||||
@ -7447,6 +7506,16 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Add a block group to one of space_info's block_groups[] lists.
 *
 * The list is chosen by get_block_group_index(cache), and the insertion
 * via list_add_tail() is done between down_write() and up_write() on
 * space_info->groups_sem.
 */
static void __link_block_group(struct btrfs_space_info *space_info,
|
||||
struct btrfs_block_group_cache *cache)
|
||||
{
|
||||
int index = get_block_group_index(cache);
|
||||
|
||||
down_write(&space_info->groups_sem);
|
||||
list_add_tail(&cache->list, &space_info->block_groups[index]);
|
||||
up_write(&space_info->groups_sem);
|
||||
}
|
||||
|
||||
int btrfs_read_block_groups(struct btrfs_root *root)
|
||||
{
|
||||
struct btrfs_path *path;
|
||||
@ -7468,10 +7537,8 @@ int btrfs_read_block_groups(struct btrfs_root *root)
|
||||
|
||||
while (1) {
|
||||
ret = find_first_block_group(root, path, &key);
|
||||
if (ret > 0) {
|
||||
ret = 0;
|
||||
goto error;
|
||||
}
|
||||
if (ret > 0)
|
||||
break;
|
||||
if (ret != 0)
|
||||
goto error;
|
||||
|
||||
@ -7540,9 +7607,7 @@ int btrfs_read_block_groups(struct btrfs_root *root)
|
||||
cache->space_info->bytes_super += cache->bytes_super;
|
||||
spin_unlock(&cache->space_info->lock);
|
||||
|
||||
down_write(&space_info->groups_sem);
|
||||
list_add_tail(&cache->list, &space_info->block_groups);
|
||||
up_write(&space_info->groups_sem);
|
||||
__link_block_group(space_info, cache);
|
||||
|
||||
ret = btrfs_add_block_group_cache(root->fs_info, cache);
|
||||
BUG_ON(ret);
|
||||
@ -7551,6 +7616,22 @@ int btrfs_read_block_groups(struct btrfs_root *root)
|
||||
if (btrfs_chunk_readonly(root, cache->key.objectid))
|
||||
set_block_group_readonly(cache);
|
||||
}
|
||||
|
||||
list_for_each_entry_rcu(space_info, &root->fs_info->space_info, list) {
|
||||
if (!(get_alloc_profile(root, space_info->flags) &
|
||||
(BTRFS_BLOCK_GROUP_RAID10 |
|
||||
BTRFS_BLOCK_GROUP_RAID1 |
|
||||
BTRFS_BLOCK_GROUP_DUP)))
|
||||
continue;
|
||||
/*
|
||||
* avoid allocating from un-mirrored block group if there are
|
||||
* mirrored block groups.
|
||||
*/
|
||||
list_for_each_entry(cache, &space_info->block_groups[3], list)
|
||||
set_block_group_readonly(cache);
|
||||
list_for_each_entry(cache, &space_info->block_groups[4], list)
|
||||
set_block_group_readonly(cache);
|
||||
}
|
||||
ret = 0;
|
||||
error:
|
||||
btrfs_free_path(path);
|
||||
@ -7614,9 +7695,7 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
|
||||
cache->space_info->bytes_super += cache->bytes_super;
|
||||
spin_unlock(&cache->space_info->lock);
|
||||
|
||||
down_write(&cache->space_info->groups_sem);
|
||||
list_add_tail(&cache->list, &cache->space_info->block_groups);
|
||||
up_write(&cache->space_info->groups_sem);
|
||||
__link_block_group(cache->space_info, cache);
|
||||
|
||||
ret = btrfs_add_block_group_cache(root->fs_info, cache);
|
||||
BUG_ON(ret);
|
||||
|
@ -714,34 +714,18 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf)
|
||||
struct list_head *head = &root->fs_info->space_info;
|
||||
struct btrfs_space_info *found;
|
||||
u64 total_used = 0;
|
||||
u64 data_used = 0;
|
||||
int bits = dentry->d_sb->s_blocksize_bits;
|
||||
__be32 *fsid = (__be32 *)root->fs_info->fsid;
|
||||
|
||||
rcu_read_lock();
|
||||
list_for_each_entry_rcu(found, head, list) {
|
||||
if (found->flags & (BTRFS_BLOCK_GROUP_DUP|
|
||||
BTRFS_BLOCK_GROUP_RAID10|
|
||||
BTRFS_BLOCK_GROUP_RAID1)) {
|
||||
total_used += found->bytes_used;
|
||||
if (found->flags & BTRFS_BLOCK_GROUP_DATA)
|
||||
data_used += found->bytes_used;
|
||||
else
|
||||
data_used += found->total_bytes;
|
||||
}
|
||||
|
||||
total_used += found->bytes_used;
|
||||
if (found->flags & BTRFS_BLOCK_GROUP_DATA)
|
||||
data_used += found->bytes_used;
|
||||
else
|
||||
data_used += found->total_bytes;
|
||||
}
|
||||
list_for_each_entry_rcu(found, head, list)
|
||||
total_used += found->disk_used;
|
||||
rcu_read_unlock();
|
||||
|
||||
buf->f_namelen = BTRFS_NAME_LEN;
|
||||
buf->f_blocks = btrfs_super_total_bytes(disk_super) >> bits;
|
||||
buf->f_bfree = buf->f_blocks - (total_used >> bits);
|
||||
buf->f_bavail = buf->f_blocks - (data_used >> bits);
|
||||
buf->f_bavail = buf->f_bfree;
|
||||
buf->f_bsize = dentry->d_sb->s_blocksize;
|
||||
buf->f_type = BTRFS_SUPER_MAGIC;
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user