btrfs: fix deadlock with extent-same and readpage

->readpage() takes the page lock before the extent lock, whereas extent-same
takes them in the opposite order, so the two paths can deadlock against each
other. We want to reverse the order in btrfs_extent_same(), but that is not
quite straightforward because the page locks are taken inside btrfs_cmp_data().

So I split btrfs_cmp_data() into 3 parts with a small context structure that
is passed between them. The first, btrfs_cmp_data_prepare() gathers up the
pages needed (taking page lock as required) and puts them on our context
structure. At this point, we are safe to lock the extent range. Afterwards,
we use btrfs_cmp_data() to do the data compare as usual and btrfs_cmp_data_free()
to clean up our context.

Signed-off-by: Mark Fasheh <mfasheh@suse.de>
Reviewed-by: David Sterba <dsterba@suse.cz>
Signed-off-by: Chris Mason <clm@fb.com>
This commit is contained in:
Mark Fasheh 2015-06-30 14:42:05 -07:00 committed by Chris Mason
parent 207910ddee
commit f441460202

View File

@ -2765,14 +2765,11 @@ out:
return ret; return ret;
} }
static struct page *extent_same_get_page(struct inode *inode, u64 off) static struct page *extent_same_get_page(struct inode *inode, pgoff_t index)
{ {
struct page *page; struct page *page;
pgoff_t index;
struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree; struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
index = off >> PAGE_CACHE_SHIFT;
page = grab_cache_page(inode->i_mapping, index); page = grab_cache_page(inode->i_mapping, index);
if (!page) if (!page)
return NULL; return NULL;
@ -2793,6 +2790,20 @@ static struct page *extent_same_get_page(struct inode *inode, u64 off)
return page; return page;
} }
/*
 * Fill @pages with @num_pages consecutive pages of @inode, starting at the
 * page index that corresponds to byte offset @off.
 *
 * Each slot is obtained through extent_same_get_page(). Returns 0 once every
 * slot has been filled, or -ENOMEM as soon as one lookup yields NULL; slots
 * filled before the failure are left in @pages for the caller to deal with.
 */
static int gather_extent_pages(struct inode *inode, struct page **pages,
			       int num_pages, u64 off)
{
	pgoff_t first = off >> PAGE_CACHE_SHIFT;
	int n;

	for (n = 0; n < num_pages; n++) {
		struct page *page = extent_same_get_page(inode, first + n);

		pages[n] = page;
		if (!page)
			return -ENOMEM;
	}

	return 0;
}
static inline void lock_extent_range(struct inode *inode, u64 off, u64 len) static inline void lock_extent_range(struct inode *inode, u64 off, u64 len)
{ {
/* do any pending delalloc/csum calc on src, one way or /* do any pending delalloc/csum calc on src, one way or
@ -2818,52 +2829,120 @@ static inline void lock_extent_range(struct inode *inode, u64 off, u64 len)
} }
} }
static void btrfs_double_unlock(struct inode *inode1, u64 loff1, static void btrfs_double_inode_unlock(struct inode *inode1, struct inode *inode2)
struct inode *inode2, u64 loff2, u64 len)
{ {
unlock_extent(&BTRFS_I(inode1)->io_tree, loff1, loff1 + len - 1);
unlock_extent(&BTRFS_I(inode2)->io_tree, loff2, loff2 + len - 1);
mutex_unlock(&inode1->i_mutex); mutex_unlock(&inode1->i_mutex);
mutex_unlock(&inode2->i_mutex); mutex_unlock(&inode2->i_mutex);
} }
static void btrfs_double_lock(struct inode *inode1, u64 loff1, static void btrfs_double_inode_lock(struct inode *inode1, struct inode *inode2)
struct inode *inode2, u64 loff2, u64 len) {
if (inode1 < inode2)
swap(inode1, inode2);
mutex_lock_nested(&inode1->i_mutex, I_MUTEX_PARENT);
if (inode1 != inode2)
mutex_lock_nested(&inode2->i_mutex, I_MUTEX_CHILD);
}
/*
 * Unlock the byte range [loff, loff + len - 1] in the io_tree of each of the
 * two inodes: @inode1 at @loff1 first, then @inode2 at @loff2.
 */
static void btrfs_double_extent_unlock(struct inode *inode1, u64 loff1,
				       struct inode *inode2, u64 loff2, u64 len)
{
	struct extent_io_tree *tree1 = &BTRFS_I(inode1)->io_tree;
	struct extent_io_tree *tree2 = &BTRFS_I(inode2)->io_tree;
	u64 end1 = loff1 + len - 1;
	u64 end2 = loff2 + len - 1;

	unlock_extent(tree1, loff1, end1);
	unlock_extent(tree2, loff2, end2);
}
static void btrfs_double_extent_lock(struct inode *inode1, u64 loff1,
struct inode *inode2, u64 loff2, u64 len)
{ {
if (inode1 < inode2) { if (inode1 < inode2) {
swap(inode1, inode2); swap(inode1, inode2);
swap(loff1, loff2); swap(loff1, loff2);
} }
mutex_lock_nested(&inode1->i_mutex, I_MUTEX_PARENT);
lock_extent_range(inode1, loff1, len); lock_extent_range(inode1, loff1, len);
if (inode1 != inode2) { if (inode1 != inode2)
mutex_lock_nested(&inode2->i_mutex, I_MUTEX_CHILD);
lock_extent_range(inode2, loff2, len); lock_extent_range(inode2, loff2, len);
}
/*
 * Context passed between btrfs_cmp_data_prepare(), btrfs_cmp_data() and
 * btrfs_cmp_data_free(): two parallel arrays of page pointers, one per inode
 * being compared.
 */
struct cmp_pages {
	/* Number of slots in each of the two arrays below. */
	int num_pages;
	/* Pages gathered from the source inode. */
	struct page **src_pages;
	/* Pages gathered from the destination inode. */
	struct page **dst_pages;
};
/*
 * Tear down a comparison context: call page_cache_release() on every
 * non-NULL entry of both page arrays, then kfree() the arrays themselves.
 * NULL slots are skipped. @cmp itself is not freed.
 */
static void btrfs_cmp_data_free(struct cmp_pages *cmp)
{
	int idx;

	for (idx = 0; idx < cmp->num_pages; idx++) {
		struct page *src = cmp->src_pages[idx];
		struct page *dst = cmp->dst_pages[idx];

		if (src)
			page_cache_release(src);
		if (dst)
			page_cache_release(dst);
	}

	kfree(cmp->src_pages);
	kfree(cmp->dst_pages);
}
/*
 * Collect the pages of both inodes that a later btrfs_cmp_data() call will
 * compare, and record them in @cmp.
 *
 * @src, @loff:     source inode and starting byte offset
 * @dst, @dst_loff: destination inode and starting byte offset
 * @len:            number of bytes to compare; determines the array size
 * @cmp:            context to fill in (num_pages, src_pages, dst_pages)
 *
 * Returns 0 on success, in which case the caller owns @cmp and must release
 * it with btrfs_cmp_data_free(). On failure a negative errno is returned and
 * everything gathered so far has already been released here, so the caller
 * must neither use @cmp nor free it again.
 */
static int btrfs_cmp_data_prepare(struct inode *src, u64 loff,
				  struct inode *dst, u64 dst_loff,
				  u64 len, struct cmp_pages *cmp)
{
	int ret;
	int num_pages = PAGE_CACHE_ALIGN(len) >> PAGE_CACHE_SHIFT;
	struct page **src_pgarr, **dst_pgarr;

	/*
	 * We must gather up all the pages before we initiate our
	 * extent locking. We use an array for the page pointers. Size
	 * of the array is bounded by len, which is in turn bounded by
	 * BTRFS_MAX_DEDUPE_LEN.
	 */
	src_pgarr = kzalloc(num_pages * sizeof(struct page *), GFP_NOFS);
	dst_pgarr = kzalloc(num_pages * sizeof(struct page *), GFP_NOFS);
	if (!src_pgarr || !dst_pgarr) {
		kfree(src_pgarr);
		kfree(dst_pgarr);
		return -ENOMEM;
	}

	/* Zeroed arrays let btrfs_cmp_data_free() skip slots never filled. */
	cmp->num_pages = num_pages;
	cmp->src_pages = src_pgarr;
	cmp->dst_pages = dst_pgarr;

	ret = gather_extent_pages(src, cmp->src_pages, cmp->num_pages, loff);
	if (ret)
		goto out;

	ret = gather_extent_pages(dst, cmp->dst_pages, cmp->num_pages, dst_loff);

out:
	if (ret)
		btrfs_cmp_data_free(cmp);

	/*
	 * Propagate the error. Returning 0 here after freeing @cmp would let
	 * the caller go on to compare against, and then free again, page
	 * arrays that no longer exist.
	 */
	return ret;
}
static int btrfs_cmp_data(struct inode *src, u64 loff, struct inode *dst, static int btrfs_cmp_data(struct inode *src, u64 loff, struct inode *dst,
u64 dst_loff, u64 len) u64 dst_loff, u64 len, struct cmp_pages *cmp)
{ {
int ret = 0; int ret = 0;
int i;
struct page *src_page, *dst_page; struct page *src_page, *dst_page;
unsigned int cmp_len = PAGE_CACHE_SIZE; unsigned int cmp_len = PAGE_CACHE_SIZE;
void *addr, *dst_addr; void *addr, *dst_addr;
i = 0;
while (len) { while (len) {
if (len < PAGE_CACHE_SIZE) if (len < PAGE_CACHE_SIZE)
cmp_len = len; cmp_len = len;
src_page = extent_same_get_page(src, loff); BUG_ON(i >= cmp->num_pages);
if (!src_page)
return -EINVAL; src_page = cmp->src_pages[i];
dst_page = extent_same_get_page(dst, dst_loff); dst_page = cmp->dst_pages[i];
if (!dst_page) {
page_cache_release(src_page);
return -EINVAL;
}
addr = kmap_atomic(src_page); addr = kmap_atomic(src_page);
dst_addr = kmap_atomic(dst_page); dst_addr = kmap_atomic(dst_page);
@ -2875,15 +2954,12 @@ static int btrfs_cmp_data(struct inode *src, u64 loff, struct inode *dst,
kunmap_atomic(addr); kunmap_atomic(addr);
kunmap_atomic(dst_addr); kunmap_atomic(dst_addr);
page_cache_release(src_page);
page_cache_release(dst_page);
if (ret) if (ret)
break; break;
loff += cmp_len;
dst_loff += cmp_len;
len -= cmp_len; len -= cmp_len;
i++;
} }
return ret; return ret;
@ -2914,6 +2990,7 @@ static int btrfs_extent_same(struct inode *src, u64 loff, u64 olen,
{ {
int ret; int ret;
u64 len = olen; u64 len = olen;
struct cmp_pages cmp;
/* /*
* btrfs_clone() can't handle extents in the same file * btrfs_clone() can't handle extents in the same file
@ -2926,7 +3003,7 @@ static int btrfs_extent_same(struct inode *src, u64 loff, u64 olen,
if (len == 0) if (len == 0)
return 0; return 0;
btrfs_double_lock(src, loff, dst, dst_loff, len); btrfs_double_inode_lock(src, dst);
ret = extent_same_check_offsets(src, loff, &len, olen); ret = extent_same_check_offsets(src, loff, &len, olen);
if (ret) if (ret)
@ -2943,13 +3020,22 @@ static int btrfs_extent_same(struct inode *src, u64 loff, u64 olen,
goto out_unlock; goto out_unlock;
} }
ret = btrfs_cmp_data_prepare(src, loff, dst, dst_loff, olen, &cmp);
if (ret)
goto out_unlock;
btrfs_double_extent_lock(src, loff, dst, dst_loff, len);
/* pass original length for comparison so we stay within i_size */ /* pass original length for comparison so we stay within i_size */
ret = btrfs_cmp_data(src, loff, dst, dst_loff, olen); ret = btrfs_cmp_data(src, loff, dst, dst_loff, olen, &cmp);
if (ret == 0) if (ret == 0)
ret = btrfs_clone(src, dst, loff, olen, len, dst_loff); ret = btrfs_clone(src, dst, loff, olen, len, dst_loff);
btrfs_double_extent_unlock(src, loff, dst, dst_loff, len);
btrfs_cmp_data_free(&cmp);
out_unlock: out_unlock:
btrfs_double_unlock(src, loff, dst, dst_loff, len); btrfs_double_inode_unlock(src, dst);
return ret; return ret;
} }