New code for 5.10:
- Don't WARN_ON weird states that unprivileged users can create. - Don't invalidate page cache when direct writes want to fall back to buffered. - Fix some problems when readahead ios fail. - Fix a problem where inline data pages weren't getting flushed during an unshare operation. - Rework iomap to support arbitrarily many blocks per page in preparation to support THP for the page cache. - Fix a bug in the blocksize < pagesize buffered io path where we could fail to initialize the many-blocks-per-page uptodate bitmap correctly when the backing page is actually up to date. This could cause us to forget to write out dirty pages. - Split out the generic_write_sync at the end of the directio write path so that btrfs can drop the inode lock before sync'ing the file. - Call inode_dio_end before trying to sync the file after a O_DSYNC direct write (instead of afterwards) to match the behavior of the old directio code. -----BEGIN PGP SIGNATURE----- iQIzBAABCgAdFiEEUzaAxoMeQq6m2jMV+H93GTRKtOsFAl9yB04ACgkQ+H93GTRK tOuZxw/+IrBV3HV45PtqQX+HC2F4ebax26cIJrmCQD0neiu16I7H3COjIGN/YOGw bN04VirC3bG4BtzVHO/eRHQOCwCevIpP3LkhT6yOfOgkO4Z9Xn/O7E+7uYtgT5Qi dBqOFe/aoB6+uHEHaioWUTxF1MlsVqEK/yPWjbSIdQGKFVE03Azj4V5QHtBouF2+ pNEk7lbBnF0ua3biambeyDO3JTR9dsziIPH8QzQ4M/fMuNLfR2v0s6d4Ol/ndVrC Lp3RtThLcioAXh8xSPMO6RVUFfK97SLgNCRngApFbIJn85z9yq7eI7llnhO+XcHF FBJ+XottlwJFDt+0xNUaHmjkfUH9GoK8VeFOd3zHvp6xgZZpDkjG2JJk9ZC8Qnn5 xg4grGngWshNdxFBf8S/O73bAJ1SyRcD5ePYGyMfiij3beGJ0aulKGoYOdDfC/4c hHcUc8XpjHSobg5gklQijBif0WIQos1Z4OyDK9d2LqrJOO0NUypO/t2YIdgPFzkj rXLmWlKsUYSZyefI5Z8q0AVy7TQGxstS9poC3lkXlsszQ1E5BNup0/bhCGTgCW+5 az9m41KXxPEDLxieOvIAUhHSSP02IAGQ9Lvvat1GnGfEqShAEWS/IvmIxHDbvyNW lZ0NLqNKsItKBH0oIPsrP7fHz2ES1hUIMIaLbApUwKpUcAxrCLY= =ocIt -----END PGP SIGNATURE----- Merge tag 'iomap-5.10-merge-4' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux Pull iomap updates from Darrick Wong: "There's not a lot of new stuff going on here -- a little bit of code refactoring to make iomap workable with btrfs' fsync 
locking model, cleanups in preparation for adding THP support for filesystems, and fixing a data corruption issue for blocksize < pagesize filesystems. Summary: - Don't WARN_ON weird states that unprivileged users can create. - Don't invalidate page cache when direct writes want to fall back to buffered. - Fix some problems when readahead ios fail. - Fix a problem where inline data pages weren't getting flushed during an unshare operation. - Rework iomap to support arbitrarily many blocks per page in preparation to support THP for the page cache. - Fix a bug in the blocksize < pagesize buffered io path where we could fail to initialize the many-blocks-per-page uptodate bitmap correctly when the backing page is actually up to date. This could cause us to forget to write out dirty pages. - Split out the generic_write_sync at the end of the directio write path so that btrfs can drop the inode lock before sync'ing the file. - Call inode_dio_end before trying to sync the file after a O_DSYNC direct write (instead of afterwards) to match the behavior of the old directio code" * tag 'iomap-5.10-merge-4' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux: iomap: Call inode_dio_end() before generic_write_sync() iomap: Allow filesystem to call iomap_dio_complete without i_rwsem iomap: Set all uptodate bits for an Uptodate page iomap: Change calling convention for zeroing iomap: Convert iomap_write_end types iomap: Convert write_count to write_bytes_pending iomap: Convert read_count to read_bytes_pending iomap: Support arbitrarily many blocks per page iomap: Use bitmap ops to set uptodate bits iomap: Use kzalloc to allocate iomap_page fs: Introduce i_blocks_per_page iomap: Fix misplaced page flushing iomap: Use round_down/round_up macros in __iomap_write_begin iomap: Mark read blocks uptodate in write_begin iomap: Clear page error before beginning a write iomap: Fix direct I/O write consistency check iomap: fix WARN_ON_ONCE() from unprivileged users
This commit is contained in:
commit
37187df45a
13
fs/dax.c
13
fs/dax.c
@ -1037,18 +1037,18 @@ static vm_fault_t dax_load_hole(struct xa_state *xas,
|
||||
return ret;
|
||||
}
|
||||
|
||||
int dax_iomap_zero(loff_t pos, unsigned offset, unsigned size,
|
||||
struct iomap *iomap)
|
||||
s64 dax_iomap_zero(loff_t pos, u64 length, struct iomap *iomap)
|
||||
{
|
||||
sector_t sector = iomap_sector(iomap, pos & PAGE_MASK);
|
||||
pgoff_t pgoff;
|
||||
long rc, id;
|
||||
void *kaddr;
|
||||
bool page_aligned = false;
|
||||
|
||||
unsigned offset = offset_in_page(pos);
|
||||
unsigned size = min_t(u64, PAGE_SIZE - offset, length);
|
||||
|
||||
if (IS_ALIGNED(sector << SECTOR_SHIFT, PAGE_SIZE) &&
|
||||
IS_ALIGNED(size, PAGE_SIZE))
|
||||
(size == PAGE_SIZE))
|
||||
page_aligned = true;
|
||||
|
||||
rc = bdev_dax_pgoff(iomap->bdev, sector, PAGE_SIZE, &pgoff);
|
||||
@ -1058,8 +1058,7 @@ int dax_iomap_zero(loff_t pos, unsigned offset, unsigned size,
|
||||
id = dax_read_lock();
|
||||
|
||||
if (page_aligned)
|
||||
rc = dax_zero_page_range(iomap->dax_dev, pgoff,
|
||||
size >> PAGE_SHIFT);
|
||||
rc = dax_zero_page_range(iomap->dax_dev, pgoff, 1);
|
||||
else
|
||||
rc = dax_direct_access(iomap->dax_dev, pgoff, 1, &kaddr, NULL);
|
||||
if (rc < 0) {
|
||||
@ -1072,7 +1071,7 @@ int dax_iomap_zero(loff_t pos, unsigned offset, unsigned size,
|
||||
dax_flush(iomap->dax_dev, kaddr + offset, size);
|
||||
}
|
||||
dax_read_unlock(id);
|
||||
return 0;
|
||||
return size;
|
||||
}
|
||||
|
||||
static loff_t
|
||||
|
@ -22,18 +22,25 @@
|
||||
#include "../internal.h"
|
||||
|
||||
/*
|
||||
* Structure allocated for each page when block size < PAGE_SIZE to track
|
||||
* sub-page uptodate status and I/O completions.
|
||||
* Structure allocated for each page or THP when block size < page size
|
||||
* to track sub-page uptodate status and I/O completions.
|
||||
*/
|
||||
struct iomap_page {
|
||||
atomic_t read_count;
|
||||
atomic_t write_count;
|
||||
atomic_t read_bytes_pending;
|
||||
atomic_t write_bytes_pending;
|
||||
spinlock_t uptodate_lock;
|
||||
DECLARE_BITMAP(uptodate, PAGE_SIZE / 512);
|
||||
unsigned long uptodate[];
|
||||
};
|
||||
|
||||
static inline struct iomap_page *to_iomap_page(struct page *page)
|
||||
{
|
||||
/*
|
||||
* per-block data is stored in the head page. Callers should
|
||||
* not be dealing with tail pages (and if they are, they can
|
||||
* call thp_head() first).
|
||||
*/
|
||||
VM_BUG_ON_PGFLAGS(PageTail(page), page);
|
||||
|
||||
if (page_has_private(page))
|
||||
return (struct iomap_page *)page_private(page);
|
||||
return NULL;
|
||||
@ -45,20 +52,16 @@ static struct iomap_page *
|
||||
iomap_page_create(struct inode *inode, struct page *page)
|
||||
{
|
||||
struct iomap_page *iop = to_iomap_page(page);
|
||||
unsigned int nr_blocks = i_blocks_per_page(inode, page);
|
||||
|
||||
if (iop || i_blocksize(inode) == PAGE_SIZE)
|
||||
if (iop || nr_blocks <= 1)
|
||||
return iop;
|
||||
|
||||
iop = kmalloc(sizeof(*iop), GFP_NOFS | __GFP_NOFAIL);
|
||||
atomic_set(&iop->read_count, 0);
|
||||
atomic_set(&iop->write_count, 0);
|
||||
iop = kzalloc(struct_size(iop, uptodate, BITS_TO_LONGS(nr_blocks)),
|
||||
GFP_NOFS | __GFP_NOFAIL);
|
||||
spin_lock_init(&iop->uptodate_lock);
|
||||
bitmap_zero(iop->uptodate, PAGE_SIZE / SECTOR_SIZE);
|
||||
|
||||
/*
|
||||
* migrate_page_move_mapping() assumes that pages with private data have
|
||||
* their count elevated by 1.
|
||||
*/
|
||||
if (PageUptodate(page))
|
||||
bitmap_fill(iop->uptodate, nr_blocks);
|
||||
attach_page_private(page, iop);
|
||||
return iop;
|
||||
}
|
||||
@ -67,11 +70,14 @@ static void
|
||||
iomap_page_release(struct page *page)
|
||||
{
|
||||
struct iomap_page *iop = detach_page_private(page);
|
||||
unsigned int nr_blocks = i_blocks_per_page(page->mapping->host, page);
|
||||
|
||||
if (!iop)
|
||||
return;
|
||||
WARN_ON_ONCE(atomic_read(&iop->read_count));
|
||||
WARN_ON_ONCE(atomic_read(&iop->write_count));
|
||||
WARN_ON_ONCE(atomic_read(&iop->read_bytes_pending));
|
||||
WARN_ON_ONCE(atomic_read(&iop->write_bytes_pending));
|
||||
WARN_ON_ONCE(bitmap_full(iop->uptodate, nr_blocks) !=
|
||||
PageUptodate(page));
|
||||
kfree(iop);
|
||||
}
|
||||
|
||||
@ -142,19 +148,11 @@ iomap_iop_set_range_uptodate(struct page *page, unsigned off, unsigned len)
|
||||
struct inode *inode = page->mapping->host;
|
||||
unsigned first = off >> inode->i_blkbits;
|
||||
unsigned last = (off + len - 1) >> inode->i_blkbits;
|
||||
bool uptodate = true;
|
||||
unsigned long flags;
|
||||
unsigned int i;
|
||||
|
||||
spin_lock_irqsave(&iop->uptodate_lock, flags);
|
||||
for (i = 0; i < PAGE_SIZE / i_blocksize(inode); i++) {
|
||||
if (i >= first && i <= last)
|
||||
set_bit(i, iop->uptodate);
|
||||
else if (!test_bit(i, iop->uptodate))
|
||||
uptodate = false;
|
||||
}
|
||||
|
||||
if (uptodate)
|
||||
bitmap_set(iop->uptodate, first, last - first + 1);
|
||||
if (bitmap_full(iop->uptodate, i_blocks_per_page(inode, page)))
|
||||
SetPageUptodate(page);
|
||||
spin_unlock_irqrestore(&iop->uptodate_lock, flags);
|
||||
}
|
||||
@ -171,13 +169,6 @@ iomap_set_range_uptodate(struct page *page, unsigned off, unsigned len)
|
||||
SetPageUptodate(page);
|
||||
}
|
||||
|
||||
static void
|
||||
iomap_read_finish(struct iomap_page *iop, struct page *page)
|
||||
{
|
||||
if (!iop || atomic_dec_and_test(&iop->read_count))
|
||||
unlock_page(page);
|
||||
}
|
||||
|
||||
static void
|
||||
iomap_read_page_end_io(struct bio_vec *bvec, int error)
|
||||
{
|
||||
@ -191,7 +182,8 @@ iomap_read_page_end_io(struct bio_vec *bvec, int error)
|
||||
iomap_set_range_uptodate(page, bvec->bv_offset, bvec->bv_len);
|
||||
}
|
||||
|
||||
iomap_read_finish(iop, page);
|
||||
if (!iop || atomic_sub_and_test(bvec->bv_len, &iop->read_bytes_pending))
|
||||
unlock_page(page);
|
||||
}
|
||||
|
||||
static void
|
||||
@ -271,30 +263,19 @@ iomap_readpage_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
|
||||
}
|
||||
|
||||
ctx->cur_page_in_bio = true;
|
||||
if (iop)
|
||||
atomic_add(plen, &iop->read_bytes_pending);
|
||||
|
||||
/*
|
||||
* Try to merge into a previous segment if we can.
|
||||
*/
|
||||
/* Try to merge into a previous segment if we can */
|
||||
sector = iomap_sector(iomap, pos);
|
||||
if (ctx->bio && bio_end_sector(ctx->bio) == sector)
|
||||
if (ctx->bio && bio_end_sector(ctx->bio) == sector) {
|
||||
if (__bio_try_merge_page(ctx->bio, page, plen, poff,
|
||||
&same_page))
|
||||
goto done;
|
||||
is_contig = true;
|
||||
|
||||
if (is_contig &&
|
||||
__bio_try_merge_page(ctx->bio, page, plen, poff, &same_page)) {
|
||||
if (!same_page && iop)
|
||||
atomic_inc(&iop->read_count);
|
||||
goto done;
|
||||
}
|
||||
|
||||
/*
|
||||
* If we start a new segment we need to increase the read count, and we
|
||||
* need to do so before submitting any previous full bio to make sure
|
||||
* that we don't prematurely unlock the page.
|
||||
*/
|
||||
if (iop)
|
||||
atomic_inc(&iop->read_count);
|
||||
|
||||
if (!ctx->bio || !is_contig || bio_full(ctx->bio, plen)) {
|
||||
if (!is_contig || bio_full(ctx->bio, plen)) {
|
||||
gfp_t gfp = mapping_gfp_constraint(page->mapping, GFP_KERNEL);
|
||||
gfp_t orig_gfp = gfp;
|
||||
int nr_vecs = (length + PAGE_SIZE - 1) >> PAGE_SHIFT;
|
||||
@ -571,13 +552,13 @@ __iomap_write_begin(struct inode *inode, loff_t pos, unsigned len, int flags,
|
||||
{
|
||||
struct iomap_page *iop = iomap_page_create(inode, page);
|
||||
loff_t block_size = i_blocksize(inode);
|
||||
loff_t block_start = pos & ~(block_size - 1);
|
||||
loff_t block_end = (pos + len + block_size - 1) & ~(block_size - 1);
|
||||
loff_t block_start = round_down(pos, block_size);
|
||||
loff_t block_end = round_up(pos + len, block_size);
|
||||
unsigned from = offset_in_page(pos), to = from + len, poff, plen;
|
||||
int status;
|
||||
|
||||
if (PageUptodate(page))
|
||||
return 0;
|
||||
ClearPageError(page);
|
||||
|
||||
do {
|
||||
iomap_adjust_read_range(inode, iop, &block_start,
|
||||
@ -594,14 +575,13 @@ __iomap_write_begin(struct inode *inode, loff_t pos, unsigned len, int flags,
|
||||
if (WARN_ON_ONCE(flags & IOMAP_WRITE_F_UNSHARE))
|
||||
return -EIO;
|
||||
zero_user_segments(page, poff, from, to, poff + plen);
|
||||
iomap_set_range_uptodate(page, poff, plen);
|
||||
continue;
|
||||
} else {
|
||||
int status = iomap_read_page_sync(block_start, page,
|
||||
poff, plen, srcmap);
|
||||
if (status)
|
||||
return status;
|
||||
}
|
||||
|
||||
status = iomap_read_page_sync(block_start, page, poff, plen,
|
||||
srcmap);
|
||||
if (status)
|
||||
return status;
|
||||
iomap_set_range_uptodate(page, poff, plen);
|
||||
} while ((block_start += plen) < block_end);
|
||||
|
||||
return 0;
|
||||
@ -685,9 +665,8 @@ iomap_set_page_dirty(struct page *page)
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(iomap_set_page_dirty);
|
||||
|
||||
static int
|
||||
__iomap_write_end(struct inode *inode, loff_t pos, unsigned len,
|
||||
unsigned copied, struct page *page)
|
||||
static size_t __iomap_write_end(struct inode *inode, loff_t pos, size_t len,
|
||||
size_t copied, struct page *page)
|
||||
{
|
||||
flush_dcache_page(page);
|
||||
|
||||
@ -709,15 +688,15 @@ __iomap_write_end(struct inode *inode, loff_t pos, unsigned len,
|
||||
return copied;
|
||||
}
|
||||
|
||||
static int
|
||||
iomap_write_end_inline(struct inode *inode, struct page *page,
|
||||
struct iomap *iomap, loff_t pos, unsigned copied)
|
||||
static size_t iomap_write_end_inline(struct inode *inode, struct page *page,
|
||||
struct iomap *iomap, loff_t pos, size_t copied)
|
||||
{
|
||||
void *addr;
|
||||
|
||||
WARN_ON_ONCE(!PageUptodate(page));
|
||||
BUG_ON(pos + copied > PAGE_SIZE - offset_in_page(iomap->inline_data));
|
||||
|
||||
flush_dcache_page(page);
|
||||
addr = kmap_atomic(page);
|
||||
memcpy(iomap->inline_data + pos, addr + pos, copied);
|
||||
kunmap_atomic(addr);
|
||||
@ -726,13 +705,14 @@ iomap_write_end_inline(struct inode *inode, struct page *page,
|
||||
return copied;
|
||||
}
|
||||
|
||||
static int
|
||||
iomap_write_end(struct inode *inode, loff_t pos, unsigned len, unsigned copied,
|
||||
struct page *page, struct iomap *iomap, struct iomap *srcmap)
|
||||
/* Returns the number of bytes copied. May be 0. Cannot be an errno. */
|
||||
static size_t iomap_write_end(struct inode *inode, loff_t pos, size_t len,
|
||||
size_t copied, struct page *page, struct iomap *iomap,
|
||||
struct iomap *srcmap)
|
||||
{
|
||||
const struct iomap_page_ops *page_ops = iomap->page_ops;
|
||||
loff_t old_size = inode->i_size;
|
||||
int ret;
|
||||
size_t ret;
|
||||
|
||||
if (srcmap->type == IOMAP_INLINE) {
|
||||
ret = iomap_write_end_inline(inode, page, iomap, pos, copied);
|
||||
@ -811,13 +791,8 @@ again:
|
||||
|
||||
copied = iov_iter_copy_from_user_atomic(page, i, offset, bytes);
|
||||
|
||||
flush_dcache_page(page);
|
||||
|
||||
status = iomap_write_end(inode, pos, bytes, copied, page, iomap,
|
||||
copied = iomap_write_end(inode, pos, bytes, copied, page, iomap,
|
||||
srcmap);
|
||||
if (unlikely(status < 0))
|
||||
break;
|
||||
copied = status;
|
||||
|
||||
cond_resched();
|
||||
|
||||
@ -891,11 +866,8 @@ iomap_unshare_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
|
||||
|
||||
status = iomap_write_end(inode, pos, bytes, bytes, page, iomap,
|
||||
srcmap);
|
||||
if (unlikely(status <= 0)) {
|
||||
if (WARN_ON_ONCE(status == 0))
|
||||
return -EIO;
|
||||
return status;
|
||||
}
|
||||
if (WARN_ON_ONCE(status == 0))
|
||||
return -EIO;
|
||||
|
||||
cond_resched();
|
||||
|
||||
@ -928,11 +900,13 @@ iomap_file_unshare(struct inode *inode, loff_t pos, loff_t len,
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(iomap_file_unshare);
|
||||
|
||||
static int iomap_zero(struct inode *inode, loff_t pos, unsigned offset,
|
||||
unsigned bytes, struct iomap *iomap, struct iomap *srcmap)
|
||||
static s64 iomap_zero(struct inode *inode, loff_t pos, u64 length,
|
||||
struct iomap *iomap, struct iomap *srcmap)
|
||||
{
|
||||
struct page *page;
|
||||
int status;
|
||||
unsigned offset = offset_in_page(pos);
|
||||
unsigned bytes = min_t(u64, PAGE_SIZE - offset, length);
|
||||
|
||||
status = iomap_write_begin(inode, pos, bytes, 0, &page, iomap, srcmap);
|
||||
if (status)
|
||||
@ -944,38 +918,33 @@ static int iomap_zero(struct inode *inode, loff_t pos, unsigned offset,
|
||||
return iomap_write_end(inode, pos, bytes, bytes, page, iomap, srcmap);
|
||||
}
|
||||
|
||||
static loff_t
|
||||
iomap_zero_range_actor(struct inode *inode, loff_t pos, loff_t count,
|
||||
void *data, struct iomap *iomap, struct iomap *srcmap)
|
||||
static loff_t iomap_zero_range_actor(struct inode *inode, loff_t pos,
|
||||
loff_t length, void *data, struct iomap *iomap,
|
||||
struct iomap *srcmap)
|
||||
{
|
||||
bool *did_zero = data;
|
||||
loff_t written = 0;
|
||||
int status;
|
||||
|
||||
/* already zeroed? we're done. */
|
||||
if (srcmap->type == IOMAP_HOLE || srcmap->type == IOMAP_UNWRITTEN)
|
||||
return count;
|
||||
return length;
|
||||
|
||||
do {
|
||||
unsigned offset, bytes;
|
||||
|
||||
offset = offset_in_page(pos);
|
||||
bytes = min_t(loff_t, PAGE_SIZE - offset, count);
|
||||
s64 bytes;
|
||||
|
||||
if (IS_DAX(inode))
|
||||
status = dax_iomap_zero(pos, offset, bytes, iomap);
|
||||
bytes = dax_iomap_zero(pos, length, iomap);
|
||||
else
|
||||
status = iomap_zero(inode, pos, offset, bytes, iomap,
|
||||
srcmap);
|
||||
if (status < 0)
|
||||
return status;
|
||||
bytes = iomap_zero(inode, pos, length, iomap, srcmap);
|
||||
if (bytes < 0)
|
||||
return bytes;
|
||||
|
||||
pos += bytes;
|
||||
count -= bytes;
|
||||
length -= bytes;
|
||||
written += bytes;
|
||||
if (did_zero)
|
||||
*did_zero = true;
|
||||
} while (count > 0);
|
||||
} while (length > 0);
|
||||
|
||||
return written;
|
||||
}
|
||||
@ -1070,7 +1039,7 @@ EXPORT_SYMBOL_GPL(iomap_page_mkwrite);
|
||||
|
||||
static void
|
||||
iomap_finish_page_writeback(struct inode *inode, struct page *page,
|
||||
int error)
|
||||
int error, unsigned int len)
|
||||
{
|
||||
struct iomap_page *iop = to_iomap_page(page);
|
||||
|
||||
@ -1079,10 +1048,10 @@ iomap_finish_page_writeback(struct inode *inode, struct page *page,
|
||||
mapping_set_error(inode->i_mapping, -EIO);
|
||||
}
|
||||
|
||||
WARN_ON_ONCE(i_blocksize(inode) < PAGE_SIZE && !iop);
|
||||
WARN_ON_ONCE(iop && atomic_read(&iop->write_count) <= 0);
|
||||
WARN_ON_ONCE(i_blocks_per_page(inode, page) > 1 && !iop);
|
||||
WARN_ON_ONCE(iop && atomic_read(&iop->write_bytes_pending) <= 0);
|
||||
|
||||
if (!iop || atomic_dec_and_test(&iop->write_count))
|
||||
if (!iop || atomic_sub_and_test(len, &iop->write_bytes_pending))
|
||||
end_page_writeback(page);
|
||||
}
|
||||
|
||||
@ -1116,7 +1085,8 @@ iomap_finish_ioend(struct iomap_ioend *ioend, int error)
|
||||
|
||||
/* walk each page on bio, ending page IO on them */
|
||||
bio_for_each_segment_all(bv, bio, iter_all)
|
||||
iomap_finish_page_writeback(inode, bv->bv_page, error);
|
||||
iomap_finish_page_writeback(inode, bv->bv_page, error,
|
||||
bv->bv_len);
|
||||
bio_put(bio);
|
||||
}
|
||||
/* The ioend has been freed by bio_put() */
|
||||
@ -1332,8 +1302,8 @@ iomap_add_to_ioend(struct inode *inode, loff_t offset, struct page *page,
|
||||
|
||||
merged = __bio_try_merge_page(wpc->ioend->io_bio, page, len, poff,
|
||||
&same_page);
|
||||
if (iop && !same_page)
|
||||
atomic_inc(&iop->write_count);
|
||||
if (iop)
|
||||
atomic_add(len, &iop->write_bytes_pending);
|
||||
|
||||
if (!merged) {
|
||||
if (bio_full(wpc->ioend->io_bio, len)) {
|
||||
@ -1375,8 +1345,8 @@ iomap_writepage_map(struct iomap_writepage_ctx *wpc,
|
||||
int error = 0, count = 0, i;
|
||||
LIST_HEAD(submit_list);
|
||||
|
||||
WARN_ON_ONCE(i_blocksize(inode) < PAGE_SIZE && !iop);
|
||||
WARN_ON_ONCE(iop && atomic_read(&iop->write_count) != 0);
|
||||
WARN_ON_ONCE(i_blocks_per_page(inode, page) > 1 && !iop);
|
||||
WARN_ON_ONCE(iop && atomic_read(&iop->write_bytes_pending) != 0);
|
||||
|
||||
/*
|
||||
* Walk through the page to find areas to write back. If we run off the
|
||||
|
@ -76,7 +76,7 @@ static void iomap_dio_submit_bio(struct iomap_dio *dio, struct iomap *iomap,
|
||||
dio->submit.cookie = submit_bio(bio);
|
||||
}
|
||||
|
||||
static ssize_t iomap_dio_complete(struct iomap_dio *dio)
|
||||
ssize_t iomap_dio_complete(struct iomap_dio *dio)
|
||||
{
|
||||
const struct iomap_dio_ops *dops = dio->dops;
|
||||
struct kiocb *iocb = dio->iocb;
|
||||
@ -108,7 +108,7 @@ static ssize_t iomap_dio_complete(struct iomap_dio *dio)
|
||||
* ->end_io() when necessary, otherwise a racing buffer read would cache
|
||||
* zeros from unwritten extents.
|
||||
*/
|
||||
if (!dio->error &&
|
||||
if (!dio->error && dio->size &&
|
||||
(dio->flags & IOMAP_DIO_WRITE) && inode->i_mapping->nrpages) {
|
||||
int err;
|
||||
err = invalidate_inode_pages2_range(inode->i_mapping,
|
||||
@ -118,6 +118,7 @@ static ssize_t iomap_dio_complete(struct iomap_dio *dio)
|
||||
dio_warn_stale_pagecache(iocb->ki_filp);
|
||||
}
|
||||
|
||||
inode_dio_end(file_inode(iocb->ki_filp));
|
||||
/*
|
||||
* If this is a DSYNC write, make sure we push it to stable storage now
|
||||
* that we've written data.
|
||||
@ -125,11 +126,11 @@ static ssize_t iomap_dio_complete(struct iomap_dio *dio)
|
||||
if (ret > 0 && (dio->flags & IOMAP_DIO_NEED_SYNC))
|
||||
ret = generic_write_sync(iocb, ret);
|
||||
|
||||
inode_dio_end(file_inode(iocb->ki_filp));
|
||||
kfree(dio);
|
||||
|
||||
return ret;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(iomap_dio_complete);
|
||||
|
||||
static void iomap_dio_complete_work(struct work_struct *work)
|
||||
{
|
||||
@ -388,6 +389,16 @@ iomap_dio_actor(struct inode *inode, loff_t pos, loff_t length,
|
||||
return iomap_dio_bio_actor(inode, pos, length, dio, iomap);
|
||||
case IOMAP_INLINE:
|
||||
return iomap_dio_inline_actor(inode, pos, length, dio, iomap);
|
||||
case IOMAP_DELALLOC:
|
||||
/*
|
||||
* DIO is not serialised against mmap() access at all, and so
|
||||
* if the page_mkwrite occurs between the writeback and the
|
||||
* iomap_apply() call in the DIO path, then it will see the
|
||||
* DELALLOC block that the page-mkwrite allocated.
|
||||
*/
|
||||
pr_warn_ratelimited("Direct I/O collision with buffered writes! File: %pD4 Comm: %.20s\n",
|
||||
dio->iocb->ki_filp, current->comm);
|
||||
return -EIO;
|
||||
default:
|
||||
WARN_ON_ONCE(1);
|
||||
return -EIO;
|
||||
@ -406,8 +417,8 @@ iomap_dio_actor(struct inode *inode, loff_t pos, loff_t length,
|
||||
* Returns -ENOTBLK in case of a page invalidation failure for
|
||||
* writes. The caller needs to fall back to buffered I/O in this case.
|
||||
*/
|
||||
ssize_t
|
||||
iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
|
||||
struct iomap_dio *
|
||||
__iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
|
||||
const struct iomap_ops *ops, const struct iomap_dio_ops *dops,
|
||||
bool wait_for_completion)
|
||||
{
|
||||
@ -421,14 +432,14 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
|
||||
struct iomap_dio *dio;
|
||||
|
||||
if (!count)
|
||||
return 0;
|
||||
return NULL;
|
||||
|
||||
if (WARN_ON(is_sync_kiocb(iocb) && !wait_for_completion))
|
||||
return -EIO;
|
||||
return ERR_PTR(-EIO);
|
||||
|
||||
dio = kmalloc(sizeof(*dio), GFP_KERNEL);
|
||||
if (!dio)
|
||||
return -ENOMEM;
|
||||
return ERR_PTR(-ENOMEM);
|
||||
|
||||
dio->iocb = iocb;
|
||||
atomic_set(&dio->ref, 1);
|
||||
@ -558,7 +569,7 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
|
||||
dio->wait_for_completion = wait_for_completion;
|
||||
if (!atomic_dec_and_test(&dio->ref)) {
|
||||
if (!wait_for_completion)
|
||||
return -EIOCBQUEUED;
|
||||
return ERR_PTR(-EIOCBQUEUED);
|
||||
|
||||
for (;;) {
|
||||
set_current_state(TASK_UNINTERRUPTIBLE);
|
||||
@ -574,10 +585,26 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
|
||||
__set_current_state(TASK_RUNNING);
|
||||
}
|
||||
|
||||
return iomap_dio_complete(dio);
|
||||
return dio;
|
||||
|
||||
out_free_dio:
|
||||
kfree(dio);
|
||||
return ret;
|
||||
if (ret)
|
||||
return ERR_PTR(ret);
|
||||
return NULL;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(__iomap_dio_rw);
|
||||
|
||||
ssize_t
|
||||
iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
|
||||
const struct iomap_ops *ops, const struct iomap_dio_ops *dops,
|
||||
bool wait_for_completion)
|
||||
{
|
||||
struct iomap_dio *dio;
|
||||
|
||||
dio = __iomap_dio_rw(iocb, iter, ops, dops, wait_for_completion);
|
||||
if (IS_ERR_OR_NULL(dio))
|
||||
return PTR_ERR_OR_ZERO(dio);
|
||||
return iomap_dio_complete(dio);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(iomap_dio_rw);
|
||||
|
@ -473,7 +473,7 @@ static int metapage_readpage(struct file *fp, struct page *page)
|
||||
struct inode *inode = page->mapping->host;
|
||||
struct bio *bio = NULL;
|
||||
int block_offset;
|
||||
int blocks_per_page = PAGE_SIZE >> inode->i_blkbits;
|
||||
int blocks_per_page = i_blocks_per_page(inode, page);
|
||||
sector_t page_start; /* address of page in fs blocks */
|
||||
sector_t pblock;
|
||||
int xlen;
|
||||
|
@ -544,7 +544,7 @@ xfs_discard_page(
|
||||
page, ip->i_ino, offset);
|
||||
|
||||
error = xfs_bmap_punch_delalloc_range(ip, start_fsb,
|
||||
PAGE_SIZE / i_blocksize(inode));
|
||||
i_blocks_per_page(inode, page));
|
||||
if (error && !XFS_FORCED_SHUTDOWN(mp))
|
||||
xfs_alert(mp, "page discard unable to remove delalloc mapping.");
|
||||
out_invalidate:
|
||||
|
@ -231,8 +231,7 @@ vm_fault_t dax_finish_sync_fault(struct vm_fault *vmf,
|
||||
int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index);
|
||||
int dax_invalidate_mapping_entry_sync(struct address_space *mapping,
|
||||
pgoff_t index);
|
||||
int dax_iomap_zero(loff_t pos, unsigned offset, unsigned size,
|
||||
struct iomap *iomap);
|
||||
s64 dax_iomap_zero(loff_t pos, u64 length, struct iomap *iomap);
|
||||
static inline bool dax_mapping(struct address_space *mapping)
|
||||
{
|
||||
return mapping->host && IS_DAX(mapping->host);
|
||||
|
@ -13,6 +13,7 @@
|
||||
struct address_space;
|
||||
struct fiemap_extent_info;
|
||||
struct inode;
|
||||
struct iomap_dio;
|
||||
struct iomap_writepage_ctx;
|
||||
struct iov_iter;
|
||||
struct kiocb;
|
||||
@ -258,6 +259,10 @@ struct iomap_dio_ops {
|
||||
ssize_t iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
|
||||
const struct iomap_ops *ops, const struct iomap_dio_ops *dops,
|
||||
bool wait_for_completion);
|
||||
struct iomap_dio *__iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
|
||||
const struct iomap_ops *ops, const struct iomap_dio_ops *dops,
|
||||
bool wait_for_completion);
|
||||
ssize_t iomap_dio_complete(struct iomap_dio *dio);
|
||||
int iomap_dio_iopoll(struct kiocb *kiocb, bool spin);
|
||||
|
||||
#ifdef CONFIG_SWAP
|
||||
|
@ -927,4 +927,20 @@ static inline int page_mkwrite_check_truncate(struct page *page,
|
||||
return offset;
|
||||
}
|
||||
|
||||
/**
|
||||
* i_blocks_per_page - How many blocks fit in this page.
|
||||
* @inode: The inode which contains the blocks.
|
||||
* @page: The page (head page if the page is a THP).
|
||||
*
|
||||
* If the block size is larger than the size of this page, return zero.
|
||||
*
|
||||
* Context: The caller should hold a refcount on the page to prevent it
|
||||
* from being split.
|
||||
* Return: The number of filesystem blocks covered by this page.
|
||||
*/
|
||||
static inline
|
||||
unsigned int i_blocks_per_page(struct inode *inode, struct page *page)
|
||||
{
|
||||
return thp_size(page) >> inode->i_blkbits;
|
||||
}
|
||||
#endif /* _LINUX_PAGEMAP_H */
|
||||
|
Loading…
x
Reference in New Issue
Block a user