pmem: Add functions for reading/writing page to/from pmem

This splits pmem_do_bvec() into pmem_do_read() and pmem_do_write().
pmem_do_write() will also be used by the pmem zero_page_range()
implementation, so the write path is split out to let both share the
same code.

Suggested-by: Christoph Hellwig <hch@infradead.org>
Signed-off-by: Vivek Goyal <vgoyal@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Pankaj Gupta <pankaj.gupta.linux@gmail.com>
Link: https://lore.kernel.org/r/20200228163456.1587-2-vgoyal@redhat.com
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
This commit is contained in:
Vivek Goyal 2020-02-28 11:34:51 -05:00 committed by Dan Williams
parent 338f6dac85
commit 5d64efe797

View File

@ -136,9 +136,25 @@ static blk_status_t read_pmem(struct page *page, unsigned int off,
return BLK_STS_OK; return BLK_STS_OK;
} }
static blk_status_t pmem_do_bvec(struct pmem_device *pmem, struct page *page, static blk_status_t pmem_do_read(struct pmem_device *pmem,
unsigned int len, unsigned int off, unsigned int op, struct page *page, unsigned int page_off,
sector_t sector) sector_t sector, unsigned int len)
{
blk_status_t rc;
phys_addr_t pmem_off = sector * 512 + pmem->data_offset;
void *pmem_addr = pmem->virt_addr + pmem_off;
if (unlikely(is_bad_pmem(&pmem->bb, sector, len)))
return BLK_STS_IOERR;
rc = read_pmem(page, page_off, pmem_addr, len);
flush_dcache_page(page);
return rc;
}
static blk_status_t pmem_do_write(struct pmem_device *pmem,
struct page *page, unsigned int page_off,
sector_t sector, unsigned int len)
{ {
blk_status_t rc = BLK_STS_OK; blk_status_t rc = BLK_STS_OK;
bool bad_pmem = false; bool bad_pmem = false;
@ -148,34 +164,25 @@ static blk_status_t pmem_do_bvec(struct pmem_device *pmem, struct page *page,
if (unlikely(is_bad_pmem(&pmem->bb, sector, len))) if (unlikely(is_bad_pmem(&pmem->bb, sector, len)))
bad_pmem = true; bad_pmem = true;
if (!op_is_write(op)) { /*
if (unlikely(bad_pmem)) * Note that we write the data both before and after
rc = BLK_STS_IOERR; * clearing poison. The write before clear poison
else { * handles situations where the latest written data is
rc = read_pmem(page, off, pmem_addr, len); * preserved and the clear poison operation simply marks
flush_dcache_page(page); * the address range as valid without changing the data.
} * In this case application software can assume that an
} else { * interrupted write will either return the new good
/* * data or an error.
* Note that we write the data both before and after *
* clearing poison. The write before clear poison * However, if pmem_clear_poison() leaves the data in an
* handles situations where the latest written data is * indeterminate state we need to perform the write
* preserved and the clear poison operation simply marks * after clear poison.
* the address range as valid without changing the data. */
* In this case application software can assume that an flush_dcache_page(page);
* interrupted write will either return the new good write_pmem(pmem_addr, page, page_off, len);
* data or an error. if (unlikely(bad_pmem)) {
* rc = pmem_clear_poison(pmem, pmem_off, len);
* However, if pmem_clear_poison() leaves the data in an write_pmem(pmem_addr, page, page_off, len);
* indeterminate state we need to perform the write
* after clear poison.
*/
flush_dcache_page(page);
write_pmem(pmem_addr, page, off, len);
if (unlikely(bad_pmem)) {
rc = pmem_clear_poison(pmem, pmem_off, len);
write_pmem(pmem_addr, page, off, len);
}
} }
return rc; return rc;
@ -197,8 +204,12 @@ static blk_qc_t pmem_make_request(struct request_queue *q, struct bio *bio)
do_acct = nd_iostat_start(bio, &start); do_acct = nd_iostat_start(bio, &start);
bio_for_each_segment(bvec, bio, iter) { bio_for_each_segment(bvec, bio, iter) {
rc = pmem_do_bvec(pmem, bvec.bv_page, bvec.bv_len, if (op_is_write(bio_op(bio)))
bvec.bv_offset, bio_op(bio), iter.bi_sector); rc = pmem_do_write(pmem, bvec.bv_page, bvec.bv_offset,
iter.bi_sector, bvec.bv_len);
else
rc = pmem_do_read(pmem, bvec.bv_page, bvec.bv_offset,
iter.bi_sector, bvec.bv_len);
if (rc) { if (rc) {
bio->bi_status = rc; bio->bi_status = rc;
break; break;
@ -223,9 +234,12 @@ static int pmem_rw_page(struct block_device *bdev, sector_t sector,
struct pmem_device *pmem = bdev->bd_queue->queuedata; struct pmem_device *pmem = bdev->bd_queue->queuedata;
blk_status_t rc; blk_status_t rc;
rc = pmem_do_bvec(pmem, page, hpage_nr_pages(page) * PAGE_SIZE, if (op_is_write(op))
0, op, sector); rc = pmem_do_write(pmem, page, 0, sector,
hpage_nr_pages(page) * PAGE_SIZE);
else
rc = pmem_do_read(pmem, page, 0, sector,
hpage_nr_pages(page) * PAGE_SIZE);
/* /*
* The ->rw_page interface is subtle and tricky. The core * The ->rw_page interface is subtle and tricky. The core
* retries on any error, so we can only invoke page_endio() in * retries on any error, so we can only invoke page_endio() in