Andrew Morton 3fe968f167 [PATCH] blkmtd: use clear_page_dirty()
SetPageDirty() and ClearPageDirty() are low-level thing which filesystems
shouldn't be using.  They bypass dirty page accounting.

Cc: David Woodhouse <dwmw2@infradead.org>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2005-12-12 08:57:41 -08:00

821 lines
20 KiB
C

/*
* $Id: blkmtd.c,v 1.27 2005/11/07 11:14:24 gleixner Exp $
*
* blkmtd.c - use a block device as a fake MTD
*
* Author: Simon Evans <spse@secret.org.uk>
*
* Copyright (C) 2001,2002 Simon Evans
*
* Licence: GPL
*
* How it works:
* The driver uses raw/io to read/write the device and the page
* cache to cache access. Writes update the page cache with the
* new data and mark it dirty and add the page into a BIO which
* is then written out.
*
* It can be loaded Read-Only to prevent erases and writes to the
* medium.
*
*/
#include <linux/config.h>
#include <linux/module.h>
#include <linux/fs.h>
#include <linux/blkdev.h>
#include <linux/bio.h>
#include <linux/pagemap.h>
#include <linux/list.h>
#include <linux/init.h>
#include <linux/mtd/mtd.h>
#define err(format, arg...) printk(KERN_ERR "blkmtd: " format "\n" , ## arg)
#define info(format, arg...) printk(KERN_INFO "blkmtd: " format "\n" , ## arg)
#define warn(format, arg...) printk(KERN_WARNING "blkmtd: " format "\n" , ## arg)
#define crit(format, arg...) printk(KERN_CRIT "blkmtd: " format "\n" , ## arg)
/* Default erase size in K, always make it a multiple of PAGE_SIZE */
#define CONFIG_MTD_BLKDEV_ERASESIZE (128 << 10) /* 128KiB */
#define VERSION "$Revision: 1.27 $"
/* Info for the block device */
struct blkmtd_dev {
struct list_head list;
struct block_device *blkdev;
struct mtd_info mtd_info;
struct semaphore wrbuf_mutex;
};
/* Static info about the MTD, used in cleanup_module */
static LIST_HEAD(blkmtd_device_list);
static void blkmtd_sync(struct mtd_info *mtd);
#define MAX_DEVICES 4
/* Module parameters passed by insmod/modprobe */
static char *device[MAX_DEVICES]; /* the block device to use */
static int erasesz[MAX_DEVICES]; /* optional default erase size */
static int ro[MAX_DEVICES]; /* optional read only flag */
static int sync;
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Simon Evans <spse@secret.org.uk>");
MODULE_DESCRIPTION("Emulate an MTD using a block device");
module_param_array(device, charp, NULL, 0);
MODULE_PARM_DESC(device, "block device to use");
module_param_array(erasesz, int, NULL, 0);
MODULE_PARM_DESC(erasesz, "optional erase size to use in KiB. eg 4=4KiB.");
module_param_array(ro, bool, NULL, 0);
MODULE_PARM_DESC(ro, "1=Read only, writes and erases cause errors");
module_param(sync, bool, 0);
MODULE_PARM_DESC(sync, "1=Synchronous writes");
/* completion handler for BIO reads */
static int bi_read_complete(struct bio *bio, unsigned int bytes_done, int error)
{
if (bio->bi_size)
return 1;
complete((struct completion*)bio->bi_private);
return 0;
}
/* completion handler for BIO writes */
static int bi_write_complete(struct bio *bio, unsigned int bytes_done, int error)
{
const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
if (bio->bi_size)
return 1;
if(!uptodate)
err("bi_write_complete: not uptodate\n");
do {
struct page *page = bvec->bv_page;
DEBUG(3, "Cleaning up page %ld\n", page->index);
if (--bvec >= bio->bi_io_vec)
prefetchw(&bvec->bv_page->flags);
if (uptodate) {
SetPageUptodate(page);
} else {
ClearPageUptodate(page);
SetPageError(page);
}
clear_page_dirty(page);
unlock_page(page);
page_cache_release(page);
} while (bvec >= bio->bi_io_vec);
complete((struct completion*)bio->bi_private);
return 0;
}
/* read one page from the block device */
static int blkmtd_readpage(struct blkmtd_dev *dev, struct page *page)
{
struct bio *bio;
struct completion event;
int err = -ENOMEM;
if(PageUptodate(page)) {
DEBUG(2, "blkmtd: readpage page %ld is already upto date\n", page->index);
unlock_page(page);
return 0;
}
ClearPageUptodate(page);
ClearPageError(page);
bio = bio_alloc(GFP_KERNEL, 1);
if(bio) {
init_completion(&event);
bio->bi_bdev = dev->blkdev;
bio->bi_sector = page->index << (PAGE_SHIFT-9);
bio->bi_private = &event;
bio->bi_end_io = bi_read_complete;
if(bio_add_page(bio, page, PAGE_SIZE, 0) == PAGE_SIZE) {
submit_bio(READ_SYNC, bio);
wait_for_completion(&event);
err = test_bit(BIO_UPTODATE, &bio->bi_flags) ? 0 : -EIO;
bio_put(bio);
}
}
if(err)
SetPageError(page);
else
SetPageUptodate(page);
flush_dcache_page(page);
unlock_page(page);
return err;
}
/* write out the current BIO and wait for it to finish */
static int blkmtd_write_out(struct bio *bio)
{
struct completion event;
int err;
if(!bio->bi_vcnt) {
bio_put(bio);
return 0;
}
init_completion(&event);
bio->bi_private = &event;
bio->bi_end_io = bi_write_complete;
submit_bio(WRITE_SYNC, bio);
wait_for_completion(&event);
DEBUG(3, "submit_bio completed, bi_vcnt = %d\n", bio->bi_vcnt);
err = test_bit(BIO_UPTODATE, &bio->bi_flags) ? 0 : -EIO;
bio_put(bio);
return err;
}
/**
* blkmtd_add_page - add a page to the current BIO
* @bio: bio to add to (NULL to alloc initial bio)
* @blkdev: block device
* @page: page to add
* @pagecnt: pages left to add
*
* Adds a page to the current bio, allocating it if necessary. If it cannot be
* added, the current bio is written out and a new one is allocated. Returns
* the new bio to add or NULL on error
*/
static struct bio *blkmtd_add_page(struct bio *bio, struct block_device *blkdev,
struct page *page, int pagecnt)
{
retry:
if(!bio) {
bio = bio_alloc(GFP_KERNEL, pagecnt);
if(!bio)
return NULL;
bio->bi_sector = page->index << (PAGE_SHIFT-9);
bio->bi_bdev = blkdev;
}
if(bio_add_page(bio, page, PAGE_SIZE, 0) != PAGE_SIZE) {
blkmtd_write_out(bio);
bio = NULL;
goto retry;
}
return bio;
}
/**
* write_pages - write block of data to device via the page cache
* @dev: device to write to
* @buf: data source or NULL if erase (output is set to 0xff)
* @to: offset into output device
* @len: amount to data to write
* @retlen: amount of data written
*
* Grab pages from the page cache and fill them with the source data.
* Non page aligned start and end result in a readin of the page and
* part of the page being modified. Pages are added to the bio and then written
* out.
*/
static int write_pages(struct blkmtd_dev *dev, const u_char *buf, loff_t to,
size_t len, size_t *retlen)
{
int pagenr, offset;
size_t start_len = 0, end_len;
int pagecnt = 0;
int err = 0;
struct bio *bio = NULL;
size_t thislen = 0;
pagenr = to >> PAGE_SHIFT;
offset = to & ~PAGE_MASK;
DEBUG(2, "blkmtd: write_pages: buf = %p to = %ld len = %zd pagenr = %d offset = %d\n",
buf, (long)to, len, pagenr, offset);
/* see if we have to do a partial write at the start */
if(offset) {
start_len = ((offset + len) > PAGE_SIZE) ? PAGE_SIZE - offset : len;
len -= start_len;
}
/* calculate the length of the other two regions */
end_len = len & ~PAGE_MASK;
len -= end_len;
if(start_len)
pagecnt++;
if(len)
pagecnt += len >> PAGE_SHIFT;
if(end_len)
pagecnt++;
down(&dev->wrbuf_mutex);
DEBUG(3, "blkmtd: write: start_len = %zd len = %zd end_len = %zd pagecnt = %d\n",
start_len, len, end_len, pagecnt);
if(start_len) {
/* do partial start region */
struct page *page;
DEBUG(3, "blkmtd: write: doing partial start, page = %d len = %zd offset = %d\n",
pagenr, start_len, offset);
BUG_ON(!buf);
page = read_cache_page(dev->blkdev->bd_inode->i_mapping, pagenr, (filler_t *)blkmtd_readpage, dev);
lock_page(page);
if(PageDirty(page)) {
err("to = %lld start_len = %zd len = %zd end_len = %zd pagenr = %d\n",
to, start_len, len, end_len, pagenr);
BUG();
}
memcpy(page_address(page)+offset, buf, start_len);
set_page_dirty(page);
SetPageUptodate(page);
buf += start_len;
thislen = start_len;
bio = blkmtd_add_page(bio, dev->blkdev, page, pagecnt);
if(!bio) {
err = -ENOMEM;
err("bio_add_page failed\n");
goto write_err;
}
pagecnt--;
pagenr++;
}
/* Now do the main loop to a page aligned, n page sized output */
if(len) {
int pagesc = len >> PAGE_SHIFT;
DEBUG(3, "blkmtd: write: whole pages start = %d, count = %d\n",
pagenr, pagesc);
while(pagesc) {
struct page *page;
/* see if page is in the page cache */
DEBUG(3, "blkmtd: write: grabbing page %d from page cache\n", pagenr);
page = grab_cache_page(dev->blkdev->bd_inode->i_mapping, pagenr);
if(PageDirty(page)) {
BUG();
}
if(!page) {
warn("write: cannot grab cache page %d", pagenr);
err = -ENOMEM;
goto write_err;
}
if(!buf) {
memset(page_address(page), 0xff, PAGE_SIZE);
} else {
memcpy(page_address(page), buf, PAGE_SIZE);
buf += PAGE_SIZE;
}
bio = blkmtd_add_page(bio, dev->blkdev, page, pagecnt);
if(!bio) {
err = -ENOMEM;
err("bio_add_page failed\n");
goto write_err;
}
pagenr++;
pagecnt--;
set_page_dirty(page);
SetPageUptodate(page);
pagesc--;
thislen += PAGE_SIZE;
}
}
if(end_len) {
/* do the third region */
struct page *page;
DEBUG(3, "blkmtd: write: doing partial end, page = %d len = %zd\n",
pagenr, end_len);
BUG_ON(!buf);
page = read_cache_page(dev->blkdev->bd_inode->i_mapping, pagenr, (filler_t *)blkmtd_readpage, dev);
lock_page(page);
if(PageDirty(page)) {
err("to = %lld start_len = %zd len = %zd end_len = %zd pagenr = %d\n",
to, start_len, len, end_len, pagenr);
BUG();
}
memcpy(page_address(page), buf, end_len);
set_page_dirty(page);
SetPageUptodate(page);
DEBUG(3, "blkmtd: write: writing out partial end\n");
thislen += end_len;
bio = blkmtd_add_page(bio, dev->blkdev, page, pagecnt);
if(!bio) {
err = -ENOMEM;
err("bio_add_page failed\n");
goto write_err;
}
pagenr++;
}
DEBUG(3, "blkmtd: write: got %d vectors to write\n", bio->bi_vcnt);
write_err:
if(bio)
blkmtd_write_out(bio);
DEBUG(2, "blkmtd: write: end, retlen = %zd, err = %d\n", *retlen, err);
up(&dev->wrbuf_mutex);
if(retlen)
*retlen = thislen;
return err;
}
/* erase a specified part of the device */
static int blkmtd_erase(struct mtd_info *mtd, struct erase_info *instr)
{
struct blkmtd_dev *dev = mtd->priv;
struct mtd_erase_region_info *einfo = mtd->eraseregions;
int numregions = mtd->numeraseregions;
size_t from;
u_long len;
int err = -EIO;
size_t retlen;
instr->state = MTD_ERASING;
from = instr->addr;
len = instr->len;
/* check erase region has valid start and length */
DEBUG(2, "blkmtd: erase: dev = `%s' from = 0x%zx len = 0x%lx\n",
mtd->name+9, from, len);
while(numregions) {
DEBUG(3, "blkmtd: checking erase region = 0x%08X size = 0x%X num = 0x%x\n",
einfo->offset, einfo->erasesize, einfo->numblocks);
if(from >= einfo->offset
&& from < einfo->offset + (einfo->erasesize * einfo->numblocks)) {
if(len == einfo->erasesize
&& ( (from - einfo->offset) % einfo->erasesize == 0))
break;
}
numregions--;
einfo++;
}
if(!numregions) {
/* Not a valid erase block */
err("erase: invalid erase request 0x%lX @ 0x%08zX", len, from);
instr->state = MTD_ERASE_FAILED;
err = -EIO;
}
if(instr->state != MTD_ERASE_FAILED) {
/* do the erase */
DEBUG(3, "Doing erase from = %zd len = %ld\n", from, len);
err = write_pages(dev, NULL, from, len, &retlen);
if(err || retlen != len) {
err("erase failed err = %d", err);
instr->state = MTD_ERASE_FAILED;
} else {
instr->state = MTD_ERASE_DONE;
}
}
DEBUG(3, "blkmtd: erase: checking callback\n");
mtd_erase_callback(instr);
DEBUG(2, "blkmtd: erase: finished (err = %d)\n", err);
return err;
}
/* read a range of the data via the page cache */
static int blkmtd_read(struct mtd_info *mtd, loff_t from, size_t len,
size_t *retlen, u_char *buf)
{
struct blkmtd_dev *dev = mtd->priv;
int err = 0;
int offset;
int pagenr, pages;
size_t thislen = 0;
DEBUG(2, "blkmtd: read: dev = `%s' from = %lld len = %zd buf = %p\n",
mtd->name+9, from, len, buf);
if(from > mtd->size)
return -EINVAL;
if(from + len > mtd->size)
len = mtd->size - from;
pagenr = from >> PAGE_SHIFT;
offset = from - (pagenr << PAGE_SHIFT);
pages = (offset+len+PAGE_SIZE-1) >> PAGE_SHIFT;
DEBUG(3, "blkmtd: read: pagenr = %d offset = %d, pages = %d\n",
pagenr, offset, pages);
while(pages) {
struct page *page;
int cpylen;
DEBUG(3, "blkmtd: read: looking for page: %d\n", pagenr);
page = read_cache_page(dev->blkdev->bd_inode->i_mapping, pagenr, (filler_t *)blkmtd_readpage, dev);
if(IS_ERR(page)) {
err = -EIO;
goto readerr;
}
cpylen = (PAGE_SIZE > len) ? len : PAGE_SIZE;
if(offset+cpylen > PAGE_SIZE)
cpylen = PAGE_SIZE-offset;
memcpy(buf + thislen, page_address(page) + offset, cpylen);
offset = 0;
len -= cpylen;
thislen += cpylen;
pagenr++;
pages--;
if(!PageDirty(page))
page_cache_release(page);
}
readerr:
if(retlen)
*retlen = thislen;
DEBUG(2, "blkmtd: end read: retlen = %zd, err = %d\n", thislen, err);
return err;
}
/* write data to the underlying device */
static int blkmtd_write(struct mtd_info *mtd, loff_t to, size_t len,
size_t *retlen, const u_char *buf)
{
struct blkmtd_dev *dev = mtd->priv;
int err;
if(!len)
return 0;
DEBUG(2, "blkmtd: write: dev = `%s' to = %lld len = %zd buf = %p\n",
mtd->name+9, to, len, buf);
if(to >= mtd->size) {
return -ENOSPC;
}
if(to + len > mtd->size) {
len = mtd->size - to;
}
err = write_pages(dev, buf, to, len, retlen);
if(err > 0)
err = 0;
DEBUG(2, "blkmtd: write: end, err = %d\n", err);
return err;
}
/* sync the device - wait until the write queue is empty */
static void blkmtd_sync(struct mtd_info *mtd)
{
/* Currently all writes are synchronous */
}
static void free_device(struct blkmtd_dev *dev)
{
DEBUG(2, "blkmtd: free_device() dev = %p\n", dev);
if(dev) {
kfree(dev->mtd_info.eraseregions);
kfree(dev->mtd_info.name);
if(dev->blkdev) {
invalidate_inode_pages(dev->blkdev->bd_inode->i_mapping);
close_bdev_excl(dev->blkdev);
}
kfree(dev);
}
}
/* For a given size and initial erase size, calculate the number
* and size of each erase region. Goes round the loop twice,
* once to find out how many regions, then allocates space,
* then round the loop again to fill it in.
*/
static struct mtd_erase_region_info *calc_erase_regions(
size_t erase_size, size_t total_size, int *regions)
{
struct mtd_erase_region_info *info = NULL;
DEBUG(2, "calc_erase_regions, es = %zd size = %zd regions = %d\n",
erase_size, total_size, *regions);
/* Make any user specified erasesize be a power of 2
and at least PAGE_SIZE */
if(erase_size) {
int es = erase_size;
erase_size = 1;
while(es != 1) {
es >>= 1;
erase_size <<= 1;
}
if(erase_size < PAGE_SIZE)
erase_size = PAGE_SIZE;
} else {
erase_size = CONFIG_MTD_BLKDEV_ERASESIZE;
}
*regions = 0;
do {
int tot_size = total_size;
int er_size = erase_size;
int count = 0, offset = 0, regcnt = 0;
while(tot_size) {
count = tot_size / er_size;
if(count) {
tot_size = tot_size % er_size;
if(info) {
DEBUG(2, "adding to erase info off=%d er=%d cnt=%d\n",
offset, er_size, count);
(info+regcnt)->offset = offset;
(info+regcnt)->erasesize = er_size;
(info+regcnt)->numblocks = count;
(*regions)++;
}
regcnt++;
offset += (count * er_size);
}
while(er_size > tot_size)
er_size >>= 1;
}
if(info == NULL) {
info = kmalloc(regcnt * sizeof(struct mtd_erase_region_info), GFP_KERNEL);
if(!info)
break;
}
} while(!(*regions));
DEBUG(2, "calc_erase_regions done, es = %zd size = %zd regions = %d\n",
erase_size, total_size, *regions);
return info;
}
extern dev_t __init name_to_dev_t(const char *line);
static struct blkmtd_dev *add_device(char *devname, int readonly, int erase_size)
{
struct block_device *bdev;
int mode;
struct blkmtd_dev *dev;
if(!devname)
return NULL;
/* Get a handle on the device */
#ifdef MODULE
mode = (readonly) ? O_RDONLY : O_RDWR;
bdev = open_bdev_excl(devname, mode, NULL);
#else
mode = (readonly) ? FMODE_READ : FMODE_WRITE;
bdev = open_by_devnum(name_to_dev_t(devname), mode);
#endif
if(IS_ERR(bdev)) {
err("error: cannot open device %s", devname);
DEBUG(2, "blkmtd: opening bdev returned %ld\n", PTR_ERR(bdev));
return NULL;
}
DEBUG(1, "blkmtd: found a block device major = %d, minor = %d\n",
MAJOR(bdev->bd_dev), MINOR(bdev->bd_dev));
if(MAJOR(bdev->bd_dev) == MTD_BLOCK_MAJOR) {
err("attempting to use an MTD device as a block device");
blkdev_put(bdev);
return NULL;
}
dev = kmalloc(sizeof(struct blkmtd_dev), GFP_KERNEL);
if(dev == NULL) {
blkdev_put(bdev);
return NULL;
}
memset(dev, 0, sizeof(struct blkmtd_dev));
dev->blkdev = bdev;
if(!readonly) {
init_MUTEX(&dev->wrbuf_mutex);
}
dev->mtd_info.size = dev->blkdev->bd_inode->i_size & PAGE_MASK;
/* Setup the MTD structure */
/* make the name contain the block device in */
dev->mtd_info.name = kmalloc(sizeof("blkmtd: ") + strlen(devname), GFP_KERNEL);
if(dev->mtd_info.name == NULL)
goto devinit_err;
sprintf(dev->mtd_info.name, "blkmtd: %s", devname);
dev->mtd_info.eraseregions = calc_erase_regions(erase_size, dev->mtd_info.size,
&dev->mtd_info.numeraseregions);
if(dev->mtd_info.eraseregions == NULL)
goto devinit_err;
dev->mtd_info.erasesize = dev->mtd_info.eraseregions->erasesize;
DEBUG(1, "blkmtd: init: found %d erase regions\n",
dev->mtd_info.numeraseregions);
if(readonly) {
dev->mtd_info.type = MTD_ROM;
dev->mtd_info.flags = MTD_CAP_ROM;
} else {
dev->mtd_info.type = MTD_RAM;
dev->mtd_info.flags = MTD_CAP_RAM;
dev->mtd_info.erase = blkmtd_erase;
dev->mtd_info.write = blkmtd_write;
dev->mtd_info.writev = default_mtd_writev;
dev->mtd_info.sync = blkmtd_sync;
}
dev->mtd_info.read = blkmtd_read;
dev->mtd_info.readv = default_mtd_readv;
dev->mtd_info.priv = dev;
dev->mtd_info.owner = THIS_MODULE;
list_add(&dev->list, &blkmtd_device_list);
if (add_mtd_device(&dev->mtd_info)) {
/* Device didnt get added, so free the entry */
list_del(&dev->list);
goto devinit_err;
} else {
info("mtd%d: [%s] erase_size = %dKiB %s",
dev->mtd_info.index, dev->mtd_info.name + strlen("blkmtd: "),
dev->mtd_info.erasesize >> 10,
readonly ? "(read-only)" : "");
}
return dev;
devinit_err:
free_device(dev);
return NULL;
}
/* Cleanup and exit - sync the device and kill of the kernel thread */
static void __devexit cleanup_blkmtd(void)
{
struct list_head *temp1, *temp2;
/* Remove the MTD devices */
list_for_each_safe(temp1, temp2, &blkmtd_device_list) {
struct blkmtd_dev *dev = list_entry(temp1, struct blkmtd_dev,
list);
blkmtd_sync(&dev->mtd_info);
del_mtd_device(&dev->mtd_info);
info("mtd%d: [%s] removed", dev->mtd_info.index,
dev->mtd_info.name + strlen("blkmtd: "));
list_del(&dev->list);
free_device(dev);
}
}
#ifndef MODULE
/* Handle kernel boot params */
static int __init param_blkmtd_device(char *str)
{
int i;
for(i = 0; i < MAX_DEVICES; i++) {
device[i] = str;
DEBUG(2, "blkmtd: device setup: %d = %s\n", i, device[i]);
strsep(&str, ",");
}
return 1;
}
static int __init param_blkmtd_erasesz(char *str)
{
int i;
for(i = 0; i < MAX_DEVICES; i++) {
char *val = strsep(&str, ",");
if(val)
erasesz[i] = simple_strtoul(val, NULL, 0);
DEBUG(2, "blkmtd: erasesz setup: %d = %d\n", i, erasesz[i]);
}
return 1;
}
static int __init param_blkmtd_ro(char *str)
{
int i;
for(i = 0; i < MAX_DEVICES; i++) {
char *val = strsep(&str, ",");
if(val)
ro[i] = simple_strtoul(val, NULL, 0);
DEBUG(2, "blkmtd: ro setup: %d = %d\n", i, ro[i]);
}
return 1;
}
static int __init param_blkmtd_sync(char *str)
{
if(str[0] == '1')
sync = 1;
return 1;
}
__setup("blkmtd_device=", param_blkmtd_device);
__setup("blkmtd_erasesz=", param_blkmtd_erasesz);
__setup("blkmtd_ro=", param_blkmtd_ro);
__setup("blkmtd_sync=", param_blkmtd_sync);
#endif
/* Startup */
static int __init init_blkmtd(void)
{
int i;
info("version " VERSION);
/* Check args - device[0] is the bare minimum*/
if(!device[0]) {
err("error: missing `device' name\n");
return -EINVAL;
}
for(i = 0; i < MAX_DEVICES; i++)
add_device(device[i], ro[i], erasesz[i] << 10);
if(list_empty(&blkmtd_device_list))
return -EINVAL;
return 0;
}
module_init(init_blkmtd);
module_exit(cleanup_blkmtd);