diff -ruNX /home/joe/packages/2.4/dontdiff linux/drivers/md/dm-fs.c linux-dm/drivers/md/dm-fs.c --- linux/drivers/md/dm-fs.c Thu Jan 1 01:00:00 1970 +++ linux-dm/drivers/md/dm-fs.c Wed Aug 29 11:02:20 2001 @@ -0,0 +1,341 @@ +/* + * dm-fs.c + * + * Copyright (C) 2001 Sistina Software + * + * This software is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2, or (at + * your option) any later version. + * + * This software is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU CC; see the file COPYING. If not, write to + * the Free Software Foundation, 59 Temple Place - Suite 330, + * Boston, MA 02111-1307, USA. + */ + +/* + * procfs and devfs handling for device mapper + * + * Changelog + * + * 16/08/2001 - First version [Joe Thornber] + */ + +#include "dm.h" + +#include +#include + +/* + * /dev/device-mapper/control is the control char device used to + * create/destroy mapping devices. + * + * When a mapping device called NAME is created it appears as + * /dev/device-mapper/NAME. In addition the interface to control the + * mapping will appear in /proc/device-mapper/NAME. 
+ */
+
+const char *_fs_dir = "device-mapper";
+const char *_control_name = "control";
+
+static struct proc_dir_entry *_proc_dir;
+static struct proc_dir_entry *_control;
+
+static devfs_handle_t _dev_dir;
+
+static int line_splitter(struct file *file, const char *buffer,
+			 unsigned long count, void *data);
+static int process_control(const char *b, const char *e, int minor);
+static int process_table(const char *b, const char *e, int minor);
+static int get_word(const char *b, const char *e,
+		    const char **wb, const char **we);
+static int tok_cmp(const char *str, const char *b, const char *e);
+static void tok_cpy(char *dest, size_t max,
+		    const char *b, const char *e);
+
+/* handler invoked by line_splitter() for each line, [b, e) */
+typedef int (*process_fn)(const char *b, const char *e, int minor);
+
+/* stored in proc_dir_entry->data: the line handler and the minor it acts on */
+struct pf_data {
+	process_fn fn;
+	int minor;
+};
+
+/*
+ * Create the devfs directory, /proc/device-mapper and its 'control'
+ * entry.  Returns 0 on success and -ENOMEM on any failure, in which
+ * case everything created so far has been torn down again.
+ */
+int dm_init_fs(void)
+{
+	struct pf_data *pfd = kmalloc(sizeof(*pfd), GFP_KERNEL);
+
+	if (!pfd)
+		return -ENOMEM;
+
+	_dev_dir = devfs_mk_dir(0, _fs_dir, NULL);
+
+	if (!(_proc_dir = create_proc_entry(_fs_dir, S_IFDIR, &proc_root)))
+		goto fail;
+
+	if (!(_control = create_proc_entry(_control_name, S_IWUSR, _proc_dir)))
+		goto fail;
+
+	_control->write_proc = line_splitter;
+
+	pfd->fn = process_control;
+	pfd->minor = -1;
+	_control->data = pfd;
+
+	return 0;
+
+ fail:
+	/* pfd is not attached to any entry yet, so free it by hand */
+	kfree(pfd);
+	dm_fin_fs();
+	return -ENOMEM;
+}
+
+/*
+ * Undo dm_init_fs().  Safe to call on a partially initialised state:
+ * each piece is only removed if it exists.
+ */
+void dm_fin_fs(void)
+{
+	if (_control) {
+		/* the pf_data attached by dm_init_fs() */
+		kfree(_control->data);
+		remove_proc_entry(_control_name, _proc_dir);
+		_control = 0;
+	}
+
+	if (_proc_dir) {
+		remove_proc_entry(_fs_dir, &proc_root);
+		_proc_dir = 0;
+	}
+
+	if (_dev_dir) {
+		devfs_unregister(_dev_dir);
+		_dev_dir = 0;
+	}
+}
+
+/*
+ * Publish a mapped device: a /proc/device-mapper entry that accepts
+ * its table, and a devfs block device node.
+ */
+int dm_fs_add(struct mapped_device *md)
+{
+	struct pf_data *pfd = kmalloc(sizeof(*pfd), GFP_KERNEL);
+
+	if (!pfd)
+		return -ENOMEM;
+
+	pfd->fn = process_table;
+	pfd->minor = MINOR(md->dev);
+
+	if (!(md->pde = create_proc_entry(md->name, S_IRUGO | S_IWUSR,
+					  _proc_dir))) {
+		kfree(pfd);
+		return -ENOMEM;
+	}
+
+	md->pde->write_proc = line_splitter;
+	md->pde->data = pfd;
+
+	md->devfs_entry =
+		devfs_register(_dev_dir, 
md->name, DEVFS_FL_CURRENT_OWNER, + MAJOR(md->dev), MINOR(md->dev), + S_IFBLK | S_IRUSR | S_IWUSR | S_IRGRP, + &dm_blk_dops, NULL); + + if (!md->devfs_entry) { + kfree(pfd); + remove_proc_entry(md->name, _proc_dir); + md->pde = 0; + return -ENOMEM; + } + + return 0; +} + +int dm_fs_remove(struct mapped_device *md) +{ + if (md->pde) { + kfree(md->pde->data); + remove_proc_entry(md->name, _proc_dir); + md->pde = 0; + } + + devfs_unregister(md->devfs_entry); + md->devfs_entry = 0; + return 0; +} + +static int process_control(const char *b, const char *e, int minor) +{ + const char *wb, *we; + char name[64]; + int create = 0; + + /* + * create [minor] + * remove + */ + if (get_word(b, e, &wb, &we)) + return -EINVAL; + b = we; + + if (!tok_cmp("create", wb, we)) + create = 1; + + else if (tok_cmp("remove", wb, we)) + return -EINVAL; + + if (get_word(b, e, &wb, &we)) + return -EINVAL; + b = we; + + tok_cpy(name, sizeof(name), wb, we); + + if (!create) + return dm_remove(name); + + else { + if (!get_word(b, e, &wb, &we)) { + minor = simple_strtol(wb, (char **) &we, 10); + + if (we == wb) + return -EINVAL; + } + + return dm_create(name, minor); + } + + return -EINVAL; +} + +static int process_table(const char *b, const char *e, int minor) +{ + const char *wb, *we; + struct mapped_device *md = dm_find_by_minor(minor); + void *context; + int r; + + if (!md) + return -ENXIO; + + if (get_word(b, e, &wb, &we)) + return -EINVAL; + + if (!tok_cmp("begin", b, e)) { + /* suspend the device if it's active */ + dm_suspend(md); + + /* start loading a table */ + dm_table_start(md); + + } else if (!tok_cmp("end", b, e)) { + /* activate the device ... ... 
*/ + dm_table_complete(md); + dm_activate(md); + + } else { + /* add the new entry */ + char target[64]; + struct target *t; + offset_t start, size, high; + size_t len; + + if (get_number(&b, e, &start)) + return -EINVAL; + + if (get_number(&b, e, &size)) + return -EINVAL; + + if (get_word(b, e, &wb, &we)) + return -EINVAL; + + len = we - wb; + if (len > sizeof(target)) + return -EINVAL; + + strncpy(target, wb, len); + target[len] = '\0'; + + if (!(t = dm_get_target(target))) + return -EINVAL; + + /* check there isn't a gap */ + if ((md->num_targets && + start != md->highs[md->num_targets - 1] + 1) || + (!md->num_targets && start)) { + WARN("gap in target ranges"); + return -EINVAL; + } + + high = start + (size - 1); + if ((r = t->ctr(start, high, md, we, e, &context))) + return r; + + if ((r = dm_table_add_entry(md, high, t->map, context))) + return r; + } + + return 0; +} + +static int get_word(const char *b, const char *e, + const char **wb, const char **we) +{ + b = eat_space(b, e); + + if (b == e) + return -EINVAL; + + *wb = b; + while(b != e && !isspace((int) *b)) + b++; + *we = b; + return 0; +} + +static int line_splitter(struct file *file, const char *buffer, + unsigned long count, void *data) +{ + int r; + const char *b = buffer, *e = buffer + count, *lb; + struct pf_data *pfd = (struct pf_data *) data; + + while(b < e) { + b = eat_space(b, e); + if (b == e) + break; + + lb = b; + while((b != e) && *b != '\n') + b++; + + if ((r = pfd->fn(lb, b, pfd->minor))) + return r; + } + + return count; +} + +static int tok_cmp(const char *str, const char *b, const char *e) +{ + while (*str && b != e) { + if (*str < *b) + return -1; + + if (*str > *b) + return 1; + + str++, b++; + } + + if (!*str && b == e) + return 0; + + if (*str) + return 1; + + return -1; +} + +static void tok_cpy(char *dest, size_t max, + const char *b, const char *e) +{ + size_t len = e - b; + if (len > --max) + len = max; + strncpy(dest, b, len); + dest[len] = '\0'; +} diff -ruNX 
/home/joe/packages/2.4/dontdiff linux/drivers/md/dm-table.c linux-dm/drivers/md/dm-table.c --- linux/drivers/md/dm-table.c Thu Jan 1 01:00:00 1970 +++ linux-dm/drivers/md/dm-table.c Wed Aug 29 11:03:08 2001 @@ -0,0 +1,178 @@ +/* + * dm-table.c + * + * Copyright (C) 2001 Sistina Software + * + * This software is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2, or (at + * your option) any later version. + * + * This software is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU CC; see the file COPYING. If not, write to + * the Free Software Foundation, 59 Temple Place - Suite 330, + * Boston, MA 02111-1307, USA. + */ + +/* + * Changelog + * + * 16/08/2001 - First version [Joe Thornber] + */ + +#include "dm.h" + +static int alloc_targets(struct mapped_device *md, int num); + +static inline ulong round_up(ulong n, ulong size) +{ + ulong r = n % size; + return n + (r ? 
(size - r) : 0); +} + +static inline ulong div_up(ulong n, ulong size) +{ + return round_up(n, size) / size; +} + +static offset_t high(struct mapped_device *md, int l, int n) +{ + while (1) { + if (n >= md->counts[l]) + return (offset_t) -1; + + if (l == md->depth - 1) + return md->index[l][((n + 1) * KEYS_PER_NODE) - 1]; + + l++; + n = (n + 1) * (KEYS_PER_NODE + 1) - 1; + } +} + +static int setup_btree_index(int l, struct mapped_device *md) +{ + int n, c, cn; + + for (n = 0, cn = 0; n < md->counts[l]; n++) { + offset_t *k = md->index[l] + (n * KEYS_PER_NODE); + + for (c = 0; c < KEYS_PER_NODE; c++) + k[c] = high(md, l + 1, cn++); + cn++; + } + + return 0; +} + +void dm_free_table(struct mapped_device *md) +{ + int i; + for (i = 0; i < md->depth; i++) { + vfree(md->index[i]); + md->index[i] = 0; + } + + vfree(md->targets); + + md->highs = 0; + md->targets = 0; + + md->num_targets = 0; + md->num_allocated = 0; +} + +int dm_table_start(struct mapped_device *md) +{ + int r; + set_bit(DM_LOADING, &md->state); + + dm_free_table(md); + if ((r = alloc_targets(md, 64))) + return r; + + return 0; +} + +int dm_table_add_entry(struct mapped_device *md, offset_t high, + dm_map_fn target, void *context) +{ + if (md->num_targets >= md->num_targets && + alloc_targets(md, md->num_allocated * 2)) + return -ENOMEM; + + md->highs[md->num_targets] = high; + md->targets[md->num_targets].map = target; + md->targets[md->num_targets].private = context; + + md->num_targets++; + return 0; +} + +int dm_table_complete(struct mapped_device *md) +{ + int n, i; + + clear_bit(DM_LOADING, &md->state); + + /* how many indexes will the btree have ? 
*/ + for (n = div_up(md->num_targets, KEYS_PER_NODE), i = 1; n != 1; i++) + n = div_up(n, KEYS_PER_NODE + 1); + + md->depth = i; + md->counts[md->depth - 1] = div_up(md->num_targets, KEYS_PER_NODE); + + while (--i) + md->counts[i - 1] = div_up(md->counts[i], KEYS_PER_NODE + 1); + + for (i = 0; i < md->depth; i++) { + size_t s = NODE_SIZE * md->counts[i]; + md->index[i] = vmalloc(s); + memset(md->index[i], -1, s); + } + + /* bottom layer is easy */ + md->index[md->depth - 1] = md->highs; + + /* fill in higher levels */ + for (i = md->depth - 1; i; i--) + setup_btree_index(i - 1, md); + + set_bit(DM_LOADED, &md->state); + return 0; +} + +static int alloc_targets(struct mapped_device *md, int num) +{ + offset_t *n_highs; + struct target_instance *n_targets; + + if (!(n_highs = vmalloc(sizeof(*n_highs) * num))) + return -ENOMEM; + + if (!(n_targets = vmalloc(sizeof(*n_targets) * num))) { + vfree(n_highs); + return -ENOMEM; + } + + if (md->num_targets) { + memcpy(n_highs, md->highs, + sizeof(*n_highs) * md->num_targets); + + memcpy(n_targets, md->targets, + sizeof(*n_targets) * md->num_targets); + } + + vfree(md->highs); + vfree(md->targets); + + md->num_allocated = num; + md->highs = n_highs; + md->targets = n_targets; + + return 0; +} diff -ruNX /home/joe/packages/2.4/dontdiff linux/drivers/md/dm-target.c linux-dm/drivers/md/dm-target.c --- linux/drivers/md/dm-target.c Thu Jan 1 01:00:00 1970 +++ linux-dm/drivers/md/dm-target.c Wed Aug 29 10:56:38 2001 @@ -0,0 +1,176 @@ +/* + * dm-target.c + * + * Copyright (C) 2001 Sistina Software + * + * This software is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2, or (at + * your option) any later version. + * + * This software is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU CC; see the file COPYING. If not, write to + * the Free Software Foundation, 59 Temple Place - Suite 330, + * Boston, MA 02111-1307, USA. + */ + +/* + * 16/08/2001 - First Version [Joe Thornber] + */ + +#include "dm.h" + +static struct target *_targets; +static spinlock_t _lock = SPIN_LOCK_UNLOCKED; + +struct target *__get_target(const char *name) +{ + struct target *t; + for (t = _targets; t && strcmp(t->name, name); t = t->next) + ; + return t; +} + +struct target *dm_get_target(const char *name) +{ + struct target *t; + + spin_lock(&_lock); + t = __get_target(name); + spin_unlock(&_lock); + + return t; +} + +int register_map_target(const char *name, dm_ctr_fn ctr, + dm_dtr_fn dtr, dm_map_fn map) +{ + struct target *t = kmalloc(sizeof(*t) + strlen(name) + 1, GFP_KERNEL); + + if (!t) + return -ENOMEM; + + spin_lock(&_lock); + if (__get_target(name)) { + WARN("mapper(%s) already registered\n", name); + spin_unlock(&_lock); + return -1; /* FIXME: what's a good return value ? */ + } + + t->name = (char *) (t + 1); + strcpy(t->name, name); + + t->ctr = ctr; + t->dtr = dtr; + t->map = map; + + t->next = _targets; + _targets = t; + + spin_unlock(&_lock); + return 0; +} + + +/* + * now for a couple of simple targets: + * + * 'io-err' target always fails an io, useful for bringing up LV's + * that have holes in them. 
+ * + * 'linear' target maps a linear range of a device + */ +static int io_err_ctr(offset_t b, offset_t e, struct mapped_device *md, + const char *cb, const char *ce, void **result) +{ + /* this takes no arguments */ + *result = 0; + return 0; +} + +static void io_err_dtr(void *c) +{ + /* empty */ +} + +static int io_err_map(struct buffer_head *bh, void *context) +{ + buffer_IO_error(bh); + return 0; +} + + +struct linear_c { + kdev_t dev; + int offset; /* FIXME: we need a signed offset type */ +}; + +static int linear_ctr(offset_t low, offset_t high, struct mapped_device *md, + const char *cb, const char *ce, void **result) +{ + /* context string should be of the form: + * + */ + struct linear_c *lc; + unsigned int major, minor, start; + int r; + + if ((r = get_number(&cb, ce, &major))) + return r; + + if ((r = get_number(&cb, ce, &minor))) + return r; + + if ((r = get_number(&cb, ce, &start))) + return r; + + if (!(lc = kmalloc(sizeof(lc), GFP_KERNEL))) { + WARN("couldn't allocate memory for linear context\n"); + return -EINVAL; + } + + lc->dev = MKDEV((int) major, (int) minor); + lc->offset = (int) start - (int) low; + + if ((r = dm_add_device(md, lc->dev))) { + kfree(lc); + return r; + } + + *result = lc; + return 0; +} + +static void linear_dtr(void *c) +{ + kfree(c); +} + +static int linear_map(struct buffer_head *bh, void *context) +{ + struct linear_c *lc = (struct linear_c *) context; + + bh->b_rdev = lc->dev; + bh->b_rsector = bh->b_rsector + lc->offset; + return 1; +} + +int dm_std_targets(void) +{ + int ret; + +#define xx(n, fn) \ + if ((ret = register_map_target(n, \ + fn ## _ctr, fn ## _dtr, fn ## _map) < 0)) return ret + + xx("io-err", io_err); + xx("linear", linear); +#undef xx + + return 0; +} diff -ruNX /home/joe/packages/2.4/dontdiff linux/drivers/md/dm.c linux-dm/drivers/md/dm.c --- linux/drivers/md/dm.c Thu Jan 1 01:00:00 1970 +++ linux-dm/drivers/md/dm.c Thu Aug 30 14:03:06 2001 @@ -0,0 +1,684 @@ +/* + * device-mapper.c + * + * Copyright (C) 
2001 Sistina Software + * + * This software is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2, or (at + * your option) any later version. + * + * This software is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU CC; see the file COPYING. If not, write to + * the Free Software Foundation, 59 Temple Place - Suite 330, + * Boston, MA 02111-1307, USA. + */ + +/* + * Changelog + * + * 14/08/2001 - First Version [Joe Thornber] + */ + +#include "dm.h" + +/* defines for blk.h; they must come before the include that follows */ +#define MAJOR_NR DM_BLK_MAJOR +#define DEVICE_NR(device) MINOR(device) /* has no partition bits */ +#define DEVICE_NAME "device-mapper" /* name for messaging */ +#define DEVICE_NO_RANDOM /* no entropy to contribute */ +#define DEVICE_OFF(d) /* do-nothing */ + +#include + +#define MAX_DEVICES 64 /* size of _devs[] and of the per-minor arrays below */ +#define DEFAULT_READ_AHEAD 64 /* stored in read_ahead[MAJOR_NR] by init() */ + +const char *_name = "device-mapper"; +int _version[3] = {0, 1, 0}; /* printed as major.minor.patch by init() and fin() */ + /* state saved while a buffer's completion is redirected to dec_pending() */ +struct io_hook { + struct mapped_device *md; /* device whose 'pending' count covers this buffer */ + void (*end_io)(struct buffer_head *bh, int uptodate); /* original bh->b_end_io */ + void *context; /* original bh->b_private */ +}; + /* shorthands for taking (l) and releasing (u) _dev_lock for reading (r) or writing (w) */ +#define rl down_read(&_dev_lock) +#define ru up_read(&_dev_lock) +#define wl down_write(&_dev_lock) +#define wu up_write(&_dev_lock) + +struct rw_semaphore _dev_lock; +static struct mapped_device *_devs[MAX_DEVICES]; /* indexed by minor, 0 when the slot is free */ + +/* block device arrays, indexed by minor; init() hooks them into blk_size[], blksize_size[] and hardsect_size[] */ +static int _block_size[MAX_DEVICES]; +static int _blksize_size[MAX_DEVICES]; +static int _hardsect_size[MAX_DEVICES]; + +static int blk_open(struct inode *inode, struct file *file); +static int blk_close(struct inode *inode, struct file *file); +static int blk_ioctl(struct inode *inode, struct file *file, + uint command, ulong a); + +struct 
block_device_operations dm_blk_dops = {
+	open:	  blk_open,
+	release:  blk_close,
+	ioctl:	  blk_ioctl
+};
+
+static int request(request_queue_t *q, int rw, struct buffer_head *bh);
+
+/*
+ * setup and teardown the driver
+ */
+static int init(void)
+{
+	int ret;
+
+	init_rwsem(&_dev_lock);
+
+	if ((ret = dm_init_fs()))
+		return ret;
+
+	if (dm_std_targets()) {
+		ret = -EIO;	/* FIXME: better error value */
+		goto bad_fs;
+	}
+
+	/* set up the arrays */
+	read_ahead[MAJOR_NR] = DEFAULT_READ_AHEAD;
+	blk_size[MAJOR_NR] = _block_size;
+	blksize_size[MAJOR_NR] = _blksize_size;
+	hardsect_size[MAJOR_NR] = _hardsect_size;
+
+	if (devfs_register_blkdev(MAJOR_NR, _name, &dm_blk_dops) < 0) {
+		printk(KERN_ERR "%s -- register_blkdev failed\n", _name);
+		ret = -EIO;
+		goto bad_arrays;
+	}
+
+	blk_queue_make_request(BLK_DEFAULT_QUEUE(MAJOR_NR), request);
+
+	printk(KERN_INFO "%s %d.%d.%d initialised\n", _name,
+	       _version[0], _version[1], _version[2]);
+	return 0;
+
+	/*
+	 * A failed init must not leave the /proc and devfs entries or
+	 * the array hooks behind.
+	 */
+ bad_arrays:
+	read_ahead[MAJOR_NR] = 0;
+	blk_size[MAJOR_NR] = 0;
+	blksize_size[MAJOR_NR] = 0;
+	hardsect_size[MAJOR_NR] = 0;
+
+ bad_fs:
+	dm_fin_fs();
+	return ret;
+}
+
+static void fin(void)
+{
+	dm_fin_fs();
+
+	if (devfs_unregister_blkdev(MAJOR_NR, _name) < 0)
+		printk(KERN_ERR "%s -- unregister_blkdev failed\n", _name);
+
+	read_ahead[MAJOR_NR] = 0;
+	blk_size[MAJOR_NR] = 0;
+	blksize_size[MAJOR_NR] = 0;
+	hardsect_size[MAJOR_NR] = 0;
+
+	printk(KERN_INFO "%s %d.%d.%d cleaned up\n", _name,
+	       _version[0], _version[1], _version[2]);
+}
+
+/*
+ * block device functions
+ */
+
+/* open succeeds only for an existing, active device; counts the user */
+static int blk_open(struct inode *inode, struct file *file)
+{
+	int minor = MINOR(inode->i_rdev);
+	struct mapped_device *md;
+
+	if (minor >= MAX_DEVICES)
+		return -ENXIO;
+
+	wl;
+	md = _devs[minor];
+
+	if (!md || !is_active(md)) {
+		wu;
+		return -ENXIO;
+	}
+
+	md->use_count++;
+	wu;
+
+	MOD_INC_USE_COUNT;
+	return 0;
+}
+
+/* drop the reference taken by blk_open() */
+static int blk_close(struct inode *inode, struct file *file)
+{
+	int minor = MINOR(inode->i_rdev);
+	struct mapped_device *md;
+
+	if (minor >= MAX_DEVICES)
+		return -ENXIO;
+
+	wl;
+	md = _devs[minor];
+	if (!md || md->use_count < 1) {
+		WARN("reference count in mapped_device incorrect");
+		wu;
+		return -ENXIO;
+	}
+
+	
md->use_count--;
+	wu;
+
+	MOD_DEC_USE_COUNT;
+	return 0;
+}
+
+/* the block ioctls: size query, buffer flush, read-ahead get and set */
+static int blk_ioctl(struct inode *inode, struct file *file,
+		     uint command, ulong a)
+{
+	/* FIXME: check in the latest Rubini that all expected ioctl's
+	   are supported */
+
+	int minor = MINOR(inode->i_rdev);
+	long size;
+
+	/* minor indexes the fixed-size per-device arrays below */
+	if (minor >= MAX_DEVICES)
+		return -ENXIO;
+
+	switch (command) {
+	case BLKGETSIZE:
+		/* the sector size is only filled in by dm_activate() */
+		if (!_hardsect_size[minor])
+			return -ENXIO;
+
+		size = _block_size[minor] * 1024 / _hardsect_size[minor];
+		if (copy_to_user((void *) a, &size, sizeof(long)))
+			return -EFAULT;
+		break;
+
+	case BLKFLSBUF:
+		if (!capable(CAP_SYS_ADMIN))
+			return -EACCES;
+		fsync_dev(inode->i_rdev);
+		invalidate_buffers(inode->i_rdev);
+		return 0;
+
+	case BLKRAGET:
+		/*
+		 * Go through a long so exactly sizeof(long) valid bytes
+		 * are copied whatever the element type of read_ahead[].
+		 */
+		size = read_ahead[MAJOR(inode->i_rdev)];
+		if (copy_to_user((void *) a, &size, sizeof(long)))
+			return -EFAULT;
+		return 0;
+
+	case BLKRASET:
+		if (!capable(CAP_SYS_ADMIN))
+			return -EACCES;
+		read_ahead[MAJOR(inode->i_rdev)] = a;
+		return 0;
+
+	case BLKRRPART:
+		return -EINVAL;
+
+	default:
+		printk(KERN_WARNING "%s - unknown block ioctl %d",
+		       _name, command);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/* FIXME: These should have their own slab */
+inline static struct io_hook *alloc_io_hook(void)
+{
+	return kmalloc(sizeof(struct io_hook), GFP_NOIO);
+}
+
+inline static void free_io_hook(struct io_hook *ih)
+{
+	kfree(ih);
+}
+
+inline static struct deferred_io *alloc_deferred(void)
+{
+	return kmalloc(sizeof(struct deferred_io), GFP_NOIO);
+}
+
+inline static void free_deferred(struct deferred_io *di)
+{
+	kfree(di);
+}
+
+/*
+ * end_io hook installed by __map_buffer(): drop the device's pending
+ * count, restore the buffer's own completion data and call it.
+ */
+static void dec_pending(struct buffer_head *bh, int uptodate)
+{
+	struct io_hook *ih = bh->b_private;
+
+	if (atomic_dec_and_test(&ih->md->pending))
+		/* nudge anyone waiting on suspend queue */
+		wake_up_interruptible(&ih->md->wait);
+
+	bh->b_end_io = ih->end_io;
+	bh->b_private = ih->context;
+	free_io_hook(ih);
+
+	bh->b_end_io(bh, uptodate);
+}
+
+/*
+ * Park an io on the device's deferred list while it is suspended.
+ * Returns 1 if queued, 0 if the device turned out to be active (the
+ * caller should map the buffer itself), -ENOMEM on failure.
+ */
+static int queue_io(struct mapped_device *md, struct buffer_head *bh, int rw)
+{
+	struct deferred_io *di = alloc_deferred();
+
+	if (!di)
+		return -ENOMEM;
+
+	wl;
+	if 
(test_bit(DM_ACTIVE, &md->state)) {
+		wu;
+		/* nothing was queued, so the record is ours to release */
+		free_deferred(di);
+		return 0;
+	}
+
+	di->bh = bh;
+	di->rw = rw;
+	di->next = md->deferred;
+	md->deferred = di;
+	wu;
+
+	return 1;
+}
+
+
+/*
+ * Hand the buffer to the map function of target 'node'.  Returns 1
+ * if the buffer was remapped, 0 if it could not be mapped.  When the
+ * map function returns > 0 the buffer's end_io is hooked so that
+ * md->pending tracks it.
+ */
+inline static int __map_buffer(struct mapped_device *md,
+			       struct buffer_head *bh, int node)
+{
+	dm_map_fn fn;
+	void *context;
+	struct io_hook *ih = 0;
+	int r;
+	struct target_instance *ti;
+
+	/* a sector beyond the last target has no entry to map through */
+	if (node >= md->num_targets)
+		return 0;
+
+	ti = md->targets + node;
+	fn = ti->map;
+	context = ti->private;
+
+	if (!fn)
+		return 0;
+
+	ih = alloc_io_hook();
+
+	if (!ih)
+		return 0;
+
+	ih->md = md;
+	ih->end_io = bh->b_end_io;
+	ih->context = bh->b_private;
+
+	r = fn(bh, context);
+
+	if (r > 0) {
+		/* hook the end io request fn */
+		atomic_inc(&md->pending);
+		bh->b_end_io = dec_pending;
+		bh->b_private = ih;
+
+	} else if (r == 0)
+		/* we don't need to hook */
+		free_io_hook(ih);
+
+	else if (r < 0) {
+		free_io_hook(ih);
+		return 0;
+	}
+
+	return 1;
+}
+
+/*
+ * Walk the btree index and return the position in the targets array
+ * of the first key that is >= the buffer's sector.
+ */
+inline static int __find_node(struct mapped_device *md, struct buffer_head *bh)
+{
+	int i = 0, l, r = 0;
+	offset_t *node;
+
+	/* search the btree for the correct target */
+	for (l = 0; l < md->depth; l++) {
+		r = ((KEYS_PER_NODE + 1) * r) + i;
+		node = md->index[l] + (r * KEYS_PER_NODE);
+
+		for (i = 0; i < KEYS_PER_NODE; i++)
+			if (node[i] >= bh->b_rsector)
+				break;
+	}
+
+	return (KEYS_PER_NODE * r) + i;
+}
+
+/*
+ * make_request function for the device-mapper major.  Following the
+ * convention used throughout this function it returns 1 when the
+ * buffer was remapped and should be passed on, and 0 when it has been
+ * dealt with here (failed or deferred).
+ */
+static int request(request_queue_t *q, int rw, struct buffer_head *bh)
+{
+	struct mapped_device *md;
+	int r, minor = MINOR(bh->b_rdev);
+
+	/* fail the buffer; a negative errno would read as "remapped" */
+	if (minor >= MAX_DEVICES) {
+		buffer_IO_error(bh);
+		return 0;
+	}
+
+	rl;
+	md = _devs[minor];
+
+	if (!md || !test_bit(DM_LOADED, &md->state))
+		goto bad;
+
+	/* if we're suspended we have to queue this io for later */
+	if (!test_bit(DM_ACTIVE, &md->state)) {
+		ru;
+		r = queue_io(md, bh, rw);
+
+		if (r < 0) {
+			buffer_IO_error(bh);
+			return 0;
+
+		} else if (r > 0)
+			return 0; /* deferred successfully */
+
+		rl;	/* FIXME: there's still a race here */
+	}
+
+	if (!__map_buffer(md, bh, __find_node(md, bh)))
+		goto bad;
+
+	ru;
+	return 1;
+
+ bad:
+	ru;
+	
buffer_IO_error(bh);
+	return 0;
+}
+
+/*
+ * Return 'minor' if that slot is free, -1 if it is taken or out of
+ * range.  Caller holds _dev_lock.
+ */
+static inline int __specific_dev(int minor)
+{
+	/* valid slots are 0 .. MAX_DEVICES - 1 */
+	if (minor >= MAX_DEVICES) {
+		WARN("request for a mapped_device > than MAX_DEVICES");
+		return -1;
+	}
+
+	if (!_devs[minor])
+		return minor;
+
+	return -1;
+}
+
+/* return the first free slot, or -1.  Caller holds _dev_lock. */
+static inline int __any_old_dev(void)
+{
+	int i;
+
+	for (i = 0; i < MAX_DEVICES; i++)
+		if (!_devs[i])
+			return i;
+
+	return -1;
+}
+
+/*
+ * Allocate a zeroed mapped_device and install it in _devs[].  A
+ * negative 'minor' means "any free slot".  Returns 0 if there is no
+ * memory or no suitable slot.
+ */
+static struct mapped_device *alloc_dev(int minor)
+{
+	struct mapped_device *md = kmalloc(sizeof(*md), GFP_KERNEL);
+
+	if (!md)
+		return 0;
+
+	memset(md, 0, sizeof(*md));
+
+	wl;
+	minor = (minor < 0) ? __any_old_dev() : __specific_dev(minor);
+
+	if (minor < 0) {
+		WARN("no free devices available");
+		wu;
+		kfree(md);
+		return 0;
+	}
+
+	md->dev = MKDEV(DM_BLK_MAJOR, minor);
+	md->name[0] = '\0';
+	md->state = 0;
+
+	init_waitqueue_head(&md->wait);
+
+	_devs[minor] = md;
+	wu;
+
+	return md;
+}
+
+/* find a device by name.  Caller holds _dev_lock. */
+static inline struct mapped_device *__find_name(const char *name)
+{
+	int i;
+	for (i = 0; i < MAX_DEVICES; i++)
+		if (_devs[i] && !strcmp(_devs[i]->name, name))
+			return _devs[i];
+
+	return 0;
+}
+
+/* open one underlying device read/write */
+static int open_dev(struct dev_list *d)
+{
+	int err;
+
+	if (!(d->bd = bdget(kdev_t_to_nr(d->dev))))
+		return -ENOMEM;
+
+	if ((err = blkdev_get(d->bd, FMODE_READ|FMODE_WRITE, 0, BDEV_FILE))) {
+		bdput(d->bd);
+		return err;
+	}
+
+	return 0;
+}
+
+/* release a device opened with open_dev() */
+static void close_dev(struct dev_list *d)
+{
+	blkdev_put(d->bd, BDEV_FILE);
+	bdput(d->bd);
+	d->bd = 0;
+}
+
+/*
+ * Smallest hardware sector size among the devices of this md
+ * (INT_MAX if it has none).
+ */
+static int __find_hardsect_size(struct mapped_device *md)
+{
+	int r = INT_MAX, s;
+	struct dev_list *dl;
+
+	for (dl = md->devices; dl; dl = dl->next) {
+		s = get_hardsect_size(dl->dev);
+		if (s < r)
+			r = s;
+	}
+
+	return r;
+}
+
+struct mapped_device *dm_find_by_name(const char *name)
+{
+	struct mapped_device *md;
+
+	rl;
+	md = __find_name(name);
+	ru;
+
+	return md;
+}
+
+/* look a device up by minor; 0 if the minor is unused or out of range */
+struct mapped_device *dm_find_by_minor(int minor)
+{
+	struct mapped_device *md;
+
+	if (minor < 0 || minor >= MAX_DEVICES)
+		return 0;
+
+	rl;
+	md = _devs[minor];
+	ru;
+
+	return md;
+}
+
+int dm_create(const char *name, int minor)
+{
+	int 
r;
+	struct mapped_device *md;
+
+	if (minor >= MAX_DEVICES)
+		return -ENXIO;
+
+	/* md->name is a fixed buffer filled with strcpy() below */
+	if (strlen(name) >= DM_NAME_LEN)
+		return -EINVAL;
+
+	if (!(md = alloc_dev(minor)))
+		return -ENOMEM;
+
+	/*
+	 * alloc_dev() chose the slot and has already installed md in
+	 * it.  The caller may have passed a negative "any" minor, so
+	 * the argument must not be used as an index.
+	 */
+	minor = MINOR(md->dev);
+
+	wl;
+	if (__find_name(name)) {
+		WARN("device with that name already exists");
+		r = -EINVAL;
+		goto bad;
+	}
+
+	strcpy(md->name, name);
+
+	if ((r = dm_fs_add(md)))
+		goto bad;
+	wu;
+
+	return 0;
+
+ bad:
+	/* empty the slot first so nobody can find the freed device */
+	_devs[minor] = 0;
+	wu;
+	kfree(md);
+	return r;
+}
+
+/*
+ * Destroy the named device.  Fails with -ENXIO if there is no such
+ * device and -EPERM while it is open.
+ */
+int dm_remove(const char *name)
+{
+	struct mapped_device *md;
+	struct dev_list *d, *n;
+	int minor, r;
+
+	wl;
+	if (!(md = __find_name(name))) {
+		wu;
+		return -ENXIO;
+	}
+
+	if (md->use_count) {
+		wu;
+		return -EPERM;
+	}
+
+	if ((r = dm_fs_remove(md))) {
+		wu;
+		return r;
+	}
+
+	dm_free_table(md);
+	for (d = md->devices; d; d = n) {
+		n = d->next;
+		/* dm_activate() holds the devices open while active */
+		if (is_active(md) && d->bd)
+			close_dev(d);
+		kfree(d);
+	}
+
+	minor = MINOR(md->dev);
+	kfree(md);
+	_devs[minor] = 0;
+	wu;
+
+	return 0;
+}
+
+/*
+ * Record that 'md' uses 'dev', so that it gets opened on activation
+ * and closed on suspend.  Returns 0 or -ENOMEM.
+ */
+int dm_add_device(struct mapped_device *md, kdev_t dev)
+{
+	struct dev_list *d = kmalloc(sizeof(*d), GFP_KERNEL);
+
+	if (!d)
+		return -ENOMEM;
+
+	d->dev = dev;
+	d->bd = 0;	/* not opened yet */
+	d->next = md->devices;
+	md->devices = d;
+
+	return 0;
+}
+
+/*
+ * Unhook the list of deferred ios from the device.  Caller holds
+ * _dev_lock for writing.
+ */
+static struct deferred_io *__detach_deferred_io(struct mapped_device *md)
+{
+	struct deferred_io *list = md->deferred;
+
+	md->deferred = 0;
+	return list;
+}
+
+/*
+ * Resubmit a detached list of deferred ios.  Call this without
+ * _dev_lock held: the buffers still point at the mapped device, so
+ * resubmitting them is expected to come back through request(),
+ * which takes the lock for reading.
+ */
+static void flush_deferred_io(struct deferred_io *c)
+{
+	struct deferred_io *n;
+
+	for (; c; c = n) {
+		n = c->next;
+		generic_make_request(c->rw, c->bh);
+		free_deferred(c);
+	}
+}
+
+/*
+ * Bring a loaded device into service: open the underlying devices,
+ * publish its size and resubmit any io deferred while suspended.
+ */
+int dm_activate(struct mapped_device *md)
+{
+	int ret, minor;
+	struct dev_list *d, *od;
+	struct deferred_io *deferred;
+
+	wl;
+
+	if (is_active(md)) {
+		wu;
+		return 0;
+	}
+
+	if (!md->num_targets) {
+		wu;
+		return -ENXIO;
+	}
+
+	/* open all the devices */
+	for (d = md->devices; d; d = d->next)
+		if ((ret = open_dev(d)))
+			goto bad;
+
+	minor = MINOR(md->dev);
+
+	_block_size[minor] = (md->highs[md->num_targets - 1] + 1) >> 1;
+	_blksize_size[minor] = BLOCK_SIZE; /* FIXME: this depends on
+					       the mapping table */
+	_hardsect_size[minor] = __find_hardsect_size(md);
+
+	register_disk(NULL, md->dev, 1, &dm_blk_dops, _block_size[minor]);
+
+	set_bit(DM_ACTIVE, &md->state);
+
+	/* take the list under the lock, submit it after dropping it */
+	deferred = __detach_deferred_io(md);
+	wu;
+
+	flush_deferred_io(deferred);
+
+	return 0;
+ + bad: + od = d; + for (d = md->devices; d != od; d = d->next) + close_dev(d); + ru; + + return ret; +} + +void dm_suspend(struct mapped_device *md) +{ + DECLARE_WAITQUEUE(wait, current); + struct dev_list *d; + if (!is_active(md)) + return; + + /* wait for all the pending io to flush */ + add_wait_queue(&md->wait, &wait); + current->state = TASK_INTERRUPTIBLE; + do { + wl; + if (!atomic_read(&md->pending)) + break; + + wu; + schedule(); + + } while (1); + + current->state = TASK_RUNNING; + remove_wait_queue(&md->wait, &wait); + + /* close all the devices */ + for (d = md->devices; d; d = d->next) + close_dev(d); + + clear_bit(DM_ACTIVE, &md->state); + wu; +} + + +/* + * module hooks + */ +module_init(init); +module_exit(fin); + +/* + * Local variables: + * c-file-style: "linux" + * End: + */ diff -ruNX /home/joe/packages/2.4/dontdiff linux/drivers/md/dm.h linux-dm/drivers/md/dm.h --- linux/drivers/md/dm.h Thu Jan 1 01:00:00 1970 +++ linux-dm/drivers/md/dm.h Thu Aug 30 13:54:05 2001 @@ -0,0 +1,268 @@ +/* + * dm.h + * + * Copyright (C) 2001 Sistina Software + * + * This software is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2, or (at + * your option) any later version. + * + * This software is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU CC; see the file COPYING. If not, write to + * the Free Software Foundation, 59 Temple Place - Suite 330, + * Boston, MA 02111-1307, USA. 
+ */ + +/* + * Internal header file for device mapper + * + * Changelog + * + * 16/08/2001 - First version [Joe Thornber] + */ + +/* + * This driver attempts to provide a generic way of specifying logical + * devices which are mapped onto other devices. + * + * It does this by mapping sections of the logical device onto 'targets'. + * + * When the logical device is accessed the make_request function looks up + * the correct target for the given sector, and then asks this target + * to do the remapping. + * + * (dm-table.c) A btree like structure is used to hold the sector + * range -> target mapping. Because we know all the entries in the + * btree in advance we can make a very compact tree, omitting pointers + * to child nodes, (child nodes locations can be calculated). Each + * node of the btree is one level 1 cache line in size, this gives a small + * performance boost. + * + * A userland test program for the btree gave the following results on a + * 1 Gigahertz Athlon machine: + * + * entries in btree lookups per second + * ---------------- ------------------ + * 5 25,000,000 + * 1000 7,700,000 + * 10,000,000 3,800,000 + * + * Of course these results should be taken with a pinch of salt; the lookups + * were sequential and there were no other applications (other than X + emacs) + * running to give any pressure on the level 1 cache. + * + * Typical LVM users would find they have very few targets for each + * LV (probably less than 10). + * + * (dm-target.c) Target types are not hard coded, instead the + * register_map_target function should be called. A target type is + * specified using three functions (see the header): + * + * dm_ctr_fn - takes a string and constructs a target specific piece of + * context data. + * dm_dtr_fn - destroy contexts. + * dm_map_fn - function that takes a buffer_head and some previously + * constructed context and performs the remapping. 
+ * + * Currently there are two trivial mappers, which are + * automatically registered: 'linear', and 'io-err'. Linear alone + * is enough to implement most LVM features (omitting striped volumes + * and snapshots). + * + * (dm-fs.c) The driver is controlled through a /proc interface: + * /proc/device-mapper/control allows you to create and remove devices + * by 'cat'ing a line of the following format: + * + * create NAME [minor no] + * remove NAME + * + * /proc/device-mapper/NAME accepts the mapping table: + * + * begin + * START SIZE TARGET [TARGET ARGS] + * ... + * end + * + * The begin/end lines are nasty, they should be handled by open/close + * for the file. + * + * At the moment the table assumes 32 bit keys (sectors), the move to + * 64 bits will involve no interface changes, since the tables will be + * read in as ascii data. A different table implementation can + * therefore be provided at another time. Either just by changing offset_t + * to 64 bits, or maybe implementing a structure which looks up the keys in + * stages (ie, 32 bits at a time). + * + * More interesting targets: + * + * striped mapping; given a stripe size and a number of device regions + * this would stripe data across the regions. Especially useful, since + * we could limit each striped region to a 32 bit area and then avoid + * nasty 64 bit %'s. + * + * mirror mapping (reflector ?); would set off a kernel thread slowly + * copying data from one region to another, ensuring that any new + * writes got copied to both destinations correctly. Great for + * implementing pvmove. Not sure how userland would be notified that + * the copying process had completed. Possibly by reading a /proc entry + * for the LV. Could also use poll() for this kind of thing. 
+ */ + + +#ifndef DM_INTERNAL_H +#define DM_INTERNAL_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define MAX_DEPTH 16 /* length of the counts[] and index[] arrays in struct mapped_device */ +#define NODE_SIZE L1_CACHE_BYTES /* size of one btree node */ +#define KEYS_PER_NODE (NODE_SIZE / sizeof(offset_t)) /* offset_t keys that fit in one node */ +#define DM_NAME_LEN 64 /* size of mapped_device.name */ + +enum { /* DM_ACTIVE is used as a bit number on mapped_device.state by is_active() */ + DM_LOADED = 0, + DM_LOADING, + DM_ACTIVE, +}; + +/* devices that a metadevice should use and hence open/close */ +struct dev_list { + kdev_t dev; + struct block_device *bd; + struct dev_list *next; +}; + +/* io that had to be deferred while we were suspended */ +struct deferred_io { + int rw; + struct buffer_head *bh; + struct deferred_io *next; +}; + +/* btree leaf, these do the actual mapping */ +struct target_instance { + dm_map_fn map; + void *private; +}; + +struct mapped_device { /* one mapped device: identity, state, btree table, fs handles, device list */ + kdev_t dev; + char name[DM_NAME_LEN]; + + int use_count; + int state; /* is_active() tests bit DM_ACTIVE of this field */ + + wait_queue_head_t wait; + atomic_t pending; /* # of 'in flight' buffers */ + + /* a list of io's that arrived while we were suspended */ + struct deferred_io *deferred; + + /* btree table */ + int depth; + int counts[MAX_DEPTH]; /* in nodes */ + offset_t *index[MAX_DEPTH]; + + int num_targets; + int num_allocated; + offset_t *highs; + struct target_instance *targets; + + /* used by dm-fs.c */ + devfs_handle_t devfs_entry; + struct proc_dir_entry *pde; + + /* a list of devices used by this md */ + struct dev_list *devices; +}; + +/* information about a target type */ +struct target { + char *name; + dm_ctr_fn ctr; + dm_dtr_fn dtr; + dm_map_fn map; + + struct target *next; +}; + +extern struct block_device_operations dm_blk_dops; + +/* dm-target.c */ +struct target *dm_get_target(const char *name); +int dm_std_targets(void); + +/* dm.c */ +struct mapped_device *dm_find_by_name(const char *name); +struct mapped_device *dm_find_by_minor(int minor); + +int dm_create(const char *name, int minor); +int dm_remove(const char *name); + +int dm_activate(struct mapped_device *md); +void 
dm_suspend(struct mapped_device *md); + +/* dm-table.c */ +int dm_table_start(struct mapped_device *md); +int dm_table_add_entry(struct mapped_device *md, offset_t high, + dm_map_fn target, void *context); +int dm_table_complete(struct mapped_device *md); +void dm_free_table(struct mapped_device *md); + + +/* dm-fs.c */ +int dm_init_fs(void); +void dm_fin_fs(void); + +int dm_fs_add(struct mapped_device *md); +int dm_fs_remove(struct mapped_device *md); + + +#define WARN(f, x...) printk(KERN_WARNING "device-mapper: " f "\n" , ## x) /* printk at KERN_WARNING, prefixed "device-mapper: ", newline appended */ + +inline static int is_active(struct mapped_device *md) /* returns test_bit(DM_ACTIVE) on md->state */ +{ + return test_bit(DM_ACTIVE, &md->state); /* NOTE(review): state is declared int; confirm test_bit() is safe on an int field for every target arch */ +} + +inline static const char *eat_space(const char *b, const char *e) /* skip isspace() chars from b, never past e; returns the new position */ +{ + while(b != e && isspace((int) *b)) + b++; + + return b; +} + +inline static int get_number(const char **b, const char *e, unsigned int *n) /* parse a decimal number at *b into *n and advance *b; 0 on success, -EINVAL if nothing to parse */ +{ + char *ptr; + *b = eat_space(*b, e); + if (*b >= e) /* only whitespace before e */ + return -EINVAL; + + *n = simple_strtoul(*b, &ptr, 10); /* NOTE(review): e is not passed, so the scan is not bounded by it; confirm the buffer holds a non-digit at or before e */ + if (ptr == *b) /* no digits consumed */ + return -EINVAL; + *b = ptr; + + return 0; +} + +#endif /* DM_INTERNAL_H */ diff -ruNX /home/joe/packages/2.4/dontdiff linux/include/linux/device-mapper.h linux-dm/include/linux/device-mapper.h --- linux/include/linux/device-mapper.h Thu Jan 1 01:00:00 1970 +++ linux-dm/include/linux/device-mapper.h Tue Aug 28 11:35:56 2001 @@ -0,0 +1,61 @@ +/* + * device-mapper.h + * + * Copyright (C) 2001 Sistina Software + * + * This software is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2, or (at + * your option) any later version. + * + * This software is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU CC; see the file COPYING. 
If not, write to + * the Free Software Foundation, 59 Temple Place - Suite 330, + * Boston, MA 02111-1307, USA. + */ + +/* + * Changelog + * + * 14/08/2001 - First version [Joe Thornber] + */ + +#ifndef DEVICE_MAPPER_H +#define DEVICE_MAPPER_H + +#ifdef __KERNEL__ + +#include + +/* FIXME: Use value from local range for now, for co-existence with LVM 1 */ +#define DM_BLK_MAJOR 124 + +struct mapped_device; +typedef unsigned int offset_t; /* key (sector) type used by the mapping table */ + +/* constructor, destructor and map fn types */ +typedef int (*dm_ctr_fn)(offset_t b, offset_t e, struct mapped_device *md, + const char *cb, const char *ce, void **result); /* NOTE(review): cb/ce presumably delimit the argument string and *result receives the new context; confirm against dm-target.c */ +typedef void (*dm_dtr_fn)(void *c); +typedef int (*dm_map_fn)(struct buffer_head *bh, void *context); + +int register_map_target(const char *name, dm_ctr_fn ctr, dm_dtr_fn dtr, + dm_map_fn map); + +/* constructors should call this to make sure any destination devices + are handled correctly (ie. opened/closed) */ +int dm_add_device(struct mapped_device *md, kdev_t dev); + +#endif /* __KERNEL__ */ +#endif /* DEVICE_MAPPER_H */ + +/* + * Local variables: + * c-file-style: "linux" + * End: + */