1
0
mirror of git://sourceware.org/git/lvm2.git synced 2025-01-06 17:18:29 +03:00
lvm2/driver/device-mapper/patches/00_latest
2001-08-31 09:14:55 +00:00

1733 lines
38 KiB
Plaintext

diff -ruNX /home/joe/packages/2.4/dontdiff linux/drivers/md/dm-fs.c linux-dm/drivers/md/dm-fs.c
--- linux/drivers/md/dm-fs.c Thu Jan 1 01:00:00 1970
+++ linux-dm/drivers/md/dm-fs.c Wed Aug 29 11:02:20 2001
@@ -0,0 +1,341 @@
+/*
+ * dm-fs.c
+ *
+ * Copyright (C) 2001 Sistina Software
+ *
+ * This software is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2, or (at
+ * your option) any later version.
+ *
+ * This software is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU CC; see the file COPYING. If not, write to
+ * the Free Software Foundation, 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+/*
+ * procfs and devfs handling for device mapper
+ *
+ * Changelog
+ *
+ * 16/08/2001 - First version [Joe Thornber]
+ */
+
+#include "dm.h"
+#include <linux/proc_fs.h>
+#include <linux/ctype.h>
+#include <asm/uaccess.h>
+
+/*
+ * /dev/device-mapper/control is the control char device used to
+ * create/destroy mapping devices.
+ *
+ * When a mapping device called <name> is created it appears as
+ * /dev/device-mapper/<name>. In addition the interface to control the
+ * mapping will appear in /proc/device-mapper/<name>.
+ */
+
+const char *_fs_dir = "device-mapper";
+const char *_control_name = "control";
+
+static struct proc_dir_entry *_proc_dir;
+static struct proc_dir_entry *_control;
+
+static devfs_handle_t _dev_dir;
+
+static int line_splitter(struct file *file, const char *buffer,
+ unsigned long count, void *data);
+static int process_control(const char *b, const char *e, int minor);
+static int process_table(const char *b, const char *e, int minor);
+static int get_word(const char *b, const char *e,
+ const char **wb, const char **we);
+static int tok_cmp(const char *str, const char *b, const char *e);
+static void tok_cpy(char *dest, size_t max,
+ const char *b, const char *e);
+
+typedef int (*process_fn)(const char *b, const char *e, int minor);
+
+struct pf_data {
+ process_fn fn; /* handler that line_splitter() calls once per line */
+ int minor; /* passed through to fn; -1 for the control file */
+};
+
+/*
+ * Create the devfs directory and /proc/device-mapper/control.
+ * Returns 0 on success, -ENOMEM if anything could not be allocated.
+ */
+int dm_init_fs(void)
+{
+	struct pf_data *pfd = kmalloc(sizeof(*pfd), GFP_KERNEL);
+	if (!pfd)
+		return -ENOMEM;	/* was 0: failure was reported as success */
+
+	_dev_dir = devfs_mk_dir(0, _fs_dir, NULL);
+	if (!(_proc_dir = create_proc_entry(_fs_dir, S_IFDIR, &proc_root)))
+		goto fail;
+	if (!(_control = create_proc_entry(_control_name, S_IWUSR, _proc_dir)))
+		goto fail;
+
+	_control->write_proc = line_splitter;
+	pfd->fn = process_control;
+	pfd->minor = -1;
+	_control->data = pfd;
+	return 0;
+
+ fail:
+	kfree(pfd);	/* not yet attached to any proc entry */
+	dm_fin_fs();
+	return -ENOMEM;
+}
+
+/* Undo dm_init_fs(); each piece is only removed if it was created. */
+void dm_fin_fs(void)
+{
+	if (_control) {
+		kfree(_control->data);	/* pf_data allocated by dm_init_fs() */
+		remove_proc_entry(_control_name, _proc_dir);
+		_control = 0;
+	}
+	if (_proc_dir) {
+		remove_proc_entry(_fs_dir, &proc_root);
+		_proc_dir = 0;
+	}
+	if (_dev_dir)
+		devfs_unregister(_dev_dir);
+}
+
+/*
+ * Publish a mapped device: create its /proc table file and its
+ * devfs block node.  Returns 0 on success or -ENOMEM.
+ */
+int dm_fs_add(struct mapped_device *md)
+{
+	struct pf_data *ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
+
+	if (!ctx)
+		return -ENOMEM;
+	ctx->fn = process_table;
+	ctx->minor = MINOR(md->dev);
+
+	md->pde = create_proc_entry(md->name, S_IRUGO | S_IWUSR, _proc_dir);
+	if (!md->pde) {
+		kfree(ctx);
+		return -ENOMEM;
+	}
+	md->pde->write_proc = line_splitter;
+	md->pde->data = ctx;
+
+	md->devfs_entry = devfs_register(_dev_dir, md->name,
+			DEVFS_FL_CURRENT_OWNER, MAJOR(md->dev), MINOR(md->dev),
+			S_IFBLK | S_IRUSR | S_IWUSR | S_IRGRP,
+			&dm_blk_dops, NULL);
+	if (md->devfs_entry)
+		return 0;
+
+	/* devfs failed: take the proc entry back out again */
+	kfree(ctx);
+	remove_proc_entry(md->name, _proc_dir);
+	md->pde = 0;
+	return -ENOMEM;
+}
+
+int dm_fs_remove(struct mapped_device *md) /* always returns 0 */
+{
+ if (md->pde) {
+ kfree(md->pde->data); /* the pf_data attached by dm_fs_add() */
+ remove_proc_entry(md->name, _proc_dir);
+ md->pde = 0;
+ }
+ /* the devfs node is unregistered whether or not a proc entry existed */
+ devfs_unregister(md->devfs_entry);
+ md->devfs_entry = 0;
+ return 0;
+}
+
+/*
+ * Handle one line written to the control file:
+ *   create <name> [minor]
+ *   remove <name>
+ * 'minor' arrives as the default and is only replaced when the line
+ * supplies one.  Returns the result of dm_create()/dm_remove(), or
+ * -EINVAL when the line cannot be parsed.
+ */
+static int process_control(const char *b, const char *e, int minor)
+{
+	const char *tb, *te;	/* bounds of the current token */
+	char name[64];
+	int creating;
+
+	/* first token: the verb */
+	if (get_word(b, e, &tb, &te))
+		return -EINVAL;
+	b = te;
+
+	creating = !tok_cmp("create", tb, te);
+	if (!creating && tok_cmp("remove", tb, te))
+		return -EINVAL;
+
+	/* second token: the device name (tok_cpy() truncates it to fit) */
+	if (get_word(b, e, &tb, &te))
+		return -EINVAL;
+	b = te;
+
+	tok_cpy(name, sizeof(name), tb, te);
+
+	if (!creating)
+		return dm_remove(name);
+
+	/* optional third token: an explicit minor number */
+	if (get_word(b, e, &tb, &te) == 0) {
+		minor = simple_strtol(tb, (char **) &te, 10);
+		if (te == tb)
+			return -EINVAL;
+	}
+
+	return dm_create(name, minor);
+}
+
+/*
+ * Handle one line written to a device's table file: "begin", "end", or
+ * "<start> <size> <target> <args>".  Returns 0 or a negative errno.
+ */
+static int process_table(const char *b, const char *e, int minor)
+{
+	const char *wb, *we;
+	struct mapped_device *md = dm_find_by_minor(minor);
+	void *context;
+	int r;
+
+	if (!md)
+		return -ENXIO;
+
+	if (get_word(b, e, &wb, &we))
+		return -EINVAL;
+
+	/* compare the first word only, so trailing blanks are harmless */
+	if (!tok_cmp("begin", wb, we)) {
+		/* suspend the device if it's active */
+		dm_suspend(md);
+		/* start loading a table */
+		dm_table_start(md);
+	} else if (!tok_cmp("end", wb, we)) {
+		/* activate the device ... <evil chuckle> ... */
+		dm_table_complete(md);
+		dm_activate(md);
+	} else {
+		/* add the new entry */
+		char target[64];
+		struct target *t;
+		offset_t start, size, high;
+		size_t len;
+
+		if (get_number(&b, e, &start) || get_number(&b, e, &size))
+			return -EINVAL;
+		if (get_word(b, e, &wb, &we))
+			return -EINVAL;
+
+		/* '>=' keeps one byte free for the terminating NUL */
+		len = we - wb;
+		if (len >= sizeof(target))
+			return -EINVAL;
+		strncpy(target, wb, len);
+		target[len] = '\0';
+
+		if (!(t = dm_get_target(target)))
+			return -EINVAL;
+
+		/* check there isn't a gap */
+		if ((md->num_targets &&
+		     start != md->highs[md->num_targets - 1] + 1) ||
+		    (!md->num_targets && start)) {
+			WARN("gap in target ranges");
+			return -EINVAL;
+		}
+
+		high = start + (size - 1);
+		if ((r = t->ctr(start, high, md, we, e, &context)))
+			return r;
+
+		if ((r = dm_table_add_entry(md, high, t->map, context))) {
+			t->dtr(context);	/* don't leak the context */
+			return r;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Find the next word in [b, e): on success store its bounds through wb
+ * and we and return 0; return -EINVAL if nothing but blanks remains.
+ */
+static int get_word(const char *b, const char *e,
+		    const char **wb, const char **we)
+{
+	const char *p = eat_space(b, e);
+	if (p == e)
+		return -EINVAL;
+	for (*wb = p; p != e && !isspace((int) *p); p++)
+		;
+	*we = p;
+	return 0;
+}
+
+/* write_proc: copy the user buffer in, then hand each line to pfd->fn */
+static int line_splitter(struct file *file, const char *buffer,
+			 unsigned long count, void *data)
+{
+	struct pf_data *pfd = (struct pf_data *) data;
+	char *kbuf = kmalloc(count + 1, GFP_KERNEL);
+	const char *b = kbuf, *e, *lb;
+	int r = 0;
+	if (!kbuf)
+		return -ENOMEM;
+	/* buffer is a user-space pointer; never dereference it directly */
+	if (copy_from_user(kbuf, buffer, count))
+		r = -EFAULT;
+	kbuf[count] = '\0';	/* bounds simple_strtol() in process_control() */
+	for (e = kbuf + count; !r && b < e && (b = eat_space(b, e)) != e; ) {
+		for (lb = b; b != e && *b != '\n'; b++)
+			;
+		r = pfd->fn(lb, b, pfd->minor);
+	}
+	kfree(kbuf);
+	return r ? r : (int) count;
+}
+
+/*
+ * Compare the NUL-terminated string str with the token [b, e).
+ * Returns 0 when equal, -1 when str sorts first or is a proper prefix
+ * of the token, 1 when it sorts later or the token is a prefix of str.
+ */
+static int tok_cmp(const char *str, const char *b, const char *e)
+{
+	for (; *str && b != e; str++, b++) {
+		if (*str == *b)
+			continue;
+		return (*str < *b) ? -1 : 1;
+	}
+
+	/* one side is exhausted: the shorter one sorts first */
+	if (*str)
+		return 1;
+	if (b != e)
+		return -1;
+	return 0;
+}
+
+/* Copy token [b, e) into dest (capacity max), truncating if needed. */
+static void tok_cpy(char *dest, size_t max, const char *b, const char *e)
+{
+	size_t n = e - b;
+	if (n > max - 1)
+		n = max - 1;	/* keep one byte for the NUL */
+	strncpy(dest, b, n);
+	dest[n] = '\0';
+}
diff -ruNX /home/joe/packages/2.4/dontdiff linux/drivers/md/dm-table.c linux-dm/drivers/md/dm-table.c
--- linux/drivers/md/dm-table.c Thu Jan 1 01:00:00 1970
+++ linux-dm/drivers/md/dm-table.c Wed Aug 29 11:03:08 2001
@@ -0,0 +1,178 @@
+/*
+ * dm-table.c
+ *
+ * Copyright (C) 2001 Sistina Software
+ *
+ * This software is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2, or (at
+ * your option) any later version.
+ *
+ * This software is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU CC; see the file COPYING. If not, write to
+ * the Free Software Foundation, 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+/*
+ * Changelog
+ *
+ * 16/08/2001 - First version [Joe Thornber]
+ */
+
+#include "dm.h"
+
+static int alloc_targets(struct mapped_device *md, int num);
+
+static inline ulong round_up(ulong n, ulong size)
+{
+	ulong rem = n % size;	/* how far n is past a multiple of size */
+	return rem ? n + (size - rem) : n;
+}
+
+static inline ulong div_up(ulong n, ulong size) /* n / size, rounded up */
+{
+ return round_up(n, size) / size;
+}
+
+static offset_t high(struct mapped_device *md, int l, int n)
+{
+ while (1) {
+ if (n >= md->counts[l]) /* node n does not exist at level l */
+ return (offset_t) -1;
+ /* at the bottom level the answer is the last key of node n */
+ if (l == md->depth - 1)
+ return md->index[l][((n + 1) * KEYS_PER_NODE) - 1];
+ /* otherwise descend to the last child of node n */
+ l++;
+ n = (n + 1) * (KEYS_PER_NODE + 1) - 1;
+ }
+}
+
+static int setup_btree_index(int l, struct mapped_device *md)
+{
+ int n, c, cn;
+ /* fill level l from level l + 1; cn counts nodes of level l + 1 */
+ for (n = 0, cn = 0; n < md->counts[l]; n++) {
+ offset_t *k = md->index[l] + (n * KEYS_PER_NODE);
+ /* key c is the highest key found under child cn */
+ for (c = 0; c < KEYS_PER_NODE; c++)
+ k[c] = high(md, l + 1, cn++);
+ cn++; /* a node has KEYS_PER_NODE + 1 children; the last gets no key */
+ }
+
+ return 0;
+}
+
+/* Free a device's table.  The bottom index level may alias md->highs
+ * (see dm_table_complete()), so take care to free highs exactly once. */
+void dm_free_table(struct mapped_device *md)
+{
+	int i;
+	for (i = 0; i < md->depth; i++) {
+		if (md->index[i] != md->highs)
+			vfree(md->index[i]);
+		md->index[i] = 0;
+	}
+	vfree(md->highs);	/* used to leak if the table was never completed */
+	vfree(md->targets);
+	md->highs = 0;
+	md->targets = 0;
+	md->num_targets = md->num_allocated = 0;
+}
+
+/*
+ * Begin loading a table: set DM_LOADING, drop any previous table and
+ * reserve room for 64 targets.  Returns what alloc_targets() returns.
+ */
+int dm_table_start(struct mapped_device *md)
+{
+	set_bit(DM_LOADING, &md->state);
+	dm_free_table(md);
+
+	return alloc_targets(md, 64);
+}
+
+/* Append one target, doubling the arrays when full; returns 0 or -ENOMEM. */
+int dm_table_add_entry(struct mapped_device *md, offset_t high,
+		       dm_map_fn target, void *context)
+{
+	/* grow only when full (this used to compare num_targets with itself) */
+	if (md->num_targets >= md->num_allocated &&
+	    alloc_targets(md, md->num_allocated * 2))
+		return -ENOMEM;
+	md->highs[md->num_targets] = high;
+	md->targets[md->num_targets].map = target;
+	md->targets[md->num_targets].private = context;
+	md->num_targets++;
+	return 0;
+}
+
+/* Finish loading: build the btree index above md->highs, set DM_LOADED.
+ * Returns 0, -EINVAL for an empty table, -ENOMEM on allocation failure. */
+int dm_table_complete(struct mapped_device *md)
+{
+	int n, i;
+
+	clear_bit(DM_LOADING, &md->state);
+	if (!md->num_targets)
+		return -EINVAL;	/* the depth loop below would never end */
+
+	/* how many indexes will the btree have ? */
+	for (n = div_up(md->num_targets, KEYS_PER_NODE), i = 1; n != 1; i++)
+		n = div_up(n, KEYS_PER_NODE + 1);
+	md->depth = i;
+	md->counts[md->depth - 1] = div_up(md->num_targets, KEYS_PER_NODE);
+	while (--i)
+		md->counts[i - 1] = div_up(md->counts[i], KEYS_PER_NODE + 1);
+
+	/* bottom layer is easy: highs itself, so it needs no allocation */
+	md->index[md->depth - 1] = md->highs;
+	for (i = 0; i < md->depth - 1; i++) {
+		size_t s = NODE_SIZE * md->counts[i];
+		if (!(md->index[i] = vmalloc(s)))
+			return -ENOMEM;
+		memset(md->index[i], -1, s);
+	}
+	/* fill in higher levels */
+	for (i = md->depth - 1; i; i--)
+		setup_btree_index(i - 1, md);
+	set_bit(DM_LOADED, &md->state);
+	return 0;
+}
+
+/*
+ * Replace the highs and targets arrays with ones holding num entries,
+ * keeping the md->num_targets entries already stored.  Returns 0, or
+ * -ENOMEM with the existing arrays left untouched. */
+static int alloc_targets(struct mapped_device *md, int num)
+{
+	offset_t *highs;
+	struct target_instance *targets;
+
+	highs = vmalloc(sizeof(*highs) * num);
+	if (!highs)
+		return -ENOMEM;
+	targets = vmalloc(sizeof(*targets) * num);
+	if (!targets) {
+		vfree(highs);
+		return -ENOMEM;
+	}
+
+	if (md->num_targets) {
+		memcpy(highs, md->highs, sizeof(*highs) * md->num_targets);
+		memcpy(targets, md->targets,
+		       sizeof(*targets) * md->num_targets);
+	}
+	vfree(md->highs);
+	vfree(md->targets);
+	md->highs = highs;
+	md->targets = targets;
+	md->num_allocated = num;
+	return 0;
+}
diff -ruNX /home/joe/packages/2.4/dontdiff linux/drivers/md/dm-target.c linux-dm/drivers/md/dm-target.c
--- linux/drivers/md/dm-target.c Thu Jan 1 01:00:00 1970
+++ linux-dm/drivers/md/dm-target.c Wed Aug 29 10:56:38 2001
@@ -0,0 +1,176 @@
+/*
+ * dm-target.c
+ *
+ * Copyright (C) 2001 Sistina Software
+ *
+ * This software is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2, or (at
+ * your option) any later version.
+ *
+ * This software is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU CC; see the file COPYING. If not, write to
+ * the Free Software Foundation, 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+/*
+ * 16/08/2001 - First Version [Joe Thornber]
+ */
+
+#include "dm.h"
+
+static struct target *_targets;
+static spinlock_t _lock = SPIN_LOCK_UNLOCKED;
+
+struct target *__get_target(const char *name)	/* callers here hold _lock */
+{
+	struct target *t = _targets;
+	while (t && strcmp(t->name, name))
+		t = t->next;
+	return t;
+}
+
+struct target *dm_get_target(const char *name)
+{
+ struct target *t;
+ /* look the name up under _lock; the result is NULL if it is unknown */
+ spin_lock(&_lock);
+ t = __get_target(name);
+ spin_unlock(&_lock);
+ /* NOTE(review): t is handed back after _lock has been dropped */
+ return t;
+}
+
+/*
+ * Register a target type under 'name' (copied).  Returns 0 on success,
+ * -ENOMEM if out of memory, -EEXIST if the name is already registered.
+ */
+int register_map_target(const char *name, dm_ctr_fn ctr,
+			dm_dtr_fn dtr, dm_map_fn map)
+{
+	struct target *t = kmalloc(sizeof(*t) + strlen(name) + 1, GFP_KERNEL);
+
+	if (!t)
+		return -ENOMEM;
+	spin_lock(&_lock);
+	if (__get_target(name)) {
+		WARN("mapper(%s) already registered\n", name);
+		spin_unlock(&_lock);
+		kfree(t);	/* this allocation used to be leaked */
+		return -EEXIST;
+	}
+	t->name = (char *) (t + 1);	/* name lives right after the struct */
+	strcpy(t->name, name);
+	t->ctr = ctr;
+	t->dtr = dtr;
+	t->map = map;
+	t->next = _targets;
+	_targets = t;
+	spin_unlock(&_lock);
+	return 0;
+}
+
+
+/*
+ * now for a couple of simple targets:
+ *
+ * 'io-err' target always fails an io, useful for bringing up LV's
+ * that have holes in them.
+ *
+ * 'linear' target maps a linear range of a device
+ */
+static int io_err_ctr(offset_t b, offset_t e, struct mapped_device *md,
+ const char *cb, const char *ce, void **result)
+{
+ /* this takes no arguments */
+ *result = 0; /* no per-target context is needed */
+ return 0;
+}
+
+static void io_err_dtr(void *c)
+{
+ /* empty: io_err_ctr() allocates nothing */
+}
+
+static int io_err_map(struct buffer_head *bh, void *context)
+{
+ buffer_IO_error(bh);
+ return 0; /* 0: __map_buffer() installs no end_io hook */
+}
+
+
+struct linear_c {
+ kdev_t dev; /* device that linear_map() redirects buffers to */
+ int offset; /* added to b_rsector; FIXME: we need a signed offset type */
+};
+
+/*
+ * Constructor for the 'linear' target.  The context string has the form
+ *   <major> <minor> <offset>
+ * Returns 0 with a struct linear_c in *result, or a negative errno. */
+static int linear_ctr(offset_t low, offset_t high, struct mapped_device *md,
+		      const char *cb, const char *ce, void **result)
+{
+	struct linear_c *lc;
+	unsigned int major, minor, start;
+	int r;
+
+	if ((r = get_number(&cb, ce, &major)))
+		return r;
+	if ((r = get_number(&cb, ce, &minor)))
+		return r;
+	if ((r = get_number(&cb, ce, &start)))
+		return r;
+
+	/* sizeof(*lc), not sizeof(lc): the latter is only pointer-sized */
+	if (!(lc = kmalloc(sizeof(*lc), GFP_KERNEL))) {
+		WARN("couldn't allocate memory for linear context\n");
+		return -ENOMEM;
+	}
+
+	lc->dev = MKDEV((int) major, (int) minor);
+	lc->offset = (int) start - (int) low;
+
+	if ((r = dm_add_device(md, lc->dev))) {
+		kfree(lc);
+		return r;
+	}
+
+	*result = lc;
+	return 0;
+}
+
+static void linear_dtr(void *c)
+{
+ kfree(c); /* the struct linear_c allocated by linear_ctr() */
+}
+
+static int linear_map(struct buffer_head *bh, void *context)
+{
+ struct linear_c *lc = (struct linear_c *) context;
+ /* redirect the buffer to the backing device, shifted by lc->offset */
+ bh->b_rdev = lc->dev;
+ bh->b_rsector = bh->b_rsector + lc->offset;
+ return 1; /* > 0: __map_buffer() hooks end_io for this buffer */
+}
+
+/* Register the built-in targets; returns 0 or a negative error. */
+int dm_std_targets(void)
+{
+	int ret;
+	/* the assignment needs its own parentheses: '<' binds tighter than '=' */
+#define xx(n, fn) \
+	if ((ret = register_map_target(n, \
+		fn ## _ctr, fn ## _dtr, fn ## _map)) < 0) return ret
+
+	xx("io-err", io_err);
+	xx("linear", linear);
+#undef xx
+	return 0;
+}
diff -ruNX /home/joe/packages/2.4/dontdiff linux/drivers/md/dm.c linux-dm/drivers/md/dm.c
--- linux/drivers/md/dm.c Thu Jan 1 01:00:00 1970
+++ linux-dm/drivers/md/dm.c Thu Aug 30 14:03:06 2001
@@ -0,0 +1,684 @@
+/*
+ * dm.c
+ *
+ * Copyright (C) 2001 Sistina Software
+ *
+ * This software is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2, or (at
+ * your option) any later version.
+ *
+ * This software is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU CC; see the file COPYING. If not, write to
+ * the Free Software Foundation, 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+/*
+ * Changelog
+ *
+ * 14/08/2001 - First Version [Joe Thornber]
+ */
+
+#include "dm.h"
+
+/* defines for blk.h */
+#define MAJOR_NR DM_BLK_MAJOR
+#define DEVICE_NR(device) MINOR(device) /* has no partition bits */
+#define DEVICE_NAME "device-mapper" /* name for messaging */
+#define DEVICE_NO_RANDOM /* no entropy to contribute */
+#define DEVICE_OFF(d) /* do-nothing */
+
+#include <linux/blk.h>
+
+#define MAX_DEVICES 64
+#define DEFAULT_READ_AHEAD 64
+
+const char *_name = "device-mapper";
+int _version[3] = {0, 1, 0};
+
+struct io_hook {
+ struct mapped_device *md;
+ void (*end_io)(struct buffer_head *bh, int uptodate);
+ void *context;
+};
+
+#define rl down_read(&_dev_lock)
+#define ru up_read(&_dev_lock)
+#define wl down_write(&_dev_lock)
+#define wu up_write(&_dev_lock)
+
+struct rw_semaphore _dev_lock;
+static struct mapped_device *_devs[MAX_DEVICES];
+
+/* block device arrays */
+static int _block_size[MAX_DEVICES];
+static int _blksize_size[MAX_DEVICES];
+static int _hardsect_size[MAX_DEVICES];
+
+static int blk_open(struct inode *inode, struct file *file);
+static int blk_close(struct inode *inode, struct file *file);
+static int blk_ioctl(struct inode *inode, struct file *file,
+ uint command, ulong a);
+
+struct block_device_operations dm_blk_dops = {
+ open: blk_open,
+ release: blk_close,
+ ioctl: blk_ioctl
+};
+
+static int request(request_queue_t *q, int rw, struct buffer_head *bh);
+
+/*
+ * setup and teardown the driver
+ */
+/* Module init; on failure the proc/devfs entries are removed again. */
+static int init(void)
+{
+	int ret;
+
+	init_rwsem(&_dev_lock);
+	if ((ret = dm_init_fs()))
+		return ret;
+	if (dm_std_targets())
+		goto bad;
+
+	/* claim the major first so the array setup below needs no undoing */
+	if (devfs_register_blkdev(MAJOR_NR, _name, &dm_blk_dops) < 0) {
+		printk(KERN_ERR "%s -- register_blkdev failed\n", _name);
+		goto bad;
+	}
+	/* set up the arrays */
+	read_ahead[MAJOR_NR] = DEFAULT_READ_AHEAD;
+	blk_size[MAJOR_NR] = _block_size;
+	blksize_size[MAJOR_NR] = _blksize_size;
+	hardsect_size[MAJOR_NR] = _hardsect_size;
+	blk_queue_make_request(BLK_DEFAULT_QUEUE(MAJOR_NR), request);
+	printk(KERN_INFO "%s %d.%d.%d initialised\n", _name,
+	       _version[0], _version[1], _version[2]);
+	return 0;
+ bad:
+	dm_fin_fs();	/* else the /proc entries would outlive the module */
+	return -EIO;	/* FIXME: better error value */
+}
+
+static void fin(void)
+{
+ dm_fin_fs();
+ /* give the block major back; a failure here is only logged */
+ if (devfs_unregister_blkdev(MAJOR_NR, _name) < 0)
+ printk(KERN_ERR "%s -- unregister_blkdev failed\n", _name);
+ /* detach the per-major arrays that init() installed */
+ read_ahead[MAJOR_NR] = 0;
+ blk_size[MAJOR_NR] = 0;
+ blksize_size[MAJOR_NR] = 0;
+ hardsect_size[MAJOR_NR] = 0;
+
+ printk(KERN_INFO "%s %d.%d.%d cleaned up\n", _name,
+ _version[0], _version[1], _version[2]);
+}
+
+/*
+ * block device functions
+ */
+/* Open a mapped device; only an existing, active device can be opened. */
+static int blk_open(struct inode *inode, struct file *file)
+{
+	struct mapped_device *md;
+	int minor = MINOR(inode->i_rdev);
+	int r = -ENXIO;
+
+	if (minor >= MAX_DEVICES)
+		return r;
+
+	wl;
+	md = _devs[minor];
+	if (md && is_active(md)) {
+		md->use_count++;
+		r = 0;
+	}
+	wu;
+
+	if (!r)
+		MOD_INC_USE_COUNT;
+	return r;
+}
+
+/* Release a mapped device, dropping the count taken by blk_open(). */
+static int blk_close(struct inode *inode, struct file *file)
+{
+	struct mapped_device *md;
+	int minor = MINOR(inode->i_rdev);
+
+	if (minor >= MAX_DEVICES)
+		return -ENXIO;
+
+	wl;
+	md = _devs[minor];
+	if (md && md->use_count >= 1) {
+		md->use_count--;
+		wu;
+		MOD_DEC_USE_COUNT;
+		return 0;
+	}
+
+	WARN("reference count in mapped_device incorrect");
+	wu;
+	return -ENXIO;
+}
+
+/* Block ioctls.  Unsupported commands are rejected with -EINVAL. */
+static int blk_ioctl(struct inode *inode, struct file *file,
+		     uint command, ulong a)
+{
+	/* FIXME: check in the latest Rubini that all expected ioctl's
+	   are supported */
+	int minor = MINOR(inode->i_rdev);
+	long size;
+
+	switch (command) {
+	case BLKGETSIZE:
+		/* guard the array index and the divisor */
+		if (minor >= MAX_DEVICES || !_hardsect_size[minor])
+			return -ENXIO;
+		size = _block_size[minor] * 1024 / _hardsect_size[minor];
+		if (copy_to_user((void *) a, &size, sizeof(long)))
+			return -EFAULT;
+		return 0;
+
+	case BLKFLSBUF:
+		if (!capable(CAP_SYS_ADMIN))
+			return -EACCES;
+		fsync_dev(inode->i_rdev);
+		invalidate_buffers(inode->i_rdev);
+		return 0;
+
+	case BLKRAGET:
+		size = read_ahead[MAJOR(inode->i_rdev)];	/* widen to long */
+		if (copy_to_user((void *) a, &size, sizeof(long)))
+			return -EFAULT;
+		return 0;
+
+	case BLKRASET:
+		if (!capable(CAP_SYS_ADMIN))
+			return -EACCES;
+		read_ahead[MAJOR(inode->i_rdev)] = a;
+		return 0;
+
+	case BLKRRPART:
+		return -EINVAL;
+	default:
+		printk(KERN_WARNING "%s - unknown block ioctl %d\n",
+		       _name, command);
+		return -EINVAL;
+	}
+}
+
+/* FIXME: These should have their own slab */
+inline static struct io_hook *alloc_io_hook(void)
+{
+ return kmalloc(sizeof(struct io_hook), GFP_NOIO); /* may be NULL; __map_buffer() checks */
+}
+
+inline static void free_io_hook(struct io_hook *ih)
+{
+ kfree(ih); /* counterpart of alloc_io_hook() */
+}
+
+inline static struct deferred_io *alloc_deferred(void)
+{
+ return kmalloc(sizeof(struct deferred_io), GFP_NOIO); /* may be NULL; queue_io() checks */
+}
+
+inline static void free_deferred(struct deferred_io *di)
+{
+ kfree(di); /* counterpart of alloc_deferred() */
+}
+
+static void dec_pending(struct buffer_head *bh, int uptodate)
+{
+ struct io_hook *ih = bh->b_private;
+ /* end_io hook installed by __map_buffer(): one less io in flight */
+ if (atomic_dec_and_test(&ih->md->pending))
+ /* nudge anyone waiting on suspend queue */
+ wake_up_interruptible(&ih->md->wait);
+ /* put back the end_io and private values saved in the hook */
+ bh->b_end_io = ih->end_io;
+ bh->b_private = ih->context;
+ free_io_hook(ih);
+ /* pass the completion on to the restored handler */
+ bh->b_end_io(bh, uptodate);
+}
+
+/*
+ * Defer bh until the device is activated.  Returns 1 if queued, 0 if the
+ * device is active after all, -ENOMEM if nothing could be allocated. */
+static int queue_io(struct mapped_device *md, struct buffer_head *bh, int rw)
+{
+	struct deferred_io *di = alloc_deferred();
+	if (!di)
+		return -ENOMEM;
+	wl;
+	if (test_bit(DM_ACTIVE, &md->state)) {
+		wu;
+		free_deferred(di);	/* was leaked on this path */
+		return 0;
+	}
+	di->bh = bh;
+	di->rw = rw;
+	di->next = md->deferred;
+	md->deferred = di;
+	wu;
+	return 1;
+}
+
+
+/*
+ * Pass bh to the map function of target 'node'.  Returns 1 if the
+ * target took the buffer (it returned >= 0), 0 on any failure.  When
+ * the target returns > 0 the buffer's completion is routed through
+ * dec_pending() so that md->pending tracks it.
+ */
+inline static int __map_buffer(struct mapped_device *md,
+			       struct buffer_head *bh, int node)
+{
+	struct target_instance *ti = md->targets + node;
+	dm_map_fn map = ti->map;
+	void *map_context = ti->private;
+	struct io_hook *hook;
+	int r;
+
+	/* an entry without a map function cannot take io */
+	if (!map)
+		return 0;
+
+	hook = alloc_io_hook();
+	if (!hook)
+		return 0;
+
+	/* save the submitter's completion state before mapping */
+	hook->md = md;
+	hook->end_io = bh->b_end_io;
+	hook->context = bh->b_private;
+
+	r = map(bh, map_context);
+
+	if (r <= 0) {
+		/* we don't need to hook */
+		free_io_hook(hook);
+		return r == 0;
+	}
+
+	/* hook the end io request fn */
+	atomic_inc(&md->pending);
+	bh->b_end_io = dec_pending;
+	bh->b_private = hook;
+
+	return 1;
+}
+
+inline static int __find_node(struct mapped_device *md, struct buffer_head *bh)
+{
+ int i = 0, l, r = 0;
+ offset_t *node;
+ /* r is the node visited at level l, i the key slot chosen within it */
+ /* search the btree for the correct target */
+ for (l = 0; l < md->depth; l++) {
+ r = ((KEYS_PER_NODE + 1) * r) + i; /* child i of the previous node */
+ node = md->index[l] + (r * KEYS_PER_NODE);
+ /* first key that is >= the requested sector */
+ for (i = 0; i < KEYS_PER_NODE; i++)
+ if (node[i] >= bh->b_rsector)
+ break;
+ }
+ /* position in the bottom level, used as the index into md->targets */
+ return (KEYS_PER_NODE * r) + i;
+}
+
+/*
+ * make_request function: returns 1 once bh has been remapped, 0 when it
+ * was deferred or failed.  A non-zero return asks for bh to be resubmitted.
+ */
+static int request(request_queue_t *q, int rw, struct buffer_head *bh)
+{
+	struct mapped_device *md;
+	int r, minor = MINOR(bh->b_rdev);
+
+	if (minor >= MAX_DEVICES)
+		goto fail;	/* was 'return -ENXIO' with bh left uncompleted */
+
+	rl;
+	md = _devs[minor];
+	if (!md || !test_bit(DM_LOADED, &md->state))
+		goto bad;
+
+	/* if we're suspended we have to queue this io for later */
+	if (!test_bit(DM_ACTIVE, &md->state)) {
+		ru;
+		r = queue_io(md, bh, rw);
+		if (r < 0)
+			goto fail;
+		if (r > 0)
+			return 0; /* deferred successfully */
+		rl; /* FIXME: there's still a race here */
+	}
+
+	if (!__map_buffer(md, bh, __find_node(md, bh)))
+		goto bad;
+
+	ru;
+	return 1;
+
+ bad:
+	ru;
+ fail:
+	buffer_IO_error(bh);
+	return 0;
+}
+
+/* Returns minor if that slot is free, -1 if it is taken or out of range. */
+static inline int __specific_dev(int minor)
+{
+	if (minor >= MAX_DEVICES) {	/* was '>', and returned slot 0 */
+		WARN("request for a mapped_device > than MAX_DEVICES");
+		return -1;
+	}
+	if (!_devs[minor])
+		return minor;
+
+	return -1;
+}
+
+static inline int __any_old_dev(void)
+{
+ int i;
+ /* lowest minor whose _devs[] slot is empty */
+ for (i = 0; i < MAX_DEVICES; i++)
+ if (!_devs[i])
+ return i;
+ /* every slot is taken */
+ return -1;
+}
+
+/* Allocate a zeroed mapped_device and claim a minor (any free one if
+ * minor < 0).  Returns 0 if out of memory or no minor is available. */
+static struct mapped_device *alloc_dev(int minor)
+{
+	struct mapped_device *md = kmalloc(sizeof(*md), GFP_KERNEL);
+	if (!md)
+		return 0;	/* memset() below used to run on NULL */
+	memset(md, 0, sizeof(*md));
+
+	wl;
+	minor = (minor < 0) ? __any_old_dev() : __specific_dev(minor);
+	if (minor < 0) {
+		WARN("no free devices available");
+		wu;
+		kfree(md);
+		return 0;
+	}
+
+	md->dev = MKDEV(DM_BLK_MAJOR, minor);
+	md->name[0] = '\0';
+	md->state = 0;
+	init_waitqueue_head(&md->wait);
+	_devs[minor] = md;
+	wu;
+	return md;
+}
+
+static inline struct mapped_device *__find_name(const char *name)
+{
+ int i; /* callers in this file hold _dev_lock */
+ for (i = 0; i < MAX_DEVICES; i++)
+ if (_devs[i] && !strcmp(_devs[i]->name, name))
+ return _devs[i];
+ /* no device has that name */
+ return 0;
+}
+
+static int open_dev(struct dev_list *d)
+{
+ int err;
+ /* on success d->bd holds the opened block_device for d->dev */
+ if (!(d->bd = bdget(kdev_t_to_nr(d->dev))))
+ return -ENOMEM;
+ /* opened read/write; the bdget() reference is dropped on failure */
+ if ((err = blkdev_get(d->bd, FMODE_READ|FMODE_WRITE, 0, BDEV_FILE))) {
+ bdput(d->bd);
+ return err;
+ }
+
+ return 0;
+}
+
+static void close_dev(struct dev_list *d)
+{
+ blkdev_put(d->bd, BDEV_FILE); /* undo blkdev_get() from open_dev() */
+ bdput(d->bd); /* NOTE(review): confirm blkdev_put() does not already drop this reference */
+ d->bd = 0;
+}
+
+static int __find_hardsect_size(struct mapped_device *md)
+{
+ int r = INT_MAX, s;
+ struct dev_list *dl;
+ /* smallest hard sector size among the devices on md's list */
+ for (dl = md->devices; dl; dl = dl->next) {
+ s = get_hardsect_size(dl->dev);
+ if (s < r)
+ r = s;
+ }
+ /* r is still INT_MAX when the device list is empty */
+ return r;
+}
+
+struct mapped_device *dm_find_by_name(const char *name)
+{
+ struct mapped_device *md;
+ /* look the name up with _dev_lock held for reading */
+ rl;
+ md = __find_name(name);
+ ru;
+ /* 0 if no device has that name */
+ return md;
+}
+
+/* Look a device up by minor; returns 0 for a bad or unused minor. */
+struct mapped_device *dm_find_by_minor(int minor)
+{
+	struct mapped_device *md = 0;
+	rl;
+	if (minor >= 0 && minor < MAX_DEVICES)	/* never index outside _devs[] */
+		md = _devs[minor];
+	ru;
+	return md;
+}
+
+/*
+ * Create device 'name' on 'minor' (any free minor if negative).  Returns
+ * 0 or a negative errno; on failure the minor slot is released again. */
+int dm_create(const char *name, int minor)
+{
+	int r;
+	struct mapped_device *md;
+
+	if (minor >= MAX_DEVICES)
+		return -ENXIO;
+	if (!(md = alloc_dev(minor)))
+		return -ENOMEM;
+	wl;
+	minor = MINOR(md->dev);	/* alloc_dev() may have picked the slot */
+	if (__find_name(name)) {
+		WARN("device with that name already exists");
+		r = -EINVAL;
+		goto bad;
+	}
+	strcpy(md->name, name);
+	if ((r = dm_fs_add(md)))
+		goto bad;
+	wu;
+	return 0;
+ bad:
+	_devs[minor] = 0;	/* don't leave a dangling slot behind */
+	kfree(md);
+	wu;
+	return r;
+}
+
+/*
+ * Destroy device 'name'.  Returns -ENXIO if unknown, -EPERM while it is
+ * in use, the error from dm_fs_remove(), or 0 once everything is freed. */
+int dm_remove(const char *name)
+{
+	struct mapped_device *md;
+	struct dev_list *d, *n;
+	int minor, r;
+
+	wl;
+	md = __find_name(name);
+	if (!md)
+		r = -ENXIO;
+	else if (md->use_count)
+		r = -EPERM;
+	else
+		r = dm_fs_remove(md);
+	if (r) {
+		wu;
+		return r;
+	}
+
+	dm_free_table(md);
+	for (d = md->devices; d; d = n) {
+		n = d->next;
+		kfree(d);
+	}
+
+	minor = MINOR(md->dev);
+	kfree(md);
+	_devs[minor] = 0;
+	wu;
+
+	return 0;
+}
+
+/* Prepend dev to md's device list; returns 0, or -ENOMEM if out of memory. */
+int dm_add_device(struct mapped_device *md, kdev_t dev)
+{
+	struct dev_list *d = kmalloc(sizeof(*d), GFP_KERNEL);
+
+	if (!d)
+		return -ENOMEM;	/* was -EINVAL, which hid the real cause */
+
+	d->dev = dev;
+	d->next = md->devices;
+	md->devices = d;
+	return 0;
+}
+
+static void __flush_deferred_io(struct mapped_device *md)
+{
+ struct deferred_io *c, *n;
+ /* detach the whole list first, then resubmit and free each entry */
+ for (c = md->deferred, md->deferred = 0; c; c = n) {
+ n = c->next;
+ generic_make_request(c->rw, c->bh);
+ free_deferred(c);
+ }
+}
+
+/*
+ * Bring a loaded device on line: open its devices, publish its size and
+ * resubmit deferred io.  Returns 0 (also if already active), -ENXIO for
+ * an empty table, or the error from opening a device. */
+int dm_activate(struct mapped_device *md)
+{
+	int ret, minor;
+	struct dev_list *d, *od;
+
+	wl;
+	if (is_active(md)) {
+		wu;
+		return 0;
+	}
+
+	if (!md->num_targets) {
+		wu;
+		return -ENXIO;
+	}
+
+	/* open all the devices */
+	for (d = md->devices; d; d = d->next)
+		if ((ret = open_dev(d)))
+			goto bad;
+
+	minor = MINOR(md->dev);
+	_block_size[minor] = (md->highs[md->num_targets - 1] + 1) >> 1;
+	_blksize_size[minor] = BLOCK_SIZE; /* FIXME: this depends on
+					      the mapping table */
+	_hardsect_size[minor] = __find_hardsect_size(md);
+
+	register_disk(NULL, md->dev, 1, &dm_blk_dops, _block_size[minor]);
+	set_bit(DM_ACTIVE, &md->state);
+	wu;
+
+	/* resubmit only after unlocking: request() takes _dev_lock itself */
+	__flush_deferred_io(md);
+	return 0;
+
+ bad:
+	od = d;
+	for (d = md->devices; d != od; d = d->next)
+		close_dev(d);
+	wu;	/* was 'ru', but the lock is held for writing */
+	return ret;
+}
+
+/*
+ * Suspend an active device: wait for in-flight io to drain, close the
+ * underlying devices and clear DM_ACTIVE.  Does nothing if not active. */
+void dm_suspend(struct mapped_device *md)
+{
+	DECLARE_WAITQUEUE(wait, current);
+	struct dev_list *d;
+	if (!is_active(md))
+		return;
+
+	/* wait for all the pending io to flush */
+	add_wait_queue(&md->wait, &wait);
+	do {
+		/* re-arm on every pass: after schedule() we are TASK_RUNNING */
+		current->state = TASK_INTERRUPTIBLE;
+		wl;
+		if (!atomic_read(&md->pending))
+			break;	/* leaves the loop with _dev_lock held */
+		wu;
+		schedule();
+	} while (1);
+	current->state = TASK_RUNNING;
+	remove_wait_queue(&md->wait, &wait);
+
+	/* close all the devices */
+	for (d = md->devices; d; d = d->next)
+		close_dev(d);
+	clear_bit(DM_ACTIVE, &md->state);
+	wu;
+}
+
+
+/*
+ * module hooks
+ */
+module_init(init);
+module_exit(fin);
+
+/*
+ * Local variables:
+ * c-file-style: "linux"
+ * End:
+ */
diff -ruNX /home/joe/packages/2.4/dontdiff linux/drivers/md/dm.h linux-dm/drivers/md/dm.h
--- linux/drivers/md/dm.h Thu Jan 1 01:00:00 1970
+++ linux-dm/drivers/md/dm.h Thu Aug 30 13:54:05 2001
@@ -0,0 +1,268 @@
+/*
+ * dm.h
+ *
+ * Copyright (C) 2001 Sistina Software
+ *
+ * This software is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2, or (at
+ * your option) any later version.
+ *
+ * This software is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU CC; see the file COPYING. If not, write to
+ * the Free Software Foundation, 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+/*
+ * Internal header file for device mapper
+ *
+ * Changelog
+ *
+ * 16/08/2001 - First version [Joe Thornber]
+ */
+
+/*
+ * This driver attempts to provide a generic way of specifying logical
+ * devices which are mapped onto other devices.
+ *
+ * It does this by mapping sections of the logical device onto 'targets'.
+ *
+ * When the logical device is accessed the make_request function looks up
+ * the correct target for the given sector, and then asks this target
+ * to do the remapping.
+ *
+ * (dm-table.c) A btree like structure is used to hold the sector
+ * range -> target mapping. Because we know all the entries in the
+ * btree in advance we can make a very compact tree, omitting pointers
+ * to child nodes, (child nodes locations can be calculated). Each
+ * node of the btree is one level 1 cache line in size; this gives a small
+ * performance boost.
+ *
+ * A userland test program for the btree gave the following results on a
+ * 1 Gigahertz Athlon machine:
+ *
+ * entries in btree lookups per second
+ * ---------------- ------------------
+ * 5 25,000,000
+ * 1000 7,700,000
+ * 10,000,000 3,800,000
+ *
+ * Of course these results should be taken with a pinch of salt; the lookups
+ * were sequential and there were no other applications (other than X + emacs)
+ * running to give any pressure on the level 1 cache.
+ *
+ * Typical LVM users would find they have very few targets for each
+ * LV (probably less than 10).
+ *
+ * (dm-target.c) Target types are not hard coded, instead the
+ * register_map_target function should be called. A target type is
+ * specified using three functions (see the header):
+ *
+ * dm_ctr_fn - takes a string and constructs a target specific piece of
+ * context data.
+ * dm_dtr_fn - destroy contexts.
+ * dm_map_fn - function that takes a buffer_head and some previously
+ * constructed context and performs the remapping.
+ *
+ * Currently there are two trivial mappers, which are
+ * automatically registered: 'linear', and 'io_error'. Linear alone
+ * is enough to implement most LVM features (omitting striped volumes
+ * and snapshots).
+ *
+ * (dm-fs.c) The driver is controlled through a /proc interface:
+ * /proc/device-mapper/control allows you to create and remove devices
+ * by 'cat'ing a line of the following format:
+ *
+ * create <device name> [minor no]
+ * remove <device name>
+ *
+ * /proc/device-mapper/<device name> accepts the mapping table:
+ *
+ * begin
+ * <sector start> <length> <target name> <target args>...
+ * ...
+ * end
+ *
+ * The begin/end lines are nasty, they should be handled by open/close
+ * for the file.
+ *
+ * At the moment the table assumes 32 bit keys (sectors), the move to
+ * 64 bits will involve no interface changes, since the tables will be
+ * read in as ascii data. A different table implementation can
+ * therefore be provided at another time. Either just by changing offset_t
+ * to 64 bits, or maybe implementing a structure which looks up the keys in
+ * stages (ie, 32 bits at a time).
+ *
+ * More interesting targets:
+ *
+ * striped mapping; given a stripe size and a number of device regions
+ * this would stripe data across the regions. Especially useful, since
+ * we could limit each striped region to a 32 bit area and then avoid
+ * nasty 64 bit %'s.
+ *
+ * mirror mapping (reflector ?); would set off a kernel thread slowly
+ * copying data from one region to another, ensuring that any new
+ * writes got copied to both destinations correctly. Great for
+ * implementing pvmove. Not sure how userland would be notified that
+ * the copying process had completed. Possibly by reading a /proc entry
+ * for the LV. Could also use poll() for this kind of thing.
+ */
+
+
+#ifndef DM_INTERNAL_H
+#define DM_INTERNAL_H
+
+#include <linux/version.h>
+#include <linux/major.h>
+#include <linux/iobuf.h>
+#include <linux/module.h>
+#include <linux/fs.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include <linux/compatmac.h>
+#include <linux/cache.h>
+#include <linux/devfs_fs_kernel.h>
+#include <linux/ctype.h>
+#include <linux/device-mapper.h>
+
+#define MAX_DEPTH 16
+#define NODE_SIZE L1_CACHE_BYTES
+#define KEYS_PER_NODE (NODE_SIZE / sizeof(offset_t))
+#define DM_NAME_LEN 64
+
+enum {	/* state flags; DM_ACTIVE is a bit number in mapped_device.state */
+ DM_LOADED = 0,
+ DM_LOADING,
+ DM_ACTIVE,	/* set by dm_activate(), cleared by dm_suspend() */
+};
+
+/* devices that a metadevice uses and hence should open/close */
+struct dev_list {
+ kdev_t dev;
+ struct block_device *bd;
+ struct dev_list *next;
+};
+
+/* io that had to be deferred while we were suspended */
+struct deferred_io {
+ int rw;
+ struct buffer_head *bh;
+ struct deferred_io *next;
+};
+
+/* btree leaf, these do the actual mapping */
+struct target_instance {
+ dm_map_fn map;
+ void *private;
+};
+
+/* one mapped (logical) device together with its mapping table */
+struct mapped_device {
+	kdev_t dev;
+	char name[DM_NAME_LEN];
+	int use_count;
+	unsigned long state;	/* DM_* bits; bitops need an unsigned long */
+
+	wait_queue_head_t wait;	/* dm_suspend() waits here for pending io */
+	atomic_t pending;	/* # of 'in flight' buffers */
+
+	/* a list of io's that arrived while we were suspended */
+	struct deferred_io *deferred;
+
+	/* btree table */
+	int depth;
+	int counts[MAX_DEPTH];	/* in nodes */
+	offset_t *index[MAX_DEPTH];
+
+	int num_targets;
+	int num_allocated;
+	offset_t *highs;	/* last entry sizes the device in dm_activate() */
+	struct target_instance *targets;
+
+	/* used by dm-fs.c */
+	devfs_handle_t devfs_entry;
+	struct proc_dir_entry *pde;
+
+	/* a list of devices used by this md */
+	struct dev_list *devices;
+};
+
+/* information about a target type */
+struct target {
+ char *name;
+ dm_ctr_fn ctr;
+ dm_dtr_fn dtr;
+ dm_map_fn map;
+
+ struct target *next;
+};
+
+extern struct block_device_operations dm_blk_dops;
+
+/* dm-target.c */
+struct target *dm_get_target(const char *name);
+int dm_std_targets(void);
+
+/* dm.c */
+struct mapped_device *dm_find_by_name(const char *name);
+struct mapped_device *dm_find_by_minor(int minor);
+
+int dm_create(const char *name, int minor);
+int dm_remove(const char *name);
+
+int dm_activate(struct mapped_device *md);
+void dm_suspend(struct mapped_device *md);
+
+/* dm-table.c */
+int dm_table_start(struct mapped_device *md);
+int dm_table_add_entry(struct mapped_device *md, offset_t high,
+ dm_map_fn target, void *context);
+int dm_table_complete(struct mapped_device *md);
+void dm_free_table(struct mapped_device *md);
+
+
+/* dm-fs.c */
+int dm_init_fs(void);
+void dm_fin_fs(void);
+
+int dm_fs_add(struct mapped_device *md);
+int dm_fs_remove(struct mapped_device *md);
+
+
+#define WARN(f, x...) printk(KERN_WARNING "device-mapper: " f "\n" , ## x)
+
+inline static int is_active(struct mapped_device *md)
+{
+ return test_bit(DM_ACTIVE, &md->state);	/* non-zero while DM_ACTIVE is set */
+}
+
+/* skip whitespace in [b, e); returns the first non-space position, or e */
+inline static const char *eat_space(const char *b, const char *e)
+{
+	for (; b != e && isspace((int) *b); b++)
+		;
+	return b;
+}
+
+/* read a decimal from [*b, e) into *n, advancing *b; -EINVAL if none */
+inline static int get_number(const char **b, const char *e, unsigned int *n)
+{
+	char *ptr;
+	*b = eat_space(*b, e);
+	if (*b >= e)
+		return -EINVAL;
+
+	*n = simple_strtoul(*b, &ptr, 10);
+	/* simple_strtoul() ignores e: refuse digits that run past it */
+	if (ptr == *b || ptr > e)
+		return -EINVAL;
+	*b = ptr;	/* never beyond e, so eat_space()'s b != e test holds */
+	return 0;
+}
+
+#endif
diff -ruNX /home/joe/packages/2.4/dontdiff linux/include/linux/device-mapper.h linux-dm/include/linux/device-mapper.h
--- linux/include/linux/device-mapper.h Thu Jan 1 01:00:00 1970
+++ linux-dm/include/linux/device-mapper.h Tue Aug 28 11:35:56 2001
@@ -0,0 +1,61 @@
+/*
+ * device-mapper.h
+ *
+ * Copyright (C) 2001 Sistina Software
+ *
+ * This software is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2, or (at
+ * your option) any later version.
+ *
+ * This software is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU CC; see the file COPYING. If not, write to
+ * the Free Software Foundation, 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+/*
+ * Changelog
+ *
+ * 14/08/2001 - First version [Joe Thornber]
+ */
+
+#ifndef DEVICE_MAPPER_H
+#define DEVICE_MAPPER_H
+
+#ifdef __KERNEL__
+
+#include <linux/major.h>
+
+/* FIXME: Use value from local range for now, for co-existence with LVM 1 */
+#define DM_BLK_MAJOR 124
+
+struct mapped_device;
+typedef unsigned int offset_t;
+
+/* constructor, destructor and map fn types */
+typedef int (*dm_ctr_fn)(offset_t b, offset_t e, struct mapped_device *md,
+ const char *cb, const char *ce, void **result);
+typedef void (*dm_dtr_fn)(void *c);
+typedef int (*dm_map_fn)(struct buffer_head *bh, void *context);
+
+int register_map_target(const char *name, dm_ctr_fn ctr, dm_dtr_fn dtr,
+ dm_map_fn map);
+
+/* constructors should call this to make sure any destination devices
+ are handled correctly (ie. opened/closed) */
+int dm_add_device(struct mapped_device *md, kdev_t dev);
+
+#endif
+#endif
+
+/*
+ * Local variables:
+ * c-file-style: "linux"
+ * End:
+ */