
o added proper suspend/resume support; it now waits for all 'in flight'
  io's to complete.

o moved comment to dm.h
Joe Thornber 2001-08-28 13:04:44 +00:00
parent 0d3e8e743a
commit bfba809c79
5 changed files with 190 additions and 95 deletions
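In outline, the new suspend path counts 'in flight' buffers in md->pending and sleeps on md->wait until the count drops to zero; the end_io hook (dec_pending) wakes the sleeper when the last buffer completes. A condensed sketch of that pattern follows, using the 2.4-era wait-queue API. The field names pending and wait are taken from the hunks below; the struct and function names here are illustrative only, and the locking and io_hook bookkeeping are omitted:

#include <linux/sched.h>
#include <linux/wait.h>
#include <asm/atomic.h>

/* illustrative stand-in for the two fields this commit adds to
 * struct mapped_device */
struct pending_state {
	atomic_t pending;		/* number of 'in flight' buffers */
	wait_queue_head_t wait;		/* suspend sleeps here */
};

/* completion side: the last in-flight buffer wakes any suspender */
static void buffer_done(struct pending_state *s)
{
	if (atomic_dec_and_test(&s->pending))
		wake_up_interruptible(&s->wait);
}

/* suspend side: sleep until every pending buffer has completed */
static void wait_for_pending(struct pending_state *s)
{
	DECLARE_WAITQUEUE(wait, current);

	add_wait_queue(&s->wait, &wait);
	while (1) {
		set_current_state(TASK_INTERRUPTIBLE);
		if (!atomic_read(&s->pending))
			break;
		schedule();
	}
	set_current_state(TASK_RUNNING);
	remove_wait_queue(&s->wait, &wait);
}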

View File

@ -38,6 +38,7 @@
struct mapped_device;
typedef unsigned int offset_t;
/* constructor, destructor and map fn types */
typedef int (*dm_ctr_fn)(offset_t b, offset_t e, struct mapped_device *md,
const char *cb, const char *ce, void **result);
typedef void (*dm_dtr_fn)(void *c);
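For context, these typedefs describe the three per-target functions a mapping type provides. Below is a minimal sketch of a target in that shape. The dm_map_fn signature is inferred from the fn(bh, context) call in the dm.c request() hunk further down the page; the example_linear names, the context struct, and its argument handling are invented for illustration:

#include <linux/fs.h>		/* struct buffer_head, kdev_t */
#include <linux/slab.h>
#include "dm.h"			/* offset_t, struct mapped_device, fn typedefs */

struct example_linear_c {
	kdev_t dev;		/* device we remap onto */
	offset_t offset;	/* start sector on that device */
};

/* dm_ctr_fn: build target context from the [cb, ce) argument string */
static int example_linear_ctr(offset_t b, offset_t e, struct mapped_device *md,
			      const char *cb, const char *ce, void **result)
{
	struct example_linear_c *lc = kmalloc(sizeof(*lc), GFP_KERNEL);

	if (!lc)
		return -ENOMEM;
	/* a real constructor would parse "<device> <offset>" out of [cb, ce) */
	lc->dev = 0;
	lc->offset = 0;
	*result = lc;
	return 0;
}

/* dm_dtr_fn: throw the context away again */
static void example_linear_dtr(void *c)
{
	kfree(c);
}

/* dm_map_fn (inferred shape): redirect the buffer and let it be submitted */
static int example_linear_map(struct buffer_head *bh, void *context)
{
	struct example_linear_c *lc = context;

	bh->b_rdev = lc->dev;
	bh->b_rsector += lc->offset;
	return 1;	/* >0 appears to mean "remapped, submit it" in request() */
}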

View File

@ -66,7 +66,7 @@ struct pf_data {
int minor;
};
int dm_init_fs()
int dm_init_fs(void)
{
struct pf_data *pfd = kmalloc(sizeof(*pfd), GFP_KERNEL);

View File

@ -94,7 +94,7 @@ int dm_start_table(struct mapped_device *md)
set_bit(DM_LOADING, &md->state);
dm_free_table(md);
if ((r = alloc_targets(md, 2))) /* FIXME: increase once debugged 256 ? */
if ((r = alloc_targets(md, 64)))
return r;
return 0;

View File

@ -25,13 +25,6 @@
* 14/08/2001 - First Version [Joe Thornber]
*/
/* TODO:
*
* dm_ctr_fn should provide the sector sizes, and hardsector_sizes set
* to the smallest of these.
*/
#include "dm.h"
/* defines for blk.h */
@ -43,84 +36,18 @@
#include <linux/blk.h>
/*
* This driver attempts to provide a generic way of specifying logical
* devices which are mapped onto other devices.
*
* It does this by mapping sections of the logical device onto 'targets'.
*
* When the logical device is accessed the make_request function looks up
* the correct target for the given sector, and then asks this target
* to do the remapping.
*
* A btree like structure is used to hold the sector range -> target
* mapping. Because we know all the entries in the btree in advance
* we can make a very compact tree, omitting pointers to child nodes
* (child node locations can be calculated). Each node of the btree is
* one level-1 cache line in size, which gives a small performance boost.
*
* A userland test program for the btree gave the following results on a
* 1 Gigahertz Athlon machine:
*
* entries in btree lookups per second
* ---------------- ------------------
* 5 25,000,000
* 1000 7,700,000
* 10,000,000 3,800,000
*
* Of course these results should be taken with a pinch of salt; the lookups
* were sequential and there were no other applications (other than X + emacs)
* running to give any pressure on the level 1 cache.
*
* Typically LVM users would find they have very few targets for each
* LV (probably less than 10).
*
* Target types are not hard coded, instead the
* register_mapping_type function should be called. A target type
* is specified using three functions (see the header):
*
* dm_ctr_fn - takes a string and constructs a target specific piece of
* context data.
* dm_dtr_fn - destroy contexts.
* dm_map_fn - function that takes a buffer_head and some previously
* constructed context and performs the remapping.
*
* This file contains two trivial mappers, which are automatically
* registered: 'linear', and 'io_error'. Linear alone is enough to
* implement most LVM features (omitting striped volumes and
* snapshots).
*
* The driver is controlled through a /proc interface...
* FIXME: finish
*
* At the moment the table assumes 32 bit keys (sectors), the move to
* 64 bits will involve no interface changes, since the tables will be
* read in as ascii data. A different table implementation can
* therefore be provided at another time. Either just by changing offset_t
* to 64 bits, or maybe implementing a structure which looks up the keys in
* stages (ie, 32 bits at a time).
*
* More interesting targets:
*
* striped mapping; given a stripe size and a number of device regions
* this would stripe data across the regions. Especially useful, since
* we could limit each striped region to a 32 bit area and then avoid
* nasty 64 bit %'s.
*
* mirror mapping (reflector ?); would set off a kernel thread slowly
* copying data from one region to another, ensuring that any new
* writes got copied to both destinations correctly. Great for
* implementing pvmove. Not sure how userland would be notified that
* the copying process had completed. Possibly by reading a /proc entry
* for the LV. Could also use poll() for this kind of thing.
*/
#define MAX_DEVICES 64
#define DEFAULT_READ_AHEAD 64
const char *_name = "device-mapper";
int _version[3] = {0, 1, 0};
struct io_hook {
struct mapped_device *md;
void (*end_io)(struct buffer_head *bh, int uptodate);
void *context;
};
#define rl down_read(&_dev_lock)
#define ru up_read(&_dev_lock)
#define wl down_write(&_dev_lock)
@ -292,14 +219,41 @@ static int blk_ioctl(struct inode *inode, struct file *file,
return 0;
}
/* FIXME: should io_hooks come from their own slab ? */
inline static struct io_hook *alloc_io_hook(void)
{
return kmalloc(sizeof(struct io_hook), GFP_NOIO);
}
inline static void free_io_hook(struct io_hook *ih)
{
kfree(ih);
}
static void dec_pending(struct buffer_head *bh, int uptodate)
{
struct io_hook *ih = bh->b_private;
if (atomic_dec_and_test(&ih->md->pending))
/* nudge anyone waiting on suspend queue */
wake_up_interruptible(&ih->md->wait);
bh->b_end_io = ih->end_io;
bh->b_private = ih->context;
free_io_hook(ih);
bh->b_end_io(bh, uptodate);
}
static int request(request_queue_t *q, int rw, struct buffer_head *bh)
{
struct mapped_device *md;
offset_t *node;
int i = 0, l, next_node = 0, ret = 0;
int i = 0, l, next_node = 0, r = 0;
int minor = MINOR(bh->b_rdev);
dm_map_fn fn;
void *context;
struct io_hook *ih = 0;
if (minor >= MAX_DEVICES)
return -ENXIO;
@ -307,11 +261,10 @@ static int request(request_queue_t *q, int rw, struct buffer_head *bh)
rl;
md = _devs[minor];
if (!md) {
ret = -ENXIO;
goto out;
}
if (!md)
goto bad;
/* search the btree for the correct target */
for (l = 0; l < md->depth; l++) {
next_node = ((KEYS_PER_NODE + 1) * next_node) + i;
node = md->index[l] + (next_node * KEYS_PER_NODE);
@ -325,15 +278,42 @@ static int request(request_queue_t *q, int rw, struct buffer_head *bh)
fn = md->targets[next_node];
context = md->contexts[next_node];
if (fn) {
if ((ret = fn(bh, context)))
atomic_inc(&md->pending);
} else
buffer_IO_error(bh);
if (!fn)
goto bad;
ih = alloc_io_hook();
if (!ih)
goto bad;
ih->md = md;
ih->end_io = bh->b_end_io;
ih->context = bh->b_private;
r = fn(bh, context);
if (r > 0) {
/* hook the end io request fn */
atomic_inc(&md->pending);
bh->b_end_io = dec_pending;
bh->b_private = ih;
} else if (r == 0)
/* we don't need to hook */
free_io_hook(ih);
else if (r < 0) {
free_io_hook(ih);
goto bad;
}
out:
ru;
return ret;
return r;
bad:
ru;
buffer_IO_error(bh);
return 0;
}
static inline int __specific_dev(int minor)
@ -379,6 +359,8 @@ static struct mapped_device *alloc_dev(int minor)
md->name[0] = '\0';
md->state = 0;
init_waitqueue_head(&md->wait);
_devs[minor] = md;
wu;
@ -496,6 +478,11 @@ int dm_remove(const char *name)
return -ENXIO;
}
if (md->in_use) {
wu;
return -EPERM;
}
if ((r = dm_fs_remove(md))) {
wu;
return r;
@ -566,7 +553,6 @@ int dm_activate(struct mapped_device *md)
return 0;
bad:
od = d;
for (d = md->devices; d != od; d = d->next)
close_dev(d);
@ -577,15 +563,33 @@ int dm_activate(struct mapped_device *md)
void dm_suspend(struct mapped_device *md)
{
DECLARE_WAITQUEUE(wait, current);
struct dev_list *d;
if (!is_active(md))
return;
/* wait for all the pending io to flush */
add_wait_queue(&md->wait, &wait);
current->state = TASK_INTERRUPTIBLE;
do {
wl;
if (!atomic_read(&md->pending))
break;
wu;
schedule();
} while (1);
current->state = TASK_RUNNING;
remove_wait_queue(&md->wait, &wait);
/* close all the devices */
for (d = md->devices; d; d = d->next)
close_dev(d);
clear_bit(DM_ACTIVE, &md->state);
wu;
}

View File

@ -27,6 +27,94 @@
* 16/08/2001 - First version [Joe Thornber]
*/
/*
* This driver attempts to provide a generic way of specifying logical
* devices which are mapped onto other devices.
*
* It does this by mapping sections of the logical device onto 'targets'.
*
* When the logical device is accessed the make_request function looks up
* the correct target for the given sector, and then asks this target
* to do the remapping.
*
* (dm-table.c) A btree like structure is used to hold the sector
* range -> target mapping. Because we know all the entries in the
* btree in advance we can make a very compact tree, omitting pointers
* to child nodes (child node locations can be calculated). Each
* node of the btree is one level-1 cache line in size, which gives a
* small performance boost.
*
* A userland test program for the btree gave the following results on a
* 1 Gigahertz Athlon machine:
*
* entries in btree lookups per second
* ---------------- ------------------
* 5 25,000,000
* 1000 7,700,000
* 10,000,000 3,800,000
*
* Of course these results should be taken with a pinch of salt; the lookups
* were sequential and there were no other applications (other than X + emacs)
* running to give any pressure on the level 1 cache.
*
* Typical LVM users would find they have very few targets for each
* LV (probably less than 10).
*
* (dm-target.c) Target types are not hard coded, instead the
* register_mapping_type function should be called. A target type is
* specified using three functions (see the header):
*
* dm_ctr_fn - takes a string and constructs a target specific piece of
* context data.
* dm_dtr_fn - destroy contexts.
* dm_map_fn - function that takes a buffer_head and some previously
* constructed context and performs the remapping.
*
* Currently there are two trivial mappers, which are
* automatically registered: 'linear', and 'io_error'. Linear alone
* is enough to implement most LVM features (omitting striped volumes
* and snapshots).
*
* (dm-fs.c) The driver is controlled through a /proc interface:
* /proc/device-mapper/control allows you to create and remove devices
* by 'cat'ing a line of the following format:
*
* create <device name> [minor no]
* remove <device name>
*
* /proc/device-mapper/<device name> accepts the mapping table:
*
* begin
* <sector start> <length> <target name> <target args>...
* ...
* end
*
* The begin/end lines are nasty; they should be handled by open/close
* for the file.
*
* At the moment the table assumes 32 bit keys (sectors), the move to
* 64 bits will involve no interface changes, since the tables will be
* read in as ascii data. A different table implementation can
* therefore be provided at another time. Either just by changing offset_t
* to 64 bits, or maybe implementing a structure which looks up the keys in
* stages (ie, 32 bits at a time).
*
* More interesting targets:
*
* striped mapping; given a stripe size and a number of device regions
* this would stripe data across the regions. Especially useful, since
* we could limit each striped region to a 32 bit area and then avoid
* nasty 64 bit %'s.
*
* mirror mapping (reflector ?); would set off a kernel thread slowly
* copying data from one region to another, ensuring that any new
* writes got copied to both destinations correctly. Great for
* implementing pvmove. Not sure how userland would be notified that
* the copying process had completed. Possibly by reading a /proc entry
* for the LV. Could also use poll() for this kind of thing.
*/
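As a usage illustration of the /proc interface described in the comment above, here is a userland sketch that creates a device and feeds it a one-target table. The device name, sector counts, and the linear target's argument format are made up for the example; only the control-file commands and the begin/end framing follow the comment:

#include <stdio.h>

int main(void)
{
	FILE *f;

	/* create the mapped device via the control file */
	f = fopen("/proc/device-mapper/control", "w");
	if (!f)
		return 1;
	fprintf(f, "create example-lv\n");
	fclose(f);

	/* feed it a single linear target covering 204800 sectors (100 MB) */
	f = fopen("/proc/device-mapper/example-lv", "w");
	if (!f)
		return 1;
	fprintf(f, "begin\n");
	fprintf(f, "0 204800 linear /dev/hda1 0\n");	/* target args are illustrative */
	fprintf(f, "end\n");
	fclose(f);

	return 0;
}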
#ifndef DM_INTERNAL_H
#define DM_INTERNAL_H
@ -66,7 +154,9 @@ struct mapped_device {
int use_count;
int state;
atomic_t pending;
wait_queue_head_t wait;
atomic_t pending; /* # of 'in flight' buffers */
/* btree table */
int depth;