mirror of
git://sourceware.org/git/lvm2.git
synced 2024-12-21 13:34:40 +03:00
o added proper suspend/resume support, it now waits for all 'in flight' io's
to complete. moved comment to dm.h
This commit is contained in:
parent
0d3e8e743a
commit
bfba809c79
@ -38,6 +38,7 @@
|
||||
struct mapped_device;
|
||||
typedef unsigned int offset_t;
|
||||
|
||||
/* constructor, destructor and map fn types */
|
||||
typedef int (*dm_ctr_fn)(offset_t b, offset_t e, struct mapped_device *md,
|
||||
const char *cb, const char *ce, void **result);
|
||||
typedef void (*dm_dtr_fn)(void *c);
|
||||
|
@ -66,7 +66,7 @@ struct pf_data {
|
||||
int minor;
|
||||
};
|
||||
|
||||
int dm_init_fs()
|
||||
int dm_init_fs(void)
|
||||
{
|
||||
struct pf_data *pfd = kmalloc(sizeof(*pfd), GFP_KERNEL);
|
||||
|
||||
|
@ -94,7 +94,7 @@ int dm_start_table(struct mapped_device *md)
|
||||
set_bit(DM_LOADING, &md->state);
|
||||
|
||||
dm_free_table(md);
|
||||
if ((r = alloc_targets(md, 2))) /* FIXME: increase once debugged 256 ? */
|
||||
if ((r = alloc_targets(md, 64)))
|
||||
return r;
|
||||
|
||||
return 0;
|
||||
|
@ -25,13 +25,6 @@
|
||||
* 14/08/2001 - First Version [Joe Thornber]
|
||||
*/
|
||||
|
||||
|
||||
/* TODO:
|
||||
*
|
||||
* dm_ctr_fn should provide the sector sizes, and hardsector_sizes set
|
||||
* to the smallest of these.
|
||||
*/
|
||||
|
||||
#include "dm.h"
|
||||
|
||||
/* defines for blk.h */
|
||||
@ -43,84 +36,18 @@
|
||||
|
||||
#include <linux/blk.h>
|
||||
|
||||
/*
|
||||
* This driver attempts to provide a generic way of specifying logical
|
||||
* devices which are mapped onto other devices.
|
||||
*
|
||||
* It does this by mapping sections of the logical device onto 'targets'.
|
||||
*
|
||||
* When the logical device is accessed the make_request function looks up
|
||||
* the correct target for the given sector, and then asks this target
|
||||
* to do the remapping.
|
||||
*
|
||||
* A btree like structure is used to hold the sector range -> target
|
||||
* mapping. Because we know all the entries in the btree in advance
|
||||
* we can make a very compact tree, omitting pointers to child nodes,
|
||||
* (child nodes locations can be calculated). Each node of the btree is
|
||||
* 1 level cache line in size, this gives a small performance boost.
|
||||
*
|
||||
* A userland test program for the btree gave the following results on a
|
||||
* 1 Gigahertz Athlon machine:
|
||||
*
|
||||
* entries in btree lookups per second
|
||||
* ---------------- ------------------
|
||||
* 5 25,000,000
|
||||
* 1000 7,700,000
|
||||
* 10,000,000 3,800,000
|
||||
*
|
||||
* Of course these results should be taken with a pinch of salt; the lookups
|
||||
* were sequential and there were no other applications (other than X + emacs)
|
||||
* running to give any pressure on the level 1 cache.
|
||||
*
|
||||
* Typically LVM users would find they have very few targets for each
|
||||
* LV (probably less than 10).
|
||||
*
|
||||
* Target types are not hard coded, instead the
|
||||
* register_mapping_type function should be called. A target type
|
||||
* is specified using three functions (see the header):
|
||||
*
|
||||
* dm_ctr_fn - takes a string and contructs a target specific piece of
|
||||
* context data.
|
||||
* dm_dtr_fn - destroy contexts.
|
||||
* dm_map_fn - function that takes a buffer_head and some previously
|
||||
* constructed context and performs the remapping.
|
||||
*
|
||||
* This file contains two trivial mappers, which are automatically
|
||||
* registered: 'linear', and 'io_error'. Linear alone is enough to
|
||||
* implement most LVM features (omitting striped volumes and
|
||||
* snapshots).
|
||||
*
|
||||
* The driver is controlled through a /proc interface...
|
||||
* FIXME: finish
|
||||
*
|
||||
* At the moment the table assumes 32 bit keys (sectors), the move to
|
||||
* 64 bits will involve no interface changes, since the tables will be
|
||||
* read in as ascii data. A different table implementation can
|
||||
* therefor be provided at another time. Either just by changing offset_t
|
||||
* to 64 bits, or maybe implementing a structure which looks up the keys in
|
||||
* stages (ie, 32 bits at a time).
|
||||
*
|
||||
* More interesting targets:
|
||||
*
|
||||
* striped mapping; given a stripe size and a number of device regions
|
||||
* this would stripe data across the regions. Especially useful, since
|
||||
* we could limit each striped region to a 32 bit area and then avoid
|
||||
* nasy 64 bit %'s.
|
||||
*
|
||||
* mirror mapping (reflector ?); would set off a kernel thread slowly
|
||||
* copying data from one region to another, ensuring that any new
|
||||
* writes got copied to both destinations correctly. Great for
|
||||
* implementing pvmove. Not sure how userland would be notified that
|
||||
* the copying process had completed. Possibly by reading a /proc entry
|
||||
* for the LV. Could also use poll() for this kind of thing.
|
||||
*/
|
||||
|
||||
#define MAX_DEVICES 64
|
||||
#define DEFAULT_READ_AHEAD 64
|
||||
|
||||
const char *_name = "device-mapper";
|
||||
int _version[3] = {0, 1, 0};
|
||||
|
||||
struct io_hook {
|
||||
struct mapped_device *md;
|
||||
void (*end_io)(struct buffer_head *bh, int uptodate);
|
||||
void *context;
|
||||
};
|
||||
|
||||
#define rl down_read(&_dev_lock)
|
||||
#define ru up_read(&_dev_lock)
|
||||
#define wl down_write(&_dev_lock)
|
||||
@ -292,14 +219,41 @@ static int blk_ioctl(struct inode *inode, struct file *file,
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* FIXME: should io_hooks come from their own slab ? */
|
||||
inline static struct io_hook *alloc_io_hook(void)
|
||||
{
|
||||
return kmalloc(sizeof(struct io_hook), GFP_NOIO);
|
||||
}
|
||||
|
||||
inline static void free_io_hook(struct io_hook *ih)
|
||||
{
|
||||
kfree(ih);
|
||||
}
|
||||
|
||||
static void dec_pending(struct buffer_head *bh, int uptodate)
|
||||
{
|
||||
struct io_hook *ih = bh->b_private;
|
||||
|
||||
if (atomic_dec_and_test(&ih->md->pending))
|
||||
/* nudge anyone waiting on suspend queue */
|
||||
wake_up_interruptible(&ih->md->wait);
|
||||
|
||||
bh->b_end_io = ih->end_io;
|
||||
bh->b_private = ih->context;
|
||||
free_io_hook(ih);
|
||||
|
||||
bh->b_end_io(bh, uptodate);
|
||||
}
|
||||
|
||||
static int request(request_queue_t *q, int rw, struct buffer_head *bh)
|
||||
{
|
||||
struct mapped_device *md;
|
||||
offset_t *node;
|
||||
int i = 0, l, next_node = 0, ret = 0;
|
||||
int i = 0, l, next_node = 0, r = 0;
|
||||
int minor = MINOR(bh->b_rdev);
|
||||
dm_map_fn fn;
|
||||
void *context;
|
||||
struct io_hook *ih = 0;
|
||||
|
||||
if (minor >= MAX_DEVICES)
|
||||
return -ENXIO;
|
||||
@ -307,11 +261,10 @@ static int request(request_queue_t *q, int rw, struct buffer_head *bh)
|
||||
rl;
|
||||
md = _devs[minor];
|
||||
|
||||
if (!md) {
|
||||
ret = -ENXIO;
|
||||
goto out;
|
||||
}
|
||||
if (!md)
|
||||
goto bad;
|
||||
|
||||
/* search the btree for the correct target */
|
||||
for (l = 0; l < md->depth; l++) {
|
||||
next_node = ((KEYS_PER_NODE + 1) * next_node) + i;
|
||||
node = md->index[l] + (next_node * KEYS_PER_NODE);
|
||||
@ -325,15 +278,42 @@ static int request(request_queue_t *q, int rw, struct buffer_head *bh)
|
||||
fn = md->targets[next_node];
|
||||
context = md->contexts[next_node];
|
||||
|
||||
if (fn) {
|
||||
if ((ret = fn(bh, context)))
|
||||
atomic_inc(&md->pending);
|
||||
} else
|
||||
buffer_IO_error(bh);
|
||||
if (!fn)
|
||||
goto bad;
|
||||
|
||||
ih = alloc_io_hook();
|
||||
|
||||
if (!ih)
|
||||
goto bad;
|
||||
|
||||
ih->md = md;
|
||||
ih->end_io = bh->b_end_io;
|
||||
ih->context = bh->b_private;
|
||||
|
||||
r = fn(bh, context);
|
||||
|
||||
if (r > 0) {
|
||||
/* hook the end io request fn */
|
||||
atomic_inc(&md->pending);
|
||||
bh->b_end_io = dec_pending;
|
||||
bh->b_private = ih;
|
||||
|
||||
} else if (r == 0)
|
||||
/* we don't need to hook */
|
||||
free_io_hook(ih);
|
||||
|
||||
else if (r < 0) {
|
||||
free_io_hook(ih);
|
||||
goto bad;
|
||||
}
|
||||
|
||||
out:
|
||||
ru;
|
||||
return ret;
|
||||
return r;
|
||||
|
||||
bad:
|
||||
ru;
|
||||
buffer_IO_error(bh);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline int __specific_dev(int minor)
|
||||
@ -379,6 +359,8 @@ static struct mapped_device *alloc_dev(int minor)
|
||||
md->name[0] = '\0';
|
||||
md->state = 0;
|
||||
|
||||
init_waitqueue_head(&md->wait);
|
||||
|
||||
_devs[minor] = md;
|
||||
wu;
|
||||
|
||||
@ -496,6 +478,11 @@ int dm_remove(const char *name)
|
||||
return -ENXIO;
|
||||
}
|
||||
|
||||
if (md->in_use) {
|
||||
wu;
|
||||
return -EPERM;
|
||||
}
|
||||
|
||||
if ((r = dm_fs_remove(md))) {
|
||||
wu;
|
||||
return r;
|
||||
@ -566,7 +553,6 @@ int dm_activate(struct mapped_device *md)
|
||||
return 0;
|
||||
|
||||
bad:
|
||||
|
||||
od = d;
|
||||
for (d = md->devices; d != od; d = d->next)
|
||||
close_dev(d);
|
||||
@ -577,15 +563,33 @@ int dm_activate(struct mapped_device *md)
|
||||
|
||||
void dm_suspend(struct mapped_device *md)
|
||||
{
|
||||
DECLARE_WAITQUEUE(wait, current);
|
||||
struct dev_list *d;
|
||||
if (!is_active(md))
|
||||
return;
|
||||
|
||||
/* wait for all the pending io to flush */
|
||||
add_wait_queue(&md->wait, &wait);
|
||||
current->state = TASK_INTERRUPTIBLE;
|
||||
do {
|
||||
wl;
|
||||
if (!atomic_read(&md->pending))
|
||||
break;
|
||||
|
||||
wu;
|
||||
schedule();
|
||||
|
||||
} while (1);
|
||||
|
||||
current->state = TASK_RUNNING;
|
||||
remove_wait_queue(&md->wait, &wait);
|
||||
|
||||
/* close all the devices */
|
||||
for (d = md->devices; d; d = d->next)
|
||||
close_dev(d);
|
||||
|
||||
clear_bit(DM_ACTIVE, &md->state);
|
||||
wu;
|
||||
}
|
||||
|
||||
|
||||
|
@ -27,6 +27,94 @@
|
||||
* 16/08/2001 - First version [Joe Thornber]
|
||||
*/
|
||||
|
||||
/*
|
||||
* This driver attempts to provide a generic way of specifying logical
|
||||
* devices which are mapped onto other devices.
|
||||
*
|
||||
* It does this by mapping sections of the logical device onto 'targets'.
|
||||
*
|
||||
* When the logical device is accessed the make_request function looks up
|
||||
* the correct target for the given sector, and then asks this target
|
||||
* to do the remapping.
|
||||
*
|
||||
* (dm-table.c) A btree like structure is used to hold the sector
|
||||
* range -> target mapping. Because we know all the entries in the
|
||||
* btree in advance we can make a very compact tree, omitting pointers
|
||||
* to child nodes, (child nodes locations can be calculated). Each
|
||||
* node of the btree is 1 level cache line in size, this gives a small
|
||||
* performance boost.
|
||||
*
|
||||
* A userland test program for the btree gave the following results on a
|
||||
* 1 Gigahertz Athlon machine:
|
||||
*
|
||||
* entries in btree lookups per second
|
||||
* ---------------- ------------------
|
||||
* 5 25,000,000
|
||||
* 1000 7,700,000
|
||||
* 10,000,000 3,800,000
|
||||
*
|
||||
* Of course these results should be taken with a pinch of salt; the lookups
|
||||
* were sequential and there were no other applications (other than X + emacs)
|
||||
* running to give any pressure on the level 1 cache.
|
||||
*
|
||||
* Typical LVM users would find they have very few targets for each
|
||||
* LV (probably less than 10).
|
||||
*
|
||||
* (dm-target.c) Target types are not hard coded, instead the
|
||||
* register_mapping_type function should be called. A target type is
|
||||
* specified using three functions (see the header):
|
||||
*
|
||||
* dm_ctr_fn - takes a string and contructs a target specific piece of
|
||||
* context data.
|
||||
* dm_dtr_fn - destroy contexts.
|
||||
* dm_map_fn - function that takes a buffer_head and some previously
|
||||
* constructed context and performs the remapping.
|
||||
*
|
||||
* Currently there are two two trivial mappers, which are
|
||||
* automatically registered: 'linear', and 'io_error'. Linear alone
|
||||
* is enough to implement most LVM features (omitting striped volumes
|
||||
* and snapshots).
|
||||
*
|
||||
* (dm-fs.c) The driver is controlled through a /proc interface:
|
||||
* /proc/device-mapper/control allows you to create and remove devices
|
||||
* by 'cat'ing a line of the following format:
|
||||
*
|
||||
* create <device name> [minor no]
|
||||
* remove <device name>
|
||||
*
|
||||
* /proc/device-mapper/<device name> accepts the mapping table:
|
||||
*
|
||||
* begin
|
||||
* <sector start> <length> <target name> <target args>...
|
||||
* ...
|
||||
* end
|
||||
*
|
||||
* The begin/end lines are nasty, they should be handled by open/close
|
||||
* for the file.
|
||||
*
|
||||
* At the moment the table assumes 32 bit keys (sectors), the move to
|
||||
* 64 bits will involve no interface changes, since the tables will be
|
||||
* read in as ascii data. A different table implementation can
|
||||
* therefor be provided at another time. Either just by changing offset_t
|
||||
* to 64 bits, or maybe implementing a structure which looks up the keys in
|
||||
* stages (ie, 32 bits at a time).
|
||||
*
|
||||
* More interesting targets:
|
||||
*
|
||||
* striped mapping; given a stripe size and a number of device regions
|
||||
* this would stripe data across the regions. Especially useful, since
|
||||
* we could limit each striped region to a 32 bit area and then avoid
|
||||
* nasty 64 bit %'s.
|
||||
*
|
||||
* mirror mapping (reflector ?); would set off a kernel thread slowly
|
||||
* copying data from one region to another, ensuring that any new
|
||||
* writes got copied to both destinations correctly. Great for
|
||||
* implementing pvmove. Not sure how userland would be notified that
|
||||
* the copying process had completed. Possibly by reading a /proc entry
|
||||
* for the LV. Could also use poll() for this kind of thing.
|
||||
*/
|
||||
|
||||
|
||||
#ifndef DM_INTERNAL_H
|
||||
#define DM_INTERNAL_H
|
||||
|
||||
@ -66,7 +154,9 @@ struct mapped_device {
|
||||
|
||||
int use_count;
|
||||
int state;
|
||||
atomic_t pending;
|
||||
|
||||
wait_queue_head_t wait;
|
||||
atomic_t pending; /* # of 'in flight' buffers */
|
||||
|
||||
/* btree table */
|
||||
int depth;
|
||||
|
Loading…
Reference in New Issue
Block a user