mirror of
git://sourceware.org/git/lvm2.git
synced 2025-01-03 05:18:29 +03:00
o added proper suspend/resume support, it now waits for all 'in flight' io's
to complete. moved comment to dm.h
This commit is contained in:
parent
0d3e8e743a
commit
bfba809c79
@ -38,6 +38,7 @@
|
|||||||
struct mapped_device;
|
struct mapped_device;
|
||||||
typedef unsigned int offset_t;
|
typedef unsigned int offset_t;
|
||||||
|
|
||||||
|
/* constructor, destructor and map fn types */
|
||||||
typedef int (*dm_ctr_fn)(offset_t b, offset_t e, struct mapped_device *md,
|
typedef int (*dm_ctr_fn)(offset_t b, offset_t e, struct mapped_device *md,
|
||||||
const char *cb, const char *ce, void **result);
|
const char *cb, const char *ce, void **result);
|
||||||
typedef void (*dm_dtr_fn)(void *c);
|
typedef void (*dm_dtr_fn)(void *c);
|
||||||
|
@ -66,7 +66,7 @@ struct pf_data {
|
|||||||
int minor;
|
int minor;
|
||||||
};
|
};
|
||||||
|
|
||||||
int dm_init_fs()
|
int dm_init_fs(void)
|
||||||
{
|
{
|
||||||
struct pf_data *pfd = kmalloc(sizeof(*pfd), GFP_KERNEL);
|
struct pf_data *pfd = kmalloc(sizeof(*pfd), GFP_KERNEL);
|
||||||
|
|
||||||
|
@ -94,7 +94,7 @@ int dm_start_table(struct mapped_device *md)
|
|||||||
set_bit(DM_LOADING, &md->state);
|
set_bit(DM_LOADING, &md->state);
|
||||||
|
|
||||||
dm_free_table(md);
|
dm_free_table(md);
|
||||||
if ((r = alloc_targets(md, 2))) /* FIXME: increase once debugged 256 ? */
|
if ((r = alloc_targets(md, 64)))
|
||||||
return r;
|
return r;
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
|
@ -25,13 +25,6 @@
|
|||||||
* 14/08/2001 - First Version [Joe Thornber]
|
* 14/08/2001 - First Version [Joe Thornber]
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
|
||||||
/* TODO:
|
|
||||||
*
|
|
||||||
* dm_ctr_fn should provide the sector sizes, and hardsector_sizes set
|
|
||||||
* to the smallest of these.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#include "dm.h"
|
#include "dm.h"
|
||||||
|
|
||||||
/* defines for blk.h */
|
/* defines for blk.h */
|
||||||
@ -43,84 +36,18 @@
|
|||||||
|
|
||||||
#include <linux/blk.h>
|
#include <linux/blk.h>
|
||||||
|
|
||||||
/*
|
|
||||||
* This driver attempts to provide a generic way of specifying logical
|
|
||||||
* devices which are mapped onto other devices.
|
|
||||||
*
|
|
||||||
* It does this by mapping sections of the logical device onto 'targets'.
|
|
||||||
*
|
|
||||||
* When the logical device is accessed the make_request function looks up
|
|
||||||
* the correct target for the given sector, and then asks this target
|
|
||||||
* to do the remapping.
|
|
||||||
*
|
|
||||||
* A btree like structure is used to hold the sector range -> target
|
|
||||||
* mapping. Because we know all the entries in the btree in advance
|
|
||||||
* we can make a very compact tree, omitting pointers to child nodes,
|
|
||||||
* (child nodes locations can be calculated). Each node of the btree is
|
|
||||||
* 1 level cache line in size, this gives a small performance boost.
|
|
||||||
*
|
|
||||||
* A userland test program for the btree gave the following results on a
|
|
||||||
* 1 Gigahertz Athlon machine:
|
|
||||||
*
|
|
||||||
* entries in btree lookups per second
|
|
||||||
* ---------------- ------------------
|
|
||||||
* 5 25,000,000
|
|
||||||
* 1000 7,700,000
|
|
||||||
* 10,000,000 3,800,000
|
|
||||||
*
|
|
||||||
* Of course these results should be taken with a pinch of salt; the lookups
|
|
||||||
* were sequential and there were no other applications (other than X + emacs)
|
|
||||||
* running to give any pressure on the level 1 cache.
|
|
||||||
*
|
|
||||||
* Typically LVM users would find they have very few targets for each
|
|
||||||
* LV (probably less than 10).
|
|
||||||
*
|
|
||||||
* Target types are not hard coded, instead the
|
|
||||||
* register_mapping_type function should be called. A target type
|
|
||||||
* is specified using three functions (see the header):
|
|
||||||
*
|
|
||||||
* dm_ctr_fn - takes a string and contructs a target specific piece of
|
|
||||||
* context data.
|
|
||||||
* dm_dtr_fn - destroy contexts.
|
|
||||||
* dm_map_fn - function that takes a buffer_head and some previously
|
|
||||||
* constructed context and performs the remapping.
|
|
||||||
*
|
|
||||||
* This file contains two trivial mappers, which are automatically
|
|
||||||
* registered: 'linear', and 'io_error'. Linear alone is enough to
|
|
||||||
* implement most LVM features (omitting striped volumes and
|
|
||||||
* snapshots).
|
|
||||||
*
|
|
||||||
* The driver is controlled through a /proc interface...
|
|
||||||
* FIXME: finish
|
|
||||||
*
|
|
||||||
* At the moment the table assumes 32 bit keys (sectors), the move to
|
|
||||||
* 64 bits will involve no interface changes, since the tables will be
|
|
||||||
* read in as ascii data. A different table implementation can
|
|
||||||
* therefor be provided at another time. Either just by changing offset_t
|
|
||||||
* to 64 bits, or maybe implementing a structure which looks up the keys in
|
|
||||||
* stages (ie, 32 bits at a time).
|
|
||||||
*
|
|
||||||
* More interesting targets:
|
|
||||||
*
|
|
||||||
* striped mapping; given a stripe size and a number of device regions
|
|
||||||
* this would stripe data across the regions. Especially useful, since
|
|
||||||
* we could limit each striped region to a 32 bit area and then avoid
|
|
||||||
* nasy 64 bit %'s.
|
|
||||||
*
|
|
||||||
* mirror mapping (reflector ?); would set off a kernel thread slowly
|
|
||||||
* copying data from one region to another, ensuring that any new
|
|
||||||
* writes got copied to both destinations correctly. Great for
|
|
||||||
* implementing pvmove. Not sure how userland would be notified that
|
|
||||||
* the copying process had completed. Possibly by reading a /proc entry
|
|
||||||
* for the LV. Could also use poll() for this kind of thing.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#define MAX_DEVICES 64
|
#define MAX_DEVICES 64
|
||||||
#define DEFAULT_READ_AHEAD 64
|
#define DEFAULT_READ_AHEAD 64
|
||||||
|
|
||||||
const char *_name = "device-mapper";
|
const char *_name = "device-mapper";
|
||||||
int _version[3] = {0, 1, 0};
|
int _version[3] = {0, 1, 0};
|
||||||
|
|
||||||
|
struct io_hook {
|
||||||
|
struct mapped_device *md;
|
||||||
|
void (*end_io)(struct buffer_head *bh, int uptodate);
|
||||||
|
void *context;
|
||||||
|
};
|
||||||
|
|
||||||
#define rl down_read(&_dev_lock)
|
#define rl down_read(&_dev_lock)
|
||||||
#define ru up_read(&_dev_lock)
|
#define ru up_read(&_dev_lock)
|
||||||
#define wl down_write(&_dev_lock)
|
#define wl down_write(&_dev_lock)
|
||||||
@ -292,14 +219,41 @@ static int blk_ioctl(struct inode *inode, struct file *file,
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* FIXME: should io_hooks come from their own slab ? */
|
||||||
|
inline static struct io_hook *alloc_io_hook(void)
|
||||||
|
{
|
||||||
|
return kmalloc(sizeof(struct io_hook), GFP_NOIO);
|
||||||
|
}
|
||||||
|
|
||||||
|
inline static void free_io_hook(struct io_hook *ih)
|
||||||
|
{
|
||||||
|
kfree(ih);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void dec_pending(struct buffer_head *bh, int uptodate)
|
||||||
|
{
|
||||||
|
struct io_hook *ih = bh->b_private;
|
||||||
|
|
||||||
|
if (atomic_dec_and_test(&ih->md->pending))
|
||||||
|
/* nudge anyone waiting on suspend queue */
|
||||||
|
wake_up_interruptible(&ih->md->wait);
|
||||||
|
|
||||||
|
bh->b_end_io = ih->end_io;
|
||||||
|
bh->b_private = ih->context;
|
||||||
|
free_io_hook(ih);
|
||||||
|
|
||||||
|
bh->b_end_io(bh, uptodate);
|
||||||
|
}
|
||||||
|
|
||||||
static int request(request_queue_t *q, int rw, struct buffer_head *bh)
|
static int request(request_queue_t *q, int rw, struct buffer_head *bh)
|
||||||
{
|
{
|
||||||
struct mapped_device *md;
|
struct mapped_device *md;
|
||||||
offset_t *node;
|
offset_t *node;
|
||||||
int i = 0, l, next_node = 0, ret = 0;
|
int i = 0, l, next_node = 0, r = 0;
|
||||||
int minor = MINOR(bh->b_rdev);
|
int minor = MINOR(bh->b_rdev);
|
||||||
dm_map_fn fn;
|
dm_map_fn fn;
|
||||||
void *context;
|
void *context;
|
||||||
|
struct io_hook *ih = 0;
|
||||||
|
|
||||||
if (minor >= MAX_DEVICES)
|
if (minor >= MAX_DEVICES)
|
||||||
return -ENXIO;
|
return -ENXIO;
|
||||||
@ -307,11 +261,10 @@ static int request(request_queue_t *q, int rw, struct buffer_head *bh)
|
|||||||
rl;
|
rl;
|
||||||
md = _devs[minor];
|
md = _devs[minor];
|
||||||
|
|
||||||
if (!md) {
|
if (!md)
|
||||||
ret = -ENXIO;
|
goto bad;
|
||||||
goto out;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
/* search the btree for the correct target */
|
||||||
for (l = 0; l < md->depth; l++) {
|
for (l = 0; l < md->depth; l++) {
|
||||||
next_node = ((KEYS_PER_NODE + 1) * next_node) + i;
|
next_node = ((KEYS_PER_NODE + 1) * next_node) + i;
|
||||||
node = md->index[l] + (next_node * KEYS_PER_NODE);
|
node = md->index[l] + (next_node * KEYS_PER_NODE);
|
||||||
@ -325,15 +278,42 @@ static int request(request_queue_t *q, int rw, struct buffer_head *bh)
|
|||||||
fn = md->targets[next_node];
|
fn = md->targets[next_node];
|
||||||
context = md->contexts[next_node];
|
context = md->contexts[next_node];
|
||||||
|
|
||||||
if (fn) {
|
if (!fn)
|
||||||
if ((ret = fn(bh, context)))
|
goto bad;
|
||||||
atomic_inc(&md->pending);
|
|
||||||
} else
|
ih = alloc_io_hook();
|
||||||
buffer_IO_error(bh);
|
|
||||||
|
if (!ih)
|
||||||
|
goto bad;
|
||||||
|
|
||||||
|
ih->md = md;
|
||||||
|
ih->end_io = bh->b_end_io;
|
||||||
|
ih->context = bh->b_private;
|
||||||
|
|
||||||
|
r = fn(bh, context);
|
||||||
|
|
||||||
|
if (r > 0) {
|
||||||
|
/* hook the end io request fn */
|
||||||
|
atomic_inc(&md->pending);
|
||||||
|
bh->b_end_io = dec_pending;
|
||||||
|
bh->b_private = ih;
|
||||||
|
|
||||||
|
} else if (r == 0)
|
||||||
|
/* we don't need to hook */
|
||||||
|
free_io_hook(ih);
|
||||||
|
|
||||||
|
else if (r < 0) {
|
||||||
|
free_io_hook(ih);
|
||||||
|
goto bad;
|
||||||
|
}
|
||||||
|
|
||||||
out:
|
|
||||||
ru;
|
ru;
|
||||||
return ret;
|
return r;
|
||||||
|
|
||||||
|
bad:
|
||||||
|
ru;
|
||||||
|
buffer_IO_error(bh);
|
||||||
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline int __specific_dev(int minor)
|
static inline int __specific_dev(int minor)
|
||||||
@ -379,6 +359,8 @@ static struct mapped_device *alloc_dev(int minor)
|
|||||||
md->name[0] = '\0';
|
md->name[0] = '\0';
|
||||||
md->state = 0;
|
md->state = 0;
|
||||||
|
|
||||||
|
init_waitqueue_head(&md->wait);
|
||||||
|
|
||||||
_devs[minor] = md;
|
_devs[minor] = md;
|
||||||
wu;
|
wu;
|
||||||
|
|
||||||
@ -496,6 +478,11 @@ int dm_remove(const char *name)
|
|||||||
return -ENXIO;
|
return -ENXIO;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (md->in_use) {
|
||||||
|
wu;
|
||||||
|
return -EPERM;
|
||||||
|
}
|
||||||
|
|
||||||
if ((r = dm_fs_remove(md))) {
|
if ((r = dm_fs_remove(md))) {
|
||||||
wu;
|
wu;
|
||||||
return r;
|
return r;
|
||||||
@ -566,7 +553,6 @@ int dm_activate(struct mapped_device *md)
|
|||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
bad:
|
bad:
|
||||||
|
|
||||||
od = d;
|
od = d;
|
||||||
for (d = md->devices; d != od; d = d->next)
|
for (d = md->devices; d != od; d = d->next)
|
||||||
close_dev(d);
|
close_dev(d);
|
||||||
@ -577,15 +563,33 @@ int dm_activate(struct mapped_device *md)
|
|||||||
|
|
||||||
void dm_suspend(struct mapped_device *md)
|
void dm_suspend(struct mapped_device *md)
|
||||||
{
|
{
|
||||||
|
DECLARE_WAITQUEUE(wait, current);
|
||||||
struct dev_list *d;
|
struct dev_list *d;
|
||||||
if (!is_active(md))
|
if (!is_active(md))
|
||||||
return;
|
return;
|
||||||
|
|
||||||
|
/* wait for all the pending io to flush */
|
||||||
|
add_wait_queue(&md->wait, &wait);
|
||||||
|
current->state = TASK_INTERRUPTIBLE;
|
||||||
|
do {
|
||||||
|
wl;
|
||||||
|
if (!atomic_read(&md->pending))
|
||||||
|
break;
|
||||||
|
|
||||||
|
wu;
|
||||||
|
schedule();
|
||||||
|
|
||||||
|
} while (1);
|
||||||
|
|
||||||
|
current->state = TASK_RUNNING;
|
||||||
|
remove_wait_queue(&md->wait, &wait);
|
||||||
|
|
||||||
/* close all the devices */
|
/* close all the devices */
|
||||||
for (d = md->devices; d; d = d->next)
|
for (d = md->devices; d; d = d->next)
|
||||||
close_dev(d);
|
close_dev(d);
|
||||||
|
|
||||||
clear_bit(DM_ACTIVE, &md->state);
|
clear_bit(DM_ACTIVE, &md->state);
|
||||||
|
wu;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -27,6 +27,94 @@
|
|||||||
* 16/08/2001 - First version [Joe Thornber]
|
* 16/08/2001 - First version [Joe Thornber]
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
/*
|
||||||
|
* This driver attempts to provide a generic way of specifying logical
|
||||||
|
* devices which are mapped onto other devices.
|
||||||
|
*
|
||||||
|
* It does this by mapping sections of the logical device onto 'targets'.
|
||||||
|
*
|
||||||
|
* When the logical device is accessed the make_request function looks up
|
||||||
|
* the correct target for the given sector, and then asks this target
|
||||||
|
* to do the remapping.
|
||||||
|
*
|
||||||
|
* (dm-table.c) A btree like structure is used to hold the sector
|
||||||
|
* range -> target mapping. Because we know all the entries in the
|
||||||
|
* btree in advance we can make a very compact tree, omitting pointers
|
||||||
|
* to child nodes, (child nodes locations can be calculated). Each
|
||||||
|
* node of the btree is one level 1 cache line in size, this gives a small
|
||||||
|
* performance boost.
|
||||||
|
*
|
||||||
|
* A userland test program for the btree gave the following results on a
|
||||||
|
* 1 Gigahertz Athlon machine:
|
||||||
|
*
|
||||||
|
* entries in btree lookups per second
|
||||||
|
* ---------------- ------------------
|
||||||
|
* 5 25,000,000
|
||||||
|
* 1000 7,700,000
|
||||||
|
* 10,000,000 3,800,000
|
||||||
|
*
|
||||||
|
* Of course these results should be taken with a pinch of salt; the lookups
|
||||||
|
* were sequential and there were no other applications (other than X + emacs)
|
||||||
|
* running to give any pressure on the level 1 cache.
|
||||||
|
*
|
||||||
|
* Typical LVM users would find they have very few targets for each
|
||||||
|
* LV (probably less than 10).
|
||||||
|
*
|
||||||
|
* (dm-target.c) Target types are not hard coded, instead the
|
||||||
|
* register_mapping_type function should be called. A target type is
|
||||||
|
* specified using three functions (see the header):
|
||||||
|
*
|
||||||
|
* dm_ctr_fn - takes a string and constructs a target specific piece of
|
||||||
|
* context data.
|
||||||
|
* dm_dtr_fn - destroy contexts.
|
||||||
|
* dm_map_fn - function that takes a buffer_head and some previously
|
||||||
|
* constructed context and performs the remapping.
|
||||||
|
*
|
||||||
|
* Currently there are two trivial mappers, which are
|
||||||
|
* automatically registered: 'linear', and 'io_error'. Linear alone
|
||||||
|
* is enough to implement most LVM features (omitting striped volumes
|
||||||
|
* and snapshots).
|
||||||
|
*
|
||||||
|
* (dm-fs.c) The driver is controlled through a /proc interface:
|
||||||
|
* /proc/device-mapper/control allows you to create and remove devices
|
||||||
|
* by 'cat'ing a line of the following format:
|
||||||
|
*
|
||||||
|
* create <device name> [minor no]
|
||||||
|
* remove <device name>
|
||||||
|
*
|
||||||
|
* /proc/device-mapper/<device name> accepts the mapping table:
|
||||||
|
*
|
||||||
|
* begin
|
||||||
|
* <sector start> <length> <target name> <target args>...
|
||||||
|
* ...
|
||||||
|
* end
|
||||||
|
*
|
||||||
|
* The begin/end lines are nasty, they should be handled by open/close
|
||||||
|
* for the file.
|
||||||
|
*
|
||||||
|
* At the moment the table assumes 32 bit keys (sectors), the move to
|
||||||
|
* 64 bits will involve no interface changes, since the tables will be
|
||||||
|
* read in as ascii data. A different table implementation can
|
||||||
|
* therefore be provided at another time. Either just by changing offset_t
|
||||||
|
* to 64 bits, or maybe implementing a structure which looks up the keys in
|
||||||
|
* stages (ie, 32 bits at a time).
|
||||||
|
*
|
||||||
|
* More interesting targets:
|
||||||
|
*
|
||||||
|
* striped mapping; given a stripe size and a number of device regions
|
||||||
|
* this would stripe data across the regions. Especially useful, since
|
||||||
|
* we could limit each striped region to a 32 bit area and then avoid
|
||||||
|
* nasty 64 bit %'s.
|
||||||
|
*
|
||||||
|
* mirror mapping (reflector ?); would set off a kernel thread slowly
|
||||||
|
* copying data from one region to another, ensuring that any new
|
||||||
|
* writes got copied to both destinations correctly. Great for
|
||||||
|
* implementing pvmove. Not sure how userland would be notified that
|
||||||
|
* the copying process had completed. Possibly by reading a /proc entry
|
||||||
|
* for the LV. Could also use poll() for this kind of thing.
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
#ifndef DM_INTERNAL_H
|
#ifndef DM_INTERNAL_H
|
||||||
#define DM_INTERNAL_H
|
#define DM_INTERNAL_H
|
||||||
|
|
||||||
@ -66,7 +154,9 @@ struct mapped_device {
|
|||||||
|
|
||||||
int use_count;
|
int use_count;
|
||||||
int state;
|
int state;
|
||||||
atomic_t pending;
|
|
||||||
|
wait_queue_head_t wait;
|
||||||
|
atomic_t pending; /* # of 'in flight' buffers */
|
||||||
|
|
||||||
/* btree table */
|
/* btree table */
|
||||||
int depth;
|
int depth;
|
||||||
|
Loading…
Reference in New Issue
Block a user