Merge branch 'for-linus' of git://git.kernel.dk/linux-2.6-block

* 'for-linus' of git://git.kernel.dk/linux-2.6-block:
  cfq-iosched: remove redundant check for NULL cfqq in cfq_set_request()
  block: Restore barrier support for md and probably other virtual devices.
  block: get rid of queue-private command filter
  block: Create bip slabs with embedded integrity vectors
  cfq-iosched: get rid of the need for __GFP_NOFAIL in cfq_find_alloc_queue()
  cfq-iosched: move cfqq initialization out of cfq_find_alloc_queue()
  Trivial typo fixes in Documentation/block/data-integrity.txt.
This commit is contained in:
Linus Torvalds 2009-07-01 10:41:09 -07:00
commit 2027bd9f92
13 changed files with 304 additions and 416 deletions

View File

@ -50,7 +50,7 @@ encouraged them to allow separation of the data and integrity metadata
scatter-gather lists. scatter-gather lists.
The controller will interleave the buffers on write and split them on The controller will interleave the buffers on write and split them on
read. This means that the Linux can DMA the data buffers to and from read. This means that Linux can DMA the data buffers to and from
host memory without changes to the page cache. host memory without changes to the page cache.
Also, the 16-bit CRC checksum mandated by both the SCSI and SATA specs Also, the 16-bit CRC checksum mandated by both the SCSI and SATA specs
@ -66,7 +66,7 @@ software RAID5).
The IP checksum is weaker than the CRC in terms of detecting bit The IP checksum is weaker than the CRC in terms of detecting bit
errors. However, the strength is really in the separation of the data errors. However, the strength is really in the separation of the data
buffers and the integrity metadata. These two distinct buffers much buffers and the integrity metadata. These two distinct buffers must
match up for an I/O to complete. match up for an I/O to complete.
The separation of the data and integrity metadata buffers as well as The separation of the data and integrity metadata buffers as well as

View File

@ -5,7 +5,7 @@
obj-$(CONFIG_BLOCK) := elevator.o blk-core.o blk-tag.o blk-sysfs.o \ obj-$(CONFIG_BLOCK) := elevator.o blk-core.o blk-tag.o blk-sysfs.o \
blk-barrier.o blk-settings.o blk-ioc.o blk-map.o \ blk-barrier.o blk-settings.o blk-ioc.o blk-map.o \
blk-exec.o blk-merge.o blk-softirq.o blk-timeout.o \ blk-exec.o blk-merge.o blk-softirq.o blk-timeout.o \
ioctl.o genhd.o scsi_ioctl.o cmd-filter.o ioctl.o genhd.o scsi_ioctl.o
obj-$(CONFIG_BLK_DEV_BSG) += bsg.o obj-$(CONFIG_BLK_DEV_BSG) += bsg.o
obj-$(CONFIG_IOSCHED_NOOP) += noop-iosched.o obj-$(CONFIG_IOSCHED_NOOP) += noop-iosched.o

View File

@ -595,8 +595,6 @@ blk_init_queue_node(request_fn_proc *rfn, spinlock_t *lock, int node_id)
q->sg_reserved_size = INT_MAX; q->sg_reserved_size = INT_MAX;
blk_set_cmd_filter_defaults(&q->cmd_filter);
/* /*
* all done * all done
*/ */
@ -1172,6 +1170,11 @@ static int __make_request(struct request_queue *q, struct bio *bio)
const int unplug = bio_unplug(bio); const int unplug = bio_unplug(bio);
int rw_flags; int rw_flags;
if (bio_barrier(bio) && bio_has_data(bio) &&
(q->next_ordered == QUEUE_ORDERED_NONE)) {
bio_endio(bio, -EOPNOTSUPP);
return 0;
}
/* /*
* low level driver can indicate that it wants pages above a * low level driver can indicate that it wants pages above a
* certain limit bounced to low memory (ie for highmem, or even * certain limit bounced to low memory (ie for highmem, or even
@ -1472,11 +1475,6 @@ static inline void __generic_make_request(struct bio *bio)
err = -EOPNOTSUPP; err = -EOPNOTSUPP;
goto end_io; goto end_io;
} }
if (bio_barrier(bio) && bio_has_data(bio) &&
(q->next_ordered == QUEUE_ORDERED_NONE)) {
err = -EOPNOTSUPP;
goto end_io;
}
ret = q->make_request_fn(q, bio); ret = q->make_request_fn(q, bio);
} while (ret); } while (ret);
@ -2365,7 +2363,7 @@ int blk_rq_prep_clone(struct request *rq, struct request *rq_src,
__bio_clone(bio, bio_src); __bio_clone(bio, bio_src);
if (bio_integrity(bio_src) && if (bio_integrity(bio_src) &&
bio_integrity_clone(bio, bio_src, gfp_mask)) bio_integrity_clone(bio, bio_src, gfp_mask, bs))
goto free_and_out; goto free_and_out;
if (bio_ctr && bio_ctr(bio, bio_src, data)) if (bio_ctr && bio_ctr(bio, bio_src, data))

View File

@ -186,7 +186,7 @@ static int blk_fill_sgv4_hdr_rq(struct request_queue *q, struct request *rq,
return -EFAULT; return -EFAULT;
if (hdr->subprotocol == BSG_SUB_PROTOCOL_SCSI_CMD) { if (hdr->subprotocol == BSG_SUB_PROTOCOL_SCSI_CMD) {
if (blk_verify_command(&q->cmd_filter, rq->cmd, has_write_perm)) if (blk_verify_command(rq->cmd, has_write_perm))
return -EPERM; return -EPERM;
} else if (!capable(CAP_SYS_RAWIO)) } else if (!capable(CAP_SYS_RAWIO))
return -EPERM; return -EPERM;

View File

@ -70,6 +70,51 @@ struct cfq_rb_root {
}; };
#define CFQ_RB_ROOT (struct cfq_rb_root) { RB_ROOT, NULL, } #define CFQ_RB_ROOT (struct cfq_rb_root) { RB_ROOT, NULL, }
/*
 * Per process-grouping structure
 *
 * Holds the scheduling state of one queue of requests: its reference
 * count and flags, its membership in the service tree and (optionally) a
 * prio tree, the sorted and fifo views of its pending requests, time-slice
 * accounting and the I/O priority it runs at.
 */
struct cfq_queue {
/* reference count */
atomic_t ref;
/* various state flags, see below */
unsigned int flags;
/* parent cfq_data */
struct cfq_data *cfqd;
/* service_tree member */
struct rb_node rb_node;
/* service_tree key */
unsigned long rb_key;
/* prio tree member */
struct rb_node p_node;
/* prio tree root we belong to, if any */
struct rb_root *p_root;
/* sorted list of pending requests */
struct rb_root sort_list;
/* if fifo isn't expired, next request to serve */
struct request *next_rq;
/* requests queued in sort_list */
/* NOTE(review): the meaning of the two array slots is not visible here -- confirm against users */
int queued[2];
/* currently allocated requests */
int allocated[2];
/* fifo list of requests in sort_list */
struct list_head fifo;
/*
 * Time-slice accounting.
 * NOTE(review): presumably slice_end is the point in time at which the
 * current slice expires, slice_resid the unused remainder carried over
 * and slice_dispatch the number of requests dispatched in this slice --
 * confirm against the code that updates them.
 */
unsigned long slice_end;
long slice_resid;
unsigned int slice_dispatch;
/* pending metadata requests */
int meta_pending;
/* number of requests that are on the dispatch list or inside driver */
int dispatched;
/* io prio of this group */
/* NOTE(review): the org_* copies look like saved original values -- confirm */
unsigned short ioprio, org_ioprio;
unsigned short ioprio_class, org_ioprio_class;
/* pid handed to cfq_init_cfqq() when the queue was set up */
pid_t pid;
};
/* /*
* Per block device queue structure * Per block device queue structure
*/ */
@ -135,51 +180,11 @@ struct cfq_data {
unsigned int cfq_slice_idle; unsigned int cfq_slice_idle;
struct list_head cic_list; struct list_head cic_list;
};
/* /*
* Per process-grouping structure * Fallback dummy cfqq for extreme OOM conditions
*/ */
struct cfq_queue { struct cfq_queue oom_cfqq;
/* reference count */
atomic_t ref;
/* various state flags, see below */
unsigned int flags;
/* parent cfq_data */
struct cfq_data *cfqd;
/* service_tree member */
struct rb_node rb_node;
/* service_tree key */
unsigned long rb_key;
/* prio tree member */
struct rb_node p_node;
/* prio tree root we belong to, if any */
struct rb_root *p_root;
/* sorted list of pending requests */
struct rb_root sort_list;
/* if fifo isn't expired, next request to serve */
struct request *next_rq;
/* requests queued in sort_list */
int queued[2];
/* currently allocated requests */
int allocated[2];
/* fifo list of requests in sort_list */
struct list_head fifo;
unsigned long slice_end;
long slice_resid;
unsigned int slice_dispatch;
/* pending metadata requests */
int meta_pending;
/* number of requests that are on the dispatch list or inside driver */
int dispatched;
/* io prio of this group */
unsigned short ioprio, org_ioprio;
unsigned short ioprio_class, org_ioprio_class;
pid_t pid;
}; };
enum cfqq_state_flags { enum cfqq_state_flags {
@ -1641,6 +1646,26 @@ static void cfq_ioc_set_ioprio(struct io_context *ioc)
ioc->ioprio_changed = 0; ioc->ioprio_changed = 0;
} }
/*
 * Put @cfqq into its initial state for use with @cfqd.
 *
 * Clears both rb-tree nodes and the fifo list head, resets the reference
 * count to zero, records the owning cfq_data and @pid, and flags the
 * priority as changed.  For a sync queue (@is_sync non-zero) the queue is
 * additionally marked sync and, unless it is in the idle class, gets the
 * idle-window flag.  Fields not listed here are left untouched.
 */
static void cfq_init_cfqq(struct cfq_data *cfqd, struct cfq_queue *cfqq,
			  pid_t pid, int is_sync)
{
	/* ownership and identity */
	cfqq->cfqd = cfqd;
	cfqq->pid = pid;
	atomic_set(&cfqq->ref, 0);

	/* not linked into any tree or list yet */
	INIT_LIST_HEAD(&cfqq->fifo);
	RB_CLEAR_NODE(&cfqq->p_node);
	RB_CLEAR_NODE(&cfqq->rb_node);

	cfq_mark_cfqq_prio_changed(cfqq);

	/* async queues need none of the sync-only flags */
	if (!is_sync)
		return;

	cfq_mark_cfqq_sync(cfqq);
	if (!cfq_class_idle(cfqq))
		cfq_mark_cfqq_idle_window(cfqq);
}
static struct cfq_queue * static struct cfq_queue *
cfq_find_alloc_queue(struct cfq_data *cfqd, int is_sync, cfq_find_alloc_queue(struct cfq_data *cfqd, int is_sync,
struct io_context *ioc, gfp_t gfp_mask) struct io_context *ioc, gfp_t gfp_mask)
@ -1653,56 +1678,40 @@ retry:
/* cic always exists here */ /* cic always exists here */
cfqq = cic_to_cfqq(cic, is_sync); cfqq = cic_to_cfqq(cic, is_sync);
if (!cfqq) { /*
* Always try a new alloc if we fell back to the OOM cfqq
* originally, since it should just be a temporary situation.
*/
if (!cfqq || cfqq == &cfqd->oom_cfqq) {
cfqq = NULL;
if (new_cfqq) { if (new_cfqq) {
cfqq = new_cfqq; cfqq = new_cfqq;
new_cfqq = NULL; new_cfqq = NULL;
} else if (gfp_mask & __GFP_WAIT) { } else if (gfp_mask & __GFP_WAIT) {
/*
* Inform the allocator of the fact that we will
* just repeat this allocation if it fails, to allow
* the allocator to do whatever it needs to attempt to
* free memory.
*/
spin_unlock_irq(cfqd->queue->queue_lock); spin_unlock_irq(cfqd->queue->queue_lock);
new_cfqq = kmem_cache_alloc_node(cfq_pool, new_cfqq = kmem_cache_alloc_node(cfq_pool,
gfp_mask | __GFP_NOFAIL | __GFP_ZERO, gfp_mask | __GFP_ZERO,
cfqd->queue->node); cfqd->queue->node);
spin_lock_irq(cfqd->queue->queue_lock); spin_lock_irq(cfqd->queue->queue_lock);
goto retry; if (new_cfqq)
goto retry;
} else { } else {
cfqq = kmem_cache_alloc_node(cfq_pool, cfqq = kmem_cache_alloc_node(cfq_pool,
gfp_mask | __GFP_ZERO, gfp_mask | __GFP_ZERO,
cfqd->queue->node); cfqd->queue->node);
if (!cfqq)
goto out;
} }
RB_CLEAR_NODE(&cfqq->rb_node); if (cfqq) {
RB_CLEAR_NODE(&cfqq->p_node); cfq_init_cfqq(cfqd, cfqq, current->pid, is_sync);
INIT_LIST_HEAD(&cfqq->fifo); cfq_init_prio_data(cfqq, ioc);
cfq_log_cfqq(cfqd, cfqq, "alloced");
atomic_set(&cfqq->ref, 0); } else
cfqq->cfqd = cfqd; cfqq = &cfqd->oom_cfqq;
cfq_mark_cfqq_prio_changed(cfqq);
cfq_init_prio_data(cfqq, ioc);
if (is_sync) {
if (!cfq_class_idle(cfqq))
cfq_mark_cfqq_idle_window(cfqq);
cfq_mark_cfqq_sync(cfqq);
}
cfqq->pid = current->pid;
cfq_log_cfqq(cfqd, cfqq, "alloced");
} }
if (new_cfqq) if (new_cfqq)
kmem_cache_free(cfq_pool, new_cfqq); kmem_cache_free(cfq_pool, new_cfqq);
out:
WARN_ON((gfp_mask & __GFP_WAIT) && !cfqq);
return cfqq; return cfqq;
} }
@ -1735,11 +1744,8 @@ cfq_get_queue(struct cfq_data *cfqd, int is_sync, struct io_context *ioc,
cfqq = *async_cfqq; cfqq = *async_cfqq;
} }
if (!cfqq) { if (!cfqq)
cfqq = cfq_find_alloc_queue(cfqd, is_sync, ioc, gfp_mask); cfqq = cfq_find_alloc_queue(cfqd, is_sync, ioc, gfp_mask);
if (!cfqq)
return NULL;
}
/* /*
* pin the queue now that it's allocated, scheduler exit will prune it * pin the queue now that it's allocated, scheduler exit will prune it
@ -2307,10 +2313,6 @@ cfq_set_request(struct request_queue *q, struct request *rq, gfp_t gfp_mask)
cfqq = cic_to_cfqq(cic, is_sync); cfqq = cic_to_cfqq(cic, is_sync);
if (!cfqq) { if (!cfqq) {
cfqq = cfq_get_queue(cfqd, is_sync, cic->ioc, gfp_mask); cfqq = cfq_get_queue(cfqd, is_sync, cic->ioc, gfp_mask);
if (!cfqq)
goto queue_fail;
cic_set_cfqq(cic, cfqq, is_sync); cic_set_cfqq(cic, cfqq, is_sync);
} }
@ -2465,6 +2467,14 @@ static void *cfq_init_queue(struct request_queue *q)
for (i = 0; i < CFQ_PRIO_LISTS; i++) for (i = 0; i < CFQ_PRIO_LISTS; i++)
cfqd->prio_trees[i] = RB_ROOT; cfqd->prio_trees[i] = RB_ROOT;
/*
* Our fallback cfqq if cfq_find_alloc_queue() runs into OOM issues.
* Grab a permanent reference to it, so that the normal code flow
* will not attempt to free it.
*/
cfq_init_cfqq(cfqd, &cfqd->oom_cfqq, 1, 0);
atomic_inc(&cfqd->oom_cfqq.ref);
INIT_LIST_HEAD(&cfqd->cic_list); INIT_LIST_HEAD(&cfqd->cic_list);
cfqd->queue = q; cfqd->queue = q;

View File

@ -1,233 +0,0 @@
/*
* Copyright 2004 Peter M. Jones <pjones@redhat.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
*
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
*/
#include <linux/list.h>
#include <linux/genhd.h>
#include <linux/spinlock.h>
#include <linux/capability.h>
#include <linux/bitops.h>
#include <linux/blkdev.h>
#include <scsi/scsi.h>
#include <linux/cdrom.h>
/*
 * Check whether the command in @cmd may be issued through @filter.
 *
 * Only the opcode byte cmd[0] is examined.  Returns 0 when the command is
 * permitted and -EPERM otherwise.
 */
int blk_verify_command(struct blk_cmd_filter *filter,
		       unsigned char *cmd, fmode_t has_write_perm)
{
	unsigned char opcode;

	/* holders of CAP_SYS_RAWIO are never filtered */
	if (capable(CAP_SYS_RAWIO))
		return 0;

	/* without a filter table nothing is whitelisted */
	if (filter == NULL)
		return -EPERM;

	opcode = cmd[0];

	/* read-safe commands are open to anybody who could open the device */
	if (test_bit(opcode, filter->read_ok))
		return 0;

	/* write-safe commands additionally need a writable open */
	if (has_write_perm && test_bit(opcode, filter->write_ok))
		return 0;

	return -EPERM;
}
EXPORT_SYMBOL(blk_verify_command);
#if 0
/* and now, the sysfs stuff */
static ssize_t rcf_cmds_show(struct blk_cmd_filter *filter, char *page,
int rw)
{
char *npage = page;
unsigned long *okbits;
int i;
if (rw == READ)
okbits = filter->read_ok;
else
okbits = filter->write_ok;
for (i = 0; i < BLK_SCSI_MAX_CMDS; i++) {
if (test_bit(i, okbits)) {
npage += sprintf(npage, "0x%02x", i);
if (i < BLK_SCSI_MAX_CMDS - 1)
sprintf(npage++, " ");
}
}
if (npage != page)
npage += sprintf(npage, "\n");
return npage - page;
}
static ssize_t rcf_readcmds_show(struct blk_cmd_filter *filter, char *page)
{
return rcf_cmds_show(filter, page, READ);
}
static ssize_t rcf_writecmds_show(struct blk_cmd_filter *filter,
char *page)
{
return rcf_cmds_show(filter, page, WRITE);
}
/*
 * Update one of @filter's opcode bitmaps from a sysfs write.
 *
 * @page holds space-separated hexadecimal opcodes.  A leading '+' (or no
 * prefix) sets the opcode's bit, a leading '-' clears it.  @rw selects the
 * table: READ for read_ok, anything else for write_ok.
 *
 * All changes are applied to a local copy first and only committed to the
 * filter once the whole input has parsed, so a bad token leaves the filter
 * unchanged.
 *
 * Returns @count on success, or -EINVAL if a token is not a number or lies
 * outside 0 .. BLK_SCSI_MAX_CMDS - 1.
 */
static ssize_t rcf_cmds_store(struct blk_cmd_filter *filter,
			      const char *page, size_t count, int rw)
{
	unsigned long okbits[BLK_SCSI_CMD_PER_LONG], *target_okbits;
	int cmd, set;
	char *p, *status;

	if (rw == READ) {
		memcpy(&okbits, filter->read_ok, sizeof(okbits));
		target_okbits = filter->read_ok;
	} else {
		memcpy(&okbits, filter->write_ok, sizeof(okbits));
		target_okbits = filter->write_ok;
	}

	while ((p = strsep((char **)&page, " ")) != NULL) {
		set = 1;

		if (p[0] == '+') {
			p++;
		} else if (p[0] == '-') {
			set = 0;
			p++;
		}

		cmd = simple_strtol(p, &status, 16);

		/*
		 * Invalid input, so do nothing.  simple_strtol() accepts a
		 * sign of its own (e.g. "--5"), so negative values must be
		 * rejected too or the bit operations below would run off
		 * the front of okbits[].
		 */
		if ((status == p) || cmd < 0 || cmd >= BLK_SCSI_MAX_CMDS)
			return -EINVAL;

		if (set)
			__set_bit(cmd, okbits);
		else
			__clear_bit(cmd, okbits);
	}

	memcpy(target_okbits, okbits, sizeof(okbits));
	return count;
}
static ssize_t rcf_readcmds_store(struct blk_cmd_filter *filter,
const char *page, size_t count)
{
return rcf_cmds_store(filter, page, count, READ);
}
static ssize_t rcf_writecmds_store(struct blk_cmd_filter *filter,
const char *page, size_t count)
{
return rcf_cmds_store(filter, page, count, WRITE);
}
struct rcf_sysfs_entry {
struct attribute attr;
ssize_t (*show)(struct blk_cmd_filter *, char *);
ssize_t (*store)(struct blk_cmd_filter *, const char *, size_t);
};
static struct rcf_sysfs_entry rcf_readcmds_entry = {
.attr = { .name = "read_table", .mode = S_IRUGO | S_IWUSR },
.show = rcf_readcmds_show,
.store = rcf_readcmds_store,
};
static struct rcf_sysfs_entry rcf_writecmds_entry = {
.attr = {.name = "write_table", .mode = S_IRUGO | S_IWUSR },
.show = rcf_writecmds_show,
.store = rcf_writecmds_store,
};
static struct attribute *default_attrs[] = {
&rcf_readcmds_entry.attr,
&rcf_writecmds_entry.attr,
NULL,
};
#define to_rcf(atr) container_of((atr), struct rcf_sysfs_entry, attr)
static ssize_t
rcf_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
{
struct rcf_sysfs_entry *entry = to_rcf(attr);
struct blk_cmd_filter *filter;
filter = container_of(kobj, struct blk_cmd_filter, kobj);
if (entry->show)
return entry->show(filter, page);
return 0;
}
static ssize_t
rcf_attr_store(struct kobject *kobj, struct attribute *attr,
const char *page, size_t length)
{
struct rcf_sysfs_entry *entry = to_rcf(attr);
struct blk_cmd_filter *filter;
if (!capable(CAP_SYS_RAWIO))
return -EPERM;
if (!entry->store)
return -EINVAL;
filter = container_of(kobj, struct blk_cmd_filter, kobj);
return entry->store(filter, page, length);
}
static struct sysfs_ops rcf_sysfs_ops = {
.show = rcf_attr_show,
.store = rcf_attr_store,
};
static struct kobj_type rcf_ktype = {
.sysfs_ops = &rcf_sysfs_ops,
.default_attrs = default_attrs,
};
int blk_register_filter(struct gendisk *disk)
{
int ret;
struct blk_cmd_filter *filter = &disk->queue->cmd_filter;
ret = kobject_init_and_add(&filter->kobj, &rcf_ktype,
&disk_to_dev(disk)->kobj,
"%s", "cmd_filter");
if (ret < 0)
return ret;
return 0;
}
EXPORT_SYMBOL(blk_register_filter);
void blk_unregister_filter(struct gendisk *disk)
{
struct blk_cmd_filter *filter = &disk->queue->cmd_filter;
kobject_put(&filter->kobj);
}
EXPORT_SYMBOL(blk_unregister_filter);
#endif

View File

@ -32,6 +32,11 @@
#include <scsi/scsi_ioctl.h> #include <scsi/scsi_ioctl.h>
#include <scsi/scsi_cmnd.h> #include <scsi/scsi_cmnd.h>
/*
 * SCSI command whitelist, one bit per opcode (indexed by cmd[0]).
 *
 * read_ok:  opcodes blk_verify_command() allows regardless of open mode.
 * write_ok: opcodes blk_verify_command() allows only when the caller has
 *           write permission.
 *
 * blk_default_cmd_filter is the single table consulted by
 * blk_verify_command(); it is filled in by blk_scsi_ioctl_init() via
 * blk_set_cmd_filter_defaults().
 */
struct blk_cmd_filter {
unsigned long read_ok[BLK_SCSI_CMD_PER_LONG];
unsigned long write_ok[BLK_SCSI_CMD_PER_LONG];
} blk_default_cmd_filter;
/* Command group 3 is reserved and should never be used. */ /* Command group 3 is reserved and should never be used. */
const unsigned char scsi_command_size_tbl[8] = const unsigned char scsi_command_size_tbl[8] =
{ {
@ -105,7 +110,7 @@ static int sg_emulated_host(struct request_queue *q, int __user *p)
return put_user(1, p); return put_user(1, p);
} }
void blk_set_cmd_filter_defaults(struct blk_cmd_filter *filter) static void blk_set_cmd_filter_defaults(struct blk_cmd_filter *filter)
{ {
/* Basic read-only commands */ /* Basic read-only commands */
__set_bit(TEST_UNIT_READY, filter->read_ok); __set_bit(TEST_UNIT_READY, filter->read_ok);
@ -187,14 +192,37 @@ void blk_set_cmd_filter_defaults(struct blk_cmd_filter *filter)
__set_bit(GPCMD_SET_STREAMING, filter->write_ok); __set_bit(GPCMD_SET_STREAMING, filter->write_ok);
__set_bit(GPCMD_SET_READ_AHEAD, filter->write_ok); __set_bit(GPCMD_SET_READ_AHEAD, filter->write_ok);
} }
EXPORT_SYMBOL_GPL(blk_set_cmd_filter_defaults);
/**
 * blk_verify_command - check a SCSI command against the default filter
 * @cmd: command bytes; only the opcode in cmd[0] is examined
 * @has_write_perm: non-zero if the caller opened the device for writing
 *
 * Description: Callers with CAP_SYS_RAWIO may issue any command.  Everyone
 * else may issue opcodes listed in the default filter's read_ok table, and
 * opcodes listed in write_ok when @has_write_perm is set.
 *
 * Returns 0 if the command is allowed, -EPERM otherwise.
 */
int blk_verify_command(unsigned char *cmd, fmode_t has_write_perm)
{
	/* always the built-in table, so there is no "no filter" case */
	struct blk_cmd_filter *filter = &blk_default_cmd_filter;

	/* root can do any command. */
	if (capable(CAP_SYS_RAWIO))
		return 0;

	/* Anybody who can open the device can do a read-safe command */
	if (test_bit(cmd[0], filter->read_ok))
		return 0;

	/* Write-safe commands require a writable open */
	if (test_bit(cmd[0], filter->write_ok) && has_write_perm)
		return 0;

	return -EPERM;
}
EXPORT_SYMBOL(blk_verify_command);
static int blk_fill_sghdr_rq(struct request_queue *q, struct request *rq, static int blk_fill_sghdr_rq(struct request_queue *q, struct request *rq,
struct sg_io_hdr *hdr, fmode_t mode) struct sg_io_hdr *hdr, fmode_t mode)
{ {
if (copy_from_user(rq->cmd, hdr->cmdp, hdr->cmd_len)) if (copy_from_user(rq->cmd, hdr->cmdp, hdr->cmd_len))
return -EFAULT; return -EFAULT;
if (blk_verify_command(&q->cmd_filter, rq->cmd, mode & FMODE_WRITE)) if (blk_verify_command(rq->cmd, mode & FMODE_WRITE))
return -EPERM; return -EPERM;
/* /*
@ -427,7 +455,7 @@ int sg_scsi_ioctl(struct request_queue *q, struct gendisk *disk, fmode_t mode,
if (in_len && copy_from_user(buffer, sic->data + cmdlen, in_len)) if (in_len && copy_from_user(buffer, sic->data + cmdlen, in_len))
goto error; goto error;
err = blk_verify_command(&q->cmd_filter, rq->cmd, mode & FMODE_WRITE); err = blk_verify_command(rq->cmd, mode & FMODE_WRITE);
if (err) if (err)
goto error; goto error;
@ -645,5 +673,10 @@ int scsi_cmd_ioctl(struct request_queue *q, struct gendisk *bd_disk, fmode_t mod
blk_put_queue(q); blk_put_queue(q);
return err; return err;
} }
EXPORT_SYMBOL(scsi_cmd_ioctl); EXPORT_SYMBOL(scsi_cmd_ioctl);
/*
 * Fill blk_default_cmd_filter with the default read/write command
 * whitelist.  Always returns 0.
 *
 * This has to run during boot: blk_verify_command() consults the default
 * filter for every unprivileged request, and an unpopulated (all-zero)
 * table rejects all of them with -EPERM.  Registering it as an initcall
 * guarantees that; running it more than once is harmless because it only
 * sets bits.
 */
int __init blk_scsi_ioctl_init(void)
{
	blk_set_cmd_filter_defaults(&blk_default_cmd_filter);
	return 0;
}
fs_initcall(blk_scsi_ioctl_init);

View File

@ -1017,7 +1017,7 @@ static struct bio *split_bvec(struct bio *bio, sector_t sector,
clone->bi_flags |= 1 << BIO_CLONED; clone->bi_flags |= 1 << BIO_CLONED;
if (bio_integrity(bio)) { if (bio_integrity(bio)) {
bio_integrity_clone(clone, bio, GFP_NOIO); bio_integrity_clone(clone, bio, GFP_NOIO, bs);
bio_integrity_trim(clone, bio_integrity_trim(clone,
bio_sector_offset(bio, idx, offset), len); bio_sector_offset(bio, idx, offset), len);
} }
@ -1045,7 +1045,7 @@ static struct bio *clone_bio(struct bio *bio, sector_t sector,
clone->bi_flags &= ~(1 << BIO_SEG_VALID); clone->bi_flags &= ~(1 << BIO_SEG_VALID);
if (bio_integrity(bio)) { if (bio_integrity(bio)) {
bio_integrity_clone(clone, bio, GFP_NOIO); bio_integrity_clone(clone, bio, GFP_NOIO, bs);
if (idx != bio->bi_idx || clone->bi_size < bio->bi_size) if (idx != bio->bi_idx || clone->bi_size < bio->bi_size)
bio_integrity_trim(clone, bio_integrity_trim(clone,

View File

@ -210,13 +210,11 @@ static void sg_put_dev(Sg_device *sdp);
static int sg_allow_access(struct file *filp, unsigned char *cmd) static int sg_allow_access(struct file *filp, unsigned char *cmd)
{ {
struct sg_fd *sfp = (struct sg_fd *)filp->private_data; struct sg_fd *sfp = (struct sg_fd *)filp->private_data;
struct request_queue *q = sfp->parentdp->device->request_queue;
if (sfp->parentdp->device->type == TYPE_SCANNER) if (sfp->parentdp->device->type == TYPE_SCANNER)
return 0; return 0;
return blk_verify_command(&q->cmd_filter, return blk_verify_command(cmd, filp->f_mode & FMODE_WRITE);
cmd, filp->f_mode & FMODE_WRITE);
} }
static int static int

View File

@ -1,7 +1,7 @@
/* /*
* bio-integrity.c - bio data integrity extensions * bio-integrity.c - bio data integrity extensions
* *
* Copyright (C) 2007, 2008 Oracle Corporation * Copyright (C) 2007, 2008, 2009 Oracle Corporation
* Written by: Martin K. Petersen <martin.petersen@oracle.com> * Written by: Martin K. Petersen <martin.petersen@oracle.com>
* *
* This program is free software; you can redistribute it and/or * This program is free software; you can redistribute it and/or
@ -25,11 +25,94 @@
#include <linux/bio.h> #include <linux/bio.h>
#include <linux/workqueue.h> #include <linux/workqueue.h>
static struct kmem_cache *bio_integrity_slab __read_mostly; struct integrity_slab {
static mempool_t *bio_integrity_pool; struct kmem_cache *slab;
static struct bio_set *integrity_bio_set; unsigned short nr_vecs;
char name[8];
};
#define IS(x) { .nr_vecs = x, .name = "bip-"__stringify(x) }
struct integrity_slab bip_slab[BIOVEC_NR_POOLS] __read_mostly = {
IS(1), IS(4), IS(16), IS(64), IS(128), IS(BIO_MAX_PAGES),
};
#undef IS
static struct workqueue_struct *kintegrityd_wq; static struct workqueue_struct *kintegrityd_wq;
/*
 * Map a requested number of integrity vectors to the index of the
 * smallest bip_slab[] entry that can hold them:
 *
 *   1 -> 0, 2..4 -> 1, 5..16 -> 2, 17..64 -> 3, 65..128 -> 4,
 *   129..BIO_MAX_PAGES -> 5
 *
 * Any other value (0, or more than BIO_MAX_PAGES) is a caller bug.
 */
static inline unsigned int vecs_to_idx(unsigned int nr)
{
	if (nr >= 1) {
		if (nr == 1)
			return 0;
		if (nr <= 4)
			return 1;
		if (nr <= 16)
			return 2;
		if (nr <= 64)
			return 3;
		if (nr <= 128)
			return 4;
		if (nr <= BIO_MAX_PAGES)
			return 5;
	}

	BUG();
}
static inline int use_bip_pool(unsigned int idx)
{
if (idx == BIOVEC_NR_POOLS)
return 1;
return 0;
}
/**
* bio_integrity_alloc_bioset - Allocate integrity payload and attach it to bio
* @bio: bio to attach integrity metadata to
* @gfp_mask: Memory allocation mask
* @nr_vecs: Number of integrity metadata scatter-gather elements
* @bs: bio_set to allocate from
*
* Description: This function prepares a bio for attaching integrity
* metadata. nr_vecs specifies the maximum number of pages containing
* integrity metadata that can be attached.
*/
struct bio_integrity_payload *bio_integrity_alloc_bioset(struct bio *bio,
gfp_t gfp_mask,
unsigned int nr_vecs,
struct bio_set *bs)
{
struct bio_integrity_payload *bip;
unsigned int idx = vecs_to_idx(nr_vecs);
BUG_ON(bio == NULL);
bip = NULL;
/* Lower order allocations come straight from slab */
if (!use_bip_pool(idx))
bip = kmem_cache_alloc(bip_slab[idx].slab, gfp_mask);
/* Use mempool if lower order alloc failed or max vecs were requested */
if (bip == NULL) {
bip = mempool_alloc(bs->bio_integrity_pool, gfp_mask);
if (unlikely(bip == NULL)) {
printk(KERN_ERR "%s: could not alloc bip\n", __func__);
return NULL;
}
}
memset(bip, 0, sizeof(*bip));
bip->bip_slab = idx;
bip->bip_bio = bio;
bio->bi_integrity = bip;
return bip;
}
EXPORT_SYMBOL(bio_integrity_alloc_bioset);
/** /**
* bio_integrity_alloc - Allocate integrity payload and attach it to bio * bio_integrity_alloc - Allocate integrity payload and attach it to bio
* @bio: bio to attach integrity metadata to * @bio: bio to attach integrity metadata to
@ -44,44 +127,19 @@ struct bio_integrity_payload *bio_integrity_alloc(struct bio *bio,
gfp_t gfp_mask, gfp_t gfp_mask,
unsigned int nr_vecs) unsigned int nr_vecs)
{ {
struct bio_integrity_payload *bip; return bio_integrity_alloc_bioset(bio, gfp_mask, nr_vecs, fs_bio_set);
struct bio_vec *iv;
unsigned long idx;
BUG_ON(bio == NULL);
bip = mempool_alloc(bio_integrity_pool, gfp_mask);
if (unlikely(bip == NULL)) {
printk(KERN_ERR "%s: could not alloc bip\n", __func__);
return NULL;
}
memset(bip, 0, sizeof(*bip));
iv = bvec_alloc_bs(gfp_mask, nr_vecs, &idx, integrity_bio_set);
if (unlikely(iv == NULL)) {
printk(KERN_ERR "%s: could not alloc bip_vec\n", __func__);
mempool_free(bip, bio_integrity_pool);
return NULL;
}
bip->bip_pool = idx;
bip->bip_vec = iv;
bip->bip_bio = bio;
bio->bi_integrity = bip;
return bip;
} }
EXPORT_SYMBOL(bio_integrity_alloc); EXPORT_SYMBOL(bio_integrity_alloc);
/** /**
* bio_integrity_free - Free bio integrity payload * bio_integrity_free - Free bio integrity payload
* @bio: bio containing bip to be freed * @bio: bio containing bip to be freed
* @bs: bio_set this bio was allocated from
* *
* Description: Used to free the integrity portion of a bio. Usually * Description: Used to free the integrity portion of a bio. Usually
* called from bio_free(). * called from bio_free().
*/ */
void bio_integrity_free(struct bio *bio) void bio_integrity_free(struct bio *bio, struct bio_set *bs)
{ {
struct bio_integrity_payload *bip = bio->bi_integrity; struct bio_integrity_payload *bip = bio->bi_integrity;
@ -92,8 +150,10 @@ void bio_integrity_free(struct bio *bio)
&& bip->bip_buf != NULL) && bip->bip_buf != NULL)
kfree(bip->bip_buf); kfree(bip->bip_buf);
bvec_free_bs(integrity_bio_set, bip->bip_vec, bip->bip_pool); if (use_bip_pool(bip->bip_slab))
mempool_free(bip, bio_integrity_pool); mempool_free(bip, bs->bio_integrity_pool);
else
kmem_cache_free(bip_slab[bip->bip_slab].slab, bip);
bio->bi_integrity = NULL; bio->bi_integrity = NULL;
} }
@ -114,7 +174,7 @@ int bio_integrity_add_page(struct bio *bio, struct page *page,
struct bio_integrity_payload *bip = bio->bi_integrity; struct bio_integrity_payload *bip = bio->bi_integrity;
struct bio_vec *iv; struct bio_vec *iv;
if (bip->bip_vcnt >= bvec_nr_vecs(bip->bip_pool)) { if (bip->bip_vcnt >= bvec_nr_vecs(bip->bip_slab)) {
printk(KERN_ERR "%s: bip_vec full\n", __func__); printk(KERN_ERR "%s: bip_vec full\n", __func__);
return 0; return 0;
} }
@ -647,8 +707,8 @@ void bio_integrity_split(struct bio *bio, struct bio_pair *bp, int sectors)
bp->iv1 = bip->bip_vec[0]; bp->iv1 = bip->bip_vec[0];
bp->iv2 = bip->bip_vec[0]; bp->iv2 = bip->bip_vec[0];
bp->bip1.bip_vec = &bp->iv1; bp->bip1.bip_vec[0] = bp->iv1;
bp->bip2.bip_vec = &bp->iv2; bp->bip2.bip_vec[0] = bp->iv2;
bp->iv1.bv_len = sectors * bi->tuple_size; bp->iv1.bv_len = sectors * bi->tuple_size;
bp->iv2.bv_offset += sectors * bi->tuple_size; bp->iv2.bv_offset += sectors * bi->tuple_size;
@ -667,17 +727,19 @@ EXPORT_SYMBOL(bio_integrity_split);
* @bio: New bio * @bio: New bio
* @bio_src: Original bio * @bio_src: Original bio
* @gfp_mask: Memory allocation mask * @gfp_mask: Memory allocation mask
* @bs: bio_set to allocate bip from
* *
* Description: Called to allocate a bip when cloning a bio * Description: Called to allocate a bip when cloning a bio
*/ */
int bio_integrity_clone(struct bio *bio, struct bio *bio_src, gfp_t gfp_mask) int bio_integrity_clone(struct bio *bio, struct bio *bio_src,
gfp_t gfp_mask, struct bio_set *bs)
{ {
struct bio_integrity_payload *bip_src = bio_src->bi_integrity; struct bio_integrity_payload *bip_src = bio_src->bi_integrity;
struct bio_integrity_payload *bip; struct bio_integrity_payload *bip;
BUG_ON(bip_src == NULL); BUG_ON(bip_src == NULL);
bip = bio_integrity_alloc(bio, gfp_mask, bip_src->bip_vcnt); bip = bio_integrity_alloc_bioset(bio, gfp_mask, bip_src->bip_vcnt, bs);
if (bip == NULL) if (bip == NULL)
return -EIO; return -EIO;
@ -693,25 +755,43 @@ int bio_integrity_clone(struct bio *bio, struct bio *bio_src, gfp_t gfp_mask)
} }
EXPORT_SYMBOL(bio_integrity_clone); EXPORT_SYMBOL(bio_integrity_clone);
static int __init bio_integrity_init(void) int bioset_integrity_create(struct bio_set *bs, int pool_size)
{ {
kintegrityd_wq = create_workqueue("kintegrityd"); unsigned int max_slab = vecs_to_idx(BIO_MAX_PAGES);
if (!kintegrityd_wq) bs->bio_integrity_pool =
panic("Failed to create kintegrityd\n"); mempool_create_slab_pool(pool_size, bip_slab[max_slab].slab);
bio_integrity_slab = KMEM_CACHE(bio_integrity_payload, if (!bs->bio_integrity_pool)
SLAB_HWCACHE_ALIGN|SLAB_PANIC); return -1;
bio_integrity_pool = mempool_create_slab_pool(BIO_POOL_SIZE,
bio_integrity_slab);
if (!bio_integrity_pool)
panic("bio_integrity: can't allocate bip pool\n");
integrity_bio_set = bioset_create(BIO_POOL_SIZE, 0);
if (!integrity_bio_set)
panic("bio_integrity: can't allocate bio_set\n");
return 0; return 0;
} }
subsys_initcall(bio_integrity_init); EXPORT_SYMBOL(bioset_integrity_create);
void bioset_integrity_free(struct bio_set *bs)
{
if (bs->bio_integrity_pool)
mempool_destroy(bs->bio_integrity_pool);
}
EXPORT_SYMBOL(bioset_integrity_free);
/*
 * Boot-time setup for bio integrity support: create the kintegrityd
 * workqueue and one slab cache per bip_slab[] entry.  Each cache's object
 * size is a bio_integrity_payload followed by that entry's nr_vecs
 * bio_vecs.  Failure to create the workqueue panics; the caches are
 * created with SLAB_PANIC.
 */
void __init bio_integrity_init(void)
{
	struct integrity_slab *is;

	kintegrityd_wq = create_workqueue("kintegrityd");
	if (kintegrityd_wq == NULL)
		panic("Failed to create kintegrityd\n");

	for (is = bip_slab; is < bip_slab + BIOVEC_NR_POOLS; is++) {
		/* payload header plus its embedded vector array */
		unsigned int bytes = sizeof(struct bio_integrity_payload)
			+ is->nr_vecs * sizeof(struct bio_vec);

		is->slab = kmem_cache_create(is->name, bytes, 0,
					     SLAB_HWCACHE_ALIGN|SLAB_PANIC,
					     NULL);
	}
}

View File

@ -238,7 +238,7 @@ void bio_free(struct bio *bio, struct bio_set *bs)
bvec_free_bs(bs, bio->bi_io_vec, BIO_POOL_IDX(bio)); bvec_free_bs(bs, bio->bi_io_vec, BIO_POOL_IDX(bio));
if (bio_integrity(bio)) if (bio_integrity(bio))
bio_integrity_free(bio); bio_integrity_free(bio, bs);
/* /*
* If we have front padding, adjust the bio pointer before freeing * If we have front padding, adjust the bio pointer before freeing
@ -341,7 +341,7 @@ struct bio *bio_alloc(gfp_t gfp_mask, int nr_iovecs)
static void bio_kmalloc_destructor(struct bio *bio) static void bio_kmalloc_destructor(struct bio *bio)
{ {
if (bio_integrity(bio)) if (bio_integrity(bio))
bio_integrity_free(bio); bio_integrity_free(bio, fs_bio_set);
kfree(bio); kfree(bio);
} }
@ -472,7 +472,7 @@ struct bio *bio_clone(struct bio *bio, gfp_t gfp_mask)
if (bio_integrity(bio)) { if (bio_integrity(bio)) {
int ret; int ret;
ret = bio_integrity_clone(b, bio, gfp_mask); ret = bio_integrity_clone(b, bio, gfp_mask, fs_bio_set);
if (ret < 0) { if (ret < 0) {
bio_put(b); bio_put(b);
@ -1539,6 +1539,7 @@ void bioset_free(struct bio_set *bs)
if (bs->bio_pool) if (bs->bio_pool)
mempool_destroy(bs->bio_pool); mempool_destroy(bs->bio_pool);
bioset_integrity_free(bs);
biovec_free_pools(bs); biovec_free_pools(bs);
bio_put_slab(bs); bio_put_slab(bs);
@ -1579,6 +1580,9 @@ struct bio_set *bioset_create(unsigned int pool_size, unsigned int front_pad)
if (!bs->bio_pool) if (!bs->bio_pool)
goto bad; goto bad;
if (bioset_integrity_create(bs, pool_size))
goto bad;
if (!biovec_create_pools(bs, pool_size)) if (!biovec_create_pools(bs, pool_size))
return bs; return bs;
@ -1616,6 +1620,7 @@ static int __init init_bio(void)
if (!bio_slabs) if (!bio_slabs)
panic("bio: can't allocate bios\n"); panic("bio: can't allocate bios\n");
bio_integrity_init();
biovec_init_slabs(); biovec_init_slabs();
fs_bio_set = bioset_create(BIO_POOL_SIZE, 0); fs_bio_set = bioset_create(BIO_POOL_SIZE, 0);

View File

@ -319,7 +319,6 @@ static inline int bio_has_allocated_vec(struct bio *bio)
*/ */
struct bio_integrity_payload { struct bio_integrity_payload {
struct bio *bip_bio; /* parent bio */ struct bio *bip_bio; /* parent bio */
struct bio_vec *bip_vec; /* integrity data vector */
sector_t bip_sector; /* virtual start sector */ sector_t bip_sector; /* virtual start sector */
@ -328,11 +327,12 @@ struct bio_integrity_payload {
unsigned int bip_size; unsigned int bip_size;
unsigned short bip_pool; /* pool the ivec came from */ unsigned short bip_slab; /* slab the bip came from */
unsigned short bip_vcnt; /* # of integrity bio_vecs */ unsigned short bip_vcnt; /* # of integrity bio_vecs */
unsigned short bip_idx; /* current bip_vec index */ unsigned short bip_idx; /* current bip_vec index */
struct work_struct bip_work; /* I/O completion */ struct work_struct bip_work; /* I/O completion */
struct bio_vec bip_vec[0]; /* embedded bvec array */
}; };
#endif /* CONFIG_BLK_DEV_INTEGRITY */ #endif /* CONFIG_BLK_DEV_INTEGRITY */
@ -430,6 +430,9 @@ struct bio_set {
unsigned int front_pad; unsigned int front_pad;
mempool_t *bio_pool; mempool_t *bio_pool;
#if defined(CONFIG_BLK_DEV_INTEGRITY)
mempool_t *bio_integrity_pool;
#endif
mempool_t *bvec_pool; mempool_t *bvec_pool;
}; };
@ -634,8 +637,9 @@ static inline struct bio *bio_list_get(struct bio_list *bl)
#define bio_integrity(bio) (bio->bi_integrity != NULL) #define bio_integrity(bio) (bio->bi_integrity != NULL)
extern struct bio_integrity_payload *bio_integrity_alloc_bioset(struct bio *, gfp_t, unsigned int, struct bio_set *);
extern struct bio_integrity_payload *bio_integrity_alloc(struct bio *, gfp_t, unsigned int); extern struct bio_integrity_payload *bio_integrity_alloc(struct bio *, gfp_t, unsigned int);
extern void bio_integrity_free(struct bio *); extern void bio_integrity_free(struct bio *, struct bio_set *);
extern int bio_integrity_add_page(struct bio *, struct page *, unsigned int, unsigned int); extern int bio_integrity_add_page(struct bio *, struct page *, unsigned int, unsigned int);
extern int bio_integrity_enabled(struct bio *bio); extern int bio_integrity_enabled(struct bio *bio);
extern int bio_integrity_set_tag(struct bio *, void *, unsigned int); extern int bio_integrity_set_tag(struct bio *, void *, unsigned int);
@ -645,21 +649,27 @@ extern void bio_integrity_endio(struct bio *, int);
extern void bio_integrity_advance(struct bio *, unsigned int); extern void bio_integrity_advance(struct bio *, unsigned int);
extern void bio_integrity_trim(struct bio *, unsigned int, unsigned int); extern void bio_integrity_trim(struct bio *, unsigned int, unsigned int);
extern void bio_integrity_split(struct bio *, struct bio_pair *, int); extern void bio_integrity_split(struct bio *, struct bio_pair *, int);
extern int bio_integrity_clone(struct bio *, struct bio *, gfp_t); extern int bio_integrity_clone(struct bio *, struct bio *, gfp_t, struct bio_set *);
extern int bioset_integrity_create(struct bio_set *, int);
extern void bioset_integrity_free(struct bio_set *);
extern void bio_integrity_init(void);
#else /* CONFIG_BLK_DEV_INTEGRITY */ #else /* CONFIG_BLK_DEV_INTEGRITY */
#define bio_integrity(a) (0) #define bio_integrity(a) (0)
#define bioset_integrity_create(a, b) (0)
#define bio_integrity_prep(a) (0) #define bio_integrity_prep(a) (0)
#define bio_integrity_enabled(a) (0) #define bio_integrity_enabled(a) (0)
#define bio_integrity_clone(a, b, c) (0) #define bio_integrity_clone(a, b, c, d) (0)
#define bio_integrity_free(a) do { } while (0) #define bioset_integrity_free(a) do { } while (0)
#define bio_integrity_free(a, b) do { } while (0)
#define bio_integrity_endio(a, b) do { } while (0) #define bio_integrity_endio(a, b) do { } while (0)
#define bio_integrity_advance(a, b) do { } while (0) #define bio_integrity_advance(a, b) do { } while (0)
#define bio_integrity_trim(a, b, c) do { } while (0) #define bio_integrity_trim(a, b, c) do { } while (0)
#define bio_integrity_split(a, b, c) do { } while (0) #define bio_integrity_split(a, b, c) do { } while (0)
#define bio_integrity_set_tag(a, b, c) do { } while (0) #define bio_integrity_set_tag(a, b, c) do { } while (0)
#define bio_integrity_get_tag(a, b, c) do { } while (0) #define bio_integrity_get_tag(a, b, c) do { } while (0)
#define bio_integrity_init(a) do { } while (0)
#endif /* CONFIG_BLK_DEV_INTEGRITY */ #endif /* CONFIG_BLK_DEV_INTEGRITY */

View File

@ -301,12 +301,6 @@ struct blk_queue_tag {
#define BLK_SCSI_MAX_CMDS (256) #define BLK_SCSI_MAX_CMDS (256)
#define BLK_SCSI_CMD_PER_LONG (BLK_SCSI_MAX_CMDS / (sizeof(long) * 8)) #define BLK_SCSI_CMD_PER_LONG (BLK_SCSI_MAX_CMDS / (sizeof(long) * 8))
struct blk_cmd_filter {
unsigned long read_ok[BLK_SCSI_CMD_PER_LONG];
unsigned long write_ok[BLK_SCSI_CMD_PER_LONG];
struct kobject kobj;
};
struct queue_limits { struct queue_limits {
unsigned long bounce_pfn; unsigned long bounce_pfn;
unsigned long seg_boundary_mask; unsigned long seg_boundary_mask;
@ -445,7 +439,6 @@ struct request_queue
#if defined(CONFIG_BLK_DEV_BSG) #if defined(CONFIG_BLK_DEV_BSG)
struct bsg_class_device bsg_dev; struct bsg_class_device bsg_dev;
#endif #endif
struct blk_cmd_filter cmd_filter;
}; };
#define QUEUE_FLAG_CLUSTER 0 /* cluster several segments into 1 */ #define QUEUE_FLAG_CLUSTER 0 /* cluster several segments into 1 */
@ -998,13 +991,7 @@ static inline int sb_issue_discard(struct super_block *sb,
return blkdev_issue_discard(sb->s_bdev, block, nr_blocks, GFP_KERNEL); return blkdev_issue_discard(sb->s_bdev, block, nr_blocks, GFP_KERNEL);
} }
/* extern int blk_verify_command(unsigned char *cmd, fmode_t has_write_perm);
* command filter functions
*/
extern int blk_verify_command(struct blk_cmd_filter *filter,
unsigned char *cmd, fmode_t has_write_perm);
extern void blk_unregister_filter(struct gendisk *disk);
extern void blk_set_cmd_filter_defaults(struct blk_cmd_filter *filter);
#define MAX_PHYS_SEGMENTS 128 #define MAX_PHYS_SEGMENTS 128
#define MAX_HW_SEGMENTS 128 #define MAX_HW_SEGMENTS 128