Merge branch 'for-2.6.40/drivers' of git://git.kernel.dk/linux-2.6-block
* 'for-2.6.40/drivers' of git://git.kernel.dk/linux-2.6-block: (110 commits)
  loop: handle on-demand devices correctly
  loop: limit 'max_part' module param to DISK_MAX_PARTS
  drbd: fix warning
  drbd: fix warning
  drbd: Fix spelling
  drbd: fix schedule in atomic
  drbd: Take a more conservative approach when deciding max_bio_size
  drbd: Fixed state transitions after async outdate-peer-handler returned
  drbd: Disallow the peer_disk_state to be D_OUTDATED while connected
  drbd: Fix for the connection problems on high latency links
  drbd: fix potential activity log refcount imbalance in error path
  drbd: Only downgrade the disk state in case of disk failures
  drbd: fix disconnect/reconnect loop, if ping-timeout == ping-int
  drbd: fix potential distributed deadlock
  lru_cache.h: fix comments referring to ts_ instead of lc_
  drbd: Fix for application IO with the on-io-error=pass-on policy
  xen/p2m: Add EXPORT_SYMBOL_GPL to the M2P override functions.
  xen/p2m/m2p/gnttab: Support GNTMAP_host_map in the M2P override.
  xen/blkback: don't fail empty barrier requests
  xen/blkback: fix xenbus_transaction_start() hang caused by double xenbus_transaction_end()
  ...
commit 929cfdd5d3
@@ -169,3 +169,18 @@ is issued which positions the tape to a known position. Typically you
must rewind the tape (by issuing "mt -f /dev/st0 rewind" for example)
before i/o can proceed again to a tape drive which was reset.

There is a cciss_tape_cmds module parameter which can be used to make cciss
allocate more commands for use by tape drives. Ordinarily only a few commands
(6) are allocated for tape drives because tape drives are slow and
infrequently used and the primary purpose of Smart Array controllers is to
act as a RAID controller for disk drives, so the vast majority of commands
are allocated for disk devices. However, if you have more than a few tape
drives attached to a smart array, the default number of commands may not be
enough (for example, if you have 8 tape drives, you could only rewind 6
at one time with the default number of commands.) The cciss_tape_cmds module
parameter allows more commands (up to 16 more) to be allocated for use by
tape drives. For example:

        insmod cciss.ko cciss_tape_cmds=16

Or, as a kernel boot parameter passed in via grub: cciss.cciss_tape_cmds=8

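A persistent alternative to passing the option on the insmod or grub command line is a modprobe options file. The snippet below is only an illustrative sketch: the /etc/modprobe.d/ path and the file name follow common distribution convention and are not mandated by the driver itself.

        # /etc/modprobe.d/cciss.conf  (path and file name are illustrative)
        options cciss cciss_tape_cmds=16
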
@@ -470,6 +470,27 @@ config XEN_BLKDEV_FRONTEND
	  block device driver. It communicates with a back-end driver
	  in another domain which drives the actual block device.

config XEN_BLKDEV_BACKEND
	tristate "Block-device backend driver"
	depends on XEN_BACKEND
	help
	  The block-device backend driver allows the kernel to export its
	  block devices to other guests via a high-performance shared-memory
	  interface.

	  The corresponding Linux frontend driver is enabled by the
	  CONFIG_XEN_BLKDEV_FRONTEND configuration option.

	  The backend driver attaches itself to any block device specified
	  in the XenBus configuration. Any block device can be exported as
	  long as it has a major and minor number.

	  If you are compiling a kernel to run in a Xen block backend driver
	  domain (often this is domain 0) you should say Y here. To
	  compile this driver as a module, choose M here: the module
	  will be called xen-blkback.


config VIRTIO_BLK
	tristate "Virtio block driver (EXPERIMENTAL)"
	depends on EXPERIMENTAL && VIRTIO

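If the backend is built as a module as the help text above describes, a quick way to confirm it is available in the backend domain is to load it by the name given there. The commands below are an illustrative sketch only; note that lsmod reports the module with the dash rendered as an underscore.

        modprobe xen-blkback
        lsmod | grep xen_blkback
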
@@ -36,6 +36,7 @@ obj-$(CONFIG_BLK_DEV_UB) += ub.o
obj-$(CONFIG_BLK_DEV_HD) += hd.o

obj-$(CONFIG_XEN_BLKDEV_FRONTEND) += xen-blkfront.o
obj-$(CONFIG_XEN_BLKDEV_BACKEND) += xen-blkback/
obj-$(CONFIG_BLK_DEV_DRBD) += drbd/
obj-$(CONFIG_BLK_DEV_RBD) += rbd.o

@@ -64,6 +64,10 @@ MODULE_DESCRIPTION("Driver for HP Smart Array Controllers");
MODULE_SUPPORTED_DEVICE("HP Smart Array Controllers");
MODULE_VERSION("3.6.26");
MODULE_LICENSE("GPL");
static int cciss_tape_cmds = 6;
module_param(cciss_tape_cmds, int, 0644);
MODULE_PARM_DESC(cciss_tape_cmds,
	"number of commands to allocate for tape devices (default: 6)");

static DEFINE_MUTEX(cciss_mutex);
static struct proc_dir_entry *proc_cciss;
@ -194,6 +198,8 @@ static int __devinit cciss_find_cfg_addrs(struct pci_dev *pdev,
|
||||
static int __devinit cciss_pci_find_memory_BAR(struct pci_dev *pdev,
|
||||
unsigned long *memory_bar);
|
||||
static inline u32 cciss_tag_discard_error_bits(ctlr_info_t *h, u32 tag);
|
||||
static __devinit int write_driver_ver_to_cfgtable(
|
||||
CfgTable_struct __iomem *cfgtable);
|
||||
|
||||
/* performant mode helper functions */
|
||||
static void calc_bucket_map(int *bucket, int num_buckets, int nsgs,
|
||||
@ -556,7 +562,7 @@ static void __devinit cciss_procinit(ctlr_info_t *h)
|
||||
#define to_hba(n) container_of(n, struct ctlr_info, dev)
|
||||
#define to_drv(n) container_of(n, drive_info_struct, dev)
|
||||
|
||||
/* List of controllers which cannot be reset on kexec with reset_devices */
|
||||
/* List of controllers which cannot be hard reset on kexec with reset_devices */
|
||||
static u32 unresettable_controller[] = {
|
||||
0x324a103C, /* Smart Array P712m */
|
||||
0x324b103C, /* SmartArray P711m */
|
||||
@ -574,23 +580,45 @@ static u32 unresettable_controller[] = {
|
||||
0x409D0E11, /* Smart Array 6400 EM */
|
||||
};
|
||||
|
||||
static int ctlr_is_resettable(struct ctlr_info *h)
|
||||
/* List of controllers which cannot even be soft reset */
|
||||
static u32 soft_unresettable_controller[] = {
|
||||
0x409C0E11, /* Smart Array 6400 */
|
||||
0x409D0E11, /* Smart Array 6400 EM */
|
||||
};
|
||||
|
||||
static int ctlr_is_hard_resettable(u32 board_id)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(unresettable_controller); i++)
|
||||
if (unresettable_controller[i] == h->board_id)
|
||||
if (unresettable_controller[i] == board_id)
|
||||
return 0;
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int ctlr_is_soft_resettable(u32 board_id)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(soft_unresettable_controller); i++)
|
||||
if (soft_unresettable_controller[i] == board_id)
|
||||
return 0;
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int ctlr_is_resettable(u32 board_id)
|
||||
{
|
||||
return ctlr_is_hard_resettable(board_id) ||
|
||||
ctlr_is_soft_resettable(board_id);
|
||||
}
|
||||
|
||||
static ssize_t host_show_resettable(struct device *dev,
|
||||
struct device_attribute *attr,
|
||||
char *buf)
|
||||
{
|
||||
struct ctlr_info *h = to_hba(dev);
|
||||
|
||||
return snprintf(buf, 20, "%d\n", ctlr_is_resettable(h));
|
||||
return snprintf(buf, 20, "%d\n", ctlr_is_resettable(h->board_id));
|
||||
}
|
||||
static DEVICE_ATTR(resettable, S_IRUGO, host_show_resettable, NULL);
|
||||
|
||||
@ -2567,7 +2595,7 @@ static int fill_cmd(ctlr_info_t *h, CommandList_struct *c, __u8 cmd, void *buff,
|
||||
}
|
||||
} else if (cmd_type == TYPE_MSG) {
|
||||
switch (cmd) {
|
||||
case 0: /* ABORT message */
|
||||
case CCISS_ABORT_MSG:
|
||||
c->Request.CDBLen = 12;
|
||||
c->Request.Type.Attribute = ATTR_SIMPLE;
|
||||
c->Request.Type.Direction = XFER_WRITE;
|
||||
@ -2577,16 +2605,16 @@ static int fill_cmd(ctlr_info_t *h, CommandList_struct *c, __u8 cmd, void *buff,
|
||||
/* buff contains the tag of the command to abort */
|
||||
memcpy(&c->Request.CDB[4], buff, 8);
|
||||
break;
|
||||
case 1: /* RESET message */
|
||||
case CCISS_RESET_MSG:
|
||||
c->Request.CDBLen = 16;
|
||||
c->Request.Type.Attribute = ATTR_SIMPLE;
|
||||
c->Request.Type.Direction = XFER_NONE;
|
||||
c->Request.Timeout = 0;
|
||||
memset(&c->Request.CDB[0], 0, sizeof(c->Request.CDB));
|
||||
c->Request.CDB[0] = cmd; /* reset */
|
||||
c->Request.CDB[1] = 0x03; /* reset a target */
|
||||
c->Request.CDB[1] = CCISS_RESET_TYPE_TARGET;
|
||||
break;
|
||||
case 3: /* No-Op message */
|
||||
case CCISS_NOOP_MSG:
|
||||
c->Request.CDBLen = 1;
|
||||
c->Request.Type.Attribute = ATTR_SIMPLE;
|
||||
c->Request.Type.Direction = XFER_WRITE;
|
||||
@ -2615,6 +2643,31 @@ static int fill_cmd(ctlr_info_t *h, CommandList_struct *c, __u8 cmd, void *buff,
|
||||
return status;
|
||||
}
|
||||
|
||||
static int __devinit cciss_send_reset(ctlr_info_t *h, unsigned char *scsi3addr,
|
||||
u8 reset_type)
|
||||
{
|
||||
CommandList_struct *c;
|
||||
int return_status;
|
||||
|
||||
c = cmd_alloc(h);
|
||||
if (!c)
|
||||
return -ENOMEM;
|
||||
return_status = fill_cmd(h, c, CCISS_RESET_MSG, NULL, 0, 0,
|
||||
CTLR_LUNID, TYPE_MSG);
|
||||
c->Request.CDB[1] = reset_type; /* fill_cmd defaults to target reset */
|
||||
if (return_status != IO_OK) {
|
||||
cmd_special_free(h, c);
|
||||
return return_status;
|
||||
}
|
||||
c->waiting = NULL;
|
||||
enqueue_cmd_and_start_io(h, c);
|
||||
/* Don't wait for completion, the reset won't complete. Don't free
|
||||
* the command either. This is the last command we will send before
|
||||
* re-initializing everything, so it doesn't matter and won't leak.
|
||||
*/
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int check_target_status(ctlr_info_t *h, CommandList_struct *c)
|
||||
{
|
||||
switch (c->err_info->ScsiStatus) {
|
||||
@ -3461,6 +3514,63 @@ static inline u32 process_nonindexed_cmd(ctlr_info_t *h, u32 raw_tag)
|
||||
return next_command(h);
|
||||
}
|
||||
|
||||
/* Some controllers, like p400, will give us one interrupt
|
||||
* after a soft reset, even if we turned interrupts off.
|
||||
* Only need to check for this in the cciss_xxx_discard_completions
|
||||
* functions.
|
||||
*/
|
||||
static int ignore_bogus_interrupt(ctlr_info_t *h)
|
||||
{
|
||||
if (likely(!reset_devices))
|
||||
return 0;
|
||||
|
||||
if (likely(h->interrupts_enabled))
|
||||
return 0;
|
||||
|
||||
dev_info(&h->pdev->dev, "Received interrupt while interrupts disabled "
|
||||
"(known firmware bug.) Ignoring.\n");
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
static irqreturn_t cciss_intx_discard_completions(int irq, void *dev_id)
|
||||
{
|
||||
ctlr_info_t *h = dev_id;
|
||||
unsigned long flags;
|
||||
u32 raw_tag;
|
||||
|
||||
if (ignore_bogus_interrupt(h))
|
||||
return IRQ_NONE;
|
||||
|
||||
if (interrupt_not_for_us(h))
|
||||
return IRQ_NONE;
|
||||
spin_lock_irqsave(&h->lock, flags);
|
||||
while (interrupt_pending(h)) {
|
||||
raw_tag = get_next_completion(h);
|
||||
while (raw_tag != FIFO_EMPTY)
|
||||
raw_tag = next_command(h);
|
||||
}
|
||||
spin_unlock_irqrestore(&h->lock, flags);
|
||||
return IRQ_HANDLED;
|
||||
}
|
||||
|
||||
static irqreturn_t cciss_msix_discard_completions(int irq, void *dev_id)
|
||||
{
|
||||
ctlr_info_t *h = dev_id;
|
||||
unsigned long flags;
|
||||
u32 raw_tag;
|
||||
|
||||
if (ignore_bogus_interrupt(h))
|
||||
return IRQ_NONE;
|
||||
|
||||
spin_lock_irqsave(&h->lock, flags);
|
||||
raw_tag = get_next_completion(h);
|
||||
while (raw_tag != FIFO_EMPTY)
|
||||
raw_tag = next_command(h);
|
||||
spin_unlock_irqrestore(&h->lock, flags);
|
||||
return IRQ_HANDLED;
|
||||
}
|
||||
|
||||
static irqreturn_t do_cciss_intx(int irq, void *dev_id)
|
||||
{
|
||||
ctlr_info_t *h = dev_id;
|
||||
@ -4078,6 +4188,9 @@ static int __devinit cciss_find_cfgtables(ctlr_info_t *h)
|
||||
cfg_base_addr_index) + cfg_offset, sizeof(h->cfgtable));
|
||||
if (!h->cfgtable)
|
||||
return -ENOMEM;
|
||||
rc = write_driver_ver_to_cfgtable(h->cfgtable);
|
||||
if (rc)
|
||||
return rc;
|
||||
/* Find performant mode table. */
|
||||
trans_offset = readl(&h->cfgtable->TransMethodOffset);
|
||||
h->transtable = remap_pci_mem(pci_resource_start(h->pdev,
|
||||
@ -4112,7 +4225,7 @@ static void __devinit cciss_get_max_perf_mode_cmds(struct ctlr_info *h)
|
||||
static void __devinit cciss_find_board_params(ctlr_info_t *h)
|
||||
{
|
||||
cciss_get_max_perf_mode_cmds(h);
|
||||
h->nr_cmds = h->max_commands - 4; /* Allow room for some ioctls */
|
||||
h->nr_cmds = h->max_commands - 4 - cciss_tape_cmds;
|
||||
h->maxsgentries = readl(&(h->cfgtable->MaxSGElements));
|
||||
/*
|
||||
* Limit in-command s/g elements to 32 save dma'able memory.
|
||||
@ -4348,7 +4461,7 @@ static __devinit int cciss_message(struct pci_dev *pdev, unsigned char opcode, u
|
||||
tag = readl(vaddr + SA5_REPLY_PORT_OFFSET);
|
||||
if ((tag & ~3) == paddr32)
|
||||
break;
|
||||
schedule_timeout_uninterruptible(HZ);
|
||||
msleep(CCISS_POST_RESET_NOOP_TIMEOUT_MSECS);
|
||||
}
|
||||
|
||||
iounmap(vaddr);
|
||||
@ -4375,11 +4488,10 @@ static __devinit int cciss_message(struct pci_dev *pdev, unsigned char opcode, u
|
||||
return 0;
|
||||
}
|
||||
|
||||
#define cciss_soft_reset_controller(p) cciss_message(p, 1, 0)
|
||||
#define cciss_noop(p) cciss_message(p, 3, 0)
|
||||
|
||||
static int cciss_controller_hard_reset(struct pci_dev *pdev,
|
||||
void * __iomem vaddr, bool use_doorbell)
|
||||
void * __iomem vaddr, u32 use_doorbell)
|
||||
{
|
||||
u16 pmcsr;
|
||||
int pos;
|
||||
@ -4390,8 +4502,7 @@ static int cciss_controller_hard_reset(struct pci_dev *pdev,
|
||||
* other way using the doorbell register.
|
||||
*/
|
||||
dev_info(&pdev->dev, "using doorbell to reset controller\n");
|
||||
writel(DOORBELL_CTLR_RESET, vaddr + SA5_DOORBELL);
|
||||
msleep(1000);
|
||||
writel(use_doorbell, vaddr + SA5_DOORBELL);
|
||||
} else { /* Try to do it the PCI power state way */
|
||||
|
||||
/* Quoting from the Open CISS Specification: "The Power
|
||||
@ -4422,12 +4533,64 @@ static int cciss_controller_hard_reset(struct pci_dev *pdev,
|
||||
pmcsr &= ~PCI_PM_CTRL_STATE_MASK;
|
||||
pmcsr |= PCI_D0;
|
||||
pci_write_config_word(pdev, pos + PCI_PM_CTRL, pmcsr);
|
||||
|
||||
msleep(500);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static __devinit void init_driver_version(char *driver_version, int len)
|
||||
{
|
||||
memset(driver_version, 0, len);
|
||||
strncpy(driver_version, "cciss " DRIVER_NAME, len - 1);
|
||||
}
|
||||
|
||||
static __devinit int write_driver_ver_to_cfgtable(
|
||||
CfgTable_struct __iomem *cfgtable)
|
||||
{
|
||||
char *driver_version;
|
||||
int i, size = sizeof(cfgtable->driver_version);
|
||||
|
||||
driver_version = kmalloc(size, GFP_KERNEL);
|
||||
if (!driver_version)
|
||||
return -ENOMEM;
|
||||
|
||||
init_driver_version(driver_version, size);
|
||||
for (i = 0; i < size; i++)
|
||||
writeb(driver_version[i], &cfgtable->driver_version[i]);
|
||||
kfree(driver_version);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static __devinit void read_driver_ver_from_cfgtable(
|
||||
CfgTable_struct __iomem *cfgtable, unsigned char *driver_ver)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < sizeof(cfgtable->driver_version); i++)
|
||||
driver_ver[i] = readb(&cfgtable->driver_version[i]);
|
||||
}
|
||||
|
||||
static __devinit int controller_reset_failed(
|
||||
CfgTable_struct __iomem *cfgtable)
|
||||
{
|
||||
|
||||
char *driver_ver, *old_driver_ver;
|
||||
int rc, size = sizeof(cfgtable->driver_version);
|
||||
|
||||
old_driver_ver = kmalloc(2 * size, GFP_KERNEL);
|
||||
if (!old_driver_ver)
|
||||
return -ENOMEM;
|
||||
driver_ver = old_driver_ver + size;
|
||||
|
||||
/* After a reset, the 32 bytes of "driver version" in the cfgtable
|
||||
* should have been changed, otherwise we know the reset failed.
|
||||
*/
|
||||
init_driver_version(old_driver_ver, size);
|
||||
read_driver_ver_from_cfgtable(cfgtable, driver_ver);
|
||||
rc = !memcmp(driver_ver, old_driver_ver, size);
|
||||
kfree(old_driver_ver);
|
||||
return rc;
|
||||
}
|
||||
|
||||
/* This does a hard reset of the controller using PCI power management
|
||||
* states or using the doorbell register. */
|
||||
static __devinit int cciss_kdump_hard_reset_controller(struct pci_dev *pdev)
|
||||
@ -4437,10 +4600,10 @@ static __devinit int cciss_kdump_hard_reset_controller(struct pci_dev *pdev)
|
||||
u64 cfg_base_addr_index;
|
||||
void __iomem *vaddr;
|
||||
unsigned long paddr;
|
||||
u32 misc_fw_support, active_transport;
|
||||
u32 misc_fw_support;
|
||||
int rc;
|
||||
CfgTable_struct __iomem *cfgtable;
|
||||
bool use_doorbell;
|
||||
u32 use_doorbell;
|
||||
u32 board_id;
|
||||
u16 command_register;
|
||||
|
||||
@ -4464,12 +4627,16 @@ static __devinit int cciss_kdump_hard_reset_controller(struct pci_dev *pdev)
|
||||
* likely not be happy. Just forbid resetting this conjoined mess.
|
||||
*/
|
||||
cciss_lookup_board_id(pdev, &board_id);
|
||||
if (board_id == 0x409C0E11 || board_id == 0x409D0E11) {
|
||||
if (!ctlr_is_resettable(board_id)) {
|
||||
dev_warn(&pdev->dev, "Cannot reset Smart Array 640x "
|
||||
"due to shared cache module.");
|
||||
return -ENODEV;
|
||||
}
|
||||
|
||||
/* if controller is soft- but not hard resettable... */
|
||||
if (!ctlr_is_hard_resettable(board_id))
|
||||
return -ENOTSUPP; /* try soft reset later. */
|
||||
|
||||
/* Save the PCI command register */
|
||||
pci_read_config_word(pdev, 4, &command_register);
|
||||
/* Turn the board off. This is so that later pci_restore_state()
|
||||
@ -4497,16 +4664,28 @@ static __devinit int cciss_kdump_hard_reset_controller(struct pci_dev *pdev)
|
||||
rc = -ENOMEM;
|
||||
goto unmap_vaddr;
|
||||
}
|
||||
rc = write_driver_ver_to_cfgtable(cfgtable);
|
||||
if (rc)
|
||||
goto unmap_vaddr;
|
||||
|
||||
/* If reset via doorbell register is supported, use that. */
|
||||
misc_fw_support = readl(&cfgtable->misc_fw_support);
|
||||
use_doorbell = misc_fw_support & MISC_FW_DOORBELL_RESET;
|
||||
|
||||
/* The doorbell reset seems to cause lockups on some Smart
|
||||
* Arrays (e.g. P410, P410i, maybe others). Until this is
|
||||
* fixed or at least isolated, avoid the doorbell reset.
|
||||
/* If reset via doorbell register is supported, use that.
|
||||
* There are two such methods. Favor the newest method.
|
||||
*/
|
||||
use_doorbell = 0;
|
||||
misc_fw_support = readl(&cfgtable->misc_fw_support);
|
||||
use_doorbell = misc_fw_support & MISC_FW_DOORBELL_RESET2;
|
||||
if (use_doorbell) {
|
||||
use_doorbell = DOORBELL_CTLR_RESET2;
|
||||
} else {
|
||||
use_doorbell = misc_fw_support & MISC_FW_DOORBELL_RESET;
|
||||
if (use_doorbell) {
|
||||
dev_warn(&pdev->dev, "Controller claims that "
|
||||
"'Bit 2 doorbell reset' is "
|
||||
"supported, but not 'bit 5 doorbell reset'. "
|
||||
"Firmware update is recommended.\n");
|
||||
rc = -ENOTSUPP; /* use the soft reset */
|
||||
goto unmap_cfgtable;
|
||||
}
|
||||
}
|
||||
|
||||
rc = cciss_controller_hard_reset(pdev, vaddr, use_doorbell);
|
||||
if (rc)
|
||||
@ -4524,30 +4703,31 @@ static __devinit int cciss_kdump_hard_reset_controller(struct pci_dev *pdev)
|
||||
msleep(CCISS_POST_RESET_PAUSE_MSECS);
|
||||
|
||||
/* Wait for board to become not ready, then ready. */
|
||||
dev_info(&pdev->dev, "Waiting for board to become ready.\n");
|
||||
dev_info(&pdev->dev, "Waiting for board to reset.\n");
|
||||
rc = cciss_wait_for_board_state(pdev, vaddr, BOARD_NOT_READY);
|
||||
if (rc) /* Don't bail, might be E500, etc. which can't be reset */
|
||||
dev_warn(&pdev->dev,
|
||||
"failed waiting for board to become not ready\n");
|
||||
if (rc) {
|
||||
dev_warn(&pdev->dev, "Failed waiting for board to hard reset."
|
||||
" Will try soft reset.\n");
|
||||
rc = -ENOTSUPP; /* Not expected, but try soft reset later */
|
||||
goto unmap_cfgtable;
|
||||
}
|
||||
rc = cciss_wait_for_board_state(pdev, vaddr, BOARD_READY);
|
||||
if (rc) {
|
||||
dev_warn(&pdev->dev,
|
||||
"failed waiting for board to become ready\n");
|
||||
"failed waiting for board to become ready "
|
||||
"after hard reset\n");
|
||||
goto unmap_cfgtable;
|
||||
}
|
||||
dev_info(&pdev->dev, "board ready.\n");
|
||||
|
||||
/* Controller should be in simple mode at this point. If it's not,
|
||||
* It means we're on one of those controllers which doesn't support
|
||||
* the doorbell reset method and on which the PCI power management reset
|
||||
* method doesn't work (P800, for example.)
|
||||
* In those cases, don't try to proceed, as it generally doesn't work.
|
||||
*/
|
||||
active_transport = readl(&cfgtable->TransportActive);
|
||||
if (active_transport & PERFORMANT_MODE) {
|
||||
dev_warn(&pdev->dev, "Unable to successfully reset controller,"
|
||||
" Ignoring controller.\n");
|
||||
rc = -ENODEV;
|
||||
rc = controller_reset_failed(vaddr);
|
||||
if (rc < 0)
|
||||
goto unmap_cfgtable;
|
||||
if (rc) {
|
||||
dev_warn(&pdev->dev, "Unable to successfully hard reset "
|
||||
"controller. Will try soft reset.\n");
|
||||
rc = -ENOTSUPP; /* Not expected, but try soft reset later */
|
||||
} else {
|
||||
dev_info(&pdev->dev, "Board ready after hard reset.\n");
|
||||
}
|
||||
|
||||
unmap_cfgtable:
|
||||
@ -4574,11 +4754,12 @@ static __devinit int cciss_init_reset_devices(struct pci_dev *pdev)
|
||||
* due to concerns about shared bbwc between 6402/6404 pair.
|
||||
*/
|
||||
if (rc == -ENOTSUPP)
|
||||
return 0; /* just try to do the kdump anyhow. */
|
||||
return rc; /* just try to do the kdump anyhow. */
|
||||
if (rc)
|
||||
return -ENODEV;
|
||||
|
||||
/* Now try to get the controller to respond to a no-op */
|
||||
dev_warn(&pdev->dev, "Waiting for controller to respond to no-op\n");
|
||||
for (i = 0; i < CCISS_POST_RESET_NOOP_RETRIES; i++) {
|
||||
if (cciss_noop(pdev) == 0)
|
||||
break;
|
||||
@ -4591,6 +4772,148 @@ static __devinit int cciss_init_reset_devices(struct pci_dev *pdev)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static __devinit int cciss_allocate_cmd_pool(ctlr_info_t *h)
|
||||
{
|
||||
h->cmd_pool_bits = kmalloc(
|
||||
DIV_ROUND_UP(h->nr_cmds, BITS_PER_LONG) *
|
||||
sizeof(unsigned long), GFP_KERNEL);
|
||||
h->cmd_pool = pci_alloc_consistent(h->pdev,
|
||||
h->nr_cmds * sizeof(CommandList_struct),
|
||||
&(h->cmd_pool_dhandle));
|
||||
h->errinfo_pool = pci_alloc_consistent(h->pdev,
|
||||
h->nr_cmds * sizeof(ErrorInfo_struct),
|
||||
&(h->errinfo_pool_dhandle));
|
||||
if ((h->cmd_pool_bits == NULL)
|
||||
|| (h->cmd_pool == NULL)
|
||||
|| (h->errinfo_pool == NULL)) {
|
||||
dev_err(&h->pdev->dev, "out of memory");
|
||||
return -ENOMEM;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static __devinit int cciss_allocate_scatterlists(ctlr_info_t *h)
|
||||
{
|
||||
int i;
|
||||
|
||||
/* zero it, so that on free we need not know how many were alloc'ed */
|
||||
h->scatter_list = kzalloc(h->max_commands *
|
||||
sizeof(struct scatterlist *), GFP_KERNEL);
|
||||
if (!h->scatter_list)
|
||||
return -ENOMEM;
|
||||
|
||||
for (i = 0; i < h->nr_cmds; i++) {
|
||||
h->scatter_list[i] = kmalloc(sizeof(struct scatterlist) *
|
||||
h->maxsgentries, GFP_KERNEL);
|
||||
if (h->scatter_list[i] == NULL) {
|
||||
dev_err(&h->pdev->dev, "could not allocate "
|
||||
"s/g lists\n");
|
||||
return -ENOMEM;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void cciss_free_scatterlists(ctlr_info_t *h)
|
||||
{
|
||||
int i;
|
||||
|
||||
if (h->scatter_list) {
|
||||
for (i = 0; i < h->nr_cmds; i++)
|
||||
kfree(h->scatter_list[i]);
|
||||
kfree(h->scatter_list);
|
||||
}
|
||||
}
|
||||
|
||||
static void cciss_free_cmd_pool(ctlr_info_t *h)
|
||||
{
|
||||
kfree(h->cmd_pool_bits);
|
||||
if (h->cmd_pool)
|
||||
pci_free_consistent(h->pdev,
|
||||
h->nr_cmds * sizeof(CommandList_struct),
|
||||
h->cmd_pool, h->cmd_pool_dhandle);
|
||||
if (h->errinfo_pool)
|
||||
pci_free_consistent(h->pdev,
|
||||
h->nr_cmds * sizeof(ErrorInfo_struct),
|
||||
h->errinfo_pool, h->errinfo_pool_dhandle);
|
||||
}
|
||||
|
||||
static int cciss_request_irq(ctlr_info_t *h,
|
||||
irqreturn_t (*msixhandler)(int, void *),
|
||||
irqreturn_t (*intxhandler)(int, void *))
|
||||
{
|
||||
if (h->msix_vector || h->msi_vector) {
|
||||
if (!request_irq(h->intr[PERF_MODE_INT], msixhandler,
|
||||
IRQF_DISABLED, h->devname, h))
|
||||
return 0;
|
||||
dev_err(&h->pdev->dev, "Unable to get msi irq %d"
|
||||
" for %s\n", h->intr[PERF_MODE_INT],
|
||||
h->devname);
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (!request_irq(h->intr[PERF_MODE_INT], intxhandler,
|
||||
IRQF_DISABLED, h->devname, h))
|
||||
return 0;
|
||||
dev_err(&h->pdev->dev, "Unable to get irq %d for %s\n",
|
||||
h->intr[PERF_MODE_INT], h->devname);
|
||||
return -1;
|
||||
}
|
||||
|
||||
static int __devinit cciss_kdump_soft_reset(ctlr_info_t *h)
|
||||
{
|
||||
if (cciss_send_reset(h, CTLR_LUNID, CCISS_RESET_TYPE_CONTROLLER)) {
|
||||
dev_warn(&h->pdev->dev, "Resetting array controller failed.\n");
|
||||
return -EIO;
|
||||
}
|
||||
|
||||
dev_info(&h->pdev->dev, "Waiting for board to soft reset.\n");
|
||||
if (cciss_wait_for_board_state(h->pdev, h->vaddr, BOARD_NOT_READY)) {
|
||||
dev_warn(&h->pdev->dev, "Soft reset had no effect.\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
dev_info(&h->pdev->dev, "Board reset, awaiting READY status.\n");
|
||||
if (cciss_wait_for_board_state(h->pdev, h->vaddr, BOARD_READY)) {
|
||||
dev_warn(&h->pdev->dev, "Board failed to become ready "
|
||||
"after soft reset.\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void cciss_undo_allocations_after_kdump_soft_reset(ctlr_info_t *h)
|
||||
{
|
||||
int ctlr = h->ctlr;
|
||||
|
||||
free_irq(h->intr[PERF_MODE_INT], h);
|
||||
#ifdef CONFIG_PCI_MSI
|
||||
if (h->msix_vector)
|
||||
pci_disable_msix(h->pdev);
|
||||
else if (h->msi_vector)
|
||||
pci_disable_msi(h->pdev);
|
||||
#endif /* CONFIG_PCI_MSI */
|
||||
cciss_free_sg_chain_blocks(h->cmd_sg_list, h->nr_cmds);
|
||||
cciss_free_scatterlists(h);
|
||||
cciss_free_cmd_pool(h);
|
||||
kfree(h->blockFetchTable);
|
||||
if (h->reply_pool)
|
||||
pci_free_consistent(h->pdev, h->max_commands * sizeof(__u64),
|
||||
h->reply_pool, h->reply_pool_dhandle);
|
||||
if (h->transtable)
|
||||
iounmap(h->transtable);
|
||||
if (h->cfgtable)
|
||||
iounmap(h->cfgtable);
|
||||
if (h->vaddr)
|
||||
iounmap(h->vaddr);
|
||||
unregister_blkdev(h->major, h->devname);
|
||||
cciss_destroy_hba_sysfs_entry(h);
|
||||
pci_release_regions(h->pdev);
|
||||
kfree(h);
|
||||
hba[ctlr] = NULL;
|
||||
}
|
||||
|
||||
/*
|
||||
* This is it. Find all the controllers and register them. I really hate
|
||||
* stealing all these major device numbers.
|
||||
@ -4601,15 +4924,28 @@ static int __devinit cciss_init_one(struct pci_dev *pdev,
|
||||
{
|
||||
int i;
|
||||
int j = 0;
|
||||
int k = 0;
|
||||
int rc;
|
||||
int try_soft_reset = 0;
|
||||
int dac, return_code;
|
||||
InquiryData_struct *inq_buff;
|
||||
ctlr_info_t *h;
|
||||
unsigned long flags;
|
||||
|
||||
rc = cciss_init_reset_devices(pdev);
|
||||
if (rc)
|
||||
return rc;
|
||||
if (rc) {
|
||||
if (rc != -ENOTSUPP)
|
||||
return rc;
|
||||
/* If the reset fails in a particular way (it has no way to do
|
||||
* a proper hard reset, so returns -ENOTSUPP) we can try to do
|
||||
* a soft reset once we get the controller configured up to the
|
||||
* point that it can accept a command.
|
||||
*/
|
||||
try_soft_reset = 1;
|
||||
rc = 0;
|
||||
}
|
||||
|
||||
reinit_after_soft_reset:
|
||||
|
||||
i = alloc_cciss_hba(pdev);
|
||||
if (i < 0)
|
||||
return -1;
|
||||
@ -4627,6 +4963,11 @@ static int __devinit cciss_init_one(struct pci_dev *pdev,
|
||||
sprintf(h->devname, "cciss%d", i);
|
||||
h->ctlr = i;
|
||||
|
||||
if (cciss_tape_cmds < 2)
|
||||
cciss_tape_cmds = 2;
|
||||
if (cciss_tape_cmds > 16)
|
||||
cciss_tape_cmds = 16;
|
||||
|
||||
init_completion(&h->scan_wait);
|
||||
|
||||
if (cciss_create_hba_sysfs_entry(h))
|
||||
@ -4662,62 +5003,20 @@ static int __devinit cciss_init_one(struct pci_dev *pdev,
|
||||
|
||||
/* make sure the board interrupts are off */
|
||||
h->access.set_intr_mask(h, CCISS_INTR_OFF);
|
||||
if (h->msi_vector || h->msix_vector) {
|
||||
if (request_irq(h->intr[PERF_MODE_INT],
|
||||
do_cciss_msix_intr,
|
||||
IRQF_DISABLED, h->devname, h)) {
|
||||
dev_err(&h->pdev->dev, "Unable to get irq %d for %s\n",
|
||||
h->intr[PERF_MODE_INT], h->devname);
|
||||
goto clean2;
|
||||
}
|
||||
} else {
|
||||
if (request_irq(h->intr[PERF_MODE_INT], do_cciss_intx,
|
||||
IRQF_DISABLED, h->devname, h)) {
|
||||
dev_err(&h->pdev->dev, "Unable to get irq %d for %s\n",
|
||||
h->intr[PERF_MODE_INT], h->devname);
|
||||
goto clean2;
|
||||
}
|
||||
}
|
||||
rc = cciss_request_irq(h, do_cciss_msix_intr, do_cciss_intx);
|
||||
if (rc)
|
||||
goto clean2;
|
||||
|
||||
dev_info(&h->pdev->dev, "%s: <0x%x> at PCI %s IRQ %d%s using DAC\n",
|
||||
h->devname, pdev->device, pci_name(pdev),
|
||||
h->intr[PERF_MODE_INT], dac ? "" : " not");
|
||||
|
||||
h->cmd_pool_bits =
|
||||
kmalloc(DIV_ROUND_UP(h->nr_cmds, BITS_PER_LONG)
|
||||
* sizeof(unsigned long), GFP_KERNEL);
|
||||
h->cmd_pool = (CommandList_struct *)
|
||||
pci_alloc_consistent(h->pdev,
|
||||
h->nr_cmds * sizeof(CommandList_struct),
|
||||
&(h->cmd_pool_dhandle));
|
||||
h->errinfo_pool = (ErrorInfo_struct *)
|
||||
pci_alloc_consistent(h->pdev,
|
||||
h->nr_cmds * sizeof(ErrorInfo_struct),
|
||||
&(h->errinfo_pool_dhandle));
|
||||
if ((h->cmd_pool_bits == NULL)
|
||||
|| (h->cmd_pool == NULL)
|
||||
|| (h->errinfo_pool == NULL)) {
|
||||
dev_err(&h->pdev->dev, "out of memory");
|
||||
goto clean4;
|
||||
}
|
||||
|
||||
/* Need space for temp scatter list */
|
||||
h->scatter_list = kmalloc(h->max_commands *
|
||||
sizeof(struct scatterlist *),
|
||||
GFP_KERNEL);
|
||||
if (!h->scatter_list)
|
||||
if (cciss_allocate_cmd_pool(h))
|
||||
goto clean4;
|
||||
|
||||
if (cciss_allocate_scatterlists(h))
|
||||
goto clean4;
|
||||
|
||||
for (k = 0; k < h->nr_cmds; k++) {
|
||||
h->scatter_list[k] = kmalloc(sizeof(struct scatterlist) *
|
||||
h->maxsgentries,
|
||||
GFP_KERNEL);
|
||||
if (h->scatter_list[k] == NULL) {
|
||||
dev_err(&h->pdev->dev,
|
||||
"could not allocate s/g lists\n");
|
||||
goto clean4;
|
||||
}
|
||||
}
|
||||
h->cmd_sg_list = cciss_allocate_sg_chain_blocks(h,
|
||||
h->chainsize, h->nr_cmds);
|
||||
if (!h->cmd_sg_list && h->chainsize > 0)
|
||||
@ -4741,6 +5040,62 @@ static int __devinit cciss_init_one(struct pci_dev *pdev,
|
||||
h->gendisk[j] = NULL;
|
||||
}
|
||||
|
||||
/* At this point, the controller is ready to take commands.
|
||||
* Now, if reset_devices and the hard reset didn't work, try
|
||||
* the soft reset and see if that works.
|
||||
*/
|
||||
if (try_soft_reset) {
|
||||
|
||||
/* This is kind of gross. We may or may not get a completion
|
||||
* from the soft reset command, and if we do, then the value
|
||||
* from the fifo may or may not be valid. So, we wait 10 secs
|
||||
* after the reset throwing away any completions we get during
|
||||
* that time. Unregister the interrupt handler and register
|
||||
* fake ones to scoop up any residual completions.
|
||||
*/
|
||||
spin_lock_irqsave(&h->lock, flags);
|
||||
h->access.set_intr_mask(h, CCISS_INTR_OFF);
|
||||
spin_unlock_irqrestore(&h->lock, flags);
|
||||
free_irq(h->intr[PERF_MODE_INT], h);
|
||||
rc = cciss_request_irq(h, cciss_msix_discard_completions,
|
||||
cciss_intx_discard_completions);
|
||||
if (rc) {
|
||||
dev_warn(&h->pdev->dev, "Failed to request_irq after "
|
||||
"soft reset.\n");
|
||||
goto clean4;
|
||||
}
|
||||
|
||||
rc = cciss_kdump_soft_reset(h);
|
||||
if (rc) {
|
||||
dev_warn(&h->pdev->dev, "Soft reset failed.\n");
|
||||
goto clean4;
|
||||
}
|
||||
|
||||
dev_info(&h->pdev->dev, "Board READY.\n");
|
||||
dev_info(&h->pdev->dev,
|
||||
"Waiting for stale completions to drain.\n");
|
||||
h->access.set_intr_mask(h, CCISS_INTR_ON);
|
||||
msleep(10000);
|
||||
h->access.set_intr_mask(h, CCISS_INTR_OFF);
|
||||
|
||||
rc = controller_reset_failed(h->cfgtable);
|
||||
if (rc)
|
||||
dev_info(&h->pdev->dev,
|
||||
"Soft reset appears to have failed.\n");
|
||||
|
||||
/* since the controller's reset, we have to go back and re-init
|
||||
* everything. Easiest to just forget what we've done and do it
|
||||
* all over again.
|
||||
*/
|
||||
cciss_undo_allocations_after_kdump_soft_reset(h);
|
||||
try_soft_reset = 0;
|
||||
if (rc)
|
||||
/* don't go to clean4, we already unallocated */
|
||||
return -ENODEV;
|
||||
|
||||
goto reinit_after_soft_reset;
|
||||
}
|
||||
|
||||
cciss_scsi_setup(h);
|
||||
|
||||
/* Turn the interrupts on so we can service requests */
|
||||
@ -4775,21 +5130,9 @@ static int __devinit cciss_init_one(struct pci_dev *pdev,
|
||||
return 1;
|
||||
|
||||
clean4:
|
||||
kfree(h->cmd_pool_bits);
|
||||
/* Free up sg elements */
|
||||
for (k-- ; k >= 0; k--)
|
||||
kfree(h->scatter_list[k]);
|
||||
kfree(h->scatter_list);
|
||||
cciss_free_cmd_pool(h);
|
||||
cciss_free_scatterlists(h);
|
||||
cciss_free_sg_chain_blocks(h->cmd_sg_list, h->nr_cmds);
|
||||
if (h->cmd_pool)
|
||||
pci_free_consistent(h->pdev,
|
||||
h->nr_cmds * sizeof(CommandList_struct),
|
||||
h->cmd_pool, h->cmd_pool_dhandle);
|
||||
if (h->errinfo_pool)
|
||||
pci_free_consistent(h->pdev,
|
||||
h->nr_cmds * sizeof(ErrorInfo_struct),
|
||||
h->errinfo_pool,
|
||||
h->errinfo_pool_dhandle);
|
||||
free_irq(h->intr[PERF_MODE_INT], h);
|
||||
clean2:
|
||||
unregister_blkdev(h->major, h->devname);
|
||||
@ -4887,16 +5230,16 @@ static void __devexit cciss_remove_one(struct pci_dev *pdev)
|
||||
iounmap(h->cfgtable);
|
||||
iounmap(h->vaddr);
|
||||
|
||||
pci_free_consistent(h->pdev, h->nr_cmds * sizeof(CommandList_struct),
|
||||
h->cmd_pool, h->cmd_pool_dhandle);
|
||||
pci_free_consistent(h->pdev, h->nr_cmds * sizeof(ErrorInfo_struct),
|
||||
h->errinfo_pool, h->errinfo_pool_dhandle);
|
||||
kfree(h->cmd_pool_bits);
|
||||
cciss_free_cmd_pool(h);
|
||||
/* Free up sg elements */
|
||||
for (j = 0; j < h->nr_cmds; j++)
|
||||
kfree(h->scatter_list[j]);
|
||||
kfree(h->scatter_list);
|
||||
cciss_free_sg_chain_blocks(h->cmd_sg_list, h->nr_cmds);
|
||||
kfree(h->blockFetchTable);
|
||||
if (h->reply_pool)
|
||||
pci_free_consistent(h->pdev, h->max_commands * sizeof(__u64),
|
||||
h->reply_pool, h->reply_pool_dhandle);
|
||||
/*
|
||||
* Deliberately omit pci_disable_device(): it does something nasty to
|
||||
* Smart Array controllers that pci_enable_device does not undo
|
||||
|
@ -200,7 +200,7 @@ struct ctlr_info
|
||||
* the above.
|
||||
*/
|
||||
#define CCISS_BOARD_READY_WAIT_SECS (120)
|
||||
#define CCISS_BOARD_NOT_READY_WAIT_SECS (10)
|
||||
#define CCISS_BOARD_NOT_READY_WAIT_SECS (100)
|
||||
#define CCISS_BOARD_READY_POLL_INTERVAL_MSECS (100)
|
||||
#define CCISS_BOARD_READY_ITERATIONS \
|
||||
((CCISS_BOARD_READY_WAIT_SECS * 1000) / \
|
||||
@ -209,8 +209,9 @@ struct ctlr_info
|
||||
((CCISS_BOARD_NOT_READY_WAIT_SECS * 1000) / \
|
||||
CCISS_BOARD_READY_POLL_INTERVAL_MSECS)
|
||||
#define CCISS_POST_RESET_PAUSE_MSECS (3000)
|
||||
#define CCISS_POST_RESET_NOOP_INTERVAL_MSECS (1000)
|
||||
#define CCISS_POST_RESET_NOOP_INTERVAL_MSECS (4000)
|
||||
#define CCISS_POST_RESET_NOOP_RETRIES (12)
|
||||
#define CCISS_POST_RESET_NOOP_TIMEOUT_MSECS (10000)
|
||||
|
||||
/*
|
||||
Send the command to the hardware
|
||||
@ -239,11 +240,13 @@ static void SA5_intr_mask(ctlr_info_t *h, unsigned long val)
|
||||
{ /* Turn interrupts on */
|
||||
h->interrupts_enabled = 1;
|
||||
writel(0, h->vaddr + SA5_REPLY_INTR_MASK_OFFSET);
|
||||
(void) readl(h->vaddr + SA5_REPLY_INTR_MASK_OFFSET);
|
||||
} else /* Turn them off */
|
||||
{
|
||||
h->interrupts_enabled = 0;
|
||||
writel( SA5_INTR_OFF,
|
||||
h->vaddr + SA5_REPLY_INTR_MASK_OFFSET);
|
||||
(void) readl(h->vaddr + SA5_REPLY_INTR_MASK_OFFSET);
|
||||
}
|
||||
}
|
||||
/*
|
||||
@ -257,11 +260,13 @@ static void SA5B_intr_mask(ctlr_info_t *h, unsigned long val)
|
||||
{ /* Turn interrupts on */
|
||||
h->interrupts_enabled = 1;
|
||||
writel(0, h->vaddr + SA5_REPLY_INTR_MASK_OFFSET);
|
||||
(void) readl(h->vaddr + SA5_REPLY_INTR_MASK_OFFSET);
|
||||
} else /* Turn them off */
|
||||
{
|
||||
h->interrupts_enabled = 0;
|
||||
writel( SA5B_INTR_OFF,
|
||||
h->vaddr + SA5_REPLY_INTR_MASK_OFFSET);
|
||||
(void) readl(h->vaddr + SA5_REPLY_INTR_MASK_OFFSET);
|
||||
}
|
||||
}
|
||||
|
||||
@ -271,10 +276,12 @@ static void SA5_performant_intr_mask(ctlr_info_t *h, unsigned long val)
|
||||
if (val) { /* turn on interrupts */
|
||||
h->interrupts_enabled = 1;
|
||||
writel(0, h->vaddr + SA5_REPLY_INTR_MASK_OFFSET);
|
||||
(void) readl(h->vaddr + SA5_REPLY_INTR_MASK_OFFSET);
|
||||
} else {
|
||||
h->interrupts_enabled = 0;
|
||||
writel(SA5_PERF_INTR_OFF,
|
||||
h->vaddr + SA5_REPLY_INTR_MASK_OFFSET);
|
||||
(void) readl(h->vaddr + SA5_REPLY_INTR_MASK_OFFSET);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -53,6 +53,7 @@
|
||||
#define CFGTBL_ChangeReq 0x00000001l
|
||||
#define CFGTBL_AccCmds 0x00000001l
|
||||
#define DOORBELL_CTLR_RESET 0x00000004l
|
||||
#define DOORBELL_CTLR_RESET2 0x00000020l
|
||||
|
||||
#define CFGTBL_Trans_Simple 0x00000002l
|
||||
#define CFGTBL_Trans_Performant 0x00000004l
|
||||
@ -142,6 +143,14 @@ typedef struct _ReadCapdata_struct_16
|
||||
#define BMIC_CACHE_FLUSH 0xc2
|
||||
#define CCISS_CACHE_FLUSH 0x01 /* C2 was already being used by CCISS */
|
||||
|
||||
#define CCISS_ABORT_MSG 0x00
|
||||
#define CCISS_RESET_MSG 0x01
|
||||
#define CCISS_RESET_TYPE_CONTROLLER 0x00
|
||||
#define CCISS_RESET_TYPE_BUS 0x01
|
||||
#define CCISS_RESET_TYPE_TARGET 0x03
|
||||
#define CCISS_RESET_TYPE_LUN 0x04
|
||||
#define CCISS_NOOP_MSG 0x03
|
||||
|
||||
/* Command List Structure */
|
||||
#define CTLR_LUNID "\0\0\0\0\0\0\0\0"
|
||||
|
||||
@ -235,6 +244,8 @@ typedef struct _CfgTable_struct {
|
||||
u8 reserved[0x78 - 0x58];
|
||||
u32 misc_fw_support; /* offset 0x78 */
|
||||
#define MISC_FW_DOORBELL_RESET (0x02)
|
||||
#define MISC_FW_DOORBELL_RESET2 (0x10)
|
||||
u8 driver_version[32];
|
||||
} CfgTable_struct;
|
||||
|
||||
struct TransTable_struct {
|
||||
|
@ -84,7 +84,6 @@ static struct scsi_host_template cciss_driver_template = {
|
||||
.proc_name = "cciss",
|
||||
.proc_info = cciss_scsi_proc_info,
|
||||
.queuecommand = cciss_scsi_queue_command,
|
||||
.can_queue = SCSI_CCISS_CAN_QUEUE,
|
||||
.this_id = 7,
|
||||
.cmd_per_lun = 1,
|
||||
.use_clustering = DISABLE_CLUSTERING,
|
||||
@ -108,16 +107,13 @@ struct cciss_scsi_cmd_stack_elem_t {
|
||||
|
||||
#pragma pack()
|
||||
|
||||
#define CMD_STACK_SIZE (SCSI_CCISS_CAN_QUEUE * \
|
||||
CCISS_MAX_SCSI_DEVS_PER_HBA + 2)
|
||||
// plus two for init time usage
|
||||
|
||||
#pragma pack(1)
|
||||
struct cciss_scsi_cmd_stack_t {
|
||||
struct cciss_scsi_cmd_stack_elem_t *pool;
|
||||
struct cciss_scsi_cmd_stack_elem_t *elem[CMD_STACK_SIZE];
|
||||
struct cciss_scsi_cmd_stack_elem_t **elem;
|
||||
dma_addr_t cmd_pool_handle;
|
||||
int top;
|
||||
int nelems;
|
||||
};
|
||||
#pragma pack()
|
||||
|
||||
@ -191,7 +187,7 @@ scsi_cmd_free(ctlr_info_t *h, CommandList_struct *c)
|
||||
sa = h->scsi_ctlr;
|
||||
stk = &sa->cmd_stack;
|
||||
stk->top++;
|
||||
if (stk->top >= CMD_STACK_SIZE) {
|
||||
if (stk->top >= stk->nelems) {
|
||||
dev_err(&h->pdev->dev,
|
||||
"scsi_cmd_free called too many times.\n");
|
||||
BUG();
|
||||
@ -206,13 +202,14 @@ scsi_cmd_stack_setup(ctlr_info_t *h, struct cciss_scsi_adapter_data_t *sa)
|
||||
struct cciss_scsi_cmd_stack_t *stk;
|
||||
size_t size;
|
||||
|
||||
stk = &sa->cmd_stack;
|
||||
stk->nelems = cciss_tape_cmds + 2;
|
||||
sa->cmd_sg_list = cciss_allocate_sg_chain_blocks(h,
|
||||
h->chainsize, CMD_STACK_SIZE);
|
||||
h->chainsize, stk->nelems);
|
||||
if (!sa->cmd_sg_list && h->chainsize > 0)
|
||||
return -ENOMEM;
|
||||
|
||||
stk = &sa->cmd_stack;
|
||||
size = sizeof(struct cciss_scsi_cmd_stack_elem_t) * CMD_STACK_SIZE;
|
||||
size = sizeof(struct cciss_scsi_cmd_stack_elem_t) * stk->nelems;
|
||||
|
||||
/* Check alignment, see cciss_cmd.h near CommandList_struct def. */
|
||||
BUILD_BUG_ON((sizeof(*stk->pool) % COMMANDLIST_ALIGNMENT) != 0);
|
||||
@ -221,18 +218,23 @@ scsi_cmd_stack_setup(ctlr_info_t *h, struct cciss_scsi_adapter_data_t *sa)
|
||||
pci_alloc_consistent(h->pdev, size, &stk->cmd_pool_handle);
|
||||
|
||||
if (stk->pool == NULL) {
|
||||
cciss_free_sg_chain_blocks(sa->cmd_sg_list, CMD_STACK_SIZE);
|
||||
cciss_free_sg_chain_blocks(sa->cmd_sg_list, stk->nelems);
|
||||
sa->cmd_sg_list = NULL;
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
for (i=0; i<CMD_STACK_SIZE; i++) {
|
||||
stk->elem = kmalloc(sizeof(stk->elem[0]) * stk->nelems, GFP_KERNEL);
|
||||
if (!stk->elem) {
|
||||
pci_free_consistent(h->pdev, size, stk->pool,
|
||||
stk->cmd_pool_handle);
|
||||
return -1;
|
||||
}
|
||||
for (i = 0; i < stk->nelems; i++) {
|
||||
stk->elem[i] = &stk->pool[i];
|
||||
stk->elem[i]->busaddr = (__u32) (stk->cmd_pool_handle +
|
||||
(sizeof(struct cciss_scsi_cmd_stack_elem_t) * i));
|
||||
stk->elem[i]->cmdindex = i;
|
||||
}
|
||||
stk->top = CMD_STACK_SIZE-1;
|
||||
stk->top = stk->nelems-1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -245,16 +247,18 @@ scsi_cmd_stack_free(ctlr_info_t *h)
|
||||
|
||||
sa = h->scsi_ctlr;
|
||||
stk = &sa->cmd_stack;
|
||||
if (stk->top != CMD_STACK_SIZE-1) {
|
||||
if (stk->top != stk->nelems-1) {
|
||||
dev_warn(&h->pdev->dev,
|
||||
"bug: %d scsi commands are still outstanding.\n",
|
||||
CMD_STACK_SIZE - stk->top);
|
||||
stk->nelems - stk->top);
|
||||
}
|
||||
size = sizeof(struct cciss_scsi_cmd_stack_elem_t) * CMD_STACK_SIZE;
|
||||
size = sizeof(struct cciss_scsi_cmd_stack_elem_t) * stk->nelems;
|
||||
|
||||
pci_free_consistent(h->pdev, size, stk->pool, stk->cmd_pool_handle);
|
||||
stk->pool = NULL;
|
||||
cciss_free_sg_chain_blocks(sa->cmd_sg_list, CMD_STACK_SIZE);
|
||||
cciss_free_sg_chain_blocks(sa->cmd_sg_list, stk->nelems);
|
||||
kfree(stk->elem);
|
||||
stk->elem = NULL;
|
||||
}
|
||||
|
||||
#if 0
|
||||
@ -859,6 +863,7 @@ cciss_scsi_detect(ctlr_info_t *h)
|
||||
sh->io_port = 0; // good enough? FIXME,
|
||||
sh->n_io_port = 0; // I don't think we use these two...
|
||||
sh->this_id = SELF_SCSI_ID;
|
||||
sh->can_queue = cciss_tape_cmds;
|
||||
sh->sg_tablesize = h->maxsgentries;
|
||||
sh->max_cmd_len = MAX_COMMAND_SIZE;
|
||||
|
||||
|
@ -36,13 +36,9 @@
|
||||
addressible natively, and may in fact turn
|
||||
out to be not scsi at all. */
|
||||
|
||||
#define SCSI_CCISS_CAN_QUEUE 2
|
||||
|
||||
/*
|
||||
|
||||
Note, cmd_per_lun could give us some trouble, so I'm setting it very low.
|
||||
Likewise, SCSI_CCISS_CAN_QUEUE is set very conservatively.
|
||||
|
||||
If the upper scsi layer tries to track how many commands we have
|
||||
outstanding, it will be operating under the misapprehension that it is
|
||||
the only one sending us requests. We also have the block interface,
|
||||
|
@ -28,7 +28,7 @@
|
||||
#include "drbd_int.h"
|
||||
#include "drbd_wrappers.h"
|
||||
|
||||
/* We maintain a trivial check sum in our on disk activity log.
|
||||
/* We maintain a trivial checksum in our on disk activity log.
|
||||
* With that we can ensure correct operation even when the storage
|
||||
* device might do a partial (last) sector write while losing power.
|
||||
*/
|
||||
|
@ -74,7 +74,7 @@
|
||||
* as we are "attached" to a local disk, which at 32 GiB for 1PiB storage
|
||||
* seems excessive.
|
||||
*
|
||||
* We plan to reduce the amount of in-core bitmap pages by pageing them in
|
||||
* We plan to reduce the amount of in-core bitmap pages by paging them in
|
||||
* and out against their on-disk location as necessary, but need to make
|
||||
* sure we don't cause too much meta data IO, and must not deadlock in
|
||||
* tight memory situations. This needs some more work.
|
||||
@ -200,7 +200,7 @@ void drbd_bm_unlock(struct drbd_conf *mdev)
|
||||
* we if bits have been cleared since last IO. */
|
||||
#define BM_PAGE_LAZY_WRITEOUT 28
|
||||
|
||||
/* store_page_idx uses non-atomic assingment. It is only used directly after
|
||||
/* store_page_idx uses non-atomic assignment. It is only used directly after
|
||||
* allocating the page. All other bm_set_page_* and bm_clear_page_* need to
|
||||
* use atomic bit manipulation, as set_out_of_sync (and therefore bitmap
|
||||
* changes) may happen from various contexts, and wait_on_bit/wake_up_bit
|
||||
@ -318,7 +318,7 @@ static void bm_unmap(unsigned long *p_addr)
|
||||
/* word offset from start of bitmap to word number _in_page_
|
||||
* modulo longs per page
|
||||
#define MLPP(X) ((X) % (PAGE_SIZE/sizeof(long))
|
||||
hm, well, Philipp thinks gcc might not optimze the % into & (... - 1)
|
||||
hm, well, Philipp thinks gcc might not optimize the % into & (... - 1)
|
||||
so do it explicitly:
|
||||
*/
|
||||
#define MLPP(X) ((X) & ((PAGE_SIZE/sizeof(long))-1))
|
||||
|
@ -700,7 +700,7 @@ struct drbd_request {
|
||||
* see drbd_endio_pri(). */
|
||||
struct bio *private_bio;
|
||||
|
||||
struct hlist_node colision;
|
||||
struct hlist_node collision;
|
||||
sector_t sector;
|
||||
unsigned int size;
|
||||
unsigned int epoch; /* barrier_nr */
|
||||
@ -766,7 +766,7 @@ struct digest_info {
|
||||
|
||||
struct drbd_epoch_entry {
|
||||
struct drbd_work w;
|
||||
struct hlist_node colision;
|
||||
struct hlist_node collision;
|
||||
struct drbd_epoch *epoch; /* for writes */
|
||||
struct drbd_conf *mdev;
|
||||
struct page *pages;
|
||||
@ -1129,6 +1129,8 @@ struct drbd_conf {
|
||||
int rs_in_flight; /* resync sectors in flight (to proxy, in proxy and from proxy) */
|
||||
int rs_planed; /* resync sectors already planned */
|
||||
atomic_t ap_in_flight; /* App sectors in flight (waiting for ack) */
|
||||
int peer_max_bio_size;
|
||||
int local_max_bio_size;
|
||||
};
|
||||
|
||||
static inline struct drbd_conf *minor_to_mdev(unsigned int minor)
|
||||
@ -1218,8 +1220,6 @@ extern void drbd_free_resources(struct drbd_conf *mdev);
|
||||
extern void tl_release(struct drbd_conf *mdev, unsigned int barrier_nr,
|
||||
unsigned int set_size);
|
||||
extern void tl_clear(struct drbd_conf *mdev);
|
||||
enum drbd_req_event;
|
||||
extern void tl_restart(struct drbd_conf *mdev, enum drbd_req_event what);
|
||||
extern void _tl_add_barrier(struct drbd_conf *, struct drbd_tl_epoch *);
|
||||
extern void drbd_free_sock(struct drbd_conf *mdev);
|
||||
extern int drbd_send(struct drbd_conf *mdev, struct socket *sock,
|
||||
@ -1434,6 +1434,7 @@ struct bm_extent {
|
||||
* hash table. */
|
||||
#define HT_SHIFT 8
|
||||
#define DRBD_MAX_BIO_SIZE (1U<<(9+HT_SHIFT))
|
||||
#define DRBD_MAX_BIO_SIZE_SAFE (1 << 12) /* Works always = 4k */
|
||||
|
||||
#define DRBD_MAX_SIZE_H80_PACKET (1 << 15) /* The old header only allows packets up to 32Kib data */
|
||||
|
||||
@ -1518,9 +1519,9 @@ extern void drbd_resume_io(struct drbd_conf *mdev);
|
||||
extern char *ppsize(char *buf, unsigned long long size);
|
||||
extern sector_t drbd_new_dev_size(struct drbd_conf *, struct drbd_backing_dev *, int);
|
||||
enum determine_dev_size { dev_size_error = -1, unchanged = 0, shrunk = 1, grew = 2 };
|
||||
extern enum determine_dev_size drbd_determin_dev_size(struct drbd_conf *, enum dds_flags) __must_hold(local);
|
||||
extern enum determine_dev_size drbd_determine_dev_size(struct drbd_conf *, enum dds_flags) __must_hold(local);
|
||||
extern void resync_after_online_grow(struct drbd_conf *);
|
||||
extern void drbd_setup_queue_param(struct drbd_conf *mdev, unsigned int) __must_hold(local);
|
||||
extern void drbd_reconsider_max_bio_size(struct drbd_conf *mdev);
|
||||
extern enum drbd_state_rv drbd_set_role(struct drbd_conf *mdev,
|
||||
enum drbd_role new_role,
|
||||
int force);
|
||||
@ -1828,6 +1829,8 @@ static inline void __drbd_chk_io_error_(struct drbd_conf *mdev, int forcedetach,
|
||||
if (!forcedetach) {
|
||||
if (__ratelimit(&drbd_ratelimit_state))
|
||||
dev_err(DEV, "Local IO failed in %s.\n", where);
|
||||
if (mdev->state.disk > D_INCONSISTENT)
|
||||
_drbd_set_state(_NS(mdev, disk, D_INCONSISTENT), CS_HARD, NULL);
|
||||
break;
|
||||
}
|
||||
/* NOTE fall through to detach case if forcedetach set */
|
||||
@ -2153,6 +2156,10 @@ static inline int get_net_conf(struct drbd_conf *mdev)
|
||||
static inline void put_ldev(struct drbd_conf *mdev)
|
||||
{
|
||||
int i = atomic_dec_return(&mdev->local_cnt);
|
||||
|
||||
/* This may be called from some endio handler,
|
||||
* so we must not sleep here. */
|
||||
|
||||
__release(local);
|
||||
D_ASSERT(i >= 0);
|
||||
if (i == 0) {
|
||||
|
@ -745,6 +745,9 @@ is_valid_state(struct drbd_conf *mdev, union drbd_state ns)
|
||||
mdev->agreed_pro_version < 88)
|
||||
rv = SS_NOT_SUPPORTED;
|
||||
|
||||
else if (ns.conn >= C_CONNECTED && ns.pdsk == D_UNKNOWN)
|
||||
rv = SS_CONNECTED_OUTDATES;
|
||||
|
||||
return rv;
|
||||
}
|
||||
|
||||
@ -1565,6 +1568,10 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
|
||||
put_ldev(mdev);
|
||||
}
|
||||
|
||||
/* Notify peer that I had a local IO error, and did not detached.. */
|
||||
if (os.disk == D_UP_TO_DATE && ns.disk == D_INCONSISTENT)
|
||||
drbd_send_state(mdev);
|
||||
|
||||
/* Disks got bigger while they were detached */
|
||||
if (ns.disk > D_NEGOTIATING && ns.pdsk > D_NEGOTIATING &&
|
||||
test_and_clear_bit(RESYNC_AFTER_NEG, &mdev->flags)) {
|
||||
@ -2064,7 +2071,7 @@ int drbd_send_sizes(struct drbd_conf *mdev, int trigger_reply, enum dds_flags fl
|
||||
{
|
||||
struct p_sizes p;
|
||||
sector_t d_size, u_size;
|
||||
int q_order_type;
|
||||
int q_order_type, max_bio_size;
|
||||
int ok;
|
||||
|
||||
if (get_ldev_if_state(mdev, D_NEGOTIATING)) {
|
||||
@ -2072,17 +2079,20 @@ int drbd_send_sizes(struct drbd_conf *mdev, int trigger_reply, enum dds_flags fl
|
||||
d_size = drbd_get_max_capacity(mdev->ldev);
|
||||
u_size = mdev->ldev->dc.disk_size;
|
||||
q_order_type = drbd_queue_order_type(mdev);
|
||||
max_bio_size = queue_max_hw_sectors(mdev->ldev->backing_bdev->bd_disk->queue) << 9;
|
||||
max_bio_size = min_t(int, max_bio_size, DRBD_MAX_BIO_SIZE);
|
||||
put_ldev(mdev);
|
||||
} else {
|
||||
d_size = 0;
|
||||
u_size = 0;
|
||||
q_order_type = QUEUE_ORDERED_NONE;
|
||||
max_bio_size = DRBD_MAX_BIO_SIZE; /* ... multiple BIOs per peer_request */
|
||||
}
|
||||
|
||||
p.d_size = cpu_to_be64(d_size);
|
||||
p.u_size = cpu_to_be64(u_size);
|
||||
p.c_size = cpu_to_be64(trigger_reply ? 0 : drbd_get_capacity(mdev->this_bdev));
|
||||
p.max_bio_size = cpu_to_be32(queue_max_hw_sectors(mdev->rq_queue) << 9);
|
||||
p.max_bio_size = cpu_to_be32(max_bio_size);
|
||||
p.queue_order_type = cpu_to_be16(q_order_type);
|
||||
p.dds_flags = cpu_to_be16(flags);
|
||||
|
||||
@ -2722,7 +2732,7 @@ int drbd_send_dblock(struct drbd_conf *mdev, struct drbd_request *req)
|
||||
|
||||
/* double check digest, sometimes buffers have been modified in flight. */
|
||||
if (dgs > 0 && dgs <= 64) {
|
||||
/* 64 byte, 512 bit, is the larges digest size
|
||||
/* 64 byte, 512 bit, is the largest digest size
|
||||
* currently supported in kernel crypto. */
|
||||
unsigned char digest[64];
|
||||
drbd_csum_bio(mdev, mdev->integrity_w_tfm, req->master_bio, digest);
|
||||
@ -3041,6 +3051,8 @@ void drbd_init_set_defaults(struct drbd_conf *mdev)
|
||||
mdev->agreed_pro_version = PRO_VERSION_MAX;
|
||||
mdev->write_ordering = WO_bdev_flush;
|
||||
mdev->resync_wenr = LC_FREE;
|
||||
mdev->peer_max_bio_size = DRBD_MAX_BIO_SIZE_SAFE;
|
||||
mdev->local_max_bio_size = DRBD_MAX_BIO_SIZE_SAFE;
|
||||
}
|
||||
|
||||
void drbd_mdev_cleanup(struct drbd_conf *mdev)
|
||||
@ -3275,7 +3287,7 @@ static void drbd_delete_device(unsigned int minor)
|
||||
|
||||
drbd_release_ee_lists(mdev);
|
||||
|
||||
/* should be free'd on disconnect? */
|
||||
/* should be freed on disconnect? */
|
||||
kfree(mdev->ee_hash);
|
||||
/*
|
||||
mdev->ee_hash_s = 0;
|
||||
@ -3415,7 +3427,9 @@ struct drbd_conf *drbd_new_device(unsigned int minor)
|
||||
q->backing_dev_info.congested_data = mdev;
|
||||
|
||||
blk_queue_make_request(q, drbd_make_request);
|
||||
blk_queue_max_hw_sectors(q, DRBD_MAX_BIO_SIZE >> 9);
|
||||
/* Setting the max_hw_sectors to an odd value of 8kibyte here
|
||||
This triggers a max_bio_size message upon first attach or connect */
|
||||
blk_queue_max_hw_sectors(q, DRBD_MAX_BIO_SIZE_SAFE >> 8);
|
||||
blk_queue_bounce_limit(q, BLK_BOUNCE_ANY);
|
||||
blk_queue_merge_bvec(q, drbd_merge_bvec);
|
||||
q->queue_lock = &mdev->req_lock;
|
||||
@ -3627,7 +3641,8 @@ struct meta_data_on_disk {
|
||||
/* `-- act_log->nr_elements <-- sync_conf.al_extents */
|
||||
u32 bm_offset; /* offset to the bitmap, from here */
|
||||
u32 bm_bytes_per_bit; /* BM_BLOCK_SIZE */
|
||||
u32 reserved_u32[4];
|
||||
u32 la_peer_max_bio_size; /* last peer max_bio_size */
|
||||
u32 reserved_u32[3];
|
||||
|
||||
} __packed;
|
||||
|
||||
@ -3668,6 +3683,7 @@ void drbd_md_sync(struct drbd_conf *mdev)
|
||||
buffer->device_uuid = cpu_to_be64(mdev->ldev->md.device_uuid);
|
||||
|
||||
buffer->bm_offset = cpu_to_be32(mdev->ldev->md.bm_offset);
|
||||
buffer->la_peer_max_bio_size = cpu_to_be32(mdev->peer_max_bio_size);
|
||||
|
||||
D_ASSERT(drbd_md_ss__(mdev, mdev->ldev) == mdev->ldev->md.md_offset);
|
||||
sector = mdev->ldev->md.md_offset;
|
||||
@ -3751,6 +3767,15 @@ int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev)
|
||||
mdev->sync_conf.al_extents = be32_to_cpu(buffer->al_nr_extents);
|
||||
bdev->md.device_uuid = be64_to_cpu(buffer->device_uuid);
|
||||
|
||||
spin_lock_irq(&mdev->req_lock);
|
||||
if (mdev->state.conn < C_CONNECTED) {
|
||||
int peer;
|
||||
peer = be32_to_cpu(buffer->la_peer_max_bio_size);
|
||||
peer = max_t(int, peer, DRBD_MAX_BIO_SIZE_SAFE);
|
||||
mdev->peer_max_bio_size = peer;
|
||||
}
|
||||
spin_unlock_irq(&mdev->req_lock);
|
||||
|
||||
if (mdev->sync_conf.al_extents < 7)
|
||||
mdev->sync_conf.al_extents = 127;
|
||||
|
||||
|
@ -272,9 +272,28 @@ static int _try_outdate_peer_async(void *data)
|
||||
{
|
||||
struct drbd_conf *mdev = (struct drbd_conf *)data;
|
||||
enum drbd_disk_state nps;
|
||||
union drbd_state ns;
|
||||
|
||||
nps = drbd_try_outdate_peer(mdev);
|
||||
drbd_request_state(mdev, NS(pdsk, nps));
|
||||
|
||||
/* Not using
|
||||
drbd_request_state(mdev, NS(pdsk, nps));
|
||||
here, because we might were able to re-establish the connection
|
||||
in the meantime. This can only partially be solved in the state's
|
||||
engine is_valid_state() and is_valid_state_transition()
|
||||
functions.
|
||||
|
||||
nps can be D_INCONSISTENT, D_OUTDATED or D_UNKNOWN.
|
||||
pdsk == D_INCONSISTENT while conn >= C_CONNECTED is valid,
|
||||
therefore we have to have the pre state change check here.
|
||||
*/
|
||||
spin_lock_irq(&mdev->req_lock);
|
||||
ns = mdev->state;
|
||||
if (ns.conn < C_WF_REPORT_PARAMS) {
|
||||
ns.pdsk = nps;
|
||||
_drbd_set_state(mdev, ns, CS_VERBOSE, NULL);
|
||||
}
|
||||
spin_unlock_irq(&mdev->req_lock);
|
||||
|
||||
return 0;
|
||||
}
@ -577,7 +596,7 @@ void drbd_resume_io(struct drbd_conf *mdev)
* Returns 0 on success, negative return values indicate errors.
* You should call drbd_md_sync() after calling this function.
*/
enum determine_dev_size drbd_determin_dev_size(struct drbd_conf *mdev, enum dds_flags flags) __must_hold(local)
enum determine_dev_size drbd_determine_dev_size(struct drbd_conf *mdev, enum dds_flags flags) __must_hold(local)
{
sector_t prev_first_sect, prev_size; /* previous meta location */
sector_t la_size;
@ -773,30 +792,78 @@ static int drbd_check_al_size(struct drbd_conf *mdev)
return 0;
}

void drbd_setup_queue_param(struct drbd_conf *mdev, unsigned int max_bio_size) __must_hold(local)
static void drbd_setup_queue_param(struct drbd_conf *mdev, unsigned int max_bio_size)
{
struct request_queue * const q = mdev->rq_queue;
struct request_queue * const b = mdev->ldev->backing_bdev->bd_disk->queue;
int max_segments = mdev->ldev->dc.max_bio_bvecs;
int max_hw_sectors = min(queue_max_hw_sectors(b), max_bio_size >> 9);
int max_hw_sectors = max_bio_size >> 9;
int max_segments = 0;

if (get_ldev_if_state(mdev, D_ATTACHING)) {
struct request_queue * const b = mdev->ldev->backing_bdev->bd_disk->queue;

max_hw_sectors = min(queue_max_hw_sectors(b), max_bio_size >> 9);
max_segments = mdev->ldev->dc.max_bio_bvecs;
put_ldev(mdev);
}

blk_queue_logical_block_size(q, 512);
blk_queue_max_hw_sectors(q, max_hw_sectors);
/* This is the workaround for "bio would need to, but cannot, be split" */
blk_queue_max_segments(q, max_segments ? max_segments : BLK_MAX_SEGMENTS);
blk_queue_segment_boundary(q, PAGE_CACHE_SIZE-1);
blk_queue_stack_limits(q, b);

dev_info(DEV, "max BIO size = %u\n", queue_max_hw_sectors(q) << 9);
if (get_ldev_if_state(mdev, D_ATTACHING)) {
struct request_queue * const b = mdev->ldev->backing_bdev->bd_disk->queue;

if (q->backing_dev_info.ra_pages != b->backing_dev_info.ra_pages) {
dev_info(DEV, "Adjusting my ra_pages to backing device's (%lu -> %lu)\n",
q->backing_dev_info.ra_pages,
b->backing_dev_info.ra_pages);
q->backing_dev_info.ra_pages = b->backing_dev_info.ra_pages;
blk_queue_stack_limits(q, b);

if (q->backing_dev_info.ra_pages != b->backing_dev_info.ra_pages) {
dev_info(DEV, "Adjusting my ra_pages to backing device's (%lu -> %lu)\n",
q->backing_dev_info.ra_pages,
b->backing_dev_info.ra_pages);
q->backing_dev_info.ra_pages = b->backing_dev_info.ra_pages;
}
put_ldev(mdev);
}
}

void drbd_reconsider_max_bio_size(struct drbd_conf *mdev)
{
int now, new, local, peer;

now = queue_max_hw_sectors(mdev->rq_queue) << 9;
local = mdev->local_max_bio_size; /* Eventually last known value, from volatile memory */
peer = mdev->peer_max_bio_size; /* Eventually last known value, from meta data */

if (get_ldev_if_state(mdev, D_ATTACHING)) {
local = queue_max_hw_sectors(mdev->ldev->backing_bdev->bd_disk->queue) << 9;
mdev->local_max_bio_size = local;
put_ldev(mdev);
}

/* We may ignore peer limits if the peer is modern enough.
Because new from 8.3.8 onwards the peer can use multiple
BIOs for a single peer_request */
if (mdev->state.conn >= C_CONNECTED) {
if (mdev->agreed_pro_version < 94)
peer = mdev->peer_max_bio_size;
else if (mdev->agreed_pro_version == 94)
peer = DRBD_MAX_SIZE_H80_PACKET;
else /* drbd 8.3.8 onwards */
peer = DRBD_MAX_BIO_SIZE;
}

new = min_t(int, local, peer);

if (mdev->state.role == R_PRIMARY && new < now)
dev_err(DEV, "ASSERT FAILED new < now; (%d < %d)\n", new, now);

if (new != now)
dev_info(DEV, "max BIO size = %u\n", new);

drbd_setup_queue_param(mdev, new);
}
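
drbd_reconsider_max_bio_size() boils down to new = min(local, peer), where the peer value is taken literally only for protocol 93 and older, pinned to the H80 packet limit for protocol 94, and treated as unconstrained (up to DRBD_MAX_BIO_SIZE) for 8.3.8 and later peers that can split a peer request into several BIOs. A minimal userspace rendering of that decision follows; the two size constants are placeholders, not DRBD's real values:

#include <stdio.h>

#define SKETCH_MAX_SIZE_H80_PACKET (32 * 1024)   /* placeholder */
#define SKETCH_MAX_BIO_SIZE        (1024 * 1024) /* placeholder */

static int effective_max_bio_size(int local, int peer_reported,
				  int connected, int agreed_pro_version)
{
	int peer = peer_reported;

	if (connected) {
		if (agreed_pro_version < 94)
			peer = peer_reported;              /* trust last known value */
		else if (agreed_pro_version == 94)
			peer = SKETCH_MAX_SIZE_H80_PACKET; /* header format limit */
		else
			peer = SKETCH_MAX_BIO_SIZE;        /* 8.3.8+: peer can split */
	}
	return local < peer ? local : peer;
}

int main(void)
{
	/* local limit 512 KiB, peer on protocol 95: result is 512 KiB */
	printf("%d\n", effective_max_bio_size(512 * 1024, 4096, 1, 95));
	return 0;
}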

/* serialize deconfig (worker exiting, doing cleanup)
* and reconfig (drbdsetup disk, drbdsetup net)
*
@ -865,7 +932,6 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
struct block_device *bdev;
struct lru_cache *resync_lru = NULL;
union drbd_state ns, os;
unsigned int max_bio_size;
enum drbd_state_rv rv;
int cp_discovered = 0;
int logical_block_size;
@ -1117,20 +1183,7 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
mdev->read_cnt = 0;
mdev->writ_cnt = 0;

max_bio_size = DRBD_MAX_BIO_SIZE;
if (mdev->state.conn == C_CONNECTED) {
/* We are Primary, Connected, and now attach a new local
* backing store. We must not increase the user visible maximum
* bio size on this device to something the peer may not be
* able to handle. */
if (mdev->agreed_pro_version < 94)
max_bio_size = queue_max_hw_sectors(mdev->rq_queue) << 9;
else if (mdev->agreed_pro_version == 94)
max_bio_size = DRBD_MAX_SIZE_H80_PACKET;
/* else: drbd 8.3.9 and later, stay with default */
}

drbd_setup_queue_param(mdev, max_bio_size);
drbd_reconsider_max_bio_size(mdev);

/* If I am currently not R_PRIMARY,
* but meta data primary indicator is set,
@ -1152,7 +1205,7 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
!drbd_md_test_flag(mdev->ldev, MDF_CONNECTED_IND))
set_bit(USE_DEGR_WFC_T, &mdev->flags);

dd = drbd_determin_dev_size(mdev, 0);
dd = drbd_determine_dev_size(mdev, 0);
if (dd == dev_size_error) {
retcode = ERR_NOMEM_BITMAP;
goto force_diskless_dec;
@ -1281,11 +1334,19 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
static int drbd_nl_detach(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
struct drbd_nl_cfg_reply *reply)
{
enum drbd_ret_code retcode;
int ret;
drbd_suspend_io(mdev); /* so no-one is stuck in drbd_al_begin_io */
reply->ret_code = drbd_request_state(mdev, NS(disk, D_DISKLESS));
if (mdev->state.disk == D_DISKLESS)
wait_event(mdev->misc_wait, !atomic_read(&mdev->local_cnt));
retcode = drbd_request_state(mdev, NS(disk, D_FAILED));
/* D_FAILED will transition to DISKLESS. */
ret = wait_event_interruptible(mdev->misc_wait,
mdev->state.disk != D_FAILED);
drbd_resume_io(mdev);
if ((int)retcode == (int)SS_IS_DISKLESS)
retcode = SS_NOTHING_TO_DO;
if (ret)
retcode = ERR_INTR;
reply->ret_code = retcode;
return 0;
}

@ -1658,7 +1719,7 @@ static int drbd_nl_resize(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,

mdev->ldev->dc.disk_size = (sector_t)rs.resize_size;
ddsf = (rs.resize_force ? DDSF_FORCED : 0) | (rs.no_resync ? DDSF_NO_RESYNC : 0);
dd = drbd_determin_dev_size(mdev, ddsf);
dd = drbd_determine_dev_size(mdev, ddsf);
drbd_md_sync(mdev);
put_ldev(mdev);
if (dd == dev_size_error) {

@ -333,7 +333,7 @@ struct drbd_epoch_entry *drbd_alloc_ee(struct drbd_conf *mdev,
if (!page)
goto fail;

INIT_HLIST_NODE(&e->colision);
INIT_HLIST_NODE(&e->collision);
e->epoch = NULL;
e->mdev = mdev;
e->pages = page;
@ -356,7 +356,7 @@ void drbd_free_some_ee(struct drbd_conf *mdev, struct drbd_epoch_entry *e, int i
kfree(e->digest);
drbd_pp_free(mdev, e->pages, is_net);
D_ASSERT(atomic_read(&e->pending_bios) == 0);
D_ASSERT(hlist_unhashed(&e->colision));
D_ASSERT(hlist_unhashed(&e->collision));
mempool_free(e, drbd_ee_mempool);
}

@ -787,7 +787,7 @@ static int drbd_connect(struct drbd_conf *mdev)
}

if (sock && msock) {
schedule_timeout_interruptible(HZ / 10);
schedule_timeout_interruptible(mdev->net_conf->ping_timeo*HZ/10);
ok = drbd_socket_okay(mdev, &sock);
ok = drbd_socket_okay(mdev, &msock) && ok;
if (ok)
@ -899,11 +899,6 @@ retry:

drbd_thread_start(&mdev->asender);

if (mdev->agreed_pro_version < 95 && get_ldev(mdev)) {
drbd_setup_queue_param(mdev, DRBD_MAX_SIZE_H80_PACKET);
put_ldev(mdev);
}

if (drbd_send_protocol(mdev) == -1)
return -1;
drbd_send_sync_param(mdev, &mdev->sync_conf);
@ -1418,7 +1413,7 @@ static int e_end_resync_block(struct drbd_conf *mdev, struct drbd_work *w, int u
sector_t sector = e->sector;
int ok;

D_ASSERT(hlist_unhashed(&e->colision));
D_ASSERT(hlist_unhashed(&e->collision));

if (likely((e->flags & EE_WAS_ERROR) == 0)) {
drbd_set_in_sync(mdev, sector, e->size);
@ -1487,7 +1482,7 @@ static int receive_DataReply(struct drbd_conf *mdev, enum drbd_packets cmd, unsi
return false;
}

/* hlist_del(&req->colision) is done in _req_may_be_done, to avoid
/* hlist_del(&req->collision) is done in _req_may_be_done, to avoid
* special casing it there for the various failure cases.
* still no race with drbd_fail_pending_reads */
ok = recv_dless_read(mdev, req, sector, data_size);
@ -1558,11 +1553,11 @@ static int e_end_block(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
* P_WRITE_ACK / P_NEG_ACK, to get the sequence number right. */
if (mdev->net_conf->two_primaries) {
spin_lock_irq(&mdev->req_lock);
D_ASSERT(!hlist_unhashed(&e->colision));
hlist_del_init(&e->colision);
D_ASSERT(!hlist_unhashed(&e->collision));
hlist_del_init(&e->collision);
spin_unlock_irq(&mdev->req_lock);
} else {
D_ASSERT(hlist_unhashed(&e->colision));
D_ASSERT(hlist_unhashed(&e->collision));
}

drbd_may_finish_epoch(mdev, e->epoch, EV_PUT + (cancel ? EV_CLEANUP : 0));
@ -1579,8 +1574,8 @@ static int e_send_discard_ack(struct drbd_conf *mdev, struct drbd_work *w, int u
ok = drbd_send_ack(mdev, P_DISCARD_ACK, e);

spin_lock_irq(&mdev->req_lock);
D_ASSERT(!hlist_unhashed(&e->colision));
hlist_del_init(&e->colision);
D_ASSERT(!hlist_unhashed(&e->collision));
hlist_del_init(&e->collision);
spin_unlock_irq(&mdev->req_lock);

dec_unacked(mdev);
@ -1755,7 +1750,7 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned

spin_lock_irq(&mdev->req_lock);

hlist_add_head(&e->colision, ee_hash_slot(mdev, sector));
hlist_add_head(&e->collision, ee_hash_slot(mdev, sector));

#define OVERLAPS overlaps(i->sector, i->size, sector, size)
slot = tl_hash_slot(mdev, sector);
@ -1765,7 +1760,7 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned
int have_conflict = 0;
prepare_to_wait(&mdev->misc_wait, &wait,
TASK_INTERRUPTIBLE);
hlist_for_each_entry(i, n, slot, colision) {
hlist_for_each_entry(i, n, slot, collision) {
if (OVERLAPS) {
/* only ALERT on first iteration,
* we may be woken up early... */
@ -1804,7 +1799,7 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned
}

if (signal_pending(current)) {
hlist_del_init(&e->colision);
hlist_del_init(&e->collision);

spin_unlock_irq(&mdev->req_lock);

@ -1862,7 +1857,7 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned
dev_err(DEV, "submit failed, triggering re-connect\n");
spin_lock_irq(&mdev->req_lock);
list_del(&e->w.list);
hlist_del_init(&e->colision);
hlist_del_init(&e->collision);
spin_unlock_irq(&mdev->req_lock);
if (e->flags & EE_CALL_AL_COMPLETE_IO)
drbd_al_complete_io(mdev, e->sector);
@ -2916,12 +2911,6 @@ disconnect:
return false;
}

static void drbd_setup_order_type(struct drbd_conf *mdev, int peer)
{
/* sorry, we currently have no working implementation
* of distributed TCQ */
}

/* warn if the arguments differ by more than 12.5% */
static void warn_if_differ_considerably(struct drbd_conf *mdev,
const char *s, sector_t a, sector_t b)
@ -2939,7 +2928,6 @@ static int receive_sizes(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned
{
struct p_sizes *p = &mdev->data.rbuf.sizes;
enum determine_dev_size dd = unchanged;
unsigned int max_bio_size;
sector_t p_size, p_usize, my_usize;
int ldsc = 0; /* local disk size changed */
enum dds_flags ddsf;
@ -2994,7 +2982,7 @@ static int receive_sizes(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned

ddsf = be16_to_cpu(p->dds_flags);
if (get_ldev(mdev)) {
dd = drbd_determin_dev_size(mdev, ddsf);
dd = drbd_determine_dev_size(mdev, ddsf);
put_ldev(mdev);
if (dd == dev_size_error)
return false;
@ -3004,23 +2992,15 @@ static int receive_sizes(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned
drbd_set_my_capacity(mdev, p_size);
}

mdev->peer_max_bio_size = be32_to_cpu(p->max_bio_size);
drbd_reconsider_max_bio_size(mdev);

if (get_ldev(mdev)) {
if (mdev->ldev->known_size != drbd_get_capacity(mdev->ldev->backing_bdev)) {
mdev->ldev->known_size = drbd_get_capacity(mdev->ldev->backing_bdev);
ldsc = 1;
}

if (mdev->agreed_pro_version < 94)
max_bio_size = be32_to_cpu(p->max_bio_size);
else if (mdev->agreed_pro_version == 94)
max_bio_size = DRBD_MAX_SIZE_H80_PACKET;
else /* drbd 8.3.8 onwards */
max_bio_size = DRBD_MAX_BIO_SIZE;

if (max_bio_size != queue_max_hw_sectors(mdev->rq_queue) << 9)
drbd_setup_queue_param(mdev, max_bio_size);

drbd_setup_order_type(mdev, be16_to_cpu(p->queue_order_type));
put_ldev(mdev);
}
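
In the receive_sizes() hunk the per-connection negotiation moves out of the receiver: the handler now just records the peer's advertised limit (a 32-bit big-endian field on the wire, hence be32_to_cpu()) and lets drbd_reconsider_max_bio_size() recompute the queue limit. A hedged userspace sketch of the decode step, with an invented packet struct standing in for struct p_sizes:

#include <arpa/inet.h>
#include <stdint.h>
#include <stdio.h>

struct sketch_p_sizes {
	uint32_t max_bio_size;	/* big-endian on the wire */
};

int main(void)
{
	/* pretend the peer advertised 1 MiB */
	struct sketch_p_sizes p = { .max_bio_size = htonl(1 << 20) };
	uint32_t peer_max_bio_size = ntohl(p.max_bio_size);

	printf("peer advertises max_bio_size = %u bytes\n",
	       (unsigned)peer_max_bio_size);
	return 0;
}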

@ -4275,7 +4255,7 @@ static struct drbd_request *_ack_id_to_req(struct drbd_conf *mdev,
struct hlist_node *n;
struct drbd_request *req;

hlist_for_each_entry(req, n, slot, colision) {
hlist_for_each_entry(req, n, slot, collision) {
if ((unsigned long)req == (unsigned long)id) {
if (req->sector != sector) {
dev_err(DEV, "_ack_id_to_req: found req %p but it has "
@ -4554,6 +4534,7 @@ int drbd_asender(struct drbd_thread *thi)
int received = 0;
int expect = sizeof(struct p_header80);
int empty;
int ping_timeout_active = 0;

sprintf(current->comm, "drbd%d_asender", mdev_to_minor(mdev));

@ -4566,6 +4547,7 @@ int drbd_asender(struct drbd_thread *thi)
ERR_IF(!drbd_send_ping(mdev)) goto reconnect;
mdev->meta.socket->sk->sk_rcvtimeo =
mdev->net_conf->ping_timeo*HZ/10;
ping_timeout_active = 1;
}

/* conditionally cork;
@ -4620,8 +4602,7 @@ int drbd_asender(struct drbd_thread *thi)
dev_err(DEV, "meta connection shut down by peer.\n");
goto reconnect;
} else if (rv == -EAGAIN) {
if (mdev->meta.socket->sk->sk_rcvtimeo ==
mdev->net_conf->ping_timeo*HZ/10) {
if (ping_timeout_active) {
dev_err(DEV, "PingAck did not arrive in time.\n");
goto reconnect;
}
@ -4660,6 +4641,11 @@ int drbd_asender(struct drbd_thread *thi)
if (!cmd->process(mdev, h))
goto reconnect;

/* the idle_timeout (ping-int)
* has been restored in got_PingAck() */
if (cmd == get_asender_cmd(P_PING_ACK))
ping_timeout_active = 0;

buf = h;
received = 0;
expect = sizeof(struct p_header80);

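The asender changes replace the fragile sk_rcvtimeo comparison with an explicit ping_timeout_active flag, so a receive timeout is treated as a missing PingAck only while a ping is actually outstanding; otherwise ping-timeout == ping-int would trigger spurious reconnects. A small standalone sketch of that bookkeeping, with invented names:

#include <stdio.h>

struct meta_link {
	int ping_timeout_active;	/* set while a PingAck is outstanding */
};

static void send_ping(struct meta_link *l)
{
	/* the real code also shortens the receive timeout here ... */
	l->ping_timeout_active = 1;
}

static void got_ping_ack(struct meta_link *l)
{
	/* ... and restores the idle timeout (ping-int) here */
	l->ping_timeout_active = 0;
}

/* returns 1 if the caller should tear down and reconnect */
static int on_receive_timeout(const struct meta_link *l)
{
	return l->ping_timeout_active;	/* only fatal while waiting for the ack */
}

int main(void)
{
	struct meta_link l = { 0 };

	send_ping(&l);
	printf("timeout while waiting for ack -> reconnect? %d\n",
	       on_receive_timeout(&l));
	got_ping_ack(&l);
	printf("idle timeout after ack -> reconnect? %d\n",
	       on_receive_timeout(&l));
	return 0;
}
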
@ -163,7 +163,7 @@ static void _about_to_complete_local_write(struct drbd_conf *mdev,
* they must have been failed on the spot */
#define OVERLAPS overlaps(sector, size, i->sector, i->size)
slot = tl_hash_slot(mdev, sector);
hlist_for_each_entry(i, n, slot, colision) {
hlist_for_each_entry(i, n, slot, collision) {
if (OVERLAPS) {
dev_alert(DEV, "LOGIC BUG: completed: %p %llus +%u; "
"other: %p %llus +%u\n",
@ -187,7 +187,7 @@ static void _about_to_complete_local_write(struct drbd_conf *mdev,
#undef OVERLAPS
#define OVERLAPS overlaps(sector, size, e->sector, e->size)
slot = ee_hash_slot(mdev, req->sector);
hlist_for_each_entry(e, n, slot, colision) {
hlist_for_each_entry(e, n, slot, collision) {
if (OVERLAPS) {
wake_up(&mdev->misc_wait);
break;
@ -260,8 +260,8 @@ void _req_may_be_done(struct drbd_request *req, struct bio_and_error *m)

/* remove the request from the conflict detection
* respective block_id verification hash */
if (!hlist_unhashed(&req->colision))
hlist_del(&req->colision);
if (!hlist_unhashed(&req->collision))
hlist_del(&req->collision);
else
D_ASSERT((s & (RQ_NET_MASK & ~RQ_NET_DONE)) == 0);

@ -329,7 +329,7 @@ static int _req_conflicts(struct drbd_request *req)
struct hlist_node *n;
struct hlist_head *slot;

D_ASSERT(hlist_unhashed(&req->colision));
D_ASSERT(hlist_unhashed(&req->collision));

if (!get_net_conf(mdev))
return 0;
@ -341,7 +341,7 @@ static int _req_conflicts(struct drbd_request *req)

#define OVERLAPS overlaps(i->sector, i->size, sector, size)
slot = tl_hash_slot(mdev, sector);
hlist_for_each_entry(i, n, slot, colision) {
hlist_for_each_entry(i, n, slot, collision) {
if (OVERLAPS) {
dev_alert(DEV, "%s[%u] Concurrent local write detected! "
"[DISCARD L] new: %llus +%u; "
@ -359,7 +359,7 @@ static int _req_conflicts(struct drbd_request *req)
#undef OVERLAPS
#define OVERLAPS overlaps(e->sector, e->size, sector, size)
slot = ee_hash_slot(mdev, sector);
hlist_for_each_entry(e, n, slot, colision) {
hlist_for_each_entry(e, n, slot, collision) {
if (OVERLAPS) {
dev_alert(DEV, "%s[%u] Concurrent remote write detected!"
" [DISCARD L] new: %llus +%u; "
@ -491,7 +491,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what,

/* so we can verify the handle in the answer packet
* corresponding hlist_del is in _req_may_be_done() */
hlist_add_head(&req->colision, ar_hash_slot(mdev, req->sector));
hlist_add_head(&req->collision, ar_hash_slot(mdev, req->sector));

set_bit(UNPLUG_REMOTE, &mdev->flags);

@ -507,7 +507,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what,
/* assert something? */
/* from drbd_make_request_common only */

hlist_add_head(&req->colision, tl_hash_slot(mdev, req->sector));
hlist_add_head(&req->collision, tl_hash_slot(mdev, req->sector));
/* corresponding hlist_del is in _req_may_be_done() */

/* NOTE
@ -1033,7 +1033,7 @@ fail_conflicting:
err = 0;

fail_free_complete:
if (rw == WRITE && local)
if (req->rq_state & RQ_IN_ACT_LOG)
drbd_al_complete_io(mdev, sector);
fail_and_free_req:
if (local) {

@ -256,7 +256,7 @@ static inline struct drbd_request *_ar_id_to_req(struct drbd_conf *mdev,
struct hlist_node *n;
struct drbd_request *req;

hlist_for_each_entry(req, n, slot, colision) {
hlist_for_each_entry(req, n, slot, collision) {
if ((unsigned long)req == (unsigned long)id) {
D_ASSERT(req->sector == sector);
return req;
@ -291,7 +291,7 @@ static inline struct drbd_request *drbd_req_new(struct drbd_conf *mdev,
req->epoch = 0;
req->sector = bio_src->bi_sector;
req->size = bio_src->bi_size;
INIT_HLIST_NODE(&req->colision);
INIT_HLIST_NODE(&req->collision);
INIT_LIST_HEAD(&req->tl_requests);
INIT_LIST_HEAD(&req->w.list);
}
@ -323,6 +323,7 @@ extern int __req_mod(struct drbd_request *req, enum drbd_req_event what,
extern void complete_master_bio(struct drbd_conf *mdev,
struct bio_and_error *m);
extern void request_timer_fn(unsigned long data);
extern void tl_restart(struct drbd_conf *mdev, enum drbd_req_event what);

/* use this if you don't want to deal with calling complete_master_bio()
* outside the spinlock, e.g. when walking some list on cleanup. */

@ -126,7 +126,7 @@ static void drbd_endio_write_sec_final(struct drbd_epoch_entry *e) __releases(lo
list_del(&e->w.list); /* has been on active_ee or sync_ee */
list_add_tail(&e->w.list, &mdev->done_ee);

/* No hlist_del_init(&e->colision) here, we did not send the Ack yet,
/* No hlist_del_init(&e->collision) here, we did not send the Ack yet,
* neither did we wake possibly waiting conflicting requests.
* done from "drbd_process_done_ee" within the appropriate w.cb
* (e_end_block/e_end_resync_block) or from _drbd_clear_done_ee */
@ -297,42 +297,48 @@ void drbd_csum_bio(struct drbd_conf *mdev, struct crypto_hash *tfm, struct bio *
crypto_hash_final(&desc, digest);
}

static int w_e_send_csum(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
/* TODO merge common code with w_e_end_ov_req */
int w_e_send_csum(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
{
struct drbd_epoch_entry *e = container_of(w, struct drbd_epoch_entry, w);
int digest_size;
void *digest;
int ok;
int ok = 1;

D_ASSERT(e->block_id == DRBD_MAGIC + 0xbeef);

if (unlikely(cancel)) {
if (unlikely(cancel))
goto out;

if (likely((e->flags & EE_WAS_ERROR) != 0))
goto out;

digest_size = crypto_hash_digestsize(mdev->csums_tfm);
digest = kmalloc(digest_size, GFP_NOIO);
if (digest) {
sector_t sector = e->sector;
unsigned int size = e->size;
drbd_csum_ee(mdev, mdev->csums_tfm, e, digest);
/* Free e and pages before send.
* In case we block on congestion, we could otherwise run into
* some distributed deadlock, if the other side blocks on
* congestion as well, because our receiver blocks in
* drbd_pp_alloc due to pp_in_use > max_buffers. */
drbd_free_ee(mdev, e);
return 1;
e = NULL;
inc_rs_pending(mdev);
ok = drbd_send_drequest_csum(mdev, sector, size,
digest, digest_size,
P_CSUM_RS_REQUEST);
kfree(digest);
} else {
dev_err(DEV, "kmalloc() of digest failed.\n");
ok = 0;
}

if (likely((e->flags & EE_WAS_ERROR) == 0)) {
digest_size = crypto_hash_digestsize(mdev->csums_tfm);
digest = kmalloc(digest_size, GFP_NOIO);
if (digest) {
drbd_csum_ee(mdev, mdev->csums_tfm, e, digest);

inc_rs_pending(mdev);
ok = drbd_send_drequest_csum(mdev,
e->sector,
e->size,
digest,
digest_size,
P_CSUM_RS_REQUEST);
kfree(digest);
} else {
dev_err(DEV, "kmalloc() of digest failed.\n");
ok = 0;
}
} else
ok = 1;

drbd_free_ee(mdev, e);
out:
if (e)
drbd_free_ee(mdev, e);

if (unlikely(!ok))
dev_err(DEV, "drbd_send_drequest(..., csum) failed\n");
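
The reworked w_e_send_csum() frees the epoch entry (and its pages) before the potentially blocking drbd_send_drequest_csum() call: if both peers blocked on congestion while still holding their receive buffers, drbd_pp_alloc could starve on pp_in_use > max_buffers and the nodes would deadlock against each other. A toy illustration of the ordering, with made-up helpers standing in for the DRBD calls:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* stand-in for a send that may block on a congested socket */
static int blocking_send(const unsigned char *digest, size_t len)
{
	printf("sending %zu-byte digest\n", len);
	return 1;
}

int main(void)
{
	size_t payload_size = 4096, digest_size = 20;
	unsigned char *payload = malloc(payload_size);	/* the "pages" */
	unsigned char *digest = malloc(digest_size);
	int ok;

	if (!payload || !digest)
		return 1;
	memset(payload, 0xab, payload_size);
	memset(digest, 0, digest_size);	/* pretend: checksum of payload */

	/* free the large buffer *before* the send that may block, so the
	 * pool it came from cannot be exhausted while we wait */
	free(payload);
	payload = NULL;

	ok = blocking_send(digest, digest_size);
	free(digest);
	return ok ? 0 : 1;
}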
@ -834,7 +840,7 @@ int drbd_resync_finished(struct drbd_conf *mdev)
const int ratio =
(t == 0) ? 0 :
(t < 100000) ? ((s*100)/t) : (s/(t/100));
dev_info(DEV, "%u %% had equal check sums, eliminated: %luK; "
dev_info(DEV, "%u %% had equal checksums, eliminated: %luK; "
"transferred %luK total %luK\n",
ratio,
Bit2KB(mdev->rs_same_csum),
@ -1071,9 +1077,12 @@ int w_e_end_csum_rs_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
return ok;
}

/* TODO merge common code with w_e_send_csum */
int w_e_end_ov_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
{
struct drbd_epoch_entry *e = container_of(w, struct drbd_epoch_entry, w);
sector_t sector = e->sector;
unsigned int size = e->size;
int digest_size;
void *digest;
int ok = 1;
@ -1093,17 +1102,25 @@ int w_e_end_ov_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
else
memset(digest, 0, digest_size);

/* Free e and pages before send.
* In case we block on congestion, we could otherwise run into
* some distributed deadlock, if the other side blocks on
* congestion as well, because our receiver blocks in
* drbd_pp_alloc due to pp_in_use > max_buffers. */
drbd_free_ee(mdev, e);
e = NULL;
inc_rs_pending(mdev);
ok = drbd_send_drequest_csum(mdev, e->sector, e->size,
digest, digest_size, P_OV_REPLY);
ok = drbd_send_drequest_csum(mdev, sector, size,
digest, digest_size,
P_OV_REPLY);
if (!ok)
dec_rs_pending(mdev);
kfree(digest);

out:
drbd_free_ee(mdev, e);
if (e)
drbd_free_ee(mdev, e);
dec_unacked(mdev);

return ok;
}

@ -1122,8 +1139,10 @@ int w_e_end_ov_reply(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
{
struct drbd_epoch_entry *e = container_of(w, struct drbd_epoch_entry, w);
struct digest_info *di;
int digest_size;
void *digest;
sector_t sector = e->sector;
unsigned int size = e->size;
int digest_size;
int ok, eq = 0;

if (unlikely(cancel)) {
@ -1153,16 +1172,21 @@ int w_e_end_ov_reply(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
}
}

dec_unacked(mdev);
/* Free e and pages before send.
* In case we block on congestion, we could otherwise run into
* some distributed deadlock, if the other side blocks on
* congestion as well, because our receiver blocks in
* drbd_pp_alloc due to pp_in_use > max_buffers. */
drbd_free_ee(mdev, e);
if (!eq)
drbd_ov_oos_found(mdev, e->sector, e->size);
drbd_ov_oos_found(mdev, sector, size);
else
ov_oos_print(mdev);

ok = drbd_send_ack_ex(mdev, P_OV_RESULT, e->sector, e->size,
ok = drbd_send_ack_ex(mdev, P_OV_RESULT, sector, size,
eq ? ID_IN_SYNC : ID_OUT_OF_SYNC);

drbd_free_ee(mdev, e);
dec_unacked(mdev);

--mdev->ov_left;

@ -1658,7 +1658,7 @@ static struct kobject *loop_probe(dev_t dev, int *part, void *data)
struct kobject *kobj;

mutex_lock(&loop_devices_mutex);
lo = loop_init_one(dev & MINORMASK);
lo = loop_init_one(MINOR(dev) >> part_shift);
kobj = lo ? get_disk(lo->lo_disk) : ERR_PTR(-ENOMEM);
mutex_unlock(&loop_devices_mutex);

@ -1691,15 +1691,18 @@ static int __init loop_init(void)
if (max_part > 0)
part_shift = fls(max_part);

if ((1UL << part_shift) > DISK_MAX_PARTS)
return -EINVAL;

if (max_loop > 1UL << (MINORBITS - part_shift))
return -EINVAL;

if (max_loop) {
nr = max_loop;
range = max_loop;
range = max_loop << part_shift;
} else {
nr = 8;
range = 1UL << (MINORBITS - part_shift);
range = 1UL << MINORBITS;
}

if (register_blkdev(LOOP_MAJOR, "loop"))
@ -1738,7 +1741,7 @@ static void __exit loop_exit(void)
unsigned long range;
struct loop_device *lo, *next;

range = max_loop ? max_loop : 1UL << (MINORBITS - part_shift);
range = max_loop ? max_loop << part_shift : 1UL << MINORBITS;

list_for_each_entry_safe(lo, next, &loop_devices, lo_list)
loop_del_one(lo);
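
The loop changes carve the minor-number space into per-device windows of 1 << part_shift minors, where part_shift = fls(max_part); loop_probe() therefore maps a minor back to a device index with MINOR(dev) >> part_shift, and loop_init() rejects configurations whose window exceeds DISK_MAX_PARTS or whose max_loop no longer fits in MINORBITS. The arithmetic, replayed in a standalone sketch (fls() is reimplemented here; the two constants mirror the kernel's values but are hard-coded for the example):

#include <stdio.h>

#define SKETCH_MINORBITS      20	/* MINORBITS */
#define SKETCH_DISK_MAX_PARTS 256	/* DISK_MAX_PARTS */

/* find last set bit, 1-based, like the kernel's fls() */
static int fls_sketch(unsigned int x)
{
	int r = 0;

	while (x) {
		x >>= 1;
		r++;
	}
	return r;
}

int main(void)
{
	unsigned int max_part = 15, max_loop = 8;
	int part_shift = max_part > 0 ? fls_sketch(max_part) : 0;
	unsigned long range;

	if ((1UL << part_shift) > SKETCH_DISK_MAX_PARTS)
		return 1;	/* too many partitions per device */
	if (max_loop > (1UL << (SKETCH_MINORBITS - part_shift)))
		return 1;	/* devices no longer fit in the minor space */

	range = max_loop ? (unsigned long)max_loop << part_shift
			 : 1UL << SKETCH_MINORBITS;
	printf("part_shift=%d, minors reserved=%lu\n", part_shift, range);
	printf("minor 35 belongs to loop device %u\n", 35U >> part_shift);
	return 0;
}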
drivers/block/xen-blkback/Makefile (new file, 3 lines)
@ -0,0 +1,3 @@
obj-$(CONFIG_XEN_BLKDEV_BACKEND) := xen-blkback.o

xen-blkback-y := blkback.o xenbus.o

drivers/block/xen-blkback/blkback.c (new file, 824 lines)
@ -0,0 +1,824 @@
|
||||
/******************************************************************************
|
||||
*
|
||||
* Back-end of the driver for virtual block devices. This portion of the
|
||||
* driver exports a 'unified' block-device interface that can be accessed
|
||||
* by any operating system that implements a compatible front end. A
|
||||
* reference front-end implementation can be found in:
|
||||
* drivers/block/xen-blkfront.c
|
||||
*
|
||||
* Copyright (c) 2003-2004, Keir Fraser & Steve Hand
|
||||
* Copyright (c) 2005, Christopher Clark
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License version 2
|
||||
* as published by the Free Software Foundation; or, when distributed
|
||||
* separately from the Linux kernel or incorporated into other
|
||||
* software packages, subject to the following license:
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this source file (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use, copy, modify,
|
||||
* merge, publish, distribute, sublicense, and/or sell copies of the Software,
|
||||
* and to permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <linux/spinlock.h>
|
||||
#include <linux/kthread.h>
|
||||
#include <linux/list.h>
|
||||
#include <linux/delay.h>
|
||||
#include <linux/freezer.h>
|
||||
|
||||
#include <xen/events.h>
|
||||
#include <xen/page.h>
|
||||
#include <asm/xen/hypervisor.h>
|
||||
#include <asm/xen/hypercall.h>
|
||||
#include "common.h"
|
||||
|
||||
/*
|
||||
* These are rather arbitrary. They are fairly large because adjacent requests
|
||||
* pulled from a communication ring are quite likely to end up being part of
|
||||
* the same scatter/gather request at the disc.
|
||||
*
|
||||
* ** TRY INCREASING 'xen_blkif_reqs' IF WRITE SPEEDS SEEM TOO LOW **
|
||||
*
|
||||
* This will increase the chances of being able to write whole tracks.
|
||||
* 64 should be enough to keep us competitive with Linux.
|
||||
*/
|
||||
static int xen_blkif_reqs = 64;
|
||||
module_param_named(reqs, xen_blkif_reqs, int, 0);
|
||||
MODULE_PARM_DESC(reqs, "Number of blkback requests to allocate");
|
||||
|
||||
/* Run-time switchable: /sys/module/blkback/parameters/ */
|
||||
static unsigned int log_stats;
|
||||
module_param(log_stats, int, 0644);
|
||||
|
||||
/*
|
||||
* Each outstanding request that we've passed to the lower device layers has a
|
||||
* 'pending_req' allocated to it. Each buffer_head that completes decrements
|
||||
* the pendcnt towards zero. When it hits zero, the specified domain has a
|
||||
* response queued for it, with the saved 'id' passed back.
|
||||
*/
|
||||
struct pending_req {
|
||||
struct xen_blkif *blkif;
|
||||
u64 id;
|
||||
int nr_pages;
|
||||
atomic_t pendcnt;
|
||||
unsigned short operation;
|
||||
int status;
|
||||
struct list_head free_list;
|
||||
};
|
||||
|
||||
#define BLKBACK_INVALID_HANDLE (~0)
|
||||
|
||||
struct xen_blkbk {
|
||||
struct pending_req *pending_reqs;
|
||||
/* List of all 'pending_req' available */
|
||||
struct list_head pending_free;
|
||||
/* And its spinlock. */
|
||||
spinlock_t pending_free_lock;
|
||||
wait_queue_head_t pending_free_wq;
|
||||
/* The list of all pages that are available. */
|
||||
struct page **pending_pages;
|
||||
/* And the grant handles that are available. */
|
||||
grant_handle_t *pending_grant_handles;
|
||||
};
|
||||
|
||||
static struct xen_blkbk *blkbk;
|
||||
|
||||
/*
|
||||
* Little helpful macro to figure out the index and virtual address of the
|
||||
* pending_pages[..]. For each 'pending_req' we have have up to
|
||||
* BLKIF_MAX_SEGMENTS_PER_REQUEST (11) pages. The seg would be from 0 through
|
||||
* 10 and would index in the pending_pages[..].
|
||||
*/
|
||||
static inline int vaddr_pagenr(struct pending_req *req, int seg)
|
||||
{
|
||||
return (req - blkbk->pending_reqs) *
|
||||
BLKIF_MAX_SEGMENTS_PER_REQUEST + seg;
|
||||
}
|
||||
|
||||
#define pending_page(req, seg) pending_pages[vaddr_pagenr(req, seg)]
|
||||
|
||||
static inline unsigned long vaddr(struct pending_req *req, int seg)
|
||||
{
|
||||
unsigned long pfn = page_to_pfn(blkbk->pending_page(req, seg));
|
||||
return (unsigned long)pfn_to_kaddr(pfn);
|
||||
}
|
||||
|
||||
#define pending_handle(_req, _seg) \
|
||||
(blkbk->pending_grant_handles[vaddr_pagenr(_req, _seg)])
|
||||
|
||||
|
||||
static int do_block_io_op(struct xen_blkif *blkif);
|
||||
static int dispatch_rw_block_io(struct xen_blkif *blkif,
|
||||
struct blkif_request *req,
|
||||
struct pending_req *pending_req);
|
||||
static void make_response(struct xen_blkif *blkif, u64 id,
|
||||
unsigned short op, int st);
|
||||
|
||||
/*
|
||||
* Retrieve from the 'pending_reqs' a free pending_req structure to be used.
|
||||
*/
|
||||
static struct pending_req *alloc_req(void)
|
||||
{
|
||||
struct pending_req *req = NULL;
|
||||
unsigned long flags;
|
||||
|
||||
spin_lock_irqsave(&blkbk->pending_free_lock, flags);
|
||||
if (!list_empty(&blkbk->pending_free)) {
|
||||
req = list_entry(blkbk->pending_free.next, struct pending_req,
|
||||
free_list);
|
||||
list_del(&req->free_list);
|
||||
}
|
||||
spin_unlock_irqrestore(&blkbk->pending_free_lock, flags);
|
||||
return req;
|
||||
}
|
||||
|
||||
/*
|
||||
* Return the 'pending_req' structure back to the freepool. We also
|
||||
* wake up the thread if it was waiting for a free page.
|
||||
*/
|
||||
static void free_req(struct pending_req *req)
|
||||
{
|
||||
unsigned long flags;
|
||||
int was_empty;
|
||||
|
||||
spin_lock_irqsave(&blkbk->pending_free_lock, flags);
|
||||
was_empty = list_empty(&blkbk->pending_free);
|
||||
list_add(&req->free_list, &blkbk->pending_free);
|
||||
spin_unlock_irqrestore(&blkbk->pending_free_lock, flags);
|
||||
if (was_empty)
|
||||
wake_up(&blkbk->pending_free_wq);
|
||||
}
|
||||
|
||||
/*
|
||||
* Routines for managing virtual block devices (vbds).
|
||||
*/
|
||||
static int xen_vbd_translate(struct phys_req *req, struct xen_blkif *blkif,
|
||||
int operation)
|
||||
{
|
||||
struct xen_vbd *vbd = &blkif->vbd;
|
||||
int rc = -EACCES;
|
||||
|
||||
if ((operation != READ) && vbd->readonly)
|
||||
goto out;
|
||||
|
||||
if (likely(req->nr_sects)) {
|
||||
blkif_sector_t end = req->sector_number + req->nr_sects;
|
||||
|
||||
if (unlikely(end < req->sector_number))
|
||||
goto out;
|
||||
if (unlikely(end > vbd_sz(vbd)))
|
||||
goto out;
|
||||
}
|
||||
|
||||
req->dev = vbd->pdevice;
|
||||
req->bdev = vbd->bdev;
|
||||
rc = 0;
|
||||
|
||||
out:
|
||||
return rc;
|
||||
}
|
||||
|
||||
static void xen_vbd_resize(struct xen_blkif *blkif)
|
||||
{
|
||||
struct xen_vbd *vbd = &blkif->vbd;
|
||||
struct xenbus_transaction xbt;
|
||||
int err;
|
||||
struct xenbus_device *dev = xen_blkbk_xenbus(blkif->be);
|
||||
unsigned long long new_size = vbd_sz(vbd);
|
||||
|
||||
pr_info(DRV_PFX "VBD Resize: Domid: %d, Device: (%d, %d)\n",
|
||||
blkif->domid, MAJOR(vbd->pdevice), MINOR(vbd->pdevice));
|
||||
pr_info(DRV_PFX "VBD Resize: new size %llu\n", new_size);
|
||||
vbd->size = new_size;
|
||||
again:
|
||||
err = xenbus_transaction_start(&xbt);
|
||||
if (err) {
|
||||
pr_warn(DRV_PFX "Error starting transaction");
|
||||
return;
|
||||
}
|
||||
err = xenbus_printf(xbt, dev->nodename, "sectors", "%llu",
|
||||
(unsigned long long)vbd_sz(vbd));
|
||||
if (err) {
|
||||
pr_warn(DRV_PFX "Error writing new size");
|
||||
goto abort;
|
||||
}
|
||||
/*
|
||||
* Write the current state; we will use this to synchronize
|
||||
* the front-end. If the current state is "connected" the
|
||||
* front-end will get the new size information online.
|
||||
*/
|
||||
err = xenbus_printf(xbt, dev->nodename, "state", "%d", dev->state);
|
||||
if (err) {
|
||||
pr_warn(DRV_PFX "Error writing the state");
|
||||
goto abort;
|
||||
}
|
||||
|
||||
err = xenbus_transaction_end(xbt, 0);
|
||||
if (err == -EAGAIN)
|
||||
goto again;
|
||||
if (err)
|
||||
pr_warn(DRV_PFX "Error ending transaction");
|
||||
return;
|
||||
abort:
|
||||
xenbus_transaction_end(xbt, 1);
|
||||
}
|
||||
|
||||
/*
|
||||
* Notification from the guest OS.
|
||||
*/
|
||||
static void blkif_notify_work(struct xen_blkif *blkif)
|
||||
{
|
||||
blkif->waiting_reqs = 1;
|
||||
wake_up(&blkif->wq);
|
||||
}
|
||||
|
||||
irqreturn_t xen_blkif_be_int(int irq, void *dev_id)
|
||||
{
|
||||
blkif_notify_work(dev_id);
|
||||
return IRQ_HANDLED;
|
||||
}
|
||||
|
||||
/*
|
||||
* SCHEDULER FUNCTIONS
|
||||
*/
|
||||
|
||||
static void print_stats(struct xen_blkif *blkif)
|
||||
{
|
||||
pr_info("xen-blkback (%s): oo %3d | rd %4d | wr %4d | f %4d\n",
|
||||
current->comm, blkif->st_oo_req,
|
||||
blkif->st_rd_req, blkif->st_wr_req, blkif->st_f_req);
|
||||
blkif->st_print = jiffies + msecs_to_jiffies(10 * 1000);
|
||||
blkif->st_rd_req = 0;
|
||||
blkif->st_wr_req = 0;
|
||||
blkif->st_oo_req = 0;
|
||||
}
|
||||
|
||||
int xen_blkif_schedule(void *arg)
|
||||
{
|
||||
struct xen_blkif *blkif = arg;
|
||||
struct xen_vbd *vbd = &blkif->vbd;
|
||||
|
||||
xen_blkif_get(blkif);
|
||||
|
||||
while (!kthread_should_stop()) {
|
||||
if (try_to_freeze())
|
||||
continue;
|
||||
if (unlikely(vbd->size != vbd_sz(vbd)))
|
||||
xen_vbd_resize(blkif);
|
||||
|
||||
wait_event_interruptible(
|
||||
blkif->wq,
|
||||
blkif->waiting_reqs || kthread_should_stop());
|
||||
wait_event_interruptible(
|
||||
blkbk->pending_free_wq,
|
||||
!list_empty(&blkbk->pending_free) ||
|
||||
kthread_should_stop());
|
||||
|
||||
blkif->waiting_reqs = 0;
|
||||
smp_mb(); /* clear flag *before* checking for work */
|
||||
|
||||
if (do_block_io_op(blkif))
|
||||
blkif->waiting_reqs = 1;
|
||||
|
||||
if (log_stats && time_after(jiffies, blkif->st_print))
|
||||
print_stats(blkif);
|
||||
}
|
||||
|
||||
if (log_stats)
|
||||
print_stats(blkif);
|
||||
|
||||
blkif->xenblkd = NULL;
|
||||
xen_blkif_put(blkif);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
struct seg_buf {
|
||||
unsigned long buf;
|
||||
unsigned int nsec;
|
||||
};
|
||||
/*
|
||||
* Unmap the grant references, and also remove the M2P over-rides
|
||||
* used in the 'pending_req'.
|
||||
*/
|
||||
static void xen_blkbk_unmap(struct pending_req *req)
|
||||
{
|
||||
struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST];
|
||||
unsigned int i, invcount = 0;
|
||||
grant_handle_t handle;
|
||||
int ret;
|
||||
|
||||
for (i = 0; i < req->nr_pages; i++) {
|
||||
handle = pending_handle(req, i);
|
||||
if (handle == BLKBACK_INVALID_HANDLE)
|
||||
continue;
|
||||
gnttab_set_unmap_op(&unmap[invcount], vaddr(req, i),
|
||||
GNTMAP_host_map, handle);
|
||||
pending_handle(req, i) = BLKBACK_INVALID_HANDLE;
|
||||
invcount++;
|
||||
}
|
||||
|
||||
ret = HYPERVISOR_grant_table_op(
|
||||
GNTTABOP_unmap_grant_ref, unmap, invcount);
|
||||
BUG_ON(ret);
|
||||
/*
|
||||
* Note, we use invcount, so nr->pages, so we can't index
|
||||
* using vaddr(req, i).
|
||||
*/
|
||||
for (i = 0; i < invcount; i++) {
|
||||
ret = m2p_remove_override(
|
||||
virt_to_page(unmap[i].host_addr), false);
|
||||
if (ret) {
|
||||
pr_alert(DRV_PFX "Failed to remove M2P override for %lx\n",
|
||||
(unsigned long)unmap[i].host_addr);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static int xen_blkbk_map(struct blkif_request *req,
|
||||
struct pending_req *pending_req,
|
||||
struct seg_buf seg[])
|
||||
{
|
||||
struct gnttab_map_grant_ref map[BLKIF_MAX_SEGMENTS_PER_REQUEST];
|
||||
int i;
|
||||
int nseg = req->nr_segments;
|
||||
int ret = 0;
|
||||
|
||||
/*
|
||||
* Fill out preq.nr_sects with proper amount of sectors, and setup
|
||||
* assign map[..] with the PFN of the page in our domain with the
|
||||
* corresponding grant reference for each page.
|
||||
*/
|
||||
for (i = 0; i < nseg; i++) {
|
||||
uint32_t flags;
|
||||
|
||||
flags = GNTMAP_host_map;
|
||||
if (pending_req->operation != BLKIF_OP_READ)
|
||||
flags |= GNTMAP_readonly;
|
||||
gnttab_set_map_op(&map[i], vaddr(pending_req, i), flags,
|
||||
req->u.rw.seg[i].gref,
|
||||
pending_req->blkif->domid);
|
||||
}
|
||||
|
||||
ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, map, nseg);
|
||||
BUG_ON(ret);
|
||||
|
||||
/*
|
||||
* Now swizzle the MFN in our domain with the MFN from the other domain
|
||||
* so that when we access vaddr(pending_req,i) it has the contents of
|
||||
* the page from the other domain.
|
||||
*/
|
||||
for (i = 0; i < nseg; i++) {
|
||||
if (unlikely(map[i].status != 0)) {
|
||||
pr_debug(DRV_PFX "invalid buffer -- could not remap it\n");
|
||||
map[i].handle = BLKBACK_INVALID_HANDLE;
|
||||
ret |= 1;
|
||||
}
|
||||
|
||||
pending_handle(pending_req, i) = map[i].handle;
|
||||
|
||||
if (ret)
|
||||
continue;
|
||||
|
||||
ret = m2p_add_override(PFN_DOWN(map[i].dev_bus_addr),
|
||||
blkbk->pending_page(pending_req, i), false);
|
||||
if (ret) {
|
||||
pr_alert(DRV_PFX "Failed to install M2P override for %lx (ret: %d)\n",
|
||||
(unsigned long)map[i].dev_bus_addr, ret);
|
||||
/* We could switch over to GNTTABOP_copy */
|
||||
continue;
|
||||
}
|
||||
|
||||
seg[i].buf = map[i].dev_bus_addr |
|
||||
(req->u.rw.seg[i].first_sect << 9);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* Completion callback on the bio's. Called as bh->b_end_io()
|
||||
*/
|
||||
|
||||
static void __end_block_io_op(struct pending_req *pending_req, int error)
|
||||
{
|
||||
/* An error fails the entire request. */
|
||||
if ((pending_req->operation == BLKIF_OP_FLUSH_DISKCACHE) &&
|
||||
(error == -EOPNOTSUPP)) {
|
||||
pr_debug(DRV_PFX "flush diskcache op failed, not supported\n");
|
||||
xen_blkbk_flush_diskcache(XBT_NIL, pending_req->blkif->be, 0);
|
||||
pending_req->status = BLKIF_RSP_EOPNOTSUPP;
|
||||
} else if (error) {
|
||||
pr_debug(DRV_PFX "Buffer not up-to-date at end of operation,"
|
||||
" error=%d\n", error);
|
||||
pending_req->status = BLKIF_RSP_ERROR;
|
||||
}
|
||||
|
||||
/*
|
||||
* If all of the bio's have completed it is time to unmap
|
||||
* the grant references associated with 'request' and provide
|
||||
* the proper response on the ring.
|
||||
*/
|
||||
if (atomic_dec_and_test(&pending_req->pendcnt)) {
|
||||
xen_blkbk_unmap(pending_req);
|
||||
make_response(pending_req->blkif, pending_req->id,
|
||||
pending_req->operation, pending_req->status);
|
||||
xen_blkif_put(pending_req->blkif);
|
||||
free_req(pending_req);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* bio callback.
|
||||
*/
|
||||
static void end_block_io_op(struct bio *bio, int error)
|
||||
{
|
||||
__end_block_io_op(bio->bi_private, error);
|
||||
bio_put(bio);
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* Function to copy the from the ring buffer the 'struct blkif_request'
|
||||
* (which has the sectors we want, number of them, grant references, etc),
|
||||
* and transmute it to the block API to hand it over to the proper block disk.
|
||||
*/
|
||||
static int do_block_io_op(struct xen_blkif *blkif)
|
||||
{
|
||||
union blkif_back_rings *blk_rings = &blkif->blk_rings;
|
||||
struct blkif_request req;
|
||||
struct pending_req *pending_req;
|
||||
RING_IDX rc, rp;
|
||||
int more_to_do = 0;
|
||||
|
||||
rc = blk_rings->common.req_cons;
|
||||
rp = blk_rings->common.sring->req_prod;
|
||||
rmb(); /* Ensure we see queued requests up to 'rp'. */
|
||||
|
||||
while (rc != rp) {
|
||||
|
||||
if (RING_REQUEST_CONS_OVERFLOW(&blk_rings->common, rc))
|
||||
break;
|
||||
|
||||
if (kthread_should_stop()) {
|
||||
more_to_do = 1;
|
||||
break;
|
||||
}
|
||||
|
||||
pending_req = alloc_req();
|
||||
if (NULL == pending_req) {
|
||||
blkif->st_oo_req++;
|
||||
more_to_do = 1;
|
||||
break;
|
||||
}
|
||||
|
||||
switch (blkif->blk_protocol) {
|
||||
case BLKIF_PROTOCOL_NATIVE:
|
||||
memcpy(&req, RING_GET_REQUEST(&blk_rings->native, rc), sizeof(req));
|
||||
break;
|
||||
case BLKIF_PROTOCOL_X86_32:
|
||||
blkif_get_x86_32_req(&req, RING_GET_REQUEST(&blk_rings->x86_32, rc));
|
||||
break;
|
||||
case BLKIF_PROTOCOL_X86_64:
|
||||
blkif_get_x86_64_req(&req, RING_GET_REQUEST(&blk_rings->x86_64, rc));
|
||||
break;
|
||||
default:
|
||||
BUG();
|
||||
}
|
||||
blk_rings->common.req_cons = ++rc; /* before make_response() */
|
||||
|
||||
/* Apply all sanity checks to /private copy/ of request. */
|
||||
barrier();
|
||||
|
||||
if (dispatch_rw_block_io(blkif, &req, pending_req))
|
||||
break;
|
||||
|
||||
/* Yield point for this unbounded loop. */
|
||||
cond_resched();
|
||||
}
|
||||
|
||||
return more_to_do;
|
||||
}
|
||||
|
||||
/*
|
||||
* Transmutation of the 'struct blkif_request' to a proper 'struct bio'
|
||||
* and call the 'submit_bio' to pass it to the underlying storage.
|
||||
*/
|
||||
static int dispatch_rw_block_io(struct xen_blkif *blkif,
|
||||
struct blkif_request *req,
|
||||
struct pending_req *pending_req)
|
||||
{
|
||||
struct phys_req preq;
|
||||
struct seg_buf seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
|
||||
unsigned int nseg;
|
||||
struct bio *bio = NULL;
|
||||
struct bio *biolist[BLKIF_MAX_SEGMENTS_PER_REQUEST];
|
||||
int i, nbio = 0;
|
||||
int operation;
|
||||
struct blk_plug plug;
|
||||
|
||||
switch (req->operation) {
|
||||
case BLKIF_OP_READ:
|
||||
blkif->st_rd_req++;
|
||||
operation = READ;
|
||||
break;
|
||||
case BLKIF_OP_WRITE:
|
||||
blkif->st_wr_req++;
|
||||
operation = WRITE_ODIRECT;
|
||||
break;
|
||||
case BLKIF_OP_FLUSH_DISKCACHE:
|
||||
blkif->st_f_req++;
|
||||
operation = WRITE_FLUSH;
|
||||
break;
|
||||
case BLKIF_OP_WRITE_BARRIER:
|
||||
default:
|
||||
operation = 0; /* make gcc happy */
|
||||
goto fail_response;
|
||||
break;
|
||||
}
|
||||
|
||||
/* Check that the number of segments is sane. */
|
||||
nseg = req->nr_segments;
|
||||
if (unlikely(nseg == 0 && operation != WRITE_FLUSH) ||
|
||||
unlikely(nseg > BLKIF_MAX_SEGMENTS_PER_REQUEST)) {
|
||||
pr_debug(DRV_PFX "Bad number of segments in request (%d)\n",
|
||||
nseg);
|
||||
/* Haven't submitted any bio's yet. */
|
||||
goto fail_response;
|
||||
}
|
||||
|
||||
preq.dev = req->handle;
|
||||
preq.sector_number = req->u.rw.sector_number;
|
||||
preq.nr_sects = 0;
|
||||
|
||||
pending_req->blkif = blkif;
|
||||
pending_req->id = req->id;
|
||||
pending_req->operation = req->operation;
|
||||
pending_req->status = BLKIF_RSP_OKAY;
|
||||
pending_req->nr_pages = nseg;
|
||||
|
||||
for (i = 0; i < nseg; i++) {
|
||||
seg[i].nsec = req->u.rw.seg[i].last_sect -
|
||||
req->u.rw.seg[i].first_sect + 1;
|
||||
if ((req->u.rw.seg[i].last_sect >= (PAGE_SIZE >> 9)) ||
|
||||
(req->u.rw.seg[i].last_sect < req->u.rw.seg[i].first_sect))
|
||||
goto fail_response;
|
||||
preq.nr_sects += seg[i].nsec;
|
||||
|
||||
}
|
||||
|
||||
if (xen_vbd_translate(&preq, blkif, operation) != 0) {
|
||||
pr_debug(DRV_PFX "access denied: %s of [%llu,%llu] on dev=%04x\n",
|
||||
operation == READ ? "read" : "write",
|
||||
preq.sector_number,
|
||||
preq.sector_number + preq.nr_sects, preq.dev);
|
||||
goto fail_response;
|
||||
}
|
||||
|
||||
/*
|
||||
* This check _MUST_ be done after xen_vbd_translate as the preq.bdev
|
||||
* is set there.
|
||||
*/
|
||||
for (i = 0; i < nseg; i++) {
|
||||
if (((int)preq.sector_number|(int)seg[i].nsec) &
|
||||
((bdev_logical_block_size(preq.bdev) >> 9) - 1)) {
|
||||
pr_debug(DRV_PFX "Misaligned I/O request from domain %d",
|
||||
blkif->domid);
|
||||
goto fail_response;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* If we have failed at this point, we need to undo the M2P override,
|
||||
* set gnttab_set_unmap_op on all of the grant references and perform
|
||||
* the hypercall to unmap the grants - that is all done in
|
||||
* xen_blkbk_unmap.
|
||||
*/
|
||||
if (xen_blkbk_map(req, pending_req, seg))
|
||||
goto fail_flush;
|
||||
|
||||
/* This corresponding xen_blkif_put is done in __end_block_io_op */
|
||||
xen_blkif_get(blkif);
|
||||
|
||||
for (i = 0; i < nseg; i++) {
|
||||
while ((bio == NULL) ||
|
||||
(bio_add_page(bio,
|
||||
blkbk->pending_page(pending_req, i),
|
||||
seg[i].nsec << 9,
|
||||
seg[i].buf & ~PAGE_MASK) == 0)) {
|
||||
|
||||
bio = bio_alloc(GFP_KERNEL, nseg-i);
|
||||
if (unlikely(bio == NULL))
|
||||
goto fail_put_bio;
|
||||
|
||||
biolist[nbio++] = bio;
|
||||
bio->bi_bdev = preq.bdev;
|
||||
bio->bi_private = pending_req;
|
||||
bio->bi_end_io = end_block_io_op;
|
||||
bio->bi_sector = preq.sector_number;
|
||||
}
|
||||
|
||||
preq.sector_number += seg[i].nsec;
|
||||
}
|
||||
|
||||
/* This will be hit if the operation was a flush. */
|
||||
if (!bio) {
|
||||
BUG_ON(operation != WRITE_FLUSH);
|
||||
|
||||
bio = bio_alloc(GFP_KERNEL, 0);
|
||||
if (unlikely(bio == NULL))
|
||||
goto fail_put_bio;
|
||||
|
||||
biolist[nbio++] = bio;
|
||||
bio->bi_bdev = preq.bdev;
|
||||
bio->bi_private = pending_req;
|
||||
bio->bi_end_io = end_block_io_op;
|
||||
}
|
||||
|
||||
/*
|
||||
* We set it one so that the last submit_bio does not have to call
|
||||
* atomic_inc.
|
||||
*/
|
||||
atomic_set(&pending_req->pendcnt, nbio);
|
||||
|
||||
/* Get a reference count for the disk queue and start sending I/O */
|
||||
blk_start_plug(&plug);
|
||||
|
||||
for (i = 0; i < nbio; i++)
|
||||
submit_bio(operation, biolist[i]);
|
||||
|
||||
/* Let the I/Os go.. */
|
||||
blk_finish_plug(&plug);
|
||||
|
||||
if (operation == READ)
|
||||
blkif->st_rd_sect += preq.nr_sects;
|
||||
else if (operation == WRITE || operation == WRITE_FLUSH)
|
||||
blkif->st_wr_sect += preq.nr_sects;
|
||||
|
||||
return 0;
|
||||
|
||||
fail_flush:
|
||||
xen_blkbk_unmap(pending_req);
|
||||
fail_response:
|
||||
/* Haven't submitted any bio's yet. */
|
||||
make_response(blkif, req->id, req->operation, BLKIF_RSP_ERROR);
|
||||
free_req(pending_req);
|
||||
msleep(1); /* back off a bit */
|
||||
return -EIO;
|
||||
|
||||
fail_put_bio:
|
||||
for (i = 0; i < nbio; i++)
|
||||
bio_put(biolist[i]);
|
||||
__end_block_io_op(pending_req, -EINVAL);
|
||||
msleep(1); /* back off a bit */
|
||||
return -EIO;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* Put a response on the ring on how the operation fared.
|
||||
*/
|
||||
static void make_response(struct xen_blkif *blkif, u64 id,
|
||||
unsigned short op, int st)
|
||||
{
|
||||
struct blkif_response resp;
|
||||
unsigned long flags;
|
||||
union blkif_back_rings *blk_rings = &blkif->blk_rings;
|
||||
int more_to_do = 0;
|
||||
int notify;
|
||||
|
||||
resp.id = id;
|
||||
resp.operation = op;
|
||||
resp.status = st;
|
||||
|
||||
spin_lock_irqsave(&blkif->blk_ring_lock, flags);
|
||||
/* Place on the response ring for the relevant domain. */
|
||||
switch (blkif->blk_protocol) {
|
||||
case BLKIF_PROTOCOL_NATIVE:
|
||||
memcpy(RING_GET_RESPONSE(&blk_rings->native, blk_rings->native.rsp_prod_pvt),
|
||||
&resp, sizeof(resp));
|
||||
break;
|
||||
case BLKIF_PROTOCOL_X86_32:
|
||||
memcpy(RING_GET_RESPONSE(&blk_rings->x86_32, blk_rings->x86_32.rsp_prod_pvt),
|
||||
&resp, sizeof(resp));
|
||||
break;
|
||||
case BLKIF_PROTOCOL_X86_64:
|
||||
memcpy(RING_GET_RESPONSE(&blk_rings->x86_64, blk_rings->x86_64.rsp_prod_pvt),
|
||||
&resp, sizeof(resp));
|
||||
break;
|
||||
default:
|
||||
BUG();
|
||||
}
|
||||
blk_rings->common.rsp_prod_pvt++;
|
||||
RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&blk_rings->common, notify);
|
||||
if (blk_rings->common.rsp_prod_pvt == blk_rings->common.req_cons) {
|
||||
/*
|
||||
* Tail check for pending requests. Allows frontend to avoid
|
||||
* notifications if requests are already in flight (lower
|
||||
* overheads and promotes batching).
|
||||
*/
|
||||
RING_FINAL_CHECK_FOR_REQUESTS(&blk_rings->common, more_to_do);
|
||||
|
||||
} else if (RING_HAS_UNCONSUMED_REQUESTS(&blk_rings->common)) {
|
||||
more_to_do = 1;
|
||||
}
|
||||
|
||||
spin_unlock_irqrestore(&blkif->blk_ring_lock, flags);
|
||||
|
||||
if (more_to_do)
|
||||
blkif_notify_work(blkif);
|
||||
if (notify)
|
||||
notify_remote_via_irq(blkif->irq);
|
||||
}
|
||||
|
||||
static int __init xen_blkif_init(void)
|
||||
{
|
||||
int i, mmap_pages;
|
||||
int rc = 0;
|
||||
|
||||
if (!xen_pv_domain())
|
||||
return -ENODEV;
|
||||
|
||||
blkbk = kzalloc(sizeof(struct xen_blkbk), GFP_KERNEL);
|
||||
if (!blkbk) {
|
||||
pr_alert(DRV_PFX "%s: out of memory!\n", __func__);
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
mmap_pages = xen_blkif_reqs * BLKIF_MAX_SEGMENTS_PER_REQUEST;
|
||||
|
||||
blkbk->pending_reqs = kmalloc(sizeof(blkbk->pending_reqs[0]) *
|
||||
xen_blkif_reqs, GFP_KERNEL);
|
||||
blkbk->pending_grant_handles = kzalloc(sizeof(blkbk->pending_grant_handles[0]) *
|
||||
mmap_pages, GFP_KERNEL);
|
||||
blkbk->pending_pages = kzalloc(sizeof(blkbk->pending_pages[0]) *
|
||||
mmap_pages, GFP_KERNEL);
|
||||
|
||||
if (!blkbk->pending_reqs || !blkbk->pending_grant_handles ||
|
||||
!blkbk->pending_pages) {
|
||||
rc = -ENOMEM;
|
||||
goto out_of_memory;
|
||||
}
|
||||
|
||||
for (i = 0; i < mmap_pages; i++) {
|
||||
blkbk->pending_grant_handles[i] = BLKBACK_INVALID_HANDLE;
|
||||
blkbk->pending_pages[i] = alloc_page(GFP_KERNEL);
|
||||
if (blkbk->pending_pages[i] == NULL) {
|
||||
rc = -ENOMEM;
|
||||
goto out_of_memory;
|
||||
}
|
||||
}
|
||||
rc = xen_blkif_interface_init();
|
||||
if (rc)
|
||||
goto failed_init;
|
||||
|
||||
memset(blkbk->pending_reqs, 0, sizeof(blkbk->pending_reqs));
|
||||
|
||||
INIT_LIST_HEAD(&blkbk->pending_free);
|
||||
spin_lock_init(&blkbk->pending_free_lock);
|
||||
init_waitqueue_head(&blkbk->pending_free_wq);
|
||||
|
||||
for (i = 0; i < xen_blkif_reqs; i++)
|
||||
list_add_tail(&blkbk->pending_reqs[i].free_list,
|
||||
&blkbk->pending_free);
|
||||
|
||||
rc = xen_blkif_xenbus_init();
|
||||
if (rc)
|
||||
goto failed_init;
|
||||
|
||||
return 0;
|
||||
|
||||
out_of_memory:
|
||||
pr_alert(DRV_PFX "%s: out of memory\n", __func__);
|
||||
failed_init:
|
||||
kfree(blkbk->pending_reqs);
|
||||
kfree(blkbk->pending_grant_handles);
|
||||
for (i = 0; i < mmap_pages; i++) {
|
||||
if (blkbk->pending_pages[i])
|
||||
__free_page(blkbk->pending_pages[i]);
|
||||
}
|
||||
kfree(blkbk->pending_pages);
|
||||
kfree(blkbk);
|
||||
blkbk = NULL;
|
||||
return rc;
|
||||
}
|
||||
|
||||
module_init(xen_blkif_init);
|
||||
|
||||
MODULE_LICENSE("Dual BSD/GPL");
|
drivers/block/xen-blkback/common.h (new file, 233 lines)
@ -0,0 +1,233 @@
/*
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License version 2
 * as published by the Free Software Foundation; or, when distributed
 * separately from the Linux kernel or incorporated into other
 * software packages, subject to the following license:
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this source file (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use, copy, modify,
 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
 * and to permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#ifndef __XEN_BLKIF__BACKEND__COMMON_H__
#define __XEN_BLKIF__BACKEND__COMMON_H__

#include <linux/version.h>
#include <linux/module.h>
#include <linux/interrupt.h>
#include <linux/slab.h>
#include <linux/blkdev.h>
#include <linux/vmalloc.h>
#include <linux/wait.h>
#include <linux/io.h>
#include <asm/setup.h>
#include <asm/pgalloc.h>
#include <asm/hypervisor.h>
#include <xen/grant_table.h>
#include <xen/xenbus.h>
#include <xen/interface/io/ring.h>
#include <xen/interface/io/blkif.h>
#include <xen/interface/io/protocols.h>

#define DRV_PFX "xen-blkback:"
#define DPRINTK(fmt, args...)				\
	pr_debug(DRV_PFX "(%s:%d) " fmt ".\n",		\
		 __func__, __LINE__, ##args)


/* Not a real protocol. Used to generate ring structs which contain
 * the elements common to all protocols only. This way we get a
 * compiler-checkable way to use common struct elements, so we can
 * avoid using switch(protocol) in a number of places. */
struct blkif_common_request {
	char dummy;
};
struct blkif_common_response {
	char dummy;
};

/* i386 protocol version */
#pragma pack(push, 4)
struct blkif_x86_32_request {
	uint8_t operation;           /* BLKIF_OP_???                        */
	uint8_t nr_segments;         /* number of segments                  */
	blkif_vdev_t handle;         /* only for read/write requests        */
	uint64_t id;                 /* private guest value, echoed in resp */
	blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */
	struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
};
struct blkif_x86_32_response {
	uint64_t id;                 /* copied from request */
	uint8_t operation;           /* copied from request */
	int16_t status;              /* BLKIF_RSP_???       */
};
#pragma pack(pop)

/* x86_64 protocol version */
struct blkif_x86_64_request {
	uint8_t operation;           /* BLKIF_OP_???                        */
	uint8_t nr_segments;         /* number of segments                  */
	blkif_vdev_t handle;         /* only for read/write requests        */
	uint64_t __attribute__((__aligned__(8))) id;
	blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */
	struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
};
struct blkif_x86_64_response {
	uint64_t __attribute__((__aligned__(8))) id;
	uint8_t operation;           /* copied from request */
	int16_t status;              /* BLKIF_RSP_???       */
};

DEFINE_RING_TYPES(blkif_common, struct blkif_common_request,
		  struct blkif_common_response);
DEFINE_RING_TYPES(blkif_x86_32, struct blkif_x86_32_request,
		  struct blkif_x86_32_response);
DEFINE_RING_TYPES(blkif_x86_64, struct blkif_x86_64_request,
		  struct blkif_x86_64_response);

union blkif_back_rings {
	struct blkif_back_ring        native;
	struct blkif_common_back_ring common;
	struct blkif_x86_32_back_ring x86_32;
	struct blkif_x86_64_back_ring x86_64;
};

enum blkif_protocol {
	BLKIF_PROTOCOL_NATIVE = 1,
	BLKIF_PROTOCOL_X86_32 = 2,
	BLKIF_PROTOCOL_X86_64 = 3,
};

struct xen_vbd {
	/* What the domain refers to this vbd as. */
	blkif_vdev_t		handle;
	/* Non-zero -> read-only */
	unsigned char		readonly;
	/* VDISK_xxx */
	unsigned char		type;
	/* phys device that this vbd maps to. */
	u32			pdevice;
	struct block_device	*bdev;
	/* Cached size parameter. */
	sector_t		size;
	bool			flush_support;
};

struct backend_info;

struct xen_blkif {
	/* Unique identifier for this interface. */
	domid_t			domid;
	unsigned int		handle;
	/* Physical parameters of the comms window. */
	unsigned int		irq;
	/* Comms information. */
	enum blkif_protocol	blk_protocol;
	union blkif_back_rings	blk_rings;
	struct vm_struct	*blk_ring_area;
	/* The VBD attached to this interface. */
	struct xen_vbd		vbd;
	/* Back pointer to the backend_info. */
	struct backend_info	*be;
	/* Private fields. */
	spinlock_t		blk_ring_lock;
	atomic_t		refcnt;

	wait_queue_head_t	wq;
	/* One thread per one blkif. */
	struct task_struct	*xenblkd;
	unsigned int		waiting_reqs;

	/* statistics */
	unsigned long		st_print;
	int			st_rd_req;
	int			st_wr_req;
	int			st_oo_req;
	int			st_f_req;
	int			st_rd_sect;
	int			st_wr_sect;

	wait_queue_head_t	waiting_to_free;

	grant_handle_t		shmem_handle;
	grant_ref_t		shmem_ref;
};


#define vbd_sz(_v)	((_v)->bdev->bd_part ? \
			 (_v)->bdev->bd_part->nr_sects : \
			  get_capacity((_v)->bdev->bd_disk))

#define xen_blkif_get(_b) (atomic_inc(&(_b)->refcnt))
#define xen_blkif_put(_b)				\
	do {						\
		if (atomic_dec_and_test(&(_b)->refcnt))	\
			wake_up(&(_b)->waiting_to_free);\
	} while (0)

struct phys_req {
	unsigned short		dev;
	unsigned short		nr_sects;
	struct block_device	*bdev;
	blkif_sector_t		sector_number;
};
int xen_blkif_interface_init(void);

int xen_blkif_xenbus_init(void);

irqreturn_t xen_blkif_be_int(int irq, void *dev_id);
int xen_blkif_schedule(void *arg);

int xen_blkbk_flush_diskcache(struct xenbus_transaction xbt,
			      struct backend_info *be, int state);

struct xenbus_device *xen_blkbk_xenbus(struct backend_info *be);

static inline void blkif_get_x86_32_req(struct blkif_request *dst,
					struct blkif_x86_32_request *src)
{
	int i, n = BLKIF_MAX_SEGMENTS_PER_REQUEST;
	dst->operation = src->operation;
	dst->nr_segments = src->nr_segments;
	dst->handle = src->handle;
	dst->id = src->id;
	dst->u.rw.sector_number = src->sector_number;
	barrier();
	if (n > dst->nr_segments)
		n = dst->nr_segments;
	for (i = 0; i < n; i++)
		dst->u.rw.seg[i] = src->seg[i];
}

static inline void blkif_get_x86_64_req(struct blkif_request *dst,
					struct blkif_x86_64_request *src)
{
	int i, n = BLKIF_MAX_SEGMENTS_PER_REQUEST;
	dst->operation = src->operation;
	dst->nr_segments = src->nr_segments;
	dst->handle = src->handle;
	dst->id = src->id;
	dst->u.rw.sector_number = src->sector_number;
	barrier();
	if (n > dst->nr_segments)
		n = dst->nr_segments;
	for (i = 0; i < n; i++)
		dst->u.rw.seg[i] = src->seg[i];
}

#endif /* __XEN_BLKIF__BACKEND__COMMON_H__ */
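The union of back rings and the two conversion helpers above exist so that the request-consuming path can stay protocol-agnostic. The sketch below is not part of this commit: the function name is made up, and the real consumer also handles ring index accounting and locking, so treat it only as an illustration of the intended dispatch pattern.

/* Illustrative sketch only: pulling one request off the shared ring
 * regardless of the guest ABI.  "example_copy_request" is a made-up name. */
static void example_copy_request(struct xen_blkif *blkif,
				 struct blkif_request *req, RING_IDX rc)
{
	union blkif_back_rings *rings = &blkif->blk_rings;

	switch (blkif->blk_protocol) {
	case BLKIF_PROTOCOL_NATIVE:
		/* Same ABI as the backend: a plain structure copy suffices. */
		memcpy(req, RING_GET_REQUEST(&rings->native, rc), sizeof(*req));
		break;
	case BLKIF_PROTOCOL_X86_32:
		blkif_get_x86_32_req(req, RING_GET_REQUEST(&rings->x86_32, rc));
		break;
	case BLKIF_PROTOCOL_X86_64:
		blkif_get_x86_64_req(req, RING_GET_REQUEST(&rings->x86_64, rc));
		break;
	default:
		BUG();
	}
}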
drivers/block/xen-blkback/xenbus.c (new file, 768 lines)
@ -0,0 +1,768 @@
/*  Xenbus code for blkif backend
    Copyright (C) 2005 Rusty Russell <rusty@rustcorp.com.au>
    Copyright (C) 2005 XenSource Ltd

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

*/

#include <stdarg.h>
#include <linux/module.h>
#include <linux/kthread.h>
#include <xen/events.h>
#include <xen/grant_table.h>
#include "common.h"

struct backend_info {
	struct xenbus_device	*dev;
	struct xen_blkif	*blkif;
	struct xenbus_watch	backend_watch;
	unsigned		major;
	unsigned		minor;
	char			*mode;
};

static struct kmem_cache *xen_blkif_cachep;
static void connect(struct backend_info *);
static int connect_ring(struct backend_info *);
static void backend_changed(struct xenbus_watch *, const char **,
			    unsigned int);

struct xenbus_device *xen_blkbk_xenbus(struct backend_info *be)
{
	return be->dev;
}

static int blkback_name(struct xen_blkif *blkif, char *buf)
{
	char *devpath, *devname;
	struct xenbus_device *dev = blkif->be->dev;

	devpath = xenbus_read(XBT_NIL, dev->nodename, "dev", NULL);
	if (IS_ERR(devpath))
		return PTR_ERR(devpath);

	devname = strstr(devpath, "/dev/");
	if (devname != NULL)
		devname += strlen("/dev/");
	else
		devname  = devpath;

	snprintf(buf, TASK_COMM_LEN, "blkback.%d.%s", blkif->domid, devname);
	kfree(devpath);

	return 0;
}

static void xen_update_blkif_status(struct xen_blkif *blkif)
{
	int err;
	char name[TASK_COMM_LEN];

	/* Not ready to connect? */
	if (!blkif->irq || !blkif->vbd.bdev)
		return;

	/* Already connected? */
	if (blkif->be->dev->state == XenbusStateConnected)
		return;

	/* Attempt to connect: exit if we fail to. */
	connect(blkif->be);
	if (blkif->be->dev->state != XenbusStateConnected)
		return;

	err = blkback_name(blkif, name);
	if (err) {
		xenbus_dev_error(blkif->be->dev, err, "get blkback dev name");
		return;
	}

	err = filemap_write_and_wait(blkif->vbd.bdev->bd_inode->i_mapping);
	if (err) {
		xenbus_dev_error(blkif->be->dev, err, "block flush");
		return;
	}
	invalidate_inode_pages2(blkif->vbd.bdev->bd_inode->i_mapping);

	blkif->xenblkd = kthread_run(xen_blkif_schedule, blkif, name);
	if (IS_ERR(blkif->xenblkd)) {
		err = PTR_ERR(blkif->xenblkd);
		blkif->xenblkd = NULL;
		xenbus_dev_error(blkif->be->dev, err, "start xenblkd");
	}
}

static struct xen_blkif *xen_blkif_alloc(domid_t domid)
{
	struct xen_blkif *blkif;

	blkif = kmem_cache_alloc(xen_blkif_cachep, GFP_KERNEL);
	if (!blkif)
		return ERR_PTR(-ENOMEM);

	memset(blkif, 0, sizeof(*blkif));
	blkif->domid = domid;
	spin_lock_init(&blkif->blk_ring_lock);
	atomic_set(&blkif->refcnt, 1);
	init_waitqueue_head(&blkif->wq);
	blkif->st_print = jiffies;
	init_waitqueue_head(&blkif->waiting_to_free);

	return blkif;
}

static int map_frontend_page(struct xen_blkif *blkif, unsigned long shared_page)
{
	struct gnttab_map_grant_ref op;

	gnttab_set_map_op(&op, (unsigned long)blkif->blk_ring_area->addr,
			  GNTMAP_host_map, shared_page, blkif->domid);

	if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1))
		BUG();

	if (op.status) {
		DPRINTK("Grant table operation failure !\n");
		return op.status;
	}

	blkif->shmem_ref = shared_page;
	blkif->shmem_handle = op.handle;

	return 0;
}

static void unmap_frontend_page(struct xen_blkif *blkif)
{
	struct gnttab_unmap_grant_ref op;

	gnttab_set_unmap_op(&op, (unsigned long)blkif->blk_ring_area->addr,
			    GNTMAP_host_map, blkif->shmem_handle);

	if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1))
		BUG();
}

static int xen_blkif_map(struct xen_blkif *blkif, unsigned long shared_page,
			 unsigned int evtchn)
{
	int err;

	/* Already connected through? */
	if (blkif->irq)
		return 0;

	blkif->blk_ring_area = alloc_vm_area(PAGE_SIZE);
	if (!blkif->blk_ring_area)
		return -ENOMEM;

	err = map_frontend_page(blkif, shared_page);
	if (err) {
		free_vm_area(blkif->blk_ring_area);
		return err;
	}

	switch (blkif->blk_protocol) {
	case BLKIF_PROTOCOL_NATIVE:
	{
		struct blkif_sring *sring;
		sring = (struct blkif_sring *)blkif->blk_ring_area->addr;
		BACK_RING_INIT(&blkif->blk_rings.native, sring, PAGE_SIZE);
		break;
	}
	case BLKIF_PROTOCOL_X86_32:
	{
		struct blkif_x86_32_sring *sring_x86_32;
		sring_x86_32 = (struct blkif_x86_32_sring *)blkif->blk_ring_area->addr;
		BACK_RING_INIT(&blkif->blk_rings.x86_32, sring_x86_32, PAGE_SIZE);
		break;
	}
	case BLKIF_PROTOCOL_X86_64:
	{
		struct blkif_x86_64_sring *sring_x86_64;
		sring_x86_64 = (struct blkif_x86_64_sring *)blkif->blk_ring_area->addr;
		BACK_RING_INIT(&blkif->blk_rings.x86_64, sring_x86_64, PAGE_SIZE);
		break;
	}
	default:
		BUG();
	}

	err = bind_interdomain_evtchn_to_irqhandler(blkif->domid, evtchn,
						    xen_blkif_be_int, 0,
						    "blkif-backend", blkif);
	if (err < 0) {
		unmap_frontend_page(blkif);
		free_vm_area(blkif->blk_ring_area);
		blkif->blk_rings.common.sring = NULL;
		return err;
	}
	blkif->irq = err;

	return 0;
}

static void xen_blkif_disconnect(struct xen_blkif *blkif)
{
	if (blkif->xenblkd) {
		kthread_stop(blkif->xenblkd);
		blkif->xenblkd = NULL;
	}

	atomic_dec(&blkif->refcnt);
	wait_event(blkif->waiting_to_free, atomic_read(&blkif->refcnt) == 0);
	atomic_inc(&blkif->refcnt);

	if (blkif->irq) {
		unbind_from_irqhandler(blkif->irq, blkif);
		blkif->irq = 0;
	}

	if (blkif->blk_rings.common.sring) {
		unmap_frontend_page(blkif);
		free_vm_area(blkif->blk_ring_area);
		blkif->blk_rings.common.sring = NULL;
	}
}

void xen_blkif_free(struct xen_blkif *blkif)
{
	if (!atomic_dec_and_test(&blkif->refcnt))
		BUG();
	kmem_cache_free(xen_blkif_cachep, blkif);
}

int __init xen_blkif_interface_init(void)
{
	xen_blkif_cachep = kmem_cache_create("blkif_cache",
					     sizeof(struct xen_blkif),
					     0, 0, NULL);
	if (!xen_blkif_cachep)
		return -ENOMEM;

	return 0;
}

/*
 *  sysfs interface for VBD I/O requests
 */

#define VBD_SHOW(name, format, args...)					\
	static ssize_t show_##name(struct device *_dev,			\
				   struct device_attribute *attr,	\
				   char *buf)				\
	{								\
		struct xenbus_device *dev = to_xenbus_device(_dev);	\
		struct backend_info *be = dev_get_drvdata(&dev->dev);	\
									\
		return sprintf(buf, format, ##args);			\
	}								\
	static DEVICE_ATTR(name, S_IRUGO, show_##name, NULL)

VBD_SHOW(oo_req,  "%d\n", be->blkif->st_oo_req);
VBD_SHOW(rd_req,  "%d\n", be->blkif->st_rd_req);
VBD_SHOW(wr_req,  "%d\n", be->blkif->st_wr_req);
VBD_SHOW(f_req,  "%d\n", be->blkif->st_f_req);
VBD_SHOW(rd_sect, "%d\n", be->blkif->st_rd_sect);
VBD_SHOW(wr_sect, "%d\n", be->blkif->st_wr_sect);

static struct attribute *xen_vbdstat_attrs[] = {
	&dev_attr_oo_req.attr,
	&dev_attr_rd_req.attr,
	&dev_attr_wr_req.attr,
	&dev_attr_f_req.attr,
	&dev_attr_rd_sect.attr,
	&dev_attr_wr_sect.attr,
	NULL
};

static struct attribute_group xen_vbdstat_group = {
	.name = "statistics",
	.attrs = xen_vbdstat_attrs,
};

VBD_SHOW(physical_device, "%x:%x\n", be->major, be->minor);
VBD_SHOW(mode, "%s\n", be->mode);

int xenvbd_sysfs_addif(struct xenbus_device *dev)
{
	int error;

	error = device_create_file(&dev->dev, &dev_attr_physical_device);
	if (error)
		goto fail1;

	error = device_create_file(&dev->dev, &dev_attr_mode);
	if (error)
		goto fail2;

	error = sysfs_create_group(&dev->dev.kobj, &xen_vbdstat_group);
	if (error)
		goto fail3;

	return 0;

fail3:	sysfs_remove_group(&dev->dev.kobj, &xen_vbdstat_group);
fail2:	device_remove_file(&dev->dev, &dev_attr_mode);
fail1:	device_remove_file(&dev->dev, &dev_attr_physical_device);
	return error;
}

void xenvbd_sysfs_delif(struct xenbus_device *dev)
{
	sysfs_remove_group(&dev->dev.kobj, &xen_vbdstat_group);
	device_remove_file(&dev->dev, &dev_attr_mode);
	device_remove_file(&dev->dev, &dev_attr_physical_device);
}


static void xen_vbd_free(struct xen_vbd *vbd)
{
	if (vbd->bdev)
		blkdev_put(vbd->bdev, vbd->readonly ? FMODE_READ : FMODE_WRITE);
	vbd->bdev = NULL;
}

static int xen_vbd_create(struct xen_blkif *blkif, blkif_vdev_t handle,
			  unsigned major, unsigned minor, int readonly,
			  int cdrom)
{
	struct xen_vbd *vbd;
	struct block_device *bdev;
	struct request_queue *q;

	vbd = &blkif->vbd;
	vbd->handle   = handle;
	vbd->readonly = readonly;
	vbd->type     = 0;

	vbd->pdevice  = MKDEV(major, minor);

	bdev = blkdev_get_by_dev(vbd->pdevice, vbd->readonly ?
				 FMODE_READ : FMODE_WRITE, NULL);

	if (IS_ERR(bdev)) {
		DPRINTK("xen_vbd_create: device %08x could not be opened.\n",
			vbd->pdevice);
		return -ENOENT;
	}

	vbd->bdev = bdev;
	vbd->size = vbd_sz(vbd);

	if (vbd->bdev->bd_disk == NULL) {
		DPRINTK("xen_vbd_create: device %08x doesn't exist.\n",
			vbd->pdevice);
		xen_vbd_free(vbd);
		return -ENOENT;
	}

	if (vbd->bdev->bd_disk->flags & GENHD_FL_CD || cdrom)
		vbd->type |= VDISK_CDROM;
	if (vbd->bdev->bd_disk->flags & GENHD_FL_REMOVABLE)
		vbd->type |= VDISK_REMOVABLE;

	q = bdev_get_queue(bdev);
	if (q && q->flush_flags)
		vbd->flush_support = true;

	DPRINTK("Successful creation of handle=%04x (dom=%u)\n",
		handle, blkif->domid);
	return 0;
}
static int xen_blkbk_remove(struct xenbus_device *dev)
{
	struct backend_info *be = dev_get_drvdata(&dev->dev);

	DPRINTK("");

	if (be->major || be->minor)
		xenvbd_sysfs_delif(dev);

	if (be->backend_watch.node) {
		unregister_xenbus_watch(&be->backend_watch);
		kfree(be->backend_watch.node);
		be->backend_watch.node = NULL;
	}

	if (be->blkif) {
		xen_blkif_disconnect(be->blkif);
		xen_vbd_free(&be->blkif->vbd);
		xen_blkif_free(be->blkif);
		be->blkif = NULL;
	}

	kfree(be);
	dev_set_drvdata(&dev->dev, NULL);
	return 0;
}

int xen_blkbk_flush_diskcache(struct xenbus_transaction xbt,
			      struct backend_info *be, int state)
{
	struct xenbus_device *dev = be->dev;
	int err;

	err = xenbus_printf(xbt, dev->nodename, "feature-flush-cache",
			    "%d", state);
	if (err)
		xenbus_dev_fatal(dev, err, "writing feature-flush-cache");

	return err;
}

/*
 * Entry point to this code when a new device is created.  Allocate the basic
 * structures, and watch the store waiting for the hotplug scripts to tell us
 * the device's physical major and minor numbers.  Switch to InitWait.
 */
static int xen_blkbk_probe(struct xenbus_device *dev,
			   const struct xenbus_device_id *id)
{
	int err;
	struct backend_info *be = kzalloc(sizeof(struct backend_info),
					  GFP_KERNEL);
	if (!be) {
		xenbus_dev_fatal(dev, -ENOMEM,
				 "allocating backend structure");
		return -ENOMEM;
	}
	be->dev = dev;
	dev_set_drvdata(&dev->dev, be);

	be->blkif = xen_blkif_alloc(dev->otherend_id);
	if (IS_ERR(be->blkif)) {
		err = PTR_ERR(be->blkif);
		be->blkif = NULL;
		xenbus_dev_fatal(dev, err, "creating block interface");
		goto fail;
	}

	/* setup back pointer */
	be->blkif->be = be;

	err = xenbus_watch_pathfmt(dev, &be->backend_watch, backend_changed,
				   "%s/%s", dev->nodename, "physical-device");
	if (err)
		goto fail;

	err = xenbus_switch_state(dev, XenbusStateInitWait);
	if (err)
		goto fail;

	return 0;

fail:
	DPRINTK("failed");
	xen_blkbk_remove(dev);
	return err;
}


/*
 * Callback received when the hotplug scripts have placed the physical-device
 * node.  Read it and the mode node, and create a vbd.  If the frontend is
 * ready, connect.
 */
static void backend_changed(struct xenbus_watch *watch,
			    const char **vec, unsigned int len)
{
	int err;
	unsigned major;
	unsigned minor;
	struct backend_info *be
		= container_of(watch, struct backend_info, backend_watch);
	struct xenbus_device *dev = be->dev;
	int cdrom = 0;
	char *device_type;

	DPRINTK("");

	err = xenbus_scanf(XBT_NIL, dev->nodename, "physical-device", "%x:%x",
			   &major, &minor);
	if (XENBUS_EXIST_ERR(err)) {
		/*
		 * Since this watch will fire once immediately after it is
		 * registered, we expect this.  Ignore it, and wait for the
		 * hotplug scripts.
		 */
		return;
	}
	if (err != 2) {
		xenbus_dev_fatal(dev, err, "reading physical-device");
		return;
	}

	if ((be->major || be->minor) &&
	    ((be->major != major) || (be->minor != minor))) {
		pr_warn(DRV_PFX "changing physical device (from %x:%x to %x:%x) not supported.\n",
			be->major, be->minor, major, minor);
		return;
	}

	be->mode = xenbus_read(XBT_NIL, dev->nodename, "mode", NULL);
	if (IS_ERR(be->mode)) {
		err = PTR_ERR(be->mode);
		be->mode = NULL;
		xenbus_dev_fatal(dev, err, "reading mode");
		return;
	}

	device_type = xenbus_read(XBT_NIL, dev->otherend, "device-type", NULL);
	if (!IS_ERR(device_type)) {
		cdrom = strcmp(device_type, "cdrom") == 0;
		kfree(device_type);
	}

	if (be->major == 0 && be->minor == 0) {
		/* Front end dir is a number, which is used as the handle. */

		char *p = strrchr(dev->otherend, '/') + 1;
		long handle;
		err = strict_strtoul(p, 0, &handle);
		if (err)
			return;

		be->major = major;
		be->minor = minor;

		err = xen_vbd_create(be->blkif, handle, major, minor,
				     (NULL == strchr(be->mode, 'w')), cdrom);
		if (err) {
			be->major = 0;
			be->minor = 0;
			xenbus_dev_fatal(dev, err, "creating vbd structure");
			return;
		}

		err = xenvbd_sysfs_addif(dev);
		if (err) {
			xen_vbd_free(&be->blkif->vbd);
			be->major = 0;
			be->minor = 0;
			xenbus_dev_fatal(dev, err, "creating sysfs entries");
			return;
		}

		/* We're potentially connected now */
		xen_update_blkif_status(be->blkif);
	}
}


/*
 * Callback received when the frontend's state changes.
 */
static void frontend_changed(struct xenbus_device *dev,
			     enum xenbus_state frontend_state)
{
	struct backend_info *be = dev_get_drvdata(&dev->dev);
	int err;

	DPRINTK("%s", xenbus_strstate(frontend_state));

	switch (frontend_state) {
	case XenbusStateInitialising:
		if (dev->state == XenbusStateClosed) {
			pr_info(DRV_PFX "%s: prepare for reconnect\n",
				dev->nodename);
			xenbus_switch_state(dev, XenbusStateInitWait);
		}
		break;

	case XenbusStateInitialised:
	case XenbusStateConnected:
		/*
		 * Ensure we connect even when two watches fire in
		 * close succession and we miss the intermediate value
		 * of frontend_state.
		 */
		if (dev->state == XenbusStateConnected)
			break;

		/*
		 * Enforce precondition before potential leak point.
		 * blkif_disconnect() is idempotent.
		 */
		xen_blkif_disconnect(be->blkif);

		err = connect_ring(be);
		if (err)
			break;
		xen_update_blkif_status(be->blkif);
		break;

	case XenbusStateClosing:
		xen_blkif_disconnect(be->blkif);
		xenbus_switch_state(dev, XenbusStateClosing);
		break;

	case XenbusStateClosed:
		xenbus_switch_state(dev, XenbusStateClosed);
		if (xenbus_dev_is_online(dev))
			break;
		/* fall through if not online */
	case XenbusStateUnknown:
		/* implies blkif_disconnect() via blkback_remove() */
		device_unregister(&dev->dev);
		break;

	default:
		xenbus_dev_fatal(dev, -EINVAL, "saw state %d at frontend",
				 frontend_state);
		break;
	}
}


/* ** Connection ** */


/*
 * Write the physical details regarding the block device to the store, and
 * switch to Connected state.
 */
static void connect(struct backend_info *be)
{
	struct xenbus_transaction xbt;
	int err;
	struct xenbus_device *dev = be->dev;

	DPRINTK("%s", dev->otherend);

	/* Supply the information about the device the frontend needs */
again:
	err = xenbus_transaction_start(&xbt);
	if (err) {
		xenbus_dev_fatal(dev, err, "starting transaction");
		return;
	}

	err = xen_blkbk_flush_diskcache(xbt, be, be->blkif->vbd.flush_support);
	if (err)
		goto abort;

	err = xenbus_printf(xbt, dev->nodename, "sectors", "%llu",
			    (unsigned long long)vbd_sz(&be->blkif->vbd));
	if (err) {
		xenbus_dev_fatal(dev, err, "writing %s/sectors",
				 dev->nodename);
		goto abort;
	}

	/* FIXME: use a typename instead */
	err = xenbus_printf(xbt, dev->nodename, "info", "%u",
			    be->blkif->vbd.type |
			    (be->blkif->vbd.readonly ? VDISK_READONLY : 0));
	if (err) {
		xenbus_dev_fatal(dev, err, "writing %s/info",
				 dev->nodename);
		goto abort;
	}
	err = xenbus_printf(xbt, dev->nodename, "sector-size", "%lu",
			    (unsigned long)
			    bdev_logical_block_size(be->blkif->vbd.bdev));
	if (err) {
		xenbus_dev_fatal(dev, err, "writing %s/sector-size",
				 dev->nodename);
		goto abort;
	}

	err = xenbus_transaction_end(xbt, 0);
	if (err == -EAGAIN)
		goto again;
	if (err)
		xenbus_dev_fatal(dev, err, "ending transaction");

	err = xenbus_switch_state(dev, XenbusStateConnected);
	if (err)
		xenbus_dev_fatal(dev, err, "switching to Connected state",
				 dev->nodename);

	return;
 abort:
	xenbus_transaction_end(xbt, 1);
}


static int connect_ring(struct backend_info *be)
{
	struct xenbus_device *dev = be->dev;
	unsigned long ring_ref;
	unsigned int evtchn;
	char protocol[64] = "";
	int err;

	DPRINTK("%s", dev->otherend);

	err = xenbus_gather(XBT_NIL, dev->otherend, "ring-ref", "%lu",
			    &ring_ref, "event-channel", "%u", &evtchn, NULL);
	if (err) {
		xenbus_dev_fatal(dev, err,
				 "reading %s/ring-ref and event-channel",
				 dev->otherend);
		return err;
	}

	be->blkif->blk_protocol = BLKIF_PROTOCOL_NATIVE;
	err = xenbus_gather(XBT_NIL, dev->otherend, "protocol",
			    "%63s", protocol, NULL);
	if (err)
		strcpy(protocol, "unspecified, assuming native");
	else if (0 == strcmp(protocol, XEN_IO_PROTO_ABI_NATIVE))
		be->blkif->blk_protocol = BLKIF_PROTOCOL_NATIVE;
	else if (0 == strcmp(protocol, XEN_IO_PROTO_ABI_X86_32))
		be->blkif->blk_protocol = BLKIF_PROTOCOL_X86_32;
	else if (0 == strcmp(protocol, XEN_IO_PROTO_ABI_X86_64))
		be->blkif->blk_protocol = BLKIF_PROTOCOL_X86_64;
	else {
		xenbus_dev_fatal(dev, err, "unknown fe protocol %s", protocol);
		return -1;
	}
	pr_info(DRV_PFX "ring-ref %ld, event-channel %d, protocol %d (%s)\n",
		ring_ref, evtchn, be->blkif->blk_protocol, protocol);

	/* Map the shared frame, irq etc. */
	err = xen_blkif_map(be->blkif, ring_ref, evtchn);
	if (err) {
		xenbus_dev_fatal(dev, err, "mapping ring-ref %lu port %u",
				 ring_ref, evtchn);
		return err;
	}

	return 0;
}


/* ** Driver Registration ** */


static const struct xenbus_device_id xen_blkbk_ids[] = {
	{ "vbd" },
	{ "" }
};


static struct xenbus_driver xen_blkbk = {
	.name = "vbd",
	.owner = THIS_MODULE,
	.ids = xen_blkbk_ids,
	.probe = xen_blkbk_probe,
	.remove = xen_blkbk_remove,
	.otherend_changed = frontend_changed
};


int xen_blkif_xenbus_init(void)
{
	return xenbus_register_backend(&xen_blkbk);
}
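To make the VBD_SHOW() macro in the file above easier to read, here is what one instance expands to, written out by hand. This is only an illustration implied by the macro definition, not extra code added by the commit.

/* Hand expansion of VBD_SHOW(oo_req, "%d\n", be->blkif->st_oo_req):
 * one show routine plus a read-only device attribute, which the
 * xen_vbdstat_group then exposes under .../statistics/oo_req. */
static ssize_t show_oo_req(struct device *_dev,
			   struct device_attribute *attr, char *buf)
{
	struct xenbus_device *dev = to_xenbus_device(_dev);
	struct backend_info *be = dev_get_drvdata(&dev->dev);

	return sprintf(buf, "%d\n", be->blkif->st_oo_req);
}
static DEVICE_ATTR(oo_req, S_IRUGO, show_oo_req, NULL);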
@ -97,6 +97,7 @@ struct blkfront_info
	struct blk_shadow shadow[BLK_RING_SIZE];
	unsigned long shadow_free;
	unsigned int feature_flush;
	unsigned int flush_op;
	int is_ready;
};

@ -250,8 +251,7 @@ static int blkif_ioctl(struct block_device *bdev, fmode_t mode,

/*
 * Generate a Xen blkfront IO request from a blk layer request.  Reads
 * and writes are handled as expected. Since we lack a loose flush
 * request, we map flushes into a full ordered barrier.
 * and writes are handled as expected.
 *
 * @req: a request struct
 */

@ -293,14 +293,13 @@ static int blkif_queue_request(struct request *req)

	if (req->cmd_flags & (REQ_FLUSH | REQ_FUA)) {
		/*
		 * Ideally we could just do an unordered
		 * flush-to-disk, but all we have is a full write
		 * barrier at the moment. However, a barrier write is
		 * Ideally we can do an unordered flush-to-disk. In case the
		 * backend only supports barriers, use that. A barrier request
		 * a superset of FUA, so we can implement it the same
		 * way.  (It's also a FLUSH+FUA, since it is
		 * guaranteed ordered WRT previous writes.)
		 */
		ring_req->operation = BLKIF_OP_WRITE_BARRIER;
		ring_req->operation = info->flush_op;
	}

	ring_req->nr_segments = blk_rq_map_sg(req->q, req, info->sg);

@ -433,8 +432,11 @@ static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size)
static void xlvbd_flush(struct blkfront_info *info)
{
	blk_queue_flush(info->rq, info->feature_flush);
	printk(KERN_INFO "blkfront: %s: barriers %s\n",
	printk(KERN_INFO "blkfront: %s: %s: %s\n",
	       info->gd->disk_name,
	       info->flush_op == BLKIF_OP_WRITE_BARRIER ?
	       "barrier" : (info->flush_op == BLKIF_OP_FLUSH_DISKCACHE ?
	       "flush diskcache" : "barrier or flush"),
	       info->feature_flush ? "enabled" : "disabled");
}

@ -720,15 +722,20 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)

		error = (bret->status == BLKIF_RSP_OKAY) ? 0 : -EIO;
		switch (bret->operation) {
		case BLKIF_OP_FLUSH_DISKCACHE:
		case BLKIF_OP_WRITE_BARRIER:
			if (unlikely(bret->status == BLKIF_RSP_EOPNOTSUPP)) {
				printk(KERN_WARNING "blkfront: %s: write barrier op failed\n",
				printk(KERN_WARNING "blkfront: %s: write %s op failed\n",
				       info->flush_op == BLKIF_OP_WRITE_BARRIER ?
				       "barrier" : "flush disk cache",
				       info->gd->disk_name);
				error = -EOPNOTSUPP;
			}
			if (unlikely(bret->status == BLKIF_RSP_ERROR &&
				     info->shadow[id].req.nr_segments == 0)) {
				printk(KERN_WARNING "blkfront: %s: empty write barrier op failed\n",
				printk(KERN_WARNING "blkfront: %s: empty write %s op failed\n",
				       info->flush_op == BLKIF_OP_WRITE_BARRIER ?
				       "barrier" : "flush disk cache",
				       info->gd->disk_name);
				error = -EOPNOTSUPP;
			}

@ -736,6 +743,7 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
				if (error == -EOPNOTSUPP)
					error = 0;
				info->feature_flush = 0;
				info->flush_op = 0;
				xlvbd_flush(info);
			}
			/* fall through */

@ -1100,7 +1108,7 @@ static void blkfront_connect(struct blkfront_info *info)
	unsigned long sector_size;
	unsigned int binfo;
	int err;
	int barrier;
	int barrier, flush;

	switch (info->connected) {
	case BLKIF_STATE_CONNECTED:

@ -1140,8 +1148,11 @@ static void blkfront_connect(struct blkfront_info *info)
		return;
	}

	info->feature_flush = 0;
	info->flush_op = 0;

	err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
			    "feature-barrier", "%lu", &barrier,
			    "feature-barrier", "%d", &barrier,
			    NULL);

	/*

@ -1151,10 +1162,22 @@ static void blkfront_connect(struct blkfront_info *info)
	 *
	 * If there are barriers, then we use flush.
	 */
	info->feature_flush = 0;

	if (!err && barrier)
	if (!err && barrier) {
		info->feature_flush = REQ_FLUSH | REQ_FUA;
		info->flush_op = BLKIF_OP_WRITE_BARRIER;
	}
	/*
	 * And if there is "feature-flush-cache" use that above
	 * barriers.
	 */
	err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
			    "feature-flush-cache", "%d", &flush,
			    NULL);

	if (!err && flush) {
		info->feature_flush = REQ_FLUSH;
		info->flush_op = BLKIF_OP_FLUSH_DISKCACHE;
	}

	err = xlvbd_alloc_gendisk(sectors, info, binfo, sector_size);
	if (err) {
@ -38,7 +38,7 @@

/* Although the Linux source code makes a difference between
    generic endianness and the bitfields' endianness, there is no
    architecture as of Linux-2.6.24-rc4 where the bitfileds' endianness
    architecture as of Linux-2.6.24-rc4 where the bitfields' endianness
    does not match the generic endianness. */

#if __BYTE_ORDER == __LITTLE_ENDIAN

@ -53,7 +53,7 @@

extern const char *drbd_buildtag(void);
#define REL_VERSION "8.3.10"
#define REL_VERSION "8.3.11"
#define API_VERSION 88
#define PRO_VERSION_MIN 86
#define PRO_VERSION_MAX 96

@ -195,7 +195,7 @@ enum drbd_conns {
	C_WF_REPORT_PARAMS, /* we have a socket */
	C_CONNECTED,      /* we have introduced each other */
	C_STARTING_SYNC_S,  /* starting full sync by admin request. */
	C_STARTING_SYNC_T,  /* stariing full sync by admin request. */
	C_STARTING_SYNC_T,  /* starting full sync by admin request. */
	C_WF_BITMAP_S,
	C_WF_BITMAP_T,
	C_WF_SYNC_UUID,

@ -236,7 +236,7 @@ union drbd_state {
 * pointed out by Maxim Uvarov q<muvarov@ru.mvista.com>
 * even though we transmit as "cpu_to_be32(state)",
 * the offsets of the bitfields still need to be swapped
 * on different endianess.
 * on different endianness.
 */
struct {
#if defined(__LITTLE_ENDIAN_BITFIELD)

@ -266,7 +266,7 @@ union drbd_state {
		unsigned peer:2 ;   /* 3/4	 primary/secondary/unknown */
		unsigned role:2 ;   /* 3/4	 primary/secondary/unknown */
#else
# error "this endianess is not supported"
# error "this endianness is not supported"
#endif
	};
	unsigned int i;
@ -30,7 +30,7 @@ enum packet_types {
	int tag_and_len ## member;
#include "linux/drbd_nl.h"

/* declate tag-list-sizes */
/* declare tag-list-sizes */
static const int tag_list_sizes[] = {
#define NL_PACKET(name, number, fields) 2 fields ,
#define NL_INTEGER(pn, pr, member) + 4 + 4
@ -139,9 +139,9 @@ write intent log information, three of which are mentioned here.
 * .list is on one of three lists:
 *  in_use: currently in use (refcnt > 0, lc_number != LC_FREE)
 *     lru: unused but ready to be reused or recycled
 *          (ts_refcnt == 0, lc_number != LC_FREE),
 *          (lc_refcnt == 0, lc_number != LC_FREE),
 *    free: unused but ready to be recycled
 *          (ts_refcnt == 0, lc_number == LC_FREE),
 *          (lc_refcnt == 0, lc_number == LC_FREE),
 *
 * an element is said to be "in the active set",
 * if either on "in_use" or "lru", i.e. lc_number != LC_FREE.

@ -160,8 +160,8 @@ struct lc_element {
	struct hlist_node colision;
	struct list_head list;		 /* LRU list or free list */
	unsigned refcnt;
	/* back "pointer" into ts_cache->element[index],
	 * for paranoia, and for "ts_element_to_index" */
	/* back "pointer" into lc_cache->element[index],
	 * for paranoia, and for "lc_element_to_index" */
	unsigned lc_index;
	/* if we want to track a larger set of objects,
	 * it needs to become arch independend u64 */

@ -190,8 +190,8 @@ struct lru_cache {
	/* Arbitrary limit on maximum tracked objects. Practical limit is much
	 * lower due to allocation failures, probably. For typical use cases,
	 * nr_elements should be a few thousand at most.
	 * This also limits the maximum value of ts_element.ts_index, allowing the
	 * 8 high bits of .ts_index to be overloaded with flags in the future. */
	 * This also limits the maximum value of lc_element.lc_index, allowing the
	 * 8 high bits of .lc_index to be overloaded with flags in the future. */
#define LC_MAX_ACTIVE	(1<<24)

	/* statistics */
@ -44,6 +44,19 @@ typedef uint64_t blkif_sector_t;
 */
#define BLKIF_OP_WRITE_BARRIER     2

/*
 * Recognised if "feature-flush-cache" is present in backend xenbus
 * info. A flush will ask the underlying storage hardware to flush its
 * non-volatile caches as appropriate. The "feature-flush-cache" node
 * contains a boolean indicating whether flush requests are likely to
 * succeed or fail. Either way, a flush request may fail at any time
 * with BLKIF_RSP_EOPNOTSUPP if it is unsupported by the underlying
 * block-device hardware. The boolean simply indicates whether or not it
 * is worthwhile for the frontend to attempt flushes. If a backend does
 * not recognise BLKIF_OP_WRITE_FLUSH_CACHE, it should *not* create the
 * "feature-flush-cache" node!
 */
#define BLKIF_OP_FLUSH_DISKCACHE   3
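As a reading aid (not part of the interface header), the sketch below condenses how the two halves of this series use the new node: the backend advertises it from xen_blkbk_flush_diskcache(), and a frontend that finds it set switches its flush requests to the new operation, as the blkfront hunks above do. The helper name is made up.

/* Illustrative only: frontend-side probe of "feature-flush-cache",
 * condensed from the blkfront_connect() change earlier in this merge.
 * "example_probe_flush" is a hypothetical name. */
static void example_probe_flush(struct blkfront_info *info)
{
	int flush = 0;

	if (!xenbus_gather(XBT_NIL, info->xbdev->otherend,
			   "feature-flush-cache", "%d", &flush, NULL) &&
	    flush) {
		/* Unordered cache flushes are available; prefer them
		 * over the heavier BLKIF_OP_WRITE_BARRIER. */
		info->feature_flush = REQ_FLUSH;
		info->flush_op = BLKIF_OP_FLUSH_DISKCACHE;
	}
}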
/*
 * Maximum scatter/gather segments per request.
 * This is carefully chosen so that sizeof(struct blkif_ring) <= PAGE_SIZE.