Merge branch 'for-linus' of git://git.kernel.dk/linux-block

Pull block updates and fixes from Jens Axboe:

 - NVMe updates and fixes that missed the first pull request. This
   includes bug fixes and support for autonomous power management.

 - Fix from Christoph for missing clear of the request payload, causing
   a problem with (at least) the storvsc driver.

 - Further fixes for the queue/bdi life time issues from Jan.

 - The Kconfig mq scheduler update from me.

 - Fixing a use-after-free in dm-rq, spotted by Bart, introduced in this
   merge window.

 - Three fixes for nbd from Josef.

 - Bug fix from Omar for the sas transport code, which oopsed when bsg
   ioctls were used.

 - Improvements to the queue restart and tag wait from Omar.

 - Set of fixes for the sed/opal code from Scott.

 - Three trivial patches to cciss from Tobin.

* 'for-linus' of git://git.kernel.dk/linux-block: (41 commits)
  dm-rq: don't dereference request payload after ending request
  blk-mq-sched: separate mark hctx and queue restart operations
  blk-mq: use sbq wait queues instead of restart for driver tags
  block/sed-opal: Propagate original error message to userland.
  nvme/pci: re-check security protocol support after reset
  block/sed-opal: Introduce free_opal_dev to free the structure and clean up state
  nvme: detect NVMe controller in recent MacBooks
  nvme-rdma: add support for host_traddr
  nvmet-rdma: Fix error handling
  nvmet-rdma: use nvme cm status helper
  nvme-rdma: move nvme cm status helper to .h file
  nvme-fc: don't bother to validate ioccsz and iorcsz
  nvme/pci: No special case for queue busy on IO
  nvme/core: Fix race kicking freed request_queue
  nvme/pci: Disable on removal when disconnected
  nvme: Enable autonomous power state transitions
  nvme: Add a quirk mechanism that uses identify_ctrl
  nvme: make nvmf_register_transport require a create_ctrl callback
  nvme: Use CNS as 8-bit field and avoid endianness conversion
  nvme: add semicolon in nvme_command setting
  ...
Author: Linus Torvalds
Date:   2017-02-24 14:13:34 -08:00

32 changed files with 991 additions and 766 deletions

--- a/block/Kconfig.iosched
+++ b/block/Kconfig.iosched
@@ -69,50 +69,6 @@ config MQ_IOSCHED_DEADLINE
 	---help---
 	  MQ version of the deadline IO scheduler.
 
-config MQ_IOSCHED_NONE
-	bool
-	default y
-
-choice
-	prompt "Default single-queue blk-mq I/O scheduler"
-	default DEFAULT_SQ_NONE
-	help
-	  Select the I/O scheduler which will be used by default for blk-mq
-	  managed block devices with a single queue.
-
-config DEFAULT_SQ_DEADLINE
-	bool "MQ Deadline" if MQ_IOSCHED_DEADLINE=y
-
-config DEFAULT_SQ_NONE
-	bool "None"
-
-endchoice
-
-config DEFAULT_SQ_IOSCHED
-	string
-	default "mq-deadline" if DEFAULT_SQ_DEADLINE
-	default "none" if DEFAULT_SQ_NONE
-
-choice
-	prompt "Default multi-queue blk-mq I/O scheduler"
-	default DEFAULT_MQ_NONE
-	help
-	  Select the I/O scheduler which will be used by default for blk-mq
-	  managed block devices with multiple queues.
-
-config DEFAULT_MQ_DEADLINE
-	bool "MQ Deadline" if MQ_IOSCHED_DEADLINE=y
-
-config DEFAULT_MQ_NONE
-	bool "None"
-
-endchoice
-
-config DEFAULT_MQ_IOSCHED
-	string
-	default "mq-deadline" if DEFAULT_MQ_DEADLINE
-	default "none" if DEFAULT_MQ_NONE
-
 endmenu
 
 endif

--- a/block/blk-mq-sched.c
+++ b/block/blk-mq-sched.c
@@ -205,7 +205,7 @@ void blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx)
 	 * needing a restart in that case.
 	 */
 	if (!list_empty(&rq_list)) {
-		blk_mq_sched_mark_restart(hctx);
+		blk_mq_sched_mark_restart_hctx(hctx);
 		did_work = blk_mq_dispatch_rq_list(hctx, &rq_list);
 	} else if (!has_sched_dispatch) {
 		blk_mq_flush_busy_ctxs(hctx, &rq_list);
@@ -331,20 +331,16 @@ static void blk_mq_sched_restart_hctx(struct blk_mq_hw_ctx *hctx)
 void blk_mq_sched_restart_queues(struct blk_mq_hw_ctx *hctx)
 {
+	struct request_queue *q = hctx->queue;
 	unsigned int i;
 
-	if (!(hctx->flags & BLK_MQ_F_TAG_SHARED))
+	if (test_bit(QUEUE_FLAG_RESTART, &q->queue_flags)) {
+		if (test_and_clear_bit(QUEUE_FLAG_RESTART, &q->queue_flags)) {
+			queue_for_each_hw_ctx(q, hctx, i)
+				blk_mq_sched_restart_hctx(hctx);
+		}
+	} else {
 		blk_mq_sched_restart_hctx(hctx);
-	else {
-		struct request_queue *q = hctx->queue;
-
-		if (!test_bit(QUEUE_FLAG_RESTART, &q->queue_flags))
-			return;
-
-		clear_bit(QUEUE_FLAG_RESTART, &q->queue_flags);
-		queue_for_each_hw_ctx(q, hctx, i)
-			blk_mq_sched_restart_hctx(hctx);
 	}
 }
@@ -498,15 +494,6 @@ int blk_mq_sched_init(struct request_queue *q)
 {
 	int ret;
 
-#if defined(CONFIG_DEFAULT_SQ_NONE)
-	if (q->nr_hw_queues == 1)
-		return 0;
-#endif
-#if defined(CONFIG_DEFAULT_MQ_NONE)
-	if (q->nr_hw_queues > 1)
-		return 0;
-#endif
-
 	mutex_lock(&q->sysfs_lock);
 	ret = elevator_init(q, NULL);
 	mutex_unlock(&q->sysfs_lock);

--- a/block/blk-mq-sched.h
+++ b/block/blk-mq-sched.h
@@ -122,17 +122,27 @@ static inline bool blk_mq_sched_has_work(struct blk_mq_hw_ctx *hctx)
 	return false;
 }
 
-static inline void blk_mq_sched_mark_restart(struct blk_mq_hw_ctx *hctx)
+/*
+ * Mark a hardware queue as needing a restart.
+ */
+static inline void blk_mq_sched_mark_restart_hctx(struct blk_mq_hw_ctx *hctx)
 {
-	if (!test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state)) {
+	if (!test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
 		set_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);
-		if (hctx->flags & BLK_MQ_F_TAG_SHARED) {
-			struct request_queue *q = hctx->queue;
+}
 
-			if (!test_bit(QUEUE_FLAG_RESTART, &q->queue_flags))
-				set_bit(QUEUE_FLAG_RESTART, &q->queue_flags);
-		}
-	}
+/*
+ * Mark a hardware queue and the request queue it belongs to as needing a
+ * restart.
+ */
+static inline void blk_mq_sched_mark_restart_queue(struct blk_mq_hw_ctx *hctx)
+{
+	struct request_queue *q = hctx->queue;
+
+	if (!test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
+		set_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);
+	if (!test_bit(QUEUE_FLAG_RESTART, &q->queue_flags))
+		set_bit(QUEUE_FLAG_RESTART, &q->queue_flags);
 }
 
 static inline bool blk_mq_sched_needs_restart(struct blk_mq_hw_ctx *hctx)
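
The restart split above has a simple motivation: with a shared tag set, the
completion that frees a tag may belong to a different hardware queue than the
one that stalled, so the queue-wide QUEUE_FLAG_RESTART fans out to every hctx,
and test_and_clear_bit() guarantees exactly one CPU performs that fan-out. A
minimal userspace sketch of the same flag discipline, using C11 atomics in
place of the kernel bitops (all names here are illustrative, not the kernel
API):

	#include <stdatomic.h>
	#include <stdbool.h>
	#include <stdio.h>

	static atomic_uint hctx_state;   /* stand-in for hctx->state */
	static atomic_uint queue_flags;  /* stand-in for q->queue_flags */
	#define SCHED_RESTART (1u << 0)
	#define QUEUE_RESTART (1u << 0)

	/* Mark only this hardware queue (cheap read before the atomic RMW). */
	static void mark_restart_hctx(void)
	{
		if (!(atomic_load(&hctx_state) & SCHED_RESTART))
			atomic_fetch_or(&hctx_state, SCHED_RESTART);
	}

	/* Mark the hardware queue and the whole request queue. */
	static void mark_restart_queue(void)
	{
		mark_restart_hctx();
		if (!(atomic_load(&queue_flags) & QUEUE_RESTART))
			atomic_fetch_or(&queue_flags, QUEUE_RESTART);
	}

	/* Completion path: exactly one caller wins the queue-wide restart. */
	static bool restart_queues(void)
	{
		unsigned int old = atomic_fetch_and(&queue_flags, ~QUEUE_RESTART);
		return old & QUEUE_RESTART; /* true only for the winning CPU */
	}

	int main(void)
	{
		mark_restart_queue();
		printf("first clear: %d, second: %d\n",
		       restart_queues(), restart_queues()); /* 1, then 0 */
		return 0;
	}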

--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -904,6 +904,44 @@ static bool reorder_tags_to_front(struct list_head *list)
 	return first != NULL;
 }
 
+static int blk_mq_dispatch_wake(wait_queue_t *wait, unsigned mode, int flags,
+				void *key)
+{
+	struct blk_mq_hw_ctx *hctx;
+
+	hctx = container_of(wait, struct blk_mq_hw_ctx, dispatch_wait);
+
+	list_del(&wait->task_list);
+	clear_bit_unlock(BLK_MQ_S_TAG_WAITING, &hctx->state);
+	blk_mq_run_hw_queue(hctx, true);
+	return 1;
+}
+
+static bool blk_mq_dispatch_wait_add(struct blk_mq_hw_ctx *hctx)
+{
+	struct sbq_wait_state *ws;
+
+	/*
+	 * The TAG_WAITING bit serves as a lock protecting hctx->dispatch_wait.
+	 * The thread which wins the race to grab this bit adds the hardware
+	 * queue to the wait queue.
+	 */
+	if (test_bit(BLK_MQ_S_TAG_WAITING, &hctx->state) ||
+	    test_and_set_bit_lock(BLK_MQ_S_TAG_WAITING, &hctx->state))
+		return false;
+
+	init_waitqueue_func_entry(&hctx->dispatch_wait, blk_mq_dispatch_wake);
+	ws = bt_wait_ptr(&hctx->tags->bitmap_tags, hctx);
+
+	/*
+	 * As soon as this returns, it's no longer safe to fiddle with
+	 * hctx->dispatch_wait, since a completion can wake up the wait queue
+	 * and unlock the bit.
+	 */
+	add_wait_queue(&ws->wait, &hctx->dispatch_wait);
+	return true;
+}
+
 bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *list)
 {
 	struct request_queue *q = hctx->queue;
@@ -931,15 +969,22 @@ bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *list)
 			continue;
 
 		/*
-		 * We failed getting a driver tag. Mark the queue(s)
-		 * as needing a restart. Retry getting a tag again,
-		 * in case the needed IO completed right before we
-		 * marked the queue as needing a restart.
+		 * The initial allocation attempt failed, so we need to
+		 * rerun the hardware queue when a tag is freed.
 		 */
-		blk_mq_sched_mark_restart(hctx);
-		if (!blk_mq_get_driver_tag(rq, &hctx, false))
-			break;
+		if (blk_mq_dispatch_wait_add(hctx)) {
+			/*
+			 * It's possible that a tag was freed in the
+			 * window between the allocation failure and
+			 * adding the hardware queue to the wait queue.
+			 */
+			if (!blk_mq_get_driver_tag(rq, &hctx, false))
+				break;
+		} else {
+			break;
+		}
 		}
 
 		list_del_init(&rq->queuelist);
 		bd.rq = rq;
@@ -995,10 +1040,11 @@ bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *list)
 	 *
 	 * blk_mq_run_hw_queue() already checks the STOPPED bit
 	 *
-	 * If RESTART is set, then let completion restart the queue
-	 * instead of potentially looping here.
+	 * If RESTART or TAG_WAITING is set, then let completion restart
+	 * the queue instead of potentially looping here.
 	 */
-	if (!blk_mq_sched_needs_restart(hctx))
+	if (!blk_mq_sched_needs_restart(hctx) &&
+	    !test_bit(BLK_MQ_S_TAG_WAITING, &hctx->state))
 		blk_mq_run_hw_queue(hctx, true);
 }
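
Two details carry the TAG_WAITING scheme above: only the winner of
test_and_set_bit_lock() may touch hctx->dispatch_wait, and the driver-tag
allocation is retried after the wait-queue entry is armed, so a tag freed in
the window between the failed allocation and registration cannot be lost. A
compact userspace sketch of that ordering, assuming C11 atomics and a toy tag
counter rather than the kernel's sbitmap API:

	#include <stdatomic.h>
	#include <stdbool.h>
	#include <stdio.h>

	static atomic_bool tag_waiting; /* stand-in for BLK_MQ_S_TAG_WAITING */
	static atomic_int  free_tags;   /* toy tag pool */

	static bool get_tag(void)
	{
		int n = atomic_load(&free_tags);
		while (n > 0)
			if (atomic_compare_exchange_weak(&free_tags, &n, n - 1))
				return true;
		return false;
	}

	/* Return true if we won the single waiter slot. */
	static bool dispatch_wait_add(void)
	{
		/* Cheap test first, then the winning RMW, as in the patch. */
		if (atomic_load(&tag_waiting) ||
		    atomic_exchange(&tag_waiting, true))
			return false;
		/* ...the kernel registers the wakeup callback here... */
		return true;
	}

	static bool dispatch_one(void)
	{
		if (get_tag())
			return true;
		if (!dispatch_wait_add())
			return false; /* someone else waits; completion reruns us */
		/*
		 * Re-check after arming the waiter: a tag freed between the
		 * failed allocation and registration is picked up here.
		 */
		return get_tag();
	}

	int main(void)
	{
		atomic_store(&free_tags, 1);
		printf("dispatch: %d\n", dispatch_one()); /* 1: tag available */
		printf("dispatch: %d\n", dispatch_one()); /* 0: now waiting */
		return 0;
	}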

--- a/block/elevator.c
+++ b/block/elevator.c
@@ -220,17 +220,24 @@ int elevator_init(struct request_queue *q, char *name)
 	}
 
 	if (!e) {
-		if (q->mq_ops && q->nr_hw_queues == 1)
-			e = elevator_get(CONFIG_DEFAULT_SQ_IOSCHED, false);
-		else if (q->mq_ops)
-			e = elevator_get(CONFIG_DEFAULT_MQ_IOSCHED, false);
-		else
+		/*
+		 * For blk-mq devices, we default to using mq-deadline,
+		 * if available, for single queue devices. If deadline
+		 * isn't available OR we have multiple queues, default
+		 * to "none".
+		 */
+		if (q->mq_ops) {
+			if (q->nr_hw_queues == 1)
+				e = elevator_get("mq-deadline", false);
+			if (!e)
+				return 0;
+		} else
 			e = elevator_get(CONFIG_DEFAULT_IOSCHED, false);
 
 		if (!e) {
 			printk(KERN_ERR
 				"Default I/O scheduler not found. " \
-					"Using noop/none.\n");
+					"Using noop.\n");
 			e = elevator_get("noop", false);
 		}
 	}
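
With the Kconfig defaults gone, the policy above lives in exactly one place:
single-queue blk-mq devices try mq-deadline and silently fall back to "none",
multi-queue devices get "none", and only the legacy path keeps its build-time
default with the noop fallback. A sketch of the decision tree (strings stand
in for elevator_get() lookups; mq_deadline_available is an assumed knob, not
kernel code):

	#include <stdbool.h>
	#include <stdio.h>

	static bool mq_deadline_available = true; /* compiled in or not */

	static const char *pick_default_elevator(bool mq_ops, int nr_hw_queues)
	{
		if (mq_ops) {
			/* blk-mq: mq-deadline for single queue, else none. */
			if (nr_hw_queues == 1 && mq_deadline_available)
				return "mq-deadline";
			return "none";
		}
		/* legacy: CONFIG_DEFAULT_IOSCHED, with noop as last resort */
		return "cfq"; /* stand-in for the configured default */
	}

	int main(void)
	{
		printf("mq, 1 queue : %s\n", pick_default_elevator(true, 1));
		printf("mq, 4 queues: %s\n", pick_default_elevator(true, 4));
		printf("legacy      : %s\n", pick_default_elevator(false, 1));
		return 0;
	}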

--- a/block/genhd.c
+++ b/block/genhd.c
@@ -669,14 +669,14 @@ void del_gendisk(struct gendisk *disk)
 	disk_part_iter_init(&piter, disk,
 			     DISK_PITER_INCL_EMPTY | DISK_PITER_REVERSE);
 	while ((part = disk_part_iter_next(&piter))) {
-		bdev_unhash_inode(MKDEV(disk->major,
-					disk->first_minor + part->partno));
 		invalidate_partition(disk, part->partno);
+		bdev_unhash_inode(part_devt(part));
 		delete_partition(disk, part->partno);
 	}
 	disk_part_iter_exit(&piter);
 
 	invalidate_partition(disk, 0);
+	bdev_unhash_inode(disk_devt(disk));
 	set_capacity(disk, 0);
 	disk->flags &= ~GENHD_FL_UP;
(File diff suppressed because it is too large.)

--- a/drivers/block/cciss_scsi.c
+++ b/drivers/block/cciss_scsi.c
@@ -17,15 +17,15 @@
  *    02111-1307, USA.
  *
  *    Questions/Comments/Bugfixes to iss_storagedev@hp.com
  *
  *    Author: Stephen M. Cameron
  */
 #ifdef CONFIG_CISS_SCSI_TAPE
 
 /* Here we have code to present the driver as a scsi driver
    as it is simultaneously presented as a block driver.  The
    reason for doing this is to allow access to SCSI tape drives
    through the array controller.  Note in particular, neither
    physical nor logical disks are presented through the scsi layer. */
 
 #include <linux/timer.h>
@@ -37,7 +37,7 @@
 #include <scsi/scsi_cmnd.h>
 #include <scsi/scsi_device.h>
 #include <scsi/scsi_host.h>
 
 #include "cciss_scsi.h"
 
@@ -120,7 +120,7 @@ struct cciss_scsi_adapter_data_t {
 	struct cciss_scsi_cmd_stack_t cmd_stack;
 	SGDescriptor_struct **cmd_sg_list;
 	int registered;
 	spinlock_t lock; // to protect ccissscsi[ctlr];
 };
 
 #define CPQ_TAPE_LOCK(h, flags) spin_lock_irqsave( \
@@ -143,36 +143,36 @@ scsi_cmd_alloc(ctlr_info_t *h)
 	u64bit temp64;
 
 	sa = h->scsi_ctlr;
 	stk = &sa->cmd_stack;
 
 	if (stk->top < 0)
 		return NULL;
 	c = stk->elem[stk->top];
 	/* memset(c, 0, sizeof(*c)); */
 	memset(&c->cmd, 0, sizeof(c->cmd));
 	memset(&c->Err, 0, sizeof(c->Err));
 	/* set physical addr of cmd and addr of scsi parameters */
 	c->cmd.busaddr = c->busaddr;
 	c->cmd.cmdindex = c->cmdindex;
 	/* (__u32) (stk->cmd_pool_handle +
 	   (sizeof(struct cciss_scsi_cmd_stack_elem_t)*stk->top)); */
 
 	temp64.val = (__u64) (c->busaddr + sizeof(CommandList_struct));
 	/* (__u64) (stk->cmd_pool_handle +
 	   (sizeof(struct cciss_scsi_cmd_stack_elem_t)*stk->top) +
 	   sizeof(CommandList_struct)); */
 	stk->top--;
 	c->cmd.ErrDesc.Addr.lower = temp64.val32.lower;
 	c->cmd.ErrDesc.Addr.upper = temp64.val32.upper;
 	c->cmd.ErrDesc.Len = sizeof(ErrorInfo_struct);
 
 	c->cmd.ctlr = h->ctlr;
 	c->cmd.err_info = &c->Err;
 
 	return (CommandList_struct *) c;
 }
 
 static void
 scsi_cmd_free(ctlr_info_t *h, CommandList_struct *c)
 {
 	/* assume only one process in here at a time, locking done by caller. */
@@ -183,7 +183,7 @@ scsi_cmd_free(ctlr_info_t *h, CommandList_struct *c)
 	struct cciss_scsi_cmd_stack_t *stk;
 
 	sa = h->scsi_ctlr;
 	stk = &sa->cmd_stack;
 	stk->top++;
 	if (stk->top >= stk->nelems) {
 		dev_err(&h->pdev->dev,
@@ -228,7 +228,7 @@ scsi_cmd_stack_setup(ctlr_info_t *h, struct cciss_scsi_adapter_data_t *sa)
 	}
 	for (i = 0; i < stk->nelems; i++) {
 		stk->elem[i] = &stk->pool[i];
 		stk->elem[i]->busaddr = (__u32) (stk->cmd_pool_handle +
 			(sizeof(struct cciss_scsi_cmd_stack_elem_t) * i));
 		stk->elem[i]->cmdindex = i;
 	}
@@ -244,7 +244,7 @@ scsi_cmd_stack_free(ctlr_info_t *h)
 	size_t size;
 
 	sa = h->scsi_ctlr;
 	stk = &sa->cmd_stack;
 	if (stk->top != stk->nelems-1) {
 		dev_warn(&h->pdev->dev,
 			"bug: %d scsi commands are still outstanding.\n",
@@ -266,7 +266,7 @@ print_cmd(CommandList_struct *cp)
 	printk("queue:%d\n", cp->Header.ReplyQueue);
 	printk("sglist:%d\n", cp->Header.SGList);
 	printk("sgtot:%d\n", cp->Header.SGTotal);
 	printk("Tag:0x%08x/0x%08x\n", cp->Header.Tag.upper,
 			cp->Header.Tag.lower);
 	printk("LUN:0x%8phN\n", cp->Header.LUN.LunAddrBytes);
 	printk("CDBLen:%d\n", cp->Request.CDBLen);
@@ -275,8 +275,8 @@ print_cmd(CommandList_struct *cp)
 	printk("   Dir:%d\n",cp->Request.Type.Direction);
 	printk("Timeout:%d\n",cp->Request.Timeout);
 	printk("CDB: %16ph\n", cp->Request.CDB);
 	printk("edesc.Addr: 0x%08x/0%08x, Len  = %d\n",
 		cp->ErrDesc.Addr.upper, cp->ErrDesc.Addr.lower,
 		cp->ErrDesc.Len);
 	printk("sgs..........Errorinfo:\n");
 	printk("scsistatus:%d\n", cp->err_info->ScsiStatus);
@@ -289,7 +289,7 @@ print_cmd(CommandList_struct *cp)
 }
 #endif
 
 static int
 find_bus_target_lun(ctlr_info_t *h, int *bus, int *target, int *lun)
 {
 	/* finds an unused bus, target, lun for a new device */
@@ -299,24 +299,24 @@ find_bus_target_lun(ctlr_info_t *h, int *bus, int *target, int *lun)
 	memset(&target_taken[0], 0, CCISS_MAX_SCSI_DEVS_PER_HBA);
 
 	target_taken[SELF_SCSI_ID] = 1;
 	for (i = 0; i < ccissscsi[h->ctlr].ndevices; i++)
 		target_taken[ccissscsi[h->ctlr].dev[i].target] = 1;
 
 	for (i = 0; i < CCISS_MAX_SCSI_DEVS_PER_HBA; i++) {
 		if (!target_taken[i]) {
 			*bus = 0; *target=i; *lun = 0; found=1;
 			break;
 		}
 	}
 	return (!found);
 }
 
 struct scsi2map {
 	char scsi3addr[8];
 	int bus, target, lun;
 };
 
 static int
 cciss_scsi_add_entry(ctlr_info_t *h, int hostno,
 		struct cciss_scsi_dev_t *device,
 		struct scsi2map *added, int *nadded)
@@ -381,8 +381,8 @@ cciss_scsi_add_entry(ctlr_info_t *h, int hostno,
 	ccissscsi[h->ctlr].ndevices++;
 
 	/* initially, (before registering with scsi layer) we don't
 	   know our hostno and we don't want to print anything first
 	   time anyway (the scsi layer's inquiries will show that info) */
 	if (hostno != -1)
 		dev_info(&h->pdev->dev, "%s device c%db%dt%dl%d added.\n",
@@ -467,7 +467,7 @@ adjust_cciss_scsi_table(ctlr_info_t *h, int hostno,
 	/* sd contains scsi3 addresses and devtypes, but
 	   bus target and lun are not filled in.  This funciton
 	   takes what's in sd to be the current and adjusts
 	   ccissscsi[] to be in line with what's in sd. */
 
 	int i,j, found, changes=0;
 	struct cciss_scsi_dev_t *csd;
@@ -492,7 +492,7 @@ adjust_cciss_scsi_table(ctlr_info_t *h, int hostno,
 	if (hostno != -1)  /* if it's not the first time... */
 		sh = h->scsi_ctlr->scsi_host;
 
 	/* find any devices in ccissscsi[] that are not in
 	   sd[] and remove them from ccissscsi[] */
 
 	i = 0;
@@ -512,7 +512,7 @@ adjust_cciss_scsi_table(ctlr_info_t *h, int hostno,
 			}
 		}
 		if (found == 0) { /* device no longer present. */
 			changes++;
 			cciss_scsi_remove_entry(h, hostno, i,
 				removed, &nremoved);
@@ -641,14 +641,13 @@ lookup_scsi3addr(ctlr_info_t *h, int bus, int target, int lun, char *scsi3addr)
 	return -1;
 }
 
 static void
 cciss_scsi_setup(ctlr_info_t *h)
 {
 	struct cciss_scsi_adapter_data_t * shba;
 
 	ccissscsi[h->ctlr].ndevices = 0;
-	shba = (struct cciss_scsi_adapter_data_t *)
-		kmalloc(sizeof(*shba), GFP_KERNEL);
+	shba = kmalloc(sizeof(*shba), GFP_KERNEL);
 	if (shba == NULL)
 		return;
 	shba->scsi_host = NULL;
@@ -693,20 +692,18 @@ static void complete_scsi_command(CommandList_struct *c, int timeout,
 	/* copy the sense data whether we need to or not. */
 
 	memcpy(cmd->sense_buffer, ei->SenseInfo,
 		ei->SenseLen > SCSI_SENSE_BUFFERSIZE ?
 			SCSI_SENSE_BUFFERSIZE :
 			ei->SenseLen);
 	scsi_set_resid(cmd, ei->ResidualCnt);
 
-	if(ei->CommandStatus != 0)
-	{ /* an error has occurred */
-		switch(ei->CommandStatus)
-		{
+	if (ei->CommandStatus != 0) { /* an error has occurred */
+		switch (ei->CommandStatus) {
 		case CMD_TARGET_STATUS:
 			/* Pass it up to the upper layers... */
 			if (!ei->ScsiStatus) {
 
 	/* Ordinarily, this case should never happen, but there is a bug
 	   in some released firmware revisions that allows it to happen
 	   if, for example, a 4100 backplane loses power and the tape
@@ -731,7 +728,7 @@ static void complete_scsi_command(CommandList_struct *c, int timeout,
 			print_cmd(c);
 		 */
 	/* We get CMD_INVALID if you address a non-existent tape drive instead
 	   of a selection timeout (no response).  You will see this if you yank
 	   out a tape drive, then try to access it.  This is kind of a shame
 	   because it means that any other CMD_INVALID (e.g. driver bug) will
 	   get interpreted as a missing target. */
@@ -780,7 +777,7 @@ static void complete_scsi_command(CommandList_struct *c, int timeout,
 			cmd->result = DID_ERROR << 16;
 			dev_warn(&h->pdev->dev,
 				"%p returned unknown status %x\n", c,
 					ei->CommandStatus);
 		}
 	}
 	cmd->scsi_done(cmd);
@@ -796,15 +793,15 @@ cciss_scsi_detect(ctlr_info_t *h)
 	sh = scsi_host_alloc(&cciss_driver_template, sizeof(struct ctlr_info *));
 	if (sh == NULL)
 		goto fail;
 	sh->io_port = 0;	// good enough?  FIXME,
 	sh->n_io_port = 0;	// I don't think we use these two...
 	sh->this_id = SELF_SCSI_ID;
 	sh->can_queue = cciss_tape_cmds;
 	sh->sg_tablesize = h->maxsgentries;
 	sh->max_cmd_len = MAX_COMMAND_SIZE;
 	sh->max_sectors = h->cciss_max_sectors;
 
 	((struct cciss_scsi_adapter_data_t *)
 		h->scsi_ctlr)->scsi_host = sh;
 	sh->hostdata[0] = (unsigned long) h;
 	sh->irq = h->intr[SIMPLE_MODE_INT];
@@ -856,7 +853,7 @@ cciss_map_one(struct pci_dev *pdev,
 static int
 cciss_scsi_do_simple_cmd(ctlr_info_t *h,
 			CommandList_struct *c,
 			unsigned char *scsi3addr,
 			unsigned char *cdb,
 			unsigned char cdblen,
 			unsigned char *buf, int bufsize,
@@ -871,7 +868,7 @@ cciss_scsi_do_simple_cmd(ctlr_info_t *h,
 	c->Header.Tag.lower = c->busaddr;  /* Use k. address of cmd as tag */
 	// Fill in the request block...
 
 	/* printk("Using scsi3addr 0x%02x%0x2%0x2%0x2%0x2%0x2%0x2%0x2\n",
 		scsi3addr[0], scsi3addr[1], scsi3addr[2], scsi3addr[3],
 		scsi3addr[4], scsi3addr[5], scsi3addr[6], scsi3addr[7]); */
@@ -885,7 +882,7 @@ cciss_scsi_do_simple_cmd(ctlr_info_t *h,
 	/* Fill in the SG list and do dma mapping */
 	cciss_map_one(h->pdev, c, (unsigned char *) buf,
 			bufsize, DMA_FROM_DEVICE);
 
 	c->waiting = &wait;
 	enqueue_cmd_and_start_io(h, c);
@@ -896,14 +893,13 @@ cciss_scsi_do_simple_cmd(ctlr_info_t *h,
 	return(0);
 }
 
 static void
 cciss_scsi_interpret_error(ctlr_info_t *h, CommandList_struct *c)
 {
 	ErrorInfo_struct *ei;
 
 	ei = c->err_info;
-	switch(ei->CommandStatus)
-	{
+	switch (ei->CommandStatus) {
 	case CMD_TARGET_STATUS:
 		dev_warn(&h->pdev->dev,
 			"cmd %p has completed with errors\n", c);
@@ -1005,7 +1001,7 @@ cciss_scsi_do_inquiry(ctlr_info_t *h, unsigned char *scsi3addr,
 	if (rc != 0) return rc; /* something went wrong */
 
 	if (ei->CommandStatus != 0 &&
 		ei->CommandStatus != CMD_DATA_UNDERRUN) {
 		cciss_scsi_interpret_error(h, c);
 		rc = -1;
@@ -1013,7 +1009,7 @@ cciss_scsi_do_inquiry(ctlr_info_t *h, unsigned char *scsi3addr,
 	spin_lock_irqsave(&h->lock, flags);
 	scsi_cmd_free(h, c);
 	spin_unlock_irqrestore(&h->lock, flags);
 	return rc;
 }
 
 /* Get the device id from inquiry page 0x83 */
@@ -1042,7 +1038,7 @@ cciss_scsi_do_report_phys_luns(ctlr_info_t *h,
 	int rc;
 	CommandList_struct *c;
 	unsigned char cdb[12];
 	unsigned char scsi3addr[8];
 	ErrorInfo_struct *ei;
 	unsigned long flags;
@@ -1069,14 +1065,14 @@ cciss_scsi_do_report_phys_luns(ctlr_info_t *h,
 	cdb[11] = 0;
 
 	rc = cciss_scsi_do_simple_cmd(h, c, scsi3addr,
 				cdb, 12,
 				(unsigned char *) buf,
 				bufsize, XFER_READ);
 
 	if (rc != 0) return rc; /* something went wrong */
 
 	ei = c->err_info;
 	if (ei->CommandStatus != 0 &&
 		ei->CommandStatus != CMD_DATA_UNDERRUN) {
 		cciss_scsi_interpret_error(h, c);
 		rc = -1;
@@ -1084,36 +1080,36 @@
 	spin_lock_irqsave(&h->lock, flags);
 	scsi_cmd_free(h, c);
 	spin_unlock_irqrestore(&h->lock, flags);
 	return rc;
 }
 
 static void
 cciss_update_non_disk_devices(ctlr_info_t *h, int hostno)
 {
 	/* the idea here is we could get notified from /proc
 	   that some devices have changed, so we do a report
 	   physical luns cmd, and adjust our list of devices
 	   accordingly.  (We can't rely on the scsi-mid layer just
 	   doing inquiries, because the "busses" that the scsi
 	   mid-layer probes are totally fabricated by this driver,
 	   so new devices wouldn't show up.
 
 	   the scsi3addr's of devices won't change so long as the
 	   adapter is not reset.  That means we can rescan and
 	   tell which devices we already know about, vs. new
 	   devices, vs. disappearing devices.
 
 	   Also, if you yank out a tape drive, then put in a disk
 	   in it's place, (say, a configured volume from another
 	   array controller for instance) _don't_ poke this driver
 	   (so it thinks it's still a tape, but _do_ poke the scsi
 	   mid layer, so it does an inquiry... the scsi mid layer
 	   will see the physical disk.  This would be bad.  Need to
 	   think about how to prevent that.  One idea would be to
 	   snoop all scsi responses and if an inquiry repsonse comes
 	   back that reports a disk, chuck it an return selection
 	   timeout instead and adjust our table...  Not sure i like
 	   that though.
 	*/
 #define OBDR_TAPE_INQ_SIZE 49
@@ -1141,9 +1137,9 @@ cciss_update_non_disk_devices(ctlr_info_t *h, int hostno)
 	ch = &ld_buff->LUNListLength[0];
 	num_luns = ((ch[0]<<24) | (ch[1]<<16) | (ch[2]<<8) | ch[3]) / 8;
 	if (num_luns > CISS_MAX_PHYS_LUN) {
 		printk(KERN_WARNING
 			"cciss: Maximum physical LUNs (%d) exceeded.  "
 			"%d LUNs ignored.\n", CISS_MAX_PHYS_LUN,
 			num_luns - CISS_MAX_PHYS_LUN);
 		num_luns = CISS_MAX_PHYS_LUN;
 	}
@@ -1154,7 +1150,7 @@ cciss_update_non_disk_devices(ctlr_info_t *h, int hostno)
 	}
 
 	/* adjust our table of devices */
 	for (i = 0; i < num_luns; i++) {
 		/* for each physical lun, do an inquiry */
 		if (ld_buff->LUN[i][3] & 0xC0) continue;
@@ -1182,8 +1178,7 @@ cciss_update_non_disk_devices(ctlr_info_t *h, int hostno)
 		cciss_scsi_get_device_id(h, scsi3addr,
 			this_device->device_id, sizeof(this_device->device_id));
 
-		switch (this_device->devtype)
-		{
+		switch (this_device->devtype) {
 		case 0x05: /* CD-ROM */ {
 
 			/* We don't *really* support actual CD-ROM devices,
@@ -1213,7 +1208,7 @@ cciss_update_non_disk_devices(ctlr_info_t *h, int hostno)
 			currentsd[ncurrent] = *this_device;
 			ncurrent++;
 			break;
 		default:
 			break;
 		}
 	}
@@ -1258,8 +1253,8 @@ cciss_scsi_write_info(struct Scsi_Host *sh,
 		return -EINVAL;
 
 	return cciss_scsi_user_command(h, sh->host_no,
 			buffer, length);
 }
 
 static int
 cciss_scsi_show_info(struct seq_file *m, struct Scsi_Host *sh)
@@ -1297,8 +1292,8 @@ cciss_scsi_show_info(struct seq_file *m, struct Scsi_Host *sh)
 	return 0;
 }
 
 /* cciss_scatter_gather takes a struct scsi_cmnd, (cmd), and does the pci
    dma mapping and fills in the scatter gather entries of the
    cciss command, c. */
 static void cciss_scatter_gather(ctlr_info_t *h, CommandList_struct *c,
@@ -1394,7 +1389,7 @@ cciss_scsi_queue_command_lck(struct scsi_cmnd *cmd, void (*done)(struct scsi_cmn
 	// Fill in the command list header
 
 	cmd->scsi_done = done;    // save this for use by completion code
 
 	/* save c in case we have to abort it */
 	cmd->host_scribble = (unsigned char *) c;
@@ -1404,7 +1399,7 @@ cciss_scsi_queue_command_lck(struct scsi_cmnd *cmd, void (*done)(struct scsi_cmn
 	c->Header.ReplyQueue = 0;  /* unused in simple mode */
 	memcpy(&c->Header.LUN.LunAddrBytes[0], &scsi3addr[0], 8);
 	c->Header.Tag.lower = c->busaddr;  /* Use k. address of cmd as tag */
 
 	// Fill in the request block...
 
 	c->Request.Timeout = 0;
@@ -1414,8 +1409,7 @@ cciss_scsi_queue_command_lck(struct scsi_cmnd *cmd, void (*done)(struct scsi_cmn
 	memcpy(c->Request.CDB, cmd->cmnd, cmd->cmd_len);
 	c->Request.Type.Type = TYPE_CMD;
 	c->Request.Type.Attribute = ATTR_SIMPLE;
-	switch(cmd->sc_data_direction)
-	{
+	switch (cmd->sc_data_direction) {
 	case DMA_TO_DEVICE:
 		c->Request.Type.Direction = XFER_WRITE;
 		break;
@@ -1432,15 +1426,15 @@ cciss_scsi_queue_command_lck(struct scsi_cmnd *cmd, void (*done)(struct scsi_cmn
 		c->Request.Type.Direction = XFER_RSVD;
 		// This is technically wrong, and cciss controllers should
 		// reject it with CMD_INVALID, which is the most correct
 		// response, but non-fibre backends appear to let it
 		// slide by, and give the same results as if this field
 		// were set correctly.  Either way is acceptable for
 		// our purposes here.
 		break;
 	default:
 		dev_warn(&h->pdev->dev, "unknown data direction: %d\n",
 			cmd->sc_data_direction);
 		BUG();
@@ -1464,9 +1458,9 @@ static void cciss_unregister_scsi(ctlr_info_t *h)
 	spin_lock_irqsave(&h->lock, flags);
 	sa = h->scsi_ctlr;
 	stk = &sa->cmd_stack;
 
 	/* if we weren't ever actually registered, don't unregister */
 	if (sa->registered) {
 		spin_unlock_irqrestore(&h->lock, flags);
 		scsi_remove_host(sa->scsi_host);
@@ -1474,7 +1468,7 @@ static void cciss_unregister_scsi(ctlr_info_t *h)
 		spin_lock_irqsave(&h->lock, flags);
 	}
 
 	/* set scsi_host to NULL so our detect routine will
 	   find us on register */
 	sa->scsi_host = NULL;
 	spin_unlock_irqrestore(&h->lock, flags);
@@ -1490,7 +1484,7 @@ static int cciss_engage_scsi(ctlr_info_t *h)
 	spin_lock_irqsave(&h->lock, flags);
 	sa = h->scsi_ctlr;
 	stk = &sa->cmd_stack;
 
 	if (sa->registered) {
 		dev_info(&h->pdev->dev, "SCSI subsystem already engaged.\n");
@@ -1586,13 +1580,13 @@ retry_tur:
 	return rc;
 }
 
 /* Need at least one of these error handlers to keep ../scsi/hosts.c from
  * complaining.  Doing a host- or bus-reset can't do anything good here.
  * Despite what it might say in scsi_error.c, there may well be commands
  * on the controller, as the cciss driver registers twice, once as a block
  * device for the logical drives, and once as a scsi device, for any tape
  * drives.  So we know there are no commands out on the tape drives, but we
  * don't know there are no commands on the controller, and it is likely
  * that there probably are, as the cciss block device is most commonly used
  * as a boot device (embedded controller on HP/Compaq systems.)
 */

--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -96,6 +96,10 @@ static int max_part;
 static struct workqueue_struct *recv_workqueue;
 static int part_shift;
 
+static int nbd_dev_dbg_init(struct nbd_device *nbd);
+static void nbd_dev_dbg_close(struct nbd_device *nbd);
+
 static inline struct device *nbd_to_dev(struct nbd_device *nbd)
 {
 	return disk_to_dev(nbd->disk);
@@ -120,7 +124,7 @@ static const char *nbdcmd_to_ascii(int cmd)
 
 static int nbd_size_clear(struct nbd_device *nbd, struct block_device *bdev)
 {
-	bdev->bd_inode->i_size = 0;
+	bd_set_size(bdev, 0);
 	set_capacity(nbd->disk, 0);
 	kobject_uevent(&nbd_to_dev(nbd)->kobj, KOBJ_CHANGE);
 
@@ -129,29 +133,20 @@ static int nbd_size_clear(struct nbd_device *nbd, struct block_device *bdev)
 
 static void nbd_size_update(struct nbd_device *nbd, struct block_device *bdev)
 {
-	if (!nbd_is_connected(nbd))
-		return;
-
-	bdev->bd_inode->i_size = nbd->bytesize;
+	blk_queue_logical_block_size(nbd->disk->queue, nbd->blksize);
+	blk_queue_physical_block_size(nbd->disk->queue, nbd->blksize);
+	bd_set_size(bdev, nbd->bytesize);
 	set_capacity(nbd->disk, nbd->bytesize >> 9);
 	kobject_uevent(&nbd_to_dev(nbd)->kobj, KOBJ_CHANGE);
 }
 
-static int nbd_size_set(struct nbd_device *nbd, struct block_device *bdev,
+static void nbd_size_set(struct nbd_device *nbd, struct block_device *bdev,
 			loff_t blocksize, loff_t nr_blocks)
 {
-	int ret;
-
-	ret = set_blocksize(bdev, blocksize);
-	if (ret)
-		return ret;
-
 	nbd->blksize = blocksize;
 	nbd->bytesize = blocksize * nr_blocks;
-
-	nbd_size_update(nbd, bdev);
-
-	return 0;
+	if (nbd_is_connected(nbd))
+		nbd_size_update(nbd, bdev);
 }
 
 static void nbd_end_request(struct nbd_cmd *cmd)
@@ -571,10 +566,17 @@ static int nbd_queue_rq(struct blk_mq_hw_ctx *hctx,
 	return BLK_MQ_RQ_QUEUE_OK;
 }
 
-static int nbd_add_socket(struct nbd_device *nbd, struct socket *sock)
+static int nbd_add_socket(struct nbd_device *nbd, struct block_device *bdev,
+			  unsigned long arg)
 {
+	struct socket *sock;
 	struct nbd_sock **socks;
 	struct nbd_sock *nsock;
+	int err;
+
+	sock = sockfd_lookup(arg, &err);
+	if (!sock)
+		return err;
 
 	if (!nbd->task_setup)
 		nbd->task_setup = current;
@@ -598,26 +600,20 @@ static int nbd_add_socket(struct nbd_device *nbd, struct block_device *bdev,
 	nsock->sock = sock;
 	socks[nbd->num_connections++] = nsock;
 
+	if (max_part)
+		bdev->bd_invalidated = 1;
 	return 0;
 }
 
 /* Reset all properties of an NBD device */
 static void nbd_reset(struct nbd_device *nbd)
 {
-	int i;
-
-	for (i = 0; i < nbd->num_connections; i++)
-		kfree(nbd->socks[i]);
-	kfree(nbd->socks);
-	nbd->socks = NULL;
 	nbd->runtime_flags = 0;
 	nbd->blksize = 1024;
 	nbd->bytesize = 0;
 	set_capacity(nbd->disk, 0);
 	nbd->flags = 0;
 	nbd->tag_set.timeout = 0;
-	nbd->num_connections = 0;
-	nbd->task_setup = NULL;
 	queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, nbd->disk->queue);
 }
 
@@ -659,81 +655,143 @@ static void send_disconnects(struct nbd_device *nbd)
 	}
 }
 
-static int nbd_dev_dbg_init(struct nbd_device *nbd);
-static void nbd_dev_dbg_close(struct nbd_device *nbd);
+static int nbd_disconnect(struct nbd_device *nbd, struct block_device *bdev)
+{
+	dev_info(disk_to_dev(nbd->disk), "NBD_DISCONNECT\n");
+	if (!nbd->socks)
+		return -EINVAL;
+
+	mutex_unlock(&nbd->config_lock);
+	fsync_bdev(bdev);
+	mutex_lock(&nbd->config_lock);
+
+	/* Check again after getting mutex back.  */
+	if (!nbd->socks)
+		return -EINVAL;
+
+	if (!test_and_set_bit(NBD_DISCONNECT_REQUESTED,
+			      &nbd->runtime_flags))
+		send_disconnects(nbd);
+	return 0;
+}
+
+static int nbd_clear_sock(struct nbd_device *nbd, struct block_device *bdev)
+{
+	sock_shutdown(nbd);
+	nbd_clear_que(nbd);
+	kill_bdev(bdev);
+	nbd_bdev_reset(bdev);
+	/*
+	 * We want to give the run thread a chance to wait for everybody
+	 * to clean up and then do it's own cleanup.
+	 */
+	if (!test_bit(NBD_RUNNING, &nbd->runtime_flags) &&
+	    nbd->num_connections) {
+		int i;
+
+		for (i = 0; i < nbd->num_connections; i++)
+			kfree(nbd->socks[i]);
+		kfree(nbd->socks);
+		nbd->socks = NULL;
+		nbd->num_connections = 0;
+	}
+	nbd->task_setup = NULL;
+
+	return 0;
+}
+
+static int nbd_start_device(struct nbd_device *nbd, struct block_device *bdev)
+{
+	struct recv_thread_args *args;
+	int num_connections = nbd->num_connections;
+	int error = 0, i;
+
+	if (nbd->task_recv)
+		return -EBUSY;
+	if (!nbd->socks)
+		return -EINVAL;
+	if (num_connections > 1 &&
+	    !(nbd->flags & NBD_FLAG_CAN_MULTI_CONN)) {
+		dev_err(disk_to_dev(nbd->disk), "server does not support multiple connections per device.\n");
+		error = -EINVAL;
+		goto out_err;
+	}
+
+	set_bit(NBD_RUNNING, &nbd->runtime_flags);
+	blk_mq_update_nr_hw_queues(&nbd->tag_set, nbd->num_connections);
+	args = kcalloc(num_connections, sizeof(*args), GFP_KERNEL);
+	if (!args) {
+		error = -ENOMEM;
+		goto out_err;
+	}
+	nbd->task_recv = current;
+	mutex_unlock(&nbd->config_lock);
+
+	nbd_parse_flags(nbd, bdev);
+
+	error = device_create_file(disk_to_dev(nbd->disk), &pid_attr);
+	if (error) {
+		dev_err(disk_to_dev(nbd->disk), "device_create_file failed!\n");
+		goto out_recv;
+	}
+
+	nbd_size_update(nbd, bdev);
+
+	nbd_dev_dbg_init(nbd);
+	for (i = 0; i < num_connections; i++) {
+		sk_set_memalloc(nbd->socks[i]->sock->sk);
+		atomic_inc(&nbd->recv_threads);
+		INIT_WORK(&args[i].work, recv_work);
+		args[i].nbd = nbd;
+		args[i].index = i;
+		queue_work(recv_workqueue, &args[i].work);
+	}
+	wait_event_interruptible(nbd->recv_wq,
+				 atomic_read(&nbd->recv_threads) == 0);
+	for (i = 0; i < num_connections; i++)
+		flush_work(&args[i].work);
+	nbd_dev_dbg_close(nbd);
+	nbd_size_clear(nbd, bdev);
+	device_remove_file(disk_to_dev(nbd->disk), &pid_attr);
+out_recv:
+	mutex_lock(&nbd->config_lock);
+	nbd->task_recv = NULL;
+out_err:
+	clear_bit(NBD_RUNNING, &nbd->runtime_flags);
+	nbd_clear_sock(nbd, bdev);
+
+	/* user requested, ignore socket errors */
+	if (test_bit(NBD_DISCONNECT_REQUESTED, &nbd->runtime_flags))
+		error = 0;
+	if (test_bit(NBD_TIMEDOUT, &nbd->runtime_flags))
+		error = -ETIMEDOUT;
+
+	nbd_reset(nbd);
+	return error;
+}
 
 /* Must be called with config_lock held */
 static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
 		       unsigned int cmd, unsigned long arg)
 {
 	switch (cmd) {
-	case NBD_DISCONNECT: {
-		dev_info(disk_to_dev(nbd->disk), "NBD_DISCONNECT\n");
-		if (!nbd->socks)
-			return -EINVAL;
-
-		mutex_unlock(&nbd->config_lock);
-		fsync_bdev(bdev);
-		mutex_lock(&nbd->config_lock);
-
-		/* Check again after getting mutex back.  */
-		if (!nbd->socks)
-			return -EINVAL;
-
-		if (!test_and_set_bit(NBD_DISCONNECT_REQUESTED,
-				      &nbd->runtime_flags))
-			send_disconnects(nbd);
-		return 0;
-	}
-
+	case NBD_DISCONNECT:
+		return nbd_disconnect(nbd, bdev);
 	case NBD_CLEAR_SOCK:
-		sock_shutdown(nbd);
-		nbd_clear_que(nbd);
-		kill_bdev(bdev);
-		nbd_bdev_reset(bdev);
-		/*
-		 * We want to give the run thread a chance to wait for everybody
-		 * to clean up and then do it's own cleanup.
-		 */
-		if (!test_bit(NBD_RUNNING, &nbd->runtime_flags)) {
-			int i;
-
-			for (i = 0; i < nbd->num_connections; i++)
-				kfree(nbd->socks[i]);
-			kfree(nbd->socks);
-			nbd->socks = NULL;
-			nbd->num_connections = 0;
-			nbd->task_setup = NULL;
-		}
-		return 0;
-	case NBD_SET_SOCK: {
-		int err;
-		struct socket *sock = sockfd_lookup(arg, &err);
-
-		if (!sock)
-			return err;
-
-		err = nbd_add_socket(nbd, sock);
-		if (!err && max_part)
-			bdev->bd_invalidated = 1;
-
-		return err;
-	}
-	case NBD_SET_BLKSIZE: {
-		loff_t bsize = div_s64(nbd->bytesize, arg);
-
-		return nbd_size_set(nbd, bdev, arg, bsize);
-	}
+		return nbd_clear_sock(nbd, bdev);
+	case NBD_SET_SOCK:
+		return nbd_add_socket(nbd, bdev, arg);
+	case NBD_SET_BLKSIZE:
+		nbd_size_set(nbd, bdev, arg,
+			     div_s64(nbd->bytesize, arg));
+		return 0;
 	case NBD_SET_SIZE:
-		return nbd_size_set(nbd, bdev, nbd->blksize,
-					div_s64(arg, nbd->blksize));
+		nbd_size_set(nbd, bdev, nbd->blksize,
+			     div_s64(arg, nbd->blksize));
+		return 0;
 	case NBD_SET_SIZE_BLOCKS:
-		return nbd_size_set(nbd, bdev, nbd->blksize, arg);
+		nbd_size_set(nbd, bdev, nbd->blksize, arg);
+		return 0;
 	case NBD_SET_TIMEOUT:
 		nbd->tag_set.timeout = arg * HZ;
 		return 0;
@@ -741,85 +799,14 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
 	case NBD_SET_FLAGS:
 		nbd->flags = arg;
 		return 0;
-
-	case NBD_DO_IT: {
-		struct recv_thread_args *args;
-		int num_connections = nbd->num_connections;
-		int error = 0, i;
-
-		if (nbd->task_recv)
-			return -EBUSY;
-		if (!nbd->socks)
-			return -EINVAL;
-		if (num_connections > 1 &&
-		    !(nbd->flags & NBD_FLAG_CAN_MULTI_CONN)) {
-			dev_err(disk_to_dev(nbd->disk), "server does not support multiple connections per device.\n");
-			error = -EINVAL;
-			goto out_err;
-		}
-
-		set_bit(NBD_RUNNING, &nbd->runtime_flags);
-		blk_mq_update_nr_hw_queues(&nbd->tag_set, nbd->num_connections);
-		args = kcalloc(num_connections, sizeof(*args), GFP_KERNEL);
-		if (!args) {
-			error = -ENOMEM;
-			goto out_err;
-		}
-		nbd->task_recv = current;
-		mutex_unlock(&nbd->config_lock);
-
-		nbd_parse_flags(nbd, bdev);
-
-		error = device_create_file(disk_to_dev(nbd->disk), &pid_attr);
-		if (error) {
-			dev_err(disk_to_dev(nbd->disk), "device_create_file failed!\n");
-			goto out_recv;
-		}
-
-		nbd_size_update(nbd, bdev);
-
-		nbd_dev_dbg_init(nbd);
-		for (i = 0; i < num_connections; i++) {
-			sk_set_memalloc(nbd->socks[i]->sock->sk);
-			atomic_inc(&nbd->recv_threads);
-			INIT_WORK(&args[i].work, recv_work);
-			args[i].nbd = nbd;
-			args[i].index = i;
-			queue_work(recv_workqueue, &args[i].work);
-		}
-		wait_event_interruptible(nbd->recv_wq,
-					 atomic_read(&nbd->recv_threads) == 0);
-		for (i = 0; i < num_connections; i++)
-			flush_work(&args[i].work);
-		nbd_dev_dbg_close(nbd);
-		nbd_size_clear(nbd, bdev);
-		device_remove_file(disk_to_dev(nbd->disk), &pid_attr);
-out_recv:
-		mutex_lock(&nbd->config_lock);
-		nbd->task_recv = NULL;
-out_err:
-		sock_shutdown(nbd);
-		nbd_clear_que(nbd);
-		kill_bdev(bdev);
-		nbd_bdev_reset(bdev);
-
-		/* user requested, ignore socket errors */
-		if (test_bit(NBD_DISCONNECT_REQUESTED, &nbd->runtime_flags))
-			error = 0;
-		if (test_bit(NBD_TIMEDOUT, &nbd->runtime_flags))
-			error = -ETIMEDOUT;
-
-		nbd_reset(nbd);
-		return error;
-	}
+	case NBD_DO_IT:
+		return nbd_start_device(nbd, bdev);
 	case NBD_CLEAR_QUE:
 		/*
 		 * This is for compatibility only. The queue is always cleared
 		 * by NBD_DO_IT or NBD_CLEAR_SOCK.
 		 */
 		return 0;
 	case NBD_PRINT_DEBUG:
 		/*
 		 * For compatibility only, we no longer keep a list of
@@ -1134,8 +1121,10 @@ static int __init nbd_init(void)
 	if (!recv_workqueue)
 		return -ENOMEM;
 
-	if (register_blkdev(NBD_MAJOR, "nbd"))
+	if (register_blkdev(NBD_MAJOR, "nbd")) {
+		destroy_workqueue(recv_workqueue);
 		return -EIO;
+	}
 
 	nbd_dbg_init();
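
The last nbd hunk closes a small leak: before it, a register_blkdev() failure
returned -EIO with recv_workqueue still allocated. The general shape of the
fix, sketched outside the kernel (the alloc/register/destroy functions below
are stand-ins, not the kernel API):

	#include <stdio.h>
	#include <stdlib.h>

	struct workqueue { int unused; };

	static struct workqueue *alloc_workqueue_stub(void)
	{
		return calloc(1, sizeof(struct workqueue));
	}

	static void destroy_workqueue_stub(struct workqueue *wq) { free(wq); }

	/* Nonzero on failure, like register_blkdev(); forced here for demo. */
	static int register_blkdev_stub(void) { return 1; }

	static int nbd_init_sketch(void)
	{
		struct workqueue *recv_workqueue = alloc_workqueue_stub();

		if (!recv_workqueue)
			return -12; /* -ENOMEM */
		if (register_blkdev_stub()) {
			/* the fix: unwind the earlier allocation on this path */
			destroy_workqueue_stub(recv_workqueue);
			return -5; /* -EIO */
		}
		return 0;
	}

	int main(void)
	{
		printf("init: %d (workqueue freed on failure)\n", nbd_init_sketch());
		return 0;
	}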

--- a/drivers/md/dm-rq.c
+++ b/drivers/md/dm-rq.c
@@ -328,13 +328,15 @@ static void dm_softirq_done(struct request *rq)
 	int rw;
 
 	if (!clone) {
-		rq_end_stats(tio->md, rq);
+		struct mapped_device *md = tio->md;
+
+		rq_end_stats(md, rq);
 		rw = rq_data_dir(rq);
 		if (!rq->q->mq_ops)
 			blk_end_request_all(rq, tio->error);
 		else
 			blk_mq_end_request(rq, tio->error);
-		rq_completed(tio->md, rw, false);
+		rq_completed(md, rw, false);
 		return;
 	}
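
The dm-rq change is the use-after-free fix called out in the pull message:
blk_mq_end_request() can complete and free the request, and tio lives in the
request's payload, so tio->md must be loaded into a local before the request
is ended. The same shape in a self-contained sketch (the toy end_request()
below frees the per-request context, as blk_mq_end_request() may):

	#include <stdio.h>
	#include <stdlib.h>

	struct mapped_device { const char *name; };
	struct dm_rq_target_io { struct mapped_device *md; int error; };

	/* Ending the request frees its per-request context. */
	static void end_request(struct dm_rq_target_io *tio) { free(tio); }

	static void rq_completed(struct mapped_device *md)
	{
		printf("completed on %s\n", md->name);
	}

	static void softirq_done(struct dm_rq_target_io *tio)
	{
		/* the fix: copy the pointer out before tio can be freed */
		struct mapped_device *md = tio->md;

		end_request(tio);  /* tio is gone after this call */
		rq_completed(md);  /* safe: uses the saved pointer */
	}

	int main(void)
	{
		static struct mapped_device md = { "dm-0" };
		struct dm_rq_target_io *tio = malloc(sizeof(*tio));

		if (!tio)
			return 1;
		tio->md = &md;
		softirq_done(tio);
		return 0;
	}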

View File

@ -26,6 +26,7 @@
#include <linux/ptrace.h> #include <linux/ptrace.h>
#include <linux/nvme_ioctl.h> #include <linux/nvme_ioctl.h>
#include <linux/t10-pi.h> #include <linux/t10-pi.h>
#include <linux/pm_qos.h>
#include <scsi/sg.h> #include <scsi/sg.h>
#include <asm/unaligned.h> #include <asm/unaligned.h>
@ -56,6 +57,11 @@ EXPORT_SYMBOL_GPL(nvme_max_retries);
static int nvme_char_major; static int nvme_char_major;
module_param(nvme_char_major, int, 0); module_param(nvme_char_major, int, 0);
static unsigned long default_ps_max_latency_us = 25000;
module_param(default_ps_max_latency_us, ulong, 0644);
MODULE_PARM_DESC(default_ps_max_latency_us,
"max power saving latency for new devices; use PM QOS to change per device");
static LIST_HEAD(nvme_ctrl_list); static LIST_HEAD(nvme_ctrl_list);
static DEFINE_SPINLOCK(dev_list_lock); static DEFINE_SPINLOCK(dev_list_lock);
@ -560,7 +566,7 @@ int nvme_identify_ctrl(struct nvme_ctrl *dev, struct nvme_id_ctrl **id)
/* gcc-4.4.4 (at least) has issues with initializers and anon unions */ /* gcc-4.4.4 (at least) has issues with initializers and anon unions */
c.identify.opcode = nvme_admin_identify; c.identify.opcode = nvme_admin_identify;
c.identify.cns = cpu_to_le32(NVME_ID_CNS_CTRL); c.identify.cns = NVME_ID_CNS_CTRL;
*id = kmalloc(sizeof(struct nvme_id_ctrl), GFP_KERNEL); *id = kmalloc(sizeof(struct nvme_id_ctrl), GFP_KERNEL);
if (!*id) if (!*id)
@ -578,7 +584,7 @@ static int nvme_identify_ns_list(struct nvme_ctrl *dev, unsigned nsid, __le32 *n
struct nvme_command c = { }; struct nvme_command c = { };
c.identify.opcode = nvme_admin_identify; c.identify.opcode = nvme_admin_identify;
c.identify.cns = cpu_to_le32(NVME_ID_CNS_NS_ACTIVE_LIST); c.identify.cns = NVME_ID_CNS_NS_ACTIVE_LIST;
c.identify.nsid = cpu_to_le32(nsid); c.identify.nsid = cpu_to_le32(nsid);
return nvme_submit_sync_cmd(dev->admin_q, &c, ns_list, 0x1000); return nvme_submit_sync_cmd(dev->admin_q, &c, ns_list, 0x1000);
} }
@ -590,8 +596,9 @@ int nvme_identify_ns(struct nvme_ctrl *dev, unsigned nsid,
int error; int error;
/* gcc-4.4.4 (at least) has issues with initializers and anon unions */ /* gcc-4.4.4 (at least) has issues with initializers and anon unions */
c.identify.opcode = nvme_admin_identify, c.identify.opcode = nvme_admin_identify;
c.identify.nsid = cpu_to_le32(nsid), c.identify.nsid = cpu_to_le32(nsid);
c.identify.cns = NVME_ID_CNS_NS;
*id = kmalloc(sizeof(struct nvme_id_ns), GFP_KERNEL); *id = kmalloc(sizeof(struct nvme_id_ns), GFP_KERNEL);
if (!*id) if (!*id)
@ -1251,6 +1258,176 @@ static void nvme_set_queue_limits(struct nvme_ctrl *ctrl,
blk_queue_write_cache(q, vwc, vwc); blk_queue_write_cache(q, vwc, vwc);
} }
static void nvme_configure_apst(struct nvme_ctrl *ctrl)
{
/*
* APST (Autonomous Power State Transition) lets us program a
* table of power state transitions that the controller will
* perform automatically. We configure it with a simple
* heuristic: we are willing to spend at most 2% of the time
* transitioning between power states. Therefore, when running
* in any given state, we will enter the next lower-power
* non-operational state after waiting 100 * (enlat + exlat)
* microseconds, as long as that state's total latency is under
* the requested maximum latency.
*
* We will not autonomously enter any non-operational state for
* which the total latency exceeds ps_max_latency_us. Users
* can set ps_max_latency_us to zero to turn off APST.
*/
unsigned apste;
struct nvme_feat_auto_pst *table;
int ret;
/*
* If APST isn't supported or if we haven't been initialized yet,
* then don't do anything.
*/
if (!ctrl->apsta)
return;
if (ctrl->npss > 31) {
dev_warn(ctrl->device, "NPSS is invalid; not using APST\n");
return;
}
table = kzalloc(sizeof(*table), GFP_KERNEL);
if (!table)
return;
if (ctrl->ps_max_latency_us == 0) {
/* Turn off APST. */
apste = 0;
} else {
__le64 target = cpu_to_le64(0);
int state;
/*
* Walk through all states from lowest- to highest-power.
* According to the spec, lower-numbered states use more
* power. NPSS, despite the name, is the index of the
* lowest-power state, not the number of states.
*/
for (state = (int)ctrl->npss; state >= 0; state--) {
u64 total_latency_us, transition_ms;
if (target)
table->entries[state] = target;
/*
* Is this state a useful non-operational state for
* higher-power states to autonomously transition to?
*/
if (!(ctrl->psd[state].flags &
NVME_PS_FLAGS_NON_OP_STATE))
continue;
total_latency_us =
(u64)le32_to_cpu(ctrl->psd[state].entry_lat) +
+ le32_to_cpu(ctrl->psd[state].exit_lat);
if (total_latency_us > ctrl->ps_max_latency_us)
continue;
/*
* This state is good. Use it as the APST idle
* target for higher power states.
*/
transition_ms = total_latency_us + 19;
do_div(transition_ms, 20);
if (transition_ms > (1 << 24) - 1)
transition_ms = (1 << 24) - 1;
target = cpu_to_le64((state << 3) |
(transition_ms << 8));
}
apste = 1;
}
ret = nvme_set_features(ctrl, NVME_FEAT_AUTO_PST, apste,
table, sizeof(*table), NULL);
if (ret)
dev_err(ctrl->device, "failed to set APST feature (%d)\n", ret);
kfree(table);
}
static void nvme_set_latency_tolerance(struct device *dev, s32 val)
{
struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
u64 latency;
switch (val) {
case PM_QOS_LATENCY_TOLERANCE_NO_CONSTRAINT:
case PM_QOS_LATENCY_ANY:
latency = U64_MAX;
break;
default:
latency = val;
}
if (ctrl->ps_max_latency_us != latency) {
ctrl->ps_max_latency_us = latency;
nvme_configure_apst(ctrl);
}
}
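For context, nvme_set_latency_tolerance() is wired up as the device's PM QoS latency-tolerance hook, so user space drives it through sysfs once the attribute is exposed. A hedged sketch follows; the path assumes a hypothetical controller named nvme0, and the special strings "auto" and "any" map onto the two constants handled in the switch above:

#include <stdio.h>

int main(void)
{
	/* Path assumption: the latency-tolerance attribute of the nvme0
	 * class device exposed by dev_pm_qos_expose_latency_tolerance(). */
	FILE *f = fopen("/sys/class/nvme/nvme0/power/pm_qos_latency_tolerance_us", "w");

	if (!f)
		return 1;
	/* Allow at most 25 ms of power-state latency; writing 0 turns
	 * APST off, per the ps_max_latency_us handling above. */
	fprintf(f, "25000\n");
	fclose(f);
	return 0;
}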
struct nvme_core_quirk_entry {
/*
* NVMe model and firmware strings are padded with spaces. For
* simplicity, strings in the quirk table are padded with NULLs
* instead.
*/
u16 vid;
const char *mn;
const char *fr;
unsigned long quirks;
};
static const struct nvme_core_quirk_entry core_quirks[] = {
/*
* Seen on a Samsung "SM951 NVMe SAMSUNG 256GB": using APST causes
* the controller to go out to lunch. It dies when the watchdog
* timer reads CSTS and gets 0xffffffff.
*/
{
.vid = 0x144d,
.fr = "BXW75D0Q",
.quirks = NVME_QUIRK_NO_APST,
},
};
/* match is null-terminated but idstr is space-padded. */
static bool string_matches(const char *idstr, const char *match, size_t len)
{
size_t matchlen;
if (!match)
return true;
matchlen = strlen(match);
WARN_ON_ONCE(matchlen > len);
if (memcmp(idstr, match, matchlen))
return false;
for (; matchlen < len; matchlen++)
if (idstr[matchlen] != ' ')
return false;
return true;
}
static bool quirk_matches(const struct nvme_id_ctrl *id,
const struct nvme_core_quirk_entry *q)
{
return q->vid == le16_to_cpu(id->vid) &&
string_matches(id->mn, q->mn, sizeof(id->mn)) &&
string_matches(id->fr, q->fr, sizeof(id->fr));
}
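To illustrate the padding rules (the identify strings are fixed-width and space-padded, the quirk strings are ordinary NUL-terminated C strings), a few hypothetical calls:

/* Hypothetical 8-byte model-number field holding "FOO" plus spaces. */
char mn_field[8] = { 'F', 'O', 'O', ' ', ' ', ' ', ' ', ' ' };

string_matches(mn_field, "FOO", sizeof(mn_field));  /* true: tail is all spaces */
string_matches(mn_field, "FO", sizeof(mn_field));   /* false: 'O' follows the match */
string_matches(mn_field, NULL, sizeof(mn_field));   /* true: NULL matches anything */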
/* /*
* Initialize the cached copies of the Identify data and various controller * Initialize the cached copies of the Identify data and various controller
* register in our nvme_ctrl structure. This should be called as soon as * register in our nvme_ctrl structure. This should be called as soon as
@ -1262,6 +1439,7 @@ int nvme_init_identify(struct nvme_ctrl *ctrl)
u64 cap; u64 cap;
int ret, page_shift; int ret, page_shift;
u32 max_hw_sectors; u32 max_hw_sectors;
u8 prev_apsta;
ret = ctrl->ops->reg_read32(ctrl, NVME_REG_VS, &ctrl->vs); ret = ctrl->ops->reg_read32(ctrl, NVME_REG_VS, &ctrl->vs);
if (ret) { if (ret) {
@ -1285,6 +1463,24 @@ int nvme_init_identify(struct nvme_ctrl *ctrl)
return -EIO; return -EIO;
} }
if (!ctrl->identified) {
/*
* Check for quirks. Quirk can depend on firmware version,
* so, in principle, the set of quirks present can change
* across a reset. As a possible future enhancement, we
* could re-scan for quirks every time we reinitialize
* the device, but we'd have to make sure that the driver
* behaves intelligently if the quirks change.
*/
int i;
for (i = 0; i < ARRAY_SIZE(core_quirks); i++) {
if (quirk_matches(id, &core_quirks[i]))
ctrl->quirks |= core_quirks[i].quirks;
}
}
ctrl->oacs = le16_to_cpu(id->oacs); ctrl->oacs = le16_to_cpu(id->oacs);
ctrl->vid = le16_to_cpu(id->vid); ctrl->vid = le16_to_cpu(id->vid);
ctrl->oncs = le16_to_cpup(&id->oncs); ctrl->oncs = le16_to_cpup(&id->oncs);
@ -1305,6 +1501,11 @@ int nvme_init_identify(struct nvme_ctrl *ctrl)
ctrl->sgls = le32_to_cpu(id->sgls); ctrl->sgls = le32_to_cpu(id->sgls);
ctrl->kas = le16_to_cpu(id->kas); ctrl->kas = le16_to_cpu(id->kas);
ctrl->npss = id->npss;
prev_apsta = ctrl->apsta;
ctrl->apsta = (ctrl->quirks & NVME_QUIRK_NO_APST) ? 0 : id->apsta;
memcpy(ctrl->psd, id->psd, sizeof(ctrl->psd));
if (ctrl->ops->is_fabrics) { if (ctrl->ops->is_fabrics) {
ctrl->icdoff = le16_to_cpu(id->icdoff); ctrl->icdoff = le16_to_cpu(id->icdoff);
ctrl->ioccsz = le32_to_cpu(id->ioccsz); ctrl->ioccsz = le32_to_cpu(id->ioccsz);
@ -1328,6 +1529,16 @@ int nvme_init_identify(struct nvme_ctrl *ctrl)
} }
kfree(id); kfree(id);
if (ctrl->apsta && !prev_apsta)
dev_pm_qos_expose_latency_tolerance(ctrl->device);
else if (!ctrl->apsta && prev_apsta)
dev_pm_qos_hide_latency_tolerance(ctrl->device);
nvme_configure_apst(ctrl);
ctrl->identified = true;
return ret; return ret;
} }
EXPORT_SYMBOL_GPL(nvme_init_identify); EXPORT_SYMBOL_GPL(nvme_init_identify);
@ -1577,6 +1788,29 @@ static ssize_t nvme_sysfs_show_transport(struct device *dev,
} }
static DEVICE_ATTR(transport, S_IRUGO, nvme_sysfs_show_transport, NULL); static DEVICE_ATTR(transport, S_IRUGO, nvme_sysfs_show_transport, NULL);
static ssize_t nvme_sysfs_show_state(struct device *dev,
struct device_attribute *attr,
char *buf)
{
struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
static const char *const state_name[] = {
[NVME_CTRL_NEW] = "new",
[NVME_CTRL_LIVE] = "live",
[NVME_CTRL_RESETTING] = "resetting",
[NVME_CTRL_RECONNECTING] = "reconnecting",
[NVME_CTRL_DELETING] = "deleting",
[NVME_CTRL_DEAD] = "dead",
};
if ((unsigned)ctrl->state < ARRAY_SIZE(state_name) &&
state_name[ctrl->state])
return sprintf(buf, "%s\n", state_name[ctrl->state]);
return sprintf(buf, "unknown state\n");
}
static DEVICE_ATTR(state, S_IRUGO, nvme_sysfs_show_state, NULL);
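The attribute lands under the controller's class device, so a reader can poll it directly. A minimal userspace sketch (controller name hypothetical):

#include <stdio.h>

int main(void)
{
	char state[32];
	FILE *f = fopen("/sys/class/nvme/nvme0/state", "r");

	if (f && fgets(state, sizeof(state), f))
		printf("nvme0: %s", state);	/* e.g. "live" or "resetting" */
	if (f)
		fclose(f);
	return f ? 0 : 1;
}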
static ssize_t nvme_sysfs_show_subsysnqn(struct device *dev, static ssize_t nvme_sysfs_show_subsysnqn(struct device *dev,
struct device_attribute *attr, struct device_attribute *attr,
char *buf) char *buf)
@ -1609,6 +1843,7 @@ static struct attribute *nvme_dev_attrs[] = {
&dev_attr_transport.attr, &dev_attr_transport.attr,
&dev_attr_subsysnqn.attr, &dev_attr_subsysnqn.attr,
&dev_attr_address.attr, &dev_attr_address.attr,
&dev_attr_state.attr,
NULL NULL
}; };
@ -2065,6 +2300,14 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
list_add_tail(&ctrl->node, &nvme_ctrl_list); list_add_tail(&ctrl->node, &nvme_ctrl_list);
spin_unlock(&dev_list_lock); spin_unlock(&dev_list_lock);
/*
* Initialize latency tolerance controls. The sysfs files won't
* be visible to userspace unless the device actually supports APST.
*/
ctrl->device->power.set_latency_tolerance = nvme_set_latency_tolerance;
dev_pm_qos_update_user_latency_tolerance(ctrl->device,
min(default_ps_max_latency_us, (unsigned long)S32_MAX));
return 0; return 0;
out_release_instance: out_release_instance:
nvme_release_instance(ctrl); nvme_release_instance(ctrl);
@ -2090,9 +2333,9 @@ void nvme_kill_queues(struct nvme_ctrl *ctrl)
* Revalidating a dead namespace sets capacity to 0. This will * Revalidating a dead namespace sets capacity to 0. This will
* end buffered writers dirtying pages that can't be synced. * end buffered writers dirtying pages that can't be synced.
*/ */
if (ns->disk && !test_and_set_bit(NVME_NS_DEAD, &ns->flags)) if (!ns->disk || test_and_set_bit(NVME_NS_DEAD, &ns->flags))
revalidate_disk(ns->disk); continue;
revalidate_disk(ns->disk);
blk_set_queue_dying(ns->queue); blk_set_queue_dying(ns->queue);
blk_mq_abort_requeue_list(ns->queue); blk_mq_abort_requeue_list(ns->queue);
blk_mq_start_stopped_hw_queues(ns->queue, true); blk_mq_start_stopped_hw_queues(ns->queue, true);


@ -480,11 +480,16 @@ EXPORT_SYMBOL_GPL(nvmf_connect_io_queue);
* being implemented to the common NVMe fabrics library. Part of * being implemented to the common NVMe fabrics library. Part of
* the overall init sequence of starting up a fabrics driver. * the overall init sequence of starting up a fabrics driver.
*/ */
void nvmf_register_transport(struct nvmf_transport_ops *ops) int nvmf_register_transport(struct nvmf_transport_ops *ops)
{ {
if (!ops->create_ctrl)
return -EINVAL;
mutex_lock(&nvmf_transports_mutex); mutex_lock(&nvmf_transports_mutex);
list_add_tail(&ops->entry, &nvmf_transports); list_add_tail(&ops->entry, &nvmf_transports);
mutex_unlock(&nvmf_transports_mutex); mutex_unlock(&nvmf_transports_mutex);
return 0;
} }
EXPORT_SYMBOL_GPL(nvmf_register_transport); EXPORT_SYMBOL_GPL(nvmf_register_transport);
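With this change every transport must supply create_ctrl and propagate the registration result from its module init. A minimal sketch of the new contract, with hypothetical names:

/* Hypothetical transport demonstrating the new contract. */
static struct nvme_ctrl *my_create_ctrl(struct device *dev,
		struct nvmf_ctrl_options *opts);

static struct nvmf_transport_ops my_transport = {
	.name		= "mytr",
	.required_opts	= NVMF_OPT_TRADDR,
	.create_ctrl	= my_create_ctrl,	/* mandatory: -EINVAL if NULL */
};

static int __init my_init_module(void)
{
	/* Fails cleanly instead of registering a half-usable transport. */
	return nvmf_register_transport(&my_transport);
}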


@ -128,7 +128,7 @@ int nvmf_reg_read64(struct nvme_ctrl *ctrl, u32 off, u64 *val);
int nvmf_reg_write32(struct nvme_ctrl *ctrl, u32 off, u32 val); int nvmf_reg_write32(struct nvme_ctrl *ctrl, u32 off, u32 val);
int nvmf_connect_admin_queue(struct nvme_ctrl *ctrl); int nvmf_connect_admin_queue(struct nvme_ctrl *ctrl);
int nvmf_connect_io_queue(struct nvme_ctrl *ctrl, u16 qid); int nvmf_connect_io_queue(struct nvme_ctrl *ctrl, u16 qid);
void nvmf_register_transport(struct nvmf_transport_ops *ops); int nvmf_register_transport(struct nvmf_transport_ops *ops);
void nvmf_unregister_transport(struct nvmf_transport_ops *ops); void nvmf_unregister_transport(struct nvmf_transport_ops *ops);
void nvmf_free_options(struct nvmf_ctrl_options *opts); void nvmf_free_options(struct nvmf_ctrl_options *opts);
const char *nvmf_get_subsysnqn(struct nvme_ctrl *ctrl); const char *nvmf_get_subsysnqn(struct nvme_ctrl *ctrl);


@ -2353,18 +2353,6 @@ __nvme_fc_create_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
/* sanity checks */ /* sanity checks */
/* FC-NVME supports 64-byte SQE only */
if (ctrl->ctrl.ioccsz != 4) {
dev_err(ctrl->ctrl.device, "ioccsz %d is not supported!\n",
ctrl->ctrl.ioccsz);
goto out_remove_admin_queue;
}
/* FC-NVME supports 16-byte CQE only */
if (ctrl->ctrl.iorcsz != 1) {
dev_err(ctrl->ctrl.device, "iorcsz %d is not supported!\n",
ctrl->ctrl.iorcsz);
goto out_remove_admin_queue;
}
/* FC-NVME does not have other data in the capsule */ /* FC-NVME does not have other data in the capsule */
if (ctrl->ctrl.icdoff) { if (ctrl->ctrl.icdoff) {
dev_err(ctrl->ctrl.device, "icdoff %d is not supported!\n", dev_err(ctrl->ctrl.device, "icdoff %d is not supported!\n",
@ -2562,8 +2550,7 @@ static int __init nvme_fc_init_module(void)
if (!nvme_fc_wq) if (!nvme_fc_wq)
return -ENOMEM; return -ENOMEM;
nvmf_register_transport(&nvme_fc_transport); return nvmf_register_transport(&nvme_fc_transport);
return 0;
} }
static void __exit nvme_fc_exit_module(void) static void __exit nvme_fc_exit_module(void)


@ -78,6 +78,11 @@ enum nvme_quirks {
* readiness, which is done by reading the NVME_CSTS_RDY bit. * readiness, which is done by reading the NVME_CSTS_RDY bit.
*/ */
NVME_QUIRK_DELAY_BEFORE_CHK_RDY = (1 << 3), NVME_QUIRK_DELAY_BEFORE_CHK_RDY = (1 << 3),
/*
* APST should not be used.
*/
NVME_QUIRK_NO_APST = (1 << 4),
}; };
/* /*
@ -112,6 +117,7 @@ enum nvme_ctrl_state {
struct nvme_ctrl { struct nvme_ctrl {
enum nvme_ctrl_state state; enum nvme_ctrl_state state;
bool identified;
spinlock_t lock; spinlock_t lock;
const struct nvme_ctrl_ops *ops; const struct nvme_ctrl_ops *ops;
struct request_queue *admin_q; struct request_queue *admin_q;
@ -147,13 +153,19 @@ struct nvme_ctrl {
u32 vs; u32 vs;
u32 sgls; u32 sgls;
u16 kas; u16 kas;
u8 npss;
u8 apsta;
unsigned int kato; unsigned int kato;
bool subsystem; bool subsystem;
unsigned long quirks; unsigned long quirks;
struct nvme_id_power_state psd[32];
struct work_struct scan_work; struct work_struct scan_work;
struct work_struct async_event_work; struct work_struct async_event_work;
struct delayed_work ka_work; struct delayed_work ka_work;
/* Power saving configuration */
u64 ps_max_latency_us;
/* Fabrics only */ /* Fabrics only */
u16 sqsize; u16 sqsize;
u32 ioccsz; u32 ioccsz;


@ -613,10 +613,7 @@ static int nvme_queue_rq(struct blk_mq_hw_ctx *hctx,
spin_lock_irq(&nvmeq->q_lock); spin_lock_irq(&nvmeq->q_lock);
if (unlikely(nvmeq->cq_vector < 0)) { if (unlikely(nvmeq->cq_vector < 0)) {
if (ns && !test_bit(NVME_NS_DEAD, &ns->flags)) ret = BLK_MQ_RQ_QUEUE_ERROR;
ret = BLK_MQ_RQ_QUEUE_BUSY;
else
ret = BLK_MQ_RQ_QUEUE_ERROR;
spin_unlock_irq(&nvmeq->q_lock); spin_unlock_irq(&nvmeq->q_lock);
goto out_cleanup_iod; goto out_cleanup_iod;
} }
@ -1739,7 +1736,7 @@ static void nvme_pci_free_ctrl(struct nvme_ctrl *ctrl)
if (dev->ctrl.admin_q) if (dev->ctrl.admin_q)
blk_put_queue(dev->ctrl.admin_q); blk_put_queue(dev->ctrl.admin_q);
kfree(dev->queues); kfree(dev->queues);
kfree(dev->ctrl.opal_dev); free_opal_dev(dev->ctrl.opal_dev);
kfree(dev); kfree(dev);
} }
@ -1789,14 +1786,17 @@ static void nvme_reset_work(struct work_struct *work)
if (result) if (result)
goto out; goto out;
if ((dev->ctrl.oacs & NVME_CTRL_OACS_SEC_SUPP) && !dev->ctrl.opal_dev) { if (dev->ctrl.oacs & NVME_CTRL_OACS_SEC_SUPP) {
dev->ctrl.opal_dev = if (!dev->ctrl.opal_dev)
init_opal_dev(&dev->ctrl, &nvme_sec_submit); dev->ctrl.opal_dev =
init_opal_dev(&dev->ctrl, &nvme_sec_submit);
else if (was_suspend)
opal_unlock_from_suspend(dev->ctrl.opal_dev);
} else {
free_opal_dev(dev->ctrl.opal_dev);
dev->ctrl.opal_dev = NULL;
} }
if (was_suspend)
opal_unlock_from_suspend(dev->ctrl.opal_dev);
result = nvme_setup_io_queues(dev); result = nvme_setup_io_queues(dev);
if (result) if (result)
goto out; goto out;
@ -2001,8 +2001,10 @@ static void nvme_remove(struct pci_dev *pdev)
pci_set_drvdata(pdev, NULL); pci_set_drvdata(pdev, NULL);
if (!pci_device_is_present(pdev)) if (!pci_device_is_present(pdev)) {
nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_DEAD); nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_DEAD);
nvme_dev_disable(dev, false);
}
flush_work(&dev->reset_work); flush_work(&dev->reset_work);
nvme_uninit_ctrl(&dev->ctrl); nvme_uninit_ctrl(&dev->ctrl);
@ -2121,6 +2123,7 @@ static const struct pci_device_id nvme_id_table[] = {
.driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY, }, .driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY, },
{ PCI_DEVICE_CLASS(PCI_CLASS_STORAGE_EXPRESS, 0xffffff) }, { PCI_DEVICE_CLASS(PCI_CLASS_STORAGE_EXPRESS, 0xffffff) },
{ PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2001) }, { PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2001) },
{ PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2003) },
{ 0, } { 0, }
}; };
MODULE_DEVICE_TABLE(pci, nvme_id_table); MODULE_DEVICE_TABLE(pci, nvme_id_table);


@ -42,28 +42,6 @@
#define NVME_RDMA_MAX_INLINE_SEGMENTS 1 #define NVME_RDMA_MAX_INLINE_SEGMENTS 1
static const char *const nvme_rdma_cm_status_strs[] = {
[NVME_RDMA_CM_INVALID_LEN] = "invalid length",
[NVME_RDMA_CM_INVALID_RECFMT] = "invalid record format",
[NVME_RDMA_CM_INVALID_QID] = "invalid queue ID",
[NVME_RDMA_CM_INVALID_HSQSIZE] = "invalid host SQ size",
[NVME_RDMA_CM_INVALID_HRQSIZE] = "invalid host RQ size",
[NVME_RDMA_CM_NO_RSC] = "resource not found",
[NVME_RDMA_CM_INVALID_IRD] = "invalid IRD",
[NVME_RDMA_CM_INVALID_ORD] = "Invalid ORD",
};
static const char *nvme_rdma_cm_msg(enum nvme_rdma_cm_status status)
{
size_t index = status;
if (index < ARRAY_SIZE(nvme_rdma_cm_status_strs) &&
nvme_rdma_cm_status_strs[index])
return nvme_rdma_cm_status_strs[index];
else
return "unrecognized reason";
};
/* /*
* We handle AEN commands ourselves and don't even let the * We handle AEN commands ourselves and don't even let the
* block layer know about them. * block layer know about them.
@ -155,6 +133,10 @@ struct nvme_rdma_ctrl {
struct sockaddr addr; struct sockaddr addr;
struct sockaddr_in addr_in; struct sockaddr_in addr_in;
}; };
union {
struct sockaddr src_addr;
struct sockaddr_in src_addr_in;
};
struct nvme_ctrl ctrl; struct nvme_ctrl ctrl;
}; };
@ -567,6 +549,7 @@ static int nvme_rdma_init_queue(struct nvme_rdma_ctrl *ctrl,
int idx, size_t queue_size) int idx, size_t queue_size)
{ {
struct nvme_rdma_queue *queue; struct nvme_rdma_queue *queue;
struct sockaddr *src_addr = NULL;
int ret; int ret;
queue = &ctrl->queues[idx]; queue = &ctrl->queues[idx];
@ -589,7 +572,10 @@ static int nvme_rdma_init_queue(struct nvme_rdma_ctrl *ctrl,
} }
queue->cm_error = -ETIMEDOUT; queue->cm_error = -ETIMEDOUT;
ret = rdma_resolve_addr(queue->cm_id, NULL, &ctrl->addr, if (ctrl->ctrl.opts->mask & NVMF_OPT_HOST_TRADDR)
src_addr = &ctrl->src_addr;
ret = rdma_resolve_addr(queue->cm_id, src_addr, &ctrl->addr,
NVME_RDMA_CONNECT_TIMEOUT_MS); NVME_RDMA_CONNECT_TIMEOUT_MS);
if (ret) { if (ret) {
dev_info(ctrl->ctrl.device, dev_info(ctrl->ctrl.device,
@ -1905,6 +1891,16 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev,
goto out_free_ctrl; goto out_free_ctrl;
} }
if (opts->mask & NVMF_OPT_HOST_TRADDR) {
ret = nvme_rdma_parse_ipaddr(&ctrl->src_addr_in,
opts->host_traddr);
if (ret) {
pr_err("malformed src IP address passed: %s\n",
opts->host_traddr);
goto out_free_ctrl;
}
}
if (opts->mask & NVMF_OPT_TRSVCID) { if (opts->mask & NVMF_OPT_TRSVCID) {
u16 port; u16 port;
@ -2016,7 +2012,8 @@ out_free_ctrl:
static struct nvmf_transport_ops nvme_rdma_transport = { static struct nvmf_transport_ops nvme_rdma_transport = {
.name = "rdma", .name = "rdma",
.required_opts = NVMF_OPT_TRADDR, .required_opts = NVMF_OPT_TRADDR,
.allowed_opts = NVMF_OPT_TRSVCID | NVMF_OPT_RECONNECT_DELAY, .allowed_opts = NVMF_OPT_TRSVCID | NVMF_OPT_RECONNECT_DELAY |
NVMF_OPT_HOST_TRADDR,
.create_ctrl = nvme_rdma_create_ctrl, .create_ctrl = nvme_rdma_create_ctrl,
}; };
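The new option travels in the fabrics option string that user space (e.g. nvme-cli) writes to /dev/nvme-fabrics. A hedged sketch with hypothetical addresses and NQN:

#include <fcntl.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	/* host_traddr pins the local source address handed to
	 * rdma_resolve_addr() above; all values here are hypothetical. */
	const char *opts = "transport=rdma,traddr=192.168.1.10,"
		"host_traddr=192.168.1.20,nqn=nqn.2017-02.org.example:sub1";
	int fd = open("/dev/nvme-fabrics", O_RDWR);

	if (fd < 0)
		return 1;
	if (write(fd, opts, strlen(opts)) < 0) {
		close(fd);
		return 1;
	}
	close(fd);
	return 0;
}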
@ -2063,8 +2060,7 @@ static int __init nvme_rdma_init_module(void)
return ret; return ret;
} }
nvmf_register_transport(&nvme_rdma_transport); return nvmf_register_transport(&nvme_rdma_transport);
return 0;
} }
static void __exit nvme_rdma_cleanup_module(void) static void __exit nvme_rdma_cleanup_module(void)


@ -41,7 +41,7 @@ static u16 nvmet_get_smart_log_nsid(struct nvmet_req *req,
ns = nvmet_find_namespace(req->sq->ctrl, req->cmd->get_log_page.nsid); ns = nvmet_find_namespace(req->sq->ctrl, req->cmd->get_log_page.nsid);
if (!ns) { if (!ns) {
status = NVME_SC_INVALID_NS; status = NVME_SC_INVALID_NS;
pr_err("nvmet : Counld not find namespace id : %d\n", pr_err("nvmet : Could not find namespace id : %d\n",
le32_to_cpu(req->cmd->get_log_page.nsid)); le32_to_cpu(req->cmd->get_log_page.nsid));
goto out; goto out;
} }
@ -509,7 +509,7 @@ int nvmet_parse_admin_cmd(struct nvmet_req *req)
break; break;
case nvme_admin_identify: case nvme_admin_identify:
req->data_len = 4096; req->data_len = 4096;
switch (le32_to_cpu(cmd->identify.cns)) { switch (cmd->identify.cns) {
case NVME_ID_CNS_NS: case NVME_ID_CNS_NS:
req->execute = nvmet_execute_identify_ns; req->execute = nvmet_execute_identify_ns;
return 0; return 0;


@ -17,6 +17,7 @@
#include "nvmet.h" #include "nvmet.h"
static struct nvmet_fabrics_ops *nvmet_transports[NVMF_TRTYPE_MAX]; static struct nvmet_fabrics_ops *nvmet_transports[NVMF_TRTYPE_MAX];
static DEFINE_IDA(cntlid_ida);
/* /*
* This read/write semaphore is used to synchronize access to configuration * This read/write semaphore is used to synchronize access to configuration
@ -749,7 +750,7 @@ u16 nvmet_alloc_ctrl(const char *subsysnqn, const char *hostnqn,
if (!ctrl->sqs) if (!ctrl->sqs)
goto out_free_cqs; goto out_free_cqs;
ret = ida_simple_get(&subsys->cntlid_ida, ret = ida_simple_get(&cntlid_ida,
NVME_CNTLID_MIN, NVME_CNTLID_MAX, NVME_CNTLID_MIN, NVME_CNTLID_MAX,
GFP_KERNEL); GFP_KERNEL);
if (ret < 0) { if (ret < 0) {
@ -819,7 +820,7 @@ static void nvmet_ctrl_free(struct kref *ref)
flush_work(&ctrl->async_event_work); flush_work(&ctrl->async_event_work);
cancel_work_sync(&ctrl->fatal_err_work); cancel_work_sync(&ctrl->fatal_err_work);
ida_simple_remove(&subsys->cntlid_ida, ctrl->cntlid); ida_simple_remove(&cntlid_ida, ctrl->cntlid);
nvmet_subsys_put(subsys); nvmet_subsys_put(subsys);
kfree(ctrl->sqs); kfree(ctrl->sqs);
@ -918,9 +919,6 @@ struct nvmet_subsys *nvmet_subsys_alloc(const char *subsysnqn,
mutex_init(&subsys->lock); mutex_init(&subsys->lock);
INIT_LIST_HEAD(&subsys->namespaces); INIT_LIST_HEAD(&subsys->namespaces);
INIT_LIST_HEAD(&subsys->ctrls); INIT_LIST_HEAD(&subsys->ctrls);
ida_init(&subsys->cntlid_ida);
INIT_LIST_HEAD(&subsys->hosts); INIT_LIST_HEAD(&subsys->hosts);
return subsys; return subsys;
@ -933,7 +931,6 @@ static void nvmet_subsys_free(struct kref *ref)
WARN_ON_ONCE(!list_empty(&subsys->namespaces)); WARN_ON_ONCE(!list_empty(&subsys->namespaces));
ida_destroy(&subsys->cntlid_ida);
kfree(subsys->subsysnqn); kfree(subsys->subsysnqn);
kfree(subsys); kfree(subsys);
} }
@ -976,6 +973,7 @@ static void __exit nvmet_exit(void)
{ {
nvmet_exit_configfs(); nvmet_exit_configfs();
nvmet_exit_discovery(); nvmet_exit_discovery();
ida_destroy(&cntlid_ida);
BUILD_BUG_ON(sizeof(struct nvmf_disc_rsp_page_entry) != 1024); BUILD_BUG_ON(sizeof(struct nvmf_disc_rsp_page_entry) != 1024);
BUILD_BUG_ON(sizeof(struct nvmf_disc_rsp_page_hdr) != 1024); BUILD_BUG_ON(sizeof(struct nvmf_disc_rsp_page_hdr) != 1024);


@ -186,14 +186,14 @@ int nvmet_parse_discovery_cmd(struct nvmet_req *req)
} }
case nvme_admin_identify: case nvme_admin_identify:
req->data_len = 4096; req->data_len = 4096;
switch (le32_to_cpu(cmd->identify.cns)) { switch (cmd->identify.cns) {
case NVME_ID_CNS_CTRL: case NVME_ID_CNS_CTRL:
req->execute = req->execute =
nvmet_execute_identify_disc_ctrl; nvmet_execute_identify_disc_ctrl;
return 0; return 0;
default: default:
pr_err("nvmet: unsupported identify cns %d\n", pr_err("nvmet: unsupported identify cns %d\n",
le32_to_cpu(cmd->identify.cns)); cmd->identify.cns);
return NVME_SC_INVALID_OPCODE | NVME_SC_DNR; return NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
} }
default: default:


@ -153,8 +153,8 @@ static void nvmet_execute_admin_connect(struct nvmet_req *req)
goto out; goto out;
} }
pr_info("creating controller %d for NQN %s.\n", pr_info("creating controller %d for subsystem %s for NQN %s.\n",
ctrl->cntlid, ctrl->hostnqn); ctrl->cntlid, ctrl->subsys->subsysnqn, ctrl->hostnqn);
req->rsp->result.u16 = cpu_to_le16(ctrl->cntlid); req->rsp->result.u16 = cpu_to_le16(ctrl->cntlid);
out: out:
@ -220,7 +220,7 @@ int nvmet_parse_connect_cmd(struct nvmet_req *req)
req->ns = NULL; req->ns = NULL;
if (req->cmd->common.opcode != nvme_fabrics_command) { if (cmd->common.opcode != nvme_fabrics_command) {
pr_err("invalid command 0x%x on unconnected queue.\n", pr_err("invalid command 0x%x on unconnected queue.\n",
cmd->fabrics.opcode); cmd->fabrics.opcode);
return NVME_SC_INVALID_OPCODE | NVME_SC_DNR; return NVME_SC_INVALID_OPCODE | NVME_SC_DNR;


@ -1817,16 +1817,14 @@ nvmet_fc_xmt_fcp_op_done(struct nvmefc_tgt_fcp_req *fcpreq)
/* data no longer needed */ /* data no longer needed */
nvmet_fc_free_tgt_pgs(fod); nvmet_fc_free_tgt_pgs(fod);
if (fcpreq->fcp_error || abort) nvmet_req_complete(&fod->req, fcpreq->fcp_error);
nvmet_req_complete(&fod->req, fcpreq->fcp_error);
return; return;
} }
switch (fcpreq->op) { switch (fcpreq->op) {
case NVMET_FCOP_WRITEDATA: case NVMET_FCOP_WRITEDATA:
if (abort || fcpreq->fcp_error || if (fcpreq->fcp_error ||
fcpreq->transferred_length != fcpreq->transfer_length) { fcpreq->transferred_length != fcpreq->transfer_length) {
nvmet_req_complete(&fod->req, nvmet_req_complete(&fod->req,
NVME_SC_FC_TRANSPORT_ERROR); NVME_SC_FC_TRANSPORT_ERROR);
@ -1849,7 +1847,7 @@ nvmet_fc_xmt_fcp_op_done(struct nvmefc_tgt_fcp_req *fcpreq)
case NVMET_FCOP_READDATA: case NVMET_FCOP_READDATA:
case NVMET_FCOP_READDATA_RSP: case NVMET_FCOP_READDATA_RSP:
if (abort || fcpreq->fcp_error || if (fcpreq->fcp_error ||
fcpreq->transferred_length != fcpreq->transfer_length) { fcpreq->transferred_length != fcpreq->transfer_length) {
/* data no longer needed */ /* data no longer needed */
nvmet_fc_free_tgt_pgs(fod); nvmet_fc_free_tgt_pgs(fod);


@ -724,8 +724,7 @@ static int __init nvme_loop_init_module(void)
ret = nvmet_register_transport(&nvme_loop_ops); ret = nvmet_register_transport(&nvme_loop_ops);
if (ret) if (ret)
return ret; return ret;
nvmf_register_transport(&nvme_loop_transport); return nvmf_register_transport(&nvme_loop_transport);
return 0;
} }
static void __exit nvme_loop_cleanup_module(void) static void __exit nvme_loop_cleanup_module(void)

View File

@ -142,7 +142,6 @@ struct nvmet_subsys {
unsigned int max_nsid; unsigned int max_nsid;
struct list_head ctrls; struct list_head ctrls;
struct ida cntlid_ida;
struct list_head hosts; struct list_head hosts;
bool allow_any_host; bool allow_any_host;


@ -1041,6 +1041,9 @@ static int nvmet_rdma_cm_reject(struct rdma_cm_id *cm_id,
{ {
struct nvme_rdma_cm_rej rej; struct nvme_rdma_cm_rej rej;
pr_debug("rejecting connect request: status %d (%s)\n",
status, nvme_rdma_cm_msg(status));
rej.recfmt = cpu_to_le16(NVME_RDMA_CM_FMT_1_0); rej.recfmt = cpu_to_le16(NVME_RDMA_CM_FMT_1_0);
rej.sts = cpu_to_le16(status); rej.sts = cpu_to_le16(status);
@ -1091,7 +1094,7 @@ nvmet_rdma_alloc_queue(struct nvmet_rdma_device *ndev,
queue->idx = ida_simple_get(&nvmet_rdma_queue_ida, 0, 0, GFP_KERNEL); queue->idx = ida_simple_get(&nvmet_rdma_queue_ida, 0, 0, GFP_KERNEL);
if (queue->idx < 0) { if (queue->idx < 0) {
ret = NVME_RDMA_CM_NO_RSC; ret = NVME_RDMA_CM_NO_RSC;
goto out_free_queue; goto out_destroy_sq;
} }
ret = nvmet_rdma_alloc_rsps(queue); ret = nvmet_rdma_alloc_rsps(queue);
@ -1135,7 +1138,6 @@ out_destroy_sq:
out_free_queue: out_free_queue:
kfree(queue); kfree(queue);
out_reject: out_reject:
pr_debug("rejecting connect request with status code %d\n", ret);
nvmet_rdma_cm_reject(cm_id, ret); nvmet_rdma_cm_reject(cm_id, ret);
return NULL; return NULL;
} }
@ -1188,7 +1190,6 @@ static int nvmet_rdma_queue_connect(struct rdma_cm_id *cm_id,
ndev = nvmet_rdma_find_get_device(cm_id); ndev = nvmet_rdma_find_get_device(cm_id);
if (!ndev) { if (!ndev) {
pr_err("no client data!\n");
nvmet_rdma_cm_reject(cm_id, NVME_RDMA_CM_NO_RSC); nvmet_rdma_cm_reject(cm_id, NVME_RDMA_CM_NO_RSC);
return -ECONNREFUSED; return -ECONNREFUSED;
} }


@ -1167,7 +1167,7 @@ void scsi_init_command(struct scsi_device *dev, struct scsi_cmnd *cmd)
/* zero out the cmd, except for the embedded scsi_request */ /* zero out the cmd, except for the embedded scsi_request */
memset((char *)cmd + sizeof(cmd->req), 0, memset((char *)cmd + sizeof(cmd->req), 0,
sizeof(*cmd) - sizeof(cmd->req)); sizeof(*cmd) - sizeof(cmd->req) + dev->host->hostt->cmd_size);
cmd->device = dev; cmd->device = dev;
cmd->sense_buffer = buf; cmd->sense_buffer = buf;


@ -227,27 +227,31 @@ static int sas_bsg_initialize(struct Scsi_Host *shost, struct sas_rphy *rphy)
return 0; return 0;
} }
q = blk_alloc_queue(GFP_KERNEL);
if (!q)
return -ENOMEM;
q->cmd_size = sizeof(struct scsi_request);
if (rphy) { if (rphy) {
q = blk_init_queue(sas_non_host_smp_request, NULL); q->request_fn = sas_non_host_smp_request;
dev = &rphy->dev; dev = &rphy->dev;
name = dev_name(dev); name = dev_name(dev);
release = NULL; release = NULL;
} else { } else {
q = blk_init_queue(sas_host_smp_request, NULL); q->request_fn = sas_host_smp_request;
dev = &shost->shost_gendev; dev = &shost->shost_gendev;
snprintf(namebuf, sizeof(namebuf), snprintf(namebuf, sizeof(namebuf),
"sas_host%d", shost->host_no); "sas_host%d", shost->host_no);
name = namebuf; name = namebuf;
release = sas_host_release; release = sas_host_release;
} }
if (!q) error = blk_init_allocated_queue(q);
return -ENOMEM; if (error)
goto out_cleanup_queue;
error = bsg_register_queue(q, dev, name, release); error = bsg_register_queue(q, dev, name, release);
if (error) { if (error)
blk_cleanup_queue(q); goto out_cleanup_queue;
return -ENOMEM;
}
if (rphy) if (rphy)
rphy->q = q; rphy->q = q;
@ -261,6 +265,10 @@ static int sas_bsg_initialize(struct Scsi_Host *shost, struct sas_rphy *rphy)
queue_flag_set_unlocked(QUEUE_FLAG_BIDI, q); queue_flag_set_unlocked(QUEUE_FLAG_BIDI, q);
return 0; return 0;
out_cleanup_queue:
blk_cleanup_queue(q);
return error;
} }
static void sas_bsg_remove(struct Scsi_Host *shost, struct sas_rphy *rphy) static void sas_bsg_remove(struct Scsi_Host *shost, struct sas_rphy *rphy)
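The rework follows the split allocate-then-initialize queue pattern, which gives a single cleanup path. A condensed sketch of the flow (request handler hypothetical):

int error;
struct request_queue *q = blk_alloc_queue(GFP_KERNEL);

if (!q)
	return -ENOMEM;
q->request_fn = my_smp_request;		/* hypothetical handler */
q->cmd_size = sizeof(struct scsi_request);

error = blk_init_allocated_queue(q);	/* may fail after allocation */
if (error) {
	blk_cleanup_queue(q);		/* one cleanup path for all failures */
	return error;
}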


@ -1043,13 +1043,22 @@ static struct block_device *bd_acquire(struct inode *inode)
spin_lock(&bdev_lock); spin_lock(&bdev_lock);
bdev = inode->i_bdev; bdev = inode->i_bdev;
if (bdev) { if (bdev && !inode_unhashed(bdev->bd_inode)) {
bdgrab(bdev); bdgrab(bdev);
spin_unlock(&bdev_lock); spin_unlock(&bdev_lock);
return bdev; return bdev;
} }
spin_unlock(&bdev_lock); spin_unlock(&bdev_lock);
/*
* i_bdev references block device inode that was already shut down
* (corresponding device got removed). Remove the reference and look
* up block device inode again just in case new device got
* reestablished under the same device number.
*/
if (bdev)
bd_forget(inode);
bdev = bdget(inode->i_rdev); bdev = bdget(inode->i_rdev);
if (bdev) { if (bdev) {
spin_lock(&bdev_lock); spin_lock(&bdev_lock);


@ -33,6 +33,7 @@ struct blk_mq_hw_ctx {
struct blk_mq_ctx **ctxs; struct blk_mq_ctx **ctxs;
unsigned int nr_ctx; unsigned int nr_ctx;
wait_queue_t dispatch_wait;
atomic_t wait_index; atomic_t wait_index;
struct blk_mq_tags *tags; struct blk_mq_tags *tags;
@ -160,6 +161,7 @@ enum {
BLK_MQ_S_STOPPED = 0, BLK_MQ_S_STOPPED = 0,
BLK_MQ_S_TAG_ACTIVE = 1, BLK_MQ_S_TAG_ACTIVE = 1,
BLK_MQ_S_SCHED_RESTART = 2, BLK_MQ_S_SCHED_RESTART = 2,
BLK_MQ_S_TAG_WAITING = 3,
BLK_MQ_MAX_DEPTH = 10240, BLK_MQ_MAX_DEPTH = 10240,


@ -29,6 +29,30 @@ enum nvme_rdma_cm_status {
NVME_RDMA_CM_INVALID_ORD = 0x08, NVME_RDMA_CM_INVALID_ORD = 0x08,
}; };
static inline const char *nvme_rdma_cm_msg(enum nvme_rdma_cm_status status)
{
switch (status) {
case NVME_RDMA_CM_INVALID_LEN:
return "invalid length";
case NVME_RDMA_CM_INVALID_RECFMT:
return "invalid record format";
case NVME_RDMA_CM_INVALID_QID:
return "invalid queue ID";
case NVME_RDMA_CM_INVALID_HSQSIZE:
return "invalid host SQ size";
case NVME_RDMA_CM_INVALID_HRQSIZE:
return "invalid host RQ size";
case NVME_RDMA_CM_NO_RSC:
return "resource not found";
case NVME_RDMA_CM_INVALID_IRD:
return "invalid IRD";
case NVME_RDMA_CM_INVALID_ORD:
return "Invalid ORD";
default:
return "unrecognized reason";
}
}
/** /**
* struct nvme_rdma_cm_req - rdma connect request * struct nvme_rdma_cm_req - rdma connect request
* *


@ -579,6 +579,12 @@ struct nvme_write_zeroes_cmd {
__le16 appmask; __le16 appmask;
}; };
/* Features */
struct nvme_feat_auto_pst {
__le64 entries[32];
};
/* Admin commands */ /* Admin commands */
enum nvme_admin_opcode { enum nvme_admin_opcode {
@ -644,7 +650,9 @@ struct nvme_identify {
__le32 nsid; __le32 nsid;
__u64 rsvd2[2]; __u64 rsvd2[2];
union nvme_data_ptr dptr; union nvme_data_ptr dptr;
__le32 cns; __u8 cns;
__u8 rsvd3;
__le16 ctrlid;
__u32 rsvd11[5]; __u32 rsvd11[5];
}; };
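With cns narrowed to a byte, building an Identify SQE no longer needs an endianness conversion for that field. A short sketch mirroring the core-driver usage above:

struct nvme_command c = { };
u32 nsid = 1;				/* hypothetical namespace */

c.identify.opcode = nvme_admin_identify;
c.identify.cns = NVME_ID_CNS_CTRL;	/* __u8: assigned directly */
c.identify.nsid = cpu_to_le32(nsid);	/* multi-byte fields still swapped */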


@ -27,6 +27,7 @@ typedef int (sec_send_recv)(void *data, u16 spsp, u8 secp, void *buffer,
size_t len, bool send); size_t len, bool send);
#ifdef CONFIG_BLK_SED_OPAL #ifdef CONFIG_BLK_SED_OPAL
void free_opal_dev(struct opal_dev *dev);
bool opal_unlock_from_suspend(struct opal_dev *dev); bool opal_unlock_from_suspend(struct opal_dev *dev);
struct opal_dev *init_opal_dev(void *data, sec_send_recv *send_recv); struct opal_dev *init_opal_dev(void *data, sec_send_recv *send_recv);
int sed_ioctl(struct opal_dev *dev, unsigned int cmd, void __user *ioctl_ptr); int sed_ioctl(struct opal_dev *dev, unsigned int cmd, void __user *ioctl_ptr);
@ -51,6 +52,10 @@ static inline bool is_sed_ioctl(unsigned int cmd)
return false; return false;
} }
#else #else
static inline void free_opal_dev(struct opal_dev *dev)
{
}
static inline bool is_sed_ioctl(unsigned int cmd) static inline bool is_sed_ioctl(unsigned int cmd)
{ {
return false; return false;