From d45b3279a5a2252cafcd665bbf2db8c9b31ef783 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 8 Jul 2014 12:25:28 +0200 Subject: [PATCH 1/4] block: don't assume last put of shared tags is for the host There is no inherent reason why the last put of a tag structure must be the one for the Scsi_Host, as device model objects can be held for arbitrary periods. Merge blk_free_tags and __blk_free_tags into a single funtion that just release a references and get rid of the BUG() when the host reference wasn't the last. Signed-off-by: Christoph Hellwig Cc: stable@kernel.org Signed-off-by: Jens Axboe --- block/blk-tag.c | 33 +++++++-------------------------- 1 file changed, 7 insertions(+), 26 deletions(-) diff --git a/block/blk-tag.c b/block/blk-tag.c index 3f33d8672268..a185b86741e5 100644 --- a/block/blk-tag.c +++ b/block/blk-tag.c @@ -27,18 +27,15 @@ struct request *blk_queue_find_tag(struct request_queue *q, int tag) EXPORT_SYMBOL(blk_queue_find_tag); /** - * __blk_free_tags - release a given set of tag maintenance info + * blk_free_tags - release a given set of tag maintenance info * @bqt: the tag map to free * - * Tries to free the specified @bqt. Returns true if it was - * actually freed and false if there are still references using it + * Drop the reference count on @bqt and frees it when the last reference + * is dropped. */ -static int __blk_free_tags(struct blk_queue_tag *bqt) +void blk_free_tags(struct blk_queue_tag *bqt) { - int retval; - - retval = atomic_dec_and_test(&bqt->refcnt); - if (retval) { + if (atomic_dec_and_test(&bqt->refcnt)) { BUG_ON(find_first_bit(bqt->tag_map, bqt->max_depth) < bqt->max_depth); @@ -50,9 +47,8 @@ static int __blk_free_tags(struct blk_queue_tag *bqt) kfree(bqt); } - - return retval; } +EXPORT_SYMBOL(blk_free_tags); /** * __blk_queue_free_tags - release tag maintenance info @@ -69,27 +65,12 @@ void __blk_queue_free_tags(struct request_queue *q) if (!bqt) return; - __blk_free_tags(bqt); + blk_free_tags(bqt); q->queue_tags = NULL; queue_flag_clear_unlocked(QUEUE_FLAG_QUEUED, q); } -/** - * blk_free_tags - release a given set of tag maintenance info - * @bqt: the tag map to free - * - * For externally managed @bqt frees the map. Callers of this - * function must guarantee to have released all the queues that - * might have been using this tag map. - */ -void blk_free_tags(struct blk_queue_tag *bqt) -{ - if (unlikely(!__blk_free_tags(bqt))) - BUG(); -} -EXPORT_SYMBOL(blk_free_tags); - /** * blk_queue_free_tags - release tag maintenance info * @q: the request queue for the device From bbc1c5e8ad6dfebf9d13b8a4ccdf66c92913eac9 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Wed, 9 Jul 2014 21:18:32 +0200 Subject: [PATCH 2/4] drbd: fix regression 'out of mem, failed to invoke fence-peer helper' Since linux kernel 3.13, kthread_run() internally uses wait_for_completion_killable(). We sometimes may use kthread_run() while we still have a signal pending, which we used to kick our threads out of potentially blocking network functions, causing kthread_run() to mistake that as a new fatal signal and fail. Fix: flush_signals() before kthread_run(). Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg Signed-off-by: Jens Axboe --- drivers/block/drbd/drbd_nl.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 1b35c45c92b7..3f2e16738080 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -544,6 +544,12 @@ void conn_try_outdate_peer_async(struct drbd_connection *connection) struct task_struct *opa; kref_get(&connection->kref); + /* We may just have force_sig()'ed this thread + * to get it out of some blocking network function. + * Clear signals; otherwise kthread_run(), which internally uses + * wait_on_completion_killable(), will mistake our pending signal + * for a new fatal signal and fail. */ + flush_signals(current); opa = kthread_run(_try_outdate_peer_async, connection, "drbd_async_h"); if (IS_ERR(opa)) { drbd_err(connection, "out of mem, failed to invoke fence-peer helper\n"); From 0b462c89e31f7eb6789713437eb551833ee16ff3 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sat, 5 Jul 2014 18:43:21 -0400 Subject: [PATCH 3/4] blkcg: don't call into policy draining if root_blkg is already gone While a queue is being destroyed, all the blkgs are destroyed and its ->root_blkg pointer is set to NULL. If someone else starts to drain while the queue is in this state, the following oops happens. NULL pointer dereference at 0000000000000028 IP: [] blk_throtl_drain+0x84/0x230 PGD e4a1067 PUD b773067 PMD 0 Oops: 0000 [#1] PREEMPT SMP DEBUG_PAGEALLOC Modules linked in: cfq_iosched(-) [last unloaded: cfq_iosched] CPU: 1 PID: 537 Comm: bash Not tainted 3.16.0-rc3-work+ #2 Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011 task: ffff88000e222250 ti: ffff88000efd4000 task.ti: ffff88000efd4000 RIP: 0010:[] [] blk_throtl_drain+0x84/0x230 RSP: 0018:ffff88000efd7bf0 EFLAGS: 00010046 RAX: 0000000000000000 RBX: ffff880015091450 RCX: 0000000000000001 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000 RBP: ffff88000efd7c10 R08: 0000000000000000 R09: 0000000000000001 R10: ffff88000e222250 R11: 0000000000000000 R12: ffff880015091450 R13: ffff880015092e00 R14: ffff880015091d70 R15: ffff88001508fc28 FS: 00007f1332650740(0000) GS:ffff88001fa80000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b CR2: 0000000000000028 CR3: 0000000009446000 CR4: 00000000000006e0 Stack: ffffffff8144e8f6 ffff880015091450 0000000000000000 ffff880015091d80 ffff88000efd7c28 ffffffff8144ae2f ffff880015091450 ffff88000efd7c58 ffffffff81427641 ffff880015091450 ffffffff82401f00 ffff880015091450 Call Trace: [] blkcg_drain_queue+0x1f/0x60 [] __blk_drain_queue+0x71/0x180 [] blk_queue_bypass_start+0x6e/0xb0 [] blkcg_deactivate_policy+0x38/0x120 [] blk_throtl_exit+0x34/0x50 [] blkcg_exit_queue+0x35/0x40 [] blk_release_queue+0x26/0xd0 [] kobject_cleanup+0x38/0x70 [] kobject_put+0x28/0x60 [] blk_put_queue+0x15/0x20 [] scsi_device_dev_release_usercontext+0x16b/0x1c0 [] execute_in_process_context+0x89/0xa0 [] scsi_device_dev_release+0x1c/0x20 [] device_release+0x32/0xa0 [] kobject_cleanup+0x38/0x70 [] kobject_put+0x28/0x60 [] put_device+0x17/0x20 [] __scsi_remove_device+0xa9/0xe0 [] scsi_remove_device+0x2b/0x40 [] sdev_store_delete+0x27/0x30 [] dev_attr_store+0x18/0x30 [] sysfs_kf_write+0x3e/0x50 [] kernfs_fop_write+0xe7/0x170 [] vfs_write+0xaf/0x1d0 [] SyS_write+0x4d/0xc0 [] system_call_fastpath+0x16/0x1b 776687bce42b ("block, blk-mq: draining can't be skipped even if bypass_depth was non-zero") made it easier to trigger this bug by making blk_queue_bypass_start() drain even when it loses the first bypass test to blk_cleanup_queue(); however, the bug has always been there even before the commit as blk_queue_bypass_start() could race against queue destruction, win the initial bypass test but perform the actual draining after blk_cleanup_queue() already destroyed all blkgs. Fix it by skippping calling into policy draining if all the blkgs are already gone. Signed-off-by: Tejun Heo Reported-by: Shirish Pargaonkar Reported-by: Sasha Levin Reported-by: Jet Chen Cc: stable@vger.kernel.org Tested-by: Shirish Pargaonkar Signed-off-by: Jens Axboe --- block/blk-cgroup.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index b9f4cc494ece..28d227c5ca77 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -872,6 +872,13 @@ void blkcg_drain_queue(struct request_queue *q) { lockdep_assert_held(q->queue_lock); + /* + * @q could be exiting and already have destroyed all blkgs as + * indicated by NULL root_blkg. If so, don't confuse policies. + */ + if (!q->root_blkg) + return; + blk_throtl_drain(q); } From 3b3a1814d1703027f9867d0f5cbbfaf6c7482474 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Wed, 2 Jul 2014 12:46:23 -0400 Subject: [PATCH 4/4] block: provide compat ioctl for BLKZEROOUT This patch provides the compat BLKZEROOUT ioctl. The argument is a pointer to two uint64_t values, so there is no need to translate it. Signed-off-by: Mikulas Patocka Cc: stable@vger.kernel.org # 3.7+ Acked-by: Martin K. Petersen Signed-off-by: Jens Axboe --- block/compat_ioctl.c | 1 + 1 file changed, 1 insertion(+) diff --git a/block/compat_ioctl.c b/block/compat_ioctl.c index fbd5a67cb773..a0926a6094b2 100644 --- a/block/compat_ioctl.c +++ b/block/compat_ioctl.c @@ -690,6 +690,7 @@ long compat_blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg) case BLKROSET: case BLKDISCARD: case BLKSECDISCARD: + case BLKZEROOUT: /* * the ones below are implemented in blkdev_locked_ioctl, * but we call blkdev_ioctl, which gets the lock for us