From acfe0ad74d2e1bfc81d1d7bf5e15b043985d3650 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Sat, 14 Jun 2014 13:44:31 -0400 Subject: [PATCH 1/4] dm: allocate a special workqueue for deferred device removal The commit 2c140a246dc ("dm: allow remove to be deferred") introduced a deferred removal feature for the device mapper. When this feature is used (by passing a flag DM_DEFERRED_REMOVE to DM_DEV_REMOVE_CMD ioctl) and the user tries to remove a device that is currently in use, the device will be removed automatically in the future when the last user closes it. Device mapper used the system workqueue to perform deferred removals. However, some targets (dm-raid1, dm-mpath, dm-stripe) flush work items scheduled for the system workqueue from their destructor. If the destructor itself is called from the system workqueue during deferred removal, it introduces a possible deadlock - the workqueue tries to flush itself. Fix this possible deadlock by introducing a new workqueue for deferred removals. We allocate just one workqueue for all dm targets. The ability of dm targets to process IOs isn't dependent on deferred removal of unused targets, so a deadlock due to shared workqueue isn't possible. Also, cleanup local_init() to eliminate potential for returning success on failure. Signed-off-by: Mikulas Patocka Signed-off-by: Dan Carpenter Signed-off-by: Mike Snitzer Cc: stable@vger.kernel.org # 3.13+ --- drivers/md/dm.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 437d99045ef2..32b958dbc499 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -54,6 +54,8 @@ static void do_deferred_remove(struct work_struct *w); static DECLARE_WORK(deferred_remove_work, do_deferred_remove); +static struct workqueue_struct *deferred_remove_workqueue; + /* * For bio-based dm. * One of these is allocated per bio. @@ -276,16 +278,24 @@ static int __init local_init(void) if (r) goto out_free_rq_tio_cache; + deferred_remove_workqueue = alloc_workqueue("kdmremove", WQ_UNBOUND, 1); + if (!deferred_remove_workqueue) { + r = -ENOMEM; + goto out_uevent_exit; + } + _major = major; r = register_blkdev(_major, _name); if (r < 0) - goto out_uevent_exit; + goto out_free_workqueue; if (!_major) _major = r; return 0; +out_free_workqueue: + destroy_workqueue(deferred_remove_workqueue); out_uevent_exit: dm_uevent_exit(); out_free_rq_tio_cache: @@ -299,6 +309,7 @@ out_free_io_cache: static void local_exit(void) { flush_scheduled_work(); + destroy_workqueue(deferred_remove_workqueue); kmem_cache_destroy(_rq_tio_cache); kmem_cache_destroy(_io_cache); @@ -407,7 +418,7 @@ static void dm_blk_close(struct gendisk *disk, fmode_t mode) if (atomic_dec_and_test(&md->open_count) && (test_bit(DMF_DEFERRED_REMOVE, &md->flags))) - schedule_work(&deferred_remove_work); + queue_work(deferred_remove_workqueue, &deferred_remove_work); dm_put(md); From bf14299f1ce96c9d632533c4557303f8a74afc9e Mon Sep 17 00:00:00 2001 From: Jana Saout Date: Tue, 24 Jun 2014 14:27:04 -0400 Subject: [PATCH 2/4] dm crypt, dm zero: update author name following legal name change Signed-off-by: Jana Saout Signed-off-by: Mike Snitzer --- drivers/md/dm-crypt.c | 4 ++-- drivers/md/dm-zero.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index 53b213226c01..4cba2d808afb 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2003 Christophe Saout + * Copyright (C) 2003 Jana Saout * Copyright (C) 2004 Clemens Fruhwirth * Copyright (C) 2006-2009 Red Hat, Inc. All rights reserved. * Copyright (C) 2013 Milan Broz @@ -1996,6 +1996,6 @@ static void __exit dm_crypt_exit(void) module_init(dm_crypt_init); module_exit(dm_crypt_exit); -MODULE_AUTHOR("Christophe Saout "); +MODULE_AUTHOR("Jana Saout "); MODULE_DESCRIPTION(DM_NAME " target for transparent encryption / decryption"); MODULE_LICENSE("GPL"); diff --git a/drivers/md/dm-zero.c b/drivers/md/dm-zero.c index c99003e0d47a..b9a64bbce304 100644 --- a/drivers/md/dm-zero.c +++ b/drivers/md/dm-zero.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2003 Christophe Saout + * Copyright (C) 2003 Jana Saout * * This file is released under the GPL. */ @@ -79,6 +79,6 @@ static void __exit dm_zero_exit(void) module_init(dm_zero_init) module_exit(dm_zero_exit) -MODULE_AUTHOR("Christophe Saout "); +MODULE_AUTHOR("Jana Saout "); MODULE_DESCRIPTION(DM_NAME " dummy target returning zeros"); MODULE_LICENSE("GPL"); From 10f1d5d111e8aed46a0f1179faf9a3cf422f689e Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Fri, 27 Jun 2014 15:29:04 -0400 Subject: [PATCH 3/4] dm io: fix a race condition in the wake up code for sync_io There's a race condition between the atomic_dec_and_test(&io->count) in dec_count() and the waking of the sync_io() thread. If the thread is spuriously woken immediately after the decrement it may exit, making the on stack io struct invalid, yet the dec_count could still be using it. Fix this race by using a completion in sync_io() and dec_count(). Reported-by: Minfei Huang Signed-off-by: Joe Thornber Signed-off-by: Mike Snitzer Acked-by: Mikulas Patocka Cc: stable@vger.kernel.org --- drivers/md/dm-io.c | 22 ++++++++-------------- 1 file changed, 8 insertions(+), 14 deletions(-) diff --git a/drivers/md/dm-io.c b/drivers/md/dm-io.c index 3842ac738f98..db404a0f7e2c 100644 --- a/drivers/md/dm-io.c +++ b/drivers/md/dm-io.c @@ -10,6 +10,7 @@ #include #include +#include #include #include #include @@ -32,7 +33,7 @@ struct dm_io_client { struct io { unsigned long error_bits; atomic_t count; - struct task_struct *sleeper; + struct completion *wait; struct dm_io_client *client; io_notify_fn callback; void *context; @@ -121,8 +122,8 @@ static void dec_count(struct io *io, unsigned int region, int error) invalidate_kernel_vmap_range(io->vma_invalidate_address, io->vma_invalidate_size); - if (io->sleeper) - wake_up_process(io->sleeper); + if (io->wait) + complete(io->wait); else { unsigned long r = io->error_bits; @@ -387,6 +388,7 @@ static int sync_io(struct dm_io_client *client, unsigned int num_regions, */ volatile char io_[sizeof(struct io) + __alignof__(struct io) - 1]; struct io *io = (struct io *)PTR_ALIGN(&io_, __alignof__(struct io)); + DECLARE_COMPLETION_ONSTACK(wait); if (num_regions > 1 && (rw & RW_MASK) != WRITE) { WARN_ON(1); @@ -395,7 +397,7 @@ static int sync_io(struct dm_io_client *client, unsigned int num_regions, io->error_bits = 0; atomic_set(&io->count, 1); /* see dispatch_io() */ - io->sleeper = current; + io->wait = &wait; io->client = client; io->vma_invalidate_address = dp->vma_invalidate_address; @@ -403,15 +405,7 @@ static int sync_io(struct dm_io_client *client, unsigned int num_regions, dispatch_io(rw, num_regions, where, dp, io, 1); - while (1) { - set_current_state(TASK_UNINTERRUPTIBLE); - - if (!atomic_read(&io->count)) - break; - - io_schedule(); - } - set_current_state(TASK_RUNNING); + wait_for_completion_io(&wait); if (error_bits) *error_bits = io->error_bits; @@ -434,7 +428,7 @@ static int async_io(struct dm_io_client *client, unsigned int num_regions, io = mempool_alloc(client->pool, GFP_NOIO); io->error_bits = 0; atomic_set(&io->count, 1); /* see dispatch_io() */ - io->sleeper = NULL; + io->wait = NULL; io->client = client; io->callback = fn; io->context = context; From 7a7a3b45fed9a144dbf766ee842a4c5d0632b81d Mon Sep 17 00:00:00 2001 From: Jun'ichi Nomura Date: Tue, 8 Jul 2014 00:55:14 +0000 Subject: [PATCH 4/4] dm mpath: fix IO hang due to logic bug in multipath_busy Commit e80991773 ("dm mpath: push back requests instead of queueing") modified multipath_busy() to return true if !pg_ready(). pg_ready() checks the current state of the multipath device and may return false even if a new IO is needed to change the state. Bart Van Assche reported that he had multipath IO lockup when he was performing cable pull tests. Analysis showed that the multipath device had a single path group with both paths active, but that the path group itself was not active. During the multipath device state transitions 'queue_io' got set but nothing could clear it. Clearing 'queue_io' only happens in __choose_pgpath(), but it won't be called if multipath_busy() returns true due to pg_ready() returning false when 'queue_io' is set. As such the !pg_ready() check in multipath_busy() is wrong because new IO will not be sent to multipath target and the multipath state change won't happen. That results in multipath IO lockup. The intent of multipath_busy() is to avoid unnecessary cycles of dequeue + request_fn + requeue if it is known that the multipath device will requeue. Such "busy" situations would be: - path group is being activated - there is no path and the multipath is setup to requeue if no path Fix multipath_busy() to return "busy" early only for these specific situations. Reported-by: Bart Van Assche Tested-by: Bart Van Assche Signed-off-by: Jun'ichi Nomura Signed-off-by: Mike Snitzer Cc: stable@vger.kernel.org # v3.15 --- drivers/md/dm-mpath.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index 3f6fd9d33ba3..f4167b013d99 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c @@ -1611,8 +1611,9 @@ static int multipath_busy(struct dm_target *ti) spin_lock_irqsave(&m->lock, flags); - /* pg_init in progress, requeue until done */ - if (!pg_ready(m)) { + /* pg_init in progress or no paths available */ + if (m->pg_init_in_progress || + (!m->nr_valid_paths && m->queue_if_no_path)) { busy = 1; goto out; }