- Add a dm-crypt optional "high_priority" flag that enables the crypt

workqueues to use WQ_HIGHPRI.
 
 - Export dm-crypt workqueues via sysfs (by enabling WQ_SYSFS) to allow
   for improved visibility and controls over IO and crypt workqueues.
 
 - Fix dm-crypt to no longer constrain max_segment_size to PAGE_SIZE.
   This limit isn't needed given that the block core provides late bio
   splitting if bio exceeds underlying limits (e.g. max_segment_size).
 
 - Fix dm-crypt crypt_queue's use of WQ_UNBOUND to not use
   WQ_CPU_INTENSIVE because it is meaningless with WQ_UNBOUND.
 
 - Fix various issues with dm-delay target (ranging from a resource
   teardown fix, a fix for hung task when using kthread mode, and other
   improvements that followed from code inspection).
 -----BEGIN PGP SIGNATURE-----
 
 iQEzBAABCAAdFiEEJfWUX4UqZ4x1O2wixSPxCi2dA1oFAmZDiggACgkQxSPxCi2d
 A1oQrwf7BUHy7ehwCjRrVlFTteIlx0ULTpPxictakN/S+xZfcZUcbE20OjNVzdk9
 m5dx4Gn557rlMkiC4NDlHazVEVM5BbTpih27rvUgvX2nchUUdfHIT1OvU0isT4Yi
 h9g/o7i9DnBPjvyNjpXjP9YE7Xg8u2X9mxpv8DyU5M+QpFuofwzsfkCP7g14B0g2
 btGxT3AZ5Bo8A/csKeSqHq13Nbq/dcAZZ3IvjIg1xSXjm6CoQ04rfO0TN6SKfsFJ
 GXteBS2JT1MMcXf3oKweAeQduTE+psVFea7gt/8haKFldnV+DpPNg1gU/7rza5Os
 eL1+L1iPY5fuEJIkaqPjBVRGkxQqHg==
 =+OhH
 -----END PGP SIGNATURE-----

Merge tag 'for-6.10/dm-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm

Pull device mapper updates from Mike Snitzer:

 - Add a dm-crypt optional "high_priority" flag that enables the crypt
   workqueues to use WQ_HIGHPRI.

 - Export dm-crypt workqueues via sysfs (by enabling WQ_SYSFS) to allow
   for improved visibility and controls over IO and crypt workqueues.

 - Fix dm-crypt to no longer constrain max_segment_size to PAGE_SIZE.
   This limit isn't needed given that the block core provides late bio
   splitting if bio exceeds underlying limits (e.g. max_segment_size).

 - Fix dm-crypt crypt_queue's use of WQ_UNBOUND to not use
   WQ_CPU_INTENSIVE because it is meaningless with WQ_UNBOUND.

 - Fix various issues with dm-delay target (ranging from a resource
   teardown fix, a fix for hung task when using kthread mode, and other
   improvements that followed from code inspection).

* tag 'for-6.10/dm-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm:
  dm-delay: remove timer_lock
  dm-delay: change locking to avoid contention
  dm-delay: fix max_delay calculations
  dm-delay: fix hung task introduced by kthread mode
  dm-delay: fix workqueue delay_timer race
  dm-crypt: don't set WQ_CPU_INTENSIVE for WQ_UNBOUND crypt_queue
  dm: use queue_limits_set
  dm-crypt: stop constraining max_segment_size to PAGE_SIZE
  dm-crypt: export sysfs of all workqueues
  dm-crypt: add the optional "high_priority" flag
This commit is contained in:
Linus Torvalds 2024-05-14 18:34:19 -07:00
commit 4f8b6f25eb
4 changed files with 97 additions and 68 deletions

View File

@ -113,6 +113,11 @@ same_cpu_crypt
The default is to use an unbound workqueue so that encryption work
is automatically balanced between available CPUs.
high_priority
Set dm-crypt workqueues and the writer thread to high priority. This
improves throughput and latency of dm-crypt while degrading general
responsiveness of the system.
submit_from_crypt_cpus
Disable offloading writes to a separate thread after encryption.
There are some situations where offloading write bios from the

View File

@ -47,6 +47,8 @@
#define DM_MSG_PREFIX "crypt"
static DEFINE_IDA(workqueue_ida);
/*
* context holding the current state of a multi-part conversion
*/
@ -137,9 +139,9 @@ struct iv_elephant_private {
* and encrypts / decrypts at the same time.
*/
enum flags { DM_CRYPT_SUSPENDED, DM_CRYPT_KEY_VALID,
DM_CRYPT_SAME_CPU, DM_CRYPT_NO_OFFLOAD,
DM_CRYPT_NO_READ_WORKQUEUE, DM_CRYPT_NO_WRITE_WORKQUEUE,
DM_CRYPT_WRITE_INLINE };
DM_CRYPT_SAME_CPU, DM_CRYPT_HIGH_PRIORITY,
DM_CRYPT_NO_OFFLOAD, DM_CRYPT_NO_READ_WORKQUEUE,
DM_CRYPT_NO_WRITE_WORKQUEUE, DM_CRYPT_WRITE_INLINE };
enum cipher_flags {
CRYPT_MODE_INTEGRITY_AEAD, /* Use authenticated mode for cipher */
@ -184,6 +186,7 @@ struct crypt_config {
struct crypto_aead **tfms_aead;
} cipher_tfm;
unsigned int tfms_count;
int workqueue_id;
unsigned long cipher_flags;
/*
@ -1653,8 +1656,8 @@ static void crypt_free_buffer_pages(struct crypt_config *cc, struct bio *clone);
/*
* Generate a new unfragmented bio with the given size
* This should never violate the device limitations (but only because
* max_segment_size is being constrained to PAGE_SIZE).
* This should never violate the device limitations (but if it did then block
* core should split the bio as needed).
*
* This function may be called concurrently. If we allocate from the mempool
* concurrently, there is a possibility of deadlock. For example, if we have
@ -2771,6 +2774,9 @@ static void crypt_dtr(struct dm_target *ti)
if (cc->crypt_queue)
destroy_workqueue(cc->crypt_queue);
if (cc->workqueue_id)
ida_free(&workqueue_ida, cc->workqueue_id);
crypt_free_tfms(cc);
bioset_exit(&cc->bs);
@ -3134,7 +3140,7 @@ static int crypt_ctr_optional(struct dm_target *ti, unsigned int argc, char **ar
struct crypt_config *cc = ti->private;
struct dm_arg_set as;
static const struct dm_arg _args[] = {
{0, 8, "Invalid number of feature args"},
{0, 9, "Invalid number of feature args"},
};
unsigned int opt_params, val;
const char *opt_string, *sval;
@ -3161,6 +3167,8 @@ static int crypt_ctr_optional(struct dm_target *ti, unsigned int argc, char **ar
else if (!strcasecmp(opt_string, "same_cpu_crypt"))
set_bit(DM_CRYPT_SAME_CPU, &cc->flags);
else if (!strcasecmp(opt_string, "high_priority"))
set_bit(DM_CRYPT_HIGH_PRIORITY, &cc->flags);
else if (!strcasecmp(opt_string, "submit_from_crypt_cpus"))
set_bit(DM_CRYPT_NO_OFFLOAD, &cc->flags);
@ -3230,8 +3238,9 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
{
struct crypt_config *cc;
const char *devname = dm_table_device_name(ti->table);
int key_size;
int key_size, wq_id;
unsigned int align_mask;
unsigned int common_wq_flags;
unsigned long long tmpll;
int ret;
size_t iv_size_padding, additional_req_size;
@ -3398,20 +3407,38 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
cc->tag_pool_max_sectors <<= cc->sector_shift;
}
wq_id = ida_alloc_min(&workqueue_ida, 1, GFP_KERNEL);
if (wq_id < 0) {
ti->error = "Couldn't get workqueue id";
ret = wq_id;
goto bad;
}
cc->workqueue_id = wq_id;
ret = -ENOMEM;
cc->io_queue = alloc_workqueue("kcryptd_io/%s", WQ_MEM_RECLAIM, 1, devname);
common_wq_flags = WQ_MEM_RECLAIM | WQ_SYSFS;
if (test_bit(DM_CRYPT_HIGH_PRIORITY, &cc->flags))
common_wq_flags |= WQ_HIGHPRI;
cc->io_queue = alloc_workqueue("kcryptd_io-%s-%d", common_wq_flags, 1, devname, wq_id);
if (!cc->io_queue) {
ti->error = "Couldn't create kcryptd io queue";
goto bad;
}
if (test_bit(DM_CRYPT_SAME_CPU, &cc->flags))
cc->crypt_queue = alloc_workqueue("kcryptd/%s", WQ_CPU_INTENSIVE | WQ_MEM_RECLAIM,
1, devname);
else
cc->crypt_queue = alloc_workqueue("kcryptd/%s",
WQ_CPU_INTENSIVE | WQ_MEM_RECLAIM | WQ_UNBOUND,
num_online_cpus(), devname);
if (test_bit(DM_CRYPT_SAME_CPU, &cc->flags)) {
cc->crypt_queue = alloc_workqueue("kcryptd-%s-%d",
common_wq_flags | WQ_CPU_INTENSIVE,
1, devname, wq_id);
} else {
/*
* While crypt_queue is certainly CPU intensive, the use of
* WQ_CPU_INTENSIVE is meaningless with WQ_UNBOUND.
*/
cc->crypt_queue = alloc_workqueue("kcryptd-%s-%d",
common_wq_flags | WQ_UNBOUND,
num_online_cpus(), devname, wq_id);
}
if (!cc->crypt_queue) {
ti->error = "Couldn't create kcryptd queue";
goto bad;
@ -3427,6 +3454,8 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
ti->error = "Couldn't spawn write thread";
goto bad;
}
if (test_bit(DM_CRYPT_HIGH_PRIORITY, &cc->flags))
set_user_nice(cc->write_thread, MIN_NICE);
ti->num_flush_bios = 1;
ti->limit_swap_bios = true;
@ -3547,6 +3576,7 @@ static void crypt_status(struct dm_target *ti, status_type_t type,
num_feature_args += !!ti->num_discard_bios;
num_feature_args += test_bit(DM_CRYPT_SAME_CPU, &cc->flags);
num_feature_args += test_bit(DM_CRYPT_HIGH_PRIORITY, &cc->flags);
num_feature_args += test_bit(DM_CRYPT_NO_OFFLOAD, &cc->flags);
num_feature_args += test_bit(DM_CRYPT_NO_READ_WORKQUEUE, &cc->flags);
num_feature_args += test_bit(DM_CRYPT_NO_WRITE_WORKQUEUE, &cc->flags);
@ -3560,6 +3590,8 @@ static void crypt_status(struct dm_target *ti, status_type_t type,
DMEMIT(" allow_discards");
if (test_bit(DM_CRYPT_SAME_CPU, &cc->flags))
DMEMIT(" same_cpu_crypt");
if (test_bit(DM_CRYPT_HIGH_PRIORITY, &cc->flags))
DMEMIT(" high_priority");
if (test_bit(DM_CRYPT_NO_OFFLOAD, &cc->flags))
DMEMIT(" submit_from_crypt_cpus");
if (test_bit(DM_CRYPT_NO_READ_WORKQUEUE, &cc->flags))
@ -3579,6 +3611,7 @@ static void crypt_status(struct dm_target *ti, status_type_t type,
DMEMIT_TARGET_NAME_VERSION(ti->type);
DMEMIT(",allow_discards=%c", ti->num_discard_bios ? 'y' : 'n');
DMEMIT(",same_cpu_crypt=%c", test_bit(DM_CRYPT_SAME_CPU, &cc->flags) ? 'y' : 'n');
DMEMIT(",high_priority=%c", test_bit(DM_CRYPT_HIGH_PRIORITY, &cc->flags) ? 'y' : 'n');
DMEMIT(",submit_from_crypt_cpus=%c", test_bit(DM_CRYPT_NO_OFFLOAD, &cc->flags) ?
'y' : 'n');
DMEMIT(",no_read_workqueue=%c", test_bit(DM_CRYPT_NO_READ_WORKQUEUE, &cc->flags) ?
@ -3688,14 +3721,6 @@ static void crypt_io_hints(struct dm_target *ti, struct queue_limits *limits)
{
struct crypt_config *cc = ti->private;
/*
* Unfortunate constraint that is required to avoid the potential
* for exceeding underlying device's max_segments limits -- due to
* crypt_alloc_buffer() possibly allocating pages for the encryption
* bio that are not as physically contiguous as the original bio.
*/
limits->max_segment_size = PAGE_SIZE;
limits->logical_block_size =
max_t(unsigned int, limits->logical_block_size, cc->sector_size);
limits->physical_block_size =
@ -3706,7 +3731,7 @@ static void crypt_io_hints(struct dm_target *ti, struct queue_limits *limits)
static struct target_type crypt_target = {
.name = "crypt",
.version = {1, 25, 0},
.version = {1, 26, 0},
.module = THIS_MODULE,
.ctr = crypt_ctr,
.dtr = crypt_dtr,

View File

@ -28,7 +28,8 @@ struct delay_class {
struct delay_c {
struct timer_list delay_timer;
struct mutex timer_lock;
struct mutex process_bios_lock; /* hold while removing bios to be processed from list */
spinlock_t delayed_bios_lock; /* hold on all accesses to delayed_bios list */
struct workqueue_struct *kdelayd_wq;
struct work_struct flush_expired_bios;
struct list_head delayed_bios;
@ -49,8 +50,6 @@ struct dm_delay_info {
unsigned long expires;
};
static DEFINE_MUTEX(delayed_bios_lock);
static void handle_delayed_timer(struct timer_list *t)
{
struct delay_c *dc = from_timer(dc, t, delay_timer);
@ -60,12 +59,7 @@ static void handle_delayed_timer(struct timer_list *t)
static void queue_timeout(struct delay_c *dc, unsigned long expires)
{
mutex_lock(&dc->timer_lock);
if (!timer_pending(&dc->delay_timer) || expires < dc->delay_timer.expires)
mod_timer(&dc->delay_timer, expires);
mutex_unlock(&dc->timer_lock);
timer_reduce(&dc->delay_timer, expires);
}
static inline bool delay_is_fast(struct delay_c *dc)
@ -89,12 +83,16 @@ static void flush_delayed_bios(struct delay_c *dc, bool flush_all)
{
struct dm_delay_info *delayed, *next;
struct bio_list flush_bio_list;
LIST_HEAD(local_list);
unsigned long next_expires = 0;
bool start_timer = false;
bio_list_init(&flush_bio_list);
mutex_lock(&delayed_bios_lock);
list_for_each_entry_safe(delayed, next, &dc->delayed_bios, list) {
mutex_lock(&dc->process_bios_lock);
spin_lock(&dc->delayed_bios_lock);
list_replace_init(&dc->delayed_bios, &local_list);
spin_unlock(&dc->delayed_bios_lock);
list_for_each_entry_safe(delayed, next, &local_list, list) {
cond_resched();
if (flush_all || time_after_eq(jiffies, delayed->expires)) {
struct bio *bio = dm_bio_from_per_bio_data(delayed,
@ -114,7 +112,10 @@ static void flush_delayed_bios(struct delay_c *dc, bool flush_all)
}
}
}
mutex_unlock(&delayed_bios_lock);
spin_lock(&dc->delayed_bios_lock);
list_splice(&local_list, &dc->delayed_bios);
spin_unlock(&dc->delayed_bios_lock);
mutex_unlock(&dc->process_bios_lock);
if (start_timer)
queue_timeout(dc, next_expires);
@ -128,13 +129,13 @@ static int flush_worker_fn(void *data)
while (!kthread_should_stop()) {
flush_delayed_bios(dc, false);
mutex_lock(&delayed_bios_lock);
spin_lock(&dc->delayed_bios_lock);
if (unlikely(list_empty(&dc->delayed_bios))) {
set_current_state(TASK_INTERRUPTIBLE);
mutex_unlock(&delayed_bios_lock);
spin_unlock(&dc->delayed_bios_lock);
schedule();
} else {
mutex_unlock(&delayed_bios_lock);
spin_unlock(&dc->delayed_bios_lock);
cond_resched();
}
}
@ -154,8 +155,10 @@ static void delay_dtr(struct dm_target *ti)
{
struct delay_c *dc = ti->private;
if (dc->kdelayd_wq)
if (dc->kdelayd_wq) {
timer_shutdown_sync(&dc->delay_timer);
destroy_workqueue(dc->kdelayd_wq);
}
if (dc->read.dev)
dm_put_device(ti, dc->read.dev);
@ -166,7 +169,7 @@ static void delay_dtr(struct dm_target *ti)
if (dc->worker)
kthread_stop(dc->worker);
mutex_destroy(&dc->timer_lock);
mutex_destroy(&dc->process_bios_lock);
kfree(dc);
}
@ -224,7 +227,8 @@ static int delay_ctr(struct dm_target *ti, unsigned int argc, char **argv)
ti->private = dc;
INIT_LIST_HEAD(&dc->delayed_bios);
mutex_init(&dc->timer_lock);
mutex_init(&dc->process_bios_lock);
spin_lock_init(&dc->delayed_bios_lock);
dc->may_delay = true;
dc->argc = argc;
@ -240,19 +244,18 @@ static int delay_ctr(struct dm_target *ti, unsigned int argc, char **argv)
ret = delay_class_ctr(ti, &dc->flush, argv);
if (ret)
goto bad;
max_delay = max(max_delay, dc->write.delay);
max_delay = max(max_delay, dc->flush.delay);
goto out;
}
ret = delay_class_ctr(ti, &dc->write, argv + 3);
if (ret)
goto bad;
max_delay = max(max_delay, dc->write.delay);
if (argc == 6) {
ret = delay_class_ctr(ti, &dc->flush, argv + 3);
if (ret)
goto bad;
max_delay = max(max_delay, dc->flush.delay);
goto out;
}
@ -267,8 +270,7 @@ out:
* In case of small requested delays, use kthread instead of
* timers and workqueue to achieve better latency.
*/
dc->worker = kthread_create(&flush_worker_fn, dc,
"dm-delay-flush-worker");
dc->worker = kthread_run(&flush_worker_fn, dc, "dm-delay-flush-worker");
if (IS_ERR(dc->worker)) {
ret = PTR_ERR(dc->worker);
dc->worker = NULL;
@ -309,14 +311,14 @@ static int delay_bio(struct delay_c *dc, struct delay_class *c, struct bio *bio)
delayed->context = dc;
delayed->expires = expires = jiffies + msecs_to_jiffies(c->delay);
mutex_lock(&delayed_bios_lock);
spin_lock(&dc->delayed_bios_lock);
if (unlikely(!dc->may_delay)) {
mutex_unlock(&delayed_bios_lock);
spin_unlock(&dc->delayed_bios_lock);
return DM_MAPIO_REMAPPED;
}
c->ops++;
list_add_tail(&delayed->list, &dc->delayed_bios);
mutex_unlock(&delayed_bios_lock);
spin_unlock(&dc->delayed_bios_lock);
if (delay_is_fast(dc))
wake_up_process(dc->worker);
@ -330,12 +332,12 @@ static void delay_presuspend(struct dm_target *ti)
{
struct delay_c *dc = ti->private;
mutex_lock(&delayed_bios_lock);
spin_lock(&dc->delayed_bios_lock);
dc->may_delay = false;
mutex_unlock(&delayed_bios_lock);
spin_unlock(&dc->delayed_bios_lock);
if (!delay_is_fast(dc))
del_timer_sync(&dc->delay_timer);
timer_delete(&dc->delay_timer);
flush_delayed_bios(dc, true);
}

View File

@ -1963,26 +1963,27 @@ int dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
bool wc = false, fua = false;
int r;
/*
* Copy table's limits to the DM device's request_queue
*/
q->limits = *limits;
if (dm_table_supports_nowait(t))
blk_queue_flag_set(QUEUE_FLAG_NOWAIT, q);
else
blk_queue_flag_clear(QUEUE_FLAG_NOWAIT, q);
if (!dm_table_supports_discards(t)) {
q->limits.max_discard_sectors = 0;
q->limits.max_hw_discard_sectors = 0;
q->limits.discard_granularity = 0;
q->limits.discard_alignment = 0;
q->limits.discard_misaligned = 0;
limits->max_hw_discard_sectors = 0;
limits->discard_granularity = 0;
limits->discard_alignment = 0;
limits->discard_misaligned = 0;
}
if (!dm_table_supports_write_zeroes(t))
limits->max_write_zeroes_sectors = 0;
if (!dm_table_supports_secure_erase(t))
q->limits.max_secure_erase_sectors = 0;
limits->max_secure_erase_sectors = 0;
r = queue_limits_set(q, limits);
if (r)
return r;
if (dm_table_supports_flush(t, (1UL << QUEUE_FLAG_WC))) {
wc = true;
@ -2007,9 +2008,6 @@ int dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
else
blk_queue_flag_set(QUEUE_FLAG_NONROT, q);
if (!dm_table_supports_write_zeroes(t))
q->limits.max_write_zeroes_sectors = 0;
dm_table_verify_integrity(t);
/*
@ -2048,7 +2046,6 @@ int dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
}
dm_update_crypto_profile(q, t);
disk_update_readahead(t->md->disk);
/*
* Check for request-based device is left to