2005-04-17 02:20:36 +04:00
/*
2014-06-24 22:27:04 +04:00
* Copyright ( C ) 2003 Jana Saout < jana @ saout . de >
2005-04-17 02:20:36 +04:00
* Copyright ( C ) 2004 Clemens Fruhwirth < clemens @ endorphin . org >
2020-01-03 11:20:22 +03:00
* Copyright ( C ) 2006 - 2020 Red Hat , Inc . All rights reserved .
* Copyright ( C ) 2013 - 2020 Milan Broz < gmazyland @ gmail . com >
2005-04-17 02:20:36 +04:00
*
* This file is released under the GPL .
*/
2008-02-08 05:11:09 +03:00
# include <linux/completion.h>
2006-08-22 14:29:17 +04:00
# include <linux/err.h>
2005-04-17 02:20:36 +04:00
# include <linux/module.h>
# include <linux/init.h>
# include <linux/kernel.h>
2016-11-21 17:58:51 +03:00
# include <linux/key.h>
2005-04-17 02:20:36 +04:00
# include <linux/bio.h>
# include <linux/blkdev.h>
# include <linux/mempool.h>
# include <linux/slab.h>
# include <linux/crypto.h>
# include <linux/workqueue.h>
2015-02-13 16:25:59 +03:00
# include <linux/kthread.h>
2006-10-20 10:28:16 +04:00
# include <linux/backing-dev.h>
2011-07-27 03:09:06 +04:00
# include <linux/atomic.h>
2005-09-17 11:55:31 +04:00
# include <linux/scatterlist.h>
2015-02-13 16:27:41 +03:00
# include <linux/rbtree.h>
2016-12-01 20:20:52 +03:00
# include <linux/ctype.h>
2005-04-17 02:20:36 +04:00
# include <asm/page.h>
2006-09-03 02:56:39 +04:00
# include <asm/unaligned.h>
2011-01-13 22:59:55 +03:00
# include <crypto/hash.h>
# include <crypto/md5.h>
# include <crypto/algapi.h>
2016-01-24 16:16:36 +03:00
# include <crypto/skcipher.h>
2017-01-04 22:23:54 +03:00
# include <crypto/aead.h>
# include <crypto/authenc.h>
# include <linux/rtnetlink.h> /* for struct rtattr and RTA macros only */
2020-04-20 16:46:59 +03:00
# include <linux/key-type.h>
2016-11-21 17:58:51 +03:00
# include <keys/user-type.h>
2020-04-20 16:46:59 +03:00
# include <keys/encrypted-type.h>
2005-04-17 02:20:36 +04:00
2008-10-21 20:44:59 +04:00
# include <linux/device-mapper.h>
2005-04-17 02:20:36 +04:00
2006-06-26 11:27:35 +04:00
# define DM_MSG_PREFIX "crypt"
2005-04-17 02:20:36 +04:00
/*
* context holding the current state of a multi - part conversion
*/
struct convert_context {
2008-02-08 05:11:09 +03:00
struct completion restart ;
2005-04-17 02:20:36 +04:00
struct bio * bio_in ;
struct bio * bio_out ;
2013-10-12 02:45:43 +04:00
struct bvec_iter iter_in ;
struct bvec_iter iter_out ;
2018-11-05 10:31:42 +03:00
u64 cc_sector ;
2012-07-27 18:08:04 +04:00
atomic_t cc_pending ;
2017-01-04 22:23:54 +03:00
union {
struct skcipher_request * req ;
struct aead_request * req_aead ;
} r ;
2005-04-17 02:20:36 +04:00
} ;
2008-02-08 05:10:38 +03:00
/*
* per bio private data
*/
struct dm_crypt_io {
2012-07-27 18:08:05 +04:00
struct crypt_config * cc ;
2008-02-08 05:10:38 +03:00
struct bio * base_bio ;
2017-01-04 22:23:54 +03:00
u8 * integrity_metadata ;
bool integrity_metadata_from_pool ;
2008-02-08 05:10:38 +03:00
struct work_struct work ;
dm crypt: add flags to optionally bypass kcryptd workqueues
This is a follow up to [1] that detailed latency problems associated
with dm-crypt's use of workqueues when processing IO.
Current dm-crypt implementation creates a significant IO performance
overhead (at least on small IO block sizes) for both latency and
throughput. We suspect offloading IO request processing into
workqueues and async threads is more harmful these days with the
modern fast storage. I also did some digging into the dm-crypt git
history and much of this async processing is not needed anymore,
because the reasons it was added are mostly gone from the kernel. More
details can be found in [2] (see "Git archeology" section).
This change adds DM_CRYPT_NO_READ_WORKQUEUE and
DM_CRYPT_NO_WRITE_WORKQUEUE flags for read and write BIOs, which
direct dm-crypt to not offload crypto operations into kcryptd
workqueues. In addition, writes are not buffered to be sorted in the
dm-crypt red-black tree, but dispatched immediately. For cases, where
crypto operations cannot happen (hard interrupt context, for example
the read path of some NVME drivers), we offload the work to a tasklet
rather than a workqueue.
These flags only ensure no async BIO processing in the dm-crypt
module. It is worth noting that some Crypto API implementations may
offload encryption into their own workqueues, which are independent of
the dm-crypt and its configuration. However upon enabling these new
flags dm-crypt will instruct Crypto API not to backlog crypto
requests.
To give an idea of the performance gains for certain workloads,
consider the script, and results when tested against various
devices, detailed here:
https://www.redhat.com/archives/dm-devel/2020-July/msg00138.html
[1]: https://www.spinics.net/lists/dm-crypt/msg07516.html
[2]: https://blog.cloudflare.com/speeding-up-linux-disk-encryption/
Signed-off-by: Ignat Korchagin <ignat@cloudflare.com>
Reviewed-by: Damien Le Moal <damien.lemoal@wdc.com>
Reviewed-by: Bob Liu <bob.liu@oracle.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
2020-07-06 20:37:31 +03:00
struct tasklet_struct tasklet ;
2008-02-08 05:10:38 +03:00
struct convert_context ctx ;
2012-07-27 18:08:04 +04:00
atomic_t io_pending ;
2017-06-03 10:38:06 +03:00
blk_status_t error ;
2008-02-08 05:10:54 +03:00
sector_t sector ;
2015-02-13 16:25:59 +03:00
2015-02-13 16:27:41 +03:00
struct rb_node rb_node ;
2014-03-28 23:51:55 +04:00
} CRYPTO_MINALIGN_ATTR ;
2008-02-08 05:10:38 +03:00
2008-02-08 05:11:04 +03:00
struct dm_crypt_request {
2009-03-16 20:44:33 +03:00
struct convert_context * ctx ;
2017-01-04 22:23:54 +03:00
struct scatterlist sg_in [ 4 ] ;
struct scatterlist sg_out [ 4 ] ;
2018-11-05 10:31:42 +03:00
u64 iv_sector ;
2008-02-08 05:11:04 +03:00
} ;
2005-04-17 02:20:36 +04:00
struct crypt_config ;
struct crypt_iv_operations {
int ( * ctr ) ( struct crypt_config * cc , struct dm_target * ti ,
2007-10-20 01:42:37 +04:00
const char * opts ) ;
2005-04-17 02:20:36 +04:00
void ( * dtr ) ( struct crypt_config * cc ) ;
2009-12-11 02:51:56 +03:00
int ( * init ) ( struct crypt_config * cc ) ;
2009-12-11 02:51:57 +03:00
int ( * wipe ) ( struct crypt_config * cc ) ;
2011-01-13 22:59:54 +03:00
int ( * generator ) ( struct crypt_config * cc , u8 * iv ,
struct dm_crypt_request * dmreq ) ;
int ( * post ) ( struct crypt_config * cc , u8 * iv ,
struct dm_crypt_request * dmreq ) ;
2005-04-17 02:20:36 +04:00
} ;
2009-12-11 02:51:55 +03:00
/* benbi IV mode state: left shift applied to the sector number. */
struct iv_benbi_private {
	int shift;
};
2011-01-13 22:59:55 +03:00
#define LMK_SEED_SIZE 64 /* hash + 0 */

/* lmk IV mode state: hash transform and optional seed buffer. */
struct iv_lmk_private {
	struct crypto_shash *hash_tfm;
	u8 *seed;
};
2013-10-29 02:21:04 +04:00
#define TCW_WHITENING_SIZE 16

/* tcw IV mode state: CRC32 transform, IV seed and whitening value. */
struct iv_tcw_private {
	struct crypto_shash *crc32_tfm;
	u8 *iv_seed;
	u8 *whitening;
};
2020-01-03 11:20:22 +03:00
#define ELEPHANT_MAX_KEY_SIZE 32

/* elephant IV mode state: one skcipher transform. */
struct iv_elephant_private {
	struct crypto_skcipher *tfm;
};
2005-04-17 02:20:36 +04:00
/*
* Crypt : maps a linear range of a block device
* and encrypts / decrypts at the same time .
*/
2015-02-13 16:27:08 +03:00
/* Flag identifiers for the device; values are assigned sequentially from 0. */
enum flags {
	DM_CRYPT_SUSPENDED,
	DM_CRYPT_KEY_VALID,
	DM_CRYPT_SAME_CPU,
	DM_CRYPT_NO_OFFLOAD,
	DM_CRYPT_NO_READ_WORKQUEUE,
	DM_CRYPT_NO_WRITE_WORKQUEUE,
	DM_CRYPT_WRITE_INLINE
};
2011-01-13 22:59:53 +03:00
2017-01-04 22:23:54 +03:00
/* Flag identifiers describing properties of the configured cipher. */
enum cipher_flags {
	/* Use authenticated mode for cipher */
	CRYPT_MODE_INTEGRITY_AEAD,
	/* Calculate IV from sector_size, not 512B sectors */
	CRYPT_IV_LARGE_SECTORS,
	/* Must preprocess data for encryption (elephant) */
	CRYPT_ENCRYPT_PREPROCESS,
};
2011-01-13 22:59:53 +03:00
/*
2014-02-21 03:01:01 +04:00
* The fields in here must be read only after initialization .
2011-01-13 22:59:53 +03:00
*/
2005-04-17 02:20:36 +04:00
struct crypt_config {
struct dm_dev * dev ;
sector_t start ;
2017-08-14 05:45:08 +03:00
struct percpu_counter n_allocated_pages ;
2007-10-20 01:38:58 +04:00
struct workqueue_struct * io_queue ;
struct workqueue_struct * crypt_queue ;
2008-03-29 00:16:07 +03:00
2018-07-11 19:10:51 +03:00
spinlock_t write_thread_lock ;
2018-05-23 01:26:20 +03:00
struct task_struct * write_thread ;
2015-02-13 16:27:41 +03:00
struct rb_root write_tree ;
2015-02-13 16:25:59 +03:00
2011-01-13 22:59:52 +03:00
char * cipher_string ;
2017-01-04 22:23:54 +03:00
char * cipher_auth ;
2016-11-21 17:58:51 +03:00
char * key_string ;
2010-08-12 07:14:07 +04:00
2015-11-29 16:09:19 +03:00
const struct crypt_iv_operations * iv_gen_ops ;
2006-12-06 00:41:52 +03:00
union {
2009-12-11 02:51:55 +03:00
struct iv_benbi_private benbi ;
2011-01-13 22:59:55 +03:00
struct iv_lmk_private lmk ;
2013-10-29 02:21:04 +04:00
struct iv_tcw_private tcw ;
2020-01-03 11:20:22 +03:00
struct iv_elephant_private elephant ;
2006-12-06 00:41:52 +03:00
} iv_gen_private ;
2018-11-05 10:31:42 +03:00
u64 iv_offset ;
2005-04-17 02:20:36 +04:00
unsigned int iv_size ;
2017-03-23 17:23:14 +03:00
unsigned short int sector_size ;
unsigned char sector_shift ;
2005-04-17 02:20:36 +04:00
2017-01-04 22:23:54 +03:00
union {
struct crypto_skcipher * * tfms ;
struct crypto_aead * * tfms_aead ;
} cipher_tfm ;
2011-01-13 22:59:54 +03:00
unsigned tfms_count ;
2017-01-04 22:23:54 +03:00
unsigned long cipher_flags ;
2011-01-13 22:59:53 +03:00
2008-02-08 05:11:07 +03:00
/*
* Layout of each crypto request :
*
2016-01-24 16:16:36 +03:00
* struct skcipher_request
2008-02-08 05:11:07 +03:00
* context
* padding
* struct dm_crypt_request
* padding
* IV
*
* The padding is added so that dm_crypt_request and the IV are
* correctly aligned .
*/
unsigned int dmreq_start ;
2014-03-28 23:51:55 +04:00
unsigned int per_bio_data_size ;
2006-10-03 12:15:37 +04:00
unsigned long flags ;
2005-04-17 02:20:36 +04:00
unsigned int key_size ;
2013-10-29 02:21:03 +04:00
unsigned int key_parts ; /* independent parts in key buffer */
unsigned int key_extra_size ; /* additional keys length */
2017-01-04 22:23:54 +03:00
unsigned int key_mac_size ; /* MAC key size for authenc(...) */
unsigned int integrity_tag_size ;
unsigned int integrity_iv_size ;
unsigned int on_disk_tag_size ;
2018-05-23 01:26:20 +03:00
/*
* pool for per bio private data , crypto requests ,
* encryption requeusts / buffer pages and integrity tags
*/
unsigned tag_pool_max_sectors ;
mempool_t tag_pool ;
mempool_t req_pool ;
mempool_t page_pool ;
struct bio_set bs ;
struct mutex bio_alloc_lock ;
2017-01-04 22:23:54 +03:00
u8 * authenc_key ; /* space for keys in authenc() format (if used) */
2020-05-07 21:51:58 +03:00
u8 key [ ] ;
2005-04-17 02:20:36 +04:00
} ;
2017-01-04 22:23:54 +03:00
/* Sizing constants */
#define MIN_IOS		64
#define MAX_TAG_SIZE	480
#define POOL_ENTRY_SIZE	512
2005-04-17 02:20:36 +04:00
2017-08-14 05:45:08 +03:00
/* File-scope client accounting state and its tuning constants. */
static DEFINE_SPINLOCK(dm_crypt_clients_lock);
static unsigned dm_crypt_clients_n;	/* statics start out as zero */
static volatile unsigned long dm_crypt_pages_per_client;
#define DM_CRYPT_MEMORY_PERCENT			2
#define DM_CRYPT_MIN_PAGES_PER_CLIENT		(BIO_MAX_PAGES * 16)
2007-07-12 20:26:32 +04:00
static void clone_init ( struct dm_crypt_io * , struct bio * ) ;
2008-02-08 05:10:52 +03:00
static void kcryptd_queue_crypt ( struct dm_crypt_io * io ) ;
2017-01-04 22:23:54 +03:00
static struct scatterlist * crypt_get_sg_data ( struct crypt_config * cc ,
struct scatterlist * sg ) ;
2007-05-09 13:32:52 +04:00
2020-02-13 07:11:26 +03:00
static bool crypt_integrity_aead ( struct crypt_config * cc ) ;
2011-01-13 22:59:53 +03:00
/*
2017-03-31 08:18:48 +03:00
* Use this to access cipher attributes that are independent of the key .
2011-01-13 22:59:53 +03:00
*/
2016-01-24 16:16:36 +03:00
static struct crypto_skcipher * any_tfm ( struct crypt_config * cc )
2011-01-13 22:59:53 +03:00
{
2017-01-04 22:23:54 +03:00
return cc - > cipher_tfm . tfms [ 0 ] ;
}
static struct crypto_aead * any_tfm_aead ( struct crypt_config * cc )
{
return cc - > cipher_tfm . tfms_aead [ 0 ] ;
2011-01-13 22:59:53 +03:00
}
2005-04-17 02:20:36 +04:00
/*
* Different IV generation algorithms :
*
2006-09-02 12:17:33 +04:00
* plain : the initial vector is the 32 - bit little - endian version of the sector
2007-10-20 01:10:43 +04:00
* number , padded with zeros if necessary .
2005-04-17 02:20:36 +04:00
*
2009-12-11 02:52:25 +03:00
* plain64 : the initial vector is the 64 - bit little - endian version of the sector
* number , padded with zeros if necessary .
*
2017-06-06 10:07:01 +03:00
* plain64be : the initial vector is the 64 - bit big - endian version of the sector
* number , padded with zeros if necessary .
*
2006-09-02 12:17:33 +04:00
* essiv : " encrypted sector|salt initial vector " , the sector number is
* encrypted with the bulk cipher using a salt as key . The salt
* should be derived from the bulk cipher ' s key via hashing .
2005-04-17 02:20:36 +04:00
*
2006-09-03 02:56:39 +04:00
* benbi : the 64 - bit " big-endian 'narrow block'-count " , starting at 1
* ( needed for LRW - 32 - AES and possibly other narrow block modes )
*
2007-05-09 13:32:55 +04:00
* null : the initial vector is always zero . Provides compatibility with
* obsolete loop_fish2 devices . Do not use for new devices .
*
2011-01-13 22:59:55 +03:00
* lmk : Compatible implementation of the block chaining mode used
* by the Loop - AES block device encryption system
* designed by Jari Ruusu . See http : //loop-aes.sourceforge.net/
* It operates on full 512 byte sectors and uses CBC
* with an IV derived from the sector number , the data and
* optionally extra IV seed .
* This means that after decryption the first block
* of sector must be tweaked according to decrypted data .
* Loop - AES can use three encryption schemes :
* version 1 : is plain aes - cbc mode
* version 2 : uses 64 multikey scheme with lmk IV generator
* version 3 : the same as version 2 with additional IV seed
* ( it uses 65 keys , last key is used as IV seed )
*
2013-10-29 02:21:04 +04:00
* tcw : Compatible implementation of the block chaining mode used
* by the TrueCrypt device encryption system ( prior to version 4.1 ) .
2015-04-05 19:03:10 +03:00
* For more info see : https : //gitlab.com/cryptsetup/cryptsetup/wikis/TrueCryptOnDiskFormat
2013-10-29 02:21:04 +04:00
* It operates on full 512 byte sectors and uses CBC
* with an IV derived from initial key and the sector number .
* In addition , whitening value is applied on every sector , whitening
* is calculated from initial key , sector number and mixed using CRC32 .
* Note that this encryption scheme is vulnerable to watermarking attacks
* and should be used for old compatible containers access only .
2019-07-09 16:22:14 +03:00
*
* eboiv : Encrypted byte - offset IV ( used in Bitlocker in CBC mode )
* The IV is encrypted little - endian byte - offset ( with the same key
* and cipher as the volume ) .
2020-01-03 11:20:22 +03:00
*
* elephant : The extended version of eboiv with additional Elephant diffuser
* used with Bitlocker CBC mode .
* This mode was used in older Windows systems
2020-06-27 13:31:38 +03:00
* https : //download.microsoft.com/download/0/2/3/0238acaf-d3bf-4a6d-b3d6-0a0be4bbb36e/bitlockercipher200608.pdf
2005-04-17 02:20:36 +04:00
*/
2011-01-13 22:59:54 +03:00
/*
 * "plain" IV: zero the IV, then store the low 32 bits of the sector
 * number as little-endian at its start. Always returns 0.
 */
static int crypt_iv_plain_gen(struct crypt_config *cc, u8 *iv,
			      struct dm_crypt_request *dmreq)
{
	__le32 *iv_head = (__le32 *)iv;

	memset(iv, 0, cc->iv_size);
	*iv_head = cpu_to_le32(dmreq->iv_sector & 0xffffffff);

	return 0;
}
2009-12-11 02:52:25 +03:00
/*
 * "plain64" IV: zero the IV, then store the full 64-bit sector number
 * as little-endian at its start. Always returns 0.
 */
static int crypt_iv_plain64_gen(struct crypt_config *cc, u8 *iv,
				struct dm_crypt_request *dmreq)
{
	__le64 *iv_head = (__le64 *)iv;

	memset(iv, 0, cc->iv_size);
	*iv_head = cpu_to_le64(dmreq->iv_sector);

	return 0;
}
2017-06-06 10:07:01 +03:00
/*
 * "plain64be" IV: zero the IV, then store the 64-bit sector number as
 * big-endian in its last eight bytes. Always returns 0.
 */
static int crypt_iv_plain64be_gen(struct crypt_config *cc, u8 *iv,
				  struct dm_crypt_request *dmreq)
{
	__be64 *iv_tail;

	memset(iv, 0, cc->iv_size);

	/* iv_size is at least of size u64; usually it is 16 bytes */
	iv_tail = (__be64 *)&iv[cc->iv_size - sizeof(u64)];
	*iv_tail = cpu_to_be64(dmreq->iv_sector);

	return 0;
}
2011-01-13 22:59:54 +03:00
/*
 * "essiv" IV: only the plain little-endian 64-bit sector number is
 * written here, because the ESSIV encryption of the IV is handled by
 * the crypto API. Always returns 0.
 */
static int crypt_iv_essiv_gen(struct crypt_config *cc, u8 *iv,
			      struct dm_crypt_request *dmreq)
{
	__le64 *iv_head = (__le64 *)iv;

	memset(iv, 0, cc->iv_size);
	*iv_head = cpu_to_le64(dmreq->iv_sector);

	return 0;
}
2006-09-03 02:56:39 +04:00
/*
 * Constructor for the "benbi" IV mode.
 *
 * Takes the cipher block size (from the AEAD or the skcipher transform,
 * depending on crypt_integrity_aead()) and stores 9 - log2(blocksize) as
 * the shift later used by crypt_iv_benbi_gen().
 *
 * Returns 0 on success; sets ti->error and returns -EINVAL when the block
 * size is not a power of two or is larger than 512. opts is not used.
 */
static int crypt_iv_benbi_ctr ( struct crypt_config * cc , struct dm_target * ti ,
const char * opts )
{
2020-01-06 12:11:47 +03:00
unsigned bs ;
int log ;
2020-02-13 07:11:26 +03:00
if ( crypt_integrity_aead ( cc ) )
2020-01-06 12:11:47 +03:00
bs = crypto_aead_blocksize ( any_tfm_aead ( cc ) ) ;
else
bs = crypto_skcipher_blocksize ( any_tfm ( cc ) ) ;
log = ilog2 ( bs ) ;
2006-09-03 02:56:39 +04:00
/*
 * We need to calculate how far we must shift the sector count
 * to get the cipher block count; this shift is used in _gen.
 */
/* reject block sizes that are not an exact power of two */
if ( 1 < < log ! = bs ) {
ti - > error = " cypher blocksize is not a power of 2 " ;
return - EINVAL ;
}
/* log2 > 9 means the block is larger than 512 bytes */
if ( log > 9 ) {
ti - > error = " cypher blocksize is > 512 " ;
return - EINVAL ;
}
2009-12-11 02:51:55 +03:00
cc - > iv_gen_private . benbi . shift = 9 - log ;
2006-09-03 02:56:39 +04:00
return 0 ;
}
/* Destructor for the "benbi" IV mode; intentionally does nothing. */
static void crypt_iv_benbi_dtr(struct crypt_config *cc)
{
	/* nothing to release */
}
2011-01-13 22:59:54 +03:00
/*
 * "benbi" IV: the last eight bytes hold the big-endian value
 * (sector << shift) + 1; everything before them is zero.
 * Always returns 0.
 */
static int crypt_iv_benbi_gen(struct crypt_config *cc, u8 *iv,
			      struct dm_crypt_request *dmreq)
{
	u8 *tail = iv + cc->iv_size - sizeof(u64);
	__be64 count;

	/* clear the leading part; the trailing u64 is written below */
	memset(iv, 0, cc->iv_size - sizeof(u64));

	count = cpu_to_be64(((u64)dmreq->iv_sector << cc->iv_gen_private.benbi.shift) + 1);
	put_unaligned(count, (__be64 *)tail);

	return 0;
}
2011-01-13 22:59:54 +03:00
/* "null" IV: the IV is all zeroes. Always returns 0. */
static int crypt_iv_null_gen(struct crypt_config *cc, u8 *iv,
			     struct dm_crypt_request *dmreq)
{
	unsigned int len = cc->iv_size;

	memset(iv, 0, len);

	return 0;
}
2011-01-13 22:59:55 +03:00
static void crypt_iv_lmk_dtr ( struct crypt_config * cc )
{
struct iv_lmk_private * lmk = & cc - > iv_gen_private . lmk ;
if ( lmk - > hash_tfm & & ! IS_ERR ( lmk - > hash_tfm ) )
crypto_free_shash ( lmk - > hash_tfm ) ;
lmk - > hash_tfm = NULL ;
2020-08-07 09:18:13 +03:00
kfree_sensitive ( lmk - > seed ) ;
2011-01-13 22:59:55 +03:00
lmk - > seed = NULL ;
}
/*
 * Constructor for the "lmk" IV mode.
 *
 * Rejects sector sizes other than 1 << SECTOR_SHIFT, allocates the hash
 * transform and, unless key_parts equals tfms_count (no seed), allocates
 * a zeroed LMK_SEED_SIZE seed buffer.
 *
 * Returns 0 on success. On failure sets ti->error and returns -EINVAL,
 * the error carried by the hash allocation, or -ENOMEM. opts is not used.
 */
static int crypt_iv_lmk_ctr ( struct crypt_config * cc , struct dm_target * ti ,
const char * opts )
{
struct iv_lmk_private * lmk = & cc - > iv_gen_private . lmk ;
2017-03-16 17:39:44 +03:00
if ( cc - > sector_size ! = ( 1 < < SECTOR_SHIFT ) ) {
ti - > error = " Unsupported sector size for LMK " ;
return - EINVAL ;
}
2020-07-10 09:20:42 +03:00
lmk - > hash_tfm = crypto_alloc_shash ( " md5 " , 0 ,
CRYPTO_ALG_ALLOCATES_MEMORY ) ;
2011-01-13 22:59:55 +03:00
/* hash_tfm keeps the error pointer here; crypt_iv_lmk_dtr() checks for that */
if ( IS_ERR ( lmk - > hash_tfm ) ) {
ti - > error = " Error initializing LMK hash " ;
return PTR_ERR ( lmk - > hash_tfm ) ;
}
/* No seed in LMK version 2 */
if ( cc - > key_parts = = cc - > tfms_count ) {
lmk - > seed = NULL ;
return 0 ;
}
lmk - > seed = kzalloc ( LMK_SEED_SIZE , GFP_KERNEL ) ;
if ( ! lmk - > seed ) {
/* undo the hash allocation before reporting the failure */
crypt_iv_lmk_dtr ( cc ) ;
ti - > error = " Error kmallocing seed storage in LMK " ;
return - ENOMEM ;
}
return 0 ;
}
static int crypt_iv_lmk_init ( struct crypt_config * cc )
{
struct iv_lmk_private * lmk = & cc - > iv_gen_private . lmk ;
int subkey_size = cc - > key_size / cc - > key_parts ;
/* LMK seed is on the position of LMK_KEYS + 1 key */
if ( lmk - > seed )
memcpy ( lmk - > seed , cc - > key + ( cc - > tfms_count * subkey_size ) ,
crypto_shash_digestsize ( lmk - > hash_tfm ) ) ;
return 0 ;
}
static int crypt_iv_lmk_wipe ( struct crypt_config * cc )
{
struct iv_lmk_private * lmk = & cc - > iv_gen_private . lmk ;
if ( lmk - > seed )
memset ( lmk - > seed , 0 , LMK_SEED_SIZE ) ;
return 0 ;
}
/*
 * Compute the "lmk" IV for one sector.
 *
 * Feeds the hash with the optional seed, bytes 16..511 of @data and a
 * 16-byte trailer built from the sector number, then exports the raw
 * hash state (no final padding) and copies iv_size bytes of it to @iv.
 *
 * Returns 0 on success or the first error reported by the hash calls.
 */
static int crypt_iv_lmk_one(struct crypt_config *cc, u8 *iv,
			    struct dm_crypt_request *dmreq,
			    u8 *data)
{
	struct iv_lmk_private *priv = &cc->iv_gen_private.lmk;
	SHASH_DESC_ON_STACK(desc, priv->hash_tfm);
	struct md5_state state;
	__le32 trailer[4];
	int word, err;

	desc->tfm = priv->hash_tfm;

	err = crypto_shash_init(desc);
	if (err)
		return err;

	if (priv->seed) {
		err = crypto_shash_update(desc, priv->seed, LMK_SEED_SIZE);
		if (err)
			return err;
	}

	/* Sector is always 512B, block size 16, add data of blocks 1-31 */
	err = crypto_shash_update(desc, data + 16, 16 * 31);
	if (err)
		return err;

	/* Sector is cropped to 56 bits here */
	trailer[0] = cpu_to_le32(dmreq->iv_sector & 0xFFFFFFFF);
	trailer[1] = cpu_to_le32((((u64)dmreq->iv_sector >> 32) & 0x00FFFFFF) | 0x80000000);
	trailer[2] = cpu_to_le32(4024);
	trailer[3] = 0;
	err = crypto_shash_update(desc, (u8 *)trailer, sizeof(trailer));
	if (err)
		return err;

	/* No MD5 padding here */
	err = crypto_shash_export(desc, &state);
	if (err)
		return err;

	for (word = 0; word < MD5_HASH_WORDS; word++)
		__cpu_to_le32s(&state.hash[word]);
	memcpy(iv, &state.hash, cc->iv_size);

	return 0;
}
/*
 * Generator hook for the "lmk" IV mode.
 *
 * For writes the IV is computed from the input data via
 * crypt_iv_lmk_one(); for any other direction the IV is zeroed.
 * Returns 0 or the error from crypt_iv_lmk_one().
 */
static int crypt_iv_lmk_gen(struct crypt_config *cc, u8 *iv,
			    struct dm_crypt_request *dmreq)
{
	struct scatterlist *sg;
	u8 *mapped;
	int err;

	if (bio_data_dir(dmreq->ctx->bio_in) != WRITE) {
		memset(iv, 0, cc->iv_size);
		return 0;
	}

	sg = crypt_get_sg_data(cc, dmreq->sg_in);
	mapped = kmap_atomic(sg_page(sg));
	err = crypt_iv_lmk_one(cc, iv, dmreq, mapped + sg->offset);
	kunmap_atomic(mapped);

	return err;
}
/*
 * Post hook for the "lmk" IV mode.
 *
 * Does nothing for writes. Otherwise computes the IV from the output
 * data and, on success, XORs it into the first iv_size bytes of that
 * data. Returns 0 or the error from crypt_iv_lmk_one().
 */
static int crypt_iv_lmk_post(struct crypt_config *cc, u8 *iv,
			     struct dm_crypt_request *dmreq)
{
	struct scatterlist *sg;
	u8 *mapped;
	int err;

	if (bio_data_dir(dmreq->ctx->bio_in) == WRITE)
		return 0;

	sg = crypt_get_sg_data(cc, dmreq->sg_out);
	mapped = kmap_atomic(sg_page(sg));
	err = crypt_iv_lmk_one(cc, iv, dmreq, mapped + sg->offset);

	/* Tweak the first block of plaintext sector */
	if (err == 0)
		crypto_xor(mapped + sg->offset, iv, cc->iv_size);

	kunmap_atomic(mapped);
	return err;
}
2013-10-29 02:21:04 +04:00
static void crypt_iv_tcw_dtr ( struct crypt_config * cc )
{
struct iv_tcw_private * tcw = & cc - > iv_gen_private . tcw ;
2020-08-07 09:18:13 +03:00
kfree_sensitive ( tcw - > iv_seed ) ;
2013-10-29 02:21:04 +04:00
tcw - > iv_seed = NULL ;
2020-08-07 09:18:13 +03:00
kfree_sensitive ( tcw - > whitening ) ;
2013-10-29 02:21:04 +04:00
tcw - > whitening = NULL ;
if ( tcw - > crc32_tfm & & ! IS_ERR ( tcw - > crc32_tfm ) )
crypto_free_shash ( tcw - > crc32_tfm ) ;
tcw - > crc32_tfm = NULL ;
}
/*
 * Constructor for the "tcw" IV mode.
 *
 * Rejects sector sizes other than 1 << SECTOR_SHIFT and keys that are not
 * longer than iv_size + TCW_WHITENING_SIZE, then allocates the CRC32
 * transform plus zeroed buffers for the IV seed (iv_size bytes) and the
 * whitening value (TCW_WHITENING_SIZE bytes).
 *
 * Returns 0 on success. On failure sets ti->error and returns -EINVAL,
 * the error carried by the transform allocation, or -ENOMEM.
 * opts is not used.
 */
static int crypt_iv_tcw_ctr ( struct crypt_config * cc , struct dm_target * ti ,
const char * opts )
{
struct iv_tcw_private * tcw = & cc - > iv_gen_private . tcw ;
2017-03-16 17:39:44 +03:00
if ( cc - > sector_size ! = ( 1 < < SECTOR_SHIFT ) ) {
ti - > error = " Unsupported sector size for TCW " ;
return - EINVAL ;
}
2013-10-29 02:21:04 +04:00
/* the key must be strictly longer than the seed and whitening taken from its tail */
if ( cc - > key_size < = ( cc - > iv_size + TCW_WHITENING_SIZE ) ) {
ti - > error = " Wrong key size for TCW " ;
return - EINVAL ;
}
2020-07-10 09:20:42 +03:00
tcw - > crc32_tfm = crypto_alloc_shash ( " crc32 " , 0 ,
CRYPTO_ALG_ALLOCATES_MEMORY ) ;
2013-10-29 02:21:04 +04:00
/* crc32_tfm keeps the error pointer here; crypt_iv_tcw_dtr() checks for that */
if ( IS_ERR ( tcw - > crc32_tfm ) ) {
ti - > error = " Error initializing CRC32 in TCW " ;
return PTR_ERR ( tcw - > crc32_tfm ) ;
}
/* both allocations are attempted first, then checked together */
tcw - > iv_seed = kzalloc ( cc - > iv_size , GFP_KERNEL ) ;
tcw - > whitening = kzalloc ( TCW_WHITENING_SIZE , GFP_KERNEL ) ;
if ( ! tcw - > iv_seed | | ! tcw - > whitening ) {
/* the destructor releases whatever was allocated so far */
crypt_iv_tcw_dtr ( cc ) ;
ti - > error = " Error allocating seed storage in TCW " ;
return - ENOMEM ;
}
return 0 ;
}
static int crypt_iv_tcw_init ( struct crypt_config * cc )
{
struct iv_tcw_private * tcw = & cc - > iv_gen_private . tcw ;
int key_offset = cc - > key_size - cc - > iv_size - TCW_WHITENING_SIZE ;
memcpy ( tcw - > iv_seed , & cc - > key [ key_offset ] , cc - > iv_size ) ;
memcpy ( tcw - > whitening , & cc - > key [ key_offset + cc - > iv_size ] ,
TCW_WHITENING_SIZE ) ;
return 0 ;
}
static int crypt_iv_tcw_wipe ( struct crypt_config * cc )
{
struct iv_tcw_private * tcw = & cc - > iv_gen_private . tcw ;
memset ( tcw - > iv_seed , 0 , cc - > iv_size ) ;
memset ( tcw - > whitening , 0 , TCW_WHITENING_SIZE ) ;
return 0 ;
}
/*
 * Apply (or remove - the operation is an XOR) TCW whitening on one
 * sector of @data.
 *
 * The 16-byte whitening value is XORed with the little-endian sector
 * number, each 32-bit part is replaced by its CRC32, the four parts are
 * folded into 8 bytes, and those 8 bytes are XORed over the whole
 * sector. The scratch buffer is wiped before returning.
 *
 * Returns 0 on success or the first error reported by the hash calls.
 */
static int crypt_iv_tcw_whitening(struct crypt_config *cc,
				  struct dm_crypt_request *dmreq,
				  u8 *data)
{
	struct iv_tcw_private *priv = &cc->iv_gen_private.tcw;
	__le64 sector = cpu_to_le64(dmreq->iv_sector);
	u8 wbuf[TCW_WHITENING_SIZE];
	SHASH_DESC_ON_STACK(desc, priv->crc32_tfm);
	int i, r;

	/* xor whitening with sector number */
	crypto_xor_cpy(wbuf, priv->whitening, (u8 *)&sector, 8);
	crypto_xor_cpy(&wbuf[8], priv->whitening + 8, (u8 *)&sector, 8);

	/* calculate crc32 for every 32bit part and xor it */
	desc->tfm = priv->crc32_tfm;
	for (i = 0; i < 4; i++) {
		u8 *part = &wbuf[i * 4];

		r = crypto_shash_init(desc);
		if (r)
			goto out;
		r = crypto_shash_update(desc, part, 4);
		if (r)
			goto out;
		r = crypto_shash_final(desc, part);
		if (r)
			goto out;
	}
	crypto_xor(&wbuf[0], &wbuf[12], 4);
	crypto_xor(&wbuf[4], &wbuf[8], 4);

	/* apply whitening (8 bytes) to whole sector */
	for (i = 0; i < ((1 << SECTOR_SHIFT) / 8); i++)
		crypto_xor(data + i * 8, wbuf, 8);
out:
	memzero_explicit(wbuf, sizeof(wbuf));
	return r;
}
/*
 * Generator hook for the "tcw" IV mode.
 *
 * For non-write requests the whitening is first removed from the input
 * data. The IV is then built as iv_seed XOR little-endian sector number,
 * with the 8-byte sector value applied to bytes 0-7 and again to
 * bytes 8-15.
 *
 * The sector value is only 8 bytes long, so the second XOR is clamped to
 * sizeof(sector); the original passed cc->iv_size - 8 as the length and
 * would read past the local variable whenever iv_size exceeded 16. Any IV
 * bytes beyond 16 are copied from the seed unchanged. For iv_size <= 16
 * the result is identical to the previous behaviour.
 * NOTE(review): whether iv_size > 16 can actually reach this mode depends
 * on checks outside this function - confirm.
 *
 * Returns 0, or the error from crypt_iv_tcw_whitening(); the IV is
 * written in either case.
 */
static int crypt_iv_tcw_gen(struct crypt_config *cc, u8 *iv,
			    struct dm_crypt_request *dmreq)
{
	struct scatterlist *sg;
	struct iv_tcw_private *tcw = &cc->iv_gen_private.tcw;
	__le64 sector = cpu_to_le64(dmreq->iv_sector);
	unsigned int len;
	u8 *src;
	int r = 0;

	/* Remove whitening from ciphertext */
	if (bio_data_dir(dmreq->ctx->bio_in) != WRITE) {
		sg = crypt_get_sg_data(cc, dmreq->sg_in);
		src = kmap_atomic(sg_page(sg));
		r = crypt_iv_tcw_whitening(cc, dmreq, src + sg->offset);
		kunmap_atomic(src);
	}

	/* Calculate IV */
	crypto_xor_cpy(iv, tcw->iv_seed, (u8 *)&sector, 8);
	if (cc->iv_size > 8) {
		/* never read more than the 8 bytes that sector provides */
		len = min_t(unsigned int, cc->iv_size - 8, sizeof(sector));
		crypto_xor_cpy(&iv[8], tcw->iv_seed + 8, (u8 *)&sector, len);
	}
	/* remaining IV bytes (if any) take the seed as-is */
	if (cc->iv_size > 8 + sizeof(sector))
		memcpy(&iv[8 + sizeof(sector)],
		       tcw->iv_seed + 8 + sizeof(sector),
		       cc->iv_size - 8 - sizeof(sector));

	return r;
}
/*
 * Post-crypto hook of the TCW IV mode.
 *
 * Only acts on writes: the data entry of the output scatterlist is mapped
 * and whitening is applied to the ciphertext in place.  For any other
 * direction the function returns 0 without touching the data.
 * The iv argument is not used.
 *
 * Returns the result of crypt_iv_tcw_whitening(), or 0 for non-writes.
 */
static int crypt_iv_tcw_post(struct crypt_config *cc, u8 *iv,
			     struct dm_crypt_request *dmreq)
{
	struct scatterlist *out_sg;
	u8 *mapped;
	int ret;

	if (bio_data_dir(dmreq->ctx->bio_in) != WRITE)
		return 0;

	/* Apply whitening on ciphertext */
	out_sg = crypt_get_sg_data(cc, dmreq->sg_out);
	mapped = kmap_atomic(sg_page(out_sg));
	ret = crypt_iv_tcw_whitening(cc, dmreq, mapped + out_sg->offset);
	kunmap_atomic(mapped);

	return ret;
}
2017-01-04 22:23:54 +03:00
/*
 * Random IV generator: fills iv with cc->iv_size random bytes.
 *
 * Used only for writes; there must be additional space to store the IV.
 * The dmreq argument is not consulted.  Always returns 0.
 */
static int crypt_iv_random_gen(struct crypt_config *cc, u8 *iv,
			       struct dm_crypt_request *dmreq)
{
	get_random_bytes(iv, cc->iv_size);

	return 0;
}
2019-07-09 16:22:14 +03:00
/*
 * Constructor-time validation for the EBOIV IV mode.
 *
 * Fails with -EINVAL (and sets ti->error) when the mapping uses an AEAD
 * transform, or when the skcipher block size differs from cc->iv_size.
 * Returns 0 otherwise.  The opts argument is not used.
 */
static int crypt_iv_eboiv_ctr ( struct crypt_config * cc , struct dm_target * ti ,
const char * opts )
{
2020-02-13 07:11:26 +03:00
if ( crypt_integrity_aead ( cc ) ) {
2019-08-07 08:50:22 +03:00
ti - > error = " AEAD transforms not supported for EBOIV " ;
return - EINVAL ;
2019-07-09 16:22:14 +03:00
}
2019-08-07 08:50:22 +03:00
/* The generator encrypts exactly iv_size bytes, so the sizes must agree */
if ( crypto_skcipher_blocksize ( any_tfm ( cc ) ) ! = cc - > iv_size ) {
2019-07-09 16:22:14 +03:00
ti - > error = " Block size of EBOIV cipher does "
" not match IV size of block cipher " ;
return - EINVAL ;
}
return 0 ;
}
2019-08-07 08:50:22 +03:00
/*
 * EBOIV generator.
 *
 * Builds a temporary cipher IV holding the little-endian 64-bit value
 * dmreq->iv_sector * cc->sector_size (remaining bytes zero), then encrypts
 * cc->iv_size bytes of the zero page with the data cipher using that
 * temporary IV.  The ciphertext is written to iv and becomes the sector IV.
 *
 * The encryption is waited for synchronously.
 *
 * Returns 0 on success, -ENOMEM if the request cannot be allocated, or the
 * error reported by the cipher.
 */
static int crypt_iv_eboiv_gen(struct crypt_config *cc, u8 *iv,
			      struct dm_crypt_request *dmreq)
{
	u8 offset_iv[MAX_CIPHER_BLOCKSIZE] __aligned(__alignof__(__le64));
	struct scatterlist zero_sg, iv_sg;
	struct skcipher_request *rq;
	DECLARE_CRYPTO_WAIT(wait);
	int ret;

	rq = skcipher_request_alloc(any_tfm(cc), GFP_NOIO);
	if (rq == NULL)
		return -ENOMEM;

	/* Temporary IV: byte offset of the sector, zero padded */
	memset(offset_iv, 0, cc->iv_size);
	*(__le64 *)offset_iv = cpu_to_le64(dmreq->iv_sector * cc->sector_size);

	/* Source is all zeroes, destination is the caller's IV buffer */
	sg_init_one(&zero_sg, page_address(ZERO_PAGE(0)), cc->iv_size);
	sg_init_one(&iv_sg, iv, cc->iv_size);

	skcipher_request_set_crypt(rq, &zero_sg, &iv_sg, cc->iv_size, offset_iv);
	skcipher_request_set_callback(rq, 0, crypto_req_done, &wait);

	ret = crypto_wait_req(crypto_skcipher_encrypt(rq), &wait);
	skcipher_request_free(rq);

	return ret;
}
2020-01-03 11:20:22 +03:00
static void crypt_iv_elephant_dtr ( struct crypt_config * cc )
{
struct iv_elephant_private * elephant = & cc - > iv_gen_private . elephant ;
crypto_free_skcipher ( elephant - > tfm ) ;
elephant - > tfm = NULL ;
}
/*
 * Constructor for the Elephant IV mode.
 *
 * Allocates the private skcipher transform stored in elephant->tfm and then
 * runs the EBOIV constructor checks.  If those checks fail, the transform is
 * released again via crypt_iv_elephant_dtr().
 *
 * Returns 0 on success, the allocation error, or the error from
 * crypt_iv_eboiv_ctr().  The opts argument is not used.
 */
static int crypt_iv_elephant_ctr ( struct crypt_config * cc , struct dm_target * ti ,
const char * opts )
{
struct iv_elephant_private * elephant = & cc - > iv_gen_private . elephant ;
int r ;
2020-07-10 09:20:42 +03:00
/*
 * NOTE(review): type 0 with mask CRYPTO_ALG_ALLOCATES_MEMORY presumably
 * excludes implementations that allocate memory while processing
 * requests - confirm against the crypto API documentation.
 */
elephant - > tfm = crypto_alloc_skcipher ( " ecb(aes) " , 0 ,
CRYPTO_ALG_ALLOCATES_MEMORY ) ;
2020-01-03 11:20:22 +03:00
if ( IS_ERR ( elephant - > tfm ) ) {
r = PTR_ERR ( elephant - > tfm ) ;
/* Do not leave an error pointer behind for the destructor */
elephant - > tfm = NULL ;
return r ;
}
r = crypt_iv_eboiv_ctr ( cc , ti , NULL ) ;
if ( r )
crypt_iv_elephant_dtr ( cc ) ;
return r ;
}
/*
 * Convert n 32-bit words at d, in place, from little-endian to CPU byte
 * order.  Compiles to an empty function when __LITTLE_ENDIAN is defined.
 *
 * The index is a size_t so it has the same type as the bound n; the
 * previous signed int index was compared against an unsigned size_t.
 */
static void diffuser_disk_to_cpu(u32 *d, size_t n)
{
#ifndef __LITTLE_ENDIAN
	size_t i;

	for (i = 0; i < n; i++)
		d[i] = le32_to_cpu((__le32)d[i]);
#endif
}
/*
 * Convert n 32-bit words at d, in place, from CPU byte order to
 * little-endian.  Compiles to an empty function when __LITTLE_ENDIAN is
 * defined.
 *
 * The index is a size_t so it has the same type as the bound n; the
 * previous signed int index was compared against an unsigned size_t.
 */
static void diffuser_cpu_to_disk(__le32 *d, size_t n)
{
#ifndef __LITTLE_ENDIAN
	size_t i;

	for (i = 0; i < n; i++)
		d[i] = cpu_to_le32((u32)d[i]);
#endif
}
/*
 * "A" diffuser, decrypt direction: mixes the n 32-bit words of d in place.
 *
 * Five passes run from the first word towards the last.  Each step adds to
 * d[i1] the XOR of d[i2] and d[i3]; on the first and third step of every
 * group of four, d[i3] is first rotated left by 9 resp. 13 bits.
 * i2 and i3 start at n - 2 and n - 5 and wrap back by n once they reach n.
 *
 * NOTE(review): the inner loop consumes four words per iteration and only
 * checks for wrap-around at fixed steps, so n is presumably a multiple of
 * four and larger than 5 - confirm against the callers.
 */
static void diffuser_a_decrypt ( u32 * d , size_t n )
{
int i , i1 , i2 , i3 ;
for ( i = 0 ; i < 5 ; i + + ) {
i1 = 0 ;
i2 = n - 2 ;
i3 = n - 5 ;
while ( i1 < ( n - 1 ) ) {
/* step 1: d[i3] rotated left by 9 */
d [ i1 ] + = d [ i2 ] ^ ( d [ i3 ] < < 9 | d [ i3 ] > > 23 ) ;
i1 + + ; i2 + + ; i3 + + ;
if ( i3 > = n )
i3 - = n ;
/* step 2: no rotation */
d [ i1 ] + = d [ i2 ] ^ d [ i3 ] ;
i1 + + ; i2 + + ; i3 + + ;
if ( i2 > = n )
i2 - = n ;
/* step 3: d[i3] rotated left by 13 */
d [ i1 ] + = d [ i2 ] ^ ( d [ i3 ] < < 13 | d [ i3 ] > > 19 ) ;
i1 + + ; i2 + + ; i3 + + ;
/* step 4: no rotation */
d [ i1 ] + = d [ i2 ] ^ d [ i3 ] ;
i1 + + ; i2 + + ; i3 + + ;
}
}
}
/*
 * "A" diffuser, encrypt direction: mixes the n 32-bit words of d in place.
 *
 * Mirrors diffuser_a_decrypt(): five passes run from the last word towards
 * the first, and each step subtracts from d[i1] the XOR of d[i2] and d[i3],
 * using the same rotation amounts (13 and 9 bits) in reverse step order.
 * i2 and i3 start at n - 3 and n - 6 and wrap forward by n once they drop
 * below zero, which is why the indices are signed.
 *
 * NOTE(review): presumably requires n to be a multiple of four and larger
 * than 5 - confirm against the callers.
 */
static void diffuser_a_encrypt ( u32 * d , size_t n )
{
int i , i1 , i2 , i3 ;
for ( i = 0 ; i < 5 ; i + + ) {
i1 = n - 1 ;
i2 = n - 2 - 1 ;
i3 = n - 5 - 1 ;
while ( i1 > 0 ) {
/* step 1: no rotation */
d [ i1 ] - = d [ i2 ] ^ d [ i3 ] ;
i1 - - ; i2 - - ; i3 - - ;
/* step 2: d[i3] rotated left by 13 */
d [ i1 ] - = d [ i2 ] ^ ( d [ i3 ] < < 13 | d [ i3 ] > > 19 ) ;
i1 - - ; i2 - - ; i3 - - ;
if ( i2 < 0 )
i2 + = n ;
/* step 3: no rotation */
d [ i1 ] - = d [ i2 ] ^ d [ i3 ] ;
i1 - - ; i2 - - ; i3 - - ;
if ( i3 < 0 )
i3 + = n ;
/* step 4: d[i3] rotated left by 9 */
d [ i1 ] - = d [ i2 ] ^ ( d [ i3 ] < < 9 | d [ i3 ] > > 23 ) ;
i1 - - ; i2 - - ; i3 - - ;
}
}
}
/*
 * "B" diffuser, decrypt direction: mixes the n 32-bit words of d in place.
 *
 * Three passes run from the first word towards the last.  Each step adds to
 * d[i1] the XOR of d[i2] and d[i3]; on the second and fourth step of every
 * group of four, d[i3] is first rotated left by 10 resp. 25 bits.
 * i2 and i3 start at 2 and 5 and wrap back by n once they reach n.
 *
 * NOTE(review): presumably requires n to be a multiple of four and larger
 * than 5 - confirm against the callers.
 */
static void diffuser_b_decrypt ( u32 * d , size_t n )
{
int i , i1 , i2 , i3 ;
for ( i = 0 ; i < 3 ; i + + ) {
i1 = 0 ;
i2 = 2 ;
i3 = 5 ;
while ( i1 < ( n - 1 ) ) {
/* step 1: no rotation */
d [ i1 ] + = d [ i2 ] ^ d [ i3 ] ;
i1 + + ; i2 + + ; i3 + + ;
/* step 2: d[i3] rotated left by 10 */
d [ i1 ] + = d [ i2 ] ^ ( d [ i3 ] < < 10 | d [ i3 ] > > 22 ) ;
i1 + + ; i2 + + ; i3 + + ;
if ( i2 > = n )
i2 - = n ;
/* step 3: no rotation */
d [ i1 ] + = d [ i2 ] ^ d [ i3 ] ;
i1 + + ; i2 + + ; i3 + + ;
if ( i3 > = n )
i3 - = n ;
/* step 4: d[i3] rotated left by 25 */
d [ i1 ] + = d [ i2 ] ^ ( d [ i3 ] < < 25 | d [ i3 ] > > 7 ) ;
i1 + + ; i2 + + ; i3 + + ;
}
}
}
/*
 * "B" diffuser, encrypt direction: mixes the n 32-bit words of d in place.
 *
 * Mirrors diffuser_b_decrypt(): three passes run from the last word towards
 * the first, and each step subtracts from d[i1] the XOR of d[i2] and d[i3],
 * using the same rotation amounts (25 and 10 bits) in reverse step order.
 * i2 and i3 start at 1 and 4 and wrap forward by n once they drop below
 * zero, which is why the indices are signed.
 *
 * NOTE(review): presumably requires n to be a multiple of four and larger
 * than 5 - confirm against the callers.
 */
static void diffuser_b_encrypt ( u32 * d , size_t n )
{
int i , i1 , i2 , i3 ;
for ( i = 0 ; i < 3 ; i + + ) {
i1 = n - 1 ;
i2 = 2 - 1 ;
i3 = 5 - 1 ;
while ( i1 > 0 ) {
/* step 1: d[i3] rotated left by 25 */
d [ i1 ] - = d [ i2 ] ^ ( d [ i3 ] < < 25 | d [ i3 ] > > 7 ) ;
i1 - - ; i2 - - ; i3 - - ;
if ( i3 < 0 )
i3 + = n ;
/* step 2: no rotation */
d [ i1 ] - = d [ i2 ] ^ d [ i3 ] ;
i1 - - ; i2 - - ; i3 - - ;
if ( i2 < 0 )
i2 + = n ;
/* step 3: d[i3] rotated left by 10 */
d [ i1 ] - = d [ i2 ] ^ ( d [ i3 ] < < 10 | d [ i3 ] > > 22 ) ;
i1 - - ; i2 - - ; i3 - - ;
/* step 4: no rotation */
d [ i1 ] - = d [ i2 ] ^ d [ i3 ] ;
i1 - - ; i2 - - ; i3 - - ;
}
}
}
/*
 * Apply the Elephant diffuser layer to one sector, in place in the data
 * entry of dmreq->sg_out.
 *
 * A 32-byte sector key ks is derived with the private transform: the
 * 16-byte block es carries the little-endian 64-bit value
 * iv_sector * sector_size and is encrypted into ks[0..15]; es[15] is then
 * set to 0x80 and the block is encrypted again into ks[16..31].
 *
 * For writes the input sector is first copied from sg_in to sg_out, the
 * sector key is XORed over every 32-byte chunk, and the A and B "encrypt"
 * diffusers are run.  For other directions the B and A "decrypt" diffusers
 * are run first and the sector key is XORed afterwards.
 *
 * Returns 0 on success, -ENOMEM on allocation failure, or the error
 * reported by the cipher.  All temporary buffers are freed on every path.
 */
static int crypt_iv_elephant ( struct crypt_config * cc , struct dm_crypt_request * dmreq )
{
struct iv_elephant_private * elephant = & cc - > iv_gen_private . elephant ;
u8 * es , * ks , * data , * data2 , * data_offset ;
struct skcipher_request * req ;
struct scatterlist * sg , * sg2 , src , dst ;
2020-08-31 08:55:55 +03:00
DECLARE_CRYPTO_WAIT ( wait ) ;
2020-01-03 11:20:22 +03:00
int i , r ;
/* Allocate everything first; a single check below covers all three */
req = skcipher_request_alloc ( elephant - > tfm , GFP_NOIO ) ;
es = kzalloc ( 16 , GFP_NOIO ) ; /* Key for AES */
ks = kzalloc ( 32 , GFP_NOIO ) ; /* Elephant sector key */
if ( ! req | | ! es | | ! ks ) {
r = - ENOMEM ;
goto out ;
}
*(__le64 * ) es = cpu_to_le64 ( dmreq - > iv_sector * cc - > sector_size ) ;
/* E(Ks, e(s)) */
sg_init_one ( & src , es , 16 ) ;
sg_init_one ( & dst , ks , 16 ) ;
skcipher_request_set_crypt ( req , & src , & dst , 16 , NULL ) ;
skcipher_request_set_callback ( req , 0 , crypto_req_done , & wait ) ;
r = crypto_wait_req ( crypto_skcipher_encrypt ( req ) , & wait ) ;
if ( r )
goto out ;
/* E(Ks, e'(s)) */
es [ 15 ] = 0x80 ;
/* req still refers to dst, so this redirects output to the upper half of ks */
sg_init_one ( & dst , & ks [ 16 ] , 16 ) ;
r = crypto_wait_req ( crypto_skcipher_encrypt ( req ) , & wait ) ;
if ( r )
goto out ;
sg = crypt_get_sg_data ( cc , dmreq - > sg_out ) ;
data = kmap_atomic ( sg_page ( sg ) ) ;
data_offset = data + sg - > offset ;
/* Cannot modify original bio, copy to sg_out and apply Elephant to it */
if ( bio_data_dir ( dmreq - > ctx - > bio_in ) = = WRITE ) {
sg2 = crypt_get_sg_data ( cc , dmreq - > sg_in ) ;
data2 = kmap_atomic ( sg_page ( sg2 ) ) ;
memcpy ( data_offset , data2 + sg2 - > offset , cc - > sector_size ) ;
kunmap_atomic ( data2 ) ;
}
/* Non-write path: undo the diffusers before removing the sector key */
if ( bio_data_dir ( dmreq - > ctx - > bio_in ) ! = WRITE ) {
diffuser_disk_to_cpu ( ( u32 * ) data_offset , cc - > sector_size / sizeof ( u32 ) ) ;
diffuser_b_decrypt ( ( u32 * ) data_offset , cc - > sector_size / sizeof ( u32 ) ) ;
diffuser_a_decrypt ( ( u32 * ) data_offset , cc - > sector_size / sizeof ( u32 ) ) ;
diffuser_cpu_to_disk ( ( __le32 * ) data_offset , cc - > sector_size / sizeof ( u32 ) ) ;
}
/* XOR the 32-byte sector key over every 32-byte chunk of the sector */
for ( i = 0 ; i < ( cc - > sector_size / 32 ) ; i + + )
crypto_xor ( data_offset + i * 32 , ks , 32 ) ;
/* Write path: run the diffusers after the sector key has been applied */
if ( bio_data_dir ( dmreq - > ctx - > bio_in ) = = WRITE ) {
diffuser_disk_to_cpu ( ( u32 * ) data_offset , cc - > sector_size / sizeof ( u32 ) ) ;
diffuser_a_encrypt ( ( u32 * ) data_offset , cc - > sector_size / sizeof ( u32 ) ) ;
diffuser_b_encrypt ( ( u32 * ) data_offset , cc - > sector_size / sizeof ( u32 ) ) ;
diffuser_cpu_to_disk ( ( __le32 * ) data_offset , cc - > sector_size / sizeof ( u32 ) ) ;
}
kunmap_atomic ( data ) ;
out :
2020-08-07 09:18:13 +03:00
kfree_sensitive ( ks ) ;
kfree_sensitive ( es ) ;
2020-01-03 11:20:22 +03:00
skcipher_request_free ( req ) ;
return r ;
}
/*
 * Generator hook of the Elephant IV mode.
 *
 * For writes the Elephant layer is applied to the sector first (this also
 * copies the data into the output buffer); a failure there is returned
 * immediately.  In all cases the IV itself is then produced by the EBOIV
 * generator.
 */
static int crypt_iv_elephant_gen(struct crypt_config *cc, u8 *iv,
				 struct dm_crypt_request *dmreq)
{
	bool writing = bio_data_dir(dmreq->ctx->bio_in) == WRITE;

	if (writing) {
		int err = crypt_iv_elephant(cc, dmreq);

		if (err)
			return err;
	}

	return crypt_iv_eboiv_gen(cc, iv, dmreq);
}
/*
 * Post-crypto hook of the Elephant IV mode.
 *
 * Writes need no post-processing and return 0.  For every other direction
 * the Elephant layer is applied to the processed sector and its result is
 * returned.  The iv argument is not used.
 */
static int crypt_iv_elephant_post(struct crypt_config *cc, u8 *iv,
				  struct dm_crypt_request *dmreq)
{
	if (bio_data_dir(dmreq->ctx->bio_in) == WRITE)
		return 0;

	return crypt_iv_elephant(cc, dmreq);
}
static int crypt_iv_elephant_init ( struct crypt_config * cc )
{
struct iv_elephant_private * elephant = & cc - > iv_gen_private . elephant ;
int key_offset = cc - > key_size - cc - > key_extra_size ;
return crypto_skcipher_setkey ( elephant - > tfm , & cc - > key [ key_offset ] , cc - > key_extra_size ) ;
}
static int crypt_iv_elephant_wipe ( struct crypt_config * cc )
{
struct iv_elephant_private * elephant = & cc - > iv_gen_private . elephant ;
u8 key [ ELEPHANT_MAX_KEY_SIZE ] ;
memset ( key , 0 , cc - > key_extra_size ) ;
return crypto_skcipher_setkey ( elephant - > tfm , key , cc - > key_extra_size ) ;
}
2015-11-29 16:09:19 +03:00
/*
 * Per-mode IV operation tables.
 *
 * .generator fills the IV for a sector and .post, when set, runs after a
 * successful crypto operation (see the crypt_convert_block_*() callers).
 * NOTE(review): .ctr/.dtr/.init/.wipe are presumably invoked on target
 * construction/destruction and key load/wipe - confirm against the callers.
 */
static const struct crypt_iv_operations crypt_iv_plain_ops = {
2005-04-17 02:20:36 +04:00
. generator = crypt_iv_plain_gen
} ;
2015-11-29 16:09:19 +03:00
static const struct crypt_iv_operations crypt_iv_plain64_ops = {
2009-12-11 02:52:25 +03:00
. generator = crypt_iv_plain64_gen
} ;
2017-06-06 10:07:01 +03:00
static const struct crypt_iv_operations crypt_iv_plain64be_ops = {
. generator = crypt_iv_plain64be_gen
} ;
2015-11-29 16:09:19 +03:00
static const struct crypt_iv_operations crypt_iv_essiv_ops = {
2005-04-17 02:20:36 +04:00
. generator = crypt_iv_essiv_gen
} ;
2015-11-29 16:09:19 +03:00
static const struct crypt_iv_operations crypt_iv_benbi_ops = {
2006-09-03 02:56:39 +04:00
. ctr = crypt_iv_benbi_ctr ,
. dtr = crypt_iv_benbi_dtr ,
. generator = crypt_iv_benbi_gen
} ;
2005-04-17 02:20:36 +04:00
2015-11-29 16:09:19 +03:00
static const struct crypt_iv_operations crypt_iv_null_ops = {
2007-05-09 13:32:55 +04:00
. generator = crypt_iv_null_gen
} ;
2015-11-29 16:09:19 +03:00
/* LMK: the first table with the full set of hooks, including .post */
static const struct crypt_iv_operations crypt_iv_lmk_ops = {
2011-01-13 22:59:55 +03:00
. ctr = crypt_iv_lmk_ctr ,
. dtr = crypt_iv_lmk_dtr ,
. init = crypt_iv_lmk_init ,
. wipe = crypt_iv_lmk_wipe ,
. generator = crypt_iv_lmk_gen ,
. post = crypt_iv_lmk_post
} ;
2015-11-29 16:09:19 +03:00
/* TCW: the .post hook applies whitening to the ciphertext on writes */
static const struct crypt_iv_operations crypt_iv_tcw_ops = {
2013-10-29 02:21:04 +04:00
. ctr = crypt_iv_tcw_ctr ,
. dtr = crypt_iv_tcw_dtr ,
. init = crypt_iv_tcw_init ,
. wipe = crypt_iv_tcw_wipe ,
. generator = crypt_iv_tcw_gen ,
. post = crypt_iv_tcw_post
} ;
2017-01-04 22:23:54 +03:00
static struct crypt_iv_operations crypt_iv_random_ops = {
. generator = crypt_iv_random_gen
} ;
2019-07-09 16:22:14 +03:00
static struct crypt_iv_operations crypt_iv_eboiv_ops = {
. ctr = crypt_iv_eboiv_ctr ,
. generator = crypt_iv_eboiv_gen
} ;
2020-01-03 11:20:22 +03:00
static struct crypt_iv_operations crypt_iv_elephant_ops = {
. ctr = crypt_iv_elephant_ctr ,
. dtr = crypt_iv_elephant_dtr ,
. init = crypt_iv_elephant_init ,
. wipe = crypt_iv_elephant_wipe ,
. generator = crypt_iv_elephant_gen ,
. post = crypt_iv_elephant_post
} ;
2017-01-04 22:23:54 +03:00
/*
* Integrity extensions
*/
static bool crypt_integrity_aead ( struct crypt_config * cc )
{
return test_bit ( CRYPT_MODE_INTEGRITY_AEAD , & cc - > cipher_flags ) ;
}
static bool crypt_integrity_hmac ( struct crypt_config * cc )
{
2017-03-16 17:39:40 +03:00
return crypt_integrity_aead ( cc ) & & cc - > key_mac_size ;
2017-01-04 22:23:54 +03:00
}
/* Get sg containing data */
static struct scatterlist * crypt_get_sg_data ( struct crypt_config * cc ,
struct scatterlist * sg )
{
2017-03-16 17:39:40 +03:00
if ( unlikely ( crypt_integrity_aead ( cc ) ) )
2017-01-04 22:23:54 +03:00
return & sg [ 2 ] ;
return sg ;
}
static int dm_crypt_integrity_io_alloc ( struct dm_crypt_io * io , struct bio * bio )
{
struct bio_integrity_payload * bip ;
unsigned int tag_len ;
int ret ;
if ( ! bio_sectors ( bio ) | | ! io - > cc - > on_disk_tag_size )
return 0 ;
bip = bio_integrity_alloc ( bio , GFP_NOIO , 1 ) ;
if ( IS_ERR ( bip ) )
return PTR_ERR ( bip ) ;
2019-02-08 18:52:07 +03:00
tag_len = io - > cc - > on_disk_tag_size * ( bio_sectors ( bio ) > > io - > cc - > sector_shift ) ;
2017-01-04 22:23:54 +03:00
bip - > bip_iter . bi_size = tag_len ;
bip - > bip_iter . bi_sector = io - > cc - > start + io - > sector ;
ret = bio_integrity_add_page ( bio , virt_to_page ( io - > integrity_metadata ) ,
tag_len , offset_in_page ( io - > integrity_metadata ) ) ;
if ( unlikely ( ret ! = tag_len ) )
return - ENOMEM ;
return 0 ;
}
/*
 * Validate the integrity profile of the underlying device against the
 * dm-crypt configuration.
 *
 * With CONFIG_BLK_DEV_INTEGRITY the device must expose the
 * "DM-DIF-EXT-TAG" profile whose tag and tuple sizes equal
 * cc->on_disk_tag_size and whose interval equals cc->sector_size.  In AEAD
 * mode the authentication tag size is derived as
 * on_disk_tag_size - integrity_iv_size and programmed into the AEAD
 * transform.  Finally, tag size plus IV size must exactly match the
 * profile's tag size.
 *
 * Returns 0 on success; on any mismatch sets ti->error and returns -EINVAL.
 * Without CONFIG_BLK_DEV_INTEGRITY it always fails with -EINVAL.
 */
static int crypt_integrity_ctr ( struct crypt_config * cc , struct dm_target * ti )
{
# ifdef CONFIG_BLK_DEV_INTEGRITY
struct blk_integrity * bi = blk_get_integrity ( cc - > dev - > bdev - > bd_disk ) ;
2019-05-15 17:23:43 +03:00
struct mapped_device * md = dm_table_get_md ( ti - > table ) ;
2017-01-04 22:23:54 +03:00
/* From now we require underlying device with our integrity profile */
if ( ! bi | | strcasecmp ( bi - > profile - > name , " DM-DIF-EXT-TAG " ) ) {
ti - > error = " Integrity profile not supported. " ;
return - EINVAL ;
}
2017-04-18 23:51:54 +03:00
if ( bi - > tag_size ! = cc - > on_disk_tag_size | |
bi - > tuple_size ! = cc - > on_disk_tag_size ) {
2017-01-04 22:23:54 +03:00
ti - > error = " Integrity profile tag size mismatch. " ;
return - EINVAL ;
}
2017-04-18 23:51:54 +03:00
/* The shift binds tighter than the comparison: (1 << exp) != sector_size */
if ( 1 < < bi - > interval_exp ! = cc - > sector_size ) {
ti - > error = " Integrity profile sector size mismatch. " ;
return - EINVAL ;
}
2017-01-04 22:23:54 +03:00
2017-03-16 17:39:40 +03:00
if ( crypt_integrity_aead ( cc ) ) {
2017-01-04 22:23:54 +03:00
/* Whatever tag space is not used by the stored IV holds the auth tag */
cc - > integrity_tag_size = cc - > on_disk_tag_size - cc - > integrity_iv_size ;
2019-05-15 17:23:43 +03:00
DMDEBUG ( " %s: Integrity AEAD, tag size %u, IV size %u. " , dm_device_name ( md ) ,
2017-01-04 22:23:54 +03:00
cc - > integrity_tag_size , cc - > integrity_iv_size ) ;
if ( crypto_aead_setauthsize ( any_tfm_aead ( cc ) , cc - > integrity_tag_size ) ) {
ti - > error = " Integrity AEAD auth tag size is not supported. " ;
return - EINVAL ;
}
} else if ( cc - > integrity_iv_size )
2019-05-15 17:23:43 +03:00
DMDEBUG ( " %s: Additional per-sector space %u bytes for IV. " , dm_device_name ( md ) ,
2017-01-04 22:23:54 +03:00
cc - > integrity_iv_size ) ;
if ( ( cc - > integrity_tag_size + cc - > integrity_iv_size ) ! = bi - > tag_size ) {
ti - > error = " Not enough space for integrity tag in the profile. " ;
return - EINVAL ;
}
return 0 ;
# else
ti - > error = " Integrity profile not supported. " ;
return - EINVAL ;
# endif
}
2007-10-20 01:42:37 +04:00
static void crypt_convert_init ( struct crypt_config * cc ,
struct convert_context * ctx ,
struct bio * bio_out , struct bio * bio_in ,
2008-02-08 05:10:41 +03:00
sector_t sector )
2005-04-17 02:20:36 +04:00
{
ctx - > bio_in = bio_in ;
ctx - > bio_out = bio_out ;
2013-10-12 02:45:43 +04:00
if ( bio_in )
ctx - > iter_in = bio_in - > bi_iter ;
if ( bio_out )
ctx - > iter_out = bio_out - > bi_iter ;
2012-07-27 18:08:05 +04:00
ctx - > cc_sector = sector + cc - > iv_offset ;
2008-02-08 05:11:09 +03:00
init_completion ( & ctx - > restart ) ;
2005-04-17 02:20:36 +04:00
}
2009-03-16 20:44:33 +03:00
static struct dm_crypt_request * dmreq_of_req ( struct crypt_config * cc ,
2017-01-04 22:23:54 +03:00
void * req )
2009-03-16 20:44:33 +03:00
{
return ( struct dm_crypt_request * ) ( ( char * ) req + cc - > dmreq_start ) ;
}
2017-01-04 22:23:54 +03:00
static void * req_of_dmreq ( struct crypt_config * cc , struct dm_crypt_request * dmreq )
2009-03-16 20:44:33 +03:00
{
2017-01-04 22:23:54 +03:00
return ( void * ) ( ( char * ) dmreq - cc - > dmreq_start ) ;
2009-03-16 20:44:33 +03:00
}
2011-01-13 22:59:54 +03:00
static u8 * iv_of_dmreq ( struct crypt_config * cc ,
struct dm_crypt_request * dmreq )
{
2017-03-16 17:39:40 +03:00
if ( crypt_integrity_aead ( cc ) )
2017-01-04 22:23:54 +03:00
return ( u8 * ) ALIGN ( ( unsigned long ) ( dmreq + 1 ) ,
crypto_aead_alignmask ( any_tfm_aead ( cc ) ) + 1 ) ;
else
return ( u8 * ) ALIGN ( ( unsigned long ) ( dmreq + 1 ) ,
crypto_skcipher_alignmask ( any_tfm ( cc ) ) + 1 ) ;
2011-01-13 22:59:54 +03:00
}
2017-01-04 22:23:54 +03:00
/*
 * Return the buffer holding the original (unmodified) IV, located
 * cc->iv_size bytes after the working IV.
 */
static u8 *org_iv_of_dmreq(struct crypt_config *cc,
			   struct dm_crypt_request *dmreq)
{
	u8 *working_iv = iv_of_dmreq(cc, dmreq);

	return working_iv + cc->iv_size;
}
2019-04-04 19:33:34 +03:00
static __le64 * org_sector_of_dmreq ( struct crypt_config * cc ,
2017-01-04 22:23:54 +03:00
struct dm_crypt_request * dmreq )
{
u8 * ptr = iv_of_dmreq ( cc , dmreq ) + cc - > iv_size + cc - > iv_size ;
2019-04-04 19:33:34 +03:00
return ( __le64 * ) ptr ;
2017-01-04 22:23:54 +03:00
}
static unsigned int * org_tag_of_dmreq ( struct crypt_config * cc ,
struct dm_crypt_request * dmreq )
{
u8 * ptr = iv_of_dmreq ( cc , dmreq ) + cc - > iv_size +
cc - > iv_size + sizeof ( uint64_t ) ;
return ( unsigned int * ) ptr ;
}
static void * tag_from_dmreq ( struct crypt_config * cc ,
struct dm_crypt_request * dmreq )
{
struct convert_context * ctx = dmreq - > ctx ;
struct dm_crypt_io * io = container_of ( ctx , struct dm_crypt_io , ctx ) ;
return & io - > integrity_metadata [ * org_tag_of_dmreq ( cc , dmreq ) *
cc - > on_disk_tag_size ] ;
}
static void * iv_tag_from_dmreq ( struct crypt_config * cc ,
struct dm_crypt_request * dmreq )
{
return tag_from_dmreq ( cc , dmreq ) + cc - > integrity_tag_size ;
}
/*
 * Encrypt or decrypt one sector with the AEAD transform.
 *
 * Fills in the per-request bookkeeping (IV sector, tag offset, original
 * sector number), builds four-entry input and output scatterlists
 * (sector, IV, data, tag), obtains the IV - from the integrity metadata on
 * reads when an IV is stored there, otherwise from the IV generator - and
 * submits the request.  On writes any part of the on-disk tag area not
 * covered by tag + IV is zeroed.  After a successful operation the optional
 * post hook runs, and both bio iterators are advanced by one sector.
 *
 * tag_offset is the index of this sector's tag within the io's integrity
 * metadata.
 *
 * Returns the crypto API / hook result, or -EIO when the input segment
 * length is not a multiple of the sector size.  Note that the early returns
 * (-EIO and generator failure) leave the iterators untouched.
 */
static int crypt_convert_block_aead ( struct crypt_config * cc ,
struct convert_context * ctx ,
struct aead_request * req ,
unsigned int tag_offset )
2008-02-08 05:11:04 +03:00
{
2013-10-12 02:45:43 +04:00
struct bio_vec bv_in = bio_iter_iovec ( ctx - > bio_in , ctx - > iter_in ) ;
struct bio_vec bv_out = bio_iter_iovec ( ctx - > bio_out , ctx - > iter_out ) ;
2008-02-08 05:11:14 +03:00
struct dm_crypt_request * dmreq ;
2017-01-04 22:23:54 +03:00
u8 * iv , * org_iv , * tag_iv , * tag ;
2019-04-04 19:33:34 +03:00
__le64 * sector ;
2017-01-04 22:23:54 +03:00
int r = 0 ;
/* A stored IV, if any, must have exactly the cipher IV size */
BUG_ON ( cc - > integrity_iv_size & & cc - > integrity_iv_size ! = cc - > iv_size ) ;
2008-02-08 05:11:14 +03:00
2017-03-16 17:39:44 +03:00
/* Reject unexpected unaligned bio. */
2017-11-07 18:35:57 +03:00
if ( unlikely ( bv_in . bv_len & ( cc - > sector_size - 1 ) ) )
2017-03-16 17:39:44 +03:00
return - EIO ;
2008-02-08 05:11:14 +03:00
2009-03-16 20:44:33 +03:00
dmreq = dmreq_of_req ( cc , req ) ;
2017-01-04 22:23:54 +03:00
dmreq - > iv_sector = ctx - > cc_sector ;
2017-03-16 17:39:44 +03:00
/* With large-sector IVs the IV counts encryption sectors, not 512-byte ones */
if ( test_bit ( CRYPT_IV_LARGE_SECTORS , & cc - > cipher_flags ) )
2017-03-23 17:23:14 +03:00
dmreq - > iv_sector > > = cc - > sector_shift ;
2017-01-04 22:23:54 +03:00
dmreq - > ctx = ctx ;
*org_tag_of_dmreq ( cc , dmreq ) = tag_offset ;
sector = org_sector_of_dmreq ( cc , dmreq ) ;
*sector = cpu_to_le64 ( ctx - > cc_sector - cc - > iv_offset ) ;
2011-01-13 22:59:54 +03:00
iv = iv_of_dmreq ( cc , dmreq ) ;
2017-01-04 22:23:54 +03:00
org_iv = org_iv_of_dmreq ( cc , dmreq ) ;
tag = tag_from_dmreq ( cc , dmreq ) ;
tag_iv = iv_tag_from_dmreq ( cc , dmreq ) ;
/* AEAD request:
* | - - - - - AAD - - - - - - - | - - - - - - DATA - - - - - - - | - - AUTH TAG - - |
* | ( authenticated ) | ( auth + encryption ) | |
* | sector_LE | IV | sector in / out | tag in / out |
*/
sg_init_table ( dmreq - > sg_in , 4 ) ;
sg_set_buf ( & dmreq - > sg_in [ 0 ] , sector , sizeof ( uint64_t ) ) ;
sg_set_buf ( & dmreq - > sg_in [ 1 ] , org_iv , cc - > iv_size ) ;
2017-03-16 17:39:44 +03:00
sg_set_page ( & dmreq - > sg_in [ 2 ] , bv_in . bv_page , cc - > sector_size , bv_in . bv_offset ) ;
2017-01-04 22:23:54 +03:00
sg_set_buf ( & dmreq - > sg_in [ 3 ] , tag , cc - > integrity_tag_size ) ;
sg_init_table ( dmreq - > sg_out , 4 ) ;
sg_set_buf ( & dmreq - > sg_out [ 0 ] , sector , sizeof ( uint64_t ) ) ;
sg_set_buf ( & dmreq - > sg_out [ 1 ] , org_iv , cc - > iv_size ) ;
2017-03-16 17:39:44 +03:00
sg_set_page ( & dmreq - > sg_out [ 2 ] , bv_out . bv_page , cc - > sector_size , bv_out . bv_offset ) ;
2017-01-04 22:23:54 +03:00
sg_set_buf ( & dmreq - > sg_out [ 3 ] , tag , cc - > integrity_tag_size ) ;
if ( cc - > iv_gen_ops ) {
/* For READs use IV stored in integrity metadata */
if ( cc - > integrity_iv_size & & bio_data_dir ( ctx - > bio_in ) ! = WRITE ) {
memcpy ( org_iv , tag_iv , cc - > iv_size ) ;
} else {
r = cc - > iv_gen_ops - > generator ( cc , org_iv , dmreq ) ;
if ( r < 0 )
return r ;
/* Store generated IV in integrity metadata */
if ( cc - > integrity_iv_size )
memcpy ( tag_iv , org_iv , cc - > iv_size ) ;
}
/* Working copy of IV, to be modified in crypto API */
memcpy ( iv , org_iv , cc - > iv_size ) ;
}
/* Associated data is the sector number followed by the original IV */
aead_request_set_ad ( req , sizeof ( uint64_t ) + cc - > iv_size ) ;
if ( bio_data_dir ( ctx - > bio_in ) = = WRITE ) {
aead_request_set_crypt ( req , dmreq - > sg_in , dmreq - > sg_out ,
2017-03-16 17:39:44 +03:00
cc - > sector_size , iv ) ;
2017-01-04 22:23:54 +03:00
r = crypto_aead_encrypt ( req ) ;
/* Zero the part of the on-disk tag area not used by tag and IV */
if ( cc - > integrity_tag_size + cc - > integrity_iv_size ! = cc - > on_disk_tag_size )
memset ( tag + cc - > integrity_tag_size + cc - > integrity_iv_size , 0 ,
cc - > on_disk_tag_size - ( cc - > integrity_tag_size + cc - > integrity_iv_size ) ) ;
} else {
/* On decryption the length passed in covers the data plus the auth tag */
aead_request_set_crypt ( req , dmreq - > sg_in , dmreq - > sg_out ,
2017-03-16 17:39:44 +03:00
cc - > sector_size + cc - > integrity_tag_size , iv ) ;
2017-01-04 22:23:54 +03:00
r = crypto_aead_decrypt ( req ) ;
}
2019-05-15 17:22:30 +03:00
/* -EBADMSG is logged (rate limited) together with the original sector */
if ( r = = - EBADMSG ) {
char b [ BDEVNAME_SIZE ] ;
DMERR_LIMIT ( " %s: INTEGRITY AEAD ERROR, sector %llu " , bio_devname ( ctx - > bio_in , b ) ,
2017-01-04 22:23:54 +03:00
( unsigned long long ) le64_to_cpu ( * sector ) ) ;
2019-05-15 17:22:30 +03:00
}
2017-01-04 22:23:54 +03:00
if ( ! r & & cc - > iv_gen_ops & & cc - > iv_gen_ops - > post )
r = cc - > iv_gen_ops - > post ( cc , org_iv , dmreq ) ;
2017-03-16 17:39:44 +03:00
bio_advance_iter ( ctx - > bio_in , & ctx - > iter_in , cc - > sector_size ) ;
bio_advance_iter ( ctx - > bio_out , & ctx - > iter_out , cc - > sector_size ) ;
2008-02-08 05:11:04 +03:00
2017-01-04 22:23:54 +03:00
return r ;
}
/*
 * Encrypt or decrypt one sector with the skcipher transform.
 *
 * Fills in the per-request bookkeeping (IV sector, tag offset, original
 * sector number), builds single-entry input and output scatterlists over
 * the current bio segments, obtains the IV - from the integrity metadata on
 * reads when an IV is stored there, otherwise from the IV generator - and
 * submits the request.  After a successful operation the optional post hook
 * runs, and both bio iterators are advanced by one sector.
 *
 * tag_offset is the index of this sector's tag within the io's integrity
 * metadata.
 *
 * Returns the crypto API / hook result, or -EIO when the input segment
 * length is not a multiple of the sector size.  Note that the early returns
 * (-EIO and generator failure) leave the iterators untouched.
 */
static int crypt_convert_block_skcipher ( struct crypt_config * cc ,
struct convert_context * ctx ,
struct skcipher_request * req ,
unsigned int tag_offset )
{
struct bio_vec bv_in = bio_iter_iovec ( ctx - > bio_in , ctx - > iter_in ) ;
struct bio_vec bv_out = bio_iter_iovec ( ctx - > bio_out , ctx - > iter_out ) ;
struct scatterlist * sg_in , * sg_out ;
struct dm_crypt_request * dmreq ;
u8 * iv , * org_iv , * tag_iv ;
2019-04-04 19:33:34 +03:00
__le64 * sector ;
2017-01-04 22:23:54 +03:00
int r = 0 ;
2008-02-08 05:11:04 +03:00
2017-03-16 17:39:44 +03:00
/* Reject unexpected unaligned bio. */
2017-11-07 18:35:57 +03:00
if ( unlikely ( bv_in . bv_len & ( cc - > sector_size - 1 ) ) )
2017-03-16 17:39:44 +03:00
return - EIO ;
2017-01-04 22:23:54 +03:00
dmreq = dmreq_of_req ( cc , req ) ;
2012-07-27 18:08:05 +04:00
dmreq - > iv_sector = ctx - > cc_sector ;
2017-03-16 17:39:44 +03:00
/* With large-sector IVs the IV counts encryption sectors, not 512-byte ones */
if ( test_bit ( CRYPT_IV_LARGE_SECTORS , & cc - > cipher_flags ) )
2017-03-23 17:23:14 +03:00
dmreq - > iv_sector > > = cc - > sector_shift ;
2009-03-16 20:44:33 +03:00
dmreq - > ctx = ctx ;
2008-02-08 05:11:04 +03:00
2017-01-04 22:23:54 +03:00
*org_tag_of_dmreq ( cc , dmreq ) = tag_offset ;
iv = iv_of_dmreq ( cc , dmreq ) ;
org_iv = org_iv_of_dmreq ( cc , dmreq ) ;
tag_iv = iv_tag_from_dmreq ( cc , dmreq ) ;
sector = org_sector_of_dmreq ( cc , dmreq ) ;
*sector = cpu_to_le64 ( ctx - > cc_sector - cc - > iv_offset ) ;
/* For skcipher we use only the first sg item */
sg_in = & dmreq - > sg_in [ 0 ] ;
sg_out = & dmreq - > sg_out [ 0 ] ;
2008-02-08 05:11:04 +03:00
2017-01-04 22:23:54 +03:00
sg_init_table ( sg_in , 1 ) ;
2017-03-16 17:39:44 +03:00
sg_set_page ( sg_in , bv_in . bv_page , cc - > sector_size , bv_in . bv_offset ) ;
2017-01-04 22:23:54 +03:00
sg_init_table ( sg_out , 1 ) ;
2017-03-16 17:39:44 +03:00
sg_set_page ( sg_out , bv_out . bv_page , cc - > sector_size , bv_out . bv_offset ) ;
2008-02-08 05:11:04 +03:00
2008-02-08 05:11:14 +03:00
if ( cc - > iv_gen_ops ) {
2017-01-04 22:23:54 +03:00
/* For READs use IV stored in integrity metadata */
if ( cc - > integrity_iv_size & & bio_data_dir ( ctx - > bio_in ) ! = WRITE ) {
memcpy ( org_iv , tag_iv , cc - > integrity_iv_size ) ;
} else {
r = cc - > iv_gen_ops - > generator ( cc , org_iv , dmreq ) ;
if ( r < 0 )
return r ;
2020-01-03 11:20:22 +03:00
/* Data can be already preprocessed in generator */
/* In that case the cipher must read the preprocessed copy in sg_out */
if ( test_bit ( CRYPT_ENCRYPT_PREPROCESS , & cc - > cipher_flags ) )
sg_in = sg_out ;
2017-01-04 22:23:54 +03:00
/* Store generated IV in integrity metadata */
if ( cc - > integrity_iv_size )
memcpy ( tag_iv , org_iv , cc - > integrity_iv_size ) ;
}
/* Working copy of IV, to be modified in crypto API */
memcpy ( iv , org_iv , cc - > iv_size ) ;
2008-02-08 05:11:14 +03:00
}
2017-03-16 17:39:44 +03:00
skcipher_request_set_crypt ( req , sg_in , sg_out , cc - > sector_size , iv ) ;
2008-02-08 05:11:14 +03:00
if ( bio_data_dir ( ctx - > bio_in ) = = WRITE )
2016-01-24 16:16:36 +03:00
r = crypto_skcipher_encrypt ( req ) ;
2008-02-08 05:11:14 +03:00
else
2016-01-24 16:16:36 +03:00
r = crypto_skcipher_decrypt ( req ) ;
2008-02-08 05:11:14 +03:00
2011-01-13 22:59:54 +03:00
/* The post hook only runs when the operation completed with 0 */
if ( ! r & & cc - > iv_gen_ops & & cc - > iv_gen_ops - > post )
2017-01-04 22:23:54 +03:00
r = cc - > iv_gen_ops - > post ( cc , org_iv , dmreq ) ;
2017-03-16 17:39:44 +03:00
bio_advance_iter ( ctx - > bio_in , & ctx - > iter_in , cc - > sector_size ) ;
bio_advance_iter ( ctx - > bio_out , & ctx - > iter_out , cc - > sector_size ) ;
2011-01-13 22:59:54 +03:00
2008-02-08 05:11:14 +03:00
return r ;
2008-02-08 05:11:04 +03:00
}
2008-02-08 05:11:12 +03:00
/*
 * Forward declaration: completion callback registered on every crypto
 * request by the crypt_alloc_req_*() helpers below.
 */
static void kcryptd_async_done ( struct crypto_async_request * async_req ,
int error ) ;
2011-01-13 22:59:53 +03:00
2017-01-04 22:23:54 +03:00
static void crypt_alloc_req_skcipher ( struct crypt_config * cc ,
struct convert_context * ctx )
2008-02-08 05:11:07 +03:00
{
2012-07-27 18:08:05 +04:00
unsigned key_index = ctx - > cc_sector & ( cc - > tfms_count - 1 ) ;
2011-01-13 22:59:53 +03:00
2017-01-04 22:23:54 +03:00
if ( ! ctx - > r . req )
2018-05-21 01:25:53 +03:00
ctx - > r . req = mempool_alloc ( & cc - > req_pool , GFP_NOIO ) ;
2011-01-13 22:59:53 +03:00
2017-01-04 22:23:54 +03:00
skcipher_request_set_tfm ( ctx - > r . req , cc - > cipher_tfm . tfms [ key_index ] ) ;
2015-05-15 18:00:25 +03:00
/*
* Use REQ_MAY_BACKLOG so a cipher driver internally backlogs
* requests if driver request queue is full .
*/
2017-01-04 22:23:54 +03:00
skcipher_request_set_callback ( ctx - > r . req ,
2018-09-05 16:17:45 +03:00
CRYPTO_TFM_REQ_MAY_BACKLOG ,
2017-01-04 22:23:54 +03:00
kcryptd_async_done , dmreq_of_req ( cc , ctx - > r . req ) ) ;
2008-02-08 05:11:07 +03:00
}
2017-01-04 22:23:54 +03:00
static void crypt_alloc_req_aead ( struct crypt_config * cc ,
struct convert_context * ctx )
{
if ( ! ctx - > r . req_aead )
2018-05-21 01:25:53 +03:00
ctx - > r . req_aead = mempool_alloc ( & cc - > req_pool , GFP_NOIO ) ;
2011-01-13 22:59:53 +03:00
2017-01-04 22:23:54 +03:00
aead_request_set_tfm ( ctx - > r . req_aead , cc - > cipher_tfm . tfms_aead [ 0 ] ) ;
2015-05-15 18:00:25 +03:00
/*
* Use REQ_MAY_BACKLOG so a cipher driver internally backlogs
* requests if driver request queue is full .
*/
2017-01-04 22:23:54 +03:00
aead_request_set_callback ( ctx - > r . req_aead ,
2018-09-05 16:17:45 +03:00
CRYPTO_TFM_REQ_MAY_BACKLOG ,
2017-01-04 22:23:54 +03:00
kcryptd_async_done , dmreq_of_req ( cc , ctx - > r . req_aead ) ) ;
}
/*
 * Prepare the crypto request of ctx, dispatching on whether the mapping
 * uses an AEAD or an skcipher transform.
 */
static void crypt_alloc_req(struct crypt_config *cc,
			    struct convert_context *ctx)
{
	if (!crypt_integrity_aead(cc)) {
		crypt_alloc_req_skcipher(cc, ctx);
		return;
	}

	crypt_alloc_req_aead(cc, ctx);
}
2017-01-04 22:23:54 +03:00
static void crypt_free_req_skcipher ( struct crypt_config * cc ,
struct skcipher_request * req , struct bio * base_bio )
2014-03-28 23:51:55 +04:00
{
struct dm_crypt_io * io = dm_per_bio_data ( base_bio , cc - > per_bio_data_size ) ;
2016-01-24 16:16:36 +03:00
if ( ( struct skcipher_request * ) ( io + 1 ) ! = req )
2018-05-21 01:25:53 +03:00
mempool_free ( req , & cc - > req_pool ) ;
2014-03-28 23:51:55 +04:00
}
2017-01-04 22:23:54 +03:00
static void crypt_free_req_aead ( struct crypt_config * cc ,
struct aead_request * req , struct bio * base_bio )
{
struct dm_crypt_io * io = dm_per_bio_data ( base_bio , cc - > per_bio_data_size ) ;
if ( ( struct aead_request * ) ( io + 1 ) ! = req )
2018-05-21 01:25:53 +03:00
mempool_free ( req , & cc - > req_pool ) ;
2017-01-04 22:23:54 +03:00
}
/*
 * Release a crypto request, dispatching on whether the mapping uses an AEAD
 * or an skcipher transform.
 */
static void crypt_free_req(struct crypt_config *cc, void *req, struct bio *base_bio)
{
	if (!crypt_integrity_aead(cc)) {
		crypt_free_req_skcipher(cc, req, base_bio);
		return;
	}

	crypt_free_req_aead(cc, req, base_bio);
}
2005-04-17 02:20:36 +04:00
/*
* Encrypt / decrypt data from one bio to another one ( can be the same one )
*/
2017-06-03 10:38:06 +03:00
static blk_status_t crypt_convert ( struct crypt_config * cc ,
dm crypt: add flags to optionally bypass kcryptd workqueues
This is a follow up to [1] that detailed latency problems associated
with dm-crypt's use of workqueues when processing IO.
Current dm-crypt implementation creates a significant IO performance
overhead (at least on small IO block sizes) for both latency and
throughput. We suspect offloading IO request processing into
workqueues and async threads is more harmful these days with the
modern fast storage. I also did some digging into the dm-crypt git
history and much of this async processing is not needed anymore,
because the reasons it was added are mostly gone from the kernel. More
details can be found in [2] (see "Git archeology" section).
This change adds DM_CRYPT_NO_READ_WORKQUEUE and
DM_CRYPT_NO_WRITE_WORKQUEUE flags for read and write BIOs, which
direct dm-crypt to not offload crypto operations into kcryptd
workqueues. In addition, writes are not buffered to be sorted in the
dm-crypt red-black tree, but dispatched immediately. For cases, where
crypto operations cannot happen (hard interrupt context, for example
the read path of some NVME drivers), we offload the work to a tasklet
rather than a workqueue.
These flags only ensure no async BIO processing in the dm-crypt
module. It is worth noting that some Crypto API implementations may
offload encryption into their own workqueues, which are independent of
the dm-crypt and its configuration. However upon enabling these new
flags dm-crypt will instruct Crypto API not to backlog crypto
requests.
To give an idea of the performance gains for certain workloads,
consider the script, and results when tested against various
devices, detailed here:
https://www.redhat.com/archives/dm-devel/2020-July/msg00138.html
[1]: https://www.spinics.net/lists/dm-crypt/msg07516.html
[2]: https://blog.cloudflare.com/speeding-up-linux-disk-encryption/
Signed-off-by: Ignat Korchagin <ignat@cloudflare.com>
Reviewed-by: Damien Le Moal <damien.lemoal@wdc.com>
Reviewed-by: Bob Liu <bob.liu@oracle.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
2020-07-06 20:37:31 +03:00
struct convert_context * ctx , bool atomic )
2005-04-17 02:20:36 +04:00
{
2017-01-04 22:23:54 +03:00
unsigned int tag_offset = 0 ;
2017-03-23 17:23:14 +03:00
unsigned int sector_step = cc - > sector_size > > SECTOR_SHIFT ;
2008-03-29 00:16:07 +03:00
int r ;
2005-04-17 02:20:36 +04:00
2012-07-27 18:08:04 +04:00
atomic_set ( & ctx - > cc_pending , 1 ) ;
2008-10-10 16:37:08 +04:00
2013-10-12 02:45:43 +04:00
while ( ctx - > iter_in . bi_size & & ctx - > iter_out . bi_size ) {
2005-04-17 02:20:36 +04:00
2008-02-08 05:11:14 +03:00
crypt_alloc_req ( cc , ctx ) ;
2012-07-27 18:08:04 +04:00
atomic_inc ( & ctx - > cc_pending ) ;
2008-03-29 00:16:07 +03:00
2017-03-16 17:39:40 +03:00
if ( crypt_integrity_aead ( cc ) )
2017-01-04 22:23:54 +03:00
r = crypt_convert_block_aead ( cc , ctx , ctx - > r . req_aead , tag_offset ) ;
else
r = crypt_convert_block_skcipher ( cc , ctx , ctx - > r . req , tag_offset ) ;
2008-02-08 05:11:14 +03:00
switch ( r ) {
2015-05-15 18:00:25 +03:00
/*
* The request was queued by a crypto driver
* but the driver request queue is full , let ' s wait .
*/
2008-02-08 05:11:14 +03:00
case - EBUSY :
wait_for_completion ( & ctx - > restart ) ;
2013-11-15 02:32:02 +04:00
reinit_completion ( & ctx - > restart ) ;
2020-08-24 01:36:59 +03:00
fallthrough ;
2015-05-15 18:00:25 +03:00
/*
* The request is queued and processed asynchronously ,
* completion function kcryptd_async_done ( ) will be called .
*/
Revert "dm crypt: fix deadlock when async crypto algorithm returns -EBUSY"
This reverts Linux 4.1-rc1 commit 0618764cb25f6fa9fb31152995de42a8a0496475.
The problem which that commit attempts to fix actually lies in the
Freescale CAAM crypto driver not dm-crypt.
dm-crypt uses CRYPTO_TFM_REQ_MAY_BACKLOG. This means the crypto
driver should internally backlog requests which arrive when the queue is
full and process them later. Until the crypto hw's queue becomes full,
the driver returns -EINPROGRESS. When the crypto hw's queue is full,
the driver returns -EBUSY, and if CRYPTO_TFM_REQ_MAY_BACKLOG is set, is
expected to backlog the request and process it when the hardware has
queue space. At the point when the driver takes the request from the
backlog and starts processing it, it calls the completion function with
a status of -EINPROGRESS. The completion function is called (for a
second time, in the case of backlogged requests) with a status/err of 0
when a request is done.
Crypto drivers for hardware without hardware queueing use the helpers,
crypto_init_queue(), crypto_enqueue_request(), crypto_dequeue_request()
and crypto_get_backlog() helpers to implement this behaviour correctly,
while others implement this behaviour without these helpers (ccp, for
example).
dm-crypt (before the patch that needs reverting) uses this API
correctly. It queues up as many requests as the hw queues will allow
(i.e. as long as it gets back -EINPROGRESS from the request function).
Then, when it sees at least one backlogged request (gets -EBUSY), it
waits till that backlogged request is handled (completion gets called
with -EINPROGRESS), and then continues. The references to
af_alg_wait_for_completion() and af_alg_complete() in that commit's
commit message are irrelevant because those functions only handle one
request at a time, unlike dm-crypt.
The problem is that the Freescale CAAM driver, which that commit
describes as having been tested with, fails to implement the
backlogging behaviour correctly. In caam_jr_enqueue(), if the hardware
queue is full, it simply returns -EBUSY without backlogging the request.
What the observed deadlock was is not described in the commit message
but it is obviously the wait_for_completion() in crypt_convert() where
dm-crypt would wait for the completion being called with -EINPROGRESS
in the case of backlogged requests. This completion will never be
completed due to the bug in the CAAM driver.
Commit 0618764cb25 incorrectly made dm-crypt wait for every request,
even when the driver/hardware queues are not full, which means that
dm-crypt will never see -EBUSY. This means that that commit will cause
a performance regression on all crypto drivers which implement the API
correctly.
Revert it. Correct backlog handling should be implemented in the CAAM
driver instead.
Cc'ing stable purely because commit 0618764cb25 did. If for some reason
a stable@ kernel did pick up commit 0618764cb25 it should get reverted.
Signed-off-by: Rabin Vincent <rabin.vincent@axis.com>
Reviewed-by: Horia Geanta <horia.geanta@freescale.com>
Cc: stable@vger.kernel.org
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
2015-05-05 16:15:56 +03:00
case - EINPROGRESS :
2017-01-04 22:23:54 +03:00
ctx - > r . req = NULL ;
2017-03-16 17:39:44 +03:00
ctx - > cc_sector + = sector_step ;
2017-04-18 23:51:54 +03:00
tag_offset + + ;
2008-03-29 00:16:07 +03:00
continue ;
2015-05-15 18:00:25 +03:00
/*
* The request was already processed ( synchronously ) .
*/
2008-02-08 05:11:14 +03:00
case 0 :
2012-07-27 18:08:04 +04:00
atomic_dec ( & ctx - > cc_pending ) ;
2017-03-16 17:39:44 +03:00
ctx - > cc_sector + = sector_step ;
2017-04-18 23:51:54 +03:00
tag_offset + + ;
dm crypt: add flags to optionally bypass kcryptd workqueues
This is a follow up to [1] that detailed latency problems associated
with dm-crypt's use of workqueues when processing IO.
Current dm-crypt implementation creates a significant IO performance
overhead (at least on small IO block sizes) for both latency and
throughput. We suspect offloading IO request processing into
workqueues and async threads is more harmful these days with the
modern fast storage. I also did some digging into the dm-crypt git
history and much of this async processing is not needed anymore,
because the reasons it was added are mostly gone from the kernel. More
details can be found in [2] (see "Git archeology" section).
This change adds DM_CRYPT_NO_READ_WORKQUEUE and
DM_CRYPT_NO_WRITE_WORKQUEUE flags for read and write BIOs, which
direct dm-crypt to not offload crypto operations into kcryptd
workqueues. In addition, writes are not buffered to be sorted in the
dm-crypt red-black tree, but dispatched immediately. For cases, where
crypto operations cannot happen (hard interrupt context, for example
the read path of some NVME drivers), we offload the work to a tasklet
rather than a workqueue.
These flags only ensure no async BIO processing in the dm-crypt
module. It is worth noting that some Crypto API implementations may
offload encryption into their own workqueues, which are independent of
the dm-crypt and its configuration. However upon enabling these new
flags dm-crypt will instruct Crypto API not to backlog crypto
requests.
To give an idea of the performance gains for certain workloads,
consider the script, and results when tested against various
devices, detailed here:
https://www.redhat.com/archives/dm-devel/2020-July/msg00138.html
[1]: https://www.spinics.net/lists/dm-crypt/msg07516.html
[2]: https://blog.cloudflare.com/speeding-up-linux-disk-encryption/
Signed-off-by: Ignat Korchagin <ignat@cloudflare.com>
Reviewed-by: Damien Le Moal <damien.lemoal@wdc.com>
Reviewed-by: Bob Liu <bob.liu@oracle.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
2020-07-06 20:37:31 +03:00
if ( ! atomic )
cond_resched ( ) ;
2008-02-08 05:11:14 +03:00
continue ;
2017-01-04 22:23:54 +03:00
/*
* There was a data integrity error .
*/
case - EBADMSG :
atomic_dec ( & ctx - > cc_pending ) ;
2017-06-03 10:38:06 +03:00
return BLK_STS_PROTECTION ;
2017-01-04 22:23:54 +03:00
/*
* There was an error while processing the request .
*/
2008-03-29 00:16:07 +03:00
default :
2012-07-27 18:08:04 +04:00
atomic_dec ( & ctx - > cc_pending ) ;
2017-06-03 10:38:06 +03:00
return BLK_STS_IOERR ;
2008-03-29 00:16:07 +03:00
}
2005-04-17 02:20:36 +04:00
}
2008-03-29 00:16:07 +03:00
return 0 ;
2005-04-17 02:20:36 +04:00
}
2015-02-13 16:23:52 +03:00
static void crypt_free_buffer_pages ( struct crypt_config * cc , struct bio * clone ) ;
2005-04-17 02:20:36 +04:00
/*
* Generate a new unfragmented bio with the given size
2015-09-10 04:34:51 +03:00
* This should never violate the device limitations ( but only because
* max_segment_size is being constrained to PAGE_SIZE ) .
2015-02-13 16:24:41 +03:00
*
* This function may be called concurrently . If we allocate from the mempool
* concurrently , there is a possibility of deadlock . For example , if we have
* mempool of 256 pages , two processes , each wanting 256 , pages allocate from
* the mempool concurrently , it may deadlock in a situation where both processes
* have allocated 128 pages and the mempool is exhausted .
*
* In order to avoid this scenario we allocate the pages under a mutex .
*
* In order to not degrade performance with excessive locking , we try
* non - blocking allocations without a mutex first but on failure we fallback
* to blocking allocations with a mutex .
2005-04-17 02:20:36 +04:00
*/
2015-02-13 16:23:52 +03:00
static struct bio * crypt_alloc_buffer ( struct dm_crypt_io * io , unsigned size )
2005-04-17 02:20:36 +04:00
{
2012-07-27 18:08:05 +04:00
struct crypt_config * cc = io - > cc ;
2006-10-03 12:15:37 +04:00
struct bio * clone ;
2005-04-17 02:20:36 +04:00
unsigned int nr_iovecs = ( size + PAGE_SIZE - 1 ) > > PAGE_SHIFT ;
2015-02-13 16:24:41 +03:00
gfp_t gfp_mask = GFP_NOWAIT | __GFP_HIGHMEM ;
unsigned i , len , remaining_size ;
2007-12-13 17:16:10 +03:00
struct page * page ;
2005-04-17 02:20:36 +04:00
2015-02-13 16:24:41 +03:00
retry :
2015-11-07 03:28:21 +03:00
if ( unlikely ( gfp_mask & __GFP_DIRECT_RECLAIM ) )
2015-02-13 16:24:41 +03:00
mutex_lock ( & cc - > bio_alloc_lock ) ;
2018-05-21 01:25:53 +03:00
clone = bio_alloc_bioset ( GFP_NOIO , nr_iovecs , & cc - > bs ) ;
2006-10-03 12:15:37 +04:00
if ( ! clone )
2017-01-04 22:23:54 +03:00
goto out ;
2005-04-17 02:20:36 +04:00
2007-05-09 13:32:52 +04:00
clone_init ( io , clone ) ;
2006-10-03 12:15:40 +04:00
2015-02-13 16:24:41 +03:00
remaining_size = size ;
2007-05-09 13:32:54 +04:00
for ( i = 0 ; i < nr_iovecs ; i + + ) {
2018-05-21 01:25:53 +03:00
page = mempool_alloc ( & cc - > page_pool , gfp_mask ) ;
2015-02-13 16:24:41 +03:00
if ( ! page ) {
crypt_free_buffer_pages ( cc , clone ) ;
bio_put ( clone ) ;
2015-11-07 03:28:21 +03:00
gfp_mask | = __GFP_DIRECT_RECLAIM ;
2015-02-13 16:24:41 +03:00
goto retry ;
}
2005-04-17 02:20:36 +04:00
2015-02-13 16:24:41 +03:00
len = ( remaining_size > PAGE_SIZE ) ? PAGE_SIZE : remaining_size ;
2007-12-13 17:16:10 +03:00
2016-10-29 11:08:06 +03:00
bio_add_page ( clone , page , len , 0 ) ;
2005-04-17 02:20:36 +04:00
2015-02-13 16:24:41 +03:00
remaining_size - = len ;
2005-04-17 02:20:36 +04:00
}
2017-01-04 22:23:54 +03:00
/* Allocate space for integrity tags */
if ( dm_crypt_integrity_io_alloc ( io , clone ) ) {
crypt_free_buffer_pages ( cc , clone ) ;
bio_put ( clone ) ;
clone = NULL ;
}
out :
2015-11-07 03:28:21 +03:00
if ( unlikely ( gfp_mask & __GFP_DIRECT_RECLAIM ) )
2015-02-13 16:24:41 +03:00
mutex_unlock ( & cc - > bio_alloc_lock ) ;
2006-10-03 12:15:37 +04:00
return clone ;
2005-04-17 02:20:36 +04:00
}
2007-10-16 15:48:46 +04:00
static void crypt_free_buffer_pages ( struct crypt_config * cc , struct bio * clone )
2005-04-17 02:20:36 +04:00
{
struct bio_vec * bv ;
2019-02-15 14:13:19 +03:00
struct bvec_iter_all iter_all ;
2005-04-17 02:20:36 +04:00
2019-04-25 10:03:00 +03:00
bio_for_each_segment_all ( bv , clone , iter_all ) {
2005-04-17 02:20:36 +04:00
BUG_ON ( ! bv - > bv_page ) ;
2018-05-21 01:25:53 +03:00
mempool_free ( bv - > bv_page , & cc - > page_pool ) ;
2005-04-17 02:20:36 +04:00
}
}
2014-03-28 23:51:55 +04:00
/*
 * Put a dm_crypt_io into its starting state for "bio", whose data begins at
 * "sector": no error recorded, no crypto request, no integrity metadata and
 * a pending count of zero.
 */
static void crypt_io_init(struct dm_crypt_io *io, struct crypt_config *cc,
			  struct bio *bio, sector_t sector)
{
	/* Nothing is pending and nothing has failed yet. */
	atomic_set(&io->io_pending, 0);
	io->error = 0;

	/* What this io operates on. */
	io->cc = cc;
	io->base_bio = bio;
	io->sector = sector;

	/* Resources that are attached later, on demand. */
	io->ctx.r.req = NULL;
	io->integrity_metadata = NULL;
	io->integrity_metadata_from_pool = false;
}
2008-10-10 16:37:02 +04:00
static void crypt_inc_pending ( struct dm_crypt_io * io )
{
2012-07-27 18:08:04 +04:00
atomic_inc ( & io - > io_pending ) ;
2008-10-10 16:37:02 +04:00
}
2005-04-17 02:20:36 +04:00
/*
* One of the bios was finished . Check for completion of
* the whole request and correctly clean up the buffer .
*/
2008-02-08 05:10:43 +03:00
static void crypt_dec_pending ( struct dm_crypt_io * io )
2005-04-17 02:20:36 +04:00
{
2012-07-27 18:08:05 +04:00
struct crypt_config * cc = io - > cc ;
2009-03-16 20:44:36 +03:00
struct bio * base_bio = io - > base_bio ;
2017-06-03 10:38:06 +03:00
blk_status_t error = io - > error ;
2005-04-17 02:20:36 +04:00
2012-07-27 18:08:04 +04:00
if ( ! atomic_dec_and_test ( & io - > io_pending ) )
2005-04-17 02:20:36 +04:00
return ;
2017-01-04 22:23:54 +03:00
if ( io - > ctx . r . req )
crypt_free_req ( cc , io - > ctx . r . req , base_bio ) ;
if ( unlikely ( io - > integrity_metadata_from_pool ) )
2018-05-21 01:25:53 +03:00
mempool_free ( io - > integrity_metadata , & io - > cc - > tag_pool ) ;
2017-01-04 22:23:54 +03:00
else
kfree ( io - > integrity_metadata ) ;
2009-03-16 20:44:36 +03:00
2017-06-03 10:38:06 +03:00
base_bio - > bi_status = error ;
2015-07-20 16:29:37 +03:00
bio_endio ( base_bio ) ;
2005-04-17 02:20:36 +04:00
}
/*
2007-10-20 01:38:58 +04:00
* kcryptd / kcryptd_io :
2005-04-17 02:20:36 +04:00
*
* Needed because it would be very unwise to do decryption in an
2006-10-03 12:15:39 +04:00
* interrupt context .
2007-10-20 01:38:58 +04:00
*
* kcryptd performs the actual encryption or decryption .
*
* kcryptd_io performs the IO submission .
*
* They must be separated as otherwise the final stages could be
* starved by new requests which can block in the first stages due
* to memory allocation .
2011-01-13 22:59:53 +03:00
*
* The work is done per CPU global for all dm - crypt instances .
* They should not depend on each other and do not block .
2005-04-17 02:20:36 +04:00
*/
2015-07-20 16:29:37 +03:00
static void crypt_endio ( struct bio * clone )
2006-10-03 12:15:37 +04:00
{
2007-07-12 20:26:32 +04:00
struct dm_crypt_io * io = clone - > bi_private ;
2012-07-27 18:08:05 +04:00
struct crypt_config * cc = io - > cc ;
2008-02-08 05:10:46 +03:00
unsigned rw = bio_data_dir ( clone ) ;
2017-06-03 10:38:06 +03:00
blk_status_t error ;
2006-10-03 12:15:37 +04:00
/*
2007-09-27 14:47:43 +04:00
* free the processed pages
2006-10-03 12:15:37 +04:00
*/
2008-02-08 05:10:46 +03:00
if ( rw = = WRITE )
2007-10-16 15:48:46 +04:00
crypt_free_buffer_pages ( cc , clone ) ;
2006-10-03 12:15:37 +04:00
2017-06-03 10:38:06 +03:00
error = clone - > bi_status ;
2006-10-03 12:15:37 +04:00
bio_put ( clone ) ;
2015-08-11 02:05:18 +03:00
if ( rw = = READ & & ! error ) {
2008-02-08 05:10:46 +03:00
kcryptd_queue_crypt ( io ) ;
return ;
}
2008-02-08 05:10:43 +03:00
2015-08-11 02:05:18 +03:00
if ( unlikely ( error ) )
io - > error = error ;
2008-02-08 05:10:43 +03:00
crypt_dec_pending ( io ) ;
2006-10-03 12:15:37 +04:00
}
2007-07-12 20:26:32 +04:00
static void clone_init ( struct dm_crypt_io * io , struct bio * clone )
2006-10-03 12:15:37 +04:00
{
2012-07-27 18:08:05 +04:00
struct crypt_config * cc = io - > cc ;
2006-10-03 12:15:37 +04:00
clone - > bi_private = io ;
clone - > bi_end_io = crypt_endio ;
2017-08-23 20:10:32 +03:00
bio_set_dev ( clone , cc - > dev - > bdev ) ;
2016-10-28 17:48:16 +03:00
clone - > bi_opf = io - > base_bio - > bi_opf ;
2006-10-03 12:15:37 +04:00
}
2011-01-13 22:59:53 +03:00
/*
 * Clone the original bio and submit the clone to the underlying device.
 *
 * Returns 0 once the clone has been submitted, or 1 if the clone or its
 * integrity data could not be allocated (nothing was submitted then).
 */
static int kcryptd_io_read(struct dm_crypt_io *io, gfp_t gfp)
{
	struct crypt_config *cc = io->cc;
	struct bio *clone;

	/*
	 * The original biovec array is needed to decrypt the whole bio data
	 * *afterwards*.  Thanks to immutable biovecs the block layer will not
	 * modify that array, so bio_clone_fast() can be used.
	 */
	clone = bio_clone_fast(io->base_bio, gfp, &cc->bs);
	if (!clone)
		return 1;

	crypt_inc_pending(io);

	clone_init(io, clone);
	clone->bi_iter.bi_sector = cc->start + io->sector;

	if (dm_crypt_integrity_io_alloc(io, clone) != 0) {
		/* Undo the reference taken above and drop the unused clone. */
		crypt_dec_pending(io);
		bio_put(clone);
		return 1;
	}

	submit_bio_noacct(clone);
	return 0;
}
2015-02-13 16:25:59 +03:00
static void kcryptd_io_read_work ( struct work_struct * work )
{
struct dm_crypt_io * io = container_of ( work , struct dm_crypt_io , work ) ;
crypt_inc_pending ( io ) ;
if ( kcryptd_io_read ( io , GFP_NOIO ) )
2017-06-03 10:38:06 +03:00
io - > error = BLK_STS_RESOURCE ;
2015-02-13 16:25:59 +03:00
crypt_dec_pending ( io ) ;
}
static void kcryptd_queue_read ( struct dm_crypt_io * io )
{
struct crypt_config * cc = io - > cc ;
INIT_WORK ( & io - > work , kcryptd_io_read_work ) ;
queue_work ( cc - > io_queue , & io - > work ) ;
}
2008-02-08 05:10:49 +03:00
static void kcryptd_io_write ( struct dm_crypt_io * io )
{
2008-02-08 05:11:12 +03:00
struct bio * clone = io - > ctx . bio_out ;
2015-02-13 16:25:59 +03:00
2020-07-01 11:59:44 +03:00
submit_bio_noacct ( clone ) ;
2008-02-08 05:10:49 +03:00
}
2015-02-13 16:27:41 +03:00
# define crypt_io_from_node(node) rb_entry((node), struct dm_crypt_io, rb_node)
2015-02-13 16:25:59 +03:00
static int dmcrypt_write ( void * data )
2008-02-08 05:10:52 +03:00
{
2015-02-13 16:25:59 +03:00
struct crypt_config * cc = data ;
2015-02-13 16:27:41 +03:00
struct dm_crypt_io * io ;
2015-02-13 16:25:59 +03:00
while ( 1 ) {
2015-02-13 16:27:41 +03:00
struct rb_root write_tree ;
2015-02-13 16:25:59 +03:00
struct blk_plug plug ;
2008-02-08 05:10:52 +03:00
2018-07-11 19:10:51 +03:00
spin_lock_irq ( & cc - > write_thread_lock ) ;
2015-02-13 16:25:59 +03:00
continue_locked :
2008-02-08 05:10:52 +03:00
2015-02-13 16:27:41 +03:00
if ( ! RB_EMPTY_ROOT ( & cc - > write_tree ) )
2015-02-13 16:25:59 +03:00
goto pop_from_list ;
2016-09-21 17:22:29 +03:00
set_current_state ( TASK_INTERRUPTIBLE ) ;
2015-02-13 16:25:59 +03:00
2018-07-11 19:10:51 +03:00
spin_unlock_irq ( & cc - > write_thread_lock ) ;
2015-02-13 16:25:59 +03:00
2016-09-21 17:22:29 +03:00
if ( unlikely ( kthread_should_stop ( ) ) ) {
sched/core: Remove set_task_state()
This is a nasty interface and setting the state of a foreign task must
not be done. As of the following commit:
be628be0956 ("bcache: Make gc wakeup sane, remove set_task_state()")
... everyone in the kernel calls set_task_state() with current, allowing
the helper to be removed.
However, as the comment indicates, it is still around for those archs
where computing current is more expensive than using a pointer, at least
in theory. An important arch that is affected is arm64, however this has
been addressed now [1] and performance is up to par making no difference
with either calls.
Of all the callers, if any, it's the locking bits that would care most
about this -- ie: we end up passing a tsk pointer to a lot of the lock
slowpath, and setting ->state on that. The following numbers are based
on two tests: a custom ad-hoc microbenchmark that just measures
latencies (for ~65 million calls) between get_task_state() vs
get_current_state().
Secondly for a higher overview, an unlink microbenchmark was used,
which pounds on a single file with open, close,unlink combos with
increasing thread counts (up to 4x ncpus). While the workload is quite
unrealistic, it does contend a lot on the inode mutex or now rwsem.
[1] https://lkml.kernel.org/r/1483468021-8237-1-git-send-email-mark.rutland@arm.com
== 1. x86-64 ==
Avg runtime set_task_state(): 601 msecs
Avg runtime set_current_state(): 552 msecs
vanilla dirty
Hmean unlink1-processes-2 36089.26 ( 0.00%) 38977.33 ( 8.00%)
Hmean unlink1-processes-5 28555.01 ( 0.00%) 29832.55 ( 4.28%)
Hmean unlink1-processes-8 37323.75 ( 0.00%) 44974.57 ( 20.50%)
Hmean unlink1-processes-12 43571.88 ( 0.00%) 44283.01 ( 1.63%)
Hmean unlink1-processes-21 34431.52 ( 0.00%) 38284.45 ( 11.19%)
Hmean unlink1-processes-30 34813.26 ( 0.00%) 37975.17 ( 9.08%)
Hmean unlink1-processes-48 37048.90 ( 0.00%) 39862.78 ( 7.59%)
Hmean unlink1-processes-79 35630.01 ( 0.00%) 36855.30 ( 3.44%)
Hmean unlink1-processes-110 36115.85 ( 0.00%) 39843.91 ( 10.32%)
Hmean unlink1-processes-141 32546.96 ( 0.00%) 35418.52 ( 8.82%)
Hmean unlink1-processes-172 34674.79 ( 0.00%) 36899.21 ( 6.42%)
Hmean unlink1-processes-203 37303.11 ( 0.00%) 36393.04 ( -2.44%)
Hmean unlink1-processes-224 35712.13 ( 0.00%) 36685.96 ( 2.73%)
== 2. ppc64le ==
Avg runtime set_task_state(): 938 msecs
Avg runtime set_current_state: 940 msecs
vanilla dirty
Hmean unlink1-processes-2 19269.19 ( 0.00%) 30704.50 ( 59.35%)
Hmean unlink1-processes-5 20106.15 ( 0.00%) 21804.15 ( 8.45%)
Hmean unlink1-processes-8 17496.97 ( 0.00%) 17243.28 ( -1.45%)
Hmean unlink1-processes-12 14224.15 ( 0.00%) 17240.21 ( 21.20%)
Hmean unlink1-processes-21 14155.66 ( 0.00%) 15681.23 ( 10.78%)
Hmean unlink1-processes-30 14450.70 ( 0.00%) 15995.83 ( 10.69%)
Hmean unlink1-processes-48 16945.57 ( 0.00%) 16370.42 ( -3.39%)
Hmean unlink1-processes-79 15788.39 ( 0.00%) 14639.27 ( -7.28%)
Hmean unlink1-processes-110 14268.48 ( 0.00%) 14377.40 ( 0.76%)
Hmean unlink1-processes-141 14023.65 ( 0.00%) 16271.69 ( 16.03%)
Hmean unlink1-processes-172 13417.62 ( 0.00%) 16067.55 ( 19.75%)
Hmean unlink1-processes-203 15293.08 ( 0.00%) 15440.40 ( 0.96%)
Hmean unlink1-processes-234 13719.32 ( 0.00%) 16190.74 ( 18.01%)
Hmean unlink1-processes-265 16400.97 ( 0.00%) 16115.22 ( -1.74%)
Hmean unlink1-processes-296 14388.60 ( 0.00%) 16216.13 ( 12.70%)
Hmean unlink1-processes-320 15771.85 ( 0.00%) 15905.96 ( 0.85%)
x86-64 (known to be fast for get_current()/this_cpu_read_stable() caching)
and ppc64 (with paca) show similar improvements in the unlink microbenches.
The small delta for ppc64 (2ms), does not represent the gains on the unlink
runs. In the case of x86, there was a decent amount of variation in the
latency runs, but always within a 20 to 50ms increase), ppc was more constant.
Signed-off-by: Davidlohr Bueso <dbueso@suse.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: dave@stgolabs.net
Cc: mark.rutland@arm.com
Link: http://lkml.kernel.org/r/1483479794-14013-5-git-send-email-dave@stgolabs.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>
2017-01-04 00:43:14 +03:00
set_current_state ( TASK_RUNNING ) ;
2016-09-21 17:22:29 +03:00
break ;
}
2015-02-13 16:25:59 +03:00
schedule ( ) ;
sched/core: Remove set_task_state()
This is a nasty interface and setting the state of a foreign task must
not be done. As of the following commit:
be628be0956 ("bcache: Make gc wakeup sane, remove set_task_state()")
... everyone in the kernel calls set_task_state() with current, allowing
the helper to be removed.
However, as the comment indicates, it is still around for those archs
where computing current is more expensive than using a pointer, at least
in theory. An important arch that is affected is arm64, however this has
been addressed now [1] and performance is up to par making no difference
with either calls.
Of all the callers, if any, it's the locking bits that would care most
about this -- ie: we end up passing a tsk pointer to a lot of the lock
slowpath, and setting ->state on that. The following numbers are based
on two tests: a custom ad-hoc microbenchmark that just measures
latencies (for ~65 million calls) between get_task_state() vs
get_current_state().
Secondly for a higher overview, an unlink microbenchmark was used,
which pounds on a single file with open, close,unlink combos with
increasing thread counts (up to 4x ncpus). While the workload is quite
unrealistic, it does contend a lot on the inode mutex or now rwsem.
[1] https://lkml.kernel.org/r/1483468021-8237-1-git-send-email-mark.rutland@arm.com
== 1. x86-64 ==
Avg runtime set_task_state(): 601 msecs
Avg runtime set_current_state(): 552 msecs
vanilla dirty
Hmean unlink1-processes-2 36089.26 ( 0.00%) 38977.33 ( 8.00%)
Hmean unlink1-processes-5 28555.01 ( 0.00%) 29832.55 ( 4.28%)
Hmean unlink1-processes-8 37323.75 ( 0.00%) 44974.57 ( 20.50%)
Hmean unlink1-processes-12 43571.88 ( 0.00%) 44283.01 ( 1.63%)
Hmean unlink1-processes-21 34431.52 ( 0.00%) 38284.45 ( 11.19%)
Hmean unlink1-processes-30 34813.26 ( 0.00%) 37975.17 ( 9.08%)
Hmean unlink1-processes-48 37048.90 ( 0.00%) 39862.78 ( 7.59%)
Hmean unlink1-processes-79 35630.01 ( 0.00%) 36855.30 ( 3.44%)
Hmean unlink1-processes-110 36115.85 ( 0.00%) 39843.91 ( 10.32%)
Hmean unlink1-processes-141 32546.96 ( 0.00%) 35418.52 ( 8.82%)
Hmean unlink1-processes-172 34674.79 ( 0.00%) 36899.21 ( 6.42%)
Hmean unlink1-processes-203 37303.11 ( 0.00%) 36393.04 ( -2.44%)
Hmean unlink1-processes-224 35712.13 ( 0.00%) 36685.96 ( 2.73%)
== 2. ppc64le ==
Avg runtime set_task_state(): 938 msecs
Avg runtime set_current_state: 940 msecs
vanilla dirty
Hmean unlink1-processes-2 19269.19 ( 0.00%) 30704.50 ( 59.35%)
Hmean unlink1-processes-5 20106.15 ( 0.00%) 21804.15 ( 8.45%)
Hmean unlink1-processes-8 17496.97 ( 0.00%) 17243.28 ( -1.45%)
Hmean unlink1-processes-12 14224.15 ( 0.00%) 17240.21 ( 21.20%)
Hmean unlink1-processes-21 14155.66 ( 0.00%) 15681.23 ( 10.78%)
Hmean unlink1-processes-30 14450.70 ( 0.00%) 15995.83 ( 10.69%)
Hmean unlink1-processes-48 16945.57 ( 0.00%) 16370.42 ( -3.39%)
Hmean unlink1-processes-79 15788.39 ( 0.00%) 14639.27 ( -7.28%)
Hmean unlink1-processes-110 14268.48 ( 0.00%) 14377.40 ( 0.76%)
Hmean unlink1-processes-141 14023.65 ( 0.00%) 16271.69 ( 16.03%)
Hmean unlink1-processes-172 13417.62 ( 0.00%) 16067.55 ( 19.75%)
Hmean unlink1-processes-203 15293.08 ( 0.00%) 15440.40 ( 0.96%)
Hmean unlink1-processes-234 13719.32 ( 0.00%) 16190.74 ( 18.01%)
Hmean unlink1-processes-265 16400.97 ( 0.00%) 16115.22 ( -1.74%)
Hmean unlink1-processes-296 14388.60 ( 0.00%) 16216.13 ( 12.70%)
Hmean unlink1-processes-320 15771.85 ( 0.00%) 15905.96 ( 0.85%)
x86-64 (known to be fast for get_current()/this_cpu_read_stable() caching)
and ppc64 (with paca) show similar improvements in the unlink microbenches.
The small delta for ppc64 (2ms), does not represent the gains on the unlink
runs. In the case of x86, there was a decent amount of variation in the
latency runs, but always within a 20 to 50ms increase), ppc was more constant.
Signed-off-by: Davidlohr Bueso <dbueso@suse.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: dave@stgolabs.net
Cc: mark.rutland@arm.com
Link: http://lkml.kernel.org/r/1483479794-14013-5-git-send-email-dave@stgolabs.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>
2017-01-04 00:43:14 +03:00
set_current_state ( TASK_RUNNING ) ;
2018-07-11 19:10:51 +03:00
spin_lock_irq ( & cc - > write_thread_lock ) ;
2015-02-13 16:25:59 +03:00
goto continue_locked ;
pop_from_list :
2015-02-13 16:27:41 +03:00
write_tree = cc - > write_tree ;
cc - > write_tree = RB_ROOT ;
2018-07-11 19:10:51 +03:00
spin_unlock_irq ( & cc - > write_thread_lock ) ;
2015-02-13 16:25:59 +03:00
2015-02-13 16:27:41 +03:00
BUG_ON ( rb_parent ( write_tree . rb_node ) ) ;
/*
* Note : we cannot walk the tree here with rb_next because
* the structures may be freed when kcryptd_io_write is called .
*/
2015-02-13 16:25:59 +03:00
blk_start_plug ( & plug ) ;
do {
2015-02-13 16:27:41 +03:00
io = crypt_io_from_node ( rb_first ( & write_tree ) ) ;
rb_erase ( & io - > rb_node , & write_tree ) ;
2015-02-13 16:25:59 +03:00
kcryptd_io_write ( io ) ;
2015-02-13 16:27:41 +03:00
} while ( ! RB_EMPTY_ROOT ( & write_tree ) ) ;
2015-02-13 16:25:59 +03:00
blk_finish_plug ( & plug ) ;
}
return 0 ;
2008-02-08 05:10:52 +03:00
}
2012-03-28 21:41:22 +04:00
static void kcryptd_crypt_write_io_submit ( struct dm_crypt_io * io , int async )
2008-02-08 05:10:49 +03:00
{
2008-02-08 05:10:57 +03:00
struct bio * clone = io - > ctx . bio_out ;
2012-07-27 18:08:05 +04:00
struct crypt_config * cc = io - > cc ;
2015-02-13 16:25:59 +03:00
unsigned long flags ;
2015-02-13 16:27:41 +03:00
sector_t sector ;
struct rb_node * * rbp , * parent ;
2008-02-08 05:10:57 +03:00
2017-06-03 10:38:06 +03:00
if ( unlikely ( io - > error ) ) {
2008-02-08 05:10:57 +03:00
crypt_free_buffer_pages ( cc , clone ) ;
bio_put ( clone ) ;
2008-10-10 16:37:06 +04:00
crypt_dec_pending ( io ) ;
2008-02-08 05:10:57 +03:00
return ;
}
/* crypt_convert should have filled the clone bio */
2013-10-12 02:45:43 +04:00
BUG_ON ( io - > ctx . iter_out . bi_size ) ;
2008-02-08 05:10:57 +03:00
2013-10-12 02:44:27 +04:00
clone - > bi_iter . bi_sector = cc - > start + io - > sector ;
2008-02-08 05:11:02 +03:00
dm crypt: add flags to optionally bypass kcryptd workqueues
This is a follow up to [1] that detailed latency problems associated
with dm-crypt's use of workqueues when processing IO.
Current dm-crypt implementation creates a significant IO performance
overhead (at least on small IO block sizes) for both latency and
throughput. We suspect offloading IO request processing into
workqueues and async threads is more harmful these days with the
modern fast storage. I also did some digging into the dm-crypt git
history and much of this async processing is not needed anymore,
because the reasons it was added are mostly gone from the kernel. More
details can be found in [2] (see "Git archeology" section).
This change adds DM_CRYPT_NO_READ_WORKQUEUE and
DM_CRYPT_NO_WRITE_WORKQUEUE flags for read and write BIOs, which
direct dm-crypt to not offload crypto operations into kcryptd
workqueues. In addition, writes are not buffered to be sorted in the
dm-crypt red-black tree, but dispatched immediately. For cases, where
crypto operations cannot happen (hard interrupt context, for example
the read path of some NVME drivers), we offload the work to a tasklet
rather than a workqueue.
These flags only ensure no async BIO processing in the dm-crypt
module. It is worth noting that some Crypto API implementations may
offload encryption into their own workqueues, which are independent of
the dm-crypt and its configuration. However upon enabling these new
flags dm-crypt will instruct Crypto API not to backlog crypto
requests.
To give an idea of the performance gains for certain workloads,
consider the script, and results when tested against various
devices, detailed here:
https://www.redhat.com/archives/dm-devel/2020-July/msg00138.html
[1]: https://www.spinics.net/lists/dm-crypt/msg07516.html
[2]: https://blog.cloudflare.com/speeding-up-linux-disk-encryption/
Signed-off-by: Ignat Korchagin <ignat@cloudflare.com>
Reviewed-by: Damien Le Moal <damien.lemoal@wdc.com>
Reviewed-by: Bob Liu <bob.liu@oracle.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
2020-07-06 20:37:31 +03:00
if ( ( likely ( ! async ) & & test_bit ( DM_CRYPT_NO_OFFLOAD , & cc - > flags ) ) | |
test_bit ( DM_CRYPT_NO_WRITE_WORKQUEUE , & cc - > flags ) ) {
2020-07-01 11:59:44 +03:00
submit_bio_noacct ( clone ) ;
2015-02-13 16:27:08 +03:00
return ;
}
2018-07-11 19:10:51 +03:00
spin_lock_irqsave ( & cc - > write_thread_lock , flags ) ;
if ( RB_EMPTY_ROOT ( & cc - > write_tree ) )
wake_up_process ( cc - > write_thread ) ;
2015-02-13 16:27:41 +03:00
rbp = & cc - > write_tree . rb_node ;
parent = NULL ;
sector = io - > sector ;
while ( * rbp ) {
parent = * rbp ;
if ( sector < crypt_io_from_node ( parent ) - > sector )
rbp = & ( * rbp ) - > rb_left ;
else
rbp = & ( * rbp ) - > rb_right ;
}
rb_link_node ( & io - > rb_node , parent , rbp ) ;
rb_insert_color ( & io - > rb_node , & cc - > write_tree ) ;
2018-07-11 19:10:51 +03:00
spin_unlock_irqrestore ( & cc - > write_thread_lock , flags ) ;
2008-02-08 05:10:49 +03:00
}
2020-07-08 12:28:08 +03:00
static bool kcryptd_crypt_write_inline ( struct crypt_config * cc ,
struct convert_context * ctx )
{
if ( ! test_bit ( DM_CRYPT_WRITE_INLINE , & cc - > flags ) )
return false ;
/*
* Note : zone append writes ( REQ_OP_ZONE_APPEND ) do not have ordering
* constraints so they do not need to be issued inline by
* kcryptd_crypt_write_convert ( ) .
*/
switch ( bio_op ( ctx - > bio_in ) ) {
case REQ_OP_WRITE :
case REQ_OP_WRITE_SAME :
case REQ_OP_WRITE_ZEROES :
return true ;
default :
return false ;
}
}
2008-10-10 16:37:04 +04:00
static void kcryptd_crypt_write_convert ( struct dm_crypt_io * io )
2006-10-03 12:15:37 +04:00
{
2012-07-27 18:08:05 +04:00
struct crypt_config * cc = io - > cc ;
2020-07-08 12:28:08 +03:00
struct convert_context * ctx = & io - > ctx ;
2006-10-03 12:15:37 +04:00
struct bio * clone ;
2008-10-10 16:37:08 +04:00
int crypt_finished ;
2008-10-21 20:45:00 +04:00
sector_t sector = io - > sector ;
2017-06-03 10:38:06 +03:00
blk_status_t r ;
2006-10-03 12:15:37 +04:00
2008-10-10 16:37:04 +04:00
/*
* Prevent io from disappearing until this function completes .
*/
crypt_inc_pending ( io ) ;
2020-07-08 12:28:08 +03:00
crypt_convert_init ( cc , ctx , NULL , io - > base_bio , sector ) ;
2008-10-10 16:37:04 +04:00
2015-02-13 16:23:52 +03:00
clone = crypt_alloc_buffer ( io , io - > base_bio - > bi_iter . bi_size ) ;
if ( unlikely ( ! clone ) ) {
2017-06-03 10:38:06 +03:00
io - > error = BLK_STS_IOERR ;
2015-02-13 16:23:52 +03:00
goto dec ;
}
2008-10-10 16:37:08 +04:00
2015-02-13 16:23:52 +03:00
io - > ctx . bio_out = clone ;
io - > ctx . iter_out = clone - > bi_iter ;
2008-10-21 20:45:00 +04:00
2015-02-13 16:23:52 +03:00
sector + = bio_sectors ( clone ) ;
2006-10-03 12:15:38 +04:00
2015-02-13 16:23:52 +03:00
crypt_inc_pending ( io ) ;
2020-07-08 12:28:08 +03:00
r = crypt_convert ( cc , ctx ,
dm crypt: add flags to optionally bypass kcryptd workqueues
This is a follow up to [1] that detailed latency problems associated
with dm-crypt's use of workqueues when processing IO.
Current dm-crypt implementation creates a significant IO performance
overhead (at least on small IO block sizes) for both latency and
throughput. We suspect offloading IO request processing into
workqueues and async threads is more harmful these days with the
modern fast storage. I also did some digging into the dm-crypt git
history and much of this async processing is not needed anymore,
because the reasons it was added are mostly gone from the kernel. More
details can be found in [2] (see "Git archeology" section).
This change adds DM_CRYPT_NO_READ_WORKQUEUE and
DM_CRYPT_NO_WRITE_WORKQUEUE flags for read and write BIOs, which
direct dm-crypt to not offload crypto operations into kcryptd
workqueues. In addition, writes are not buffered to be sorted in the
dm-crypt red-black tree, but dispatched immediately. For cases, where
crypto operations cannot happen (hard interrupt context, for example
the read path of some NVME drivers), we offload the work to a tasklet
rather than a workqueue.
These flags only ensure no async BIO processing in the dm-crypt
module. It is worth noting that some Crypto API implementations may
offload encryption into their own workqueues, which are independent of
the dm-crypt and its configuration. However upon enabling these new
flags dm-crypt will instruct Crypto API not to backlog crypto
requests.
To give an idea of the performance gains for certain workloads,
consider the script, and results when tested against various
devices, detailed here:
https://www.redhat.com/archives/dm-devel/2020-July/msg00138.html
[1]: https://www.spinics.net/lists/dm-crypt/msg07516.html
[2]: https://blog.cloudflare.com/speeding-up-linux-disk-encryption/
Signed-off-by: Ignat Korchagin <ignat@cloudflare.com>
Reviewed-by: Damien Le Moal <damien.lemoal@wdc.com>
Reviewed-by: Bob Liu <bob.liu@oracle.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
2020-07-06 20:37:31 +03:00
test_bit ( DM_CRYPT_NO_WRITE_WORKQUEUE , & cc - > flags ) ) ;
2017-06-03 10:38:06 +03:00
if ( r )
2017-01-04 22:23:54 +03:00
io - > error = r ;
2020-07-08 12:28:08 +03:00
crypt_finished = atomic_dec_and_test ( & ctx - > cc_pending ) ;
if ( ! crypt_finished & & kcryptd_crypt_write_inline ( cc , ctx ) ) {
/* Wait for completion signaled by kcryptd_async_done() */
wait_for_completion ( & ctx - > restart ) ;
crypt_finished = 1 ;
}
2008-10-10 16:37:08 +04:00
2015-02-13 16:23:52 +03:00
/* Encryption was already finished, submit io now */
if ( crypt_finished ) {
kcryptd_crypt_write_io_submit ( io , 0 ) ;
io - > sector = sector ;
2006-10-03 12:15:38 +04:00
}
2008-02-08 05:11:02 +03:00
2015-02-13 16:23:52 +03:00
dec :
2008-02-08 05:11:02 +03:00
crypt_dec_pending ( io ) ;
2008-02-08 05:10:59 +03:00
}
2012-03-28 21:41:22 +04:00
/*
 * Called when the last outstanding crypto request of a read has finished:
 * drops one pending reference on @io.
 */
static void kcryptd_crypt_read_done(struct dm_crypt_io *io)
{
	crypt_dec_pending(io);
}
2008-02-08 05:10:49 +03:00
static void kcryptd_crypt_read_convert ( struct dm_crypt_io * io )
2006-10-03 12:15:37 +04:00
{
2012-07-27 18:08:05 +04:00
struct crypt_config * cc = io - > cc ;
2017-06-03 10:38:06 +03:00
blk_status_t r ;
2005-04-17 02:20:36 +04:00
2008-10-10 16:37:02 +04:00
crypt_inc_pending ( io ) ;
2008-02-08 05:11:14 +03:00
2008-02-08 05:10:38 +03:00
crypt_convert_init ( cc , & io - > ctx , io - > base_bio , io - > base_bio ,
2008-02-08 05:10:54 +03:00
io - > sector ) ;
2005-04-17 02:20:36 +04:00
dm crypt: add flags to optionally bypass kcryptd workqueues
This is a follow up to [1] that detailed latency problems associated
with dm-crypt's use of workqueues when processing IO.
Current dm-crypt implementation creates a significant IO performance
overhead (at least on small IO block sizes) for both latency and
throughput. We suspect offloading IO request processing into
workqueues and async threads is more harmful these days with the
modern fast storage. I also did some digging into the dm-crypt git
history and much of this async processing is not needed anymore,
because the reasons it was added are mostly gone from the kernel. More
details can be found in [2] (see "Git archeology" section).
This change adds DM_CRYPT_NO_READ_WORKQUEUE and
DM_CRYPT_NO_WRITE_WORKQUEUE flags for read and write BIOs, which
direct dm-crypt to not offload crypto operations into kcryptd
workqueues. In addition, writes are not buffered to be sorted in the
dm-crypt red-black tree, but dispatched immediately. For cases, where
crypto operations cannot happen (hard interrupt context, for example
the read path of some NVME drivers), we offload the work to a tasklet
rather than a workqueue.
These flags only ensure no async BIO processing in the dm-crypt
module. It is worth noting that some Crypto API implementations may
offload encryption into their own workqueues, which are independent of
the dm-crypt and its configuration. However upon enabling these new
flags dm-crypt will instruct Crypto API not to backlog crypto
requests.
To give an idea of the performance gains for certain workloads,
consider the script, and results when tested against various
devices, detailed here:
https://www.redhat.com/archives/dm-devel/2020-July/msg00138.html
[1]: https://www.spinics.net/lists/dm-crypt/msg07516.html
[2]: https://blog.cloudflare.com/speeding-up-linux-disk-encryption/
Signed-off-by: Ignat Korchagin <ignat@cloudflare.com>
Reviewed-by: Damien Le Moal <damien.lemoal@wdc.com>
Reviewed-by: Bob Liu <bob.liu@oracle.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
2020-07-06 20:37:31 +03:00
r = crypt_convert ( cc , & io - > ctx ,
test_bit ( DM_CRYPT_NO_READ_WORKQUEUE , & cc - > flags ) ) ;
2017-06-03 10:38:06 +03:00
if ( r )
2017-01-04 22:23:54 +03:00
io - > error = r ;
2008-02-08 05:10:43 +03:00
2012-07-27 18:08:04 +04:00
if ( atomic_dec_and_test ( & io - > ctx . cc_pending ) )
2012-03-28 21:41:22 +04:00
kcryptd_crypt_read_done ( io ) ;
2008-02-08 05:11:14 +03:00
crypt_dec_pending ( io ) ;
2005-04-17 02:20:36 +04:00
}
2008-02-08 05:11:12 +03:00
static void kcryptd_async_done ( struct crypto_async_request * async_req ,
int error )
{
2009-03-16 20:44:33 +03:00
struct dm_crypt_request * dmreq = async_req - > data ;
struct convert_context * ctx = dmreq - > ctx ;
2008-02-08 05:11:12 +03:00
struct dm_crypt_io * io = container_of ( ctx , struct dm_crypt_io , ctx ) ;
2012-07-27 18:08:05 +04:00
struct crypt_config * cc = io - > cc ;
2008-02-08 05:11:12 +03:00
2015-05-15 18:00:25 +03:00
/*
* A request from crypto driver backlog is going to be processed now ,
* finish the completion and continue in crypt_convert ( ) .
* ( Callback will be called for the second time for this request . )
*/
Revert "dm crypt: fix deadlock when async crypto algorithm returns -EBUSY"
This reverts Linux 4.1-rc1 commit 0618764cb25f6fa9fb31152995de42a8a0496475.
The problem which that commit attempts to fix actually lies in the
Freescale CAAM crypto driver not dm-crypt.
dm-crypt uses CRYPTO_TFM_REQ_MAY_BACKLOG. This means the the crypto
driver should internally backlog requests which arrive when the queue is
full and process them later. Until the crypto hw's queue becomes full,
the driver returns -EINPROGRESS. When the crypto hw's queue if full,
the driver returns -EBUSY, and if CRYPTO_TFM_REQ_MAY_BACKLOG is set, is
expected to backlog the request and process it when the hardware has
queue space. At the point when the driver takes the request from the
backlog and starts processing it, it calls the completion function with
a status of -EINPROGRESS. The completion function is called (for a
second time, in the case of backlogged requests) with a status/err of 0
when a request is done.
Crypto drivers for hardware without hardware queueing use the helpers,
crypto_init_queue(), crypto_enqueue_request(), crypto_dequeue_request()
and crypto_get_backlog() helpers to implement this behaviour correctly,
while others implement this behaviour without these helpers (ccp, for
example).
dm-crypt (before the patch that needs reverting) uses this API
correctly. It queues up as many requests as the hw queues will allow
(i.e. as long as it gets back -EINPROGRESS from the request function).
Then, when it sees at least one backlogged request (gets -EBUSY), it
waits till that backlogged request is handled (completion gets called
with -EINPROGRESS), and then continues. The references to
af_alg_wait_for_completion() and af_alg_complete() in that commit's
commit message are irrelevant because those functions only handle one
request at a time, unlink dm-crypt.
The problem is that the Freescale CAAM driver, which that commit
describes as having being tested with, fails to implement the
backlogging behaviour correctly. In cam_jr_enqueue(), if the hardware
queue is full, it simply returns -EBUSY without backlogging the request.
What the observed deadlock was is not described in the commit message
but it is obviously the wait_for_completion() in crypto_convert() where
dm-crypto would wait for the completion being called with -EINPROGRESS
in the case of backlogged requests. This completion will never be
completed due to the bug in the CAAM driver.
Commit 0618764cb25 incorrectly made dm-crypt wait for every request,
even when the driver/hardware queues are not full, which means that
dm-crypt will never see -EBUSY. This means that that commit will cause
a performance regression on all crypto drivers which implement the API
correctly.
Revert it. Correct backlog handling should be implemented in the CAAM
driver instead.
Cc'ing stable purely because commit 0618764cb25 did. If for some reason
a stable@ kernel did pick up commit 0618764cb25 it should get reverted.
Signed-off-by: Rabin Vincent <rabin.vincent@axis.com>
Reviewed-by: Horia Geanta <horia.geanta@freescale.com>
Cc: stable@vger.kernel.org
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
2015-05-05 16:15:56 +03:00
if ( error = = - EINPROGRESS ) {
complete ( & ctx - > restart ) ;
2008-02-08 05:11:12 +03:00
return ;
Revert "dm crypt: fix deadlock when async crypto algorithm returns -EBUSY"
This reverts Linux 4.1-rc1 commit 0618764cb25f6fa9fb31152995de42a8a0496475.
The problem which that commit attempts to fix actually lies in the
Freescale CAAM crypto driver not dm-crypt.
dm-crypt uses CRYPTO_TFM_REQ_MAY_BACKLOG. This means the the crypto
driver should internally backlog requests which arrive when the queue is
full and process them later. Until the crypto hw's queue becomes full,
the driver returns -EINPROGRESS. When the crypto hw's queue if full,
the driver returns -EBUSY, and if CRYPTO_TFM_REQ_MAY_BACKLOG is set, is
expected to backlog the request and process it when the hardware has
queue space. At the point when the driver takes the request from the
backlog and starts processing it, it calls the completion function with
a status of -EINPROGRESS. The completion function is called (for a
second time, in the case of backlogged requests) with a status/err of 0
when a request is done.
Crypto drivers for hardware without hardware queueing use the helpers,
crypto_init_queue(), crypto_enqueue_request(), crypto_dequeue_request()
and crypto_get_backlog() helpers to implement this behaviour correctly,
while others implement this behaviour without these helpers (ccp, for
example).
dm-crypt (before the patch that needs reverting) uses this API
correctly. It queues up as many requests as the hw queues will allow
(i.e. as long as it gets back -EINPROGRESS from the request function).
Then, when it sees at least one backlogged request (gets -EBUSY), it
waits till that backlogged request is handled (completion gets called
with -EINPROGRESS), and then continues. The references to
af_alg_wait_for_completion() and af_alg_complete() in that commit's
commit message are irrelevant because those functions only handle one
request at a time, unlink dm-crypt.
The problem is that the Freescale CAAM driver, which that commit
describes as having being tested with, fails to implement the
backlogging behaviour correctly. In cam_jr_enqueue(), if the hardware
queue is full, it simply returns -EBUSY without backlogging the request.
What the observed deadlock was is not described in the commit message
but it is obviously the wait_for_completion() in crypto_convert() where
dm-crypto would wait for the completion being called with -EINPROGRESS
in the case of backlogged requests. This completion will never be
completed due to the bug in the CAAM driver.
Commit 0618764cb25 incorrectly made dm-crypt wait for every request,
even when the driver/hardware queues are not full, which means that
dm-crypt will never see -EBUSY. This means that that commit will cause
a performance regression on all crypto drivers which implement the API
correctly.
Revert it. Correct backlog handling should be implemented in the CAAM
driver instead.
Cc'ing stable purely because commit 0618764cb25 did. If for some reason
a stable@ kernel did pick up commit 0618764cb25 it should get reverted.
Signed-off-by: Rabin Vincent <rabin.vincent@axis.com>
Reviewed-by: Horia Geanta <horia.geanta@freescale.com>
Cc: stable@vger.kernel.org
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
2015-05-05 16:15:56 +03:00
}
2008-02-08 05:11:12 +03:00
2011-01-13 22:59:54 +03:00
if ( ! error & & cc - > iv_gen_ops & & cc - > iv_gen_ops - > post )
2017-01-04 22:23:54 +03:00
error = cc - > iv_gen_ops - > post ( cc , org_iv_of_dmreq ( cc , dmreq ) , dmreq ) ;
2011-01-13 22:59:54 +03:00
2017-01-04 22:23:54 +03:00
if ( error = = - EBADMSG ) {
2019-05-15 17:22:30 +03:00
char b [ BDEVNAME_SIZE ] ;
DMERR_LIMIT ( " %s: INTEGRITY AEAD ERROR, sector %llu " , bio_devname ( ctx - > bio_in , b ) ,
2017-01-04 22:23:54 +03:00
( unsigned long long ) le64_to_cpu ( * org_sector_of_dmreq ( cc , dmreq ) ) ) ;
2017-06-03 10:38:06 +03:00
io - > error = BLK_STS_PROTECTION ;
2017-01-04 22:23:54 +03:00
} else if ( error < 0 )
2017-06-03 10:38:06 +03:00
io - > error = BLK_STS_IOERR ;
2012-03-28 21:41:22 +04:00
2014-03-28 23:51:55 +04:00
crypt_free_req ( cc , req_of_dmreq ( cc , dmreq ) , io - > base_bio ) ;
2008-02-08 05:11:12 +03:00
2012-07-27 18:08:04 +04:00
if ( ! atomic_dec_and_test ( & ctx - > cc_pending ) )
Revert "dm crypt: fix deadlock when async crypto algorithm returns -EBUSY"
This reverts Linux 4.1-rc1 commit 0618764cb25f6fa9fb31152995de42a8a0496475.
The problem which that commit attempts to fix actually lies in the
Freescale CAAM crypto driver not dm-crypt.
dm-crypt uses CRYPTO_TFM_REQ_MAY_BACKLOG. This means the the crypto
driver should internally backlog requests which arrive when the queue is
full and process them later. Until the crypto hw's queue becomes full,
the driver returns -EINPROGRESS. When the crypto hw's queue if full,
the driver returns -EBUSY, and if CRYPTO_TFM_REQ_MAY_BACKLOG is set, is
expected to backlog the request and process it when the hardware has
queue space. At the point when the driver takes the request from the
backlog and starts processing it, it calls the completion function with
a status of -EINPROGRESS. The completion function is called (for a
second time, in the case of backlogged requests) with a status/err of 0
when a request is done.
Crypto drivers for hardware without hardware queueing use the helpers,
crypto_init_queue(), crypto_enqueue_request(), crypto_dequeue_request()
and crypto_get_backlog() helpers to implement this behaviour correctly,
while others implement this behaviour without these helpers (ccp, for
example).
dm-crypt (before the patch that needs reverting) uses this API
correctly. It queues up as many requests as the hw queues will allow
(i.e. as long as it gets back -EINPROGRESS from the request function).
Then, when it sees at least one backlogged request (gets -EBUSY), it
waits till that backlogged request is handled (completion gets called
with -EINPROGRESS), and then continues. The references to
af_alg_wait_for_completion() and af_alg_complete() in that commit's
commit message are irrelevant because those functions only handle one
request at a time, unlink dm-crypt.
The problem is that the Freescale CAAM driver, which that commit
describes as having being tested with, fails to implement the
backlogging behaviour correctly. In cam_jr_enqueue(), if the hardware
queue is full, it simply returns -EBUSY without backlogging the request.
What the observed deadlock was is not described in the commit message
but it is obviously the wait_for_completion() in crypto_convert() where
dm-crypto would wait for the completion being called with -EINPROGRESS
in the case of backlogged requests. This completion will never be
completed due to the bug in the CAAM driver.
Commit 0618764cb25 incorrectly made dm-crypt wait for every request,
even when the driver/hardware queues are not full, which means that
dm-crypt will never see -EBUSY. This means that that commit will cause
a performance regression on all crypto drivers which implement the API
correctly.
Revert it. Correct backlog handling should be implemented in the CAAM
driver instead.
Cc'ing stable purely because commit 0618764cb25 did. If for some reason
a stable@ kernel did pick up commit 0618764cb25 it should get reverted.
Signed-off-by: Rabin Vincent <rabin.vincent@axis.com>
Reviewed-by: Horia Geanta <horia.geanta@freescale.com>
Cc: stable@vger.kernel.org
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
2015-05-05 16:15:56 +03:00
return ;
2008-02-08 05:11:12 +03:00
2020-07-08 12:28:08 +03:00
/*
* The request is fully completed : for inline writes , let
* kcryptd_crypt_write_convert ( ) do the IO submission .
*/
if ( bio_data_dir ( io - > base_bio ) = = READ ) {
2012-03-28 21:41:22 +04:00
kcryptd_crypt_read_done ( io ) ;
2020-07-08 12:28:08 +03:00
return ;
}
if ( kcryptd_crypt_write_inline ( cc , ctx ) ) {
complete ( & ctx - > restart ) ;
return ;
}
kcryptd_crypt_write_io_submit ( io , 1 ) ;
2008-02-08 05:11:12 +03:00
}
2008-02-08 05:10:52 +03:00
static void kcryptd_crypt ( struct work_struct * work )
2005-04-17 02:20:36 +04:00
{
2007-07-12 20:26:32 +04:00
struct dm_crypt_io * io = container_of ( work , struct dm_crypt_io , work ) ;
2006-10-03 12:15:37 +04:00
2007-10-20 01:38:58 +04:00
if ( bio_data_dir ( io - > base_bio ) = = READ )
2008-02-08 05:10:52 +03:00
kcryptd_crypt_read_convert ( io ) ;
2008-02-08 05:10:49 +03:00
else
2008-02-08 05:10:52 +03:00
kcryptd_crypt_write_convert ( io ) ;
2007-10-20 01:38:58 +04:00
}
dm crypt: add flags to optionally bypass kcryptd workqueues
This is a follow up to [1] that detailed latency problems associated
with dm-crypt's use of workqueues when processing IO.
Current dm-crypt implementation creates a significant IO performance
overhead (at least on small IO block sizes) for both latency and
throughput. We suspect offloading IO request processing into
workqueues and async threads is more harmful these days with the
modern fast storage. I also did some digging into the dm-crypt git
history and much of this async processing is not needed anymore,
because the reasons it was added are mostly gone from the kernel. More
details can be found in [2] (see "Git archeology" section).
This change adds DM_CRYPT_NO_READ_WORKQUEUE and
DM_CRYPT_NO_WRITE_WORKQUEUE flags for read and write BIOs, which
direct dm-crypt to not offload crypto operations into kcryptd
workqueues. In addition, writes are not buffered to be sorted in the
dm-crypt red-black tree, but dispatched immediately. For cases where
crypto operations cannot happen (hard interrupt context, for example
the read path of some NVME drivers), we offload the work to a tasklet
rather than a workqueue.
These flags only ensure no async BIO processing in the dm-crypt
module. It is worth noting that some Crypto API implementations may
offload encryption into their own workqueues, which are independent of
the dm-crypt and its configuration. However upon enabling these new
flags dm-crypt will instruct Crypto API not to backlog crypto
requests.
To give an idea of the performance gains for certain workloads,
consider the script, and results when tested against various
devices, detailed here:
https://www.redhat.com/archives/dm-devel/2020-July/msg00138.html
[1]: https://www.spinics.net/lists/dm-crypt/msg07516.html
[2]: https://blog.cloudflare.com/speeding-up-linux-disk-encryption/
Signed-off-by: Ignat Korchagin <ignat@cloudflare.com>
Reviewed-by: Damien Le Moal <damien.lemoal@wdc.com>
Reviewed-by: Bob Liu <bob.liu@oracle.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
2020-07-06 20:37:31 +03:00
/*
 * Tasklet entry point. @work carries a pointer to a struct work_struct
 * cast to unsigned long; it is converted back and passed to kcryptd_crypt().
 */
static void kcryptd_crypt_tasklet(unsigned long work)
{
	struct work_struct *ws = (struct work_struct *)work;

	kcryptd_crypt(ws);
}
2008-02-08 05:10:52 +03:00
static void kcryptd_queue_crypt ( struct dm_crypt_io * io )
2007-10-20 01:38:58 +04:00
{
2012-07-27 18:08:05 +04:00
struct crypt_config * cc = io - > cc ;
2007-10-20 01:38:58 +04:00
dm crypt: add flags to optionally bypass kcryptd workqueues
This is a follow up to [1] that detailed latency problems associated
with dm-crypt's use of workqueues when processing IO.
Current dm-crypt implementation creates a significant IO performance
overhead (at least on small IO block sizes) for both latency and
throughput. We suspect offloading IO request processing into
workqueues and async threads is more harmful these days with the
modern fast storage. I also did some digging into the dm-crypt git
history and much of this async processing is not needed anymore,
because the reasons it was added are mostly gone from the kernel. More
details can be found in [2] (see "Git archeology" section).
This change adds DM_CRYPT_NO_READ_WORKQUEUE and
DM_CRYPT_NO_WRITE_WORKQUEUE flags for read and write BIOs, which
direct dm-crypt to not offload crypto operations into kcryptd
workqueues. In addition, writes are not buffered to be sorted in the
dm-crypt red-black tree, but dispatched immediately. For cases, where
crypto operations cannot happen (hard interrupt context, for example
the read path of some NVME drivers), we offload the work to a tasklet
rather than a workqueue.
These flags only ensure no async BIO processing in the dm-crypt
module. It is worth noting that some Crypto API implementations may
offload encryption into their own workqueues, which are independent of
the dm-crypt and its configuration. However upon enabling these new
flags dm-crypt will instruct Crypto API not to backlog crypto
requests.
To give an idea of the performance gains for certain workloads,
consider the script, and results when tested against various
devices, detailed here:
https://www.redhat.com/archives/dm-devel/2020-July/msg00138.html
[1]: https://www.spinics.net/lists/dm-crypt/msg07516.html
[2]: https://blog.cloudflare.com/speeding-up-linux-disk-encryption/
Signed-off-by: Ignat Korchagin <ignat@cloudflare.com>
Reviewed-by: Damien Le Moal <damien.lemoal@wdc.com>
Reviewed-by: Bob Liu <bob.liu@oracle.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
2020-07-06 20:37:31 +03:00
if ( ( bio_data_dir ( io - > base_bio ) = = READ & & test_bit ( DM_CRYPT_NO_READ_WORKQUEUE , & cc - > flags ) ) | |
( bio_data_dir ( io - > base_bio ) = = WRITE & & test_bit ( DM_CRYPT_NO_WRITE_WORKQUEUE , & cc - > flags ) ) ) {
if ( in_irq ( ) ) {
/* Crypto API's "skcipher_walk_first() refuses to work in hard IRQ context */
tasklet_init ( & io - > tasklet , kcryptd_crypt_tasklet , ( unsigned long ) & io - > work ) ;
tasklet_schedule ( & io - > tasklet ) ;
return ;
}
kcryptd_crypt ( & io - > work ) ;
return ;
}
2008-02-08 05:10:52 +03:00
INIT_WORK ( & io - > work , kcryptd_crypt ) ;
queue_work ( cc - > crypt_queue , & io - > work ) ;
2005-04-17 02:20:36 +04:00
}
2017-01-04 22:23:54 +03:00
static void crypt_free_tfms_aead ( struct crypt_config * cc )
2005-04-17 02:20:36 +04:00
{
2017-01-04 22:23:54 +03:00
if ( ! cc - > cipher_tfm . tfms_aead )
return ;
2005-04-17 02:20:36 +04:00
2017-01-04 22:23:54 +03:00
if ( cc - > cipher_tfm . tfms_aead [ 0 ] & & ! IS_ERR ( cc - > cipher_tfm . tfms_aead [ 0 ] ) ) {
crypto_free_aead ( cc - > cipher_tfm . tfms_aead [ 0 ] ) ;
cc - > cipher_tfm . tfms_aead [ 0 ] = NULL ;
2005-04-17 02:20:36 +04:00
}
2017-01-04 22:23:54 +03:00
kfree ( cc - > cipher_tfm . tfms_aead ) ;
cc - > cipher_tfm . tfms_aead = NULL ;
2005-04-17 02:20:36 +04:00
}
2017-01-04 22:23:54 +03:00
static void crypt_free_tfms_skcipher ( struct crypt_config * cc )
2011-01-13 22:59:54 +03:00
{
unsigned i ;
2017-01-04 22:23:54 +03:00
if ( ! cc - > cipher_tfm . tfms )
2012-07-27 18:08:05 +04:00
return ;
2011-01-13 22:59:54 +03:00
for ( i = 0 ; i < cc - > tfms_count ; i + + )
2017-01-04 22:23:54 +03:00
if ( cc - > cipher_tfm . tfms [ i ] & & ! IS_ERR ( cc - > cipher_tfm . tfms [ i ] ) ) {
crypto_free_skcipher ( cc - > cipher_tfm . tfms [ i ] ) ;
cc - > cipher_tfm . tfms [ i ] = NULL ;
2011-01-13 22:59:54 +03:00
}
2012-07-27 18:08:05 +04:00
2017-01-04 22:23:54 +03:00
kfree ( cc - > cipher_tfm . tfms ) ;
cc - > cipher_tfm . tfms = NULL ;
2011-01-13 22:59:54 +03:00
}
2017-01-04 22:23:54 +03:00
/*
 * Free the cipher transforms of @cc, using the AEAD variant when
 * crypt_integrity_aead() reports true and the skcipher variant otherwise.
 */
static void crypt_free_tfms(struct crypt_config *cc)
{
	if (!crypt_integrity_aead(cc)) {
		crypt_free_tfms_skcipher(cc);
		return;
	}

	crypt_free_tfms_aead(cc);
}
static int crypt_alloc_tfms_skcipher ( struct crypt_config * cc , char * ciphermode )
2011-01-13 22:59:54 +03:00
{
unsigned i ;
int err ;
treewide: kzalloc() -> kcalloc()
The kzalloc() function has a 2-factor argument form, kcalloc(). This
patch replaces cases of:
kzalloc(a * b, gfp)
with:
kcalloc(a, b, gfp)
as well as handling cases of:
kzalloc(a * b * c, gfp)
with:
kzalloc(array3_size(a, b, c), gfp)
as it's slightly less ugly than:
kzalloc_array(array_size(a, b), c, gfp)
This does, however, attempt to ignore constant size factors like:
kzalloc(4 * 1024, gfp)
though any constants defined via macros get caught up in the conversion.
Any factors with a sizeof() of "unsigned char", "char", and "u8" were
dropped, since they're redundant.
The Coccinelle script used for this was:
// Fix redundant parens around sizeof().
@@
type TYPE;
expression THING, E;
@@
(
kzalloc(
- (sizeof(TYPE)) * E
+ sizeof(TYPE) * E
, ...)
|
kzalloc(
- (sizeof(THING)) * E
+ sizeof(THING) * E
, ...)
)
// Drop single-byte sizes and redundant parens.
@@
expression COUNT;
typedef u8;
typedef __u8;
@@
(
kzalloc(
- sizeof(u8) * (COUNT)
+ COUNT
, ...)
|
kzalloc(
- sizeof(__u8) * (COUNT)
+ COUNT
, ...)
|
kzalloc(
- sizeof(char) * (COUNT)
+ COUNT
, ...)
|
kzalloc(
- sizeof(unsigned char) * (COUNT)
+ COUNT
, ...)
|
kzalloc(
- sizeof(u8) * COUNT
+ COUNT
, ...)
|
kzalloc(
- sizeof(__u8) * COUNT
+ COUNT
, ...)
|
kzalloc(
- sizeof(char) * COUNT
+ COUNT
, ...)
|
kzalloc(
- sizeof(unsigned char) * COUNT
+ COUNT
, ...)
)
// 2-factor product with sizeof(type/expression) and identifier or constant.
@@
type TYPE;
expression THING;
identifier COUNT_ID;
constant COUNT_CONST;
@@
(
- kzalloc
+ kcalloc
(
- sizeof(TYPE) * (COUNT_ID)
+ COUNT_ID, sizeof(TYPE)
, ...)
|
- kzalloc
+ kcalloc
(
- sizeof(TYPE) * COUNT_ID
+ COUNT_ID, sizeof(TYPE)
, ...)
|
- kzalloc
+ kcalloc
(
- sizeof(TYPE) * (COUNT_CONST)
+ COUNT_CONST, sizeof(TYPE)
, ...)
|
- kzalloc
+ kcalloc
(
- sizeof(TYPE) * COUNT_CONST
+ COUNT_CONST, sizeof(TYPE)
, ...)
|
- kzalloc
+ kcalloc
(
- sizeof(THING) * (COUNT_ID)
+ COUNT_ID, sizeof(THING)
, ...)
|
- kzalloc
+ kcalloc
(
- sizeof(THING) * COUNT_ID
+ COUNT_ID, sizeof(THING)
, ...)
|
- kzalloc
+ kcalloc
(
- sizeof(THING) * (COUNT_CONST)
+ COUNT_CONST, sizeof(THING)
, ...)
|
- kzalloc
+ kcalloc
(
- sizeof(THING) * COUNT_CONST
+ COUNT_CONST, sizeof(THING)
, ...)
)
// 2-factor product, only identifiers.
@@
identifier SIZE, COUNT;
@@
- kzalloc
+ kcalloc
(
- SIZE * COUNT
+ COUNT, SIZE
, ...)
// 3-factor product with 1 sizeof(type) or sizeof(expression), with
// redundant parens removed.
@@
expression THING;
identifier STRIDE, COUNT;
type TYPE;
@@
(
kzalloc(
- sizeof(TYPE) * (COUNT) * (STRIDE)
+ array3_size(COUNT, STRIDE, sizeof(TYPE))
, ...)
|
kzalloc(
- sizeof(TYPE) * (COUNT) * STRIDE
+ array3_size(COUNT, STRIDE, sizeof(TYPE))
, ...)
|
kzalloc(
- sizeof(TYPE) * COUNT * (STRIDE)
+ array3_size(COUNT, STRIDE, sizeof(TYPE))
, ...)
|
kzalloc(
- sizeof(TYPE) * COUNT * STRIDE
+ array3_size(COUNT, STRIDE, sizeof(TYPE))
, ...)
|
kzalloc(
- sizeof(THING) * (COUNT) * (STRIDE)
+ array3_size(COUNT, STRIDE, sizeof(THING))
, ...)
|
kzalloc(
- sizeof(THING) * (COUNT) * STRIDE
+ array3_size(COUNT, STRIDE, sizeof(THING))
, ...)
|
kzalloc(
- sizeof(THING) * COUNT * (STRIDE)
+ array3_size(COUNT, STRIDE, sizeof(THING))
, ...)
|
kzalloc(
- sizeof(THING) * COUNT * STRIDE
+ array3_size(COUNT, STRIDE, sizeof(THING))
, ...)
)
// 3-factor product with 2 sizeof(variable), with redundant parens removed.
@@
expression THING1, THING2;
identifier COUNT;
type TYPE1, TYPE2;
@@
(
kzalloc(
- sizeof(TYPE1) * sizeof(TYPE2) * COUNT
+ array3_size(COUNT, sizeof(TYPE1), sizeof(TYPE2))
, ...)
|
kzalloc(
- sizeof(TYPE1) * sizeof(THING2) * (COUNT)
+ array3_size(COUNT, sizeof(TYPE1), sizeof(TYPE2))
, ...)
|
kzalloc(
- sizeof(THING1) * sizeof(THING2) * COUNT
+ array3_size(COUNT, sizeof(THING1), sizeof(THING2))
, ...)
|
kzalloc(
- sizeof(THING1) * sizeof(THING2) * (COUNT)
+ array3_size(COUNT, sizeof(THING1), sizeof(THING2))
, ...)
|
kzalloc(
- sizeof(TYPE1) * sizeof(THING2) * COUNT
+ array3_size(COUNT, sizeof(TYPE1), sizeof(THING2))
, ...)
|
kzalloc(
- sizeof(TYPE1) * sizeof(THING2) * (COUNT)
+ array3_size(COUNT, sizeof(TYPE1), sizeof(THING2))
, ...)
)
// 3-factor product, only identifiers, with redundant parens removed.
@@
identifier STRIDE, SIZE, COUNT;
@@
(
kzalloc(
- (COUNT) * STRIDE * SIZE
+ array3_size(COUNT, STRIDE, SIZE)
, ...)
|
kzalloc(
- COUNT * (STRIDE) * SIZE
+ array3_size(COUNT, STRIDE, SIZE)
, ...)
|
kzalloc(
- COUNT * STRIDE * (SIZE)
+ array3_size(COUNT, STRIDE, SIZE)
, ...)
|
kzalloc(
- (COUNT) * (STRIDE) * SIZE
+ array3_size(COUNT, STRIDE, SIZE)
, ...)
|
kzalloc(
- COUNT * (STRIDE) * (SIZE)
+ array3_size(COUNT, STRIDE, SIZE)
, ...)
|
kzalloc(
- (COUNT) * STRIDE * (SIZE)
+ array3_size(COUNT, STRIDE, SIZE)
, ...)
|
kzalloc(
- (COUNT) * (STRIDE) * (SIZE)
+ array3_size(COUNT, STRIDE, SIZE)
, ...)
|
kzalloc(
- COUNT * STRIDE * SIZE
+ array3_size(COUNT, STRIDE, SIZE)
, ...)
)
// Any remaining multi-factor products, first at least 3-factor products,
// when they're not all constants...
@@
expression E1, E2, E3;
constant C1, C2, C3;
@@
(
kzalloc(C1 * C2 * C3, ...)
|
kzalloc(
- (E1) * E2 * E3
+ array3_size(E1, E2, E3)
, ...)
|
kzalloc(
- (E1) * (E2) * E3
+ array3_size(E1, E2, E3)
, ...)
|
kzalloc(
- (E1) * (E2) * (E3)
+ array3_size(E1, E2, E3)
, ...)
|
kzalloc(
- E1 * E2 * E3
+ array3_size(E1, E2, E3)
, ...)
)
// And then all remaining 2 factors products when they're not all constants,
// keeping sizeof() as the second factor argument.
@@
expression THING, E1, E2;
type TYPE;
constant C1, C2, C3;
@@
(
kzalloc(sizeof(THING) * C2, ...)
|
kzalloc(sizeof(TYPE) * C2, ...)
|
kzalloc(C1 * C2 * C3, ...)
|
kzalloc(C1 * C2, ...)
|
- kzalloc
+ kcalloc
(
- sizeof(TYPE) * (E2)
+ E2, sizeof(TYPE)
, ...)
|
- kzalloc
+ kcalloc
(
- sizeof(TYPE) * E2
+ E2, sizeof(TYPE)
, ...)
|
- kzalloc
+ kcalloc
(
- sizeof(THING) * (E2)
+ E2, sizeof(THING)
, ...)
|
- kzalloc
+ kcalloc
(
- sizeof(THING) * E2
+ E2, sizeof(THING)
, ...)
|
- kzalloc
+ kcalloc
(
- (E1) * E2
+ E1, E2
, ...)
|
- kzalloc
+ kcalloc
(
- (E1) * (E2)
+ E1, E2
, ...)
|
- kzalloc
+ kcalloc
(
- E1 * E2
+ E1, E2
, ...)
)
Signed-off-by: Kees Cook <keescook@chromium.org>
2018-06-13 00:03:40 +03:00
cc - > cipher_tfm . tfms = kcalloc ( cc - > tfms_count ,
sizeof ( struct crypto_skcipher * ) ,
GFP_KERNEL ) ;
2017-01-04 22:23:54 +03:00
if ( ! cc - > cipher_tfm . tfms )
2012-07-27 18:08:05 +04:00
return - ENOMEM ;
2011-01-13 22:59:54 +03:00
for ( i = 0 ; i < cc - > tfms_count ; i + + ) {
2020-07-10 09:20:42 +03:00
cc - > cipher_tfm . tfms [ i ] = crypto_alloc_skcipher ( ciphermode , 0 ,
CRYPTO_ALG_ALLOCATES_MEMORY ) ;
2017-01-04 22:23:54 +03:00
if ( IS_ERR ( cc - > cipher_tfm . tfms [ i ] ) ) {
err = PTR_ERR ( cc - > cipher_tfm . tfms [ i ] ) ;
2012-07-27 18:08:05 +04:00
crypt_free_tfms ( cc ) ;
2011-01-13 22:59:54 +03:00
return err ;
}
}
2018-12-06 07:53:00 +03:00
/*
* dm - crypt performance can vary greatly depending on which crypto
* algorithm implementation is used . Help people debug performance
* problems by logging the - > cra_driver_name .
*/
2019-05-15 17:23:43 +03:00
DMDEBUG_LIMIT ( " %s using implementation \" %s \" " , ciphermode ,
2018-12-06 07:53:00 +03:00
crypto_skcipher_alg ( any_tfm ( cc ) ) - > base . cra_driver_name ) ;
2011-01-13 22:59:54 +03:00
return 0 ;
}
2017-01-04 22:23:54 +03:00
static int crypt_alloc_tfms_aead ( struct crypt_config * cc , char * ciphermode )
{
int err ;
cc - > cipher_tfm . tfms = kmalloc ( sizeof ( struct crypto_aead * ) , GFP_KERNEL ) ;
if ( ! cc - > cipher_tfm . tfms )
return - ENOMEM ;
2020-07-10 09:20:42 +03:00
cc - > cipher_tfm . tfms_aead [ 0 ] = crypto_alloc_aead ( ciphermode , 0 ,
CRYPTO_ALG_ALLOCATES_MEMORY ) ;
2017-01-04 22:23:54 +03:00
if ( IS_ERR ( cc - > cipher_tfm . tfms_aead [ 0 ] ) ) {
err = PTR_ERR ( cc - > cipher_tfm . tfms_aead [ 0 ] ) ;
crypt_free_tfms ( cc ) ;
return err ;
}
2019-05-15 17:23:43 +03:00
DMDEBUG_LIMIT ( " %s using implementation \" %s \" " , ciphermode ,
2018-12-06 07:53:00 +03:00
crypto_aead_alg ( any_tfm_aead ( cc ) ) - > base . cra_driver_name ) ;
2017-01-04 22:23:54 +03:00
return 0 ;
}
/*
 * Allocate the cipher transform(s) for @ciphermode, picking the AEAD or the
 * skcipher allocator depending on crypt_integrity_aead().
 *
 * Returns whatever the selected allocator returns.
 */
static int crypt_alloc_tfms(struct crypt_config *cc, char *ciphermode)
{
	if (!crypt_integrity_aead(cc))
		return crypt_alloc_tfms_skcipher(cc, ciphermode);

	return crypt_alloc_tfms_aead(cc, ciphermode);
}
static unsigned crypt_subkey_size ( struct crypt_config * cc )
{
return ( cc - > key_size - cc - > key_extra_size ) > > ilog2 ( cc - > tfms_count ) ;
}
/*
 * Size of the authenc-formatted key: one subkey plus the rtattr-wrapped
 * struct crypto_authenc_key_param that precedes it.
 */
static unsigned crypt_authenckey_size(struct crypt_config *cc)
{
	return RTA_SPACE(sizeof(struct crypto_authenc_key_param)) +
	       crypt_subkey_size(cc);
}
/*
* If AEAD is composed like authenc ( hmac ( sha256 ) , xts ( aes ) ) ,
* the key must be for some reason in special format .
* This funcion converts cc - > key to this special format .
*/
static void crypt_copy_authenckey ( char * p , const void * key ,
unsigned enckeylen , unsigned authkeylen )
{
struct crypto_authenc_key_param * param ;
struct rtattr * rta ;
rta = ( struct rtattr * ) p ;
param = RTA_DATA ( rta ) ;
param - > enckeylen = cpu_to_be32 ( enckeylen ) ;
rta - > rta_len = RTA_LENGTH ( sizeof ( * param ) ) ;
rta - > rta_type = CRYPTO_AUTHENC_KEYA_PARAM ;
p + = RTA_SPACE ( sizeof ( * param ) ) ;
memcpy ( p , key + enckeylen , authkeylen ) ;
p + = authkeylen ;
memcpy ( p , key , enckeylen ) ;
}
2016-08-25 14:12:54 +03:00
static int crypt_setkey ( struct crypt_config * cc )
2011-01-13 22:59:53 +03:00
{
2013-10-29 02:21:03 +04:00
unsigned subkey_size ;
2012-07-27 18:08:05 +04:00
int err = 0 , i , r ;
2013-10-29 02:21:03 +04:00
/* Ignore extra keys (which are used for IV etc) */
2017-01-04 22:23:54 +03:00
subkey_size = crypt_subkey_size ( cc ) ;
2013-10-29 02:21:03 +04:00
2018-01-04 00:48:59 +03:00
if ( crypt_integrity_hmac ( cc ) ) {
if ( subkey_size < cc - > key_mac_size )
return - EINVAL ;
2017-01-04 22:23:54 +03:00
crypt_copy_authenckey ( cc - > authenc_key , cc - > key ,
subkey_size - cc - > key_mac_size ,
cc - > key_mac_size ) ;
2018-01-04 00:48:59 +03:00
}
2012-07-27 18:08:05 +04:00
for ( i = 0 ; i < cc - > tfms_count ; i + + ) {
2017-03-16 17:39:40 +03:00
if ( crypt_integrity_hmac ( cc ) )
2017-01-04 22:23:54 +03:00
r = crypto_aead_setkey ( cc - > cipher_tfm . tfms_aead [ i ] ,
cc - > authenc_key , crypt_authenckey_size ( cc ) ) ;
2017-03-16 17:39:40 +03:00
else if ( crypt_integrity_aead ( cc ) )
r = crypto_aead_setkey ( cc - > cipher_tfm . tfms_aead [ i ] ,
cc - > key + ( i * subkey_size ) ,
subkey_size ) ;
2017-01-04 22:23:54 +03:00
else
r = crypto_skcipher_setkey ( cc - > cipher_tfm . tfms [ i ] ,
cc - > key + ( i * subkey_size ) ,
subkey_size ) ;
2012-07-27 18:08:05 +04:00
if ( r )
err = r ;
2011-01-13 22:59:53 +03:00
}
2017-01-04 22:23:54 +03:00
if ( crypt_integrity_hmac ( cc ) )
memzero_explicit ( cc - > authenc_key , crypt_authenckey_size ( cc ) ) ;
2011-01-13 22:59:53 +03:00
return err ;
}
2016-11-21 17:58:51 +03:00
# ifdef CONFIG_KEYS
2016-12-01 20:20:52 +03:00
/* Return true if @str contains at least one isspace() character. */
static bool contains_whitespace(const char *str)
{
	const char *pos;

	for (pos = str; *pos != '\0'; pos++) {
		if (isspace(*pos))
			return true;
	}

	return false;
}
2020-04-20 16:46:59 +03:00
static int set_key_user ( struct crypt_config * cc , struct key * key )
{
const struct user_key_payload * ukp ;
ukp = user_key_payload_locked ( key ) ;
if ( ! ukp )
return - EKEYREVOKED ;
if ( cc - > key_size ! = ukp - > datalen )
return - EINVAL ;
memcpy ( cc - > key , ukp - > data , cc - > key_size ) ;
return 0 ;
}
# if defined(CONFIG_ENCRYPTED_KEYS) || defined(CONFIG_ENCRYPTED_KEYS_MODULE)
static int set_key_encrypted ( struct crypt_config * cc , struct key * key )
{
const struct encrypted_key_payload * ekp ;
ekp = key - > payload . data [ 0 ] ;
if ( ! ekp )
return - EKEYREVOKED ;
if ( cc - > key_size ! = ekp - > decrypted_datalen )
return - EINVAL ;
memcpy ( cc - > key , ekp - > decrypted_data , cc - > key_size ) ;
return 0 ;
}
# endif /* CONFIG_ENCRYPTED_KEYS */
2016-11-21 17:58:51 +03:00
static int crypt_set_keyring_key ( struct crypt_config * cc , const char * key_string )
{
char * new_key_string , * key_desc ;
int ret ;
2020-04-20 16:46:59 +03:00
struct key_type * type ;
2016-11-21 17:58:51 +03:00
struct key * key ;
2020-04-20 16:46:59 +03:00
int ( * set_key ) ( struct crypt_config * cc , struct key * key ) ;
2016-11-21 17:58:51 +03:00
2016-12-01 20:20:52 +03:00
/*
* Reject key_string with whitespace . dm core currently lacks code for
* proper whitespace escaping in arguments on DM_TABLE_STATUS path .
*/
if ( contains_whitespace ( key_string ) ) {
DMERR ( " whitespace chars not allowed in key string " ) ;
return - EINVAL ;
}
2016-11-21 17:58:51 +03:00
/* look for next ':' separating key_type from key_description */
key_desc = strpbrk ( key_string , " : " ) ;
if ( ! key_desc | | key_desc = = key_string | | ! strlen ( key_desc + 1 ) )
return - EINVAL ;
2020-04-20 16:46:59 +03:00
if ( ! strncmp ( key_string , " logon: " , key_desc - key_string + 1 ) ) {
type = & key_type_logon ;
set_key = set_key_user ;
} else if ( ! strncmp ( key_string , " user: " , key_desc - key_string + 1 ) ) {
type = & key_type_user ;
set_key = set_key_user ;
# if defined(CONFIG_ENCRYPTED_KEYS) || defined(CONFIG_ENCRYPTED_KEYS_MODULE)
} else if ( ! strncmp ( key_string , " encrypted: " , key_desc - key_string + 1 ) ) {
type = & key_type_encrypted ;
set_key = set_key_encrypted ;
# endif
} else {
2016-11-21 17:58:51 +03:00
return - EINVAL ;
2020-04-20 16:46:59 +03:00
}
2016-11-21 17:58:51 +03:00
new_key_string = kstrdup ( key_string , GFP_KERNEL ) ;
if ( ! new_key_string )
return - ENOMEM ;
2020-04-20 16:46:59 +03:00
key = request_key ( type , key_desc + 1 , NULL ) ;
2016-11-21 17:58:51 +03:00
if ( IS_ERR ( key ) ) {
2020-08-07 09:18:13 +03:00
kfree_sensitive ( new_key_string ) ;
2016-11-21 17:58:51 +03:00
return PTR_ERR ( key ) ;
}
2017-01-31 17:47:11 +03:00
down_read ( & key - > sem ) ;
2016-11-21 17:58:51 +03:00
2020-04-20 16:46:59 +03:00
ret = set_key ( cc , key ) ;
if ( ret < 0 ) {
2017-01-31 17:47:11 +03:00
up_read ( & key - > sem ) ;
2016-11-21 17:58:51 +03:00
key_put ( key ) ;
2020-08-07 09:18:13 +03:00
kfree_sensitive ( new_key_string ) ;
2020-04-20 16:46:59 +03:00
return ret ;
2016-11-21 17:58:51 +03:00
}
2017-01-31 17:47:11 +03:00
up_read ( & key - > sem ) ;
2016-11-21 17:58:51 +03:00
key_put ( key ) ;
/* clear the flag since following operations may invalidate previously valid key */
clear_bit ( DM_CRYPT_KEY_VALID , & cc - > flags ) ;
ret = crypt_setkey ( cc ) ;
if ( ! ret ) {
set_bit ( DM_CRYPT_KEY_VALID , & cc - > flags ) ;
2020-08-07 09:18:13 +03:00
kfree_sensitive ( cc - > key_string ) ;
2016-11-21 17:58:51 +03:00
cc - > key_string = new_key_string ;
} else
2020-08-07 09:18:13 +03:00
kfree_sensitive ( new_key_string ) ;
2016-11-21 17:58:51 +03:00
return ret ;
}
/*
 * Determine the key size in bytes from the key argument.
 *
 * If *@key_string does not start with ':' it is taken to be a hex string
 * and half of its length is returned; *@key_string is left untouched.
 *
 * Otherwise the expected form is ":<size>:<rest>". The decimal <size> is
 * returned and *@key_string is advanced to the second ':' so that it points
 * at ":<rest>".
 *
 * Returns the key size, or -EINVAL if the string is malformed or <size>
 * does not fit in an int. *@key_string is only advanced on success.
 */
static int get_key_size(char **key_string)
{
	char *colon, dummy;
	unsigned int size;

	if ((*key_string)[0] != ':')
		return strlen(*key_string) >> 1;

	/* look for next ':' in key string */
	colon = strpbrk(*key_string + 1, ":");
	if (!colon)
		return -EINVAL;

	/* "%u" needs an unsigned destination; "%c" must consume the ':' */
	if (sscanf(*key_string + 1, "%u%c", &size, &dummy) != 2 || dummy != ':')
		return -EINVAL;

	/* Do not let a huge size wrap into a bogus negative return value. */
	if (size > INT_MAX)
		return -EINVAL;

	*key_string = colon;

	/* remaining key string should be :<logon|user>:<key_desc> */

	return size;
}
# else
/* Built without CONFIG_KEYS: keyring keys cannot be used, always -EINVAL. */
static int crypt_set_keyring_key(struct crypt_config *cc, const char *key_string)
{
	int unsupported = -EINVAL;

	return unsupported;
}
/*
 * Built without CONFIG_KEYS: a key string starting with ':' is rejected
 * with -EINVAL; any other string is a hex key whose size is half its length.
 */
static int get_key_size(char **key_string)
{
	const char *str = *key_string;

	if (str[0] == ':')
		return -EINVAL;

	return strlen(str) >> 1;
}
2020-04-20 16:46:59 +03:00
# endif /* CONFIG_KEYS */
2016-11-21 17:58:51 +03:00
2006-10-03 12:15:37 +04:00
static int crypt_set_key ( struct crypt_config * cc , char * key )
{
2011-03-24 16:54:27 +03:00
int r = - EINVAL ;
int key_string_len = strlen ( key ) ;
2011-01-13 22:59:49 +03:00
/* Hyphen (which gives a key_size of zero) means there is no key. */
if ( ! cc - > key_size & & strcmp ( key , " - " ) )
2011-03-24 16:54:27 +03:00
goto out ;
2006-10-03 12:15:37 +04:00
2016-11-21 17:58:51 +03:00
/* ':' means the key is in kernel keyring, short-circuit normal key processing */
if ( key [ 0 ] = = ' : ' ) {
r = crypt_set_keyring_key ( cc , key + 1 ) ;
2011-03-24 16:54:27 +03:00
goto out ;
2016-11-21 17:58:51 +03:00
}
2006-10-03 12:15:37 +04:00
2016-11-02 17:02:08 +03:00
/* clear the flag since following operations may invalidate previously valid key */
clear_bit ( DM_CRYPT_KEY_VALID , & cc - > flags ) ;
2006-10-03 12:15:37 +04:00
2016-11-21 17:58:51 +03:00
/* wipe references to any kernel keyring key */
2020-08-07 09:18:13 +03:00
kfree_sensitive ( cc - > key_string ) ;
2016-11-21 17:58:51 +03:00
cc - > key_string = NULL ;
2017-04-27 16:52:04 +03:00
/* Decode key from its hex representation. */
if ( cc - > key_size & & hex2bin ( cc - > key , key , cc - > key_size ) < 0 )
2011-03-24 16:54:27 +03:00
goto out ;
2006-10-03 12:15:37 +04:00
2016-08-25 14:12:54 +03:00
r = crypt_setkey ( cc ) ;
2016-11-02 17:02:08 +03:00
if ( ! r )
set_bit ( DM_CRYPT_KEY_VALID , & cc - > flags ) ;
2011-03-24 16:54:27 +03:00
out :
/* Hex key string not needed after here, so wipe it. */
memset ( key , ' 0 ' , key_string_len ) ;
return r ;
2006-10-03 12:15:37 +04:00
}
static int crypt_wipe_key ( struct crypt_config * cc )
{
2017-04-24 15:21:53 +03:00
int r ;
2006-10-03 12:15:37 +04:00
clear_bit ( DM_CRYPT_KEY_VALID , & cc - > flags ) ;
2017-04-24 15:21:53 +03:00
get_random_bytes ( & cc - > key , cc - > key_size ) ;
2019-07-09 16:22:12 +03:00
/* Wipe IV private keys */
if ( cc - > iv_gen_ops & & cc - > iv_gen_ops - > wipe ) {
r = cc - > iv_gen_ops - > wipe ( cc ) ;
if ( r )
return r ;
}
2020-08-07 09:18:13 +03:00
kfree_sensitive ( cc - > key_string ) ;
2016-11-21 17:58:51 +03:00
cc - > key_string = NULL ;
2017-04-24 15:21:53 +03:00
r = crypt_setkey ( cc ) ;
memset ( & cc - > key , 0 , cc - > key_size * sizeof ( u8 ) ) ;
2011-01-13 22:59:53 +03:00
2017-04-24 15:21:53 +03:00
return r ;
2006-10-03 12:15:37 +04:00
}
2017-08-14 05:45:08 +03:00
static void crypt_calculate_pages_per_client ( void )
{
2018-12-28 11:34:29 +03:00
unsigned long pages = ( totalram_pages ( ) - totalhigh_pages ( ) ) * DM_CRYPT_MEMORY_PERCENT / 100 ;
2017-08-14 05:45:08 +03:00
if ( ! dm_crypt_clients_n )
return ;
pages / = dm_crypt_clients_n ;
if ( pages < DM_CRYPT_MIN_PAGES_PER_CLIENT )
pages = DM_CRYPT_MIN_PAGES_PER_CLIENT ;
dm_crypt_pages_per_client = pages ;
}
/*
 * Page allocation callback; @pool_data is the owning crypt_config.
 *
 * Refuses the allocation (returns NULL) when the per-client page counter
 * has reached dm_crypt_pages_per_client and the request carries
 * __GFP_NORETRY. Otherwise allocates one page and, on success, accounts
 * for it in cc->n_allocated_pages.
 */
static void *crypt_page_alloc(gfp_t gfp_mask, void *pool_data)
{
	struct crypt_config *cc = pool_data;
	struct page *new_page;

	if (unlikely(percpu_counter_compare(&cc->n_allocated_pages,
					    dm_crypt_pages_per_client) >= 0) &&
	    likely(gfp_mask & __GFP_NORETRY))
		return NULL;

	new_page = alloc_page(gfp_mask);
	if (unlikely(new_page == NULL))
		return NULL;

	percpu_counter_add(&cc->n_allocated_pages, 1);

	return new_page;
}
static void crypt_page_free ( void * page , void * pool_data )
{
struct crypt_config * cc = pool_data ;
__free_page ( page ) ;
percpu_counter_sub ( & cc - > n_allocated_pages , 1 ) ;
}
2010-08-12 07:14:06 +04:00
static void crypt_dtr ( struct dm_target * ti )
{
struct crypt_config * cc = ti - > private ;
ti - > private = NULL ;
if ( ! cc )
return ;
2016-09-21 17:22:29 +03:00
if ( cc - > write_thread )
2015-02-13 16:25:59 +03:00
kthread_stop ( cc - > write_thread ) ;
2010-08-12 07:14:06 +04:00
if ( cc - > io_queue )
destroy_workqueue ( cc - > io_queue ) ;
if ( cc - > crypt_queue )
destroy_workqueue ( cc - > crypt_queue ) ;
2012-07-27 18:08:05 +04:00
crypt_free_tfms ( cc ) ;
2018-05-21 01:25:53 +03:00
bioset_exit ( & cc - > bs ) ;
2010-08-12 07:14:06 +04:00
2018-05-21 01:25:53 +03:00
mempool_exit ( & cc - > page_pool ) ;
mempool_exit ( & cc - > req_pool ) ;
mempool_exit ( & cc - > tag_pool ) ;
2018-06-02 20:45:04 +03:00
WARN_ON ( percpu_counter_sum ( & cc - > n_allocated_pages ) ! = 0 ) ;
percpu_counter_destroy ( & cc - > n_allocated_pages ) ;
2010-08-12 07:14:06 +04:00
if ( cc - > iv_gen_ops & & cc - > iv_gen_ops - > dtr )
cc - > iv_gen_ops - > dtr ( cc ) ;
if ( cc - > dev )
dm_put_device ( ti , cc - > dev ) ;
2020-08-07 09:18:13 +03:00
kfree_sensitive ( cc - > cipher_string ) ;
kfree_sensitive ( cc - > key_string ) ;
kfree_sensitive ( cc - > cipher_auth ) ;
kfree_sensitive ( cc - > authenc_key ) ;
2010-08-12 07:14:06 +04:00
2018-01-06 05:17:20 +03:00
mutex_destroy ( & cc - > bio_alloc_lock ) ;
2010-08-12 07:14:06 +04:00
/* Must zero key material before freeing */
2020-08-07 09:18:13 +03:00
kfree_sensitive ( cc ) ;
2017-08-14 05:45:08 +03:00
spin_lock ( & dm_crypt_clients_lock ) ;
WARN_ON ( ! dm_crypt_clients_n ) ;
dm_crypt_clients_n - - ;
crypt_calculate_pages_per_client ( ) ;
spin_unlock ( & dm_crypt_clients_lock ) ;
2010-08-12 07:14:06 +04:00
}
2017-03-16 17:39:39 +03:00
static int crypt_ctr_ivmode ( struct dm_target * ti , const char * ivmode )
{
struct crypt_config * cc = ti - > private ;
2017-03-16 17:39:40 +03:00
if ( crypt_integrity_aead ( cc ) )
2017-03-16 17:39:39 +03:00
cc - > iv_size = crypto_aead_ivsize ( any_tfm_aead ( cc ) ) ;
else
cc - > iv_size = crypto_skcipher_ivsize ( any_tfm ( cc ) ) ;
if ( cc - > iv_size )
/* at least a 64 bit sector number should fit in our buffer */
cc - > iv_size = max ( cc - > iv_size ,
( unsigned int ) ( sizeof ( u64 ) / sizeof ( u8 ) ) ) ;
else if ( ivmode ) {
DMWARN ( " Selected cipher does not support IVs " ) ;
ivmode = NULL ;
}
/* Choose ivmode, see comments at iv code. */
if ( ivmode = = NULL )
cc - > iv_gen_ops = NULL ;
else if ( strcmp ( ivmode , " plain " ) = = 0 )
cc - > iv_gen_ops = & crypt_iv_plain_ops ;
else if ( strcmp ( ivmode , " plain64 " ) = = 0 )
cc - > iv_gen_ops = & crypt_iv_plain64_ops ;
2017-06-06 10:07:01 +03:00
else if ( strcmp ( ivmode , " plain64be " ) = = 0 )
cc - > iv_gen_ops = & crypt_iv_plain64be_ops ;
2017-03-16 17:39:39 +03:00
else if ( strcmp ( ivmode , " essiv " ) = = 0 )
cc - > iv_gen_ops = & crypt_iv_essiv_ops ;
else if ( strcmp ( ivmode , " benbi " ) = = 0 )
cc - > iv_gen_ops = & crypt_iv_benbi_ops ;
else if ( strcmp ( ivmode , " null " ) = = 0 )
cc - > iv_gen_ops = & crypt_iv_null_ops ;
2019-07-09 16:22:14 +03:00
else if ( strcmp ( ivmode , " eboiv " ) = = 0 )
cc - > iv_gen_ops = & crypt_iv_eboiv_ops ;
2020-01-03 11:20:22 +03:00
else if ( strcmp ( ivmode , " elephant " ) = = 0 ) {
cc - > iv_gen_ops = & crypt_iv_elephant_ops ;
cc - > key_parts = 2 ;
cc - > key_extra_size = cc - > key_size / 2 ;
if ( cc - > key_extra_size > ELEPHANT_MAX_KEY_SIZE )
return - EINVAL ;
set_bit ( CRYPT_ENCRYPT_PREPROCESS , & cc - > cipher_flags ) ;
} else if ( strcmp ( ivmode , " lmk " ) = = 0 ) {
2017-03-16 17:39:39 +03:00
cc - > iv_gen_ops = & crypt_iv_lmk_ops ;
/*
* Version 2 and 3 is recognised according
* to length of provided multi - key string .
* If present ( version 3 ) , last key is used as IV seed .
* All keys ( including IV seed ) are always the same size .
*/
if ( cc - > key_size % cc - > key_parts ) {
cc - > key_parts + + ;
cc - > key_extra_size = cc - > key_size / cc - > key_parts ;
}
} else if ( strcmp ( ivmode , " tcw " ) = = 0 ) {
cc - > iv_gen_ops = & crypt_iv_tcw_ops ;
cc - > key_parts + = 2 ; /* IV + whitening */
cc - > key_extra_size = cc - > iv_size + TCW_WHITENING_SIZE ;
} else if ( strcmp ( ivmode , " random " ) = = 0 ) {
cc - > iv_gen_ops = & crypt_iv_random_ops ;
/* Need storage space in integrity fields. */
cc - > integrity_iv_size = cc - > iv_size ;
} else {
ti - > error = " Invalid IV mode " ;
return - EINVAL ;
}
return 0 ;
}
2017-03-16 17:39:40 +03:00
/*
* Workaround to parse HMAC algorithm from AEAD crypto API spec .
* The HMAC is needed to calculate tag size ( HMAC digest size ) .
* This should be probably done by crypto - api calls ( once available . . . )
*/
static int crypt_ctr_auth_cipher ( struct crypt_config * cc , char * cipher_api )
{
char * start , * end , * mac_alg = NULL ;
struct crypto_ahash * mac ;
if ( ! strstarts ( cipher_api , " authenc( " ) )
return 0 ;
start = strchr ( cipher_api , ' ( ' ) ;
end = strchr ( cipher_api , ' , ' ) ;
if ( ! start | | ! end | | + + start > end )
return - EINVAL ;
mac_alg = kzalloc ( end - start + 1 , GFP_KERNEL ) ;
if ( ! mac_alg )
return - ENOMEM ;
strncpy ( mac_alg , start , end - start ) ;
2020-07-10 09:20:42 +03:00
mac = crypto_alloc_ahash ( mac_alg , 0 , CRYPTO_ALG_ALLOCATES_MEMORY ) ;
2017-03-16 17:39:40 +03:00
kfree ( mac_alg ) ;
if ( IS_ERR ( mac ) )
return PTR_ERR ( mac ) ;
cc - > key_mac_size = crypto_ahash_digestsize ( mac ) ;
crypto_free_ahash ( mac ) ;
cc - > authenc_key = kmalloc ( crypt_authenckey_size ( cc ) , GFP_KERNEL ) ;
if ( ! cc - > authenc_key )
return - ENOMEM ;
return 0 ;
}
static int crypt_ctr_cipher_new ( struct dm_target * ti , char * cipher_in , char * key ,
char * * ivmode , char * * ivopts )
{
struct crypt_config * cc = ti - > private ;
2019-08-19 17:17:37 +03:00
char * tmp , * cipher_api , buf [ CRYPTO_MAX_ALG_NAME ] ;
2017-03-16 17:39:40 +03:00
int ret = - EINVAL ;
cc - > tfms_count = 1 ;
/*
* New format ( capi : prefix )
* capi : cipher_api_spec - iv : ivopts
*/
tmp = & cipher_in [ strlen ( " capi: " ) ] ;
2019-01-09 13:57:14 +03:00
/* Separate IV options if present, it can contain another '-' in hash name */
* ivopts = strrchr ( tmp , ' : ' ) ;
if ( * ivopts ) {
* * ivopts = ' \0 ' ;
( * ivopts ) + + ;
}
/* Parse IV mode */
* ivmode = strrchr ( tmp , ' - ' ) ;
if ( * ivmode ) {
* * ivmode = ' \0 ' ;
( * ivmode ) + + ;
}
/* The rest is crypto API spec */
cipher_api = tmp ;
2017-03-16 17:39:40 +03:00
2019-08-19 17:17:37 +03:00
/* Alloc AEAD, can be used only in new format. */
if ( crypt_integrity_aead ( cc ) ) {
ret = crypt_ctr_auth_cipher ( cc , cipher_api ) ;
if ( ret < 0 ) {
ti - > error = " Invalid AEAD cipher spec " ;
return - ENOMEM ;
}
}
2017-03-16 17:39:40 +03:00
if ( * ivmode & & ! strcmp ( * ivmode , " lmk " ) )
cc - > tfms_count = 64 ;
2019-08-19 17:17:37 +03:00
if ( * ivmode & & ! strcmp ( * ivmode , " essiv " ) ) {
if ( ! * ivopts ) {
ti - > error = " Digest algorithm missing for ESSIV mode " ;
return - EINVAL ;
}
ret = snprintf ( buf , CRYPTO_MAX_ALG_NAME , " essiv(%s,%s) " ,
cipher_api , * ivopts ) ;
if ( ret < 0 | | ret > = CRYPTO_MAX_ALG_NAME ) {
ti - > error = " Cannot allocate cipher string " ;
return - ENOMEM ;
}
cipher_api = buf ;
}
2017-03-16 17:39:40 +03:00
cc - > key_parts = cc - > tfms_count ;
/* Allocate cipher */
ret = crypt_alloc_tfms ( cc , cipher_api ) ;
if ( ret < 0 ) {
ti - > error = " Error allocating crypto tfm " ;
return ret ;
}
2019-08-19 17:17:37 +03:00
if ( crypt_integrity_aead ( cc ) )
2017-03-16 17:39:40 +03:00
cc - > iv_size = crypto_aead_ivsize ( any_tfm_aead ( cc ) ) ;
2019-08-19 17:17:37 +03:00
else
2017-03-16 17:39:40 +03:00
cc - > iv_size = crypto_skcipher_ivsize ( any_tfm ( cc ) ) ;
return 0 ;
}
static int crypt_ctr_cipher_old ( struct dm_target * ti , char * cipher_in , char * key ,
char * * ivmode , char * * ivopts )
2005-04-17 02:20:36 +04:00
{
2010-08-12 07:14:07 +04:00
struct crypt_config * cc = ti - > private ;
2017-03-16 17:39:40 +03:00
char * tmp , * cipher , * chainmode , * keycount ;
2010-08-12 07:14:07 +04:00
char * cipher_api = NULL ;
2012-07-27 18:08:05 +04:00
int ret = - EINVAL ;
dm: reject trailing characters in sccanf input
Device mapper uses sscanf to convert arguments to numbers. The problem is that
the way we use it ignores additional unmatched characters in the scanned string.
For example, this `if (sscanf(string, "%d", &number) == 1)' will match a number,
but also it will match number with some garbage appended, like "123abc".
As a result, device mapper accepts garbage after some numbers. For example
the command `dmsetup create vg1-new --table "0 16384 linear 254:1bla 34816bla"'
will pass without an error.
This patch fixes all sscanf uses in device mapper. It appends "%c" with
a pointer to a dummy character variable to every sscanf statement.
The construct `if (sscanf(string, "%d%c", &number, &dummy) == 1)' succeeds
only if string is a null-terminated number (optionally preceded by some
whitespace characters). If there is some character appended after the number,
sscanf matches "%c", writes the character to the dummy variable and returns 2.
We check the return value for 1 and consequently reject numbers with some
garbage appended.
Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Acked-by: Mike Snitzer <snitzer@redhat.com>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>
2012-03-28 21:41:26 +04:00
char dummy ;
2005-04-17 02:20:36 +04:00
2017-03-16 17:39:40 +03:00
if ( strchr ( cipher_in , ' ( ' ) | | crypt_integrity_aead ( cc ) ) {
2010-08-12 07:14:07 +04:00
ti - > error = " Bad cipher specification " ;
2005-04-17 02:20:36 +04:00
return - EINVAL ;
}
2010-08-12 07:14:07 +04:00
/*
* Legacy dm - crypt cipher specification
2011-01-13 22:59:54 +03:00
* cipher [ : keycount ] - mode - iv : ivopts
2010-08-12 07:14:07 +04:00
*/
tmp = cipher_in ;
2011-01-13 22:59:54 +03:00
keycount = strsep ( & tmp , " - " ) ;
cipher = strsep ( & keycount , " : " ) ;
if ( ! keycount )
cc - > tfms_count = 1 ;
dm: reject trailing characters in sccanf input
Device mapper uses sscanf to convert arguments to numbers. The problem is that
the way we use it ignores additional unmatched characters in the scanned string.
For example, this `if (sscanf(string, "%d", &number) == 1)' will match a number,
but also it will match number with some garbage appended, like "123abc".
As a result, device mapper accepts garbage after some numbers. For example
the command `dmsetup create vg1-new --table "0 16384 linear 254:1bla 34816bla"'
will pass without an error.
This patch fixes all sscanf uses in device mapper. It appends "%c" with
a pointer to a dummy character variable to every sscanf statement.
The construct `if (sscanf(string, "%d%c", &number, &dummy) == 1)' succeeds
only if string is a null-terminated number (optionally preceded by some
whitespace characters). If there is some character appended after the number,
sscanf matches "%c", writes the character to the dummy variable and returns 2.
We check the return value for 1 and consequently reject numbers with some
garbage appended.
Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Acked-by: Mike Snitzer <snitzer@redhat.com>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>
2012-03-28 21:41:26 +04:00
else if ( sscanf ( keycount , " %u%c " , & cc - > tfms_count , & dummy ) ! = 1 | |
2011-01-13 22:59:54 +03:00
! is_power_of_2 ( cc - > tfms_count ) ) {
ti - > error = " Bad cipher key count specification " ;
return - EINVAL ;
}
cc - > key_parts = cc - > tfms_count ;
2010-08-12 07:14:07 +04:00
2005-04-17 02:20:36 +04:00
chainmode = strsep ( & tmp , " - " ) ;
2019-01-09 13:57:14 +03:00
* ivmode = strsep ( & tmp , " : " ) ;
* ivopts = tmp ;
2005-04-17 02:20:36 +04:00
2011-01-13 22:59:52 +03:00
/*
* For compatibility with the original dm - crypt mapping format , if
* only the cipher name is supplied , use cbc - plain .
*/
2017-03-16 17:39:40 +03:00
if ( ! chainmode | | ( ! strcmp ( chainmode , " plain " ) & & ! * ivmode ) ) {
2005-04-17 02:20:36 +04:00
chainmode = " cbc " ;
2017-03-16 17:39:40 +03:00
* ivmode = " plain " ;
2005-04-17 02:20:36 +04:00
}
2017-03-16 17:39:40 +03:00
if ( strcmp ( chainmode , " ecb " ) & & ! * ivmode ) {
2010-08-12 07:14:07 +04:00
ti - > error = " IV mechanism required " ;
return - EINVAL ;
2005-04-17 02:20:36 +04:00
}
2010-08-12 07:14:07 +04:00
cipher_api = kmalloc ( CRYPTO_MAX_ALG_NAME , GFP_KERNEL ) ;
if ( ! cipher_api )
goto bad_mem ;
2019-08-19 17:17:37 +03:00
if ( * ivmode & & ! strcmp ( * ivmode , " essiv " ) ) {
if ( ! * ivopts ) {
ti - > error = " Digest algorithm missing for ESSIV mode " ;
kfree ( cipher_api ) ;
return - EINVAL ;
}
ret = snprintf ( cipher_api , CRYPTO_MAX_ALG_NAME ,
" essiv(%s(%s),%s) " , chainmode , cipher , * ivopts ) ;
} else {
ret = snprintf ( cipher_api , CRYPTO_MAX_ALG_NAME ,
" %s(%s) " , chainmode , cipher ) ;
}
if ( ret < 0 | | ret > = CRYPTO_MAX_ALG_NAME ) {
2010-08-12 07:14:07 +04:00
kfree ( cipher_api ) ;
goto bad_mem ;
2005-04-17 02:20:36 +04:00
}
2010-08-12 07:14:07 +04:00
/* Allocate cipher */
2012-07-27 18:08:05 +04:00
ret = crypt_alloc_tfms ( cc , cipher_api ) ;
if ( ret < 0 ) {
ti - > error = " Error allocating crypto tfm " ;
2017-03-16 17:39:40 +03:00
kfree ( cipher_api ) ;
return ret ;
2005-04-17 02:20:36 +04:00
}
2017-09-27 15:28:57 +03:00
kfree ( cipher_api ) ;
2005-04-17 02:20:36 +04:00
2017-03-16 17:39:40 +03:00
return 0 ;
bad_mem :
ti - > error = " Cannot allocate cipher strings " ;
return - ENOMEM ;
}
2010-08-12 07:14:07 +04:00
2017-03-16 17:39:40 +03:00
static int crypt_ctr_cipher ( struct dm_target * ti , char * cipher_in , char * key )
{
struct crypt_config * cc = ti - > private ;
char * ivmode = NULL , * ivopts = NULL ;
int ret ;
cc - > cipher_string = kstrdup ( cipher_in , GFP_KERNEL ) ;
if ( ! cc - > cipher_string ) {
ti - > error = " Cannot allocate cipher strings " ;
return - ENOMEM ;
2005-04-17 02:20:36 +04:00
}
2017-03-16 17:39:40 +03:00
if ( strstarts ( cipher_in , " capi: " ) )
ret = crypt_ctr_cipher_new ( ti , cipher_in , key , & ivmode , & ivopts ) ;
else
ret = crypt_ctr_cipher_old ( ti , cipher_in , key , & ivmode , & ivopts ) ;
if ( ret )
return ret ;
2010-08-12 07:14:07 +04:00
/* Initialize IV */
2017-03-16 17:39:39 +03:00
ret = crypt_ctr_ivmode ( ti , ivmode ) ;
if ( ret < 0 )
2017-03-16 17:39:40 +03:00
return ret ;
2005-04-17 02:20:36 +04:00
2013-10-29 02:21:03 +04:00
/* Initialize and set key */
ret = crypt_set_key ( cc , key ) ;
if ( ret < 0 ) {
ti - > error = " Error decoding and setting key " ;
2017-03-16 17:39:40 +03:00
return ret ;
2013-10-29 02:21:03 +04:00
}
2010-08-12 07:14:06 +04:00
/* Allocate IV */
if ( cc - > iv_gen_ops & & cc - > iv_gen_ops - > ctr ) {
ret = cc - > iv_gen_ops - > ctr ( cc , ti , ivopts ) ;
if ( ret < 0 ) {
ti - > error = " Error creating IV " ;
2017-03-16 17:39:40 +03:00
return ret ;
2010-08-12 07:14:06 +04:00
}
}
2005-04-17 02:20:36 +04:00
2010-08-12 07:14:06 +04:00
/* Initialize IV (set keys for ESSIV etc) */
if ( cc - > iv_gen_ops & & cc - > iv_gen_ops - > init ) {
ret = cc - > iv_gen_ops - > init ( cc ) ;
if ( ret < 0 ) {
ti - > error = " Error initialising IV " ;
2017-03-16 17:39:40 +03:00
return ret ;
2010-08-12 07:14:06 +04:00
}
2009-12-11 02:51:56 +03:00
}
2018-01-12 18:30:32 +03:00
/* wipe the kernel key payload copy */
if ( cc - > key_string )
memset ( cc - > key , 0 , cc - > key_size * sizeof ( u8 ) ) ;
2010-08-12 07:14:07 +04:00
return ret ;
}
2017-01-04 22:23:54 +03:00
static int crypt_ctr_optional ( struct dm_target * ti , unsigned int argc , char * * argv )
{
struct crypt_config * cc = ti - > private ;
struct dm_arg_set as ;
2017-06-22 21:32:45 +03:00
static const struct dm_arg _args [ ] = {
dm crypt: add flags to optionally bypass kcryptd workqueues
This is a follow up to [1] that detailed latency problems associated
with dm-crypt's use of workqueues when processing IO.
Current dm-crypt implementation creates a significant IO performance
overhead (at least on small IO block sizes) for both latency and
throughput. We suspect offloading IO request processing into
workqueues and async threads is more harmful these days with the
modern fast storage. I also did some digging into the dm-crypt git
history and much of this async processing is not needed anymore,
because the reasons it was added are mostly gone from the kernel. More
details can be found in [2] (see "Git archeology" section).
This change adds DM_CRYPT_NO_READ_WORKQUEUE and
DM_CRYPT_NO_WRITE_WORKQUEUE flags for read and write BIOs, which
direct dm-crypt to not offload crypto operations into kcryptd
workqueues. In addition, writes are not buffered to be sorted in the
dm-crypt red-black tree, but dispatched immediately. For cases, where
crypto operations cannot happen (hard interrupt context, for example
the read path of some NVME drivers), we offload the work to a tasklet
rather than a workqueue.
These flags only ensure no async BIO processing in the dm-crypt
module. It is worth noting that some Crypto API implementations may
offload encryption into their own workqueues, which are independent of
the dm-crypt and its configuration. However upon enabling these new
flags dm-crypt will instruct Crypto API not to backlog crypto
requests.
To give an idea of the performance gains for certain workloads,
consider the script, and results when tested against various
devices, detailed here:
https://www.redhat.com/archives/dm-devel/2020-July/msg00138.html
[1]: https://www.spinics.net/lists/dm-crypt/msg07516.html
[2]: https://blog.cloudflare.com/speeding-up-linux-disk-encryption/
Signed-off-by: Ignat Korchagin <ignat@cloudflare.com>
Reviewed-by: Damien Le Moal <damien.lemoal@wdc.com>
Reviewed-by: Bob Liu <bob.liu@oracle.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
2020-07-06 20:37:31 +03:00
{ 0 , 8 , " Invalid number of feature args " } ,
2017-01-04 22:23:54 +03:00
} ;
unsigned int opt_params , val ;
const char * opt_string , * sval ;
2017-03-16 17:39:44 +03:00
char dummy ;
2017-01-04 22:23:54 +03:00
int ret ;
/* Optional parameters */
as . argc = argc ;
as . argv = argv ;
ret = dm_read_arg_group ( _args , & as , & opt_params , & ti - > error ) ;
if ( ret )
return ret ;
while ( opt_params - - ) {
opt_string = dm_shift_arg ( & as ) ;
if ( ! opt_string ) {
ti - > error = " Not enough feature arguments " ;
return - EINVAL ;
}
if ( ! strcasecmp ( opt_string , " allow_discards " ) )
ti - > num_discard_bios = 1 ;
else if ( ! strcasecmp ( opt_string , " same_cpu_crypt " ) )
set_bit ( DM_CRYPT_SAME_CPU , & cc - > flags ) ;
else if ( ! strcasecmp ( opt_string , " submit_from_crypt_cpus " ) )
set_bit ( DM_CRYPT_NO_OFFLOAD , & cc - > flags ) ;
dm crypt: add flags to optionally bypass kcryptd workqueues
This is a follow up to [1] that detailed latency problems associated
with dm-crypt's use of workqueues when processing IO.
Current dm-crypt implementation creates a significant IO performance
overhead (at least on small IO block sizes) for both latency and
throughput. We suspect offloading IO request processing into
workqueues and async threads is more harmful these days with the
modern fast storage. I also did some digging into the dm-crypt git
history and much of this async processing is not needed anymore,
because the reasons it was added are mostly gone from the kernel. More
details can be found in [2] (see "Git archeology" section).
This change adds DM_CRYPT_NO_READ_WORKQUEUE and
DM_CRYPT_NO_WRITE_WORKQUEUE flags for read and write BIOs, which
direct dm-crypt to not offload crypto operations into kcryptd
workqueues. In addition, writes are not buffered to be sorted in the
dm-crypt red-black tree, but dispatched immediately. For cases where
crypto operations cannot be performed in the current context (hard
interrupt context, for example the read path of some NVMe drivers), we
offload the work to a tasklet rather than a workqueue.
These flags only ensure no async BIO processing in the dm-crypt
module. It is worth noting that some Crypto API implementations may
offload encryption into their own workqueues, which are independent of
dm-crypt and its configuration. However, upon enabling these new
flags, dm-crypt will instruct the Crypto API not to backlog crypto
requests.
To give an idea of the performance gains for certain workloads,
consider the script, and results when tested against various
devices, detailed here:
https://www.redhat.com/archives/dm-devel/2020-July/msg00138.html
[1]: https://www.spinics.net/lists/dm-crypt/msg07516.html
[2]: https://blog.cloudflare.com/speeding-up-linux-disk-encryption/
Signed-off-by: Ignat Korchagin <ignat@cloudflare.com>
Reviewed-by: Damien Le Moal <damien.lemoal@wdc.com>
Reviewed-by: Bob Liu <bob.liu@oracle.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
2020-07-06 20:37:31 +03:00
else if ( ! strcasecmp ( opt_string , " no_read_workqueue " ) )
set_bit ( DM_CRYPT_NO_READ_WORKQUEUE , & cc - > flags ) ;
else if ( ! strcasecmp ( opt_string , " no_write_workqueue " ) )
set_bit ( DM_CRYPT_NO_WRITE_WORKQUEUE , & cc - > flags ) ;
2017-01-04 22:23:54 +03:00
else if ( sscanf ( opt_string , " integrity:%u: " , & val ) = = 1 ) {
if ( val = = 0 | | val > MAX_TAG_SIZE ) {
ti - > error = " Invalid integrity arguments " ;
return - EINVAL ;
}
cc - > on_disk_tag_size = val ;
sval = strchr ( opt_string + strlen ( " integrity: " ) , ' : ' ) + 1 ;
if ( ! strcasecmp ( sval , " aead " ) ) {
set_bit ( CRYPT_MODE_INTEGRITY_AEAD , & cc - > cipher_flags ) ;
} else if ( strcasecmp ( sval , " none " ) ) {
ti - > error = " Unknown integrity profile " ;
return - EINVAL ;
}
cc - > cipher_auth = kstrdup ( sval , GFP_KERNEL ) ;
if ( ! cc - > cipher_auth )
return - ENOMEM ;
2017-03-23 17:23:14 +03:00
} else if ( sscanf ( opt_string , " sector_size:%hu%c " , & cc - > sector_size , & dummy ) = = 1 ) {
2017-03-16 17:39:44 +03:00
if ( cc - > sector_size < ( 1 < < SECTOR_SHIFT ) | |
cc - > sector_size > 4096 | |
2017-03-23 17:23:14 +03:00
( cc - > sector_size & ( cc - > sector_size - 1 ) ) ) {
2017-03-16 17:39:44 +03:00
ti - > error = " Invalid feature value for sector_size " ;
return - EINVAL ;
}
2017-09-13 16:45:56 +03:00
if ( ti - > len & ( ( cc - > sector_size > > SECTOR_SHIFT ) - 1 ) ) {
ti - > error = " Device size is not multiple of sector_size feature " ;
return - EINVAL ;
}
2017-03-23 17:23:14 +03:00
cc - > sector_shift = __ffs ( cc - > sector_size ) - SECTOR_SHIFT ;
2017-03-16 17:39:44 +03:00
} else if ( ! strcasecmp ( opt_string , " iv_large_sectors " ) )
set_bit ( CRYPT_IV_LARGE_SECTORS , & cc - > cipher_flags ) ;
else {
2017-01-04 22:23:54 +03:00
ti - > error = " Invalid feature arguments " ;
return - EINVAL ;
}
}
return 0 ;
2010-08-12 07:14:07 +04:00
}
2020-07-08 12:28:08 +03:00
# ifdef CONFIG_BLK_DEV_ZONED
static int crypt_report_zones ( struct dm_target * ti ,
struct dm_report_zones_args * args , unsigned int nr_zones )
{
struct crypt_config * cc = ti - > private ;
sector_t sector = cc - > start + dm_target_offset ( ti , args - > next_sector ) ;
args - > start = cc - > start ;
return blkdev_report_zones ( cc - > dev - > bdev , sector , nr_zones ,
dm_report_zones_cb , args ) ;
}
# endif
2010-08-12 07:14:07 +04:00
/*
* Construct an encryption mapping :
2016-11-21 17:58:51 +03:00
* < cipher > [ < key > | : < key_size > : < user | logon > : < key_description > ] < iv_offset > < dev_path > < start >
2010-08-12 07:14:07 +04:00
*
* Parses the table line in argv , allocates the crypt_config ( stored in
* ti - > private ) and then sets up , in this order : optional features , the
* cipher , the request mempool , the page mempool , the bioset , iv_offset ,
* the underlying device , the start sector , integrity support , the two
* workqueues and the write thread .
*
* At least five arguments are required ; anything after the fifth is
* passed to crypt_ctr_optional ( ) before the cipher is constructed .
*
* Returns 0 on success or a negative errno . Once the crypt_config has
* been allocated every failure goes through the " bad " label , which calls
* crypt_dtr ( ) ; most ( not all ) failure paths also set ti - > error .
*/
static int crypt_ctr ( struct dm_target * ti , unsigned int argc , char * * argv )
{
struct crypt_config * cc ;
2018-10-09 23:13:43 +03:00
const char * devname = dm_table_device_name ( ti - > table ) ;
2016-11-21 17:58:51 +03:00
int key_size ;
2017-01-04 22:23:54 +03:00
unsigned int align_mask ;
2010-08-12 07:14:07 +04:00
unsigned long long tmpll ;
int ret ;
2017-01-04 22:23:54 +03:00
size_t iv_size_padding , additional_req_size ;
dm: reject trailing characters in sccanf input
Device mapper uses sscanf to convert arguments to numbers. The problem is that
the way we use it ignores additional unmatched characters in the scanned string.
For example, this `if (sscanf(string, "%d", &number) == 1)' will match a number,
but also it will match number with some garbage appended, like "123abc".
As a result, device mapper accepts garbage after some numbers. For example
the command `dmsetup create vg1-new --table "0 16384 linear 254:1bla 34816bla"'
will pass without an error.
This patch fixes all sscanf uses in device mapper. It appends "%c" with
a pointer to a dummy character variable to every sscanf statement.
The construct `if (sscanf(string, "%d%c", &number, &dummy) == 1)' succeeds
only if string is a null-terminated number (optionally preceded by some
whitespace characters). If there is some character appended after the number,
sscanf matches "%c", writes the character to the dummy variable and returns 2.
We check the return value for 1 and consequently reject numbers with some
garbage appended.
Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Acked-by: Mike Snitzer <snitzer@redhat.com>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>
2012-03-28 21:41:26 +04:00
char dummy ;
2011-08-02 15:32:08 +04:00
/* The five mandatory arguments are cipher, key, iv_offset, dev_path and start. */
if ( argc < 5 ) {
2010-08-12 07:14:07 +04:00
ti - > error = " Not enough arguments " ;
return - EINVAL ;
2005-04-17 02:20:36 +04:00
}
2016-11-21 17:58:51 +03:00
key_size = get_key_size ( & argv [ 1 ] ) ;
if ( key_size < 0 ) {
ti - > error = " Cannot parse key size " ;
return - EINVAL ;
}
2010-08-12 07:14:07 +04:00
2019-06-12 09:14:45 +03:00
/* One zeroed allocation holds the context plus key_size trailing key bytes. */
cc = kzalloc ( struct_size ( cc , key , key_size ) , GFP_KERNEL ) ;
2010-08-12 07:14:07 +04:00
if ( ! cc ) {
ti - > error = " Cannot allocate encryption context " ;
return - ENOMEM ;
}
2011-01-13 22:59:49 +03:00
cc - > key_size = key_size ;
2017-03-16 17:39:44 +03:00
/* Defaults; the sector_size feature handled in crypt_ctr_optional() may change both. */
cc - > sector_size = ( 1 < < SECTOR_SHIFT ) ;
2017-03-23 17:23:14 +03:00
cc - > sector_shift = 0 ;
2010-08-12 07:14:07 +04:00
ti - > private = cc ;
2017-01-04 22:23:54 +03:00
2017-08-14 05:45:08 +03:00
/* Count this target among the dm-crypt clients; the update runs under dm_crypt_clients_lock. */
spin_lock ( & dm_crypt_clients_lock ) ;
dm_crypt_clients_n + + ;
crypt_calculate_pages_per_client ( ) ;
spin_unlock ( & dm_crypt_clients_lock ) ;
ret = percpu_counter_init ( & cc - > n_allocated_pages , 0 , GFP_KERNEL ) ;
if ( ret < 0 )
goto bad ;
2017-01-04 22:23:54 +03:00
/* Optional parameters need to be read before cipher constructor */
if ( argc > 5 ) {
ret = crypt_ctr_optional ( ti , argc - 5 , & argv [ 5 ] ) ;
if ( ret )
goto bad ;
}
2010-08-12 07:14:07 +04:00
ret = crypt_ctr_cipher ( ti , argv [ 0 ] , argv [ 1 ] ) ;
if ( ret < 0 )
goto bad ;
2017-03-16 17:39:40 +03:00
/* dmreq_start starts as the crypto request size (AEAD or skcipher) plus the size reported by crypto_*_reqsize(). */
if ( crypt_integrity_aead ( cc ) ) {
2017-01-04 22:23:54 +03:00
cc - > dmreq_start = sizeof ( struct aead_request ) ;
cc - > dmreq_start + = crypto_aead_reqsize ( any_tfm_aead ( cc ) ) ;
align_mask = crypto_aead_alignmask ( any_tfm_aead ( cc ) ) ;
} else {
cc - > dmreq_start = sizeof ( struct skcipher_request ) ;
cc - > dmreq_start + = crypto_skcipher_reqsize ( any_tfm ( cc ) ) ;
align_mask = crypto_skcipher_alignmask ( any_tfm ( cc ) ) ;
}
dm crypt: fix access beyond the end of allocated space
The DM crypt target accesses memory beyond allocated space resulting in
a crash on 32 bit x86 systems.
This bug is very old (it dates back to 2.6.25 commit 3a7f6c990ad04 "dm
crypt: use async crypto"). However, this bug was masked by the fact
that kmalloc rounds the size up to the next power of two. This bug
wasn't exposed until 3.17-rc1 commit 298a9fa08a ("dm crypt: use per-bio
data"). By switching to using per-bio data there was no longer any
padding beyond the end of a dm-crypt allocated memory block.
To minimize allocation overhead dm-crypt puts several structures into one
block allocated with kmalloc. The block holds struct ablkcipher_request,
cipher-specific scratch pad (crypto_ablkcipher_reqsize(any_tfm(cc))),
struct dm_crypt_request and an initialization vector.
The variable dmreq_start is set to offset of struct dm_crypt_request
within this memory block. dm-crypt allocates the block with this size:
cc->dmreq_start + sizeof(struct dm_crypt_request) + cc->iv_size.
When accessing the initialization vector, dm-crypt uses the function
iv_of_dmreq, which performs this calculation: ALIGN((unsigned long)(dmreq
+ 1), crypto_ablkcipher_alignmask(any_tfm(cc)) + 1).
dm-crypt allocated "cc->iv_size" bytes beyond the end of dm_crypt_request
structure. However, when dm-crypt accesses the initialization vector, it
takes a pointer to the end of dm_crypt_request, aligns it, and then uses
it as the initialization vector. If the end of dm_crypt_request is not
aligned on a crypto_ablkcipher_alignmask(any_tfm(cc)) boundary the
alignment causes the initialization vector to point beyond the allocated
space.
Fix this bug by calculating the variable iv_size_padding and adding it
to the allocated size.
Also correct the alignment of dm_crypt_request. struct dm_crypt_request
is specific to dm-crypt (it isn't used by the crypto subsystem at all),
so it is aligned on __alignof__(struct dm_crypt_request).
Also align per_bio_data_size on ARCH_KMALLOC_MINALIGN, so that it is
aligned as if the block was allocated with kmalloc.
Reported-by: Krzysztof Kolasa <kkolasa@winsoft.pl>
Tested-by: Milan Broz <gmazyland@gmail.com>
Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
2014-08-28 19:09:31 +04:00
/* Round dmreq_start up so struct dm_crypt_request starts at its own alignment. */
cc - > dmreq_start = ALIGN ( cc - > dmreq_start , __alignof__ ( struct dm_crypt_request ) ) ;
2017-01-04 22:23:54 +03:00
if ( align_mask < CRYPTO_MINALIGN ) {
dm crypt: fix access beyond the end of allocated space
The DM crypt target accesses memory beyond allocated space resulting in
a crash on 32 bit x86 systems.
This bug is very old (it dates back to 2.6.25 commit 3a7f6c990ad04 "dm
crypt: use async crypto"). However, this bug was masked by the fact
that kmalloc rounds the size up to the next power of two. This bug
wasn't exposed until 3.17-rc1 commit 298a9fa08a ("dm crypt: use per-bio
data"). By switching to using per-bio data there was no longer any
padding beyond the end of a dm-crypt allocated memory block.
To minimize allocation overhead dm-crypt puts several structures into one
block allocated with kmalloc. The block holds struct ablkcipher_request,
cipher-specific scratch pad (crypto_ablkcipher_reqsize(any_tfm(cc))),
struct dm_crypt_request and an initialization vector.
The variable dmreq_start is set to offset of struct dm_crypt_request
within this memory block. dm-crypt allocates the block with this size:
cc->dmreq_start + sizeof(struct dm_crypt_request) + cc->iv_size.
When accessing the initialization vector, dm-crypt uses the function
iv_of_dmreq, which performs this calculation: ALIGN((unsigned long)(dmreq
+ 1), crypto_ablkcipher_alignmask(any_tfm(cc)) + 1).
dm-crypt allocated "cc->iv_size" bytes beyond the end of dm_crypt_request
structure. However, when dm-crypt accesses the initialization vector, it
takes a pointer to the end of dm_crypt_request, aligns it, and then uses
it as the initialization vector. If the end of dm_crypt_request is not
aligned on a crypto_ablkcipher_alignmask(any_tfm(cc)) boundary the
alignment causes the initialization vector to point beyond the allocated
space.
Fix this bug by calculating the variable iv_size_padding and adding it
to the allocated size.
Also correct the alignment of dm_crypt_request. struct dm_crypt_request
is specific to dm-crypt (it isn't used by the crypto subsystem at all),
so it is aligned on __alignof__(struct dm_crypt_request).
Also align per_bio_data_size on ARCH_KMALLOC_MINALIGN, so that it is
aligned as if the block was allocated with kmalloc.
Reported-by: Krzysztof Kolasa <kkolasa@winsoft.pl>
Tested-by: Milan Broz <gmazyland@gmail.com>
Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
2014-08-28 19:09:31 +04:00
/* Allocate the padding exactly */
iv_size_padding = - ( cc - > dmreq_start + sizeof ( struct dm_crypt_request ) )
2017-01-04 22:23:54 +03:00
& align_mask ;
dm crypt: fix access beyond the end of allocated space
The DM crypt target accesses memory beyond allocated space resulting in
a crash on 32 bit x86 systems.
This bug is very old (it dates back to 2.6.25 commit 3a7f6c990ad04 "dm
crypt: use async crypto"). However, this bug was masked by the fact
that kmalloc rounds the size up to the next power of two. This bug
wasn't exposed until 3.17-rc1 commit 298a9fa08a ("dm crypt: use per-bio
data"). By switching to using per-bio data there was no longer any
padding beyond the end of a dm-crypt allocated memory block.
To minimize allocation overhead dm-crypt puts several structures into one
block allocated with kmalloc. The block holds struct ablkcipher_request,
cipher-specific scratch pad (crypto_ablkcipher_reqsize(any_tfm(cc))),
struct dm_crypt_request and an initialization vector.
The variable dmreq_start is set to offset of struct dm_crypt_request
within this memory block. dm-crypt allocates the block with this size:
cc->dmreq_start + sizeof(struct dm_crypt_request) + cc->iv_size.
When accessing the initialization vector, dm-crypt uses the function
iv_of_dmreq, which performs this calculation: ALIGN((unsigned long)(dmreq
+ 1), crypto_ablkcipher_alignmask(any_tfm(cc)) + 1).
dm-crypt allocated "cc->iv_size" bytes beyond the end of dm_crypt_request
structure. However, when dm-crypt accesses the initialization vector, it
takes a pointer to the end of dm_crypt_request, aligns it, and then uses
it as the initialization vector. If the end of dm_crypt_request is not
aligned on a crypto_ablkcipher_alignmask(any_tfm(cc)) boundary the
alignment causes the initialization vector to point beyond the allocated
space.
Fix this bug by calculating the variable iv_size_padding and adding it
to the allocated size.
Also correct the alignment of dm_crypt_request. struct dm_crypt_request
is specific to dm-crypt (it isn't used by the crypto subsystem at all),
so it is aligned on __alignof__(struct dm_crypt_request).
Also align per_bio_data_size on ARCH_KMALLOC_MINALIGN, so that it is
aligned as if the block was allocated with kmalloc.
Reported-by: Krzysztof Kolasa <kkolasa@winsoft.pl>
Tested-by: Milan Broz <gmazyland@gmail.com>
Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
2014-08-28 19:09:31 +04:00
} else {
/*
* If the cipher requires greater alignment than kmalloc
* alignment , we don ' t know the exact position of the
* initialization vector . We must assume worst case .
*/
2017-01-04 22:23:54 +03:00
iv_size_padding = align_mask ;
dm crypt: fix access beyond the end of allocated space
The DM crypt target accesses memory beyond allocated space resulting in
a crash on 32 bit x86 systems.
This bug is very old (it dates back to 2.6.25 commit 3a7f6c990ad04 "dm
crypt: use async crypto"). However, this bug was masked by the fact
that kmalloc rounds the size up to the next power of two. This bug
wasn't exposed until 3.17-rc1 commit 298a9fa08a ("dm crypt: use per-bio
data"). By switching to using per-bio data there was no longer any
padding beyond the end of a dm-crypt allocated memory block.
To minimize allocation overhead dm-crypt puts several structures into one
block allocated with kmalloc. The block holds struct ablkcipher_request,
cipher-specific scratch pad (crypto_ablkcipher_reqsize(any_tfm(cc))),
struct dm_crypt_request and an initialization vector.
The variable dmreq_start is set to offset of struct dm_crypt_request
within this memory block. dm-crypt allocates the block with this size:
cc->dmreq_start + sizeof(struct dm_crypt_request) + cc->iv_size.
When accessing the initialization vector, dm-crypt uses the function
iv_of_dmreq, which performs this calculation: ALIGN((unsigned long)(dmreq
+ 1), crypto_ablkcipher_alignmask(any_tfm(cc)) + 1).
dm-crypt allocated "cc->iv_size" bytes beyond the end of dm_crypt_request
structure. However, when dm-crypt accesses the initialization vector, it
takes a pointer to the end of dm_crypt_request, aligns it, and then uses
it as the initialization vector. If the end of dm_crypt_request is not
aligned on a crypto_ablkcipher_alignmask(any_tfm(cc)) boundary the
alignment causes the initialization vector to point beyond the allocated
space.
Fix this bug by calculating the variable iv_size_padding and adding it
to the allocated size.
Also correct the alignment of dm_crypt_request. struct dm_crypt_request
is specific to dm-crypt (it isn't used by the crypto subsystem at all),
so it is aligned on __alignof__(struct dm_crypt_request).
Also align per_bio_data_size on ARCH_KMALLOC_MINALIGN, so that it is
aligned as if the block was allocated with kmalloc.
Reported-by: Krzysztof Kolasa <kkolasa@winsoft.pl>
Tested-by: Milan Broz <gmazyland@gmail.com>
Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
2014-08-28 19:09:31 +04:00
}
2008-02-08 05:11:07 +03:00
2017-01-04 22:23:54 +03:00
/* ...| IV + padding | original IV | original sec. number | bio tag offset | */
additional_req_size = sizeof ( struct dm_crypt_request ) +
iv_size_padding + cc - > iv_size +
cc - > iv_size +
sizeof ( uint64_t ) +
sizeof ( unsigned int ) ;
2018-05-21 01:25:53 +03:00
/* Each pooled request is dmreq_start bytes followed by additional_req_size bytes. */
ret = mempool_init_kmalloc_pool ( & cc - > req_pool , MIN_IOS , cc - > dmreq_start + additional_req_size ) ;
if ( ret ) {
2008-02-08 05:11:07 +03:00
ti - > error = " Cannot allocate crypt request mempool " ;
2010-08-12 07:14:06 +04:00
goto bad ;
2008-02-08 05:11:07 +03:00
}
2016-01-31 21:28:26 +03:00
/* Per-bio data is struct dm_crypt_io followed by one such request, rounded up to ARCH_KMALLOC_MINALIGN. */
cc - > per_bio_data_size = ti - > per_io_data_size =
2017-01-04 22:23:54 +03:00
ALIGN ( sizeof ( struct dm_crypt_io ) + cc - > dmreq_start + additional_req_size ,
dm crypt: fix access beyond the end of allocated space
The DM crypt target accesses memory beyond allocated space resulting in
a crash on 32 bit x86 systems.
This bug is very old (it dates back to 2.6.25 commit 3a7f6c990ad04 "dm
crypt: use async crypto"). However, this bug was masked by the fact
that kmalloc rounds the size up to the next power of two. This bug
wasn't exposed until 3.17-rc1 commit 298a9fa08a ("dm crypt: use per-bio
data"). By switching to using per-bio data there was no longer any
padding beyond the end of a dm-crypt allocated memory block.
To minimize allocation overhead dm-crypt puts several structures into one
block allocated with kmalloc. The block holds struct ablkcipher_request,
cipher-specific scratch pad (crypto_ablkcipher_reqsize(any_tfm(cc))),
struct dm_crypt_request and an initialization vector.
The variable dmreq_start is set to offset of struct dm_crypt_request
within this memory block. dm-crypt allocates the block with this size:
cc->dmreq_start + sizeof(struct dm_crypt_request) + cc->iv_size.
When accessing the initialization vector, dm-crypt uses the function
iv_of_dmreq, which performs this calculation: ALIGN((unsigned long)(dmreq
+ 1), crypto_ablkcipher_alignmask(any_tfm(cc)) + 1).
dm-crypt allocated "cc->iv_size" bytes beyond the end of dm_crypt_request
structure. However, when dm-crypt accesses the initialization vector, it
takes a pointer to the end of dm_crypt_request, aligns it, and then uses
it as the initialization vector. If the end of dm_crypt_request is not
aligned on a crypto_ablkcipher_alignmask(any_tfm(cc)) boundary the
alignment causes the initialization vector to point beyond the allocated
space.
Fix this bug by calculating the variable iv_size_padding and adding it
to the allocated size.
Also correct the alignment of dm_crypt_request. struct dm_crypt_request
is specific to dm-crypt (it isn't used by the crypto subsystem at all),
so it is aligned on __alignof__(struct dm_crypt_request).
Also align per_bio_data_size on ARCH_KMALLOC_MINALIGN, so that it is
aligned as if the block was allocated with kmalloc.
Reported-by: Krzysztof Kolasa <kkolasa@winsoft.pl>
Tested-by: Milan Broz <gmazyland@gmail.com>
Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
2014-08-28 19:09:31 +04:00
ARCH_KMALLOC_MINALIGN ) ;
2014-03-28 23:51:55 +04:00
2018-05-21 01:25:53 +03:00
ret = mempool_init ( & cc - > page_pool , BIO_MAX_PAGES , crypt_page_alloc , crypt_page_free , cc ) ;
if ( ret ) {
2006-06-26 11:27:35 +04:00
ti - > error = " Cannot allocate page mempool " ;
2010-08-12 07:14:06 +04:00
goto bad ;
2005-04-17 02:20:36 +04:00
}
2018-05-21 01:25:53 +03:00
ret = bioset_init ( & cc - > bs , MIN_IOS , 0 , BIOSET_NEED_BVECS ) ;
if ( ret ) {
2006-10-03 12:15:40 +04:00
ti - > error = " Cannot allocate crypt bioset " ;
2010-08-12 07:14:06 +04:00
goto bad ;
2006-10-03 12:15:40 +04:00
}
2015-02-13 16:24:41 +03:00
mutex_init ( & cc - > bio_alloc_lock ) ;
2010-08-12 07:14:06 +04:00
/* A failed iv_offset check below reports -EINVAL. */
ret = - EINVAL ;
2017-03-16 17:39:44 +03:00
/* iv_offset must be a plain number and a multiple of sector_size expressed in SECTOR_SHIFT units. */
if ( ( sscanf ( argv [ 2 ] , " %llu%c " , & tmpll , & dummy ) ! = 1 ) | |
( tmpll & ( ( cc - > sector_size > > SECTOR_SHIFT ) - 1 ) ) ) {
2006-06-26 11:27:35 +04:00
ti - > error = " Invalid iv_offset sector " ;
2010-08-12 07:14:06 +04:00
goto bad ;
2005-04-17 02:20:36 +04:00
}
2006-03-27 13:17:48 +04:00
cc - > iv_offset = tmpll ;
2005-04-17 02:20:36 +04:00
2015-07-31 16:20:36 +03:00
ret = dm_get_device ( ti , argv [ 3 ] , dm_table_get_mode ( ti - > table ) , & cc - > dev ) ;
if ( ret ) {
2010-08-12 07:14:06 +04:00
ti - > error = " Device lookup failed " ;
goto bad ;
}
2015-07-31 16:20:36 +03:00
/* ret is 0 after a successful dm_get_device(); restore -EINVAL for the start-sector check. */
ret = - EINVAL ;
2018-11-08 00:24:55 +03:00
/* The (sector_t) round-trip rejects start values that do not fit in sector_t. */
if ( sscanf ( argv [ 4 ] , " %llu%c " , & tmpll , & dummy ) ! = 1 | | tmpll ! = ( sector_t ) tmpll ) {
2006-06-26 11:27:35 +04:00
ti - > error = " Invalid device sector " ;
2010-08-12 07:14:06 +04:00
goto bad ;
2005-04-17 02:20:36 +04:00
}
2006-03-27 13:17:48 +04:00
cc - > start = tmpll ;
2005-04-17 02:20:36 +04:00
2020-07-08 12:28:08 +03:00
/*
* For zoned block devices , we need to preserve the issuer write
* ordering . To do so , disable write workqueues and force inline
* encryption completion .
*/
if ( bdev_is_zoned ( cc - > dev - > bdev ) ) {
set_bit ( DM_CRYPT_NO_WRITE_WORKQUEUE , & cc - > flags ) ;
set_bit ( DM_CRYPT_WRITE_INLINE , & cc - > flags ) ;
}
2017-03-16 17:39:40 +03:00
if ( crypt_integrity_aead ( cc ) | | cc - > integrity_iv_size ) {
2017-01-04 22:23:54 +03:00
ret = crypt_integrity_ctr ( cc , ti ) ;
2011-08-02 15:32:08 +04:00
if ( ret )
goto bad ;
2017-01-04 22:23:54 +03:00
/* Number of on-disk tags that fit in one tag pool entry, but never less than one. */
cc - > tag_pool_max_sectors = POOL_ENTRY_SIZE / cc - > on_disk_tag_size ;
if ( ! cc - > tag_pool_max_sectors )
cc - > tag_pool_max_sectors = 1 ;
2015-02-13 16:23:09 +03:00
2018-05-21 01:25:53 +03:00
ret = mempool_init_kmalloc_pool ( & cc - > tag_pool , MIN_IOS ,
2017-01-04 22:23:54 +03:00
cc - > tag_pool_max_sectors * cc - > on_disk_tag_size ) ;
2018-05-21 01:25:53 +03:00
if ( ret ) {
2017-01-04 22:23:54 +03:00
ti - > error = " Cannot allocate integrity tags mempool " ;
goto bad ;
2011-08-02 15:32:08 +04:00
}
2017-04-18 23:51:54 +03:00
/* Scale by sector_shift; crypt_map() compares this limit against bio_sectors(). */
cc - > tag_pool_max_sectors < < = cc - > sector_shift ;
2011-08-02 15:32:08 +04:00
}
2010-08-12 07:14:06 +04:00
/* Workqueue allocation failures below report -ENOMEM. */
ret = - ENOMEM ;
2019-11-21 01:27:39 +03:00
cc - > io_queue = alloc_workqueue ( " kcryptd_io/%s " , WQ_MEM_RECLAIM , 1 , devname ) ;
2007-10-20 01:38:58 +04:00
if ( ! cc - > io_queue ) {
ti - > error = " Couldn't create kcryptd io queue " ;
2010-08-12 07:14:06 +04:00
goto bad ;
2007-10-20 01:38:58 +04:00
}
2015-02-13 16:23:09 +03:00
/* DM_CRYPT_SAME_CPU selects a queue without WQ_UNBOUND; otherwise the queue is WQ_UNBOUND and sized by num_online_cpus(). */
if ( test_bit ( DM_CRYPT_SAME_CPU , & cc - > flags ) )
2019-11-21 01:27:39 +03:00
cc - > crypt_queue = alloc_workqueue ( " kcryptd/%s " , WQ_CPU_INTENSIVE | WQ_MEM_RECLAIM ,
2018-10-09 23:13:43 +03:00
1 , devname ) ;
2015-02-13 16:23:09 +03:00
else
2018-10-09 23:13:43 +03:00
cc - > crypt_queue = alloc_workqueue ( " kcryptd/%s " ,
2019-11-21 01:27:39 +03:00
WQ_CPU_INTENSIVE | WQ_MEM_RECLAIM | WQ_UNBOUND ,
2018-10-09 23:13:43 +03:00
num_online_cpus ( ) , devname ) ;
2007-10-20 01:38:58 +04:00
if ( ! cc - > crypt_queue ) {
2007-10-20 01:38:57 +04:00
ti - > error = " Couldn't create kcryptd queue " ;
2010-08-12 07:14:06 +04:00
goto bad ;
2007-10-20 01:38:57 +04:00
}
2018-07-11 19:10:51 +03:00
spin_lock_init ( & cc - > write_thread_lock ) ;
2015-02-13 16:27:41 +03:00
cc - > write_tree = RB_ROOT ;
2015-02-13 16:25:59 +03:00
2018-10-09 23:13:43 +03:00
/* The write thread is created here and only started by wake_up_process() once creation succeeded. */
cc - > write_thread = kthread_create ( dmcrypt_write , cc , " dmcrypt_write/%s " , devname ) ;
2015-02-13 16:25:59 +03:00
if ( IS_ERR ( cc - > write_thread ) ) {
ret = PTR_ERR ( cc - > write_thread ) ;
cc - > write_thread = NULL ;
ti - > error = " Couldn't spawn write thread " ;
goto bad ;
}
wake_up_process ( cc - > write_thread ) ;
2013-03-02 02:45:47 +04:00
ti - > num_flush_bios = 1 ;
2011-09-26 02:26:21 +04:00
2005-04-17 02:20:36 +04:00
return 0 ;
2010-08-12 07:14:06 +04:00
bad :
/* Single cleanup path: crypt_dtr() is called no matter how far construction got. */
crypt_dtr ( ti ) ;
return ret ;
2005-04-17 02:20:36 +04:00
}
2012-12-22 00:23:41 +04:00
static int crypt_map ( struct dm_target * ti , struct bio * bio )
2005-04-17 02:20:36 +04:00
{
2007-07-12 20:26:32 +04:00
struct dm_crypt_io * io ;
2012-07-27 18:08:05 +04:00
struct crypt_config * cc = ti - > private ;
2009-06-22 13:12:23 +04:00
2011-08-02 15:32:08 +04:00
/*
2016-06-05 22:32:25 +03:00
* If bio is REQ_PREFLUSH or REQ_OP_DISCARD , just bypass crypt queues .
* - for REQ_PREFLUSH device - mapper core ensures that no IO is in - flight
2016-06-05 22:32:04 +03:00
* - for REQ_OP_DISCARD caller must use flush if IO ordering matters
2011-08-02 15:32:08 +04:00
*/
2016-08-06 00:35:16 +03:00
if ( unlikely ( bio - > bi_opf & REQ_PREFLUSH | |
2016-06-05 22:32:25 +03:00
bio_op ( bio ) = = REQ_OP_DISCARD ) ) {
2017-08-23 20:10:32 +03:00
bio_set_dev ( bio , cc - > dev - > bdev ) ;
2011-08-02 15:32:08 +04:00
if ( bio_sectors ( bio ) )
2013-10-12 02:44:27 +04:00
bio - > bi_iter . bi_sector = cc - > start +
dm_target_offset ( ti , bio - > bi_iter . bi_sector ) ;
2009-06-22 13:12:23 +04:00
return DM_MAPIO_REMAPPED ;
}
2005-04-17 02:20:36 +04:00
2016-08-30 23:38:42 +03:00
/*
* Check if bio is too large , split as needed .
*/
if ( unlikely ( bio - > bi_iter . bi_size > ( BIO_MAX_PAGES < < PAGE_SHIFT ) ) & &
2017-01-04 22:23:54 +03:00
( bio_data_dir ( bio ) = = WRITE | | cc - > on_disk_tag_size ) )
2016-08-30 23:38:42 +03:00
dm_accept_partial_bio ( bio , ( ( BIO_MAX_PAGES < < PAGE_SHIFT ) > > SECTOR_SHIFT ) ) ;
2017-03-16 17:39:44 +03:00
/*
* Ensure that bio is a multiple of internal sector encryption size
* and is aligned to this size as defined in IO hints .
*/
if ( unlikely ( ( bio - > bi_iter . bi_sector & ( ( cc - > sector_size > > SECTOR_SHIFT ) - 1 ) ) ! = 0 ) )
2017-06-03 10:38:02 +03:00
return DM_MAPIO_KILL ;
2017-03-16 17:39:44 +03:00
if ( unlikely ( bio - > bi_iter . bi_size & ( cc - > sector_size - 1 ) ) )
2017-06-03 10:38:02 +03:00
return DM_MAPIO_KILL ;
2017-03-16 17:39:44 +03:00
2014-03-28 23:51:55 +04:00
io = dm_per_bio_data ( bio , cc - > per_bio_data_size ) ;
crypt_io_init ( io , cc , bio , dm_target_offset ( ti , bio - > bi_iter . bi_sector ) ) ;
2017-01-04 22:23:54 +03:00
if ( cc - > on_disk_tag_size ) {
2017-04-18 23:51:54 +03:00
unsigned tag_len = cc - > on_disk_tag_size * ( bio_sectors ( bio ) > > cc - > sector_shift ) ;
2017-01-04 22:23:54 +03:00
if ( unlikely ( tag_len > KMALLOC_MAX_SIZE ) | |
2017-04-18 23:51:54 +03:00
unlikely ( ! ( io - > integrity_metadata = kmalloc ( tag_len ,
2017-01-04 22:23:54 +03:00
GFP_NOIO | __GFP_NORETRY | __GFP_NOMEMALLOC | __GFP_NOWARN ) ) ) ) {
if ( bio_sectors ( bio ) > cc - > tag_pool_max_sectors )
dm_accept_partial_bio ( bio , cc - > tag_pool_max_sectors ) ;
2018-05-21 01:25:53 +03:00
io - > integrity_metadata = mempool_alloc ( & cc - > tag_pool , GFP_NOIO ) ;
2017-01-04 22:23:54 +03:00
io - > integrity_metadata_from_pool = true ;
}
}
2017-03-16 17:39:40 +03:00
if ( crypt_integrity_aead ( cc ) )
2017-01-04 22:23:54 +03:00
io - > ctx . r . req_aead = ( struct aead_request * ) ( io + 1 ) ;
else
io - > ctx . r . req = ( struct skcipher_request * ) ( io + 1 ) ;
2007-10-20 01:38:58 +04:00
2011-01-13 22:59:53 +03:00
if ( bio_data_dir ( io - > base_bio ) = = READ ) {
if ( kcryptd_io_read ( io , GFP_NOWAIT ) )
2015-02-13 16:25:59 +03:00
kcryptd_queue_read ( io ) ;
2011-01-13 22:59:53 +03:00
} else
2007-10-20 01:38:58 +04:00
kcryptd_queue_crypt ( io ) ;
2005-04-17 02:20:36 +04:00
2006-12-08 13:41:06 +03:00
return DM_MAPIO_SUBMITTED ;
2005-04-17 02:20:36 +04:00
}
2013-03-02 02:45:44 +04:00
/*
 * Status callback (wired up as .status in crypt_target).
 *
 * STATUSTYPE_INFO : stores an empty string in result.
 * STATUSTYPE_TABLE: emits, through DMEMIT, the cipher string, the key field,
 *                   iv_offset, the device name and the start offset, then the
 *                   number of optional feature arguments followed by the
 *                   arguments themselves.
 *
 * status_flags is not read by this function.
 * NOTE(review): sz, result and maxlen are presumably consumed inside the
 * DMEMIT macro (sz is never referenced directly here) - confirm against the
 * macro definition.
 */
static void crypt_status ( struct dm_target * ti , status_type_t type ,
unsigned status_flags , char * result , unsigned maxlen )
2005-04-17 02:20:36 +04:00
{
2010-08-12 07:14:07 +04:00
struct crypt_config * cc = ti - > private ;
2013-03-02 02:45:44 +04:00
unsigned i , sz = 0 ;
2015-02-13 16:23:09 +03:00
int num_feature_args = 0 ;
2005-04-17 02:20:36 +04:00
switch ( type ) {
/* No runtime info is reported: hand back an empty string. */
case STATUSTYPE_INFO :
result [ 0 ] = ' \0 ' ;
break ;
/* Table line: rebuild the target arguments from the current config. */
case STATUSTYPE_TABLE :
2011-01-13 22:59:52 +03:00
DMEMIT ( " %s " , cc - > cipher_string ) ;
2005-04-17 02:20:36 +04:00
2016-11-21 17:58:51 +03:00
/*
 * Key field: the size plus cc->key_string when key_string is set,
 * otherwise every byte of cc->key printed as two hex digits; a dash
 * placeholder is emitted when key_size is zero.
 */
if ( cc - > key_size > 0 ) {
if ( cc - > key_string )
DMEMIT ( " :%u:%s " , cc - > key_size , cc - > key_string ) ;
else
for ( i = 0 ; i < cc - > key_size ; i + + )
DMEMIT ( " %02x " , cc - > key [ i ] ) ;
} else
2013-03-02 02:45:44 +04:00
DMEMIT ( " - " ) ;
2005-04-17 02:20:36 +04:00
2006-03-27 13:17:48 +04:00
DMEMIT ( " %llu %s %llu " , ( unsigned long long ) cc - > iv_offset ,
cc - > dev - > name , ( unsigned long long ) cc - > start ) ;
2011-08-02 15:32:08 +04:00
2015-02-13 16:23:09 +03:00
/*
 * First pass: count the optional feature arguments so that the count
 * can be emitted ahead of them. Every condition counted here has a
 * matching DMEMIT inside the num_feature_args block further down.
 */
num_feature_args + = ! ! ti - > num_discard_bios ;
num_feature_args + = test_bit ( DM_CRYPT_SAME_CPU , & cc - > flags ) ;
2015-02-13 16:27:08 +03:00
num_feature_args + = test_bit ( DM_CRYPT_NO_OFFLOAD , & cc - > flags ) ;
dm crypt: add flags to optionally bypass kcryptd workqueues
This is a follow up to [1] that detailed latency problems associated
with dm-crypt's use of workqueues when processing IO.
Current dm-crypt implementation creates a significant IO performance
overhead (at least on small IO block sizes) for both latency and
throughput. We suspect offloading IO request processing into
workqueues and async threads is more harmful these days with the
modern fast storage. I also did some digging into the dm-crypt git
history and much of this async processing is not needed anymore,
because the reasons it was added are mostly gone from the kernel. More
details can be found in [2] (see "Git archeology" section).
This change adds DM_CRYPT_NO_READ_WORKQUEUE and
DM_CRYPT_NO_WRITE_WORKQUEUE flags for read and write BIOs, which
direct dm-crypt to not offload crypto operations into kcryptd
workqueues. In addition, writes are not buffered to be sorted in the
dm-crypt red-black tree, but dispatched immediately. For cases, where
crypto operations cannot happen (hard interrupt context, for example
the read path of some NVME drivers), we offload the work to a tasklet
rather than a workqueue.
These flags only ensure no async BIO processing in the dm-crypt
module. It is worth noting that some Crypto API implementations may
offload encryption into their own workqueues, which are independent of
the dm-crypt and its configuration. However upon enabling these new
flags dm-crypt will instruct Crypto API not to backlog crypto
requests.
To give an idea of the performance gains for certain workloads,
consider the script, and results when tested against various
devices, detailed here:
https://www.redhat.com/archives/dm-devel/2020-July/msg00138.html
[1]: https://www.spinics.net/lists/dm-crypt/msg07516.html
[2]: https://blog.cloudflare.com/speeding-up-linux-disk-encryption/
Signed-off-by: Ignat Korchagin <ignat@cloudflare.com>
Reviewed-by: Damien Le Moal <damien.lemoal@wdc.com>
Reviewed-by: Bob Liu <bob.liu@oracle.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
2020-07-06 20:37:31 +03:00
num_feature_args + = test_bit ( DM_CRYPT_NO_READ_WORKQUEUE , & cc - > flags ) ;
num_feature_args + = test_bit ( DM_CRYPT_NO_WRITE_WORKQUEUE , & cc - > flags ) ;
2017-03-23 17:23:14 +03:00
num_feature_args + = cc - > sector_size ! = ( 1 < < SECTOR_SHIFT ) ;
2017-03-16 17:39:44 +03:00
num_feature_args + = test_bit ( CRYPT_IV_LARGE_SECTORS , & cc - > cipher_flags ) ;
2017-01-04 22:23:54 +03:00
if ( cc - > on_disk_tag_size )
num_feature_args + + ;
2015-02-13 16:23:09 +03:00
if ( num_feature_args ) {
DMEMIT ( " %d " , num_feature_args ) ;
if ( ti - > num_discard_bios )
DMEMIT ( " allow_discards " ) ;
if ( test_bit ( DM_CRYPT_SAME_CPU , & cc - > flags ) )
DMEMIT ( " same_cpu_crypt " ) ;
2015-02-13 16:27:08 +03:00
if ( test_bit ( DM_CRYPT_NO_OFFLOAD , & cc - > flags ) )
DMEMIT ( " submit_from_crypt_cpus " ) ;
dm crypt: add flags to optionally bypass kcryptd workqueues
This is a follow up to [1] that detailed latency problems associated
with dm-crypt's use of workqueues when processing IO.
Current dm-crypt implementation creates a significant IO performance
overhead (at least on small IO block sizes) for both latency and
throughput. We suspect offloading IO request processing into
workqueues and async threads is more harmful these days with the
modern fast storage. I also did some digging into the dm-crypt git
history and much of this async processing is not needed anymore,
because the reasons it was added are mostly gone from the kernel. More
details can be found in [2] (see "Git archeology" section).
This change adds DM_CRYPT_NO_READ_WORKQUEUE and
DM_CRYPT_NO_WRITE_WORKQUEUE flags for read and write BIOs, which
direct dm-crypt to not offload crypto operations into kcryptd
workqueues. In addition, writes are not buffered to be sorted in the
dm-crypt red-black tree, but dispatched immediately. For cases, where
crypto operations cannot happen (hard interrupt context, for example
the read path of some NVME drivers), we offload the work to a tasklet
rather than a workqueue.
These flags only ensure no async BIO processing in the dm-crypt
module. It is worth noting that some Crypto API implementations may
offload encryption into their own workqueues, which are independent of
the dm-crypt and its configuration. However upon enabling these new
flags dm-crypt will instruct Crypto API not to backlog crypto
requests.
To give an idea of the performance gains for certain workloads,
consider the script, and results when tested against various
devices, detailed here:
https://www.redhat.com/archives/dm-devel/2020-July/msg00138.html
[1]: https://www.spinics.net/lists/dm-crypt/msg07516.html
[2]: https://blog.cloudflare.com/speeding-up-linux-disk-encryption/
Signed-off-by: Ignat Korchagin <ignat@cloudflare.com>
Reviewed-by: Damien Le Moal <damien.lemoal@wdc.com>
Reviewed-by: Bob Liu <bob.liu@oracle.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
2020-07-06 20:37:31 +03:00
if ( test_bit ( DM_CRYPT_NO_READ_WORKQUEUE , & cc - > flags ) )
DMEMIT ( " no_read_workqueue " ) ;
if ( test_bit ( DM_CRYPT_NO_WRITE_WORKQUEUE , & cc - > flags ) )
DMEMIT ( " no_write_workqueue " ) ;
2017-01-04 22:23:54 +03:00
/* The integrity argument carries the on-disk tag size and cc->cipher_auth. */
if ( cc - > on_disk_tag_size )
DMEMIT ( " integrity:%u:%s " , cc - > on_disk_tag_size , cc - > cipher_auth ) ;
2017-03-16 17:39:44 +03:00
/* sector_size is only reported when it differs from 1 << SECTOR_SHIFT. */
if ( cc - > sector_size ! = ( 1 < < SECTOR_SHIFT ) )
DMEMIT ( " sector_size:%d " , cc - > sector_size ) ;
if ( test_bit ( CRYPT_IV_LARGE_SECTORS , & cc - > cipher_flags ) )
DMEMIT ( " iv_large_sectors " ) ;
2015-02-13 16:23:09 +03:00
}
2011-08-02 15:32:08 +04:00
2005-04-17 02:20:36 +04:00
break ;
}
}
2006-10-03 12:15:37 +04:00
static void crypt_postsuspend ( struct dm_target * ti )
{
struct crypt_config * cc = ti - > private ;
set_bit ( DM_CRYPT_SUSPENDED , & cc - > flags ) ;
}
/*
 * Pre-resume hook (wired up as .preresume in crypt_target).
 *
 * Returns 0 when DM_CRYPT_KEY_VALID is set in cc->flags. Otherwise logs an
 * error through DMERR and returns -EAGAIN, refusing the resume.
 */
static int crypt_preresume ( struct dm_target * ti )
{
struct crypt_config * cc = ti - > private ;
/* Without a valid key the resume is aborted. */
if ( ! test_bit ( DM_CRYPT_KEY_VALID , & cc - > flags ) ) {
DMERR ( " aborting resume - crypt key is not set. " ) ;
return - EAGAIN ;
}
return 0 ;
}
static void crypt_resume ( struct dm_target * ti )
{
struct crypt_config * cc = ti - > private ;
clear_bit ( DM_CRYPT_SUSPENDED , & cc - > flags ) ;
}
/* Message interface
* key set < key >
* key wipe
*
* Handler wired up as .message in crypt_target.
*
* The first word is matched case-insensitively against the key keyword.
* Key manipulation is refused with -EINVAL (after a DMWARN) unless
* DM_CRYPT_SUSPENDED is set in cc->flags.
*
* set  (exactly 3 arguments): the size reported by get_key_size() must be
*      non-negative and equal to cc->key_size, otherwise argv[2] is
*      overwritten in place and -EINVAL is returned. On success the key is
*      installed with crypt_set_key(), the IV generator init hook is run when
*      one exists, and the function returns the result of the last step.
* wipe (exactly 2 arguments): returns the result of crypt_wipe_key().
*
* Anything else, including fewer than two arguments, logs a warning and
* returns -EINVAL. The result and maxlen parameters are not used here.
*/
2018-02-28 23:59:59 +03:00
static int crypt_message ( struct dm_target * ti , unsigned argc , char * * argv ,
char * result , unsigned maxlen )
2006-10-03 12:15:37 +04:00
{
struct crypt_config * cc = ti - > private ;
2016-11-21 17:58:51 +03:00
int key_size , ret = - EINVAL ;
2006-10-03 12:15:37 +04:00
if ( argc < 2 )
goto error ;
2011-08-02 15:32:04 +04:00
if ( ! strcasecmp ( argv [ 0 ] , " key " ) ) {
2006-10-03 12:15:37 +04:00
/* Key changes are only accepted while the target is flagged as suspended. */
if ( ! test_bit ( DM_CRYPT_SUSPENDED , & cc - > flags ) ) {
DMWARN ( " not suspended during key manipulation. " ) ;
return - EINVAL ;
}
2011-08-02 15:32:04 +04:00
if ( argc = = 3 & & ! strcasecmp ( argv [ 1 ] , " set " ) ) {
2016-11-21 17:58:51 +03:00
/* The key size may not be changed. */
key_size = get_key_size ( & argv [ 2 ] ) ;
if ( key_size < 0 | | cc - > key_size ! = key_size ) {
/* Overwrite the supplied key argument before rejecting the request. */
memset ( argv [ 2 ] , ' 0 ' , strlen ( argv [ 2 ] ) ) ;
return - EINVAL ;
}
2009-12-11 02:51:57 +03:00
ret = crypt_set_key ( cc , argv [ 2 ] ) ;
if ( ret )
return ret ;
/* Re-run the IV generator init hook, if any, now that the key changed. */
if ( cc - > iv_gen_ops & & cc - > iv_gen_ops - > init )
ret = cc - > iv_gen_ops - > init ( cc ) ;
2018-01-12 18:30:32 +03:00
/* wipe the kernel key payload copy */
if ( cc - > key_string )
memset ( cc - > key , 0 , cc - > key_size * sizeof ( u8 ) ) ;
2009-12-11 02:51:57 +03:00
return ret ;
}
2019-07-09 16:22:12 +03:00
if ( argc = = 2 & & ! strcasecmp ( argv [ 1 ] , " wipe " ) )
2006-10-03 12:15:37 +04:00
return crypt_wipe_key ( cc ) ;
}
/* Reached for too few arguments, an unknown first word, or an unknown key sub-command. */
error :
DMWARN ( " unrecognised message received. " ) ;
return - EINVAL ;
}
2009-06-22 13:12:33 +04:00
/*
 * Device iteration callback: invoke fn once for the single underlying
 * device of this target, passing the device, the start offset stored in
 * the crypt config and the target length, and return whatever fn returns.
 */
static int crypt_iterate_devices(struct dm_target *ti,
				 iterate_devices_callout_fn fn, void *data)
{
	struct crypt_config *config = ti->private;
	int ret;

	ret = fn(ti, config->dev, config->start, ti->len, data);

	return ret;
}
2015-09-10 04:34:51 +03:00
static void crypt_io_hints ( struct dm_target * ti , struct queue_limits * limits )
{
2017-03-16 17:39:44 +03:00
struct crypt_config * cc = ti - > private ;
2015-09-10 04:34:51 +03:00
/*
* Unfortunate constraint that is required to avoid the potential
* for exceeding underlying device ' s max_segments limits - - due to
* crypt_alloc_buffer ( ) possibly allocating pages for the encryption
* bio that are not as physically contiguous as the original bio .
*/
limits - > max_segment_size = PAGE_SIZE ;
2017-03-16 17:39:44 +03:00
2018-08-10 18:23:56 +03:00
limits - > logical_block_size =
2020-06-04 22:01:26 +03:00
max_t ( unsigned , limits - > logical_block_size , cc - > sector_size ) ;
2018-08-10 18:23:56 +03:00
limits - > physical_block_size =
max_t ( unsigned , limits - > physical_block_size , cc - > sector_size ) ;
limits - > io_min = max_t ( unsigned , limits - > io_min , cc - > sector_size ) ;
2015-09-10 04:34:51 +03:00
}
2005-04-17 02:20:36 +04:00
/*
 * Target type descriptor for this module. It is handed to
 * dm_register_target() in dm_crypt_init() and to dm_unregister_target() in
 * dm_crypt_exit(), and binds the constructor, destructor, map, status,
 * suspend/resume, message, iterate_devices and io_hints callbacks defined
 * in this file. The .features and .report_zones members are only
 * initialised when CONFIG_BLK_DEV_ZONED is defined.
 */
static struct target_type crypt_target = {
. name = " crypt " ,
dm crypt: add flags to optionally bypass kcryptd workqueues
This is a follow up to [1] that detailed latency problems associated
with dm-crypt's use of workqueues when processing IO.
Current dm-crypt implementation creates a significant IO performance
overhead (at least on small IO block sizes) for both latency and
throughput. We suspect offloading IO request processing into
workqueues and async threads is more harmful these days with the
modern fast storage. I also did some digging into the dm-crypt git
history and much of this async processing is not needed anymore,
because the reasons it was added are mostly gone from the kernel. More
details can be found in [2] (see "Git archeology" section).
This change adds DM_CRYPT_NO_READ_WORKQUEUE and
DM_CRYPT_NO_WRITE_WORKQUEUE flags for read and write BIOs, which
direct dm-crypt to not offload crypto operations into kcryptd
workqueues. In addition, writes are not buffered to be sorted in the
dm-crypt red-black tree, but dispatched immediately. For cases, where
crypto operations cannot happen (hard interrupt context, for example
the read path of some NVME drivers), we offload the work to a tasklet
rather than a workqueue.
These flags only ensure no async BIO processing in the dm-crypt
module. It is worth noting that some Crypto API implementations may
offload encryption into their own workqueues, which are independent of
the dm-crypt and its configuration. However upon enabling these new
flags dm-crypt will instruct Crypto API not to backlog crypto
requests.
To give an idea of the performance gains for certain workloads,
consider the script, and results when tested against various
devices, detailed here:
https://www.redhat.com/archives/dm-devel/2020-July/msg00138.html
[1]: https://www.spinics.net/lists/dm-crypt/msg07516.html
[2]: https://blog.cloudflare.com/speeding-up-linux-disk-encryption/
Signed-off-by: Ignat Korchagin <ignat@cloudflare.com>
Reviewed-by: Damien Le Moal <damien.lemoal@wdc.com>
Reviewed-by: Bob Liu <bob.liu@oracle.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
2020-07-06 20:37:31 +03:00
. version = { 1 , 22 , 0 } ,
2005-04-17 02:20:36 +04:00
. module = THIS_MODULE ,
. ctr = crypt_ctr ,
. dtr = crypt_dtr ,
2020-07-08 12:28:08 +03:00
# ifdef CONFIG_BLK_DEV_ZONED
. features = DM_TARGET_ZONED_HM ,
. report_zones = crypt_report_zones ,
# endif
2005-04-17 02:20:36 +04:00
. map = crypt_map ,
. status = crypt_status ,
2006-10-03 12:15:37 +04:00
. postsuspend = crypt_postsuspend ,
. preresume = crypt_preresume ,
. resume = crypt_resume ,
. message = crypt_message ,
2009-06-22 13:12:33 +04:00
. iterate_devices = crypt_iterate_devices ,
2015-09-10 04:34:51 +03:00
. io_hints = crypt_io_hints ,
2005-04-17 02:20:36 +04:00
} ;
/*
 * Module initialisation: register crypt_target with dm_register_target().
 *
 * Returns the value from dm_register_target() unchanged; a negative value
 * is additionally reported through DMERR.
 */
static int __init dm_crypt_init ( void )
{
int r ;
r = dm_register_target ( & crypt_target ) ;
2015-02-13 16:25:26 +03:00
/* Log the failure here; the error code itself is passed up to the caller. */
if ( r < 0 )
2006-06-26 11:27:35 +04:00
DMERR ( " register failed %d " , r ) ;
2005-04-17 02:20:36 +04:00
return r ;
}
/*
 * Module teardown: undo dm_crypt_init() by unregistering crypt_target.
 */
static void __exit dm_crypt_exit(void)
{
	dm_unregister_target(&crypt_target);
}
/*
 * Module boilerplate: name dm_crypt_init() and dm_crypt_exit() as the module
 * entry and exit points, then declare author, description and licence.
 */
module_init ( dm_crypt_init ) ;
module_exit ( dm_crypt_exit ) ;
2014-06-24 22:27:04 +04:00
MODULE_AUTHOR ( " Jana Saout <jana@saout.de> " ) ;
2005-04-17 02:20:36 +04:00
MODULE_DESCRIPTION ( DM_NAME " target for transparent encryption / decryption " ) ;
MODULE_LICENSE ( " GPL " ) ;