- Adjust various DM structure members to improve alignment relative to
4.18 block's mempool_t and bioset changes. - Add DM writecache target that offers writeback caching to persistent memory or SSD. - Small DM core error message change to give context for why a DM table type transition wasn't allowed. -----BEGIN PGP SIGNATURE----- Version: GnuPG v1 iQEcBAABAgAGBQJbHsFxAAoJEMUj8QotnQNaHAgIAJPTwTOZboTzjQLrdiYEQ6q5 lk7ZJP44+VlnY+iPRzyf36JyjVgIoZ82gWMW28hJmbq1dWaVphWA9yxYemFqfkSb F7oqcWl/C2J7U8Zk5U+gJKGQXRBhhIIYO7W3KWKTfF1cSx1AcqM2Au5IPejBG/sP h42Pfil22Rfg1U3kpxU8UQHe/V9cr/3eaRu0rD477HKqob1M08jP+27jdTu1vmNH uGGDWz5Dgra2IIxx797f4gn2hHJ825dDgaFF35JkTbKRom/xk8GlREy5wxqFvkbI Ti45mMlRdBFxXkFyvToVMtbCfkcZ617hag8KV4/BZ/4zmGBLFQXddHMAgJeYChk= =KH0g -----END PGP SIGNATURE----- Merge tag 'for-4.18/dm-changes-v2' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm Pull device mapper updates from Mike Snitzer: - Adjust various DM structure members to improve alignment relative to 4.18 block's mempool_t and bioset changes. - Add DM writecache target that offers writeback caching to persistent memory or SSD. - Small DM core error message change to give context for why a DM table type transition wasn't allowed. * tag 'for-4.18/dm-changes-v2' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm: dm: add writecache target dm: adjust structure members to improve alignment dm: report which conflicting type caused error during table_load()
This commit is contained in:
commit
4597fcff07
68
Documentation/device-mapper/writecache.txt
Normal file
68
Documentation/device-mapper/writecache.txt
Normal file
@ -0,0 +1,68 @@
|
||||
The writecache target caches writes on persistent memory or on SSD. It
|
||||
doesn't cache reads because reads are supposed to be cached in the page cache
|
||||
in normal RAM.
|
||||
|
||||
When the device is constructed, the first sector should be zeroed or the
|
||||
first sector should contain a valid superblock from a previous invocation.
|
||||
|
||||
Constructor parameters:
|
||||
1. type of the cache device - "p" or "s"
|
||||
p - persistent memory
|
||||
s - SSD
|
||||
2. the underlying device that will be cached
|
||||
3. the cache device
|
||||
4. block size (4096 is recommended; the maximum block size is the page
|
||||
size)
|
||||
5. the number of optional parameters (the parameters with an argument
|
||||
count as two)
|
||||
high_watermark n (default: 50)
|
||||
start writeback when the number of used blocks reaches this
|
||||
watermark
|
||||
low_watermark x (default: 45)
|
||||
stop writeback when the number of used blocks drops below
|
||||
this watermark
|
||||
writeback_jobs n (default: unlimited)
|
||||
limit the number of blocks that are in flight during
|
||||
writeback. Setting this value reduces writeback
|
||||
throughput, but it may improve latency of read requests
|
||||
autocommit_blocks n (default: 64 for pmem, 65536 for ssd)
|
||||
when the application writes this number of blocks without
|
||||
issuing the FLUSH request, the blocks are automatically
|
||||
committed
|
||||
autocommit_time ms (default: 1000)
|
||||
autocommit time in milliseconds. The data is automatically
|
||||
committed if this time passes and no FLUSH request is
|
||||
received
|
||||
fua (by default on)
|
||||
applicable only to persistent memory - use the FUA flag
|
||||
when writing data from persistent memory back to the
|
||||
underlying device
|
||||
nofua
|
||||
applicable only to persistent memory - don't use the FUA
|
||||
flag when writing back data and send the FLUSH request
|
||||
afterwards
|
||||
- some underlying devices perform better with fua, some
|
||||
with nofua. The user should test it
|
||||
|
||||
Status:
|
||||
1. error indicator - 0 if there was no error, otherwise error number
|
||||
2. the number of blocks
|
||||
3. the number of free blocks
|
||||
4. the number of blocks under writeback
|
||||
|
||||
Messages:
|
||||
flush
|
||||
flush the cache device. The message returns successfully
|
||||
if the cache device was flushed without an error
|
||||
flush_on_suspend
|
||||
flush the cache device on next suspend. Use this message
|
||||
when you are going to remove the cache device. The proper
|
||||
sequence for removing the cache device is:
|
||||
1. send the "flush_on_suspend" message
|
||||
2. load an inactive table with a linear target that maps
|
||||
to the underlying device
|
||||
3. suspend the device
|
||||
4. ask for status and verify that there are no errors
|
||||
5. resume the device, so that it will use the linear
|
||||
target
|
||||
6. the cache device is now inactive and it can be deleted
|
@ -334,6 +334,17 @@ config DM_CACHE_SMQ
|
||||
of less memory utilization, improved performance and increased
|
||||
adaptability in the face of changing workloads.
|
||||
|
||||
config DM_WRITECACHE
|
||||
tristate "Writecache target"
|
||||
depends on BLK_DEV_DM
|
||||
---help---
|
||||
The writecache target caches writes on persistent memory or SSD.
|
||||
It is intended for databases or other programs that need extremely
|
||||
low commit latency.
|
||||
|
||||
The writecache target doesn't cache reads because reads are supposed
|
||||
to be cached in standard RAM.
|
||||
|
||||
config DM_ERA
|
||||
tristate "Era target (EXPERIMENTAL)"
|
||||
depends on BLK_DEV_DM
|
||||
|
@ -67,6 +67,7 @@ obj-$(CONFIG_DM_ERA) += dm-era.o
|
||||
obj-$(CONFIG_DM_LOG_WRITES) += dm-log-writes.o
|
||||
obj-$(CONFIG_DM_INTEGRITY) += dm-integrity.o
|
||||
obj-$(CONFIG_DM_ZONED) += dm-zoned.o
|
||||
obj-$(CONFIG_DM_WRITECACHE) += dm-writecache.o
|
||||
|
||||
ifeq ($(CONFIG_DM_UEVENT),y)
|
||||
dm-mod-objs += dm-uevent.o
|
||||
|
@ -19,8 +19,8 @@
|
||||
|
||||
struct dm_bio_prison {
|
||||
spinlock_t lock;
|
||||
mempool_t cell_pool;
|
||||
struct rb_root cells;
|
||||
mempool_t cell_pool;
|
||||
};
|
||||
|
||||
static struct kmem_cache *_cell_cache;
|
||||
|
@ -21,8 +21,8 @@ struct dm_bio_prison_v2 {
|
||||
struct workqueue_struct *wq;
|
||||
|
||||
spinlock_t lock;
|
||||
mempool_t cell_pool;
|
||||
struct rb_root cells;
|
||||
mempool_t cell_pool;
|
||||
};
|
||||
|
||||
static struct kmem_cache *_cell_cache;
|
||||
|
@ -371,7 +371,13 @@ struct cache_stats {
|
||||
|
||||
struct cache {
|
||||
struct dm_target *ti;
|
||||
struct dm_target_callbacks callbacks;
|
||||
spinlock_t lock;
|
||||
|
||||
/*
|
||||
* Fields for converting from sectors to blocks.
|
||||
*/
|
||||
int sectors_per_block_shift;
|
||||
sector_t sectors_per_block;
|
||||
|
||||
struct dm_cache_metadata *cmd;
|
||||
|
||||
@ -402,13 +408,11 @@ struct cache {
|
||||
dm_cblock_t cache_size;
|
||||
|
||||
/*
|
||||
* Fields for converting from sectors to blocks.
|
||||
* Invalidation fields.
|
||||
*/
|
||||
sector_t sectors_per_block;
|
||||
int sectors_per_block_shift;
|
||||
spinlock_t invalidation_lock;
|
||||
struct list_head invalidation_requests;
|
||||
|
||||
spinlock_t lock;
|
||||
struct bio_list deferred_bios;
|
||||
sector_t migration_threshold;
|
||||
wait_queue_head_t migration_wait;
|
||||
atomic_t nr_allocated_migrations;
|
||||
@ -419,13 +423,11 @@ struct cache {
|
||||
*/
|
||||
atomic_t nr_io_migrations;
|
||||
|
||||
struct bio_list deferred_bios;
|
||||
|
||||
struct rw_semaphore quiesce_lock;
|
||||
|
||||
/*
|
||||
* cache_size entries, dirty if set
|
||||
*/
|
||||
atomic_t nr_dirty;
|
||||
unsigned long *dirty_bitset;
|
||||
struct dm_target_callbacks callbacks;
|
||||
|
||||
/*
|
||||
* origin_blocks entries, discarded if set.
|
||||
@ -442,24 +444,20 @@ struct cache {
|
||||
const char **ctr_args;
|
||||
|
||||
struct dm_kcopyd_client *copier;
|
||||
struct workqueue_struct *wq;
|
||||
struct work_struct deferred_bio_worker;
|
||||
struct work_struct migration_worker;
|
||||
struct workqueue_struct *wq;
|
||||
struct delayed_work waker;
|
||||
struct dm_bio_prison_v2 *prison;
|
||||
struct bio_set bs;
|
||||
|
||||
mempool_t migration_pool;
|
||||
/*
|
||||
* cache_size entries, dirty if set
|
||||
*/
|
||||
unsigned long *dirty_bitset;
|
||||
atomic_t nr_dirty;
|
||||
|
||||
struct dm_cache_policy *policy;
|
||||
unsigned policy_nr_args;
|
||||
|
||||
bool need_tick_bio:1;
|
||||
bool sized:1;
|
||||
bool invalidate:1;
|
||||
bool commit_requested:1;
|
||||
bool loaded_mappings:1;
|
||||
bool loaded_discards:1;
|
||||
struct dm_cache_policy *policy;
|
||||
|
||||
/*
|
||||
* Cache features such as write-through.
|
||||
@ -468,18 +466,23 @@ struct cache {
|
||||
|
||||
struct cache_stats stats;
|
||||
|
||||
/*
|
||||
* Invalidation fields.
|
||||
*/
|
||||
spinlock_t invalidation_lock;
|
||||
struct list_head invalidation_requests;
|
||||
bool need_tick_bio:1;
|
||||
bool sized:1;
|
||||
bool invalidate:1;
|
||||
bool commit_requested:1;
|
||||
bool loaded_mappings:1;
|
||||
bool loaded_discards:1;
|
||||
|
||||
struct rw_semaphore background_work_lock;
|
||||
|
||||
struct batcher committer;
|
||||
struct work_struct commit_ws;
|
||||
|
||||
struct io_tracker tracker;
|
||||
|
||||
struct work_struct commit_ws;
|
||||
struct batcher committer;
|
||||
mempool_t migration_pool;
|
||||
|
||||
struct rw_semaphore background_work_lock;
|
||||
struct bio_set bs;
|
||||
};
|
||||
|
||||
struct per_bio_data {
|
||||
|
@ -31,6 +31,9 @@ struct dm_kobject_holder {
|
||||
struct mapped_device {
|
||||
struct mutex suspend_lock;
|
||||
|
||||
struct mutex table_devices_lock;
|
||||
struct list_head table_devices;
|
||||
|
||||
/*
|
||||
* The current mapping (struct dm_table *).
|
||||
* Use dm_get_live_table{_fast} or take suspend_lock for
|
||||
@ -38,17 +41,14 @@ struct mapped_device {
|
||||
*/
|
||||
void __rcu *map;
|
||||
|
||||
struct list_head table_devices;
|
||||
struct mutex table_devices_lock;
|
||||
|
||||
unsigned long flags;
|
||||
|
||||
struct request_queue *queue;
|
||||
int numa_node_id;
|
||||
|
||||
enum dm_queue_mode type;
|
||||
/* Protect queue and type against concurrent access. */
|
||||
struct mutex type_lock;
|
||||
enum dm_queue_mode type;
|
||||
|
||||
int numa_node_id;
|
||||
struct request_queue *queue;
|
||||
|
||||
atomic_t holders;
|
||||
atomic_t open_count;
|
||||
@ -56,21 +56,21 @@ struct mapped_device {
|
||||
struct dm_target *immutable_target;
|
||||
struct target_type *immutable_target_type;
|
||||
|
||||
char name[16];
|
||||
struct gendisk *disk;
|
||||
struct dax_device *dax_dev;
|
||||
char name[16];
|
||||
|
||||
void *interface_ptr;
|
||||
|
||||
/*
|
||||
* A list of ios that arrived while we were suspended.
|
||||
*/
|
||||
atomic_t pending[2];
|
||||
wait_queue_head_t wait;
|
||||
struct work_struct work;
|
||||
wait_queue_head_t wait;
|
||||
atomic_t pending[2];
|
||||
spinlock_t deferred_lock;
|
||||
struct bio_list deferred;
|
||||
|
||||
void *interface_ptr;
|
||||
|
||||
/*
|
||||
* Event handling.
|
||||
*/
|
||||
@ -83,17 +83,17 @@ struct mapped_device {
|
||||
/* the number of internal suspends */
|
||||
unsigned internal_suspend_count;
|
||||
|
||||
/*
|
||||
* Processing queue (flush)
|
||||
*/
|
||||
struct workqueue_struct *wq;
|
||||
|
||||
/*
|
||||
* io objects are allocated from here.
|
||||
*/
|
||||
struct bio_set io_bs;
|
||||
struct bio_set bs;
|
||||
|
||||
/*
|
||||
* Processing queue (flush)
|
||||
*/
|
||||
struct workqueue_struct *wq;
|
||||
|
||||
/*
|
||||
* freeze/thaw support requires holding onto a super block
|
||||
*/
|
||||
@ -102,11 +102,11 @@ struct mapped_device {
|
||||
/* forced geometry settings */
|
||||
struct hd_geometry geometry;
|
||||
|
||||
struct block_device *bdev;
|
||||
|
||||
/* kobject and completion */
|
||||
struct dm_kobject_holder kobj_holder;
|
||||
|
||||
struct block_device *bdev;
|
||||
|
||||
/* zero-length flush that will be cloned and submitted to targets */
|
||||
struct bio flush_bio;
|
||||
|
||||
|
@ -139,25 +139,13 @@ struct crypt_config {
|
||||
struct dm_dev *dev;
|
||||
sector_t start;
|
||||
|
||||
/*
|
||||
* pool for per bio private data, crypto requests,
|
||||
* encryption requests/buffer pages and integrity tags
|
||||
*/
|
||||
mempool_t req_pool;
|
||||
mempool_t page_pool;
|
||||
mempool_t tag_pool;
|
||||
unsigned tag_pool_max_sectors;
|
||||
|
||||
struct percpu_counter n_allocated_pages;
|
||||
|
||||
struct bio_set bs;
|
||||
struct mutex bio_alloc_lock;
|
||||
|
||||
struct workqueue_struct *io_queue;
|
||||
struct workqueue_struct *crypt_queue;
|
||||
|
||||
struct task_struct *write_thread;
|
||||
wait_queue_head_t write_thread_wait;
|
||||
struct task_struct *write_thread;
|
||||
struct rb_root write_tree;
|
||||
|
||||
char *cipher;
|
||||
@ -213,6 +201,18 @@ struct crypt_config {
|
||||
unsigned int integrity_iv_size;
|
||||
unsigned int on_disk_tag_size;
|
||||
|
||||
/*
|
||||
* pool for per bio private data, crypto requests,
|
||||
* encryption requests/buffer pages and integrity tags
|
||||
*/
|
||||
unsigned tag_pool_max_sectors;
|
||||
mempool_t tag_pool;
|
||||
mempool_t req_pool;
|
||||
mempool_t page_pool;
|
||||
|
||||
struct bio_set bs;
|
||||
struct mutex bio_alloc_lock;
|
||||
|
||||
u8 *authenc_key; /* space for keys in authenc() format (if used) */
|
||||
u8 key[0];
|
||||
};
|
||||
|
@ -1344,7 +1344,8 @@ static int table_load(struct file *filp, struct dm_ioctl *param, size_t param_si
|
||||
goto err_unlock_md_type;
|
||||
}
|
||||
} else if (!is_valid_type(dm_get_md_type(md), dm_table_get_type(t))) {
|
||||
DMWARN("can't change device type after initial table load.");
|
||||
DMWARN("can't change device type (old=%u vs new=%u) after initial table load.",
|
||||
dm_get_md_type(md), dm_table_get_type(t));
|
||||
r = -EINVAL;
|
||||
goto err_unlock_md_type;
|
||||
}
|
||||
|
@ -45,7 +45,6 @@ struct dm_kcopyd_client {
|
||||
struct dm_io_client *io_client;
|
||||
|
||||
wait_queue_head_t destroyq;
|
||||
atomic_t nr_jobs;
|
||||
|
||||
mempool_t job_pool;
|
||||
|
||||
@ -54,6 +53,8 @@ struct dm_kcopyd_client {
|
||||
|
||||
struct dm_kcopyd_throttle *throttle;
|
||||
|
||||
atomic_t nr_jobs;
|
||||
|
||||
/*
|
||||
* We maintain three lists of jobs:
|
||||
*
|
||||
|
@ -63,28 +63,29 @@ struct dm_region_hash {
|
||||
|
||||
/* hash table */
|
||||
rwlock_t hash_lock;
|
||||
mempool_t region_pool;
|
||||
unsigned mask;
|
||||
unsigned nr_buckets;
|
||||
unsigned prime;
|
||||
unsigned shift;
|
||||
struct list_head *buckets;
|
||||
|
||||
unsigned max_recovery; /* Max # of regions to recover in parallel */
|
||||
|
||||
spinlock_t region_lock;
|
||||
atomic_t recovery_in_flight;
|
||||
struct semaphore recovery_count;
|
||||
struct list_head clean_regions;
|
||||
struct list_head quiesced_regions;
|
||||
struct list_head recovered_regions;
|
||||
struct list_head failed_recovered_regions;
|
||||
|
||||
/*
|
||||
* If there was a flush failure no regions can be marked clean.
|
||||
*/
|
||||
int flush_failure;
|
||||
|
||||
unsigned max_recovery; /* Max # of regions to recover in parallel */
|
||||
|
||||
spinlock_t region_lock;
|
||||
atomic_t recovery_in_flight;
|
||||
struct list_head clean_regions;
|
||||
struct list_head quiesced_regions;
|
||||
struct list_head recovered_regions;
|
||||
struct list_head failed_recovered_regions;
|
||||
struct semaphore recovery_count;
|
||||
|
||||
mempool_t region_pool;
|
||||
|
||||
void *context;
|
||||
sector_t target_begin;
|
||||
|
||||
|
@ -240,9 +240,9 @@ struct pool {
|
||||
struct dm_bio_prison *prison;
|
||||
struct dm_kcopyd_client *copier;
|
||||
|
||||
struct work_struct worker;
|
||||
struct workqueue_struct *wq;
|
||||
struct throttle throttle;
|
||||
struct work_struct worker;
|
||||
struct delayed_work waker;
|
||||
struct delayed_work no_space_timeout;
|
||||
|
||||
@ -260,7 +260,6 @@ struct pool {
|
||||
struct dm_deferred_set *all_io_ds;
|
||||
|
||||
struct dm_thin_new_mapping *next_mapping;
|
||||
mempool_t mapping_pool;
|
||||
|
||||
process_bio_fn process_bio;
|
||||
process_bio_fn process_discard;
|
||||
@ -273,6 +272,8 @@ struct pool {
|
||||
process_mapping_fn process_prepared_discard_pt2;
|
||||
|
||||
struct dm_bio_prison_cell **cell_sort_array;
|
||||
|
||||
mempool_t mapping_pool;
|
||||
};
|
||||
|
||||
static enum pool_mode get_pool_mode(struct pool *pool);
|
||||
|
2305
drivers/md/dm-writecache.c
Normal file
2305
drivers/md/dm-writecache.c
Normal file
File diff suppressed because it is too large
Load Diff
@ -52,9 +52,9 @@ struct dmz_target {
|
||||
struct dmz_reclaim *reclaim;
|
||||
|
||||
/* For chunk work */
|
||||
struct mutex chunk_lock;
|
||||
struct radix_tree_root chunk_rxtree;
|
||||
struct workqueue_struct *chunk_wq;
|
||||
struct mutex chunk_lock;
|
||||
|
||||
/* For cloned BIOs to zones */
|
||||
struct bio_set bio_set;
|
||||
|
Loading…
Reference in New Issue
Block a user