2005-04-17 02:20:36 +04:00
/*
 * dm-snapshot.c
 *
 * Copyright (C) 2001-2002 Sistina Software (UK) Limited.
 *
 * This file is released under the GPL.
 */
# include <linux/blkdev.h>
# include <linux/device-mapper.h>
2009-01-06 06:04:54 +03:00
# include <linux/delay.h>
2005-04-17 02:20:36 +04:00
# include <linux/fs.h>
# include <linux/init.h>
# include <linux/kdev_t.h>
# include <linux/list.h>
# include <linux/mempool.h>
# include <linux/module.h>
# include <linux/slab.h>
# include <linux/vmalloc.h>
2007-10-20 01:38:44 +04:00
# include <linux/log2.h>
2008-04-25 01:02:01 +04:00
# include <linux/dm-kcopyd.h>
2005-04-17 02:20:36 +04:00
2015-02-26 19:40:35 +03:00
# include "dm.h"
2009-01-06 06:05:15 +03:00
# include "dm-exception-store.h"
2005-04-17 02:20:36 +04:00
2006-06-26 11:27:35 +04:00
# define DM_MSG_PREFIX "snapshots"
2009-12-11 02:52:30 +03:00
/*
 * Name array for the snapshot-merge target type.  A target is recognised
 * as snapshot-merge by comparing the address of its type's name against
 * this array, not by comparing the string contents.
 */
static const char dm_snapshot_merge_target_name[] = " snapshot-merge ";

#define dm_target_is_snapshot_merge(ti) \
	((ti)->type->name == dm_snapshot_merge_target_name)
2008-07-21 15:00:32 +04:00
/*
 * The size of the mempool used to track chunks in use.
 */
#define MIN_IOS 256

/*
 * Tracked chunks are hashed into DM_TRACKED_CHUNK_HASH_SIZE buckets; the
 * bucket index is the value masked to the table size.
 */
#define DM_TRACKED_CHUNK_HASH_SIZE 16
#define DM_TRACKED_CHUNK_HASH(x) \
	((unsigned long)(x) & (DM_TRACKED_CHUNK_HASH_SIZE - 1))
2009-12-11 02:52:10 +03:00
/*
 * A hash table of exceptions: an array of list heads.  A chunk number is
 * shifted right by hash_shift and masked with hash_mask to pick a bucket.
 */
struct dm_exception_table {
	uint32_t hash_mask;		/* number of buckets minus one */
	unsigned hash_shift;		/* low chunk bits ignored when hashing */
	struct list_head *table;	/* the bucket array */
};
struct dm_snapshot {
struct rw_semaphore lock ;
struct dm_dev * origin ;
2009-12-11 02:52:12 +03:00
struct dm_dev * cow ;
struct dm_target * ti ;
2009-04-02 22:55:34 +04:00
/* List of snapshots per Origin */
struct list_head list ;
2009-12-11 02:52:35 +03:00
/*
* You can ' t use a snapshot if this is 0 ( e . g . if full ) .
* A snapshot - merge target never clears this .
*/
2009-04-02 22:55:34 +04:00
int valid ;
2015-06-21 23:31:33 +03:00
/*
* The snapshot overflowed because of a write to the snapshot device .
* We don ' t have to invalidate the snapshot in this case , but we need
* to prevent further writes .
*/
int snapshot_overflowed ;
2009-04-02 22:55:34 +04:00
/* Origin writes don't trigger exceptions until this is set */
int active ;
atomic_t pending_exceptions_count ;
dm snapshot: avoid snapshot space leak on crash
There is a possible leak of snapshot space in case of crash.
The reason for space leaking is that chunks in the snapshot device are
allocated sequentially, but they are finished (and stored in the metadata)
out of order, depending on the order in which copying finished.
For example, supposed that the metadata contains the following records
SUPERBLOCK
METADATA (blocks 0 ... 250)
DATA 0
DATA 1
DATA 2
...
DATA 250
Now suppose that you allocate 10 new data blocks 251-260. Suppose that
copying of these blocks finish out of order (block 260 finished first
and the block 251 finished last). Now, the snapshot device looks like
this:
SUPERBLOCK
METADATA (blocks 0 ... 250, 260, 259, 258, 257, 256)
DATA 0
DATA 1
DATA 2
...
DATA 250
DATA 251
DATA 252
DATA 253
DATA 254
DATA 255
METADATA (blocks 255, 254, 253, 252, 251)
DATA 256
DATA 257
DATA 258
DATA 259
DATA 260
Now, if the machine crashes after writing the first metadata block but
before writing the second metadata block, the space for areas DATA 250-255
is leaked, it contains no valid data and it will never be used in the
future.
This patch makes dm-snapshot complete exceptions in the same order they
were allocated, thus fixing this bug.
Note: when backporting this patch to the stable kernel, change the version
field in the following way:
* if version in the stable kernel is {1, 11, 1}, change it to {1, 12, 0}
* if version in the stable kernel is {1, 10, 0} or {1, 10, 1}, change it
to {1, 10, 2}
Userspace reads the version to determine if the bug was fixed, so the
version change is needed.
Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
Cc: stable@vger.kernel.org
2013-11-30 03:13:37 +04:00
/* Protected by "lock" */
sector_t exception_start_sequence ;
/* Protected by kcopyd single-threaded callback */
sector_t exception_complete_sequence ;
/*
* A list of pending exceptions that completed out of order .
* Protected by kcopyd single - threaded callback .
*/
struct list_head out_of_order_list ;
2010-03-06 05:32:33 +03:00
mempool_t * pending_pool ;
2009-12-11 02:52:10 +03:00
struct dm_exception_table pending ;
struct dm_exception_table complete ;
2009-04-02 22:55:34 +04:00
/*
* pe_lock protects all pending_exception operations and access
* as well as the snapshot_bios list .
*/
spinlock_t pe_lock ;
2010-03-06 05:32:33 +03:00
/* Chunks with outstanding reads */
spinlock_t tracked_chunk_lock ;
struct hlist_head tracked_chunk_hash [ DM_TRACKED_CHUNK_HASH_SIZE ] ;
2009-04-02 22:55:34 +04:00
/* The on disk metadata handler */
struct dm_exception_store * store ;
struct dm_kcopyd_client * kcopyd_client ;
2010-03-06 05:32:33 +03:00
/* Wait for events based on state_bits */
unsigned long state_bits ;
/* Range of chunks currently being merged. */
chunk_t first_merging_chunk ;
int num_merging_chunks ;
2009-12-11 02:52:32 +03:00
2009-12-11 02:52:35 +03:00
/*
* The merge operation failed if this flag is set .
* Failure modes are handled as follows :
* - I / O error reading the header
* = > don ' t load the target ; abort .
* - Header does not have " valid " flag set
* = > use the origin ; forget about the snapshot .
* - I / O error when reading exceptions
* = > don ' t load the target ; abort .
* ( We can ' t use the intermediate origin state . )
* - I / O error while merging
* = > stop merging ; set merge_failed ; process I / O normally .
*/
int merge_failed ;
2009-12-11 02:52:33 +03:00
/*
* Incoming bios that overlap with chunks being merged must wait
* for them to be committed .
*/
struct bio_list bios_queued_during_merge ;
2009-04-02 22:55:34 +04:00
} ;
2009-12-11 02:52:32 +03:00
/*
* state_bits :
* RUNNING_MERGE - Merge operation is in progress .
* SHUTDOWN_MERGE - Set to signal that merge needs to be stopped ;
* cleared afterwards .
*/
# define RUNNING_MERGE 0
# define SHUTDOWN_MERGE 1
2013-03-02 02:45:49 +04:00
DECLARE_DM_KCOPYD_THROTTLE_WITH_MODULE_PARM ( snapshot_copy_throttle ,
" A percentage of time allocated for copy on write " ) ;
2010-08-12 07:13:51 +04:00
struct dm_dev * dm_snap_origin ( struct dm_snapshot * s )
{
return s - > origin ;
}
EXPORT_SYMBOL ( dm_snap_origin ) ;
2009-12-11 02:52:12 +03:00
struct dm_dev * dm_snap_cow ( struct dm_snapshot * s )
{
return s - > cow ;
}
EXPORT_SYMBOL ( dm_snap_cow ) ;
2009-04-02 22:55:34 +04:00
/*
 * Convert a chunk number into a sector number by shifting it left by the
 * store's chunk_shift.
 */
static sector_t chunk_to_sector(struct dm_exception_store *store,
				chunk_t chunk)
{
	sector_t first_sector = chunk << store->chunk_shift;

	return first_sector;
}
/*
 * Return non-zero when both arguments refer to the same block device.
 *
 * There is only ever one instance of a particular block device, so
 * comparing the pointers is sufficient.
 */
static int bdev_equal(struct block_device *lhs, struct block_device *rhs)
{
	if (lhs == rhs)
		return 1;

	return 0;
}
2007-07-12 20:26:32 +04:00
struct dm_snap_pending_exception {
2009-12-11 02:52:10 +03:00
struct dm_exception e ;
2005-04-17 02:20:36 +04:00
/*
* Origin buffers waiting for this to complete are held
* in a bio list
*/
struct bio_list origin_bios ;
struct bio_list snapshot_bios ;
/* Pointer back to snapshot context */
struct dm_snapshot * snap ;
/*
* 1 indicates the exception has already been sent to
* kcopyd .
*/
int started ;
2011-08-02 15:32:04 +04:00
dm snapshot: avoid snapshot space leak on crash
There is a possible leak of snapshot space in case of crash.
The reason for space leaking is that chunks in the snapshot device are
allocated sequentially, but they are finished (and stored in the metadata)
out of order, depending on the order in which copying finished.
For example, supposed that the metadata contains the following records
SUPERBLOCK
METADATA (blocks 0 ... 250)
DATA 0
DATA 1
DATA 2
...
DATA 250
Now suppose that you allocate 10 new data blocks 251-260. Suppose that
copying of these blocks finish out of order (block 260 finished first
and the block 251 finished last). Now, the snapshot device looks like
this:
SUPERBLOCK
METADATA (blocks 0 ... 250, 260, 259, 258, 257, 256)
DATA 0
DATA 1
DATA 2
...
DATA 250
DATA 251
DATA 252
DATA 253
DATA 254
DATA 255
METADATA (blocks 255, 254, 253, 252, 251)
DATA 256
DATA 257
DATA 258
DATA 259
DATA 260
Now, if the machine crashes after writing the first metadata block but
before writing the second metadata block, the space for areas DATA 250-255
is leaked, it contains no valid data and it will never be used in the
future.
This patch makes dm-snapshot complete exceptions in the same order they
were allocated, thus fixing this bug.
Note: when backporting this patch to the stable kernel, change the version
field in the following way:
* if version in the stable kernel is {1, 11, 1}, change it to {1, 12, 0}
* if version in the stable kernel is {1, 10, 0} or {1, 10, 1}, change it
to {1, 10, 2}
Userspace reads the version to determine if the bug was fixed, so the
version change is needed.
Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
Cc: stable@vger.kernel.org
2013-11-30 03:13:37 +04:00
/* There was copying error. */
int copy_error ;
/* A sequence number, it is used for in-order completion. */
sector_t exception_sequence ;
struct list_head out_of_order_entry ;
2011-08-02 15:32:04 +04:00
/*
* For writing a complete chunk , bypassing the copy .
*/
struct bio * full_bio ;
bio_end_io_t * full_bio_end_io ;
2005-04-17 02:20:36 +04:00
} ;
/*
* Hash table mapping origin volumes to lists of snapshots and
* a lock to protect it
*/
2006-12-07 07:33:20 +03:00
/* Slab cache that completed exceptions are allocated from and freed to. */
static struct kmem_cache *exception_cache;
/* NOTE(review): presumably the slab cache behind pending exceptions; its setup is not in this part of the file. */
static struct kmem_cache *pending_cache;
2005-04-17 02:20:36 +04:00
2008-07-21 15:00:32 +04:00
/*
 * Per-bio record, stored in the bio's dm per-bio data, that links the bio
 * into a snapshot's tracked_chunk_hash.
 */
struct dm_snap_tracked_chunk {
	struct hlist_node node;	/* linkage in tracked_chunk_hash[] */
	chunk_t chunk;		/* chunk the bio is accessing */
};
2012-12-22 00:23:41 +04:00
static void init_tracked_chunk ( struct bio * bio )
{
struct dm_snap_tracked_chunk * c = dm_per_bio_data ( bio , sizeof ( struct dm_snap_tracked_chunk ) ) ;
INIT_HLIST_NODE ( & c - > node ) ;
}
static bool is_bio_tracked ( struct bio * bio )
{
struct dm_snap_tracked_chunk * c = dm_per_bio_data ( bio , sizeof ( struct dm_snap_tracked_chunk ) ) ;
return ! hlist_unhashed ( & c - > node ) ;
}
/*
 * Record that @bio is accessing @chunk: store the chunk number in the
 * bio's tracking record and add the record to the matching hash bucket
 * of @s under tracked_chunk_lock.
 */
static void track_chunk(struct dm_snapshot *s, struct bio *bio, chunk_t chunk)
{
	struct dm_snap_tracked_chunk *tracked;

	tracked = dm_per_bio_data(bio, sizeof(struct dm_snap_tracked_chunk));
	tracked->chunk = chunk;

	spin_lock_irq(&s->tracked_chunk_lock);
	hlist_add_head(&tracked->node,
		       &s->tracked_chunk_hash[DM_TRACKED_CHUNK_HASH(chunk)]);
	spin_unlock_irq(&s->tracked_chunk_lock);
}
2012-12-22 00:23:41 +04:00
static void stop_tracking_chunk ( struct dm_snapshot * s , struct bio * bio )
2008-07-21 15:00:32 +04:00
{
2012-12-22 00:23:41 +04:00
struct dm_snap_tracked_chunk * c = dm_per_bio_data ( bio , sizeof ( struct dm_snap_tracked_chunk ) ) ;
2008-07-21 15:00:32 +04:00
unsigned long flags ;
spin_lock_irqsave ( & s - > tracked_chunk_lock , flags ) ;
hlist_del ( & c - > node ) ;
spin_unlock_irqrestore ( & s - > tracked_chunk_lock , flags ) ;
}
2008-07-21 15:00:34 +04:00
/*
 * Return 1 if some tracked bio of snapshot @s is recorded against @chunk,
 * 0 otherwise.  The hash bucket is scanned under tracked_chunk_lock.
 */
static int __chunk_is_tracked(struct dm_snapshot *s, chunk_t chunk)
{
	struct hlist_head *bucket;
	struct dm_snap_tracked_chunk *entry;
	int found = 0;

	bucket = &s->tracked_chunk_hash[DM_TRACKED_CHUNK_HASH(chunk)];

	spin_lock_irq(&s->tracked_chunk_lock);

	hlist_for_each_entry(entry, bucket, node) {
		if (entry->chunk != chunk)
			continue;

		found = 1;
		break;
	}

	spin_unlock_irq(&s->tracked_chunk_lock);

	return found;
}
2009-12-11 02:52:29 +03:00
/*
 * Wait until no tracked bio refers to @chunk any more.
 *
 * This conflicting I/O is extremely improbable in the caller,
 * so msleep(1) is sufficient and there is no need for a wait queue.
 */
static void __check_for_conflicting_io(struct dm_snapshot *s, chunk_t chunk)
{
	for (;;) {
		if (!__chunk_is_tracked(s, chunk))
			break;

		msleep(1);
	}
}
2005-04-17 02:20:36 +04:00
/*
 * One of these per registered origin, held in the snapshot_origins hash
 */
struct origin {
	/* The origin device */
	struct block_device *bdev;

	/* Linkage in the _origins hash bucket */
	struct list_head hash_list;

	/* List of snapshots for this origin */
	struct list_head snapshots;
};
2015-02-26 19:40:35 +03:00
/*
 * This structure is allocated for each origin target
 */
struct dm_origin {
	struct dm_dev *dev;
	struct dm_target *ti;
	unsigned split_boundary;

	/* Linkage in the _dm_origins hash bucket */
	struct list_head hash_list;
};
2005-04-17 02:20:36 +04:00
/*
* Size of the hash table for origin volumes . If we make this
* the size of the minors list then it should be nearly perfect
*/
# define ORIGIN_HASH_SIZE 256
# define ORIGIN_MASK 0xFF
static struct list_head * _origins ;
2015-02-26 19:40:35 +03:00
static struct list_head * _dm_origins ;
2005-04-17 02:20:36 +04:00
static struct rw_semaphore _origins_lock ;
2009-12-11 02:52:34 +03:00
static DECLARE_WAIT_QUEUE_HEAD ( _pending_exceptions_done ) ;
static DEFINE_SPINLOCK ( _pending_exceptions_done_spinlock ) ;
static uint64_t _pending_exceptions_done_count ;
2005-04-17 02:20:36 +04:00
static int init_origin_hash ( void )
{
int i ;
_origins = kmalloc ( ORIGIN_HASH_SIZE * sizeof ( struct list_head ) ,
GFP_KERNEL ) ;
if ( ! _origins ) {
2015-02-26 19:40:35 +03:00
DMERR ( " unable to allocate memory for _origins " ) ;
2005-04-17 02:20:36 +04:00
return - ENOMEM ;
}
for ( i = 0 ; i < ORIGIN_HASH_SIZE ; i + + )
INIT_LIST_HEAD ( _origins + i ) ;
2015-02-26 19:40:35 +03:00
_dm_origins = kmalloc ( ORIGIN_HASH_SIZE * sizeof ( struct list_head ) ,
GFP_KERNEL ) ;
if ( ! _dm_origins ) {
DMERR ( " unable to allocate memory for _dm_origins " ) ;
kfree ( _origins ) ;
return - ENOMEM ;
}
for ( i = 0 ; i < ORIGIN_HASH_SIZE ; i + + )
INIT_LIST_HEAD ( _dm_origins + i ) ;
2005-04-17 02:20:36 +04:00
init_rwsem ( & _origins_lock ) ;
return 0 ;
}
static void exit_origin_hash ( void )
{
kfree ( _origins ) ;
2015-02-26 19:40:35 +03:00
kfree ( _dm_origins ) ;
2005-04-17 02:20:36 +04:00
}
2007-07-12 20:26:32 +04:00
static unsigned origin_hash ( struct block_device * bdev )
2005-04-17 02:20:36 +04:00
{
return bdev - > bd_dev & ORIGIN_MASK ;
}
static struct origin * __lookup_origin ( struct block_device * origin )
{
struct list_head * ol ;
struct origin * o ;
ol = & _origins [ origin_hash ( origin ) ] ;
list_for_each_entry ( o , ol , hash_list )
if ( bdev_equal ( o - > bdev , origin ) )
return o ;
return NULL ;
}
static void __insert_origin ( struct origin * o )
{
struct list_head * sl = & _origins [ origin_hash ( o - > bdev ) ] ;
list_add_tail ( & o - > hash_list , sl ) ;
}
2015-02-26 19:40:35 +03:00
static struct dm_origin * __lookup_dm_origin ( struct block_device * origin )
{
struct list_head * ol ;
struct dm_origin * o ;
ol = & _dm_origins [ origin_hash ( origin ) ] ;
list_for_each_entry ( o , ol , hash_list )
if ( bdev_equal ( o - > dev - > bdev , origin ) )
return o ;
return NULL ;
}
static void __insert_dm_origin ( struct dm_origin * o )
{
struct list_head * sl = & _dm_origins [ origin_hash ( o - > dev - > bdev ) ] ;
list_add_tail ( & o - > hash_list , sl ) ;
}
static void __remove_dm_origin ( struct dm_origin * o )
{
list_del ( & o - > hash_list ) ;
}
2009-12-11 02:52:24 +03:00
/*
* _origins_lock must be held when calling this function .
* Returns number of snapshots registered using the supplied cow device , plus :
* snap_src - a snapshot suitable for use as a source of exception handover
* snap_dest - a snapshot capable of receiving exception handover .
2009-12-11 02:52:32 +03:00
* snap_merge - an existing snapshot - merge target linked to the same origin .
* There can be at most one snapshot - merge target . The parameter is optional .
2009-12-11 02:52:24 +03:00
*
2009-12-11 02:52:32 +03:00
* Possible return values and states of snap_src and snap_dest .
2009-12-11 02:52:24 +03:00
* 0 : NULL , NULL - first new snapshot
* 1 : snap_src , NULL - normal snapshot
* 2 : snap_src , snap_dest - waiting for handover
* 2 : snap_src , NULL - handed over , waiting for old to be deleted
* 1 : NULL , snap_dest - source got destroyed without handover
*/
static int __find_snapshots_sharing_cow ( struct dm_snapshot * snap ,
struct dm_snapshot * * snap_src ,
2009-12-11 02:52:32 +03:00
struct dm_snapshot * * snap_dest ,
struct dm_snapshot * * snap_merge )
2009-12-11 02:52:24 +03:00
{
struct dm_snapshot * s ;
struct origin * o ;
int count = 0 ;
int active ;
o = __lookup_origin ( snap - > origin - > bdev ) ;
if ( ! o )
goto out ;
list_for_each_entry ( s , & o - > snapshots , list ) {
2009-12-11 02:52:32 +03:00
if ( dm_target_is_snapshot_merge ( s - > ti ) & & snap_merge )
* snap_merge = s ;
2009-12-11 02:52:24 +03:00
if ( ! bdev_equal ( s - > cow - > bdev , snap - > cow - > bdev ) )
continue ;
down_read ( & s - > lock ) ;
active = s - > active ;
up_read ( & s - > lock ) ;
if ( active ) {
if ( snap_src )
* snap_src = s ;
} else if ( snap_dest )
* snap_dest = s ;
count + + ;
}
out :
return count ;
}
/*
* On success , returns 1 if this snapshot is a handover destination ,
* otherwise returns 0.
*/
static int __validate_exception_handover ( struct dm_snapshot * snap )
{
struct dm_snapshot * snap_src = NULL , * snap_dest = NULL ;
2009-12-11 02:52:32 +03:00
struct dm_snapshot * snap_merge = NULL ;
2009-12-11 02:52:24 +03:00
/* Does snapshot need exceptions handed over to it? */
2009-12-11 02:52:32 +03:00
if ( ( __find_snapshots_sharing_cow ( snap , & snap_src , & snap_dest ,
& snap_merge ) = = 2 ) | |
2009-12-11 02:52:24 +03:00
snap_dest ) {
snap - > ti - > error = " Snapshot cow pairing for exception "
" table handover failed " ;
return - EINVAL ;
}
/*
* If no snap_src was found , snap cannot become a handover
* destination .
*/
if ( ! snap_src )
return 0 ;
2009-12-11 02:52:32 +03:00
/*
* Non - snapshot - merge handover ?
*/
if ( ! dm_target_is_snapshot_merge ( snap - > ti ) )
return 1 ;
/*
* Do not allow more than one merging snapshot .
*/
if ( snap_merge ) {
snap - > ti - > error = " A snapshot is already merging. " ;
return - EINVAL ;
}
2009-12-11 02:52:32 +03:00
if ( ! snap_src - > store - > type - > prepare_merge | |
! snap_src - > store - > type - > commit_merge ) {
snap - > ti - > error = " Snapshot exception store does not "
" support snapshot-merge. " ;
return - EINVAL ;
}
2009-12-11 02:52:24 +03:00
return 1 ;
}
static void __insert_snapshot ( struct origin * o , struct dm_snapshot * s )
{
struct dm_snapshot * l ;
/* Sort the list according to chunk size, largest-first smallest-last */
list_for_each_entry ( l , & o - > snapshots , list )
if ( l - > store - > chunk_size < s - > store - > chunk_size )
break ;
list_add_tail ( & s - > list , & l - > list ) ;
}
2005-04-17 02:20:36 +04:00
/*
* Make a note of the snapshot and its origin so we can look it
* up when the origin has a write on it .
2009-12-11 02:52:24 +03:00
*
* Also validate snapshot exception store handovers .
* On success , returns 1 if this registration is a handover destination ,
* otherwise returns 0.
2005-04-17 02:20:36 +04:00
*/
static int register_snapshot ( struct dm_snapshot * snap )
{
2009-12-11 02:52:24 +03:00
struct origin * o , * new_o = NULL ;
2005-04-17 02:20:36 +04:00
struct block_device * bdev = snap - > origin - > bdev ;
2009-12-11 02:52:24 +03:00
int r = 0 ;
2005-04-17 02:20:36 +04:00
2008-10-30 16:33:12 +03:00
new_o = kmalloc ( sizeof ( * new_o ) , GFP_KERNEL ) ;
if ( ! new_o )
return - ENOMEM ;
2005-04-17 02:20:36 +04:00
down_write ( & _origins_lock ) ;
2009-12-11 02:52:24 +03:00
r = __validate_exception_handover ( snap ) ;
if ( r < 0 ) {
kfree ( new_o ) ;
goto out ;
}
o = __lookup_origin ( bdev ) ;
2008-10-30 16:33:12 +03:00
if ( o )
kfree ( new_o ) ;
else {
2005-04-17 02:20:36 +04:00
/* New origin */
2008-10-30 16:33:12 +03:00
o = new_o ;
2005-04-17 02:20:36 +04:00
/* Initialise the struct */
INIT_LIST_HEAD ( & o - > snapshots ) ;
o - > bdev = bdev ;
__insert_origin ( o ) ;
}
2009-12-11 02:52:24 +03:00
__insert_snapshot ( o , snap ) ;
out :
up_write ( & _origins_lock ) ;
return r ;
}
/*
* Move snapshot to correct place in list according to chunk size .
*/
static void reregister_snapshot ( struct dm_snapshot * s )
{
struct block_device * bdev = s - > origin - > bdev ;
down_write ( & _origins_lock ) ;
list_del ( & s - > list ) ;
__insert_snapshot ( __lookup_origin ( bdev ) , s ) ;
2005-04-17 02:20:36 +04:00
up_write ( & _origins_lock ) ;
}
static void unregister_snapshot ( struct dm_snapshot * s )
{
struct origin * o ;
down_write ( & _origins_lock ) ;
o = __lookup_origin ( s - > origin - > bdev ) ;
list_del ( & s - > list ) ;
2009-12-11 02:52:24 +03:00
if ( o & & list_empty ( & o - > snapshots ) ) {
2005-04-17 02:20:36 +04:00
list_del ( & o - > hash_list ) ;
kfree ( o ) ;
}
up_write ( & _origins_lock ) ;
}
/*
* Implementation of the exception hash tables .
2008-02-08 05:11:27 +03:00
* The lowest hash_shift bits of the chunk number are ignored , allowing
* some consecutive chunks to be grouped together .
2005-04-17 02:20:36 +04:00
*/
2009-12-11 02:52:11 +03:00
static int dm_exception_table_init ( struct dm_exception_table * et ,
uint32_t size , unsigned hash_shift )
2005-04-17 02:20:36 +04:00
{
unsigned int i ;
2008-02-08 05:11:27 +03:00
et - > hash_shift = hash_shift ;
2005-04-17 02:20:36 +04:00
et - > hash_mask = size - 1 ;
et - > table = dm_vcalloc ( size , sizeof ( struct list_head ) ) ;
if ( ! et - > table )
return - ENOMEM ;
for ( i = 0 ; i < size ; i + + )
INIT_LIST_HEAD ( et - > table + i ) ;
return 0 ;
}
2009-12-11 02:52:11 +03:00
static void dm_exception_table_exit ( struct dm_exception_table * et ,
struct kmem_cache * mem )
2005-04-17 02:20:36 +04:00
{
struct list_head * slot ;
2009-12-11 02:52:10 +03:00
struct dm_exception * ex , * next ;
2005-04-17 02:20:36 +04:00
int i , size ;
size = et - > hash_mask + 1 ;
for ( i = 0 ; i < size ; i + + ) {
slot = et - > table + i ;
list_for_each_entry_safe ( ex , next , slot , hash_list )
kmem_cache_free ( mem , ex ) ;
}
vfree ( et - > table ) ;
}
2009-12-11 02:52:10 +03:00
/*
 * Bucket index for @chunk in @et: drop the low hash_shift bits, then mask
 * with hash_mask.
 */
static uint32_t exception_hash(struct dm_exception_table *et, chunk_t chunk)
{
	uint32_t bucket = (chunk >> et->hash_shift) & et->hash_mask;

	return bucket;
}
2009-12-11 02:52:11 +03:00
static void dm_remove_exception ( struct dm_exception * e )
2005-04-17 02:20:36 +04:00
{
list_del ( & e - > hash_list ) ;
}
/*
* Return the exception data for a sector , or NULL if not
* remapped .
*/
2009-12-11 02:52:11 +03:00
static struct dm_exception * dm_lookup_exception ( struct dm_exception_table * et ,
chunk_t chunk )
2005-04-17 02:20:36 +04:00
{
struct list_head * slot ;
2009-12-11 02:52:10 +03:00
struct dm_exception * e ;
2005-04-17 02:20:36 +04:00
slot = & et - > table [ exception_hash ( et , chunk ) ] ;
list_for_each_entry ( e , slot , hash_list )
2008-02-08 05:11:27 +03:00
if ( chunk > = e - > old_chunk & &
chunk < = e - > old_chunk + dm_consecutive_chunk_count ( e ) )
2005-04-17 02:20:36 +04:00
return e ;
return NULL ;
}
2014-01-14 04:13:36 +04:00
/*
 * Allocate a completed exception from exception_cache using @gfp.
 * If that fails and @gfp is GFP_NOIO, one more attempt is made with
 * GFP_ATOMIC.  Returns NULL on failure.
 */
static struct dm_exception *alloc_completed_exception(gfp_t gfp)
{
	struct dm_exception *e = kmem_cache_alloc(exception_cache, gfp);

	if (e)
		return e;

	if (gfp != GFP_NOIO)
		return NULL;

	return kmem_cache_alloc(exception_cache, GFP_ATOMIC);
}
2009-12-11 02:52:11 +03:00
static void free_completed_exception ( struct dm_exception * e )
2005-04-17 02:20:36 +04:00
{
kmem_cache_free ( exception_cache , e ) ;
}
2008-07-21 15:00:35 +04:00
static struct dm_snap_pending_exception * alloc_pending_exception ( struct dm_snapshot * s )
2005-04-17 02:20:36 +04:00
{
2008-07-21 15:00:35 +04:00
struct dm_snap_pending_exception * pe = mempool_alloc ( s - > pending_pool ,
GFP_NOIO ) ;
2008-10-30 16:33:16 +03:00
atomic_inc ( & s - > pending_exceptions_count ) ;
2008-07-21 15:00:35 +04:00
pe - > snap = s ;
return pe ;
2005-04-17 02:20:36 +04:00
}
2007-07-12 20:26:32 +04:00
static void free_pending_exception ( struct dm_snap_pending_exception * pe )
2005-04-17 02:20:36 +04:00
{
2008-10-30 16:33:16 +03:00
struct dm_snapshot * s = pe - > snap ;
mempool_free ( pe , s - > pending_pool ) ;
2014-03-17 21:06:10 +04:00
smp_mb__before_atomic ( ) ;
2008-10-30 16:33:16 +03:00
atomic_dec ( & s - > pending_exceptions_count ) ;
2005-04-17 02:20:36 +04:00
}
2009-12-11 02:52:11 +03:00
static void dm_insert_exception ( struct dm_exception_table * eh ,
struct dm_exception * new_e )
2008-02-08 05:11:27 +03:00
{
struct list_head * l ;
2009-12-11 02:52:10 +03:00
struct dm_exception * e = NULL ;
2008-02-08 05:11:27 +03:00
l = & eh - > table [ exception_hash ( eh , new_e - > old_chunk ) ] ;
/* Add immediately if this table doesn't support consecutive chunks */
if ( ! eh - > hash_shift )
goto out ;
/* List is ordered by old_chunk */
list_for_each_entry_reverse ( e , l , hash_list ) {
/* Insert after an existing chunk? */
if ( new_e - > old_chunk = = ( e - > old_chunk +
dm_consecutive_chunk_count ( e ) + 1 ) & &
new_e - > new_chunk = = ( dm_chunk_number ( e - > new_chunk ) +
dm_consecutive_chunk_count ( e ) + 1 ) ) {
dm_consecutive_chunk_count_inc ( e ) ;
2009-12-11 02:52:11 +03:00
free_completed_exception ( new_e ) ;
2008-02-08 05:11:27 +03:00
return ;
}
/* Insert before an existing chunk? */
if ( new_e - > old_chunk = = ( e - > old_chunk - 1 ) & &
new_e - > new_chunk = = ( dm_chunk_number ( e - > new_chunk ) - 1 ) ) {
dm_consecutive_chunk_count_inc ( e ) ;
e - > old_chunk - - ;
e - > new_chunk - - ;
2009-12-11 02:52:11 +03:00
free_completed_exception ( new_e ) ;
2008-02-08 05:11:27 +03:00
return ;
}
if ( new_e - > old_chunk > e - > old_chunk )
break ;
}
out :
list_add ( & new_e - > hash_list , e ? & e - > hash_list : l ) ;
}
2009-01-06 06:05:19 +03:00
/*
* Callback used by the exception stores to load exceptions when
* initialising .
*/
static int dm_add_exception ( void * context , chunk_t old , chunk_t new )
2005-04-17 02:20:36 +04:00
{
2009-01-06 06:05:19 +03:00
struct dm_snapshot * s = context ;
2009-12-11 02:52:10 +03:00
struct dm_exception * e ;
2005-04-17 02:20:36 +04:00
2014-01-14 04:13:36 +04:00
e = alloc_completed_exception ( GFP_KERNEL ) ;
2005-04-17 02:20:36 +04:00
if ( ! e )
return - ENOMEM ;
e - > old_chunk = old ;
2008-02-08 05:11:27 +03:00
/* Consecutive_count is implicitly initialised to zero */
2005-04-17 02:20:36 +04:00
e - > new_chunk = new ;
2008-02-08 05:11:27 +03:00
2009-12-11 02:52:11 +03:00
dm_insert_exception ( & s - > complete , e ) ;
2008-02-08 05:11:27 +03:00
2005-04-17 02:20:36 +04:00
return 0 ;
}
2009-12-11 02:52:08 +03:00
/*
* Return a minimum chunk size of all snapshots that have the specified origin .
* Return zero if the origin has no snapshots .
*/
2012-07-27 18:08:00 +04:00
static uint32_t __minimum_chunk_size ( struct origin * o )
2009-12-11 02:52:08 +03:00
{
struct dm_snapshot * snap ;
unsigned chunk_size = 0 ;
if ( o )
list_for_each_entry ( snap , & o - > snapshots , list )
chunk_size = min_not_zero ( chunk_size ,
snap - > store - > chunk_size ) ;
2012-07-27 18:08:00 +04:00
return ( uint32_t ) chunk_size ;
2009-12-11 02:52:08 +03:00
}
2005-04-17 02:20:36 +04:00
/*
* Hard coded magic .
*/
static int calc_max_buckets ( void )
{
/* use a fixed size of 2MB */
unsigned long mem = 2 * 1024 * 1024 ;
mem / = sizeof ( struct list_head ) ;
return mem ;
}
/*
* Allocate room for a suitable hash table .
*/
2009-04-02 22:55:34 +04:00
static int init_hash_tables ( struct dm_snapshot * s )
2005-04-17 02:20:36 +04:00
{
2013-09-19 03:40:42 +04:00
sector_t hash_size , cow_dev_size , max_buckets ;
2005-04-17 02:20:36 +04:00
/*
* Calculate based on the size of the original volume or
* the COW volume . . .
*/
2009-12-11 02:52:12 +03:00
cow_dev_size = get_dev_size ( s - > cow - > bdev ) ;
2005-04-17 02:20:36 +04:00
max_buckets = calc_max_buckets ( ) ;
2013-09-19 03:40:42 +04:00
hash_size = cow_dev_size > > s - > store - > chunk_shift ;
2005-04-17 02:20:36 +04:00
hash_size = min ( hash_size , max_buckets ) ;
2009-12-11 02:51:54 +03:00
if ( hash_size < 64 )
hash_size = 64 ;
2008-02-08 05:10:06 +03:00
hash_size = rounddown_pow_of_two ( hash_size ) ;
2009-12-11 02:52:11 +03:00
if ( dm_exception_table_init ( & s - > complete , hash_size ,
DM_CHUNK_CONSECUTIVE_BITS ) )
2005-04-17 02:20:36 +04:00
return - ENOMEM ;
/*
* Allocate hash table for in - flight exceptions
* Make this smaller than the real hash table
*/
hash_size > > = 3 ;
if ( hash_size < 64 )
hash_size = 64 ;
2009-12-11 02:52:11 +03:00
if ( dm_exception_table_init ( & s - > pending , hash_size , 0 ) ) {
dm_exception_table_exit ( & s - > complete , exception_cache ) ;
2005-04-17 02:20:36 +04:00
return - ENOMEM ;
}
return 0 ;
}
2009-12-11 02:52:32 +03:00
static void merge_shutdown ( struct dm_snapshot * s )
{
clear_bit_unlock ( RUNNING_MERGE , & s - > state_bits ) ;
2014-03-17 21:06:10 +04:00
smp_mb__after_atomic ( ) ;
2009-12-11 02:52:32 +03:00
wake_up_bit ( & s - > state_bits , RUNNING_MERGE ) ;
}
2009-12-11 02:52:33 +03:00
static struct bio * __release_queued_bios_after_merge ( struct dm_snapshot * s )
{
s - > first_merging_chunk = 0 ;
s - > num_merging_chunks = 0 ;
return bio_list_get ( & s - > bios_queued_during_merge ) ;
}
2009-12-11 02:52:32 +03:00
/*
 * Drop one chunk from the table of completed exceptions.
 *
 * Returns 0 on success, or -EINVAL if no exception covers old_chunk or
 * if old_chunk lies strictly inside a run of consecutive chunks.
 */
static int __remove_single_exception_chunk(struct dm_snapshot *s,
					   chunk_t old_chunk)
{
	struct dm_exception *e = dm_lookup_exception(&s->complete, old_chunk);
	unsigned extra;

	if (!e) {
		DMERR("Corruption detected: exception for block %llu is "
		      "on disk but not in memory",
		      (unsigned long long)old_chunk);
		return -EINVAL;
	}

	extra = dm_consecutive_chunk_count(e);

	/* The exception covers only this chunk: remove it entirely. */
	if (!extra) {
		dm_remove_exception(e);
		free_completed_exception(e);
		return 0;
	}

	/*
	 * Chunks are removed in the reverse of the order in which they
	 * were added, so the chunk must sit at one end of the run.  If it
	 * is the first one, advance the start of the run; if it is neither
	 * the first nor the last, something is wrong.
	 */
	if (old_chunk == e->old_chunk) {
		e->old_chunk++;
		e->new_chunk++;
	} else if (old_chunk != e->old_chunk + extra) {
		DMERR("Attempt to merge block %llu from the "
		      "middle of a chunk range [%llu - %llu]",
		      (unsigned long long)old_chunk,
		      (unsigned long long)e->old_chunk,
		      (unsigned long long)
		      e->old_chunk + extra);
		return -EINVAL;
	}

	/* One fewer consecutive chunk is now covered by this exception. */
	dm_consecutive_chunk_count_dec(e);

	return 0;
}
2009-12-11 02:52:33 +03:00
static void flush_bios ( struct bio * bio ) ;
static int remove_single_exception_chunk ( struct dm_snapshot * s )
2009-12-11 02:52:32 +03:00
{
2009-12-11 02:52:33 +03:00
struct bio * b = NULL ;
int r ;
chunk_t old_chunk = s - > first_merging_chunk + s - > num_merging_chunks - 1 ;
2009-12-11 02:52:32 +03:00
down_write ( & s - > lock ) ;
2009-12-11 02:52:33 +03:00
/*
* Process chunks ( and associated exceptions ) in reverse order
* so that dm_consecutive_chunk_count_dec ( ) accounting works .
*/
do {
r = __remove_single_exception_chunk ( s , old_chunk ) ;
if ( r )
goto out ;
} while ( old_chunk - - > s - > first_merging_chunk ) ;
b = __release_queued_bios_after_merge ( s ) ;
out :
2009-12-11 02:52:32 +03:00
up_write ( & s - > lock ) ;
2009-12-11 02:52:33 +03:00
if ( b )
flush_bios ( b ) ;
2009-12-11 02:52:32 +03:00
return r ;
}
2009-12-11 02:52:34 +03:00
static int origin_write_extent ( struct dm_snapshot * merging_snap ,
sector_t sector , unsigned chunk_size ) ;
2009-12-11 02:52:32 +03:00
static void merge_callback ( int read_err , unsigned long write_err ,
void * context ) ;
2009-12-11 02:52:34 +03:00
static uint64_t read_pending_exceptions_done_count ( void )
{
uint64_t pending_exceptions_done ;
spin_lock ( & _pending_exceptions_done_spinlock ) ;
pending_exceptions_done = _pending_exceptions_done_count ;
spin_unlock ( & _pending_exceptions_done_spinlock ) ;
return pending_exceptions_done ;
}
/*
 * Bump the global count of completed pending exceptions under its
 * spinlock, then wake everyone sleeping on _pending_exceptions_done.
 */
static void increment_pending_exceptions_done_count(void)
{
	spin_lock(&_pending_exceptions_done_spinlock);
	_pending_exceptions_done_count++;
	spin_unlock(&_pending_exceptions_done_spinlock);

	/* Wake-up happens after the lock has been dropped. */
	wake_up_all(&_pending_exceptions_done);
}
2009-12-11 02:52:32 +03:00
static void snapshot_merge_next_chunks ( struct dm_snapshot * s )
{
2009-12-11 02:52:34 +03:00
int i , linear_chunks ;
2009-12-11 02:52:32 +03:00
chunk_t old_chunk , new_chunk ;
struct dm_io_region src , dest ;
2009-12-11 02:52:34 +03:00
sector_t io_size ;
2009-12-11 02:52:34 +03:00
uint64_t previous_count ;
2009-12-11 02:52:32 +03:00
BUG_ON ( ! test_bit ( RUNNING_MERGE , & s - > state_bits ) ) ;
if ( unlikely ( test_bit ( SHUTDOWN_MERGE , & s - > state_bits ) ) )
goto shut ;
/*
* valid flag never changes during merge , so no lock required .
*/
if ( ! s - > valid ) {
DMERR ( " Snapshot is invalid: can't merge " ) ;
goto shut ;
}
2009-12-11 02:52:34 +03:00
linear_chunks = s - > store - > type - > prepare_merge ( s - > store , & old_chunk ,
& new_chunk ) ;
if ( linear_chunks < = 0 ) {
2009-12-11 02:52:35 +03:00
if ( linear_chunks < 0 ) {
2009-12-11 02:52:32 +03:00
DMERR ( " Read error in exception store: "
" shutting down merge " ) ;
2009-12-11 02:52:35 +03:00
down_write ( & s - > lock ) ;
s - > merge_failed = 1 ;
up_write ( & s - > lock ) ;
}
2009-12-11 02:52:32 +03:00
goto shut ;
}
2009-12-11 02:52:34 +03:00
/* Adjust old_chunk and new_chunk to reflect start of linear region */
old_chunk = old_chunk + 1 - linear_chunks ;
new_chunk = new_chunk + 1 - linear_chunks ;
/*
* Use one ( potentially large ) I / O to copy all ' linear_chunks '
* from the exception store to the origin
*/
io_size = linear_chunks * s - > store - > chunk_size ;
2009-12-11 02:52:32 +03:00
dest . bdev = s - > origin - > bdev ;
dest . sector = chunk_to_sector ( s - > store , old_chunk ) ;
2009-12-11 02:52:34 +03:00
dest . count = min ( io_size , get_dev_size ( dest . bdev ) - dest . sector ) ;
2009-12-11 02:52:32 +03:00
src . bdev = s - > cow - > bdev ;
src . sector = chunk_to_sector ( s - > store , new_chunk ) ;
src . count = dest . count ;
2009-12-11 02:52:34 +03:00
/*
* Reallocate any exceptions needed in other snapshots then
* wait for the pending exceptions to complete .
* Each time any pending exception ( globally on the system )
* completes we are woken and repeat the process to find out
* if we can proceed . While this may not seem a particularly
* efficient algorithm , it is not expected to have any
* significant impact on performance .
*/
previous_count = read_pending_exceptions_done_count ( ) ;
2009-12-11 02:52:34 +03:00
while ( origin_write_extent ( s , dest . sector , io_size ) ) {
2009-12-11 02:52:34 +03:00
wait_event ( _pending_exceptions_done ,
( read_pending_exceptions_done_count ( ) ! =
previous_count ) ) ;
/* Retry after the wait, until all exceptions are done. */
previous_count = read_pending_exceptions_done_count ( ) ;
}
2009-12-11 02:52:33 +03:00
down_write ( & s - > lock ) ;
s - > first_merging_chunk = old_chunk ;
2009-12-11 02:52:34 +03:00
s - > num_merging_chunks = linear_chunks ;
2009-12-11 02:52:33 +03:00
up_write ( & s - > lock ) ;
2009-12-11 02:52:34 +03:00
/* Wait until writes to all 'linear_chunks' drain */
for ( i = 0 ; i < linear_chunks ; i + + )
__check_for_conflicting_io ( s , old_chunk + i ) ;
2009-12-11 02:52:33 +03:00
2009-12-11 02:52:32 +03:00
dm_kcopyd_copy ( s - > kcopyd_client , & src , 1 , & dest , 0 , merge_callback , s ) ;
return ;
shut :
merge_shutdown ( s ) ;
}
2009-12-11 02:52:33 +03:00
static void error_bios ( struct bio * bio ) ;
2009-12-11 02:52:32 +03:00
static void merge_callback ( int read_err , unsigned long write_err , void * context )
{
struct dm_snapshot * s = context ;
2009-12-11 02:52:33 +03:00
struct bio * b = NULL ;
2009-12-11 02:52:32 +03:00
if ( read_err | | write_err ) {
if ( read_err )
DMERR ( " Read error: shutting down merge. " ) ;
else
DMERR ( " Write error: shutting down merge. " ) ;
goto shut ;
}
2009-12-11 02:52:33 +03:00
if ( s - > store - > type - > commit_merge ( s - > store ,
s - > num_merging_chunks ) < 0 ) {
2009-12-11 02:52:32 +03:00
DMERR ( " Write error in exception store: shutting down merge " ) ;
goto shut ;
}
2009-12-11 02:52:33 +03:00
if ( remove_single_exception_chunk ( s ) < 0 )
goto shut ;
2009-12-11 02:52:32 +03:00
snapshot_merge_next_chunks ( s ) ;
return ;
shut :
2009-12-11 02:52:33 +03:00
down_write ( & s - > lock ) ;
2009-12-11 02:52:35 +03:00
s - > merge_failed = 1 ;
2009-12-11 02:52:33 +03:00
b = __release_queued_bios_after_merge ( s ) ;
up_write ( & s - > lock ) ;
error_bios ( b ) ;
2009-12-11 02:52:32 +03:00
merge_shutdown ( s ) ;
}
static void start_merge ( struct dm_snapshot * s )
{
if ( ! test_and_set_bit ( RUNNING_MERGE , & s - > state_bits ) )
snapshot_merge_next_chunks ( s ) ;
}
/*
* Stop the merging process and wait until it finishes .
*/
static void stop_merge ( struct dm_snapshot * s )
{
set_bit ( SHUTDOWN_MERGE , & s - > state_bits ) ;
sched: Remove proliferation of wait_on_bit() action functions
The current "wait_on_bit" interface requires an 'action'
function to be provided which does the actual waiting.
There are over 20 such functions, many of them identical.
Most cases can be satisfied by one of just two functions, one
which uses io_schedule() and one which just uses schedule().
So:
Rename wait_on_bit and wait_on_bit_lock to
wait_on_bit_action and wait_on_bit_lock_action
to make it explicit that they need an action function.
Introduce new wait_on_bit{,_lock} and wait_on_bit{,_lock}_io
which are *not* given an action function but implicitly use
a standard one.
The decision to error-out if a signal is pending is now made
based on the 'mode' argument rather than being encoded in the action
function.
All instances of the old wait_on_bit and wait_on_bit_lock which
can use the new version have been changed accordingly and their
action functions have been discarded.
wait_on_bit{_lock} does not return any specific error code in the
event of a signal so the caller must check for non-zero and
interpolate their own error code as appropriate.
The wait_on_bit() call in __fscache_wait_on_invalidate() was
ambiguous as it specified TASK_UNINTERRUPTIBLE but used
fscache_wait_bit_interruptible as an action function.
David Howells confirms this should be uniformly
"uninterruptible"
The main remaining user of wait_on_bit{,_lock}_action is NFS
which needs to use a freezer-aware schedule() call.
A comment in fs/gfs2/glock.c notes that having multiple 'action'
functions is useful as they display differently in the 'wchan'
field of 'ps'. (and /proc/$PID/wchan).
As the new bit_wait{,_io} functions are tagged "__sched", they
will not show up at all, but something higher in the stack. So
the distinction will still be visible, only with different
function names (gds2_glock_wait versus gfs2_glock_dq_wait in the
gfs2/glock.c case).
Since first version of this patch (against 3.15) two new action
functions appeared, on in NFS and one in CIFS. CIFS also now
uses an action function that makes the same freezer aware
schedule call as NFS.
Signed-off-by: NeilBrown <neilb@suse.de>
Acked-by: David Howells <dhowells@redhat.com> (fscache, keys)
Acked-by: Steven Whitehouse <swhiteho@redhat.com> (gfs2)
Acked-by: Peter Zijlstra <peterz@infradead.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Steve French <sfrench@samba.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Link: http://lkml.kernel.org/r/20140707051603.28027.72349.stgit@notabene.brown
Signed-off-by: Ingo Molnar <mingo@kernel.org>
2014-07-07 09:16:04 +04:00
wait_on_bit ( & s - > state_bits , RUNNING_MERGE , TASK_UNINTERRUPTIBLE ) ;
2009-12-11 02:52:32 +03:00
clear_bit ( SHUTDOWN_MERGE , & s - > state_bits ) ;
}
2005-04-17 02:20:36 +04:00
/*
2015-10-09 01:05:41 +03:00
* Construct a snapshot mapping : < origin_dev > < COW - dev > < p | po | n > < chunk - size >
2005-04-17 02:20:36 +04:00
*/
static int snapshot_ctr ( struct dm_target * ti , unsigned int argc , char * * argv )
{
struct dm_snapshot * s ;
2008-07-21 15:00:32 +04:00
int i ;
2005-04-17 02:20:36 +04:00
int r = - EINVAL ;
2009-12-11 02:52:12 +03:00
char * origin_path , * cow_path ;
2016-02-02 07:29:18 +03:00
dev_t origin_dev , cow_dev ;
2013-03-02 02:45:47 +04:00
unsigned args_used , num_flush_bios = 1 ;
2009-12-11 02:52:31 +03:00
fmode_t origin_mode = FMODE_READ ;
2005-04-17 02:20:36 +04:00
2006-10-03 12:15:25 +04:00
if ( argc ! = 4 ) {
2006-06-26 11:27:35 +04:00
ti - > error = " requires exactly 4 arguments " ;
2005-04-17 02:20:36 +04:00
r = - EINVAL ;
2009-12-11 02:52:12 +03:00
goto bad ;
2005-04-17 02:20:36 +04:00
}
2009-12-11 02:52:31 +03:00
if ( dm_target_is_snapshot_merge ( ti ) ) {
2013-03-02 02:45:47 +04:00
num_flush_bios = 2 ;
2009-12-11 02:52:31 +03:00
origin_mode = FMODE_WRITE ;
}
2009-12-11 02:52:12 +03:00
s = kmalloc ( sizeof ( * s ) , GFP_KERNEL ) ;
if ( ! s ) {
2011-08-02 15:32:03 +04:00
ti - > error = " Cannot allocate private snapshot structure " ;
2009-12-11 02:52:12 +03:00
r = - ENOMEM ;
goto bad ;
}
2010-08-12 07:13:51 +04:00
origin_path = argv [ 0 ] ;
argv + + ;
argc - - ;
r = dm_get_device ( ti , origin_path , origin_mode , & s - > origin ) ;
if ( r ) {
ti - > error = " Cannot get origin device " ;
goto bad_origin ;
}
2016-02-02 07:29:18 +03:00
origin_dev = s - > origin - > bdev - > bd_dev ;
2010-08-12 07:13:51 +04:00
2009-12-11 02:52:12 +03:00
cow_path = argv [ 0 ] ;
argv + + ;
argc - - ;
2016-02-02 07:29:18 +03:00
cow_dev = dm_get_dev_t ( cow_path ) ;
if ( cow_dev & & cow_dev = = origin_dev ) {
ti - > error = " COW device cannot be the same as origin device " ;
r = - EINVAL ;
goto bad_cow ;
}
2011-03-24 16:52:14 +03:00
r = dm_get_device ( ti , cow_path , dm_table_get_mode ( ti - > table ) , & s - > cow ) ;
2009-12-11 02:52:12 +03:00
if ( r ) {
ti - > error = " Cannot get COW device " ;
goto bad_cow ;
}
r = dm_exception_store_create ( ti , argc , argv , s , & args_used , & s - > store ) ;
2009-04-02 22:55:34 +04:00
if ( r ) {
ti - > error = " Couldn't create exception store " ;
2005-04-17 02:20:36 +04:00
r = - EINVAL ;
2009-12-11 02:52:12 +03:00
goto bad_store ;
2005-04-17 02:20:36 +04:00
}
2009-04-02 22:55:34 +04:00
argv + = args_used ;
argc - = args_used ;
2009-12-11 02:52:12 +03:00
s - > ti = ti ;
2005-04-17 02:20:36 +04:00
s - > valid = 1 ;
2015-06-21 23:31:33 +03:00
s - > snapshot_overflowed = 0 ;
2006-02-01 14:04:50 +03:00
s - > active = 0 ;
2008-10-30 16:33:16 +03:00
atomic_set ( & s - > pending_exceptions_count , 0 ) ;
dm snapshot: avoid snapshot space leak on crash
There is a possible leak of snapshot space in case of crash.
The reason for space leaking is that chunks in the snapshot device are
allocated sequentially, but they are finished (and stored in the metadata)
out of order, depending on the order in which copying finished.
For example, supposed that the metadata contains the following records
SUPERBLOCK
METADATA (blocks 0 ... 250)
DATA 0
DATA 1
DATA 2
...
DATA 250
Now suppose that you allocate 10 new data blocks 251-260. Suppose that
copying of these blocks finish out of order (block 260 finished first
and the block 251 finished last). Now, the snapshot device looks like
this:
SUPERBLOCK
METADATA (blocks 0 ... 250, 260, 259, 258, 257, 256)
DATA 0
DATA 1
DATA 2
...
DATA 250
DATA 251
DATA 252
DATA 253
DATA 254
DATA 255
METADATA (blocks 255, 254, 253, 252, 251)
DATA 256
DATA 257
DATA 258
DATA 259
DATA 260
Now, if the machine crashes after writing the first metadata block but
before writing the second metadata block, the space for areas DATA 250-255
is leaked, it contains no valid data and it will never be used in the
future.
This patch makes dm-snapshot complete exceptions in the same order they
were allocated, thus fixing this bug.
Note: when backporting this patch to the stable kernel, change the version
field in the following way:
* if version in the stable kernel is {1, 11, 1}, change it to {1, 12, 0}
* if version in the stable kernel is {1, 10, 0} or {1, 10, 1}, change it
to {1, 10, 2}
Userspace reads the version to determine if the bug was fixed, so the
version change is needed.
Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
Cc: stable@vger.kernel.org
2013-11-30 03:13:37 +04:00
s - > exception_start_sequence = 0 ;
s - > exception_complete_sequence = 0 ;
INIT_LIST_HEAD ( & s - > out_of_order_list ) ;
2005-04-17 02:20:36 +04:00
init_rwsem ( & s - > lock ) ;
2009-12-11 02:52:24 +03:00
INIT_LIST_HEAD ( & s - > list ) ;
2006-10-03 12:15:30 +04:00
spin_lock_init ( & s - > pe_lock ) ;
2009-12-11 02:52:32 +03:00
s - > state_bits = 0 ;
2009-12-11 02:52:35 +03:00
s - > merge_failed = 0 ;
2009-12-11 02:52:33 +03:00
s - > first_merging_chunk = 0 ;
s - > num_merging_chunks = 0 ;
bio_list_init ( & s - > bios_queued_during_merge ) ;
2005-04-17 02:20:36 +04:00
/* Allocate hash table for COW data */
2009-04-02 22:55:34 +04:00
if ( init_hash_tables ( s ) ) {
2005-04-17 02:20:36 +04:00
ti - > error = " Unable to allocate hash table space " ;
r = - ENOMEM ;
2009-04-02 22:55:34 +04:00
goto bad_hash_tables ;
2005-04-17 02:20:36 +04:00
}
2013-03-02 02:45:49 +04:00
s - > kcopyd_client = dm_kcopyd_client_create ( & dm_kcopyd_throttle ) ;
2011-05-29 16:03:13 +04:00
if ( IS_ERR ( s - > kcopyd_client ) ) {
r = PTR_ERR ( s - > kcopyd_client ) ;
2005-04-17 02:20:36 +04:00
ti - > error = " Could not create kcopyd client " ;
2009-04-02 22:55:34 +04:00
goto bad_kcopyd ;
2005-04-17 02:20:36 +04:00
}
2008-07-21 15:00:35 +04:00
s - > pending_pool = mempool_create_slab_pool ( MIN_IOS , pending_cache ) ;
if ( ! s - > pending_pool ) {
ti - > error = " Could not allocate mempool for pending exceptions " ;
2013-05-10 17:37:15 +04:00
r = - ENOMEM ;
2009-04-02 22:55:34 +04:00
goto bad_pending_pool ;
2008-07-21 15:00:35 +04:00
}
2008-07-21 15:00:32 +04:00
for ( i = 0 ; i < DM_TRACKED_CHUNK_HASH_SIZE ; i + + )
INIT_HLIST_HEAD ( & s - > tracked_chunk_hash [ i ] ) ;
spin_lock_init ( & s - > tracked_chunk_lock ) ;
2009-12-11 02:52:24 +03:00
ti - > private = s ;
2013-03-02 02:45:47 +04:00
ti - > num_flush_bios = num_flush_bios ;
2016-01-31 21:28:26 +03:00
ti - > per_io_data_size = sizeof ( struct dm_snap_tracked_chunk ) ;
2009-12-11 02:52:24 +03:00
/* Add snapshot to the list of snapshots for this origin */
/* Exceptions aren't triggered till snapshot_resume() is called */
r = register_snapshot ( s ) ;
if ( r = = - ENOMEM ) {
ti - > error = " Snapshot origin struct allocation failed " ;
goto bad_load_and_register ;
} else if ( r < 0 ) {
/* invalid handover, register_snapshot has set ti->error */
goto bad_load_and_register ;
}
/*
* Metadata must only be loaded into one table at once , so skip this
* if metadata will be handed over during resume .
* Chunk size will be set during the handover - set it to zero to
* ensure it ' s ignored .
*/
if ( r > 0 ) {
s - > store - > chunk_size = 0 ;
return 0 ;
}
2009-04-02 22:55:31 +04:00
r = s - > store - > type - > read_metadata ( s - > store , dm_add_exception ,
( void * ) s ) ;
2007-07-12 20:28:13 +04:00
if ( r < 0 ) {
2006-10-03 12:15:25 +04:00
ti - > error = " Failed to read snapshot metadata " ;
2009-12-11 02:52:24 +03:00
goto bad_read_metadata ;
2007-07-12 20:28:13 +04:00
} else if ( r > 0 ) {
s - > valid = 0 ;
DMWARN ( " Snapshot is marked invalid. " ) ;
2006-10-03 12:15:25 +04:00
}
2006-02-01 14:04:50 +03:00
2009-10-17 02:18:16 +04:00
if ( ! s - > store - > chunk_size ) {
ti - > error = " Chunk size not set " ;
2009-12-11 02:52:24 +03:00
goto bad_read_metadata ;
2005-04-17 02:20:36 +04:00
}
2012-07-27 18:08:00 +04:00
r = dm_set_target_max_io_len ( ti , s - > store - > chunk_size ) ;
if ( r )
goto bad_read_metadata ;
2005-04-17 02:20:36 +04:00
return 0 ;
2009-12-11 02:52:24 +03:00
bad_read_metadata :
unregister_snapshot ( s ) ;
2009-04-02 22:55:34 +04:00
bad_load_and_register :
2008-07-21 15:00:35 +04:00
mempool_destroy ( s - > pending_pool ) ;
2009-04-02 22:55:34 +04:00
bad_pending_pool :
2008-04-25 00:43:19 +04:00
dm_kcopyd_client_destroy ( s - > kcopyd_client ) ;
2005-04-17 02:20:36 +04:00
2009-04-02 22:55:34 +04:00
bad_kcopyd :
2009-12-11 02:52:11 +03:00
dm_exception_table_exit ( & s - > pending , pending_cache ) ;
dm_exception_table_exit ( & s - > complete , exception_cache ) ;
2005-04-17 02:20:36 +04:00
2009-04-02 22:55:34 +04:00
bad_hash_tables :
2009-12-11 02:52:12 +03:00
dm_exception_store_destroy ( s - > store ) ;
2005-04-17 02:20:36 +04:00
2009-12-11 02:52:12 +03:00
bad_store :
dm_put_device ( ti , s - > cow ) ;
2009-04-02 22:55:34 +04:00
2009-12-11 02:52:12 +03:00
bad_cow :
2010-08-12 07:13:51 +04:00
dm_put_device ( ti , s - > origin ) ;
bad_origin :
2009-12-11 02:52:12 +03:00
kfree ( s ) ;
bad :
2005-04-17 02:20:36 +04:00
return r ;
}
2006-12-08 13:41:11 +03:00
static void __free_exceptions ( struct dm_snapshot * s )
{
2008-04-25 00:43:19 +04:00
dm_kcopyd_client_destroy ( s - > kcopyd_client ) ;
2006-12-08 13:41:11 +03:00
s - > kcopyd_client = NULL ;
2009-12-11 02:52:11 +03:00
dm_exception_table_exit ( & s - > pending , pending_cache ) ;
dm_exception_table_exit ( & s - > complete , exception_cache ) ;
2006-12-08 13:41:11 +03:00
}
2009-12-11 02:52:24 +03:00
static void __handover_exceptions ( struct dm_snapshot * snap_src ,
struct dm_snapshot * snap_dest )
{
union {
struct dm_exception_table table_swap ;
struct dm_exception_store * store_swap ;
} u ;
/*
* Swap all snapshot context information between the two instances .
*/
u . table_swap = snap_dest - > complete ;
snap_dest - > complete = snap_src - > complete ;
snap_src - > complete = u . table_swap ;
u . store_swap = snap_dest - > store ;
snap_dest - > store = snap_src - > store ;
2015-10-09 01:05:41 +03:00
snap_dest - > store - > userspace_supports_overflow = u . store_swap - > userspace_supports_overflow ;
2009-12-11 02:52:24 +03:00
snap_src - > store = u . store_swap ;
snap_dest - > store - > snap = snap_dest ;
snap_src - > store - > snap = snap_src ;
2012-07-27 18:08:00 +04:00
snap_dest - > ti - > max_io_len = snap_dest - > store - > chunk_size ;
2009-12-11 02:52:24 +03:00
snap_dest - > valid = snap_src - > valid ;
2015-06-21 23:31:33 +03:00
snap_dest - > snapshot_overflowed = snap_src - > snapshot_overflowed ;
2009-12-11 02:52:24 +03:00
/*
* Set source invalid to ensure it receives no further I / O .
*/
snap_src - > valid = 0 ;
}
2005-04-17 02:20:36 +04:00
static void snapshot_dtr ( struct dm_target * ti )
{
2008-07-21 15:00:32 +04:00
# ifdef CONFIG_DM_DEBUG
int i ;
# endif
2007-07-12 20:26:32 +04:00
struct dm_snapshot * s = ti - > private ;
2009-12-11 02:52:24 +03:00
struct dm_snapshot * snap_src = NULL , * snap_dest = NULL ;
2005-04-17 02:20:36 +04:00
2009-12-11 02:52:24 +03:00
down_read ( & _origins_lock ) ;
/* Check whether exception handover must be cancelled */
2009-12-11 02:52:32 +03:00
( void ) __find_snapshots_sharing_cow ( s , & snap_src , & snap_dest , NULL ) ;
2009-12-11 02:52:24 +03:00
if ( snap_src & & snap_dest & & ( s = = snap_src ) ) {
down_write ( & snap_dest - > lock ) ;
snap_dest - > valid = 0 ;
up_write ( & snap_dest - > lock ) ;
DMERR ( " Cancelling snapshot handover. " ) ;
}
up_read ( & _origins_lock ) ;
2009-12-11 02:52:32 +03:00
if ( dm_target_is_snapshot_merge ( ti ) )
stop_merge ( s ) ;
2006-03-27 13:17:50 +04:00
/* Prevent further origin writes from using this snapshot. */
/* After this returns there can be no new kcopyd jobs. */
2005-04-17 02:20:36 +04:00
unregister_snapshot ( s ) ;
2008-10-30 16:33:16 +03:00
while ( atomic_read ( & s - > pending_exceptions_count ) )
2009-01-06 06:04:54 +03:00
msleep ( 1 ) ;
2008-10-30 16:33:16 +03:00
/*
* Ensure instructions in mempool_destroy aren ' t reordered
* before atomic_read .
*/
smp_mb ( ) ;
2008-07-21 15:00:32 +04:00
# ifdef CONFIG_DM_DEBUG
for ( i = 0 ; i < DM_TRACKED_CHUNK_HASH_SIZE ; i + + )
BUG_ON ( ! hlist_empty ( & s - > tracked_chunk_hash [ i ] ) ) ;
# endif
2006-12-08 13:41:11 +03:00
__free_exceptions ( s ) ;
2005-04-17 02:20:36 +04:00
2008-07-21 15:00:35 +04:00
mempool_destroy ( s - > pending_pool ) ;
2009-04-02 22:55:34 +04:00
dm_exception_store_destroy ( s - > store ) ;
2006-03-27 13:17:50 +04:00
2009-12-11 02:52:12 +03:00
dm_put_device ( ti , s - > cow ) ;
2010-08-12 07:13:51 +04:00
dm_put_device ( ti , s - > origin ) ;
2005-04-17 02:20:36 +04:00
kfree ( s ) ;
}
/*
* Flush a list of buffers .
*/
static void flush_bios ( struct bio * bio )
{
struct bio * n ;
while ( bio ) {
n = bio - > bi_next ;
bio - > bi_next = NULL ;
generic_make_request ( bio ) ;
bio = n ;
}
}
2009-12-11 02:52:30 +03:00
static int do_origin ( struct dm_dev * origin , struct bio * bio ) ;
/*
* Flush a list of buffers .
*/
static void retry_origin_bios ( struct dm_snapshot * s , struct bio * bio )
{
struct bio * n ;
int r ;
while ( bio ) {
n = bio - > bi_next ;
bio - > bi_next = NULL ;
r = do_origin ( s - > origin , bio ) ;
if ( r = = DM_MAPIO_REMAPPED )
generic_make_request ( bio ) ;
bio = n ;
}
}
2005-04-17 02:20:36 +04:00
/*
* Error a list of buffers .
*/
static void error_bios ( struct bio * bio )
{
struct bio * n ;
while ( bio ) {
n = bio - > bi_next ;
bio - > bi_next = NULL ;
2007-09-27 14:47:43 +04:00
bio_io_error ( bio ) ;
2005-04-17 02:20:36 +04:00
bio = n ;
}
}
2006-10-03 12:15:31 +04:00
static void __invalidate_snapshot ( struct dm_snapshot * s , int err )
2006-03-27 13:17:45 +04:00
{
if ( ! s - > valid )
return ;
if ( err = = - EIO )
DMERR ( " Invalidating snapshot: Error reading/writing. " ) ;
else if ( err = = - ENOMEM )
DMERR ( " Invalidating snapshot: Unable to allocate exception. " ) ;
2009-04-02 22:55:31 +04:00
if ( s - > store - > type - > drop_snapshot )
s - > store - > type - > drop_snapshot ( s - > store ) ;
2006-03-27 13:17:45 +04:00
s - > valid = 0 ;
2009-12-11 02:52:12 +03:00
dm_table_event ( s - > ti - > table ) ;
2006-03-27 13:17:45 +04:00
}
2016-01-09 03:07:55 +03:00
static void pending_complete ( void * context , int success )
2005-04-17 02:20:36 +04:00
{
2016-01-09 03:07:55 +03:00
struct dm_snap_pending_exception * pe = context ;
2009-12-11 02:52:10 +03:00
struct dm_exception * e ;
2005-04-17 02:20:36 +04:00
struct dm_snapshot * s = pe - > snap ;
2006-10-03 12:15:29 +04:00
struct bio * origin_bios = NULL ;
struct bio * snapshot_bios = NULL ;
2011-08-02 15:32:04 +04:00
struct bio * full_bio = NULL ;
2006-10-03 12:15:29 +04:00
int error = 0 ;
2005-04-17 02:20:36 +04:00
2006-03-27 13:17:45 +04:00
if ( ! success ) {
/* Read/write error - snapshot is unusable */
2005-04-17 02:20:36 +04:00
down_write ( & s - > lock ) ;
2006-10-03 12:15:31 +04:00
__invalidate_snapshot ( s , - EIO ) ;
2006-10-03 12:15:29 +04:00
error = 1 ;
2006-03-27 13:17:45 +04:00
goto out ;
}
2014-01-14 04:13:36 +04:00
e = alloc_completed_exception ( GFP_NOIO ) ;
2006-03-27 13:17:45 +04:00
if ( ! e ) {
2005-04-17 02:20:36 +04:00
down_write ( & s - > lock ) ;
2006-10-03 12:15:31 +04:00
__invalidate_snapshot ( s , - ENOMEM ) ;
2006-10-03 12:15:29 +04:00
error = 1 ;
2006-03-27 13:17:45 +04:00
goto out ;
}
* e = pe - > e ;
2005-04-17 02:20:36 +04:00
2006-03-27 13:17:45 +04:00
down_write ( & s - > lock ) ;
if ( ! s - > valid ) {
2009-12-11 02:52:11 +03:00
free_completed_exception ( e ) ;
2006-10-03 12:15:29 +04:00
error = 1 ;
2006-03-27 13:17:45 +04:00
goto out ;
2005-04-17 02:20:36 +04:00
}
2009-12-11 02:52:29 +03:00
/* Check for conflicting reads */
__check_for_conflicting_io ( s , pe - > e . old_chunk ) ;
2008-07-21 15:00:34 +04:00
2006-10-03 12:15:29 +04:00
/*
* Add a proper exception , and remove the
* in - flight exception from the list .
*/
2009-12-11 02:52:11 +03:00
dm_insert_exception ( & s - > complete , e ) ;
2006-03-27 13:17:45 +04:00
2011-08-02 15:32:03 +04:00
out :
2009-12-11 02:52:11 +03:00
dm_remove_exception ( & pe - > e ) ;
2006-10-03 12:15:29 +04:00
snapshot_bios = bio_list_get ( & pe - > snapshot_bios ) ;
2009-12-11 02:52:30 +03:00
origin_bios = bio_list_get ( & pe - > origin_bios ) ;
2011-08-02 15:32:04 +04:00
full_bio = pe - > full_bio ;
2015-11-26 00:03:31 +03:00
if ( full_bio )
2011-08-02 15:32:04 +04:00
full_bio - > bi_end_io = pe - > full_bio_end_io ;
2009-12-11 02:52:34 +03:00
increment_pending_exceptions_done_count ( ) ;
2006-10-03 12:15:29 +04:00
up_write ( & s - > lock ) ;
/* Submit any pending write bios */
2011-08-02 15:32:04 +04:00
if ( error ) {
if ( full_bio )
bio_io_error ( full_bio ) ;
2006-10-03 12:15:29 +04:00
error_bios ( snapshot_bios ) ;
2011-08-02 15:32:04 +04:00
} else {
if ( full_bio )
2015-07-20 16:29:37 +03:00
bio_endio ( full_bio ) ;
2006-10-03 12:15:29 +04:00
flush_bios ( snapshot_bios ) ;
2011-08-02 15:32:04 +04:00
}
2006-10-03 12:15:29 +04:00
2009-12-11 02:52:30 +03:00
retry_origin_bios ( s , origin_bios ) ;
2015-02-17 22:34:00 +03:00
free_pending_exception ( pe ) ;
2005-04-17 02:20:36 +04:00
}
dm snapshot: avoid snapshot space leak on crash
There is a possible leak of snapshot space in case of crash.
The reason for space leaking is that chunks in the snapshot device are
allocated sequentially, but they are finished (and stored in the metadata)
out of order, depending on the order in which copying finished.
For example, suppose that the metadata contains the following records:
SUPERBLOCK
METADATA (blocks 0 ... 250)
DATA 0
DATA 1
DATA 2
...
DATA 250
Now suppose that you allocate 10 new data blocks 251-260. Suppose that
copying of these blocks finish out of order (block 260 finished first
and the block 251 finished last). Now, the snapshot device looks like
this:
SUPERBLOCK
METADATA (blocks 0 ... 250, 260, 259, 258, 257, 256)
DATA 0
DATA 1
DATA 2
...
DATA 250
DATA 251
DATA 252
DATA 253
DATA 254
DATA 255
METADATA (blocks 255, 254, 253, 252, 251)
DATA 256
DATA 257
DATA 258
DATA 259
DATA 260
Now, if the machine crashes after writing the first metadata block but
before writing the second metadata block, the space for areas DATA 250-255
is leaked, it contains no valid data and it will never be used in the
future.
This patch makes dm-snapshot complete exceptions in the same order they
were allocated, thus fixing this bug.
Note: when backporting this patch to the stable kernel, change the version
field in the following way:
* if version in the stable kernel is {1, 11, 1}, change it to {1, 12, 0}
* if version in the stable kernel is {1, 10, 0} or {1, 10, 1}, change it
to {1, 10, 2}
Userspace reads the version to determine if the bug was fixed, so the
version change is needed.
Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
Cc: stable@vger.kernel.org
2013-11-30 03:13:37 +04:00
static void complete_exception ( struct dm_snap_pending_exception * pe )
{
struct dm_snapshot * s = pe - > snap ;
2016-01-09 03:07:55 +03:00
/* Update the metadata if we are persistent */
s - > store - > type - > commit_exception ( s - > store , & pe - > e , ! pe - > copy_error ,
pending_complete , pe ) ;
dm snapshot: avoid snapshot space leak on crash
There is a possible leak of snapshot space in case of crash.
The reason for space leaking is that chunks in the snapshot device are
allocated sequentially, but they are finished (and stored in the metadata)
out of order, depending on the order in which copying finished.
For example, supposed that the metadata contains the following records
SUPERBLOCK
METADATA (blocks 0 ... 250)
DATA 0
DATA 1
DATA 2
...
DATA 250
Now suppose that you allocate 10 new data blocks 251-260. Suppose that
copying of these blocks finish out of order (block 260 finished first
and the block 251 finished last). Now, the snapshot device looks like
this:
SUPERBLOCK
METADATA (blocks 0 ... 250, 260, 259, 258, 257, 256)
DATA 0
DATA 1
DATA 2
...
DATA 250
DATA 251
DATA 252
DATA 253
DATA 254
DATA 255
METADATA (blocks 255, 254, 253, 252, 251)
DATA 256
DATA 257
DATA 258
DATA 259
DATA 260
Now, if the machine crashes after writing the first metadata block but
before writing the second metadata block, the space for areas DATA 250-255
is leaked, it contains no valid data and it will never be used in the
future.
This patch makes dm-snapshot complete exceptions in the same order they
were allocated, thus fixing this bug.
Note: when backporting this patch to the stable kernel, change the version
field in the following way:
* if version in the stable kernel is {1, 11, 1}, change it to {1, 12, 0}
* if version in the stable kernel is {1, 10, 0} or {1, 10, 1}, change it
to {1, 10, 2}
Userspace reads the version to determine if the bug was fixed, so the
version change is needed.
Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
Cc: stable@vger.kernel.org
2013-11-30 03:13:37 +04:00
}
2005-04-17 02:20:36 +04:00
/*
* Called when the copy I / O has finished . kcopyd actually runs
* this code so don ' t block .
*/
2008-03-29 00:16:10 +03:00
static void copy_callback ( int read_err , unsigned long write_err , void * context )
2005-04-17 02:20:36 +04:00
{
2007-07-12 20:26:32 +04:00
struct dm_snap_pending_exception * pe = context ;
2005-04-17 02:20:36 +04:00
struct dm_snapshot * s = pe - > snap ;
dm snapshot: avoid snapshot space leak on crash
There is a possible leak of snapshot space in case of crash.
The reason for space leaking is that chunks in the snapshot device are
allocated sequentially, but they are finished (and stored in the metadata)
out of order, depending on the order in which copying finished.
For example, supposed that the metadata contains the following records
SUPERBLOCK
METADATA (blocks 0 ... 250)
DATA 0
DATA 1
DATA 2
...
DATA 250
Now suppose that you allocate 10 new data blocks 251-260. Suppose that
copying of these blocks finish out of order (block 260 finished first
and the block 251 finished last). Now, the snapshot device looks like
this:
SUPERBLOCK
METADATA (blocks 0 ... 250, 260, 259, 258, 257, 256)
DATA 0
DATA 1
DATA 2
...
DATA 250
DATA 251
DATA 252
DATA 253
DATA 254
DATA 255
METADATA (blocks 255, 254, 253, 252, 251)
DATA 256
DATA 257
DATA 258
DATA 259
DATA 260
Now, if the machine crashes after writing the first metadata block but
before writing the second metadata block, the space for areas DATA 250-255
is leaked, it contains no valid data and it will never be used in the
future.
This patch makes dm-snapshot complete exceptions in the same order they
were allocated, thus fixing this bug.
Note: when backporting this patch to the stable kernel, change the version
field in the following way:
* if version in the stable kernel is {1, 11, 1}, change it to {1, 12, 0}
* if version in the stable kernel is {1, 10, 0} or {1, 10, 1}, change it
to {1, 10, 2}
Userspace reads the version to determine if the bug was fixed, so the
version change is needed.
Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
Cc: stable@vger.kernel.org
2013-11-30 03:13:37 +04:00
pe - > copy_error = read_err | | write_err ;
2005-04-17 02:20:36 +04:00
dm snapshot: avoid snapshot space leak on crash
There is a possible leak of snapshot space in case of crash.
The reason for space leaking is that chunks in the snapshot device are
allocated sequentially, but they are finished (and stored in the metadata)
out of order, depending on the order in which copying finished.
For example, supposed that the metadata contains the following records
SUPERBLOCK
METADATA (blocks 0 ... 250)
DATA 0
DATA 1
DATA 2
...
DATA 250
Now suppose that you allocate 10 new data blocks 251-260. Suppose that
copying of these blocks finish out of order (block 260 finished first
and the block 251 finished last). Now, the snapshot device looks like
this:
SUPERBLOCK
METADATA (blocks 0 ... 250, 260, 259, 258, 257, 256)
DATA 0
DATA 1
DATA 2
...
DATA 250
DATA 251
DATA 252
DATA 253
DATA 254
DATA 255
METADATA (blocks 255, 254, 253, 252, 251)
DATA 256
DATA 257
DATA 258
DATA 259
DATA 260
Now, if the machine crashes after writing the first metadata block but
before writing the second metadata block, the space for areas DATA 250-255
is leaked, it contains no valid data and it will never be used in the
future.
This patch makes dm-snapshot complete exceptions in the same order they
were allocated, thus fixing this bug.
Note: when backporting this patch to the stable kernel, change the version
field in the following way:
* if version in the stable kernel is {1, 11, 1}, change it to {1, 12, 0}
* if version in the stable kernel is {1, 10, 0} or {1, 10, 1}, change it
to {1, 10, 2}
Userspace reads the version to determine if the bug was fixed, so the
version change is needed.
Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
Cc: stable@vger.kernel.org
2013-11-30 03:13:37 +04:00
if ( pe - > exception_sequence = = s - > exception_complete_sequence ) {
s - > exception_complete_sequence + + ;
complete_exception ( pe ) ;
while ( ! list_empty ( & s - > out_of_order_list ) ) {
pe = list_entry ( s - > out_of_order_list . next ,
struct dm_snap_pending_exception , out_of_order_entry ) ;
if ( pe - > exception_sequence ! = s - > exception_complete_sequence )
break ;
s - > exception_complete_sequence + + ;
list_del ( & pe - > out_of_order_entry ) ;
complete_exception ( pe ) ;
}
} else {
struct list_head * lh ;
struct dm_snap_pending_exception * pe2 ;
list_for_each_prev ( lh , & s - > out_of_order_list ) {
pe2 = list_entry ( lh , struct dm_snap_pending_exception , out_of_order_entry ) ;
if ( pe2 - > exception_sequence < pe - > exception_sequence )
break ;
}
list_add ( & pe - > out_of_order_entry , lh ) ;
}
2005-04-17 02:20:36 +04:00
}
/*
* Dispatches the copy operation to kcopyd .
*/
2007-07-12 20:26:32 +04:00
static void start_copy ( struct dm_snap_pending_exception * pe )
2005-04-17 02:20:36 +04:00
{
struct dm_snapshot * s = pe - > snap ;
2008-04-25 00:43:17 +04:00
struct dm_io_region src , dest ;
2005-04-17 02:20:36 +04:00
struct block_device * bdev = s - > origin - > bdev ;
sector_t dev_size ;
dev_size = get_dev_size ( bdev ) ;
src . bdev = bdev ;
2009-04-02 22:55:33 +04:00
src . sector = chunk_to_sector ( s - > store , pe - > e . old_chunk ) ;
2009-10-17 02:18:17 +04:00
src . count = min ( ( sector_t ) s - > store - > chunk_size , dev_size - src . sector ) ;
2005-04-17 02:20:36 +04:00
2009-12-11 02:52:12 +03:00
dest . bdev = s - > cow - > bdev ;
2009-04-02 22:55:33 +04:00
dest . sector = chunk_to_sector ( s - > store , pe - > e . new_chunk ) ;
2005-04-17 02:20:36 +04:00
dest . count = src . count ;
/* Hand over to kcopyd */
2011-08-02 15:32:03 +04:00
dm_kcopyd_copy ( s - > kcopyd_client , & src , 1 , & dest , 0 , copy_callback , pe ) ;
2005-04-17 02:20:36 +04:00
}
2015-07-20 16:29:37 +03:00
static void full_bio_end_io ( struct bio * bio )
2011-08-02 15:32:04 +04:00
{
void * callback_data = bio - > bi_private ;
2015-07-20 16:29:37 +03:00
dm_kcopyd_do_callback ( callback_data , 0 , bio - > bi_error ? 1 : 0 ) ;
2011-08-02 15:32:04 +04:00
}
static void start_full_bio ( struct dm_snap_pending_exception * pe ,
struct bio * bio )
{
struct dm_snapshot * s = pe - > snap ;
void * callback_data ;
pe - > full_bio = bio ;
pe - > full_bio_end_io = bio - > bi_end_io ;
callback_data = dm_kcopyd_prepare_callback ( s - > kcopyd_client ,
copy_callback , pe ) ;
bio - > bi_end_io = full_bio_end_io ;
bio - > bi_private = callback_data ;
generic_make_request ( bio ) ;
}
2009-04-02 22:55:25 +04:00
/*
 * Looks @chunk up in the snapshot's pending exception table.
 *
 * Returns the pending exception that embeds the matching entry, or
 * NULL when no entry exists for @chunk.
 */
static struct dm_snap_pending_exception *
__lookup_pending_exception(struct dm_snapshot *s, chunk_t chunk)
{
	struct dm_exception *found = dm_lookup_exception(&s->pending, chunk);

	return found ?
		container_of(found, struct dm_snap_pending_exception, e) : NULL;
}
2005-04-17 02:20:36 +04:00
/*
* Looks to see if this snapshot already has a pending exception
* for this chunk , otherwise it allocates a new one and inserts
* it into the pending table .
*
* NOTE : a write lock must be held on snap - > lock before calling
* this .
*/
2007-07-12 20:26:32 +04:00
static struct dm_snap_pending_exception *
2009-04-02 22:55:25 +04:00
__find_pending_exception ( struct dm_snapshot * s ,
struct dm_snap_pending_exception * pe , chunk_t chunk )
2005-04-17 02:20:36 +04:00
{
2009-04-02 22:55:25 +04:00
struct dm_snap_pending_exception * pe2 ;
2005-04-17 02:20:36 +04:00
2009-04-02 22:55:25 +04:00
pe2 = __lookup_pending_exception ( s , chunk ) ;
if ( pe2 ) {
2006-03-27 13:17:45 +04:00
free_pending_exception ( pe ) ;
2009-04-02 22:55:25 +04:00
return pe2 ;
2005-04-17 02:20:36 +04:00
}
2006-03-27 13:17:45 +04:00
pe - > e . old_chunk = chunk ;
bio_list_init ( & pe - > origin_bios ) ;
bio_list_init ( & pe - > snapshot_bios ) ;
pe - > started = 0 ;
2011-08-02 15:32:04 +04:00
pe - > full_bio = NULL ;
2006-03-27 13:17:45 +04:00
2009-04-02 22:55:31 +04:00
if ( s - > store - > type - > prepare_exception ( s - > store , & pe - > e ) ) {
2006-03-27 13:17:45 +04:00
free_pending_exception ( pe ) ;
return NULL ;
}
dm snapshot: avoid snapshot space leak on crash
There is a possible leak of snapshot space in case of crash.
The reason for space leaking is that chunks in the snapshot device are
allocated sequentially, but they are finished (and stored in the metadata)
out of order, depending on the order in which copying finished.
For example, supposed that the metadata contains the following records
SUPERBLOCK
METADATA (blocks 0 ... 250)
DATA 0
DATA 1
DATA 2
...
DATA 250
Now suppose that you allocate 10 new data blocks 251-260. Suppose that
copying of these blocks finish out of order (block 260 finished first
and the block 251 finished last). Now, the snapshot device looks like
this:
SUPERBLOCK
METADATA (blocks 0 ... 250, 260, 259, 258, 257, 256)
DATA 0
DATA 1
DATA 2
...
DATA 250
DATA 251
DATA 252
DATA 253
DATA 254
DATA 255
METADATA (blocks 255, 254, 253, 252, 251)
DATA 256
DATA 257
DATA 258
DATA 259
DATA 260
Now, if the machine crashes after writing the first metadata block but
before writing the second metadata block, the space for areas DATA 250-255
is leaked, it contains no valid data and it will never be used in the
future.
This patch makes dm-snapshot complete exceptions in the same order they
were allocated, thus fixing this bug.
Note: when backporting this patch to the stable kernel, change the version
field in the following way:
* if version in the stable kernel is {1, 11, 1}, change it to {1, 12, 0}
* if version in the stable kernel is {1, 10, 0} or {1, 10, 1}, change it
to {1, 10, 2}
Userspace reads the version to determine if the bug was fixed, so the
version change is needed.
Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
Cc: stable@vger.kernel.org
2013-11-30 03:13:37 +04:00
pe - > exception_sequence = s - > exception_start_sequence + + ;
2009-12-11 02:52:11 +03:00
dm_insert_exception ( & s - > pending , & pe - > e ) ;
2006-03-27 13:17:45 +04:00
2005-04-17 02:20:36 +04:00
return pe ;
}
2009-12-11 02:52:10 +03:00
/*
 * Redirects @bio to the COW device.  The target sector is the start of
 * the COW chunk that corresponds to @chunk within exception @e, plus
 * the bio's original offset inside its chunk.
 */
static void remap_exception(struct dm_snapshot *s, struct dm_exception *e,
			    struct bio *bio, chunk_t chunk)
{
	/* Offset of the bio within its chunk, taken before remapping. */
	sector_t offset_in_chunk = bio->bi_iter.bi_sector & s->store->chunk_mask;
	sector_t cow_chunk_start =
		chunk_to_sector(s->store, dm_chunk_number(e->new_chunk) +
					  (chunk - e->old_chunk));

	bio->bi_bdev = s->cow->bdev;
	bio->bi_iter.bi_sector = cow_chunk_start + offset_in_chunk;
}
2012-12-22 00:23:41 +04:00
static int snapshot_map ( struct dm_target * ti , struct bio * bio )
2005-04-17 02:20:36 +04:00
{
2009-12-11 02:52:10 +03:00
struct dm_exception * e ;
2007-07-12 20:26:32 +04:00
struct dm_snapshot * s = ti - > private ;
2006-12-08 13:41:06 +03:00
int r = DM_MAPIO_REMAPPED ;
2005-04-17 02:20:36 +04:00
chunk_t chunk ;
2007-07-12 20:26:32 +04:00
struct dm_snap_pending_exception * pe = NULL ;
2005-04-17 02:20:36 +04:00
2012-12-22 00:23:41 +04:00
init_tracked_chunk ( bio ) ;
2016-08-06 00:35:16 +03:00
if ( bio - > bi_opf & REQ_PREFLUSH ) {
2009-12-11 02:52:12 +03:00
bio - > bi_bdev = s - > cow - > bdev ;
2009-06-22 13:12:25 +04:00
return DM_MAPIO_REMAPPED ;
}
2013-10-12 02:44:27 +04:00
chunk = sector_to_chunk ( s - > store , bio - > bi_iter . bi_sector ) ;
2005-04-17 02:20:36 +04:00
/* Full snapshots are not usable */
2006-03-27 13:17:45 +04:00
/* To get here the table must be live so s->active is always set. */
2005-04-17 02:20:36 +04:00
if ( ! s - > valid )
2005-07-13 02:53:01 +04:00
return - EIO ;
2005-04-17 02:20:36 +04:00
2006-10-03 12:15:28 +04:00
/* FIXME: should only take write lock if we need
* to copy an exception */
down_write ( & s - > lock ) ;
2016-07-19 12:28:41 +03:00
if ( ! s - > valid | | ( unlikely ( s - > snapshot_overflowed ) & &
bio_data_dir ( bio ) = = WRITE ) ) {
2006-10-03 12:15:28 +04:00
r = - EIO ;
goto out_unlock ;
}
/* If the block is already remapped - use that, else remap it */
2009-12-11 02:52:11 +03:00
e = dm_lookup_exception ( & s - > complete , chunk ) ;
2006-10-03 12:15:28 +04:00
if ( e ) {
2008-02-08 05:11:27 +03:00
remap_exception ( s , e , bio , chunk ) ;
2006-10-03 12:15:28 +04:00
goto out_unlock ;
}
2005-04-17 02:20:36 +04:00
/*
* Write to snapshot - higher level takes care of RW / RO
* flags so we should only get this if we are
* writeable .
*/
2016-07-19 12:28:41 +03:00
if ( bio_data_dir ( bio ) = = WRITE ) {
2009-04-02 22:55:25 +04:00
pe = __lookup_pending_exception ( s , chunk ) ;
2006-03-27 13:17:45 +04:00
if ( ! pe ) {
2009-04-02 22:55:25 +04:00
up_write ( & s - > lock ) ;
pe = alloc_pending_exception ( s ) ;
down_write ( & s - > lock ) ;
2015-06-21 23:31:33 +03:00
if ( ! s - > valid | | s - > snapshot_overflowed ) {
2009-04-02 22:55:25 +04:00
free_pending_exception ( pe ) ;
r = - EIO ;
goto out_unlock ;
}
2009-12-11 02:52:11 +03:00
e = dm_lookup_exception ( & s - > complete , chunk ) ;
2009-04-02 22:55:26 +04:00
if ( e ) {
free_pending_exception ( pe ) ;
remap_exception ( s , e , bio , chunk ) ;
goto out_unlock ;
}
2009-04-02 22:55:25 +04:00
pe = __find_pending_exception ( s , pe , chunk ) ;
2009-04-02 22:55:25 +04:00
if ( ! pe ) {
2015-10-09 01:05:41 +03:00
if ( s - > store - > userspace_supports_overflow ) {
s - > snapshot_overflowed = 1 ;
DMERR ( " Snapshot overflowed: Unable to allocate exception. " ) ;
} else
__invalidate_snapshot ( s , - ENOMEM ) ;
2009-04-02 22:55:25 +04:00
r = - EIO ;
goto out_unlock ;
}
2005-04-17 02:20:36 +04:00
}
2008-02-08 05:11:27 +03:00
remap_exception ( s , & pe - > e , bio , chunk ) ;
2006-03-27 13:17:45 +04:00
2006-12-08 13:41:06 +03:00
r = DM_MAPIO_SUBMITTED ;
2006-10-03 12:15:28 +04:00
2011-08-02 15:32:04 +04:00
if ( ! pe - > started & &
2013-10-12 02:44:27 +04:00
bio - > bi_iter . bi_size = =
( s - > store - > chunk_size < < SECTOR_SHIFT ) ) {
2011-08-02 15:32:04 +04:00
pe - > started = 1 ;
up_write ( & s - > lock ) ;
start_full_bio ( pe , bio ) ;
goto out ;
}
bio_list_add ( & pe - > snapshot_bios , bio ) ;
2006-03-27 13:17:45 +04:00
if ( ! pe - > started ) {
/* this is protected by snap->lock */
pe - > started = 1 ;
2006-10-03 12:15:28 +04:00
up_write ( & s - > lock ) ;
2006-03-27 13:17:45 +04:00
start_copy ( pe ) ;
2006-10-03 12:15:28 +04:00
goto out ;
}
2008-07-21 15:00:32 +04:00
} else {
2006-10-03 12:15:28 +04:00
bio - > bi_bdev = s - > origin - > bdev ;
2012-12-22 00:23:41 +04:00
track_chunk ( s , bio , chunk ) ;
2008-07-21 15:00:32 +04:00
}
2005-04-17 02:20:36 +04:00
2011-08-02 15:32:03 +04:00
out_unlock :
2006-10-03 12:15:28 +04:00
up_write ( & s - > lock ) ;
2011-08-02 15:32:03 +04:00
out :
2005-04-17 02:20:36 +04:00
return r ;
}
2009-12-11 02:52:31 +03:00
/*
* A snapshot - merge target behaves like a combination of a snapshot
* target and a snapshot - origin target . It only generates new
* exceptions in other snapshots and not in the one that is being
* merged .
*
* For each chunk , if there is an existing exception , it is used to
* redirect I / O to the cow device . Otherwise I / O is sent to the origin ,
* which in turn might generate exceptions in other snapshots .
2009-12-11 02:52:33 +03:00
* If merging is currently taking place on the chunk in question , the
* I / O is deferred by adding it to s - > bios_queued_during_merge .
2009-12-11 02:52:31 +03:00
*/
2012-12-22 00:23:41 +04:00
static int snapshot_merge_map ( struct dm_target * ti , struct bio * bio )
2009-12-11 02:52:31 +03:00
{
struct dm_exception * e ;
struct dm_snapshot * s = ti - > private ;
int r = DM_MAPIO_REMAPPED ;
chunk_t chunk ;
2012-12-22 00:23:41 +04:00
init_tracked_chunk ( bio ) ;
2016-08-06 00:35:16 +03:00
if ( bio - > bi_opf & REQ_PREFLUSH ) {
2013-03-02 02:45:47 +04:00
if ( ! dm_bio_get_target_bio_nr ( bio ) )
2009-12-11 02:52:31 +03:00
bio - > bi_bdev = s - > origin - > bdev ;
else
bio - > bi_bdev = s - > cow - > bdev ;
return DM_MAPIO_REMAPPED ;
}
2013-10-12 02:44:27 +04:00
chunk = sector_to_chunk ( s - > store , bio - > bi_iter . bi_sector ) ;
2009-12-11 02:52:31 +03:00
2009-12-11 02:52:33 +03:00
down_write ( & s - > lock ) ;
2009-12-11 02:52:31 +03:00
2009-12-11 02:52:36 +03:00
/* Full merging snapshots are redirected to the origin */
if ( ! s - > valid )
goto redirect_to_origin ;
2009-12-11 02:52:31 +03:00
/* If the block is already remapped - use that */
e = dm_lookup_exception ( & s - > complete , chunk ) ;
if ( e ) {
2009-12-11 02:52:33 +03:00
/* Queue writes overlapping with chunks being merged */
2016-07-19 12:28:41 +03:00
if ( bio_data_dir ( bio ) = = WRITE & &
2009-12-11 02:52:33 +03:00
chunk > = s - > first_merging_chunk & &
chunk < ( s - > first_merging_chunk +
s - > num_merging_chunks ) ) {
bio - > bi_bdev = s - > origin - > bdev ;
bio_list_add ( & s - > bios_queued_during_merge , bio ) ;
r = DM_MAPIO_SUBMITTED ;
goto out_unlock ;
}
2009-12-11 02:52:33 +03:00
2009-12-11 02:52:31 +03:00
remap_exception ( s , e , bio , chunk ) ;
2009-12-11 02:52:33 +03:00
2016-07-19 12:28:41 +03:00
if ( bio_data_dir ( bio ) = = WRITE )
2012-12-22 00:23:41 +04:00
track_chunk ( s , bio , chunk ) ;
2009-12-11 02:52:31 +03:00
goto out_unlock ;
}
2009-12-11 02:52:36 +03:00
redirect_to_origin :
2009-12-11 02:52:31 +03:00
bio - > bi_bdev = s - > origin - > bdev ;
2016-07-19 12:28:41 +03:00
if ( bio_data_dir ( bio ) = = WRITE ) {
2009-12-11 02:52:33 +03:00
up_write ( & s - > lock ) ;
2009-12-11 02:52:31 +03:00
return do_origin ( s - > origin , bio ) ;
}
out_unlock :
2009-12-11 02:52:33 +03:00
up_write ( & s - > lock ) ;
2009-12-11 02:52:31 +03:00
return r ;
}
2012-12-22 00:23:41 +04:00
static int snapshot_end_io ( struct dm_target * ti , struct bio * bio , int error )
2008-07-21 15:00:32 +04:00
{
struct dm_snapshot * s = ti - > private ;
2012-12-22 00:23:41 +04:00
if ( is_bio_tracked ( bio ) )
stop_tracking_chunk ( s , bio ) ;
2008-07-21 15:00:32 +04:00
return 0 ;
}
2009-12-11 02:52:32 +03:00
static void snapshot_merge_presuspend ( struct dm_target * ti )
{
struct dm_snapshot * s = ti - > private ;
stop_merge ( s ) ;
}
2009-12-11 02:52:24 +03:00
static int snapshot_preresume ( struct dm_target * ti )
{
int r = 0 ;
struct dm_snapshot * s = ti - > private ;
struct dm_snapshot * snap_src = NULL , * snap_dest = NULL ;
down_read ( & _origins_lock ) ;
2009-12-11 02:52:32 +03:00
( void ) __find_snapshots_sharing_cow ( s , & snap_src , & snap_dest , NULL ) ;
2009-12-11 02:52:24 +03:00
if ( snap_src & & snap_dest ) {
down_read ( & snap_src - > lock ) ;
if ( s = = snap_src ) {
DMERR ( " Unable to resume snapshot source until "
" handover completes. " ) ;
r = - EINVAL ;
2011-01-13 22:59:59 +03:00
} else if ( ! dm_suspended ( snap_src - > ti ) ) {
2009-12-11 02:52:24 +03:00
DMERR ( " Unable to perform snapshot handover until "
" source is suspended. " ) ;
r = - EINVAL ;
}
up_read ( & snap_src - > lock ) ;
}
up_read ( & _origins_lock ) ;
return r ;
}
2005-04-17 02:20:36 +04:00
static void snapshot_resume ( struct dm_target * ti )
{
2007-07-12 20:26:32 +04:00
struct dm_snapshot * s = ti - > private ;
dm snapshot: suspend merging snapshot when doing exception handover
The "dm snapshot: suspend origin when doing exception handover" commit
fixed a exception store handover bug associated with pending exceptions
to the "snapshot-origin" target.
However, a similar problem exists in snapshot merging. When snapshot
merging is in progress, we use the target "snapshot-merge" instead of
"snapshot-origin". Consequently, during exception store handover, we
must find the snapshot-merge target and suspend its associated
mapped_device.
To avoid lockdep warnings, the target must be suspended and resumed
without holding _origins_lock.
Introduce a dm_hold() function that grabs a reference on a
mapped_device, but unlike dm_get(), it doesn't crash if the device has
the DMF_FREEING flag set, it returns an error in this case.
In snapshot_resume() we grab the reference to the origin device using
dm_hold() while holding _origins_lock (_origins_lock guarantees that the
device won't disappear). Then we release _origins_lock, suspend the
device and grab _origins_lock again.
NOTE to stable@ people:
When backporting to kernels 3.18 and older, use dm_internal_suspend and
dm_internal_resume instead of dm_internal_suspend_fast and
dm_internal_resume_fast.
Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
Cc: stable@vger.kernel.org
2015-02-26 19:41:28 +03:00
struct dm_snapshot * snap_src = NULL , * snap_dest = NULL , * snap_merging = NULL ;
2015-02-26 19:40:35 +03:00
struct dm_origin * o ;
struct mapped_device * origin_md = NULL ;
dm snapshot: suspend merging snapshot when doing exception handover
The "dm snapshot: suspend origin when doing exception handover" commit
fixed a exception store handover bug associated with pending exceptions
to the "snapshot-origin" target.
However, a similar problem exists in snapshot merging. When snapshot
merging is in progress, we use the target "snapshot-merge" instead of
"snapshot-origin". Consequently, during exception store handover, we
must find the snapshot-merge target and suspend its associated
mapped_device.
To avoid lockdep warnings, the target must be suspended and resumed
without holding _origins_lock.
Introduce a dm_hold() function that grabs a reference on a
mapped_device, but unlike dm_get(), it doesn't crash if the device has
the DMF_FREEING flag set, it returns an error in this case.
In snapshot_resume() we grab the reference to the origin device using
dm_hold() while holding _origins_lock (_origins_lock guarantees that the
device won't disappear). Then we release _origins_lock, suspend the
device and grab _origins_lock again.
NOTE to stable@ people:
When backporting to kernels 3.18 and older, use dm_internal_suspend and
dm_internal_resume instead of dm_internal_suspend_fast and
dm_internal_resume_fast.
Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
Cc: stable@vger.kernel.org
2015-02-26 19:41:28 +03:00
bool must_restart_merging = false ;
2009-12-11 02:52:24 +03:00
down_read ( & _origins_lock ) ;
2015-02-26 19:40:35 +03:00
o = __lookup_dm_origin ( s - > origin - > bdev ) ;
if ( o )
origin_md = dm_table_get_md ( o - > ti - > table ) ;
dm snapshot: suspend merging snapshot when doing exception handover
The "dm snapshot: suspend origin when doing exception handover" commit
fixed a exception store handover bug associated with pending exceptions
to the "snapshot-origin" target.
However, a similar problem exists in snapshot merging. When snapshot
merging is in progress, we use the target "snapshot-merge" instead of
"snapshot-origin". Consequently, during exception store handover, we
must find the snapshot-merge target and suspend its associated
mapped_device.
To avoid lockdep warnings, the target must be suspended and resumed
without holding _origins_lock.
Introduce a dm_hold() function that grabs a reference on a
mapped_device, but unlike dm_get(), it doesn't crash if the device has
the DMF_FREEING flag set, it returns an error in this case.
In snapshot_resume() we grab the reference to the origin device using
dm_hold() while holding _origins_lock (_origins_lock guarantees that the
device won't disappear). Then we release _origins_lock, suspend the
device and grab _origins_lock again.
NOTE to stable@ people:
When backporting to kernels 3.18 and older, use dm_internal_suspend and
dm_internal_resume instead of dm_internal_suspend_fast and
dm_internal_resume_fast.
Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
Cc: stable@vger.kernel.org
2015-02-26 19:41:28 +03:00
if ( ! origin_md ) {
( void ) __find_snapshots_sharing_cow ( s , NULL , NULL , & snap_merging ) ;
if ( snap_merging )
origin_md = dm_table_get_md ( snap_merging - > ti - > table ) ;
}
2015-02-26 19:40:35 +03:00
if ( origin_md = = dm_table_get_md ( ti - > table ) )
origin_md = NULL ;
dm snapshot: suspend merging snapshot when doing exception handover
The "dm snapshot: suspend origin when doing exception handover" commit
fixed a exception store handover bug associated with pending exceptions
to the "snapshot-origin" target.
However, a similar problem exists in snapshot merging. When snapshot
merging is in progress, we use the target "snapshot-merge" instead of
"snapshot-origin". Consequently, during exception store handover, we
must find the snapshot-merge target and suspend its associated
mapped_device.
To avoid lockdep warnings, the target must be suspended and resumed
without holding _origins_lock.
Introduce a dm_hold() function that grabs a reference on a
mapped_device, but unlike dm_get(), it doesn't crash if the device has
the DMF_FREEING flag set, it returns an error in this case.
In snapshot_resume() we grab the reference to the origin device using
dm_hold() while holding _origins_lock (_origins_lock guarantees that the
device won't disappear). Then we release _origins_lock, suspend the
device and grab _origins_lock again.
NOTE to stable@ people:
When backporting to kernels 3.18 and older, use dm_internal_suspend and
dm_internal_resume instead of dm_internal_suspend_fast and
dm_internal_resume_fast.
Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
Cc: stable@vger.kernel.org
2015-02-26 19:41:28 +03:00
if ( origin_md ) {
if ( dm_hold ( origin_md ) )
origin_md = NULL ;
}
2015-02-26 19:40:35 +03:00
dm snapshot: suspend merging snapshot when doing exception handover
The "dm snapshot: suspend origin when doing exception handover" commit
fixed a exception store handover bug associated with pending exceptions
to the "snapshot-origin" target.
However, a similar problem exists in snapshot merging. When snapshot
merging is in progress, we use the target "snapshot-merge" instead of
"snapshot-origin". Consequently, during exception store handover, we
must find the snapshot-merge target and suspend its associated
mapped_device.
To avoid lockdep warnings, the target must be suspended and resumed
without holding _origins_lock.
Introduce a dm_hold() function that grabs a reference on a
mapped_device, but unlike dm_get(), it doesn't crash if the device has
the DMF_FREEING flag set, it returns an error in this case.
In snapshot_resume() we grab the reference to the origin device using
dm_hold() while holding _origins_lock (_origins_lock guarantees that the
device won't disappear). Then we release _origins_lock, suspend the
device and grab _origins_lock again.
NOTE to stable@ people:
When backporting to kernels 3.18 and older, use dm_internal_suspend and
dm_internal_resume instead of dm_internal_suspend_fast and
dm_internal_resume_fast.
Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
Cc: stable@vger.kernel.org
2015-02-26 19:41:28 +03:00
up_read ( & _origins_lock ) ;
if ( origin_md ) {
2015-02-26 19:40:35 +03:00
dm_internal_suspend_fast ( origin_md ) ;
dm snapshot: suspend merging snapshot when doing exception handover
The "dm snapshot: suspend origin when doing exception handover" commit
fixed a exception store handover bug associated with pending exceptions
to the "snapshot-origin" target.
However, a similar problem exists in snapshot merging. When snapshot
merging is in progress, we use the target "snapshot-merge" instead of
"snapshot-origin". Consequently, during exception store handover, we
must find the snapshot-merge target and suspend its associated
mapped_device.
To avoid lockdep warnings, the target must be suspended and resumed
without holding _origins_lock.
Introduce a dm_hold() function that grabs a reference on a
mapped_device, but unlike dm_get(), it doesn't crash if the device has
the DMF_FREEING flag set, it returns an error in this case.
In snapshot_resume() we grab the reference to the origin device using
dm_hold() while holding _origins_lock (_origins_lock guarantees that the
device won't disappear). Then we release _origins_lock, suspend the
device and grab _origins_lock again.
NOTE to stable@ people:
When backporting to kernels 3.18 and older, use dm_internal_suspend and
dm_internal_resume instead of dm_internal_suspend_fast and
dm_internal_resume_fast.
Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
Cc: stable@vger.kernel.org
2015-02-26 19:41:28 +03:00
if ( snap_merging & & test_bit ( RUNNING_MERGE , & snap_merging - > state_bits ) ) {
must_restart_merging = true ;
stop_merge ( snap_merging ) ;
}
}
down_read ( & _origins_lock ) ;
2015-02-26 19:40:35 +03:00
2009-12-11 02:52:32 +03:00
( void ) __find_snapshots_sharing_cow ( s , & snap_src , & snap_dest , NULL ) ;
2009-12-11 02:52:24 +03:00
if ( snap_src & & snap_dest ) {
down_write ( & snap_src - > lock ) ;
down_write_nested ( & snap_dest - > lock , SINGLE_DEPTH_NESTING ) ;
__handover_exceptions ( snap_src , snap_dest ) ;
up_write ( & snap_dest - > lock ) ;
up_write ( & snap_src - > lock ) ;
}
2015-02-26 19:40:35 +03:00
2009-12-11 02:52:24 +03:00
up_read ( & _origins_lock ) ;
dm snapshot: suspend merging snapshot when doing exception handover
The "dm snapshot: suspend origin when doing exception handover" commit
fixed a exception store handover bug associated with pending exceptions
to the "snapshot-origin" target.
However, a similar problem exists in snapshot merging. When snapshot
merging is in progress, we use the target "snapshot-merge" instead of
"snapshot-origin". Consequently, during exception store handover, we
must find the snapshot-merge target and suspend its associated
mapped_device.
To avoid lockdep warnings, the target must be suspended and resumed
without holding _origins_lock.
Introduce a dm_hold() function that grabs a reference on a
mapped_device, but unlike dm_get(), it doesn't crash if the device has
the DMF_FREEING flag set, it returns an error in this case.
In snapshot_resume() we grab the reference to the origin device using
dm_hold() while holding _origins_lock (_origins_lock guarantees that the
device won't disappear). Then we release _origins_lock, suspend the
device and grab _origins_lock again.
NOTE to stable@ people:
When backporting to kernels 3.18 and older, use dm_internal_suspend and
dm_internal_resume instead of dm_internal_suspend_fast and
dm_internal_resume_fast.
Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
Cc: stable@vger.kernel.org
2015-02-26 19:41:28 +03:00
if ( origin_md ) {
if ( must_restart_merging )
start_merge ( snap_merging ) ;
dm_internal_resume_fast ( origin_md ) ;
dm_put ( origin_md ) ;
}
2009-12-11 02:52:24 +03:00
/* Now we have correct chunk size, reregister */
reregister_snapshot ( s ) ;
2005-04-17 02:20:36 +04:00
2006-02-01 14:04:50 +03:00
down_write ( & s - > lock ) ;
s - > active = 1 ;
up_write ( & s - > lock ) ;
2005-04-17 02:20:36 +04:00
}
2012-07-27 18:08:00 +04:00
static uint32_t get_origin_minimum_chunksize ( struct block_device * bdev )
2009-12-11 02:52:32 +03:00
{
2012-07-27 18:08:00 +04:00
uint32_t min_chunksize ;
2009-12-11 02:52:32 +03:00
down_read ( & _origins_lock ) ;
min_chunksize = __minimum_chunk_size ( __lookup_origin ( bdev ) ) ;
up_read ( & _origins_lock ) ;
return min_chunksize ;
}
static void snapshot_merge_resume ( struct dm_target * ti )
{
struct dm_snapshot * s = ti - > private ;
/*
* Handover exceptions from existing snapshot .
*/
snapshot_resume ( ti ) ;
/*
2012-07-27 18:08:00 +04:00
* snapshot - merge acts as an origin , so set ti - > max_io_len
2009-12-11 02:52:32 +03:00
*/
2012-07-27 18:08:00 +04:00
ti - > max_io_len = get_origin_minimum_chunksize ( s - > origin - > bdev ) ;
2009-12-11 02:52:32 +03:00
start_merge ( s ) ;
}
2013-03-02 02:45:44 +04:00
/*
 * Status hook: writes the target's status text into @result.
 *
 * STATUSTYPE_INFO emits, under snap->lock, one of " Invalid ",
 * " Merge failed ", " Overflow ", " Unknown " (the store has no usage
 * method) or the allocated/total and metadata sector counts reported
 * by the store.
 *
 * STATUSTYPE_TABLE emits the origin and COW device names and then
 * lets the exception store append its own status after them.
 *
 * @status_flags is not used here.
 */
static void snapshot_status(struct dm_target *ti, status_type_t type,
			    unsigned status_flags, char *result, unsigned maxlen)
{
	struct dm_snapshot *s = ti->private;
	sector_t total, allocated, metadata;
	/*
	 * NOTE(review): sz is never assigned explicitly below, yet it is
	 * used as the output offset afterwards - presumably DMEMIT
	 * advances it by name.  Do not rename sz, result or maxlen.
	 */
	unsigned sz = 0;

	switch (type) {
	case STATUSTYPE_INFO:
		down_write(&s->lock);

		if (!s->valid) {
			DMEMIT(" Invalid ");
		} else if (s->merge_failed) {
			DMEMIT(" Merge failed ");
		} else if (s->snapshot_overflowed) {
			DMEMIT(" Overflow ");
		} else if (!s->store->type->usage) {
			DMEMIT(" Unknown ");
		} else {
			s->store->type->usage(s->store, &total, &allocated,
					      &metadata);
			DMEMIT(" %llu/%llu %llu ",
			       (unsigned long long)allocated,
			       (unsigned long long)total,
			       (unsigned long long)metadata);
		}

		up_write(&s->lock);
		break;

	case STATUSTYPE_TABLE:
		DMEMIT(" %s %s ", s->origin->name, s->cow->name);
		/* The store appends its part after what was emitted above. */
		s->store->type->status(s->store, type, result + sz,
				       maxlen - sz);
		break;
	}
}
2009-09-04 23:40:19 +04:00
/*
 * Invoke fn on the two devices behind a snapshot target.
 *
 * The origin device is reported first, with start 0 and length ti->len.
 * The cow device is reported only if the first call returned 0, with
 * start 0 and the length returned by get_dev_size() for its bdev.
 *
 * Returns the first non-zero value returned by fn, or the result of the
 * second call.
 */
static int snapshot_iterate_devices ( struct dm_target * ti ,
iterate_devices_callout_fn fn , void * data )
{
struct dm_snapshot * snap = ti - > private ;
2010-08-12 07:13:50 +04:00
int r ;
r = fn ( ti , snap - > origin , 0 , ti - > len , data ) ;
2009-09-04 23:40:19 +04:00
2010-08-12 07:13:50 +04:00
if ( ! r )
r = fn ( ti , snap - > cow , 0 , get_dev_size ( snap - > cow - > bdev ) , data ) ;
return r ;
2009-09-04 23:40:19 +04:00
}
2005-04-17 02:20:36 +04:00
/*-----------------------------------------------------------------
* Origin methods
* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
2009-12-11 02:52:28 +03:00
/*
* If no exceptions need creating , DM_MAPIO_REMAPPED is returned and any
* supplied bio was ignored . The caller may submit it immediately .
* ( No remapping actually occurs as the origin is always a direct linear
* map . )
*
* If further exceptions are required , DM_MAPIO_SUBMITTED is returned
* and any supplied bio is added to a list to be submitted once all
* the necessary exceptions exist .
*
* snapshots: list of struct dm_snapshot linked through their list member.
* sector:    origin sector being written; converted to a chunk number
*            separately for every snapshot.
* bio:       optional origin bio (may be NULL).  It is queued on at most
*            one pending exception: the first one found that still needs
*            to exist.
*
* Both callers visible in this file invoke this with _origins_lock held
* for read.  Each snapshot is processed under its own snap->lock held
* for write, and start_copy() is only called after that lock has been
* released.
*/
static int __origin_write ( struct list_head * snapshots , sector_t sector ,
struct bio * bio )
2005-04-17 02:20:36 +04:00
{
2009-12-11 02:52:30 +03:00
int r = DM_MAPIO_REMAPPED ;
2005-04-17 02:20:36 +04:00
struct dm_snapshot * snap ;
2009-12-11 02:52:10 +03:00
struct dm_exception * e ;
2009-12-11 02:52:30 +03:00
struct dm_snap_pending_exception * pe ;
/* Exception to start right after the current snapshot is unlocked. */
struct dm_snap_pending_exception * pe_to_start_now = NULL ;
/* Exception carrying the bio; started once, after the whole loop. */
struct dm_snap_pending_exception * pe_to_start_last = NULL ;
2005-04-17 02:20:36 +04:00
chunk_t chunk ;
/* Do all the snapshots on this origin */
list_for_each_entry ( snap , snapshots , list ) {
2009-12-11 02:52:31 +03:00
/*
* Don ' t make new exceptions in a merging snapshot
* because it has effectively been deleted
*/
if ( dm_target_is_snapshot_merge ( snap - > ti ) )
continue ;
2006-03-27 13:17:45 +04:00
down_write ( & snap - > lock ) ;
2006-02-01 14:04:50 +03:00
/* Only deal with valid and active snapshots */
if ( ! snap - > valid | | ! snap - > active )
2006-03-27 13:17:45 +04:00
goto next_snapshot ;
2005-04-17 02:20:36 +04:00
2005-07-13 02:53:05 +04:00
/* Nothing to do if writing beyond end of snapshot */
2009-12-11 02:52:28 +03:00
if ( sector > = dm_table_get_size ( snap - > ti - > table ) )
2006-03-27 13:17:45 +04:00
goto next_snapshot ;
2005-04-17 02:20:36 +04:00
/*
* Remember , different snapshots can have
* different chunk sizes .
*/
2009-12-11 02:52:28 +03:00
chunk = sector_to_chunk ( snap - > store , sector ) ;
2005-04-17 02:20:36 +04:00
/*
* Check exception table to see if block
* is already remapped in this snapshot
* and trigger an exception if not .
*/
2009-12-11 02:52:11 +03:00
e = dm_lookup_exception ( & snap - > complete , chunk ) ;
2006-03-27 13:17:45 +04:00
if ( e )
goto next_snapshot ;
2009-04-02 22:55:25 +04:00
pe = __lookup_pending_exception ( snap , chunk ) ;
2006-03-27 13:17:45 +04:00
if ( ! pe ) {
2009-04-02 22:55:25 +04:00
/*
 * No pending exception exists for this chunk.  snap->lock is
 * dropped around alloc_pending_exception() (presumably because
 * the allocation may block - confirm), so snap->valid and the
 * completed table are checked again once the lock is retaken.
 * The fresh allocation is freed if either check makes it
 * unnecessary.
 */
up_write ( & snap - > lock ) ;
pe = alloc_pending_exception ( snap ) ;
down_write ( & snap - > lock ) ;
if ( ! snap - > valid ) {
free_pending_exception ( pe ) ;
goto next_snapshot ;
}
2009-12-11 02:52:11 +03:00
e = dm_lookup_exception ( & snap - > complete , chunk ) ;
2009-04-02 22:55:26 +04:00
if ( e ) {
free_pending_exception ( pe ) ;
goto next_snapshot ;
}
2009-04-02 22:55:25 +04:00
/* A NULL result here invalidates the snapshot with -ENOMEM. */
pe = __find_pending_exception ( snap , pe , chunk ) ;
2009-04-02 22:55:25 +04:00
if ( ! pe ) {
__invalidate_snapshot ( snap , - ENOMEM ) ;
goto next_snapshot ;
}
2006-03-27 13:17:45 +04:00
}
2009-12-11 02:52:30 +03:00
/* At least one exception is outstanding for this write. */
r = DM_MAPIO_SUBMITTED ;
2006-03-27 13:17:45 +04:00
2009-12-11 02:52:30 +03:00
/*
* If an origin bio was supplied , queue it to wait for the
* completion of this exception , and start this one last ,
* at the end of the function .
*/
if ( bio ) {
bio_list_add ( & pe - > origin_bios , bio ) ;
/* Clearing bio ensures it is queued on one exception only. */
bio = NULL ;
2006-03-27 13:17:45 +04:00
2009-12-11 02:52:30 +03:00
if ( ! pe - > started ) {
pe - > started = 1 ;
pe_to_start_last = pe ;
}
2006-03-27 13:17:45 +04:00
}
/*
 * If the branch above claimed this exception, started is already
 * set and this test fails, so an exception is never scheduled for
 * both the deferred and the immediate start.
 */
if ( ! pe - > started ) {
pe - > started = 1 ;
2009-12-11 02:52:30 +03:00
pe_to_start_now = pe ;
2005-04-17 02:20:36 +04:00
}
2011-08-02 15:32:03 +04:00
next_snapshot :
2005-04-17 02:20:36 +04:00
up_write ( & snap - > lock ) ;
2009-12-11 02:52:30 +03:00
/* The copy is kicked off only after snap->lock has been released. */
if ( pe_to_start_now ) {
start_copy ( pe_to_start_now ) ;
pe_to_start_now = NULL ;
}
2006-03-27 13:17:44 +04:00
}
2005-04-17 02:20:36 +04:00
/*
2009-12-11 02:52:30 +03:00
* Submit the exception against which the bio is queued last ,
* to give the other exceptions a head start .
2005-04-17 02:20:36 +04:00
*/
2009-12-11 02:52:30 +03:00
if ( pe_to_start_last )
start_copy ( pe_to_start_last ) ;
2005-04-17 02:20:36 +04:00
return r ;
}
/*
* Called on a write from the origin driver .
*
* Looks up the origin for the given device under _origins_lock ( held for
* read ) and , if one is found , passes the starting sector of the bio and
* the bio itself to __origin_write ( ) .
*
* Returns DM_MAPIO_REMAPPED when no origin is found , otherwise whatever
* __origin_write ( ) returns .
*/
static int do_origin ( struct dm_dev * origin , struct bio * bio )
{
struct origin * o ;
2006-12-08 13:41:06 +03:00
int r = DM_MAPIO_REMAPPED ;
2005-04-17 02:20:36 +04:00
down_read ( & _origins_lock ) ;
o = __lookup_origin ( origin - > bdev ) ;
if ( o )
2013-10-12 02:44:27 +04:00
r = __origin_write ( & o - > snapshots , bio - > bi_iter . bi_sector , bio ) ;
2005-04-17 02:20:36 +04:00
up_read ( & _origins_lock ) ;
return r ;
}
2009-12-11 02:52:34 +03:00
/*
* Trigger exceptions in all non - merging snapshots .
*
* The chunk size of the merging snapshot may be larger than the chunk
* size of some other snapshot so we may need to reallocate multiple
* chunks in other snapshots .
*
* We scan all the overlapping exceptions in the other snapshots .
* Returns 1 if anything was reallocated and must be waited for ,
* otherwise returns 0.
*
* size must be a multiple of merging_snap ' s chunk_size .
*
* The extent starts at sector and is walked in steps of
* merging_snap - > ti - > max_io_len ; __origin_write ( ) is called with a NULL
* bio for every step , all under one read hold of _origins_lock .
*/
static int origin_write_extent ( struct dm_snapshot * merging_snap ,
sector_t sector , unsigned size )
{
int must_wait = 0 ;
sector_t n ;
struct origin * o ;
/*
2012-07-27 18:08:00 +04:00
* The origin ' s __minimum_chunk_size ( ) got stored in max_io_len
2009-12-11 02:52:34 +03:00
* by snapshot_merge_resume ( ) .
*/
down_read ( & _origins_lock ) ;
/*
 * NOTE(review): o is dereferenced below without a NULL check, unlike
 * in do_origin().  Presumably a merging snapshot is always registered
 * against its origin at this point - confirm.
 */
o = __lookup_origin ( merging_snap - > origin - > bdev ) ;
2012-07-27 18:08:00 +04:00
/* NOTE(review): the step must be non-zero for this loop to end - confirm. */
for ( n = 0 ; n < size ; n + = merging_snap - > ti - > max_io_len )
2009-12-11 02:52:34 +03:00
if ( __origin_write ( & o - > snapshots , sector + n , NULL ) = =
DM_MAPIO_SUBMITTED )
must_wait = 1 ;
up_read ( & _origins_lock ) ;
return must_wait ;
}
2005-04-17 02:20:36 +04:00
/*
* Origin : maps a linear range of a device , with hooks for snapshotting .
*/
/*
* Construct an origin mapping : < dev_path >
* The context for an origin is merely a ' struct dm_dev * '
* pointing to the real device .
*
* NOTE(review): the private context allocated below is a struct
* dm_origin that holds the dm_dev pointer plus other fields , not a bare
* struct dm_dev pointer as the sentence above says .
*
* Returns 0 on success , - EINVAL if argc is not 1 , - ENOMEM if the
* context cannot be allocated , or the error from dm_get_device ( ) .
* ti - > error is set to a message on every failure path .
*/
static int origin_ctr ( struct dm_target * ti , unsigned int argc , char * * argv )
{
int r ;
2014-03-15 02:42:12 +04:00
struct dm_origin * o ;
2005-04-17 02:20:36 +04:00
if ( argc ! = 1 ) {
2006-06-26 11:27:35 +04:00
ti - > error = " origin: incorrect number of arguments " ;
2005-04-17 02:20:36 +04:00
return - EINVAL ;
}
2014-03-15 02:42:12 +04:00
/*
 * kmalloc does not zero the structure.  Only dev and ti are assigned
 * in this function; split_boundary is assigned in origin_resume().
 */
o = kmalloc ( sizeof ( struct dm_origin ) , GFP_KERNEL ) ;
if ( ! o ) {
ti - > error = " Cannot allocate private origin structure " ;
r = - ENOMEM ;
goto bad_alloc ;
}
/* The device is opened with the mode of the table being built. */
r = dm_get_device ( ti , argv [ 0 ] , dm_table_get_mode ( ti - > table ) , & o - > dev ) ;
2005-04-17 02:20:36 +04:00
if ( r ) {
ti - > error = " Cannot get target device " ;
2014-03-15 02:42:12 +04:00
goto bad_open ;
2005-04-17 02:20:36 +04:00
}
2015-02-26 19:40:35 +03:00
o - > ti = ti ;
2014-03-15 02:42:12 +04:00
ti - > private = o ;
2013-03-02 02:45:47 +04:00
ti - > num_flush_bios = 1 ;
2009-06-22 13:12:25 +04:00
2005-04-17 02:20:36 +04:00
return 0 ;
2014-03-15 02:42:12 +04:00
/* Error unwinding: free the context if the device could not be opened. */
bad_open :
kfree ( o ) ;
bad_alloc :
return r ;
2005-04-17 02:20:36 +04:00
}
/*
 * Destructor for an origin target: releases the device reference taken
 * by dm_get_device() in origin_ctr() and frees the private dm_origin.
 */
static void origin_dtr ( struct dm_target * ti )
{
2014-03-15 02:42:12 +04:00
struct dm_origin * o = ti - > private ;
2015-02-26 19:40:35 +03:00
2014-03-15 02:42:12 +04:00
dm_put_device ( ti , o - > dev ) ;
kfree ( o ) ;
2005-04-17 02:20:36 +04:00
}
2012-12-22 00:23:41 +04:00
/*
 * Map hook for the snapshot-origin target.
 *
 * Every bio is redirected to the underlying device.  Bios carrying
 * REQ_PREFLUSH and bios that are not writes return DM_MAPIO_REMAPPED
 * immediately.  A write is first limited so that it does not extend
 * past the next split_boundary-aligned sector, then handed to
 * do_origin(), whose return value is returned.
 */
static int origin_map ( struct dm_target * ti , struct bio * bio )
2005-04-17 02:20:36 +04:00
{
2014-03-15 02:42:12 +04:00
struct dm_origin * o = ti - > private ;
2014-03-15 02:43:07 +04:00
unsigned available_sectors ;
2005-04-17 02:20:36 +04:00
2014-03-15 02:42:12 +04:00
bio - > bi_bdev = o - > dev - > bdev ;
2005-04-17 02:20:36 +04:00
2016-08-06 00:35:16 +03:00
if ( unlikely ( bio - > bi_opf & REQ_PREFLUSH ) )
2009-06-22 13:12:25 +04:00
return DM_MAPIO_REMAPPED ;
2016-07-19 12:28:41 +03:00
if ( bio_data_dir ( bio ) ! = WRITE )
2009-06-22 13:12:25 +04:00
return DM_MAPIO_REMAPPED ;
2014-03-15 02:43:07 +04:00
/*
 * Sectors left between the start of the bio and the next multiple of
 * split_boundary.
 *
 * NOTE(review): the mask arithmetic assumes split_boundary is a
 * non-zero power of two.  If get_origin_minimum_chunksize() can return
 * 0 (for instance for an origin without snapshots - not visible from
 * here), available_sectors can evaluate to 0 for a bio whose low 32
 * sector bits are 0 - verify.
 */
available_sectors = o - > split_boundary -
( ( unsigned ) bio - > bi_iter . bi_sector & ( o - > split_boundary - 1 ) ) ;
if ( bio_sectors ( bio ) > available_sectors )
dm_accept_partial_bio ( bio , available_sectors ) ;
2005-04-17 02:20:36 +04:00
/* Only tell snapshots if this is a write */
2014-03-15 02:43:07 +04:00
return do_origin ( o - > dev , bio ) ;
2005-04-17 02:20:36 +04:00
}
2016-06-28 22:37:16 +03:00
/*
 * direct_access hook for the snapshot-origin target.
 *
 * It uses none of its arguments: it logs a warning and returns -EIO.
 */
static long origin_direct_access ( struct dm_target * ti , sector_t sector ,
libnvdimm for 4.8
1/ Replace pcommit with ADR / directed-flushing:
The pcommit instruction, which has not shipped on any product, is
deprecated. Instead, the requirement is that platforms implement either
ADR, or provide one or more flush addresses per nvdimm. ADR
(Asynchronous DRAM Refresh) flushes data in posted write buffers to the
memory controller on a power-fail event. Flush addresses are defined in
ACPI 6.x as an NVDIMM Firmware Interface Table (NFIT) sub-structure:
"Flush Hint Address Structure". A flush hint is an mmio address that
when written and fenced assures that all previous posted writes
targeting a given dimm have been flushed to media.
2/ On-demand ARS (address range scrub):
Linux uses the results of the ACPI ARS commands to track bad blocks
in pmem devices. When latent errors are detected we re-scrub the media
to refresh the bad block list, userspace can also request a re-scrub at
any time.
3/ Support for the Microsoft DSM (device specific method) command format.
4/ Support for EDK2/OVMF virtual disk device memory ranges.
5/ Various fixes and cleanups across the subsystem.
-----BEGIN PGP SIGNATURE-----
Version: GnuPG v1
iQIcBAABAgAGBQJXmXBsAAoJEB7SkWpmfYgCEwwP/1IOt9ocP+iHLMDH9KE7VaTZ
NmUDR+Zy6g5cRQM7SgcuU5BXUcx+OsSrSrUTVF1cW994o9Gbz1mFotkv0ZAsPcYY
ZVRQxo2oqHrssyOcg+PsgKWiXn68rJOCgmpEyzaJywl5qTMst7pzsT1s1f7rSh6h
trCf4VaJJwxZR8fARGtlHUnnhPe2Orp99EZRKEWprAsIv2kPuWpPHSjRjuEgN1JG
KW8AYwWqFTtiLRUk86I4KBB0wcDrfctsjgN9Ogd6+aHyQBRnVSr2U+vDCFkC8KLu
qiDCpYp+yyxBjclnljz7tRRT3GtzfCUWd4v2KVWqgg2IaobUc0Lbukp/rmikUXQP
WLikT2OCQ994eFK5OX3Q3cIU/4j459TQnof8q14yVSpjAKrNUXVSR5puN7Hxa+V7
41wKrAsnsyY1oq+Yd/rMR8VfH7PHx3bFkrmRCGZCufLX1UQm4aYj+sWagDKiV3yA
DiudghbOnhfurfGsnXUVw7y7GKs+gNWNBmB6ndAD6ZEHmKoGUhAEbJDLCc3DnANl
b/2mv1MIdIcC1DlCmnbbcn6fv6bICe/r8poK3VrCK3UgOq/EOvKIWl7giP+k1JuC
6DdVYhlNYIVFXUNSLFAwz8OkLu8byx7WDm36iEqrKHtPw+8qa/2bWVgOU6OBgpjV
cN3edFVIdxvZeMgM5Ubq
=xCBG
-----END PGP SIGNATURE-----
Merge tag 'libnvdimm-for-4.8' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm
Pull libnvdimm updates from Dan Williams:
- Replace pcommit with ADR / directed-flushing.
The pcommit instruction, which has not shipped on any product, is
deprecated. Instead, the requirement is that platforms implement
either ADR, or provide one or more flush addresses per nvdimm.
ADR (Asynchronous DRAM Refresh) flushes data in posted write buffers
to the memory controller on a power-fail event.
Flush addresses are defined in ACPI 6.x as an NVDIMM Firmware
Interface Table (NFIT) sub-structure: "Flush Hint Address Structure".
A flush hint is an mmio address that when written and fenced assures
that all previous posted writes targeting a given dimm have been
flushed to media.
- On-demand ARS (address range scrub).
Linux uses the results of the ACPI ARS commands to track bad blocks
in pmem devices. When latent errors are detected we re-scrub the
media to refresh the bad block list, userspace can also request a
re-scrub at any time.
- Support for the Microsoft DSM (device specific method) command
format.
- Support for EDK2/OVMF virtual disk device memory ranges.
- Various fixes and cleanups across the subsystem.
* tag 'libnvdimm-for-4.8' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm: (41 commits)
libnvdimm-btt: Delete an unnecessary check before the function call "__nd_device_register"
nfit: do an ARS scrub on hitting a latent media error
nfit: move to nfit/ sub-directory
nfit, libnvdimm: allow an ARS scrub to be triggered on demand
libnvdimm: register nvdimm_bus devices with an nd_bus driver
pmem: clarify a debug print in pmem_clear_poison
x86/insn: remove pcommit
Revert "KVM: x86: add pcommit support"
nfit, tools/testing/nvdimm/: unify shutdown paths
libnvdimm: move ->module to struct nvdimm_bus_descriptor
nfit: cleanup acpi_nfit_init calling convention
nfit: fix _FIT evaluation memory leak + use after free
tools/testing/nvdimm: add manufacturing_{date|location} dimm properties
tools/testing/nvdimm: add virtual ramdisk range
acpi, nfit: treat virtual ramdisk SPA as pmem region
pmem: kill __pmem address space
pmem: kill wmb_pmem()
libnvdimm, pmem: use nvdimm_flush() for namespace I/O writes
fs/dax: remove wmb_pmem()
libnvdimm, pmem: flush posted-write queues on shutdown
...
2016-07-29 03:22:07 +03:00
void * * kaddr , pfn_t * pfn , long size )
2016-06-28 22:37:16 +03:00
{
DMWARN ( " device does not support dax. " ) ;
return - EIO ;
}
2005-04-17 02:20:36 +04:00
/*
2012-07-27 18:08:00 +04:00
* Set the split_boundary of this origin to the value returned by
2005-04-17 02:20:36 +04:00
* get_origin_minimum_chunksize ( ) for the underlying device , then add
* the dm_origin through __insert_dm_origin ( ) with _origins_lock held
* for write .
*/
static void origin_resume ( struct dm_target * ti )
{
2014-03-15 02:42:12 +04:00
struct dm_origin * o = ti - > private ;
2005-04-17 02:20:36 +04:00
2014-03-15 02:43:07 +04:00
/* This call takes and releases _origins_lock for read on its own. */
o - > split_boundary = get_origin_minimum_chunksize ( o - > dev - > bdev ) ;
2015-02-26 19:40:35 +03:00
down_write ( & _origins_lock ) ;
__insert_dm_origin ( o ) ;
up_write ( & _origins_lock ) ;
}
/*
 * Postsuspend hook: removes this dm_origin through __remove_dm_origin()
 * with _origins_lock held for write.  This undoes the insertion that
 * origin_resume() performs.
 */
static void origin_postsuspend ( struct dm_target * ti )
{
struct dm_origin * o = ti - > private ;
down_write ( & _origins_lock ) ;
__remove_dm_origin ( o ) ;
up_write ( & _origins_lock ) ;
2005-04-17 02:20:36 +04:00
}
2013-03-02 02:45:44 +04:00
/*
 * Status hook for the snapshot-origin target.
 *
 * STATUSTYPE_INFO:  writes an empty string to result.
 * STATUSTYPE_TABLE: writes the name of the underlying device, bounded
 *                   by maxlen.
 *
 * status_flags is not used in this function.
 */
static void origin_status ( struct dm_target * ti , status_type_t type ,
unsigned status_flags , char * result , unsigned maxlen )
2005-04-17 02:20:36 +04:00
{
2014-03-15 02:42:12 +04:00
struct dm_origin * o = ti - > private ;
2005-04-17 02:20:36 +04:00
switch ( type ) {
case STATUSTYPE_INFO :
result [ 0 ] = ' \0 ' ;
break ;
case STATUSTYPE_TABLE :
2014-03-15 02:42:12 +04:00
snprintf ( result , maxlen , " %s " , o - > dev - > name ) ;
2005-04-17 02:20:36 +04:00
break ;
}
}
2009-09-04 23:40:19 +04:00
/*
 * Invoke fn on the single device behind an origin target, with start 0
 * and length ti->len, and return whatever fn returns.
 */
static int origin_iterate_devices ( struct dm_target * ti ,
iterate_devices_callout_fn fn , void * data )
{
2014-03-15 02:42:12 +04:00
struct dm_origin * o = ti - > private ;
2009-09-04 23:40:19 +04:00
2014-03-15 02:42:12 +04:00
return fn ( ti , o - > dev , 0 , ti - > len , data ) ;
2009-09-04 23:40:19 +04:00
}
2005-04-17 02:20:36 +04:00
/*
 * Target type for the snapshot-origin target.  It wires the origin_*
 * functions in this file into the target_type method table and is
 * registered and unregistered by dm_snapshot_init() and
 * dm_snapshot_exit().
 */
static struct target_type origin_target = {
. name = " snapshot-origin " ,
2015-02-26 19:40:35 +03:00
. version = { 1 , 9 , 0 } ,
2005-04-17 02:20:36 +04:00
. module = THIS_MODULE ,
. ctr = origin_ctr ,
. dtr = origin_dtr ,
. map = origin_map ,
. resume = origin_resume ,
2015-02-26 19:40:35 +03:00
. postsuspend = origin_postsuspend ,
2005-04-17 02:20:36 +04:00
. status = origin_status ,
2009-09-04 23:40:19 +04:00
. iterate_devices = origin_iterate_devices ,
2016-06-28 22:37:16 +03:00
. direct_access = origin_direct_access ,
2005-04-17 02:20:36 +04:00
} ;
/*
 * Target type for the snapshot target.  merge_target uses the same ctr,
 * dtr, end_io, preresume, status and iterate_devices methods; this one
 * differs in map and resume and has no presuspend method.
 */
static struct target_type snapshot_target = {
. name = " snapshot " ,
2015-10-09 01:05:41 +03:00
. version = { 1 , 15 , 0 } ,
2005-04-17 02:20:36 +04:00
. module = THIS_MODULE ,
. ctr = snapshot_ctr ,
. dtr = snapshot_dtr ,
. map = snapshot_map ,
2008-07-21 15:00:32 +04:00
. end_io = snapshot_end_io ,
2009-12-11 02:52:24 +03:00
. preresume = snapshot_preresume ,
2005-04-17 02:20:36 +04:00
. resume = snapshot_resume ,
. status = snapshot_status ,
2009-09-04 23:40:19 +04:00
. iterate_devices = snapshot_iterate_devices ,
2005-04-17 02:20:36 +04:00
} ;
2009-12-11 02:52:30 +03:00
/*
 * Target type for the snapshot-merge target.
 *
 * The name member points at the dm_snapshot_merge_target_name array;
 * the dm_target_is_snapshot_merge() macro identifies merge targets by
 * comparing that pointer, so the name must keep referring to this same
 * array rather than to a separate string literal.
 *
 * Compared with snapshot_target, this type replaces map and resume with
 * the snapshot_merge_* variants and adds a presuspend method.
 */
static struct target_type merge_target = {
. name = dm_snapshot_merge_target_name ,
2015-10-09 01:05:41 +03:00
. version = { 1 , 4 , 0 } ,
2009-12-11 02:52:30 +03:00
. module = THIS_MODULE ,
. ctr = snapshot_ctr ,
. dtr = snapshot_dtr ,
2009-12-11 02:52:31 +03:00
. map = snapshot_merge_map ,
2009-12-11 02:52:30 +03:00
. end_io = snapshot_end_io ,
2009-12-11 02:52:32 +03:00
. presuspend = snapshot_merge_presuspend ,
2009-12-11 02:52:30 +03:00
. preresume = snapshot_preresume ,
2009-12-11 02:52:32 +03:00
. resume = snapshot_merge_resume ,
2009-12-11 02:52:30 +03:00
. status = snapshot_status ,
. iterate_devices = snapshot_iterate_devices ,
} ;
2005-04-17 02:20:36 +04:00
/*
 * Module initialisation.
 *
 * Steps, in order: initialise the exception stores, register the
 * snapshot, origin and merge targets, set up the origin hash, and
 * create the exception and pending-exception slab caches.
 *
 * Returns 0 on success.  On failure the error code of the failing step
 * is returned (-ENOMEM for a cache that could not be created) and the
 * labels at the end undo the completed steps in reverse order.
 */
static int __init dm_snapshot_init ( void )
{
int r ;
2009-01-06 06:05:17 +03:00
r = dm_exception_store_init ( ) ;
if ( r ) {
DMERR ( " Failed to initialize exception stores " ) ;
return r ;
}
2005-04-17 02:20:36 +04:00
r = dm_register_target ( & snapshot_target ) ;
2009-12-11 02:52:30 +03:00
if ( r < 0 ) {
2005-04-17 02:20:36 +04:00
DMERR ( " snapshot target register failed %d " , r ) ;
2009-10-17 02:18:14 +04:00
goto bad_register_snapshot_target ;
2005-04-17 02:20:36 +04:00
}
r = dm_register_target ( & origin_target ) ;
if ( r < 0 ) {
2006-06-26 11:27:35 +04:00
DMERR ( " Origin target register failed %d " , r ) ;
2009-12-11 02:52:30 +03:00
goto bad_register_origin_target ;
}
r = dm_register_target ( & merge_target ) ;
if ( r < 0 ) {
DMERR ( " Merge target register failed %d " , r ) ;
goto bad_register_merge_target ;
2005-04-17 02:20:36 +04:00
}
r = init_origin_hash ( ) ;
if ( r ) {
DMERR ( " init_origin_hash failed. " ) ;
2009-12-11 02:52:30 +03:00
goto bad_origin_hash ;
2005-04-17 02:20:36 +04:00
}
2009-12-11 02:52:10 +03:00
exception_cache = KMEM_CACHE ( dm_exception , 0 ) ;
2005-04-17 02:20:36 +04:00
if ( ! exception_cache ) {
DMERR ( " Couldn't create exception cache. " ) ;
r = - ENOMEM ;
2009-12-11 02:52:30 +03:00
goto bad_exception_cache ;
2005-04-17 02:20:36 +04:00
}
2007-07-12 20:26:32 +04:00
pending_cache = KMEM_CACHE ( dm_snap_pending_exception , 0 ) ;
2005-04-17 02:20:36 +04:00
if ( ! pending_cache ) {
DMERR ( " Couldn't create pending cache. " ) ;
r = - ENOMEM ;
2009-12-11 02:52:30 +03:00
goto bad_pending_cache ;
2005-04-17 02:20:36 +04:00
}
return 0 ;
2009-12-11 02:52:30 +03:00
/*
 * Error unwinding.  Each label is named after the step that failed and
 * undoes only the steps completed before it, falling through to the
 * labels below.
 */
bad_pending_cache :
2005-04-17 02:20:36 +04:00
kmem_cache_destroy ( exception_cache ) ;
2009-12-11 02:52:30 +03:00
bad_exception_cache :
2005-04-17 02:20:36 +04:00
exit_origin_hash ( ) ;
2009-12-11 02:52:30 +03:00
bad_origin_hash :
dm_unregister_target ( & merge_target ) ;
bad_register_merge_target :
2005-04-17 02:20:36 +04:00
dm_unregister_target ( & origin_target ) ;
2009-12-11 02:52:30 +03:00
bad_register_origin_target :
2005-04-17 02:20:36 +04:00
dm_unregister_target ( & snapshot_target ) ;
2009-10-17 02:18:14 +04:00
bad_register_snapshot_target :
dm_exception_store_exit ( ) ;
2009-12-11 02:52:30 +03:00
2005-04-17 02:20:36 +04:00
return r ;
}
/*
 * Module teardown: unregisters the three targets, tears down the origin
 * hash, destroys both slab caches and finally shuts down the exception
 * stores.  This releases everything dm_snapshot_init() set up.
 */
static void __exit dm_snapshot_exit ( void )
{
2009-01-06 06:04:58 +03:00
dm_unregister_target ( & snapshot_target ) ;
dm_unregister_target ( & origin_target ) ;
2009-12-11 02:52:30 +03:00
dm_unregister_target ( & merge_target ) ;
2005-04-17 02:20:36 +04:00
exit_origin_hash ( ) ;
kmem_cache_destroy ( pending_cache ) ;
kmem_cache_destroy ( exception_cache ) ;
2009-01-06 06:05:17 +03:00
dm_exception_store_exit ( ) ;
2005-04-17 02:20:36 +04:00
}
/*
 * Module hooks: entry and exit points, module metadata, and aliases for
 * the dm-snapshot-origin and dm-snapshot-merge names.
 */
module_init ( dm_snapshot_init ) ;
module_exit ( dm_snapshot_exit ) ;
MODULE_DESCRIPTION ( DM_NAME " snapshot target " ) ;
MODULE_AUTHOR ( " Joe Thornber " ) ;
MODULE_LICENSE ( " GPL " ) ;
2013-03-02 02:45:47 +04:00
MODULE_ALIAS ( " dm-snapshot-origin " ) ;
MODULE_ALIAS ( " dm-snapshot-merge " ) ;