2012-11-05 17:33:06 +01:00
/*
* Copyright ( C ) STRATO AG 2012. All rights reserved .
*
* This program is free software ; you can redistribute it and / or
* modify it under the terms of the GNU General Public
* License v2 as published by the Free Software Foundation .
*
* This program is distributed in the hope that it will be useful ,
* but WITHOUT ANY WARRANTY ; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE . See the GNU
* General Public License for more details .
*
* You should have received a copy of the GNU General Public
* License along with this program ; if not , write to the
* Free Software Foundation , Inc . , 59 Temple Place - Suite 330 ,
* Boston , MA 02111 - 1307 , USA .
*/
# include <linux/sched.h>
# include <linux/bio.h>
# include <linux/slab.h>
# include <linux/buffer_head.h>
# include <linux/blkdev.h>
# include <linux/random.h>
# include <linux/iocontext.h>
# include <linux/capability.h>
# include <linux/kthread.h>
# include <linux/math64.h>
# include <asm/div64.h>
# include "compat.h"
# include "ctree.h"
# include "extent_map.h"
# include "disk-io.h"
# include "transaction.h"
# include "print-tree.h"
# include "volumes.h"
# include "async-thread.h"
# include "check-integrity.h"
# include "rcu-string.h"
# include "dev-replace.h"
/*
 * Forward declarations of the file-local helpers that are defined further
 * down in this file, so that they can be used before their definitions.
 */
static u64 btrfs_get_seconds_since_1970 ( void ) ;
static int btrfs_dev_replace_finishing ( struct btrfs_fs_info * fs_info ,
int scrub_ret ) ;
static void btrfs_dev_replace_update_device_in_mapping_tree (
struct btrfs_fs_info * fs_info ,
struct btrfs_device * srcdev ,
struct btrfs_device * tgtdev ) ;
static int btrfs_dev_replace_find_srcdev ( struct btrfs_root * root , u64 srcdevid ,
char * srcdev_name ,
struct btrfs_device * * device ) ;
static u64 __btrfs_dev_replace_cancel ( struct btrfs_fs_info * fs_info ) ;
static int btrfs_dev_replace_kthread ( void * data ) ;
static int btrfs_dev_replace_continue_on_mount ( struct btrfs_fs_info * fs_info ) ;
int btrfs_init_dev_replace ( struct btrfs_fs_info * fs_info )
{
struct btrfs_key key ;
struct btrfs_root * dev_root = fs_info - > dev_root ;
struct btrfs_dev_replace * dev_replace = & fs_info - > dev_replace ;
struct extent_buffer * eb ;
int slot ;
int ret = 0 ;
struct btrfs_path * path = NULL ;
int item_size ;
struct btrfs_dev_replace_item * ptr ;
u64 src_devid ;
path = btrfs_alloc_path ( ) ;
if ( ! path ) {
ret = - ENOMEM ;
goto out ;
}
key . objectid = 0 ;
key . type = BTRFS_DEV_REPLACE_KEY ;
key . offset = 0 ;
ret = btrfs_search_slot ( NULL , dev_root , & key , path , 0 , 0 ) ;
if ( ret ) {
no_valid_dev_replace_entry_found :
ret = 0 ;
dev_replace - > replace_state =
BTRFS_DEV_REPLACE_ITEM_STATE_NEVER_STARTED ;
dev_replace - > cont_reading_from_srcdev_mode =
BTRFS_DEV_REPLACE_ITEM_CONT_READING_FROM_SRCDEV_MODE_ALWAYS ;
dev_replace - > replace_state = 0 ;
dev_replace - > time_started = 0 ;
dev_replace - > time_stopped = 0 ;
atomic64_set ( & dev_replace - > num_write_errors , 0 ) ;
atomic64_set ( & dev_replace - > num_uncorrectable_read_errors , 0 ) ;
dev_replace - > cursor_left = 0 ;
dev_replace - > committed_cursor_left = 0 ;
dev_replace - > cursor_left_last_write_of_item = 0 ;
dev_replace - > cursor_right = 0 ;
dev_replace - > srcdev = NULL ;
dev_replace - > tgtdev = NULL ;
dev_replace - > is_valid = 0 ;
dev_replace - > item_needs_writeback = 0 ;
goto out ;
}
slot = path - > slots [ 0 ] ;
eb = path - > nodes [ 0 ] ;
item_size = btrfs_item_size_nr ( eb , slot ) ;
ptr = btrfs_item_ptr ( eb , slot , struct btrfs_dev_replace_item ) ;
if ( item_size ! = sizeof ( struct btrfs_dev_replace_item ) ) {
pr_warn ( " btrfs: dev_replace entry found has unexpected size, ignore entry \n " ) ;
goto no_valid_dev_replace_entry_found ;
}
src_devid = btrfs_dev_replace_src_devid ( eb , ptr ) ;
dev_replace - > cont_reading_from_srcdev_mode =
btrfs_dev_replace_cont_reading_from_srcdev_mode ( eb , ptr ) ;
dev_replace - > replace_state = btrfs_dev_replace_replace_state ( eb , ptr ) ;
dev_replace - > time_started = btrfs_dev_replace_time_started ( eb , ptr ) ;
dev_replace - > time_stopped =
btrfs_dev_replace_time_stopped ( eb , ptr ) ;
atomic64_set ( & dev_replace - > num_write_errors ,
btrfs_dev_replace_num_write_errors ( eb , ptr ) ) ;
atomic64_set ( & dev_replace - > num_uncorrectable_read_errors ,
btrfs_dev_replace_num_uncorrectable_read_errors ( eb , ptr ) ) ;
dev_replace - > cursor_left = btrfs_dev_replace_cursor_left ( eb , ptr ) ;
dev_replace - > committed_cursor_left = dev_replace - > cursor_left ;
dev_replace - > cursor_left_last_write_of_item = dev_replace - > cursor_left ;
dev_replace - > cursor_right = btrfs_dev_replace_cursor_right ( eb , ptr ) ;
dev_replace - > is_valid = 1 ;
dev_replace - > item_needs_writeback = 0 ;
switch ( dev_replace - > replace_state ) {
case BTRFS_IOCTL_DEV_REPLACE_STATE_NEVER_STARTED :
case BTRFS_IOCTL_DEV_REPLACE_STATE_FINISHED :
case BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED :
dev_replace - > srcdev = NULL ;
dev_replace - > tgtdev = NULL ;
break ;
case BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED :
case BTRFS_IOCTL_DEV_REPLACE_STATE_SUSPENDED :
dev_replace - > srcdev = btrfs_find_device ( fs_info , src_devid ,
NULL , NULL ) ;
dev_replace - > tgtdev = btrfs_find_device ( fs_info ,
BTRFS_DEV_REPLACE_DEVID ,
NULL , NULL ) ;
/*
* allow ' btrfs dev replace_cancel ' if src / tgt device is
* missing
*/
if ( ! dev_replace - > srcdev & &
! btrfs_test_opt ( dev_root , DEGRADED ) ) {
ret = - EIO ;
pr_warn ( " btrfs: cannot mount because device replace operation is ongoing and \n " " srcdev (devid %llu) is missing, need to run 'btrfs dev scan'? \n " ,
( unsigned long long ) src_devid ) ;
}
if ( ! dev_replace - > tgtdev & &
! btrfs_test_opt ( dev_root , DEGRADED ) ) {
ret = - EIO ;
pr_warn ( " btrfs: cannot mount because device replace operation is ongoing and \n " " tgtdev (devid %llu) is missing, need to run btrfs dev scan? \n " ,
( unsigned long long ) BTRFS_DEV_REPLACE_DEVID ) ;
}
if ( dev_replace - > tgtdev ) {
if ( dev_replace - > srcdev ) {
dev_replace - > tgtdev - > total_bytes =
dev_replace - > srcdev - > total_bytes ;
dev_replace - > tgtdev - > disk_total_bytes =
dev_replace - > srcdev - > disk_total_bytes ;
dev_replace - > tgtdev - > bytes_used =
dev_replace - > srcdev - > bytes_used ;
}
dev_replace - > tgtdev - > is_tgtdev_for_dev_replace = 1 ;
btrfs_init_dev_replace_tgtdev_for_resume ( fs_info ,
dev_replace - > tgtdev ) ;
}
break ;
}
out :
if ( path )
btrfs_free_path ( path ) ;
return ret ;
}
/*
* called from commit_transaction . Writes changed device replace state to
* disk .
*/
int btrfs_run_dev_replace ( struct btrfs_trans_handle * trans ,
struct btrfs_fs_info * fs_info )
{
int ret ;
struct btrfs_root * dev_root = fs_info - > dev_root ;
struct btrfs_path * path ;
struct btrfs_key key ;
struct extent_buffer * eb ;
struct btrfs_dev_replace_item * ptr ;
struct btrfs_dev_replace * dev_replace = & fs_info - > dev_replace ;
btrfs_dev_replace_lock ( dev_replace ) ;
if ( ! dev_replace - > is_valid | |
! dev_replace - > item_needs_writeback ) {
btrfs_dev_replace_unlock ( dev_replace ) ;
return 0 ;
}
btrfs_dev_replace_unlock ( dev_replace ) ;
key . objectid = 0 ;
key . type = BTRFS_DEV_REPLACE_KEY ;
key . offset = 0 ;
path = btrfs_alloc_path ( ) ;
if ( ! path ) {
ret = - ENOMEM ;
goto out ;
}
ret = btrfs_search_slot ( trans , dev_root , & key , path , - 1 , 1 ) ;
if ( ret < 0 ) {
pr_warn ( " btrfs: error %d while searching for dev_replace item! \n " ,
ret ) ;
goto out ;
}
if ( ret = = 0 & &
btrfs_item_size_nr ( path - > nodes [ 0 ] , path - > slots [ 0 ] ) < sizeof ( * ptr ) ) {
/*
* need to delete old one and insert a new one .
* Since no attempt is made to recover any old state , if the
* dev_replace state is ' running ' , the data on the target
* drive is lost .
* It would be possible to recover the state : just make sure
* that the beginning of the item is never changed and always
* contains all the essential information . Then read this
* minimal set of information and use it as a base for the
* new state .
*/
ret = btrfs_del_item ( trans , dev_root , path ) ;
if ( ret ! = 0 ) {
pr_warn ( " btrfs: delete too small dev_replace item failed %d! \n " ,
ret ) ;
goto out ;
}
ret = 1 ;
}
if ( ret = = 1 ) {
/* need to insert a new item */
btrfs_release_path ( path ) ;
ret = btrfs_insert_empty_item ( trans , dev_root , path ,
& key , sizeof ( * ptr ) ) ;
if ( ret < 0 ) {
pr_warn ( " btrfs: insert dev_replace item failed %d! \n " ,
ret ) ;
goto out ;
}
}
eb = path - > nodes [ 0 ] ;
ptr = btrfs_item_ptr ( eb , path - > slots [ 0 ] ,
struct btrfs_dev_replace_item ) ;
btrfs_dev_replace_lock ( dev_replace ) ;
if ( dev_replace - > srcdev )
btrfs_set_dev_replace_src_devid ( eb , ptr ,
dev_replace - > srcdev - > devid ) ;
else
btrfs_set_dev_replace_src_devid ( eb , ptr , ( u64 ) - 1 ) ;
btrfs_set_dev_replace_cont_reading_from_srcdev_mode ( eb , ptr ,
dev_replace - > cont_reading_from_srcdev_mode ) ;
btrfs_set_dev_replace_replace_state ( eb , ptr ,
dev_replace - > replace_state ) ;
btrfs_set_dev_replace_time_started ( eb , ptr , dev_replace - > time_started ) ;
btrfs_set_dev_replace_time_stopped ( eb , ptr , dev_replace - > time_stopped ) ;
btrfs_set_dev_replace_num_write_errors ( eb , ptr ,
atomic64_read ( & dev_replace - > num_write_errors ) ) ;
btrfs_set_dev_replace_num_uncorrectable_read_errors ( eb , ptr ,
atomic64_read ( & dev_replace - > num_uncorrectable_read_errors ) ) ;
dev_replace - > cursor_left_last_write_of_item =
dev_replace - > cursor_left ;
btrfs_set_dev_replace_cursor_left ( eb , ptr ,
dev_replace - > cursor_left_last_write_of_item ) ;
btrfs_set_dev_replace_cursor_right ( eb , ptr ,
dev_replace - > cursor_right ) ;
dev_replace - > item_needs_writeback = 0 ;
btrfs_dev_replace_unlock ( dev_replace ) ;
btrfs_mark_buffer_dirty ( eb ) ;
out :
btrfs_free_path ( path ) ;
return ret ;
}
void btrfs_after_dev_replace_commit ( struct btrfs_fs_info * fs_info )
{
struct btrfs_dev_replace * dev_replace = & fs_info - > dev_replace ;
dev_replace - > committed_cursor_left =
dev_replace - > cursor_left_last_write_of_item ;
}
static u64 btrfs_get_seconds_since_1970 ( void )
{
struct timespec t = CURRENT_TIME_SEC ;
return t . tv_sec ;
}
int btrfs_dev_replace_start ( struct btrfs_root * root ,
struct btrfs_ioctl_dev_replace_args * args )
{
struct btrfs_trans_handle * trans ;
struct btrfs_fs_info * fs_info = root - > fs_info ;
struct btrfs_dev_replace * dev_replace = & fs_info - > dev_replace ;
int ret ;
struct btrfs_device * tgt_device = NULL ;
struct btrfs_device * src_device = NULL ;
2013-05-07 17:28:03 +00:00
if ( btrfs_fs_incompat ( fs_info , RAID56 ) ) {
pr_warn ( " btrfs: dev_replace cannot yet handle RAID5/RAID6 \n " ) ;
return - EINVAL ;
}
2012-11-05 17:33:06 +01:00
switch ( args - > start . cont_reading_from_srcdev_mode ) {
case BTRFS_IOCTL_DEV_REPLACE_CONT_READING_FROM_SRCDEV_MODE_ALWAYS :
case BTRFS_IOCTL_DEV_REPLACE_CONT_READING_FROM_SRCDEV_MODE_AVOID :
break ;
default :
return - EINVAL ;
}
if ( ( args - > start . srcdevid = = 0 & & args - > start . srcdev_name [ 0 ] = = ' \0 ' ) | |
args - > start . tgtdev_name [ 0 ] = = ' \0 ' )
return - EINVAL ;
mutex_lock ( & fs_info - > volume_mutex ) ;
ret = btrfs_init_dev_replace_tgtdev ( root , args - > start . tgtdev_name ,
& tgt_device ) ;
if ( ret ) {
pr_err ( " btrfs: target device %s is invalid! \n " ,
args - > start . tgtdev_name ) ;
mutex_unlock ( & fs_info - > volume_mutex ) ;
return - EINVAL ;
}
ret = btrfs_dev_replace_find_srcdev ( root , args - > start . srcdevid ,
args - > start . srcdev_name ,
& src_device ) ;
mutex_unlock ( & fs_info - > volume_mutex ) ;
if ( ret ) {
ret = - EINVAL ;
goto leave_no_lock ;
}
if ( tgt_device - > total_bytes < src_device - > total_bytes ) {
pr_err ( " btrfs: target device is smaller than source device! \n " ) ;
ret = - EINVAL ;
goto leave_no_lock ;
}
btrfs_dev_replace_lock ( dev_replace ) ;
switch ( dev_replace - > replace_state ) {
case BTRFS_IOCTL_DEV_REPLACE_STATE_NEVER_STARTED :
case BTRFS_IOCTL_DEV_REPLACE_STATE_FINISHED :
case BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED :
break ;
case BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED :
case BTRFS_IOCTL_DEV_REPLACE_STATE_SUSPENDED :
args - > result = BTRFS_IOCTL_DEV_REPLACE_RESULT_ALREADY_STARTED ;
goto leave ;
}
dev_replace - > cont_reading_from_srcdev_mode =
args - > start . cont_reading_from_srcdev_mode ;
WARN_ON ( ! src_device ) ;
dev_replace - > srcdev = src_device ;
WARN_ON ( ! tgt_device ) ;
dev_replace - > tgtdev = tgt_device ;
printk_in_rcu ( KERN_INFO
" btrfs: dev_replace from %s (devid %llu) to %s) started \n " ,
src_device - > missing ? " <missing disk> " :
rcu_str_deref ( src_device - > name ) ,
src_device - > devid ,
rcu_str_deref ( tgt_device - > name ) ) ;
tgt_device - > total_bytes = src_device - > total_bytes ;
tgt_device - > disk_total_bytes = src_device - > disk_total_bytes ;
tgt_device - > bytes_used = src_device - > bytes_used ;
/*
* from now on , the writes to the srcdev are all duplicated to
* go to the tgtdev as well ( refer to btrfs_map_block ( ) ) .
*/
dev_replace - > replace_state = BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED ;
dev_replace - > time_started = btrfs_get_seconds_since_1970 ( ) ;
dev_replace - > cursor_left = 0 ;
dev_replace - > committed_cursor_left = 0 ;
dev_replace - > cursor_left_last_write_of_item = 0 ;
dev_replace - > cursor_right = 0 ;
dev_replace - > is_valid = 1 ;
dev_replace - > item_needs_writeback = 1 ;
args - > result = BTRFS_IOCTL_DEV_REPLACE_RESULT_NO_ERROR ;
btrfs_dev_replace_unlock ( dev_replace ) ;
2013-05-15 07:48:23 +00:00
btrfs_wait_all_ordered_extents ( root - > fs_info , 0 ) ;
2012-11-05 17:33:06 +01:00
/* force writing the updated state information to disk */
trans = btrfs_start_transaction ( root , 0 ) ;
if ( IS_ERR ( trans ) ) {
ret = PTR_ERR ( trans ) ;
btrfs_dev_replace_lock ( dev_replace ) ;
goto leave ;
}
ret = btrfs_commit_transaction ( trans , root ) ;
WARN_ON ( ret ) ;
/* the disk copy procedure reuses the scrub code */
ret = btrfs_scrub_dev ( fs_info , src_device - > devid , 0 ,
src_device - > total_bytes ,
& dev_replace - > scrub_progress , 0 , 1 ) ;
ret = btrfs_dev_replace_finishing ( root - > fs_info , ret ) ;
WARN_ON ( ret ) ;
return 0 ;
leave :
dev_replace - > srcdev = NULL ;
dev_replace - > tgtdev = NULL ;
btrfs_dev_replace_unlock ( dev_replace ) ;
leave_no_lock :
if ( tgt_device )
btrfs_destroy_dev_replace_tgtdev ( fs_info , tgt_device ) ;
return ret ;
}
static int btrfs_dev_replace_finishing ( struct btrfs_fs_info * fs_info ,
int scrub_ret )
{
struct btrfs_dev_replace * dev_replace = & fs_info - > dev_replace ;
struct btrfs_device * tgt_device ;
struct btrfs_device * src_device ;
struct btrfs_root * root = fs_info - > tree_root ;
u8 uuid_tmp [ BTRFS_UUID_SIZE ] ;
struct btrfs_trans_handle * trans ;
int ret = 0 ;
/* don't allow cancel or unmount to disturb the finishing procedure */
mutex_lock ( & dev_replace - > lock_finishing_cancel_unmount ) ;
btrfs_dev_replace_lock ( dev_replace ) ;
/* was the operation canceled, or is it finished? */
if ( dev_replace - > replace_state ! =
BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED ) {
btrfs_dev_replace_unlock ( dev_replace ) ;
mutex_unlock ( & dev_replace - > lock_finishing_cancel_unmount ) ;
return 0 ;
}
tgt_device = dev_replace - > tgtdev ;
src_device = dev_replace - > srcdev ;
btrfs_dev_replace_unlock ( dev_replace ) ;
/* replace old device with new one in mapping tree */
if ( ! scrub_ret )
btrfs_dev_replace_update_device_in_mapping_tree ( fs_info ,
src_device ,
tgt_device ) ;
/*
* flush all outstanding I / O and inode extent mappings before the
* copy operation is declared as being finished
*/
2013-05-15 07:48:22 +00:00
ret = btrfs_start_all_delalloc_inodes ( root - > fs_info , 0 ) ;
2013-01-22 10:49:33 +00:00
if ( ret ) {
mutex_unlock ( & dev_replace - > lock_finishing_cancel_unmount ) ;
return ret ;
}
2013-05-15 07:48:23 +00:00
btrfs_wait_all_ordered_extents ( root - > fs_info , 0 ) ;
2012-11-05 17:33:06 +01:00
trans = btrfs_start_transaction ( root , 0 ) ;
if ( IS_ERR ( trans ) ) {
mutex_unlock ( & dev_replace - > lock_finishing_cancel_unmount ) ;
return PTR_ERR ( trans ) ;
}
ret = btrfs_commit_transaction ( trans , root ) ;
WARN_ON ( ret ) ;
/* keep away write_all_supers() during the finishing procedure */
mutex_lock ( & root - > fs_info - > fs_devices - > device_list_mutex ) ;
btrfs_dev_replace_lock ( dev_replace ) ;
dev_replace - > replace_state =
scrub_ret ? BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED
: BTRFS_IOCTL_DEV_REPLACE_STATE_FINISHED ;
dev_replace - > tgtdev = NULL ;
dev_replace - > srcdev = NULL ;
dev_replace - > time_stopped = btrfs_get_seconds_since_1970 ( ) ;
dev_replace - > item_needs_writeback = 1 ;
if ( scrub_ret ) {
printk_in_rcu ( KERN_ERR
" btrfs: btrfs_scrub_dev(%s, %llu, %s) failed %d \n " ,
src_device - > missing ? " <missing disk> " :
rcu_str_deref ( src_device - > name ) ,
src_device - > devid ,
rcu_str_deref ( tgt_device - > name ) , scrub_ret ) ;
btrfs_dev_replace_unlock ( dev_replace ) ;
mutex_unlock ( & root - > fs_info - > fs_devices - > device_list_mutex ) ;
if ( tgt_device )
btrfs_destroy_dev_replace_tgtdev ( fs_info , tgt_device ) ;
mutex_unlock ( & dev_replace - > lock_finishing_cancel_unmount ) ;
return 0 ;
}
printk_in_rcu ( KERN_INFO
" btrfs: dev_replace from %s (devid %llu) to %s) finished \n " ,
src_device - > missing ? " <missing disk> " :
rcu_str_deref ( src_device - > name ) ,
src_device - > devid ,
rcu_str_deref ( tgt_device - > name ) ) ;
tgt_device - > is_tgtdev_for_dev_replace = 0 ;
tgt_device - > devid = src_device - > devid ;
src_device - > devid = BTRFS_DEV_REPLACE_DEVID ;
tgt_device - > bytes_used = src_device - > bytes_used ;
memcpy ( uuid_tmp , tgt_device - > uuid , sizeof ( uuid_tmp ) ) ;
memcpy ( tgt_device - > uuid , src_device - > uuid , sizeof ( tgt_device - > uuid ) ) ;
memcpy ( src_device - > uuid , uuid_tmp , sizeof ( src_device - > uuid ) ) ;
tgt_device - > total_bytes = src_device - > total_bytes ;
tgt_device - > disk_total_bytes = src_device - > disk_total_bytes ;
tgt_device - > bytes_used = src_device - > bytes_used ;
if ( fs_info - > sb - > s_bdev = = src_device - > bdev )
fs_info - > sb - > s_bdev = tgt_device - > bdev ;
if ( fs_info - > fs_devices - > latest_bdev = = src_device - > bdev )
fs_info - > fs_devices - > latest_bdev = tgt_device - > bdev ;
list_add ( & tgt_device - > dev_alloc_list , & fs_info - > fs_devices - > alloc_list ) ;
btrfs_rm_dev_replace_srcdev ( fs_info , src_device ) ;
if ( src_device - > bdev ) {
/* zero out the old super */
btrfs_scratch_superblock ( src_device ) ;
}
/*
* this is again a consistent state where no dev_replace procedure
* is running , the target device is part of the filesystem , the
* source device is not part of the filesystem anymore and its 1 st
* superblock is scratched out so that it is no longer marked to
* belong to this filesystem .
*/
btrfs_dev_replace_unlock ( dev_replace ) ;
mutex_unlock ( & root - > fs_info - > fs_devices - > device_list_mutex ) ;
/* write back the superblocks */
trans = btrfs_start_transaction ( root , 0 ) ;
if ( ! IS_ERR ( trans ) )
btrfs_commit_transaction ( trans , root ) ;
mutex_unlock ( & dev_replace - > lock_finishing_cancel_unmount ) ;
return 0 ;
}
static void btrfs_dev_replace_update_device_in_mapping_tree (
struct btrfs_fs_info * fs_info ,
struct btrfs_device * srcdev ,
struct btrfs_device * tgtdev )
{
struct extent_map_tree * em_tree = & fs_info - > mapping_tree . map_tree ;
struct extent_map * em ;
struct map_lookup * map ;
u64 start = 0 ;
int i ;
write_lock ( & em_tree - > lock ) ;
do {
em = lookup_extent_mapping ( em_tree , start , ( u64 ) - 1 ) ;
if ( ! em )
break ;
map = ( struct map_lookup * ) em - > bdev ;
for ( i = 0 ; i < map - > num_stripes ; i + + )
if ( srcdev = = map - > stripes [ i ] . dev )
map - > stripes [ i ] . dev = tgtdev ;
start = em - > start + em - > len ;
free_extent_map ( em ) ;
} while ( start ) ;
write_unlock ( & em_tree - > lock ) ;
}
/*
 * Look up the source device, by @srcdevid when it is non-zero, otherwise
 * by @srcdev_name.  The result is stored in *@device.
 *
 * Returns 0 on success, -ENOENT when no device has the given id, or the
 * result of btrfs_find_device_missing_or_by_path() for a lookup by name.
 */
static int btrfs_dev_replace_find_srcdev(struct btrfs_root *root, u64 srcdevid,
					 char *srcdev_name,
					 struct btrfs_device **device)
{
	if (!srcdevid)
		return btrfs_find_device_missing_or_by_path(root, srcdev_name,
							    device);

	*device = btrfs_find_device(root->fs_info, srcdevid, NULL, NULL);
	return *device ? 0 : -ENOENT;
}
void btrfs_dev_replace_status ( struct btrfs_fs_info * fs_info ,
struct btrfs_ioctl_dev_replace_args * args )
{
struct btrfs_dev_replace * dev_replace = & fs_info - > dev_replace ;
btrfs_dev_replace_lock ( dev_replace ) ;
/* even if !dev_replace_is_valid, the values are good enough for
* the replace_status ioctl */
args - > result = BTRFS_IOCTL_DEV_REPLACE_RESULT_NO_ERROR ;
args - > status . replace_state = dev_replace - > replace_state ;
args - > status . time_started = dev_replace - > time_started ;
args - > status . time_stopped = dev_replace - > time_stopped ;
args - > status . num_write_errors =
atomic64_read ( & dev_replace - > num_write_errors ) ;
args - > status . num_uncorrectable_read_errors =
atomic64_read ( & dev_replace - > num_uncorrectable_read_errors ) ;
switch ( dev_replace - > replace_state ) {
case BTRFS_IOCTL_DEV_REPLACE_STATE_NEVER_STARTED :
case BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED :
args - > status . progress_1000 = 0 ;
break ;
case BTRFS_IOCTL_DEV_REPLACE_STATE_FINISHED :
args - > status . progress_1000 = 1000 ;
break ;
case BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED :
case BTRFS_IOCTL_DEV_REPLACE_STATE_SUSPENDED :
args - > status . progress_1000 = div64_u64 ( dev_replace - > cursor_left ,
div64_u64 ( dev_replace - > srcdev - > total_bytes , 1000 ) ) ;
break ;
}
btrfs_dev_replace_unlock ( dev_replace ) ;
}
int btrfs_dev_replace_cancel ( struct btrfs_fs_info * fs_info ,
struct btrfs_ioctl_dev_replace_args * args )
{
args - > result = __btrfs_dev_replace_cancel ( fs_info ) ;
return 0 ;
}
static u64 __btrfs_dev_replace_cancel ( struct btrfs_fs_info * fs_info )
{
struct btrfs_dev_replace * dev_replace = & fs_info - > dev_replace ;
struct btrfs_device * tgt_device = NULL ;
struct btrfs_trans_handle * trans ;
struct btrfs_root * root = fs_info - > tree_root ;
u64 result ;
int ret ;
mutex_lock ( & dev_replace - > lock_finishing_cancel_unmount ) ;
btrfs_dev_replace_lock ( dev_replace ) ;
switch ( dev_replace - > replace_state ) {
case BTRFS_IOCTL_DEV_REPLACE_STATE_NEVER_STARTED :
case BTRFS_IOCTL_DEV_REPLACE_STATE_FINISHED :
case BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED :
result = BTRFS_IOCTL_DEV_REPLACE_RESULT_NOT_STARTED ;
btrfs_dev_replace_unlock ( dev_replace ) ;
goto leave ;
case BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED :
case BTRFS_IOCTL_DEV_REPLACE_STATE_SUSPENDED :
result = BTRFS_IOCTL_DEV_REPLACE_RESULT_NO_ERROR ;
tgt_device = dev_replace - > tgtdev ;
dev_replace - > tgtdev = NULL ;
dev_replace - > srcdev = NULL ;
break ;
}
dev_replace - > replace_state = BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED ;
dev_replace - > time_stopped = btrfs_get_seconds_since_1970 ( ) ;
dev_replace - > item_needs_writeback = 1 ;
btrfs_dev_replace_unlock ( dev_replace ) ;
btrfs_scrub_cancel ( fs_info ) ;
trans = btrfs_start_transaction ( root , 0 ) ;
if ( IS_ERR ( trans ) ) {
mutex_unlock ( & dev_replace - > lock_finishing_cancel_unmount ) ;
return PTR_ERR ( trans ) ;
}
ret = btrfs_commit_transaction ( trans , root ) ;
WARN_ON ( ret ) ;
if ( tgt_device )
btrfs_destroy_dev_replace_tgtdev ( fs_info , tgt_device ) ;
leave :
mutex_unlock ( & dev_replace - > lock_finishing_cancel_unmount ) ;
return result ;
}
void btrfs_dev_replace_suspend_for_unmount ( struct btrfs_fs_info * fs_info )
{
struct btrfs_dev_replace * dev_replace = & fs_info - > dev_replace ;
mutex_lock ( & dev_replace - > lock_finishing_cancel_unmount ) ;
btrfs_dev_replace_lock ( dev_replace ) ;
switch ( dev_replace - > replace_state ) {
case BTRFS_IOCTL_DEV_REPLACE_STATE_NEVER_STARTED :
case BTRFS_IOCTL_DEV_REPLACE_STATE_FINISHED :
case BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED :
case BTRFS_IOCTL_DEV_REPLACE_STATE_SUSPENDED :
break ;
case BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED :
dev_replace - > replace_state =
BTRFS_IOCTL_DEV_REPLACE_STATE_SUSPENDED ;
dev_replace - > time_stopped = btrfs_get_seconds_since_1970 ( ) ;
dev_replace - > item_needs_writeback = 1 ;
pr_info ( " btrfs: suspending dev_replace for unmount \n " ) ;
break ;
}
btrfs_dev_replace_unlock ( dev_replace ) ;
mutex_unlock ( & dev_replace - > lock_finishing_cancel_unmount ) ;
}
/* resume dev_replace procedure that was interrupted by unmount */
int btrfs_resume_dev_replace_async ( struct btrfs_fs_info * fs_info )
{
struct task_struct * task ;
struct btrfs_dev_replace * dev_replace = & fs_info - > dev_replace ;
btrfs_dev_replace_lock ( dev_replace ) ;
switch ( dev_replace - > replace_state ) {
case BTRFS_IOCTL_DEV_REPLACE_STATE_NEVER_STARTED :
case BTRFS_IOCTL_DEV_REPLACE_STATE_FINISHED :
case BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED :
btrfs_dev_replace_unlock ( dev_replace ) ;
return 0 ;
case BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED :
break ;
case BTRFS_IOCTL_DEV_REPLACE_STATE_SUSPENDED :
dev_replace - > replace_state =
BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED ;
break ;
}
if ( ! dev_replace - > tgtdev | | ! dev_replace - > tgtdev - > bdev ) {
pr_info ( " btrfs: cannot continue dev_replace, tgtdev is missing \n "
" btrfs: you may cancel the operation after 'mount -o degraded' \n " ) ;
btrfs_dev_replace_unlock ( dev_replace ) ;
return 0 ;
}
btrfs_dev_replace_unlock ( dev_replace ) ;
WARN_ON ( atomic_xchg (
& fs_info - > mutually_exclusive_operation_running , 1 ) ) ;
task = kthread_run ( btrfs_dev_replace_kthread , fs_info , " btrfs-devrepl " ) ;
return PTR_RET ( task ) ;
}
static int btrfs_dev_replace_kthread ( void * data )
{
struct btrfs_fs_info * fs_info = data ;
struct btrfs_dev_replace * dev_replace = & fs_info - > dev_replace ;
struct btrfs_ioctl_dev_replace_args * status_args ;
u64 progress ;
status_args = kzalloc ( sizeof ( * status_args ) , GFP_NOFS ) ;
if ( status_args ) {
btrfs_dev_replace_status ( fs_info , status_args ) ;
progress = status_args - > status . progress_1000 ;
kfree ( status_args ) ;
do_div ( progress , 10 ) ;
printk_in_rcu ( KERN_INFO
" btrfs: continuing dev_replace from %s (devid %llu) to %s @%u%% \n " ,
dev_replace - > srcdev - > missing ? " <missing disk> " :
rcu_str_deref ( dev_replace - > srcdev - > name ) ,
dev_replace - > srcdev - > devid ,
dev_replace - > tgtdev ?
rcu_str_deref ( dev_replace - > tgtdev - > name ) :
" <missing target disk> " ,
( unsigned int ) progress ) ;
}
btrfs_dev_replace_continue_on_mount ( fs_info ) ;
atomic_set ( & fs_info - > mutually_exclusive_operation_running , 0 ) ;
return 0 ;
}
static int btrfs_dev_replace_continue_on_mount ( struct btrfs_fs_info * fs_info )
{
struct btrfs_dev_replace * dev_replace = & fs_info - > dev_replace ;
int ret ;
ret = btrfs_scrub_dev ( fs_info , dev_replace - > srcdev - > devid ,
dev_replace - > committed_cursor_left ,
dev_replace - > srcdev - > total_bytes ,
& dev_replace - > scrub_progress , 0 , 1 ) ;
ret = btrfs_dev_replace_finishing ( fs_info , ret ) ;
WARN_ON ( ret ) ;
return 0 ;
}
int btrfs_dev_replace_is_ongoing ( struct btrfs_dev_replace * dev_replace )
{
if ( ! dev_replace - > is_valid )
return 0 ;
switch ( dev_replace - > replace_state ) {
case BTRFS_IOCTL_DEV_REPLACE_STATE_NEVER_STARTED :
case BTRFS_IOCTL_DEV_REPLACE_STATE_FINISHED :
case BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED :
return 0 ;
case BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED :
case BTRFS_IOCTL_DEV_REPLACE_STATE_SUSPENDED :
/*
* return true even if tgtdev is missing ( this is
* something that can happen if the dev_replace
* procedure is suspended by an umount and then
* the tgtdev is missing ( or " btrfs dev scan " ) was
* not called and the the filesystem is remounted
* in degraded state . This does not stop the
* dev_replace procedure . It needs to be canceled
* manually if the cancelation is wanted .
*/
break ;
}
return 1 ;
}
/*
 * Take the dev_replace lock.  The lock may be taken again by the task that
 * already owns it: ownership is tracked by pid in lock_owner and the depth
 * in nesting_level, both updated under lock_management_lock.
 */
void btrfs_dev_replace_lock ( struct btrfs_dev_replace * dev_replace )
{
/* the beginning is just an optimization for the typical case */
if ( atomic_read ( & dev_replace - > nesting_level ) = = 0 ) {
acquire_lock :
/*
 * this is not a nested case where the same thread
 * is trying to acquire the same lock twice
 */
mutex_lock ( & dev_replace - > lock ) ;
/* record the new owner and the first nesting level */
mutex_lock ( & dev_replace - > lock_management_lock ) ;
dev_replace - > lock_owner = current - > pid ;
atomic_inc ( & dev_replace - > nesting_level ) ;
mutex_unlock ( & dev_replace - > lock_management_lock ) ;
return ;
}
/* the lock is held: find out whether the current task is the owner */
mutex_lock ( & dev_replace - > lock_management_lock ) ;
if ( atomic_read ( & dev_replace - > nesting_level ) > 0 & &
dev_replace - > lock_owner = = current - > pid ) {
/* nested acquisition by the owner: only bump the nesting level */
WARN_ON ( ! mutex_is_locked ( & dev_replace - > lock ) ) ;
atomic_inc ( & dev_replace - > nesting_level ) ;
mutex_unlock ( & dev_replace - > lock_management_lock ) ;
return ;
}
/* not the owner (or released meanwhile): take the lock the regular way */
mutex_unlock ( & dev_replace - > lock_management_lock ) ;
goto acquire_lock ;
}
/*
 * Drop one nesting level of the dev_replace lock.  When the level reaches
 * zero the owner is cleared and the underlying mutex is released.  Warns if
 * the lock is not held, the nesting level is below 1, or the caller is not
 * the recorded owner.
 */
void btrfs_dev_replace_unlock ( struct btrfs_dev_replace * dev_replace )
{
WARN_ON ( ! mutex_is_locked ( & dev_replace - > lock ) ) ;
mutex_lock ( & dev_replace - > lock_management_lock ) ;
WARN_ON ( atomic_read ( & dev_replace - > nesting_level ) < 1 ) ;
WARN_ON ( dev_replace - > lock_owner ! = current - > pid ) ;
atomic_dec ( & dev_replace - > nesting_level ) ;
if ( atomic_read ( & dev_replace - > nesting_level ) = = 0 ) {
/* outermost unlock: give up ownership and release the lock itself */
dev_replace - > lock_owner = 0 ;
mutex_unlock ( & dev_replace - > lock_management_lock ) ;
mutex_unlock ( & dev_replace - > lock ) ;
} else {
/* still nested: the lock stays held by the current owner */
mutex_unlock ( & dev_replace - > lock_management_lock ) ;
}
}