2009-09-26 03:07:19 +04:00
/*
drbd_worker . c
This file is part of DRBD by Philipp Reisner and Lars Ellenberg .
Copyright ( C ) 2001 - 2008 , LINBIT Information Technologies GmbH .
Copyright ( C ) 1999 - 2008 , Philipp Reisner < philipp . reisner @ linbit . com > .
Copyright ( C ) 2002 - 2008 , Lars Ellenberg < lars . ellenberg @ linbit . com > .
drbd is free software ; you can redistribute it and / or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation ; either version 2 , or ( at your option )
any later version .
drbd is distributed in the hope that it will be useful ,
but WITHOUT ANY WARRANTY ; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE . See the
GNU General Public License for more details .
You should have received a copy of the GNU General Public License
along with drbd ; see the file COPYING . If not , write to
the Free Software Foundation , 675 Mass Ave , Cambridge , MA 0213 9 , USA .
*/
# include <linux/module.h>
# include <linux/drbd.h>
# include <linux/sched.h>
# include <linux/wait.h>
# include <linux/mm.h>
# include <linux/memcontrol.h>
# include <linux/mm_inline.h>
# include <linux/slab.h>
# include <linux/random.h>
# include <linux/string.h>
# include <linux/scatterlist.h>
# include "drbd_int.h"
# include "drbd_req.h"
2011-02-09 20:09:48 +03:00
/* Forward declarations: both callbacks are defined further down in this
 * file and are invoked from w_resync_timer(). */
static int w_make_ov_request ( struct drbd_work * w , int cancel ) ;
static int w_make_resync_request ( struct drbd_work * w , int cancel ) ;
2009-09-26 03:07:19 +04:00
2011-01-25 19:33:38 +03:00
/* endio handlers:
 *   drbd_md_io_complete (defined here)
 *   drbd_request_endio (defined here)
 *   drbd_peer_request_endio (defined here)
 *   bm_async_io_complete (defined in drbd_bitmap.c)
 *
 * For all these callbacks, note the following:
 * The callbacks will be called in irq context by the IDE drivers,
 * and in Softirqs/Tasklets/BH context by the SCSI drivers.
 * Try to get the locking right :)
 *
 */
/* About the global_state_lock
 * Each state transition on a device holds a read lock. In case we have
 * to evaluate the sync-after dependencies, we grab a write lock, because
 * we need stable states on all devices for that. */
rwlock_t global_state_lock ;
/* used for synchronous meta data and bitmap IO
* submitted by drbd_md_sync_page_io ( )
*/
void drbd_md_io_complete ( struct bio * bio , int error )
{
struct drbd_md_io * md_io ;
md_io = ( struct drbd_md_io * ) bio - > bi_private ;
md_io - > error = error ;
complete ( & md_io - > event ) ;
}
/* reads on behalf of the partner,
* " submitted " by the receiver
*/
2011-02-04 17:57:48 +03:00
void drbd_endio_read_sec_final ( struct drbd_peer_request * peer_req ) __releases ( local )
2009-09-26 03:07:19 +04:00
{
unsigned long flags = 0 ;
2011-02-08 17:08:49 +03:00
struct drbd_conf * mdev = peer_req - > w . mdev ;
2009-09-26 03:07:19 +04:00
2011-01-19 16:16:30 +03:00
spin_lock_irqsave ( & mdev - > tconn - > req_lock , flags ) ;
2011-02-04 17:57:48 +03:00
mdev - > read_cnt + = peer_req - > i . size > > 9 ;
list_del ( & peer_req - > w . list ) ;
2009-09-26 03:07:19 +04:00
if ( list_empty ( & mdev - > read_ee ) )
wake_up ( & mdev - > ee_wait ) ;
2011-02-04 17:57:48 +03:00
if ( test_bit ( __EE_WAS_ERROR , & peer_req - > flags ) )
2010-12-09 17:03:57 +03:00
__drbd_chk_io_error ( mdev , false ) ;
2011-01-19 16:16:30 +03:00
spin_unlock_irqrestore ( & mdev - > tconn - > req_lock , flags ) ;
2009-09-26 03:07:19 +04:00
2011-02-04 17:57:48 +03:00
drbd_queue_work ( & mdev - > tconn - > data . work , & peer_req - > w ) ;
2009-09-26 03:07:19 +04:00
put_ldev ( mdev ) ;
}
/* writes on behalf of the partner, or resync writes,
2010-05-14 19:10:48 +04:00
* " submitted " by the receiver , final stage . */
2011-02-04 17:57:48 +03:00
static void drbd_endio_write_sec_final ( struct drbd_peer_request * peer_req ) __releases ( local )
2009-09-26 03:07:19 +04:00
{
unsigned long flags = 0 ;
2011-02-08 17:08:49 +03:00
struct drbd_conf * mdev = peer_req - > w . mdev ;
2009-09-26 03:07:19 +04:00
sector_t e_sector ;
int do_wake ;
2011-01-13 20:40:57 +03:00
u64 block_id ;
2009-09-26 03:07:19 +04:00
int do_al_complete_io ;
2011-02-04 17:57:48 +03:00
/* after we moved peer_req to done_ee,
2009-09-26 03:07:19 +04:00
* we may no longer access it ,
* it may be freed / reused already !
* ( as soon as we release the req_lock ) */
2011-02-04 17:57:48 +03:00
e_sector = peer_req - > i . sector ;
do_al_complete_io = peer_req - > flags & EE_CALL_AL_COMPLETE_IO ;
block_id = peer_req - > block_id ;
2009-09-26 03:07:19 +04:00
2011-01-19 16:16:30 +03:00
spin_lock_irqsave ( & mdev - > tconn - > req_lock , flags ) ;
2011-02-04 17:57:48 +03:00
mdev - > writ_cnt + = peer_req - > i . size > > 9 ;
list_del ( & peer_req - > w . list ) ; /* has been on active_ee or sync_ee */
list_add_tail ( & peer_req - > w . list , & mdev - > done_ee ) ;
2009-09-26 03:07:19 +04:00
2011-01-21 17:59:23 +03:00
/*
2011-01-27 16:42:51 +03:00
* Do not remove from the write_requests tree here : we did not send the
2011-01-21 17:59:23 +03:00
* Ack yet and did not wake possibly waiting conflicting requests .
* Removed from the tree from " drbd_process_done_ee " within the
* appropriate w . cb ( e_end_block / e_end_resync_block ) or from
* _drbd_clear_done_ee .
*/
2009-09-26 03:07:19 +04:00
2011-01-13 20:40:57 +03:00
do_wake = list_empty ( block_id = = ID_SYNCER ? & mdev - > sync_ee : & mdev - > active_ee ) ;
2009-09-26 03:07:19 +04:00
2011-02-04 17:57:48 +03:00
if ( test_bit ( __EE_WAS_ERROR , & peer_req - > flags ) )
2010-12-09 17:03:57 +03:00
__drbd_chk_io_error ( mdev , false ) ;
2011-01-19 16:16:30 +03:00
spin_unlock_irqrestore ( & mdev - > tconn - > req_lock , flags ) ;
2009-09-26 03:07:19 +04:00
2011-01-13 20:40:57 +03:00
if ( block_id = = ID_SYNCER )
2009-09-26 03:07:19 +04:00
drbd_rs_complete_io ( mdev , e_sector ) ;
if ( do_wake )
wake_up ( & mdev - > ee_wait ) ;
if ( do_al_complete_io )
drbd_al_complete_io ( mdev , e_sector ) ;
2011-02-07 16:49:19 +03:00
wake_asender ( mdev - > tconn ) ;
2009-09-26 03:07:19 +04:00
put_ldev ( mdev ) ;
2010-05-14 19:10:48 +04:00
}
2009-09-26 03:07:19 +04:00
2010-05-14 19:10:48 +04:00
/* writes on behalf of the partner, or resync writes,
* " submitted " by the receiver .
*/
2011-02-17 18:46:59 +03:00
void drbd_peer_request_endio ( struct bio * bio , int error )
2010-05-14 19:10:48 +04:00
{
2011-02-04 17:57:48 +03:00
struct drbd_peer_request * peer_req = bio - > bi_private ;
2011-02-08 17:08:49 +03:00
struct drbd_conf * mdev = peer_req - > w . mdev ;
2010-05-14 19:10:48 +04:00
int uptodate = bio_flagged ( bio , BIO_UPTODATE ) ;
int is_write = bio_data_dir ( bio ) = = WRITE ;
2010-12-20 17:38:07 +03:00
if ( error & & __ratelimit ( & drbd_ratelimit_state ) )
2010-05-14 19:10:48 +04:00
dev_warn ( DEV , " %s: error=%d s=%llus \n " ,
is_write ? " write " : " read " , error ,
2011-02-04 17:57:48 +03:00
( unsigned long long ) peer_req - > i . sector ) ;
2010-05-14 19:10:48 +04:00
if ( ! error & & ! uptodate ) {
2010-12-20 17:38:07 +03:00
if ( __ratelimit ( & drbd_ratelimit_state ) )
dev_warn ( DEV , " %s: setting error to -EIO s=%llus \n " ,
is_write ? " write " : " read " ,
2011-02-04 17:57:48 +03:00
( unsigned long long ) peer_req - > i . sector ) ;
2010-05-14 19:10:48 +04:00
/* strange behavior of some lower level drivers...
* fail the request by clearing the uptodate flag ,
* but do not return any error ? ! */
error = - EIO ;
}
if ( error )
2011-02-04 17:57:48 +03:00
set_bit ( __EE_WAS_ERROR , & peer_req - > flags ) ;
2010-05-14 19:10:48 +04:00
bio_put ( bio ) ; /* no need for the bio anymore */
2011-02-04 17:57:48 +03:00
if ( atomic_dec_and_test ( & peer_req - > pending_bios ) ) {
2010-05-14 19:10:48 +04:00
if ( is_write )
2011-02-04 17:57:48 +03:00
drbd_endio_write_sec_final ( peer_req ) ;
2010-05-14 19:10:48 +04:00
else
2011-02-04 17:57:48 +03:00
drbd_endio_read_sec_final ( peer_req ) ;
2010-05-14 19:10:48 +04:00
}
2009-09-26 03:07:19 +04:00
}
/* read, readA or write requests on R_PRIMARY coming from drbd_make_request
*/
2011-02-17 18:46:59 +03:00
void drbd_request_endio ( struct bio * bio , int error )
2009-09-26 03:07:19 +04:00
{
2010-11-13 22:42:29 +03:00
unsigned long flags ;
2009-09-26 03:07:19 +04:00
struct drbd_request * req = bio - > bi_private ;
2011-02-08 17:08:49 +03:00
struct drbd_conf * mdev = req - > w . mdev ;
2010-11-13 22:42:29 +03:00
struct bio_and_error m ;
2009-09-26 03:07:19 +04:00
enum drbd_req_event what ;
int uptodate = bio_flagged ( bio , BIO_UPTODATE ) ;
if ( ! error & & ! uptodate ) {
dev_warn ( DEV , " p %s: setting error to -EIO \n " ,
bio_data_dir ( bio ) = = WRITE ? " write " : " read " ) ;
/* strange behavior of some lower level drivers...
* fail the request by clearing the uptodate flag ,
* but do not return any error ? ! */
error = - EIO ;
}
/* to avoid recursion in __req_mod */
if ( unlikely ( error ) ) {
what = ( bio_data_dir ( bio ) = = WRITE )
2011-01-25 17:37:43 +03:00
? WRITE_COMPLETED_WITH_ERROR
2010-04-10 04:10:09 +04:00
: ( bio_rw ( bio ) = = READ )
2011-01-25 17:37:43 +03:00
? READ_COMPLETED_WITH_ERROR
: READ_AHEAD_COMPLETED_WITH_ERROR ;
2009-09-26 03:07:19 +04:00
} else
2011-01-25 17:37:43 +03:00
what = COMPLETED_OK ;
2009-09-26 03:07:19 +04:00
bio_put ( req - > private_bio ) ;
req - > private_bio = ERR_PTR ( error ) ;
2010-11-13 22:42:29 +03:00
/* not req_mod(), we need irqsave here! */
2011-01-19 16:16:30 +03:00
spin_lock_irqsave ( & mdev - > tconn - > req_lock , flags ) ;
2010-11-13 22:42:29 +03:00
__req_mod ( req , what , & m ) ;
2011-01-19 16:16:30 +03:00
spin_unlock_irqrestore ( & mdev - > tconn - > req_lock , flags ) ;
2010-11-13 22:42:29 +03:00
if ( m . bio )
complete_master_bio ( mdev , & m ) ;
2009-09-26 03:07:19 +04:00
}
2011-02-09 20:09:48 +03:00
int w_read_retry_remote ( struct drbd_work * w , int cancel )
2009-09-26 03:07:19 +04:00
{
struct drbd_request * req = container_of ( w , struct drbd_request , w ) ;
2011-02-09 20:09:48 +03:00
struct drbd_conf * mdev = w - > mdev ;
2009-09-26 03:07:19 +04:00
/* We should not detach for read io-error,
* but try to WRITE the P_DATA_REPLY to the failed location ,
* to give the disk the chance to relocate that block */
2011-01-19 16:16:30 +03:00
spin_lock_irq ( & mdev - > tconn - > req_lock ) ;
2010-05-27 11:45:45 +04:00
if ( cancel | | mdev - > state . pdsk ! = D_UP_TO_DATE ) {
2011-01-25 17:37:43 +03:00
_req_mod ( req , READ_RETRY_REMOTE_CANCELED ) ;
2011-01-19 16:16:30 +03:00
spin_unlock_irq ( & mdev - > tconn - > req_lock ) ;
2009-09-26 03:07:19 +04:00
return 1 ;
}
2011-01-19 16:16:30 +03:00
spin_unlock_irq ( & mdev - > tconn - > req_lock ) ;
2009-09-26 03:07:19 +04:00
2011-02-09 20:09:48 +03:00
return w_send_read_req ( w , 0 ) ;
2009-09-26 03:07:19 +04:00
}
2011-02-04 17:30:34 +03:00
void drbd_csum_ee ( struct drbd_conf * mdev , struct crypto_hash * tfm ,
2011-02-04 17:57:48 +03:00
struct drbd_peer_request * peer_req , void * digest )
2010-05-14 19:10:48 +04:00
{
struct hash_desc desc ;
struct scatterlist sg ;
2011-02-04 17:57:48 +03:00
struct page * page = peer_req - > pages ;
2010-05-14 19:10:48 +04:00
struct page * tmp ;
unsigned len ;
desc . tfm = tfm ;
desc . flags = 0 ;
sg_init_table ( & sg , 1 ) ;
crypto_hash_init ( & desc ) ;
while ( ( tmp = page_chain_next ( page ) ) ) {
/* all but the last page will be fully used */
sg_set_page ( & sg , page , PAGE_SIZE , 0 ) ;
crypto_hash_update ( & desc , & sg , sg . length ) ;
page = tmp ;
}
/* and now the last, possibly only partially used page */
2011-02-04 17:57:48 +03:00
len = peer_req - > i . size & ( PAGE_SIZE - 1 ) ;
2010-05-14 19:10:48 +04:00
sg_set_page ( & sg , page , len ? : PAGE_SIZE , 0 ) ;
crypto_hash_update ( & desc , & sg , sg . length ) ;
crypto_hash_final ( & desc , digest ) ;
}
void drbd_csum_bio ( struct drbd_conf * mdev , struct crypto_hash * tfm , struct bio * bio , void * digest )
2009-09-26 03:07:19 +04:00
{
struct hash_desc desc ;
struct scatterlist sg ;
struct bio_vec * bvec ;
int i ;
desc . tfm = tfm ;
desc . flags = 0 ;
sg_init_table ( & sg , 1 ) ;
crypto_hash_init ( & desc ) ;
__bio_for_each_segment ( bvec , bio , i , 0 ) {
sg_set_page ( & sg , bvec - > bv_page , bvec - > bv_len , bvec - > bv_offset ) ;
crypto_hash_update ( & desc , & sg , sg . length ) ;
}
crypto_hash_final ( & desc , digest ) ;
}
2011-02-22 16:02:31 +03:00
/* MAYBE merge common code with w_e_end_ov_req */
2011-02-09 20:09:48 +03:00
static int w_e_send_csum ( struct drbd_work * w , int cancel )
2009-09-26 03:07:19 +04:00
{
2011-02-09 20:09:48 +03:00
struct drbd_peer_request * peer_req = container_of ( w , struct drbd_peer_request , w ) ;
struct drbd_conf * mdev = w - > mdev ;
2009-09-26 03:07:19 +04:00
int digest_size ;
void * digest ;
2011-03-08 19:11:40 +03:00
int ok = 1 ;
2009-09-26 03:07:19 +04:00
2011-03-08 19:11:40 +03:00
if ( unlikely ( cancel ) )
goto out ;
2009-09-26 03:07:19 +04:00
2011-02-22 16:02:31 +03:00
if ( unlikely ( ( peer_req - > flags & EE_WAS_ERROR ) ! = 0 ) )
2011-03-08 19:11:40 +03:00
goto out ;
2009-09-26 03:07:19 +04:00
2011-03-23 16:31:09 +03:00
digest_size = crypto_hash_digestsize ( mdev - > tconn - > csums_tfm ) ;
2011-03-08 19:11:40 +03:00
digest = kmalloc ( digest_size , GFP_NOIO ) ;
if ( digest ) {
2011-02-04 17:57:48 +03:00
sector_t sector = peer_req - > i . sector ;
unsigned int size = peer_req - > i . size ;
2011-03-23 16:31:09 +03:00
drbd_csum_ee ( mdev , mdev - > tconn - > csums_tfm , peer_req , digest ) ;
2011-02-22 16:02:31 +03:00
/* Free peer_req and pages before send.
2011-03-08 19:11:40 +03:00
* In case we block on congestion , we could otherwise run into
* some distributed deadlock , if the other side blocks on
* congestion as well , because our receiver blocks in
* drbd_pp_alloc due to pp_in_use > max_buffers . */
2011-02-04 17:57:48 +03:00
drbd_free_ee ( mdev , peer_req ) ;
peer_req = NULL ;
2011-03-08 19:11:40 +03:00
inc_rs_pending ( mdev ) ;
ok = drbd_send_drequest_csum ( mdev , sector , size ,
digest , digest_size ,
P_CSUM_RS_REQUEST ) ;
kfree ( digest ) ;
} else {
dev_err ( DEV , " kmalloc() of digest failed. \n " ) ;
ok = 0 ;
}
2009-09-26 03:07:19 +04:00
2011-03-08 19:11:40 +03:00
out :
2011-02-04 17:57:48 +03:00
if ( peer_req )
drbd_free_ee ( mdev , peer_req ) ;
2009-09-26 03:07:19 +04:00
if ( unlikely ( ! ok ) )
dev_err ( DEV , " drbd_send_drequest(..., csum) failed \n " ) ;
return ok ;
}
#define GFP_TRY	(__GFP_HIGHMEM | __GFP_NOWARN)

/* Submit a local read of @size bytes at @sector whose completion work
 * (w_e_send_csum) sends a checksum request to the peer.
 *
 * Returns 0 when the read was submitted, -EIO when no local disk
 * reference could be taken, and -EAGAIN when the resync should slow
 * down, the peer request could not be allocated, or the submission
 * failed. On success the local-disk reference is kept.
 */
static int read_for_csum(struct drbd_conf *mdev, sector_t sector, int size)
{
	struct drbd_peer_request *peer_req = NULL;

	if (!get_ldev(mdev))
		return -EIO;

	/* GFP_TRY, because if there is no memory available right now, this may
	 * be rescheduled for later. It is "only" background resync, after all. */
	if (!drbd_rs_should_slow_down(mdev, sector))
		peer_req = drbd_alloc_ee(mdev, ID_SYNCER /* unused */, sector, size, GFP_TRY);
	if (!peer_req)
		goto defer;

	peer_req->w.cb = w_e_send_csum;

	spin_lock_irq(&mdev->tconn->req_lock);
	list_add(&peer_req->w.list, &mdev->read_ee);
	spin_unlock_irq(&mdev->tconn->req_lock);

	atomic_add(size >> 9, &mdev->rs_sect_ev);
	if (drbd_submit_peer_request(mdev, peer_req, READ, DRBD_FAULT_RS_RD) == 0)
		return 0;

	/* If it failed because of ENOMEM, retry should help.  If it failed
	 * because bio_add_page failed (probably broken lower level driver),
	 * retry may or may not help.
	 * If it does not, you may need to force disconnect. */
	spin_lock_irq(&mdev->tconn->req_lock);
	list_del(&peer_req->w.list);
	spin_unlock_irq(&mdev->tconn->req_lock);

	drbd_free_ee(mdev, peer_req);

defer:
	put_ldev(mdev);
	return -EAGAIN;
}
2011-02-09 20:09:48 +03:00
int w_resync_timer ( struct drbd_work * w , int cancel )
2009-09-26 03:07:19 +04:00
{
2011-02-09 20:09:48 +03:00
struct drbd_conf * mdev = w - > mdev ;
2010-09-01 17:47:15 +04:00
switch ( mdev - > state . conn ) {
case C_VERIFY_S :
2011-02-09 20:09:48 +03:00
w_make_ov_request ( w , cancel ) ;
2010-09-01 17:47:15 +04:00
break ;
case C_SYNC_TARGET :
2011-02-09 20:09:48 +03:00
w_make_resync_request ( w , cancel ) ;
2010-09-01 17:47:15 +04:00
break ;
2009-09-26 03:07:19 +04:00
}
2010-12-27 13:51:23 +03:00
return 1 ;
}
void resync_timer_fn ( unsigned long data )
{
struct drbd_conf * mdev = ( struct drbd_conf * ) data ;
if ( list_empty ( & mdev - > resync_work . list ) )
2011-01-19 15:55:45 +03:00
drbd_queue_work ( & mdev - > tconn - > data . work , & mdev - > resync_work ) ;
2009-09-26 03:07:19 +04:00
}
2010-07-06 13:14:00 +04:00
static void fifo_set ( struct fifo_buffer * fb , int value )
{
int i ;
for ( i = 0 ; i < fb - > size ; i + + )
2010-10-05 18:50:17 +04:00
fb - > values [ i ] = value ;
2010-07-06 13:14:00 +04:00
}
static int fifo_push ( struct fifo_buffer * fb , int value )
{
int ov ;
ov = fb - > values [ fb - > head_index ] ;
fb - > values [ fb - > head_index + + ] = value ;
if ( fb - > head_index > = fb - > size )
fb - > head_index = 0 ;
return ov ;
}
static void fifo_add_val ( struct fifo_buffer * fb , int value )
{
int i ;
for ( i = 0 ; i < fb - > size ; i + + )
fb - > values [ i ] + = value ;
}
2010-11-07 20:02:56 +03:00
static int drbd_rs_controller ( struct drbd_conf * mdev )
2010-07-06 13:14:00 +04:00
{
unsigned int sect_in ; /* Number of sectors that came in since the last turn */
unsigned int want ; /* The number of sectors we want in the proxy */
int req_sect ; /* Number of sectors to request in this turn */
int correction ; /* Number of sectors more we need in the proxy*/
int cps ; /* correction per invocation of drbd_rs_controller() */
int steps ; /* Number of time steps to plan ahead */
int curr_corr ;
int max_sect ;
sect_in = atomic_xchg ( & mdev - > rs_sect_in , 0 ) ; /* Number of sectors that came in */
mdev - > rs_in_flight - = sect_in ;
spin_lock ( & mdev - > peer_seq_lock ) ; /* get an atomic view on mdev->rs_plan_s */
2011-03-23 16:31:09 +03:00
steps = mdev - > rs_plan_s . size ; /* (mdev->ldev->dc.c_plan_ahead * 10 * SLEEP_TIME) / HZ; */
2010-07-06 13:14:00 +04:00
if ( mdev - > rs_in_flight + sect_in = = 0 ) { /* At start of resync */
2011-03-23 16:31:09 +03:00
want = ( ( mdev - > ldev - > dc . resync_rate * 2 * SLEEP_TIME ) / HZ ) * steps ;
2010-07-06 13:14:00 +04:00
} else { /* normal path */
2011-03-23 16:31:09 +03:00
want = mdev - > ldev - > dc . c_fill_target ? mdev - > ldev - > dc . c_fill_target :
sect_in * mdev - > ldev - > dc . c_delay_target * HZ / ( SLEEP_TIME * 10 ) ;
2010-07-06 13:14:00 +04:00
}
correction = want - mdev - > rs_in_flight - mdev - > rs_planed ;
/* Plan ahead */
cps = correction / steps ;
fifo_add_val ( & mdev - > rs_plan_s , cps ) ;
mdev - > rs_planed + = cps * steps ;
/* What we do in this step */
curr_corr = fifo_push ( & mdev - > rs_plan_s , 0 ) ;
spin_unlock ( & mdev - > peer_seq_lock ) ;
mdev - > rs_planed - = curr_corr ;
req_sect = sect_in + curr_corr ;
if ( req_sect < 0 )
req_sect = 0 ;
2011-03-23 16:31:09 +03:00
max_sect = ( mdev - > ldev - > dc . c_max_rate * 2 * SLEEP_TIME ) / HZ ;
2010-07-06 13:14:00 +04:00
if ( req_sect > max_sect )
req_sect = max_sect ;
/*
dev_warn ( DEV , " si=%u if=%d wa=%u co=%d st=%d cps=%d pl=%d cc=%d rs=%d \n " ,
sect_in , mdev - > rs_in_flight , want , correction ,
steps , cps , mdev - > rs_planed , curr_corr , req_sect ) ;
*/
return req_sect ;
}
2010-11-07 20:02:56 +03:00
static int drbd_rs_number_requests ( struct drbd_conf * mdev )
2010-11-05 12:04:07 +03:00
{
int number ;
2011-03-23 16:31:09 +03:00
if ( mdev - > rs_plan_s . size ) { /* mdev->ldev->dc.c_plan_ahead */
2010-11-05 12:04:07 +03:00
number = drbd_rs_controller ( mdev ) > > ( BM_BLOCK_SHIFT - 9 ) ;
mdev - > c_sync_rate = number * HZ * ( BM_BLOCK_SIZE / 1024 ) / SLEEP_TIME ;
} else {
2011-03-23 16:31:09 +03:00
mdev - > c_sync_rate = mdev - > ldev - > dc . resync_rate ;
2010-11-05 12:04:07 +03:00
number = SLEEP_TIME * mdev - > c_sync_rate / ( ( BM_BLOCK_SIZE / 1024 ) * HZ ) ;
}
/* ignore the amount of pending requests, the resync controller should
* throttle down to incoming reply rate soon enough anyways . */
return number ;
}
2011-02-09 20:09:48 +03:00
static int w_make_resync_request ( struct drbd_work * w , int cancel )
2009-09-26 03:07:19 +04:00
{
2011-02-09 20:09:48 +03:00
struct drbd_conf * mdev = w - > mdev ;
2009-09-26 03:07:19 +04:00
unsigned long bit ;
sector_t sector ;
const sector_t capacity = drbd_get_capacity ( mdev - > this_bdev ) ;
2010-11-11 17:19:07 +03:00
int max_bio_size ;
2010-11-05 12:04:07 +03:00
int number , rollback_i , size ;
2009-09-26 03:07:19 +04:00
int align , queued , sndbuf ;
2010-08-12 01:40:24 +04:00
int i = 0 ;
2009-09-26 03:07:19 +04:00
if ( unlikely ( cancel ) )
return 1 ;
2010-10-07 18:07:55 +04:00
if ( mdev - > rs_total = = 0 ) {
/* empty resync? */
drbd_resync_finished ( mdev ) ;
return 1 ;
}
2009-09-26 03:07:19 +04:00
if ( ! get_ldev ( mdev ) ) {
/* Since we only need to access mdev->rsync a
get_ldev_if_state ( mdev , D_FAILED ) would be sufficient , but
to continue resync with a broken disk makes no sense at
all */
dev_err ( DEV , " Disk broke down during resync! \n " ) ;
return 1 ;
}
2011-05-25 13:14:35 +04:00
max_bio_size = queue_max_hw_sectors ( mdev - > rq_queue ) < < 9 ;
2010-11-05 12:04:07 +03:00
number = drbd_rs_number_requests ( mdev ) ;
if ( number = = 0 )
2010-08-12 01:40:24 +04:00
goto requeue ;
2009-09-26 03:07:19 +04:00
for ( i = 0 ; i < number ; i + + ) {
/* Stop generating RS requests, when half of the send buffer is filled */
2011-01-19 15:55:45 +03:00
mutex_lock ( & mdev - > tconn - > data . mutex ) ;
if ( mdev - > tconn - > data . socket ) {
queued = mdev - > tconn - > data . socket - > sk - > sk_wmem_queued ;
sndbuf = mdev - > tconn - > data . socket - > sk - > sk_sndbuf ;
2009-09-26 03:07:19 +04:00
} else {
queued = 1 ;
sndbuf = 0 ;
}
2011-01-19 15:55:45 +03:00
mutex_unlock ( & mdev - > tconn - > data . mutex ) ;
2009-09-26 03:07:19 +04:00
if ( queued > sndbuf / 2 )
goto requeue ;
next_sector :
size = BM_BLOCK_SIZE ;
bit = drbd_bm_find_next ( mdev , mdev - > bm_resync_fo ) ;
2010-12-14 17:13:04 +03:00
if ( bit = = DRBD_END_OF_BITMAP ) {
2009-09-26 03:07:19 +04:00
mdev - > bm_resync_fo = drbd_bm_bits ( mdev ) ;
put_ldev ( mdev ) ;
return 1 ;
}
sector = BM_BIT_TO_SECT ( bit ) ;
2010-11-07 17:56:29 +03:00
if ( drbd_rs_should_slow_down ( mdev , sector ) | |
drbd_try_rs_begin_io ( mdev , sector ) ) {
2009-09-26 03:07:19 +04:00
mdev - > bm_resync_fo = bit ;
goto requeue ;
}
mdev - > bm_resync_fo = bit + 1 ;
if ( unlikely ( drbd_bm_test_bit ( mdev , bit ) = = 0 ) ) {
drbd_rs_complete_io ( mdev , sector ) ;
goto next_sector ;
}
2010-11-11 17:19:07 +03:00
# if DRBD_MAX_BIO_SIZE > BM_BLOCK_SIZE
2009-09-26 03:07:19 +04:00
/* try to find some adjacent bits.
* we stop if we have already the maximum req size .
*
* Additionally always align bigger requests , in order to
* be prepared for all stripe sizes of software RAIDs .
*/
align = 1 ;
2010-07-22 17:27:27 +04:00
rollback_i = i ;
2009-09-26 03:07:19 +04:00
for ( ; ; ) {
2010-11-11 17:19:07 +03:00
if ( size + BM_BLOCK_SIZE > max_bio_size )
2009-09-26 03:07:19 +04:00
break ;
/* Be always aligned */
if ( sector & ( ( 1 < < ( align + 3 ) ) - 1 ) )
break ;
/* do not cross extent boundaries */
if ( ( ( bit + 1 ) & BM_BLOCKS_PER_BM_EXT_MASK ) = = 0 )
break ;
/* now, is it actually dirty, after all?
* caution , drbd_bm_test_bit is tri - state for some
* obscure reason ; ( b = = 0 ) would get the out - of - band
* only accidentally right because of the " oddly sized "
* adjustment below */
if ( drbd_bm_test_bit ( mdev , bit + 1 ) ! = 1 )
break ;
bit + + ;
size + = BM_BLOCK_SIZE ;
if ( ( BM_BLOCK_SIZE < < align ) < = size )
align + + ;
i + + ;
}
/* if we merged some,
* reset the offset to start the next drbd_bm_find_next from */
if ( size > BM_BLOCK_SIZE )
mdev - > bm_resync_fo = bit + 1 ;
# endif
/* adjust very last sectors, in case we are oddly sized */
if ( sector + ( size > > 9 ) > capacity )
size = ( capacity - sector ) < < 9 ;
2011-03-23 16:31:09 +03:00
if ( mdev - > tconn - > agreed_pro_version > = 89 & & mdev - > tconn - > csums_tfm ) {
2009-09-26 03:07:19 +04:00
switch ( read_for_csum ( mdev , sector , size ) ) {
2010-08-12 01:28:00 +04:00
case - EIO : /* Disk failure */
2009-09-26 03:07:19 +04:00
put_ldev ( mdev ) ;
return 0 ;
2010-08-12 01:28:00 +04:00
case - EAGAIN : /* allocation failed, or ldev busy */
2009-09-26 03:07:19 +04:00
drbd_rs_complete_io ( mdev , sector ) ;
mdev - > bm_resync_fo = BM_SECT_TO_BIT ( sector ) ;
2010-07-22 17:27:27 +04:00
i = rollback_i ;
2009-09-26 03:07:19 +04:00
goto requeue ;
2010-08-12 01:28:00 +04:00
case 0 :
/* everything ok */
break ;
default :
BUG ( ) ;
2009-09-26 03:07:19 +04:00
}
} else {
inc_rs_pending ( mdev ) ;
if ( ! drbd_send_drequest ( mdev , P_RS_DATA_REQUEST ,
sector , size , ID_SYNCER ) ) {
dev_err ( DEV , " drbd_send_drequest() failed, aborting... \n " ) ;
dec_rs_pending ( mdev ) ;
put_ldev ( mdev ) ;
return 0 ;
}
}
}
if ( mdev - > bm_resync_fo > = drbd_bm_bits ( mdev ) ) {
/* last syncer _request_ was sent,
* but the P_RS_DATA_REPLY not yet received . sync will end ( and
* next sync group will resume ) , as soon as we receive the last
* resync data block , and the last bit is cleared .
* until then resync " work " is " inactive " . . .
*/
put_ldev ( mdev ) ;
return 1 ;
}
requeue :
2010-07-06 13:14:00 +04:00
mdev - > rs_in_flight + = ( i < < ( BM_BLOCK_SHIFT - 9 ) ) ;
2009-09-26 03:07:19 +04:00
mod_timer ( & mdev - > resync_timer , jiffies + SLEEP_TIME ) ;
put_ldev ( mdev ) ;
return 1 ;
}
2011-02-09 20:09:48 +03:00
static int w_make_ov_request ( struct drbd_work * w , int cancel )
2009-09-26 03:07:19 +04:00
{
2011-02-09 20:09:48 +03:00
struct drbd_conf * mdev = w - > mdev ;
2009-09-26 03:07:19 +04:00
int number , i , size ;
sector_t sector ;
const sector_t capacity = drbd_get_capacity ( mdev - > this_bdev ) ;
if ( unlikely ( cancel ) )
return 1 ;
2010-11-05 12:05:47 +03:00
number = drbd_rs_number_requests ( mdev ) ;
2009-09-26 03:07:19 +04:00
sector = mdev - > ov_position ;
for ( i = 0 ; i < number ; i + + ) {
if ( sector > = capacity ) {
return 1 ;
}
size = BM_BLOCK_SIZE ;
2010-11-07 17:56:29 +03:00
if ( drbd_rs_should_slow_down ( mdev , sector ) | |
drbd_try_rs_begin_io ( mdev , sector ) ) {
2009-09-26 03:07:19 +04:00
mdev - > ov_position = sector ;
goto requeue ;
}
if ( sector + ( size > > 9 ) > capacity )
size = ( capacity - sector ) < < 9 ;
inc_rs_pending ( mdev ) ;
2011-03-16 03:31:39 +03:00
if ( drbd_send_ov_request ( mdev , sector , size ) ) {
2009-09-26 03:07:19 +04:00
dec_rs_pending ( mdev ) ;
return 0 ;
}
sector + = BM_SECT_PER_BIT ;
}
mdev - > ov_position = sector ;
requeue :
2010-11-05 12:05:47 +03:00
mdev - > rs_in_flight + = ( i < < ( BM_BLOCK_SHIFT - 9 ) ) ;
2009-09-26 03:07:19 +04:00
mod_timer ( & mdev - > resync_timer , jiffies + SLEEP_TIME ) ;
return 1 ;
}
2011-02-09 20:09:48 +03:00
int w_ov_finished ( struct drbd_work * w , int cancel )
2009-09-26 03:07:19 +04:00
{
2011-02-09 20:09:48 +03:00
struct drbd_conf * mdev = w - > mdev ;
2009-09-26 03:07:19 +04:00
kfree ( w ) ;
ov_oos_print ( mdev ) ;
drbd_resync_finished ( mdev ) ;
return 1 ;
}
2011-02-09 20:09:48 +03:00
static int w_resync_finished ( struct drbd_work * w , int cancel )
2009-09-26 03:07:19 +04:00
{
2011-02-09 20:09:48 +03:00
struct drbd_conf * mdev = w - > mdev ;
2009-09-26 03:07:19 +04:00
kfree ( w ) ;
drbd_resync_finished ( mdev ) ;
return 1 ;
}
2010-10-07 18:07:55 +04:00
static void ping_peer ( struct drbd_conf * mdev )
{
2011-02-09 16:10:32 +03:00
struct drbd_tconn * tconn = mdev - > tconn ;
clear_bit ( GOT_PING_ACK , & tconn - > flags ) ;
request_ping ( tconn ) ;
wait_event ( tconn - > ping_wait ,
test_bit ( GOT_PING_ACK , & tconn - > flags ) | | mdev - > state . conn < C_CONNECTED ) ;
2010-10-07 18:07:55 +04:00
}
2009-09-26 03:07:19 +04:00
int drbd_resync_finished ( struct drbd_conf * mdev )
{
unsigned long db , dt , dbdt ;
unsigned long n_oos ;
union drbd_state os , ns ;
struct drbd_work * w ;
char * khelper_cmd = NULL ;
2010-11-05 11:56:33 +03:00
int verify_done = 0 ;
2009-09-26 03:07:19 +04:00
/* Remove all elements from the resync LRU. Since future actions
* might set bits in the ( main ) bitmap , then the entries in the
* resync LRU would be wrong . */
if ( drbd_rs_del_all ( mdev ) ) {
/* In case this is not possible now, most probably because
* there are P_RS_DATA_REPLY Packets lingering on the worker ' s
* queue ( or even the read operations for those packets
* is not finished by now ) . Retry in 100 ms . */
2011-01-18 17:28:59 +03:00
schedule_timeout_interruptible ( HZ / 10 ) ;
2009-09-26 03:07:19 +04:00
w = kmalloc ( sizeof ( struct drbd_work ) , GFP_ATOMIC ) ;
if ( w ) {
w - > cb = w_resync_finished ;
2011-01-19 15:55:45 +03:00
drbd_queue_work ( & mdev - > tconn - > data . work , w ) ;
2009-09-26 03:07:19 +04:00
return 1 ;
}
dev_err ( DEV , " Warn failed to drbd_rs_del_all() and to kmalloc(w). \n " ) ;
}
dt = ( jiffies - mdev - > rs_start - mdev - > rs_paused ) / HZ ;
if ( dt < = 0 )
dt = 1 ;
db = mdev - > rs_total ;
dbdt = Bit2KB ( db / dt ) ;
mdev - > rs_paused / = HZ ;
if ( ! get_ldev ( mdev ) )
goto out ;
2010-10-07 18:07:55 +04:00
ping_peer ( mdev ) ;
2011-01-19 16:16:30 +03:00
spin_lock_irq ( & mdev - > tconn - > req_lock ) ;
2009-09-26 03:07:19 +04:00
os = mdev - > state ;
2010-11-05 11:56:33 +03:00
verify_done = ( os . conn = = C_VERIFY_S | | os . conn = = C_VERIFY_T ) ;
2009-09-26 03:07:19 +04:00
/* This protects us against multiple calls (that can happen in the presence
of application IO ) , and against connectivity loss just before we arrive here . */
if ( os . conn < = C_CONNECTED )
goto out_unlock ;
ns = os ;
ns . conn = C_CONNECTED ;
dev_info ( DEV , " %s done (total %lu sec; paused %lu sec; %lu K/sec) \n " ,
2010-11-05 11:56:33 +03:00
verify_done ? " Online verify " : " Resync " ,
2009-09-26 03:07:19 +04:00
dt + mdev - > rs_paused , mdev - > rs_paused , dbdt ) ;
n_oos = drbd_bm_total_weight ( mdev ) ;
if ( os . conn = = C_VERIFY_S | | os . conn = = C_VERIFY_T ) {
if ( n_oos ) {
dev_alert ( DEV , " Online verify found %lu %dk block out of sync! \n " ,
n_oos , Bit2KB ( 1 ) ) ;
khelper_cmd = " out-of-sync " ;
}
} else {
D_ASSERT ( ( n_oos - mdev - > rs_failed ) = = 0 ) ;
if ( os . conn = = C_SYNC_TARGET | | os . conn = = C_PAUSED_SYNC_T )
khelper_cmd = " after-resync-target " ;
2011-03-23 16:31:09 +03:00
if ( mdev - > tconn - > csums_tfm & & mdev - > rs_total ) {
2009-09-26 03:07:19 +04:00
const unsigned long s = mdev - > rs_same_csum ;
const unsigned long t = mdev - > rs_total ;
const int ratio =
( t = = 0 ) ? 0 :
( t < 100000 ) ? ( ( s * 100 ) / t ) : ( s / ( t / 100 ) ) ;
2011-05-21 20:32:29 +04:00
dev_info ( DEV , " %u %% had equal checksums, eliminated: %luK; "
2009-09-26 03:07:19 +04:00
" transferred %luK total %luK \n " ,
ratio ,
Bit2KB ( mdev - > rs_same_csum ) ,
Bit2KB ( mdev - > rs_total - mdev - > rs_same_csum ) ,
Bit2KB ( mdev - > rs_total ) ) ;
}
}
if ( mdev - > rs_failed ) {
dev_info ( DEV , " %lu failed blocks \n " , mdev - > rs_failed ) ;
if ( os . conn = = C_SYNC_TARGET | | os . conn = = C_PAUSED_SYNC_T ) {
ns . disk = D_INCONSISTENT ;
ns . pdsk = D_UP_TO_DATE ;
} else {
ns . disk = D_UP_TO_DATE ;
ns . pdsk = D_INCONSISTENT ;
}
} else {
ns . disk = D_UP_TO_DATE ;
ns . pdsk = D_UP_TO_DATE ;
if ( os . conn = = C_SYNC_TARGET | | os . conn = = C_PAUSED_SYNC_T ) {
if ( mdev - > p_uuid ) {
int i ;
for ( i = UI_BITMAP ; i < = UI_HISTORY_END ; i + + )
_drbd_uuid_set ( mdev , i , mdev - > p_uuid [ i ] ) ;
drbd_uuid_set ( mdev , UI_BITMAP , mdev - > ldev - > md . uuid [ UI_CURRENT ] ) ;
_drbd_uuid_set ( mdev , UI_CURRENT , mdev - > p_uuid [ UI_CURRENT ] ) ;
} else {
dev_err ( DEV , " mdev->p_uuid is NULL! BUG \n " ) ;
}
}
2011-01-20 15:25:21 +03:00
if ( ! ( os . conn = = C_VERIFY_S | | os . conn = = C_VERIFY_T ) ) {
/* for verify runs, we don't update uuids here,
* so there would be nothing to report . */
drbd_uuid_set_bm ( mdev , 0UL ) ;
drbd_print_uuids ( mdev , " updated UUIDs " ) ;
if ( mdev - > p_uuid ) {
/* Now the two UUID sets are equal, update what we
* know of the peer . */
int i ;
for ( i = UI_CURRENT ; i < = UI_HISTORY_END ; i + + )
mdev - > p_uuid [ i ] = mdev - > ldev - > md . uuid [ i ] ;
}
2009-09-26 03:07:19 +04:00
}
}
_drbd_set_state ( mdev , ns , CS_VERBOSE , NULL ) ;
out_unlock :
2011-01-19 16:16:30 +03:00
spin_unlock_irq ( & mdev - > tconn - > req_lock ) ;
2009-09-26 03:07:19 +04:00
put_ldev ( mdev ) ;
out :
mdev - > rs_total = 0 ;
mdev - > rs_failed = 0 ;
mdev - > rs_paused = 0 ;
2010-11-05 11:56:33 +03:00
if ( verify_done )
mdev - > ov_start_sector = 0 ;
2009-09-26 03:07:19 +04:00
2010-10-13 19:37:54 +04:00
drbd_md_sync ( mdev ) ;
2009-09-26 03:07:19 +04:00
if ( khelper_cmd )
drbd_khelper ( mdev , khelper_cmd ) ;
return 1 ;
}
/* helper */
2011-02-04 17:57:48 +03:00
static void move_to_net_ee_or_free ( struct drbd_conf * mdev , struct drbd_peer_request * peer_req )
2009-09-26 03:07:19 +04:00
{
2011-02-04 17:57:48 +03:00
if ( drbd_ee_has_active_page ( peer_req ) ) {
2009-09-26 03:07:19 +04:00
/* This might happen if sendpage() has not finished */
2011-02-04 17:57:48 +03:00
int i = ( peer_req - > i . size + PAGE_SIZE - 1 ) > > PAGE_SHIFT ;
2010-09-06 14:30:25 +04:00
atomic_add ( i , & mdev - > pp_in_use_by_net ) ;
atomic_sub ( i , & mdev - > pp_in_use ) ;
2011-01-19 16:16:30 +03:00
spin_lock_irq ( & mdev - > tconn - > req_lock ) ;
2011-02-04 17:57:48 +03:00
list_add_tail ( & peer_req - > w . list , & mdev - > net_ee ) ;
2011-01-19 16:16:30 +03:00
spin_unlock_irq ( & mdev - > tconn - > req_lock ) ;
2010-09-06 14:30:25 +04:00
wake_up ( & drbd_pp_wait ) ;
2009-09-26 03:07:19 +04:00
} else
2011-02-04 17:57:48 +03:00
drbd_free_ee ( mdev , peer_req ) ;
2009-09-26 03:07:19 +04:00
}
/**
* w_e_end_data_req ( ) - Worker callback , to send a P_DATA_REPLY packet in response to a P_DATA_REQUEST
* @ mdev : DRBD device .
* @ w : work object .
* @ cancel : The connection will be closed anyways
*/
2011-02-09 20:09:48 +03:00
int w_e_end_data_req ( struct drbd_work * w , int cancel )
2009-09-26 03:07:19 +04:00
{
2011-02-04 17:57:48 +03:00
struct drbd_peer_request * peer_req = container_of ( w , struct drbd_peer_request , w ) ;
2011-02-09 20:09:48 +03:00
struct drbd_conf * mdev = w - > mdev ;
2009-09-26 03:07:19 +04:00
int ok ;
if ( unlikely ( cancel ) ) {
2011-02-04 17:57:48 +03:00
drbd_free_ee ( mdev , peer_req ) ;
2009-09-26 03:07:19 +04:00
dec_unacked ( mdev ) ;
return 1 ;
}
2011-02-04 17:57:48 +03:00
if ( likely ( ( peer_req - > flags & EE_WAS_ERROR ) = = 0 ) ) {
ok = drbd_send_block ( mdev , P_DATA_REPLY , peer_req ) ;
2009-09-26 03:07:19 +04:00
} else {
if ( __ratelimit ( & drbd_ratelimit_state ) )
dev_err ( DEV , " Sending NegDReply. sector=%llus. \n " ,
2011-02-04 17:57:48 +03:00
( unsigned long long ) peer_req - > i . sector ) ;
2009-09-26 03:07:19 +04:00
2011-03-16 17:39:08 +03:00
ok = ! drbd_send_ack ( mdev , P_NEG_DREPLY , peer_req ) ;
2009-09-26 03:07:19 +04:00
}
dec_unacked ( mdev ) ;
2011-02-04 17:57:48 +03:00
move_to_net_ee_or_free ( mdev , peer_req ) ;
2009-09-26 03:07:19 +04:00
if ( unlikely ( ! ok ) )
dev_err ( DEV , " drbd_send_block() failed \n " ) ;
return ok ;
}
/**
* w_e_end_rsdata_req ( ) - Worker callback to send a P_RS_DATA_REPLY packet in response to a P_RS_DATA_REQUESTRS
* @ mdev : DRBD device .
* @ w : work object .
* @ cancel : The connection will be closed anyways
*/
2011-02-09 20:09:48 +03:00
int w_e_end_rsdata_req ( struct drbd_work * w , int cancel )
2009-09-26 03:07:19 +04:00
{
2011-02-04 17:57:48 +03:00
struct drbd_peer_request * peer_req = container_of ( w , struct drbd_peer_request , w ) ;
2011-02-09 20:09:48 +03:00
struct drbd_conf * mdev = w - > mdev ;
2009-09-26 03:07:19 +04:00
int ok ;
if ( unlikely ( cancel ) ) {
2011-02-04 17:57:48 +03:00
drbd_free_ee ( mdev , peer_req ) ;
2009-09-26 03:07:19 +04:00
dec_unacked ( mdev ) ;
return 1 ;
}
if ( get_ldev_if_state ( mdev , D_FAILED ) ) {
2011-02-04 17:57:48 +03:00
drbd_rs_complete_io ( mdev , peer_req - > i . sector ) ;
2009-09-26 03:07:19 +04:00
put_ldev ( mdev ) ;
}
2010-12-27 12:53:28 +03:00
if ( mdev - > state . conn = = C_AHEAD ) {
2011-03-16 17:39:08 +03:00
ok = ! drbd_send_ack ( mdev , P_RS_CANCEL , peer_req ) ;
2011-02-04 17:57:48 +03:00
} else if ( likely ( ( peer_req - > flags & EE_WAS_ERROR ) = = 0 ) ) {
2009-09-26 03:07:19 +04:00
if ( likely ( mdev - > state . pdsk > = D_INCONSISTENT ) ) {
inc_rs_pending ( mdev ) ;
2011-02-04 17:57:48 +03:00
ok = drbd_send_block ( mdev , P_RS_DATA_REPLY , peer_req ) ;
2009-09-26 03:07:19 +04:00
} else {
if ( __ratelimit ( & drbd_ratelimit_state ) )
dev_err ( DEV , " Not sending RSDataReply, "
" partner DISKLESS! \n " ) ;
ok = 1 ;
}
} else {
if ( __ratelimit ( & drbd_ratelimit_state ) )
dev_err ( DEV , " Sending NegRSDReply. sector %llus. \n " ,
2011-02-04 17:57:48 +03:00
( unsigned long long ) peer_req - > i . sector ) ;
2009-09-26 03:07:19 +04:00
2011-03-16 17:39:08 +03:00
ok = ! drbd_send_ack ( mdev , P_NEG_RS_DREPLY , peer_req ) ;
2009-09-26 03:07:19 +04:00
/* update resync data with failure */
2011-02-04 17:57:48 +03:00
drbd_rs_failed_io ( mdev , peer_req - > i . sector , peer_req - > i . size ) ;
2009-09-26 03:07:19 +04:00
}
dec_unacked ( mdev ) ;
2011-02-04 17:57:48 +03:00
move_to_net_ee_or_free ( mdev , peer_req ) ;
2009-09-26 03:07:19 +04:00
if ( unlikely ( ! ok ) )
dev_err ( DEV , " drbd_send_block() failed \n " ) ;
return ok ;
}
2011-02-09 20:09:48 +03:00
int w_e_end_csum_rs_req ( struct drbd_work * w , int cancel )
2009-09-26 03:07:19 +04:00
{
2011-02-04 17:57:48 +03:00
struct drbd_peer_request * peer_req = container_of ( w , struct drbd_peer_request , w ) ;
2011-02-09 20:09:48 +03:00
struct drbd_conf * mdev = w - > mdev ;
2009-09-26 03:07:19 +04:00
struct digest_info * di ;
int digest_size ;
void * digest = NULL ;
int ok , eq = 0 ;
if ( unlikely ( cancel ) ) {
2011-02-04 17:57:48 +03:00
drbd_free_ee ( mdev , peer_req ) ;
2009-09-26 03:07:19 +04:00
dec_unacked ( mdev ) ;
return 1 ;
}
2010-09-05 03:13:24 +04:00
if ( get_ldev ( mdev ) ) {
2011-02-04 17:57:48 +03:00
drbd_rs_complete_io ( mdev , peer_req - > i . sector ) ;
2010-09-05 03:13:24 +04:00
put_ldev ( mdev ) ;
}
2009-09-26 03:07:19 +04:00
2011-02-04 17:57:48 +03:00
di = peer_req - > digest ;
2009-09-26 03:07:19 +04:00
2011-02-04 17:57:48 +03:00
if ( likely ( ( peer_req - > flags & EE_WAS_ERROR ) = = 0 ) ) {
2009-09-26 03:07:19 +04:00
/* quick hack to try to avoid a race against reconfiguration.
* a real fix would be much more involved ,
* introducing more locking mechanisms */
2011-03-23 16:31:09 +03:00
if ( mdev - > tconn - > csums_tfm ) {
digest_size = crypto_hash_digestsize ( mdev - > tconn - > csums_tfm ) ;
2009-09-26 03:07:19 +04:00
D_ASSERT ( digest_size = = di - > digest_size ) ;
digest = kmalloc ( digest_size , GFP_NOIO ) ;
}
if ( digest ) {
2011-03-23 16:31:09 +03:00
drbd_csum_ee ( mdev , mdev - > tconn - > csums_tfm , peer_req , digest ) ;
2009-09-26 03:07:19 +04:00
eq = ! memcmp ( digest , di - > digest , digest_size ) ;
kfree ( digest ) ;
}
if ( eq ) {
2011-02-04 17:57:48 +03:00
drbd_set_in_sync ( mdev , peer_req - > i . sector , peer_req - > i . size ) ;
2010-03-03 04:08:22 +03:00
/* rs_same_csums unit is BM_BLOCK_SIZE */
2011-02-04 17:57:48 +03:00
mdev - > rs_same_csum + = peer_req - > i . size > > BM_BLOCK_SHIFT ;
2011-03-16 17:39:08 +03:00
ok = ! drbd_send_ack ( mdev , P_RS_IS_IN_SYNC , peer_req ) ;
2009-09-26 03:07:19 +04:00
} else {
inc_rs_pending ( mdev ) ;
2011-02-04 17:57:48 +03:00
peer_req - > block_id = ID_SYNCER ; /* By setting block_id, digest pointer becomes invalid! */
peer_req - > flags & = ~ EE_HAS_DIGEST ; /* This peer request no longer has a digest pointer */
2010-08-23 18:17:13 +04:00
kfree ( di ) ;
2011-02-04 17:57:48 +03:00
ok = drbd_send_block ( mdev , P_RS_DATA_REPLY , peer_req ) ;
2009-09-26 03:07:19 +04:00
}
} else {
2011-03-16 17:39:08 +03:00
ok = ! drbd_send_ack ( mdev , P_NEG_RS_DREPLY , peer_req ) ;
2009-09-26 03:07:19 +04:00
if ( __ratelimit ( & drbd_ratelimit_state ) )
dev_err ( DEV , " Sending NegDReply. I guess it gets messy. \n " ) ;
}
dec_unacked ( mdev ) ;
2011-02-04 17:57:48 +03:00
move_to_net_ee_or_free ( mdev , peer_req ) ;
2009-09-26 03:07:19 +04:00
if ( unlikely ( ! ok ) )
dev_err ( DEV , " drbd_send_block/ack() failed \n " ) ;
return ok ;
}
2011-02-09 20:09:48 +03:00
int w_e_end_ov_req ( struct drbd_work * w , int cancel )
2009-09-26 03:07:19 +04:00
{
2011-02-04 17:57:48 +03:00
struct drbd_peer_request * peer_req = container_of ( w , struct drbd_peer_request , w ) ;
2011-02-09 20:09:48 +03:00
struct drbd_conf * mdev = w - > mdev ;
2011-02-04 17:57:48 +03:00
sector_t sector = peer_req - > i . sector ;
unsigned int size = peer_req - > i . size ;
2009-09-26 03:07:19 +04:00
int digest_size ;
void * digest ;
int ok = 1 ;
if ( unlikely ( cancel ) )
goto out ;
2011-03-23 16:31:09 +03:00
digest_size = crypto_hash_digestsize ( mdev - > tconn - > verify_tfm ) ;
2009-09-26 03:07:19 +04:00
digest = kmalloc ( digest_size , GFP_NOIO ) ;
2011-03-01 17:52:35 +03:00
if ( ! digest ) {
ok = 0 ; /* terminate the connection in case the allocation failed */
goto out ;
2009-09-26 03:07:19 +04:00
}
2011-02-04 17:57:48 +03:00
if ( likely ( ! ( peer_req - > flags & EE_WAS_ERROR ) ) )
2011-03-23 16:31:09 +03:00
drbd_csum_ee ( mdev , mdev - > tconn - > verify_tfm , peer_req , digest ) ;
2011-03-01 17:52:35 +03:00
else
memset ( digest , 0 , digest_size ) ;
2011-03-08 19:11:40 +03:00
/* Free e and pages before send.
* In case we block on congestion , we could otherwise run into
* some distributed deadlock , if the other side blocks on
* congestion as well , because our receiver blocks in
* drbd_pp_alloc due to pp_in_use > max_buffers . */
2011-02-04 17:57:48 +03:00
drbd_free_ee ( mdev , peer_req ) ;
peer_req = NULL ;
2011-03-01 17:52:35 +03:00
inc_rs_pending ( mdev ) ;
2011-03-08 19:11:40 +03:00
ok = drbd_send_drequest_csum ( mdev , sector , size ,
digest , digest_size ,
P_OV_REPLY ) ;
2011-03-01 17:52:35 +03:00
if ( ! ok )
dec_rs_pending ( mdev ) ;
kfree ( digest ) ;
2009-09-26 03:07:19 +04:00
out :
2011-02-04 17:57:48 +03:00
if ( peer_req )
drbd_free_ee ( mdev , peer_req ) ;
2009-09-26 03:07:19 +04:00
dec_unacked ( mdev ) ;
return ok ;
}
/*
 * Record an out-of-sync range found by online verify.
 *
 * @sector/@size describe the range; @size is shifted right by 9 before it
 * is added to ov_last_oos_size.  A range that starts exactly where the
 * previously recorded one ended extends it; any other range starts a new
 * record.  The range is then marked out of sync.
 */
void drbd_ov_oos_found(struct drbd_conf *mdev, sector_t sector, int size)
{
	const int nr_sectors = size >> 9;

	if (mdev->ov_last_oos_start + mdev->ov_last_oos_size != sector) {
		/* not adjacent: begin a new range */
		mdev->ov_last_oos_start = sector;
		mdev->ov_last_oos_size = nr_sectors;
	} else {
		/* adjacent: grow the current range */
		mdev->ov_last_oos_size += nr_sectors;
	}

	drbd_set_out_of_sync(mdev, sector, size);
}
2011-02-09 20:09:48 +03:00
int w_e_end_ov_reply ( struct drbd_work * w , int cancel )
2009-09-26 03:07:19 +04:00
{
2011-02-04 17:57:48 +03:00
struct drbd_peer_request * peer_req = container_of ( w , struct drbd_peer_request , w ) ;
2011-02-09 20:09:48 +03:00
struct drbd_conf * mdev = w - > mdev ;
2009-09-26 03:07:19 +04:00
struct digest_info * di ;
void * digest ;
2011-02-04 17:57:48 +03:00
sector_t sector = peer_req - > i . sector ;
unsigned int size = peer_req - > i . size ;
2011-03-08 19:11:40 +03:00
int digest_size ;
2009-09-26 03:07:19 +04:00
int ok , eq = 0 ;
if ( unlikely ( cancel ) ) {
2011-02-04 17:57:48 +03:00
drbd_free_ee ( mdev , peer_req ) ;
2009-09-26 03:07:19 +04:00
dec_unacked ( mdev ) ;
return 1 ;
}
/* after "cancel", because after drbd_disconnect/drbd_rs_cancel_all
* the resync lru has been cleaned up already */
2010-09-05 03:13:24 +04:00
if ( get_ldev ( mdev ) ) {
2011-02-04 17:57:48 +03:00
drbd_rs_complete_io ( mdev , peer_req - > i . sector ) ;
2010-09-05 03:13:24 +04:00
put_ldev ( mdev ) ;
}
2009-09-26 03:07:19 +04:00
2011-02-04 17:57:48 +03:00
di = peer_req - > digest ;
2009-09-26 03:07:19 +04:00
2011-02-04 17:57:48 +03:00
if ( likely ( ( peer_req - > flags & EE_WAS_ERROR ) = = 0 ) ) {
2011-03-23 16:31:09 +03:00
digest_size = crypto_hash_digestsize ( mdev - > tconn - > verify_tfm ) ;
2009-09-26 03:07:19 +04:00
digest = kmalloc ( digest_size , GFP_NOIO ) ;
if ( digest ) {
2011-03-23 16:31:09 +03:00
drbd_csum_ee ( mdev , mdev - > tconn - > verify_tfm , peer_req , digest ) ;
2009-09-26 03:07:19 +04:00
D_ASSERT ( digest_size = = di - > digest_size ) ;
eq = ! memcmp ( digest , di - > digest , digest_size ) ;
kfree ( digest ) ;
}
}
2011-02-22 16:02:31 +03:00
/* Free peer_req and pages before send.
* In case we block on congestion , we could otherwise run into
* some distributed deadlock , if the other side blocks on
* congestion as well , because our receiver blocks in
* drbd_pp_alloc due to pp_in_use > max_buffers . */
2011-02-04 17:57:48 +03:00
drbd_free_ee ( mdev , peer_req ) ;
2009-09-26 03:07:19 +04:00
if ( ! eq )
2011-03-08 19:11:40 +03:00
drbd_ov_oos_found ( mdev , sector , size ) ;
2009-09-26 03:07:19 +04:00
else
ov_oos_print ( mdev ) ;
2011-03-16 03:31:39 +03:00
ok = ! drbd_send_ack_ex ( mdev , P_OV_RESULT , sector , size ,
eq ? ID_IN_SYNC : ID_OUT_OF_SYNC ) ;
2009-09-26 03:07:19 +04:00
2011-03-08 19:11:40 +03:00
dec_unacked ( mdev ) ;
2009-09-26 03:07:19 +04:00
2010-11-05 11:48:01 +03:00
- - mdev - > ov_left ;
/* let's advance progress step marks only for every other megabyte */
if ( ( mdev - > ov_left & 0x200 ) = = 0x200 )
drbd_advance_rs_marks ( mdev , mdev - > ov_left ) ;
if ( mdev - > ov_left = = 0 ) {
2009-09-26 03:07:19 +04:00
ov_oos_print ( mdev ) ;
drbd_resync_finished ( mdev ) ;
}
return ok ;
}
2011-02-09 20:09:48 +03:00
int w_prev_work_done ( struct drbd_work * w , int cancel )
2009-09-26 03:07:19 +04:00
{
struct drbd_wq_barrier * b = container_of ( w , struct drbd_wq_barrier , w ) ;
2011-02-09 20:09:48 +03:00
2009-09-26 03:07:19 +04:00
complete ( & b - > done ) ;
return 1 ;
}
2011-02-09 20:09:48 +03:00
int w_send_barrier ( struct drbd_work * w , int cancel )
2009-09-26 03:07:19 +04:00
{
struct drbd_tl_epoch * b = container_of ( w , struct drbd_tl_epoch , w ) ;
2011-02-09 20:09:48 +03:00
struct drbd_conf * mdev = w - > mdev ;
2011-01-19 15:55:45 +03:00
struct p_barrier * p = & mdev - > tconn - > data . sbuf . barrier ;
2009-09-26 03:07:19 +04:00
int ok = 1 ;
/* really avoid racing with tl_clear. w.cb may have been referenced
* just before it was reassigned and re - queued , so double check that .
* actually , this race was harmless , since we only try to send the
* barrier packet here , and otherwise do nothing with the object .
* but compare with the head of w_clear_epoch */
2011-01-19 16:16:30 +03:00
spin_lock_irq ( & mdev - > tconn - > req_lock ) ;
2009-09-26 03:07:19 +04:00
if ( w - > cb ! = w_send_barrier | | mdev - > state . conn < C_CONNECTED )
cancel = 1 ;
2011-01-19 16:16:30 +03:00
spin_unlock_irq ( & mdev - > tconn - > req_lock ) ;
2009-09-26 03:07:19 +04:00
if ( cancel )
return 1 ;
2011-03-15 18:15:10 +03:00
if ( drbd_get_data_sock ( mdev - > tconn ) )
2009-09-26 03:07:19 +04:00
return 0 ;
p - > barrier = b - > br_number ;
/* inc_ap_pending was done where this was queued.
* dec_ap_pending will be done in got_BarrierAck
* or ( on connection loss ) in w_clear_epoch . */
2011-03-16 01:51:21 +03:00
ok = ! _drbd_send_cmd ( mdev , mdev - > tconn - > data . socket , P_BARRIER ,
& p - > head , sizeof ( * p ) , 0 ) ;
2011-02-08 11:50:54 +03:00
drbd_put_data_sock ( mdev - > tconn ) ;
2009-09-26 03:07:19 +04:00
return ok ;
}
2011-02-09 20:09:48 +03:00
int w_send_write_hint ( struct drbd_work * w , int cancel )
2009-09-26 03:07:19 +04:00
{
2011-02-09 20:09:48 +03:00
struct drbd_conf * mdev = w - > mdev ;
2009-09-26 03:07:19 +04:00
if ( cancel )
return 1 ;
return drbd_send_short_cmd ( mdev , P_UNPLUG_REMOTE ) ;
}
2011-02-09 20:09:48 +03:00
int w_send_oos ( struct drbd_work * w , int cancel )
2010-10-27 16:33:00 +04:00
{
struct drbd_request * req = container_of ( w , struct drbd_request , w ) ;
2011-02-09 20:09:48 +03:00
struct drbd_conf * mdev = w - > mdev ;
2010-10-27 16:33:00 +04:00
int ok ;
if ( unlikely ( cancel ) ) {
2011-01-25 17:37:43 +03:00
req_mod ( req , SEND_CANCELED ) ;
2010-10-27 16:33:00 +04:00
return 1 ;
}
ok = drbd_send_oos ( mdev , req ) ;
2011-01-25 17:37:43 +03:00
req_mod ( req , OOS_HANDED_TO_NETWORK ) ;
2010-10-27 16:33:00 +04:00
return ok ;
}
2009-09-26 03:07:19 +04:00
/**
* w_send_dblock ( ) - Worker callback to send a P_DATA packet in order to mirror a write request
* @ mdev : DRBD device .
* @ w : work object .
* @ cancel : The connection will be closed anyways
*/
2011-02-09 20:09:48 +03:00
int w_send_dblock ( struct drbd_work * w , int cancel )
2009-09-26 03:07:19 +04:00
{
struct drbd_request * req = container_of ( w , struct drbd_request , w ) ;
2011-02-09 20:09:48 +03:00
struct drbd_conf * mdev = w - > mdev ;
2009-09-26 03:07:19 +04:00
int ok ;
if ( unlikely ( cancel ) ) {
2011-01-25 17:37:43 +03:00
req_mod ( req , SEND_CANCELED ) ;
2009-09-26 03:07:19 +04:00
return 1 ;
}
ok = drbd_send_dblock ( mdev , req ) ;
2011-01-25 17:37:43 +03:00
req_mod ( req , ok ? HANDED_OVER_TO_NETWORK : SEND_FAILED ) ;
2009-09-26 03:07:19 +04:00
return ok ;
}
/**
* w_send_read_req ( ) - Worker callback to send a read request ( P_DATA_REQUEST ) packet
* @ mdev : DRBD device .
* @ w : work object .
* @ cancel : The connection will be closed anyways
*/
2011-02-09 20:09:48 +03:00
int w_send_read_req ( struct drbd_work * w , int cancel )
2009-09-26 03:07:19 +04:00
{
struct drbd_request * req = container_of ( w , struct drbd_request , w ) ;
2011-02-09 20:09:48 +03:00
struct drbd_conf * mdev = w - > mdev ;
2009-09-26 03:07:19 +04:00
int ok ;
if ( unlikely ( cancel ) ) {
2011-01-25 17:37:43 +03:00
req_mod ( req , SEND_CANCELED ) ;
2009-09-26 03:07:19 +04:00
return 1 ;
}
2011-01-03 19:09:58 +03:00
ok = drbd_send_drequest ( mdev , P_DATA_REQUEST , req - > i . sector , req - > i . size ,
2009-09-26 03:07:19 +04:00
( unsigned long ) req ) ;
2011-01-25 17:37:43 +03:00
req_mod ( req , ok ? HANDED_OVER_TO_NETWORK : SEND_FAILED ) ;
2009-09-26 03:07:19 +04:00
return ok ;
}
2011-02-09 20:09:48 +03:00
int w_restart_disk_io ( struct drbd_work * w , int cancel )
2010-05-31 12:14:17 +04:00
{
struct drbd_request * req = container_of ( w , struct drbd_request , w ) ;
2011-02-09 20:09:48 +03:00
struct drbd_conf * mdev = w - > mdev ;
2010-05-31 12:14:17 +04:00
2010-08-31 14:00:50 +04:00
if ( bio_data_dir ( req - > master_bio ) = = WRITE & & req - > rq_state & RQ_IN_ACT_LOG )
2011-01-03 19:09:58 +03:00
drbd_al_begin_io ( mdev , req - > i . sector ) ;
2010-05-31 12:14:17 +04:00
/* Calling drbd_al_begin_io() out of the worker might deadlocks
theoretically . Practically it can not deadlock , since this is
only used when unfreezing IOs . All the extents of the requests
that made it into the TL are already active */
drbd_req_make_private_bio ( req , req - > master_bio ) ;
req - > private_bio - > bi_bdev = mdev - > ldev - > backing_bdev ;
generic_make_request ( req - > private_bio ) ;
return 1 ;
}
2009-09-26 03:07:19 +04:00
static int _drbd_may_sync_now ( struct drbd_conf * mdev )
{
struct drbd_conf * odev = mdev ;
while ( 1 ) {
2011-03-23 16:31:09 +03:00
if ( odev - > ldev - > dc . resync_after = = - 1 )
2009-09-26 03:07:19 +04:00
return 1 ;
2011-03-23 16:31:09 +03:00
odev = minor_to_mdev ( odev - > ldev - > dc . resync_after ) ;
2010-12-15 21:31:20 +03:00
if ( ! expect ( odev ) )
return 1 ;
2009-09-26 03:07:19 +04:00
if ( ( odev - > state . conn > = C_SYNC_SOURCE & &
odev - > state . conn < = C_PAUSED_SYNC_T ) | |
odev - > state . aftr_isp | | odev - > state . peer_isp | |
odev - > state . user_isp )
return 0 ;
}
}
/**
* _drbd_pause_after ( ) - Pause resync on all devices that may not resync now
* @ mdev : DRBD device .
*
* Called from process context only ( admin command and after_state_ch ) .
*/
static int _drbd_pause_after ( struct drbd_conf * mdev )
{
struct drbd_conf * odev ;
int i , rv = 0 ;
2011-02-23 03:53:16 +03:00
idr_for_each_entry ( & minors , odev , i ) {
2009-09-26 03:07:19 +04:00
if ( odev - > state . conn = = C_STANDALONE & & odev - > state . disk = = D_DISKLESS )
continue ;
if ( ! _drbd_may_sync_now ( odev ) )
rv | = ( __drbd_set_state ( _NS ( odev , aftr_isp , 1 ) , CS_HARD , NULL )
! = SS_NOTHING_TO_DO ) ;
}
return rv ;
}
/**
* _drbd_resume_next ( ) - Resume resync on all devices that may resync now
* @ mdev : DRBD device .
*
* Called from process context only ( admin command and worker ) .
*/
static int _drbd_resume_next ( struct drbd_conf * mdev )
{
struct drbd_conf * odev ;
int i , rv = 0 ;
2011-02-23 03:53:16 +03:00
idr_for_each_entry ( & minors , odev , i ) {
2009-09-26 03:07:19 +04:00
if ( odev - > state . conn = = C_STANDALONE & & odev - > state . disk = = D_DISKLESS )
continue ;
if ( odev - > state . aftr_isp ) {
if ( _drbd_may_sync_now ( odev ) )
rv | = ( __drbd_set_state ( _NS ( odev , aftr_isp , 0 ) ,
CS_HARD , NULL )
! = SS_NOTHING_TO_DO ) ;
}
}
return rv ;
}
/*
 * Locked wrapper around _drbd_resume_next(): takes global_state_lock for
 * writing (interrupts disabled) around the call.
 */
void resume_next_sg(struct drbd_conf *mdev)
{
	write_lock_irq(&global_state_lock);

	_drbd_resume_next(mdev);

	write_unlock_irq(&global_state_lock);
}
/*
 * Locked wrapper around _drbd_pause_after(): takes global_state_lock for
 * writing (interrupts disabled) around the call.
 */
void suspend_other_sg(struct drbd_conf *mdev)
{
	write_lock_irq(&global_state_lock);

	_drbd_pause_after(mdev);

	write_unlock_irq(&global_state_lock);
}
static int sync_after_error ( struct drbd_conf * mdev , int o_minor )
{
struct drbd_conf * odev ;
if ( o_minor = = - 1 )
return NO_ERROR ;
if ( o_minor < - 1 | | minor_to_mdev ( o_minor ) = = NULL )
return ERR_SYNC_AFTER ;
/* check for loops */
odev = minor_to_mdev ( o_minor ) ;
while ( 1 ) {
if ( odev = = mdev )
return ERR_SYNC_AFTER_CYCLE ;
/* dependency chain ends here, no cycles. */
2011-03-23 16:31:09 +03:00
if ( odev - > ldev - > dc . resync_after = = - 1 )
2009-09-26 03:07:19 +04:00
return NO_ERROR ;
/* follow the dependency chain */
2011-03-23 16:31:09 +03:00
odev = minor_to_mdev ( odev - > ldev - > dc . resync_after ) ;
2009-09-26 03:07:19 +04:00
}
}
int drbd_alter_sa ( struct drbd_conf * mdev , int na )
{
int changes ;
int retcode ;
write_lock_irq ( & global_state_lock ) ;
retcode = sync_after_error ( mdev , na ) ;
if ( retcode = = NO_ERROR ) {
2011-03-23 16:31:09 +03:00
mdev - > ldev - > dc . resync_after = na ;
2009-09-26 03:07:19 +04:00
do {
changes = _drbd_pause_after ( mdev ) ;
changes | = _drbd_resume_next ( mdev ) ;
} while ( changes ) ;
}
write_unlock_irq ( & global_state_lock ) ;
return retcode ;
}
2010-11-05 11:55:18 +03:00
void drbd_rs_controller_reset ( struct drbd_conf * mdev )
{
atomic_set ( & mdev - > rs_sect_in , 0 ) ;
atomic_set ( & mdev - > rs_sect_ev , 0 ) ;
mdev - > rs_in_flight = 0 ;
mdev - > rs_planed = 0 ;
spin_lock ( & mdev - > peer_seq_lock ) ;
fifo_set ( & mdev - > rs_plan_s , 0 ) ;
spin_unlock ( & mdev - > peer_seq_lock ) ;
}
2011-02-07 13:33:59 +03:00
void start_resync_timer_fn ( unsigned long data )
{
struct drbd_conf * mdev = ( struct drbd_conf * ) data ;
drbd_queue_work ( & mdev - > tconn - > data . work , & mdev - > start_resync_work ) ;
}
2011-02-09 20:09:48 +03:00
int w_start_resync ( struct drbd_work * w , int cancel )
2011-02-07 13:33:59 +03:00
{
2011-02-09 20:09:48 +03:00
struct drbd_conf * mdev = w - > mdev ;
2011-02-07 13:33:59 +03:00
if ( atomic_read ( & mdev - > unacked_cnt ) | | atomic_read ( & mdev - > rs_pending_cnt ) ) {
dev_warn ( DEV , " w_start_resync later... \n " ) ;
mdev - > start_resync_timer . expires = jiffies + HZ / 10 ;
add_timer ( & mdev - > start_resync_timer ) ;
return 1 ;
}
drbd_start_resync ( mdev , C_SYNC_SOURCE ) ;
clear_bit ( AHEAD_TO_SYNC_SOURCE , & mdev - > current_epoch - > flags ) ;
return 1 ;
}
2009-09-26 03:07:19 +04:00
/**
* drbd_start_resync ( ) - Start the resync process
* @ mdev : DRBD device .
* @ side : Either C_SYNC_SOURCE or C_SYNC_TARGET
*
* This function might bring you directly into one of the
* C_PAUSED_SYNC_ * states .
*/
void drbd_start_resync ( struct drbd_conf * mdev , enum drbd_conns side )
{
union drbd_state ns ;
int r ;
2010-10-27 19:32:36 +04:00
if ( mdev - > state . conn > = C_SYNC_SOURCE & & mdev - > state . conn < C_AHEAD ) {
2009-09-26 03:07:19 +04:00
dev_err ( DEV , " Resync already running! \n " ) ;
return ;
}
2010-10-29 14:44:20 +04:00
if ( mdev - > state . conn < C_AHEAD ) {
/* In case a previous resync run was aborted by an IO error/detach on the peer. */
drbd_rs_cancel_all ( mdev ) ;
/* This should be done when we abort the resync. We definitely do not
want to have this for connections going back and forth between
Ahead / Behind and SyncSource / SyncTarget */
}
2009-09-26 03:07:19 +04:00
2011-02-05 19:34:11 +03:00
if ( ! test_bit ( B_RS_H_DONE , & mdev - > flags ) ) {
if ( side = = C_SYNC_TARGET ) {
/* Since application IO was locked out during C_WF_BITMAP_T and
C_WF_SYNC_UUID we are still unmodified . Before going to C_SYNC_TARGET
we check that we might make the data inconsistent . */
r = drbd_khelper ( mdev , " before-resync-target " ) ;
r = ( r > > 8 ) & 0xff ;
if ( r > 0 ) {
dev_info ( DEV , " before-resync-target handler returned %d, "
2010-12-03 18:04:24 +03:00
" dropping connection. \n " , r ) ;
2011-03-15 20:24:49 +03:00
conn_request_state ( mdev - > tconn , NS ( conn , C_DISCONNECTING ) , CS_HARD ) ;
2010-12-03 18:04:24 +03:00
return ;
}
2011-02-05 19:34:11 +03:00
} else /* C_SYNC_SOURCE */ {
r = drbd_khelper ( mdev , " before-resync-source " ) ;
r = ( r > > 8 ) & 0xff ;
if ( r > 0 ) {
if ( r = = 3 ) {
dev_info ( DEV , " before-resync-source handler returned %d, "
" ignoring. Old userland tools? " , r ) ;
} else {
dev_info ( DEV , " before-resync-source handler returned %d, "
" dropping connection. \n " , r ) ;
2011-03-15 20:24:49 +03:00
conn_request_state ( mdev - > tconn , NS ( conn , C_DISCONNECTING ) , CS_HARD ) ;
2011-02-05 19:34:11 +03:00
return ;
}
}
2010-12-03 18:04:24 +03:00
}
2009-09-26 03:07:19 +04:00
}
2011-02-05 19:34:11 +03:00
if ( current = = mdev - > tconn - > worker . task ) {
2011-02-11 21:43:55 +03:00
/* The worker should not sleep waiting for state_mutex,
2011-02-05 19:34:11 +03:00
that can take long */
2011-02-11 22:11:10 +03:00
if ( ! mutex_trylock ( mdev - > state_mutex ) ) {
2011-02-05 19:34:11 +03:00
set_bit ( B_RS_H_DONE , & mdev - > flags ) ;
mdev - > start_resync_timer . expires = jiffies + HZ / 5 ;
add_timer ( & mdev - > start_resync_timer ) ;
return ;
}
} else {
2011-02-11 22:11:10 +03:00
mutex_lock ( mdev - > state_mutex ) ;
2011-02-05 19:34:11 +03:00
}
clear_bit ( B_RS_H_DONE , & mdev - > flags ) ;
2009-09-26 03:07:19 +04:00
if ( ! get_ldev_if_state ( mdev , D_NEGOTIATING ) ) {
2011-02-11 22:11:10 +03:00
mutex_unlock ( mdev - > state_mutex ) ;
2009-09-26 03:07:19 +04:00
return ;
}
write_lock_irq ( & global_state_lock ) ;
ns = mdev - > state ;
ns . aftr_isp = ! _drbd_may_sync_now ( mdev ) ;
ns . conn = side ;
if ( side = = C_SYNC_TARGET )
ns . disk = D_INCONSISTENT ;
else /* side == C_SYNC_SOURCE */
ns . pdsk = D_INCONSISTENT ;
r = __drbd_set_state ( mdev , ns , CS_VERBOSE , NULL ) ;
ns = mdev - > state ;
if ( ns . conn < C_CONNECTED )
r = SS_UNKNOWN_ERROR ;
if ( r = = SS_SUCCESS ) {
2010-08-11 23:21:50 +04:00
unsigned long tw = drbd_bm_total_weight ( mdev ) ;
unsigned long now = jiffies ;
int i ;
2009-09-26 03:07:19 +04:00
mdev - > rs_failed = 0 ;
mdev - > rs_paused = 0 ;
mdev - > rs_same_csum = 0 ;
2010-08-12 01:40:24 +04:00
mdev - > rs_last_events = 0 ;
mdev - > rs_last_sect_ev = 0 ;
2010-08-11 23:21:50 +04:00
mdev - > rs_total = tw ;
mdev - > rs_start = now ;
for ( i = 0 ; i < DRBD_SYNC_MARKS ; i + + ) {
mdev - > rs_mark_left [ i ] = tw ;
mdev - > rs_mark_time [ i ] = now ;
}
2009-09-26 03:07:19 +04:00
_drbd_pause_after ( mdev ) ;
}
write_unlock_irq ( & global_state_lock ) ;
2010-12-17 23:14:23 +03:00
2009-09-26 03:07:19 +04:00
if ( r = = SS_SUCCESS ) {
dev_info ( DEV , " Began resync as %s (will sync %lu KB [%lu bits set]). \n " ,
drbd_conn_str ( ns . conn ) ,
( unsigned long ) mdev - > rs_total < < ( BM_BLOCK_SHIFT - 10 ) ,
( unsigned long ) mdev - > rs_total ) ;
2011-01-12 13:51:13 +03:00
if ( side = = C_SYNC_TARGET )
mdev - > bm_resync_fo = 0 ;
/* Since protocol 96, we must serialize drbd_gen_and_send_sync_uuid
* with w_send_oos , or the sync target will get confused as to
* how much bits to resync . We cannot do that always , because for an
* empty resync and protocol < 95 , we need to do it here , as we call
* drbd_resync_finished from here in that case .
* We drbd_gen_and_send_sync_uuid here for protocol < 96 ,
* and from after_state_ch otherwise . */
2011-01-19 16:12:51 +03:00
if ( side = = C_SYNC_SOURCE & & mdev - > tconn - > agreed_pro_version < 96 )
2011-01-12 13:51:13 +03:00
drbd_gen_and_send_sync_uuid ( mdev ) ;
2009-09-26 03:07:19 +04:00
2011-01-19 16:12:51 +03:00
if ( mdev - > tconn - > agreed_pro_version < 95 & & mdev - > rs_total = = 0 ) {
2010-10-07 18:07:55 +04:00
/* This still has a race (about when exactly the peers
* detect connection loss ) that can lead to a full sync
* on next handshake . In 8.3 .9 we fixed this with explicit
* resync - finished notifications , but the fix
* introduces a protocol change . Sleeping for some
* time longer than the ping interval + timeout on the
* SyncSource , to give the SyncTarget the chance to
* detect connection loss , then waiting for a ping
* response ( implicit in drbd_resync_finished ) reduces
* the race considerably , but does not solve it . */
if ( side = = C_SYNC_SOURCE )
schedule_timeout_interruptible (
2011-01-19 15:12:45 +03:00
mdev - > tconn - > net_conf - > ping_int * HZ +
mdev - > tconn - > net_conf - > ping_timeo * HZ / 9 ) ;
2009-09-26 03:07:19 +04:00
drbd_resync_finished ( mdev ) ;
}
2010-11-05 11:55:18 +03:00
drbd_rs_controller_reset ( mdev ) ;
2009-09-26 03:07:19 +04:00
/* ns.conn may already be != mdev->state.conn,
* we may have been paused in between , or become paused until
* the timer triggers .
* No matter , that is handled in resync_timer_fn ( ) */
if ( ns . conn = = C_SYNC_TARGET )
mod_timer ( & mdev - > resync_timer , jiffies ) ;
drbd_md_sync ( mdev ) ;
}
2010-12-17 23:14:23 +03:00
put_ldev ( mdev ) ;
2011-02-11 22:11:10 +03:00
mutex_unlock ( mdev - > state_mutex ) ;
2009-09-26 03:07:19 +04:00
}
int drbd_worker ( struct drbd_thread * thi )
{
2011-02-09 12:33:31 +03:00
struct drbd_tconn * tconn = thi - > tconn ;
2009-09-26 03:07:19 +04:00
struct drbd_work * w = NULL ;
2011-02-18 16:23:11 +03:00
struct drbd_conf * mdev ;
2009-09-26 03:07:19 +04:00
LIST_HEAD ( work_list ) ;
2011-03-23 16:31:09 +03:00
int vnr , intr = 0 ;
2009-09-26 03:07:19 +04:00
2011-01-25 17:43:39 +03:00
while ( get_t_state ( thi ) = = RUNNING ) {
2011-02-08 14:46:30 +03:00
drbd_thread_current_set_cpu ( thi ) ;
2009-09-26 03:07:19 +04:00
2011-02-09 12:09:07 +03:00
if ( down_trylock ( & tconn - > data . work . s ) ) {
mutex_lock ( & tconn - > data . mutex ) ;
if ( tconn - > data . socket & & ! tconn - > net_conf - > no_cork )
drbd_tcp_uncork ( tconn - > data . socket ) ;
mutex_unlock ( & tconn - > data . mutex ) ;
2009-09-26 03:07:19 +04:00
2011-02-09 12:09:07 +03:00
intr = down_interruptible ( & tconn - > data . work . s ) ;
2009-09-26 03:07:19 +04:00
2011-02-09 12:09:07 +03:00
mutex_lock ( & tconn - > data . mutex ) ;
if ( tconn - > data . socket & & ! tconn - > net_conf - > no_cork )
drbd_tcp_cork ( tconn - > data . socket ) ;
mutex_unlock ( & tconn - > data . mutex ) ;
2009-09-26 03:07:19 +04:00
}
if ( intr ) {
flush_signals ( current ) ;
2011-02-09 12:09:07 +03:00
if ( get_t_state ( thi ) = = RUNNING ) {
conn_warn ( tconn , " Worker got an unexpected signal \n " ) ;
2009-09-26 03:07:19 +04:00
continue ;
2011-02-09 12:09:07 +03:00
}
2009-09-26 03:07:19 +04:00
break ;
}
2011-01-25 17:43:39 +03:00
if ( get_t_state ( thi ) ! = RUNNING )
2009-09-26 03:07:19 +04:00
break ;
/* With this break, we have done a down() but not consumed
the entry from the list . The cleanup code takes care of
this . . . */
w = NULL ;
2011-02-09 12:09:07 +03:00
spin_lock_irq ( & tconn - > data . work . q_lock ) ;
if ( list_empty ( & tconn - > data . work . q ) ) {
2009-09-26 03:07:19 +04:00
/* something terribly wrong in our logic.
* we were able to down ( ) the semaphore ,
* but the list is empty . . . doh .
*
* what is the best thing to do now ?
* try again from scratch , restarting the receiver ,
* asender , whatnot ? could break even more ugly ,
* e . g . when we are primary , but no good local data .
*
* I ' ll try to get away just starting over this loop .
*/
2011-02-09 12:09:07 +03:00
conn_warn ( tconn , " Work list unexpectedly empty \n " ) ;
spin_unlock_irq ( & tconn - > data . work . q_lock ) ;
2009-09-26 03:07:19 +04:00
continue ;
}
2011-02-09 12:09:07 +03:00
w = list_entry ( tconn - > data . work . q . next , struct drbd_work , list ) ;
2009-09-26 03:07:19 +04:00
list_del_init ( & w - > list ) ;
2011-02-09 12:09:07 +03:00
spin_unlock_irq ( & tconn - > data . work . q_lock ) ;
2009-09-26 03:07:19 +04:00
2011-02-10 15:45:46 +03:00
if ( ! w - > cb ( w , tconn - > cstate < C_WF_REPORT_PARAMS ) ) {
2009-09-26 03:07:19 +04:00
/* dev_warn(DEV, "worker: a callback failed! \n"); */
2011-02-10 15:45:46 +03:00
if ( tconn - > cstate > = C_WF_REPORT_PARAMS )
conn_request_state ( tconn , NS ( conn , C_NETWORK_FAILURE ) , CS_HARD ) ;
2009-09-26 03:07:19 +04:00
}
}
2011-02-09 12:09:07 +03:00
spin_lock_irq ( & tconn - > data . work . q_lock ) ;
while ( ! list_empty ( & tconn - > data . work . q ) ) {
list_splice_init ( & tconn - > data . work . q , & work_list ) ;
spin_unlock_irq ( & tconn - > data . work . q_lock ) ;
2009-09-26 03:07:19 +04:00
while ( ! list_empty ( & work_list ) ) {
w = list_entry ( work_list . next , struct drbd_work , list ) ;
list_del_init ( & w - > list ) ;
2011-02-09 20:09:48 +03:00
w - > cb ( w , 1 ) ;
2009-09-26 03:07:19 +04:00
}
2011-02-09 12:09:07 +03:00
spin_lock_irq ( & tconn - > data . work . q_lock ) ;
2009-09-26 03:07:19 +04:00
}
2011-02-09 12:09:07 +03:00
sema_init ( & tconn - > data . work . s , 0 ) ;
2009-09-26 03:07:19 +04:00
/* DANGEROUS race: if someone did queue his work within the spinlock,
* but up ( ) ed outside the spinlock , we could get an up ( ) on the
* semaphore without corresponding list entry .
* So don ' t do that .
*/
2011-02-09 12:09:07 +03:00
spin_unlock_irq ( & tconn - > data . work . q_lock ) ;
2009-09-26 03:07:19 +04:00
2011-02-18 16:23:11 +03:00
drbd_thread_stop ( & tconn - > receiver ) ;
2011-03-23 16:31:09 +03:00
idr_for_each_entry ( & tconn - > volumes , mdev , vnr ) {
2011-02-18 16:23:11 +03:00
D_ASSERT ( mdev - > state . disk = = D_DISKLESS & & mdev - > state . conn = = C_STANDALONE ) ;
/* _drbd_set_state only uses stop_nowait.
* wait here for the exiting receiver . */
drbd_mdev_cleanup ( mdev ) ;
}
clear_bit ( OBJECT_DYING , & tconn - > flags ) ;
clear_bit ( CONFIG_PENDING , & tconn - > flags ) ;
wake_up ( & tconn - > ping_wait ) ;
2009-09-26 03:07:19 +04:00
return 0 ;
}