/******************************************************************************
*******************************************************************************
**
**  Copyright (C) Sistina Software, Inc.  1997-2003  All rights reserved.
**  Copyright (C) 2004-2011 Red Hat, Inc.  All rights reserved.
**
**  This copyrighted material is made available to anyone wishing to use,
**  modify, copy, or redistribute it subject to the terms and conditions
**  of the GNU General Public License v.2.
**
*******************************************************************************
******************************************************************************/
# include "dlm_internal.h"
# include "lockspace.h"
# include "member.h"
# include "dir.h"
# include "ast.h"
# include "recover.h"
# include "lowcomms.h"
# include "lock.h"
# include "requestqueue.h"
# include "recoverd.h"
/* If the start for which we're re-enabling locking (seq) has been superseded
2007-09-28 00:53:38 +04:00
by a newer stop ( ls_recover_seq ) , we need to leave locking disabled .
We suspend dlm_recv threads here to avoid the race where dlm_recv a ) sees
locking stopped and b ) adds a message to the requestqueue , but dlm_recoverd
enables locking and clears the requestqueue between a and b . */
2006-01-18 12:30:29 +03:00
static int enable_locking ( struct dlm_ls * ls , uint64_t seq )
{
int error = - EINTR ;
2007-09-28 00:53:38 +04:00
down_write ( & ls - > ls_recv_active ) ;
2006-01-18 12:30:29 +03:00
spin_lock ( & ls - > ls_recover_lock ) ;
if ( ls - > ls_recover_seq = = seq ) {
set_bit ( LSFL_RUNNING , & ls - > ls_flags ) ;
2007-09-28 00:53:38 +04:00
/* unblocks processes waiting to enter the dlm */
2006-01-18 12:30:29 +03:00
up_write ( & ls - > ls_in_recovery ) ;
2012-08-02 20:08:21 +04:00
clear_bit ( LSFL_RECOVER_LOCK , & ls - > ls_flags ) ;
2006-01-18 12:30:29 +03:00
error = 0 ;
}
spin_unlock ( & ls - > ls_recover_lock ) ;
2007-09-28 00:53:38 +04:00
up_write ( & ls - > ls_recv_active ) ;
2006-01-18 12:30:29 +03:00
return error ;
}
static int ls_recover ( struct dlm_ls * ls , struct dlm_recover * rv )
{
unsigned long start ;
int error , neg = 0 ;
2012-04-24 01:36:01 +04:00
log_debug ( ls , " dlm_recover %llu " , ( unsigned long long ) rv - > seq ) ;
2006-01-18 12:30:29 +03:00
2006-01-20 11:47:07 +03:00
mutex_lock ( & ls - > ls_recoverd_active ) ;
2006-01-18 12:30:29 +03:00
2011-04-05 22:16:24 +04:00
dlm_callback_suspend ( ls ) ;
2006-01-18 12:30:29 +03:00
2012-05-10 19:18:07 +04:00
dlm_clear_toss ( ls ) ;
2006-01-18 12:30:29 +03:00
/*
2008-01-16 22:02:31 +03:00
* This list of root rsb ' s will be the basis of most of the recovery
* routines .
2006-01-18 12:30:29 +03:00
*/
2008-01-16 22:02:31 +03:00
dlm_create_root_list ( ls ) ;
2006-01-18 12:30:29 +03:00
/*
* Add or remove nodes from the lockspace ' s ls_nodes list .
*/
error = dlm_recover_members ( ls , rv , & neg ) ;
if ( error ) {
2011-10-14 21:34:58 +04:00
log_debug ( ls , " dlm_recover_members error %d " , error ) ;
2006-01-18 12:30:29 +03:00
goto fail ;
}
2011-10-14 21:34:58 +04:00
2012-05-10 19:18:07 +04:00
dlm_recover_dir_nodeid ( ls ) ;
ls - > ls_recover_dir_sent_res = 0 ;
ls - > ls_recover_dir_sent_msg = 0 ;
dlm: fixes for nodir mode
The "nodir" mode (statically assign master nodes instead
of using the resource directory) has always been highly
experimental, and never seriously used. This commit
fixes a number of problems, making nodir much more usable.
- Major change to recovery: recover all locks and restart
all in-progress operations after recovery. In some
cases it's not possible to know which in-progess locks
to recover, so recover all. (Most require recovery
in nodir mode anyway since rehashing changes most
master nodes.)
- Change the way nodir mode is enabled, from a command
line mount arg passed through gfs2, into a sysfs
file managed by dlm_controld, consistent with the
other config settings.
- Allow recovering MSTCPY locks on an rsb that has not
yet been turned into a master copy.
- Ignore RCOM_LOCK and RCOM_LOCK_REPLY recovery messages
from a previous, aborted recovery cycle. Base this
on the local recovery status not being in the state
where any nodes should be sending LOCK messages for the
current recovery cycle.
- Hold rsb lock around dlm_purge_mstcpy_locks() because it
may run concurrently with dlm_recover_master_copy().
- Maintain highbast on process-copy lkb's (in addition to
the master as is usual), because the lkb can switch
back and forth between being a master and being a
process copy as the master node changes in recovery.
- When recovering MSTCPY locks, flag rsb's that have
non-empty convert or waiting queues for granting
at the end of recovery. (Rename flag from LOCKS_PURGED
to RECOVER_GRANT and similar for the recovery function,
because it's not only resources with purged locks
that need grant a grant attempt.)
- Replace a couple of unnecessary assertion panics with
error messages.
Signed-off-by: David Teigland <teigland@redhat.com>
2012-04-27 00:54:29 +04:00
ls - > ls_recover_locks_in = 0 ;
2011-10-14 21:34:58 +04:00
dlm_set_recover_status ( ls , DLM_RS_NODES ) ;
error = dlm_recover_members_wait ( ls ) ;
if ( error ) {
log_debug ( ls , " dlm_recover_members_wait error %d " , error ) ;
goto fail ;
}
2006-01-18 12:30:29 +03:00
start = jiffies ;
/*
* Rebuild our own share of the directory by collecting from all other
* nodes their master rsb names that hash to us .
*/
error = dlm_recover_directory ( ls ) ;
if ( error ) {
2011-10-14 21:34:58 +04:00
log_debug ( ls , " dlm_recover_directory error %d " , error ) ;
2006-01-18 12:30:29 +03:00
goto fail ;
}
2011-10-14 21:34:58 +04:00
dlm_set_recover_status ( ls , DLM_RS_DIR ) ;
2006-01-18 12:30:29 +03:00
error = dlm_recover_directory_wait ( ls ) ;
if ( error ) {
2011-10-14 21:34:58 +04:00
log_debug ( ls , " dlm_recover_directory_wait error %d " , error ) ;
2006-01-18 12:30:29 +03:00
goto fail ;
}
2012-05-10 19:18:07 +04:00
log_debug ( ls , " dlm_recover_directory %u out %u messages " ,
ls - > ls_recover_dir_sent_res , ls - > ls_recover_dir_sent_msg ) ;
2006-01-18 12:30:29 +03:00
/*
* We may have outstanding operations that are waiting for a reply from
* a failed node . Mark these to be resent after recovery . Unlock and
* cancel ops can just be completed .
*/
dlm_recover_waiters_pre ( ls ) ;
error = dlm_recovery_stopped ( ls ) ;
if ( error )
goto fail ;
if ( neg | | dlm_no_directory ( ls ) ) {
/*
* Clear lkb ' s for departed nodes .
*/
dlm: fixes for nodir mode
The "nodir" mode (statically assign master nodes instead
of using the resource directory) has always been highly
experimental, and never seriously used. This commit
fixes a number of problems, making nodir much more usable.
- Major change to recovery: recover all locks and restart
all in-progress operations after recovery. In some
cases it's not possible to know which in-progess locks
to recover, so recover all. (Most require recovery
in nodir mode anyway since rehashing changes most
master nodes.)
- Change the way nodir mode is enabled, from a command
line mount arg passed through gfs2, into a sysfs
file managed by dlm_controld, consistent with the
other config settings.
- Allow recovering MSTCPY locks on an rsb that has not
yet been turned into a master copy.
- Ignore RCOM_LOCK and RCOM_LOCK_REPLY recovery messages
from a previous, aborted recovery cycle. Base this
on the local recovery status not being in the state
where any nodes should be sending LOCK messages for the
current recovery cycle.
- Hold rsb lock around dlm_purge_mstcpy_locks() because it
may run concurrently with dlm_recover_master_copy().
- Maintain highbast on process-copy lkb's (in addition to
the master as is usual), because the lkb can switch
back and forth between being a master and being a
process copy as the master node changes in recovery.
- When recovering MSTCPY locks, flag rsb's that have
non-empty convert or waiting queues for granting
at the end of recovery. (Rename flag from LOCKS_PURGED
to RECOVER_GRANT and similar for the recovery function,
because it's not only resources with purged locks
that need grant a grant attempt.)
- Replace a couple of unnecessary assertion panics with
error messages.
Signed-off-by: David Teigland <teigland@redhat.com>
2012-04-27 00:54:29 +04:00
dlm_recover_purge ( ls ) ;
2006-01-18 12:30:29 +03:00
/*
* Get new master nodeid ' s for rsb ' s that were mastered on
* departed nodes .
*/
error = dlm_recover_masters ( ls ) ;
if ( error ) {
2011-10-14 21:34:58 +04:00
log_debug ( ls , " dlm_recover_masters error %d " , error ) ;
2006-01-18 12:30:29 +03:00
goto fail ;
}
/*
* Send our locks on remastered rsb ' s to the new masters .
*/
error = dlm_recover_locks ( ls ) ;
if ( error ) {
2011-10-14 21:34:58 +04:00
log_debug ( ls , " dlm_recover_locks error %d " , error ) ;
2006-01-18 12:30:29 +03:00
goto fail ;
}
2011-10-14 21:34:58 +04:00
dlm_set_recover_status ( ls , DLM_RS_LOCKS ) ;
2006-01-18 12:30:29 +03:00
error = dlm_recover_locks_wait ( ls ) ;
if ( error ) {
2011-10-14 21:34:58 +04:00
log_debug ( ls , " dlm_recover_locks_wait error %d " , error ) ;
2006-01-18 12:30:29 +03:00
goto fail ;
}
dlm: fixes for nodir mode
The "nodir" mode (statically assign master nodes instead
of using the resource directory) has always been highly
experimental, and never seriously used. This commit
fixes a number of problems, making nodir much more usable.
- Major change to recovery: recover all locks and restart
all in-progress operations after recovery. In some
cases it's not possible to know which in-progess locks
to recover, so recover all. (Most require recovery
in nodir mode anyway since rehashing changes most
master nodes.)
- Change the way nodir mode is enabled, from a command
line mount arg passed through gfs2, into a sysfs
file managed by dlm_controld, consistent with the
other config settings.
- Allow recovering MSTCPY locks on an rsb that has not
yet been turned into a master copy.
- Ignore RCOM_LOCK and RCOM_LOCK_REPLY recovery messages
from a previous, aborted recovery cycle. Base this
on the local recovery status not being in the state
where any nodes should be sending LOCK messages for the
current recovery cycle.
- Hold rsb lock around dlm_purge_mstcpy_locks() because it
may run concurrently with dlm_recover_master_copy().
- Maintain highbast on process-copy lkb's (in addition to
the master as is usual), because the lkb can switch
back and forth between being a master and being a
process copy as the master node changes in recovery.
- When recovering MSTCPY locks, flag rsb's that have
non-empty convert or waiting queues for granting
at the end of recovery. (Rename flag from LOCKS_PURGED
to RECOVER_GRANT and similar for the recovery function,
because it's not only resources with purged locks
that need grant a grant attempt.)
- Replace a couple of unnecessary assertion panics with
error messages.
Signed-off-by: David Teigland <teigland@redhat.com>
2012-04-27 00:54:29 +04:00
log_debug ( ls , " dlm_recover_locks %u in " ,
ls - > ls_recover_locks_in ) ;
2006-01-18 12:30:29 +03:00
/*
* Finalize state in master rsb ' s now that all locks can be
* checked . This includes conversion resolution and lvb
* settings .
*/
dlm_recover_rsbs ( ls ) ;
2006-10-31 20:56:01 +03:00
} else {
/*
* Other lockspace members may be going through the " neg " steps
* while also adding us to the lockspace , in which case they ' ll
2006-11-01 18:31:48 +03:00
* be doing the recover_locks ( RS_LOCKS ) barrier .
2006-10-31 20:56:01 +03:00
*/
dlm_set_recover_status ( ls , DLM_RS_LOCKS ) ;
2006-11-01 18:31:48 +03:00
error = dlm_recover_locks_wait ( ls ) ;
if ( error ) {
2011-10-14 21:34:58 +04:00
log_debug ( ls , " dlm_recover_locks_wait error %d " , error ) ;
2006-11-01 18:31:48 +03:00
goto fail ;
}
2006-01-18 12:30:29 +03:00
}
dlm_release_root_list ( ls ) ;
2006-11-27 20:31:22 +03:00
/*
* Purge directory - related requests that are saved in requestqueue .
* All dir requests from before recovery are invalid now due to the dir
* rebuild and will be resent by the requesting nodes .
*/
dlm_purge_requestqueue ( ls ) ;
2006-01-18 12:30:29 +03:00
dlm_set_recover_status ( ls , DLM_RS_DONE ) ;
2011-10-14 21:34:58 +04:00
2006-01-18 12:30:29 +03:00
error = dlm_recover_done_wait ( ls ) ;
if ( error ) {
2011-10-14 21:34:58 +04:00
log_debug ( ls , " dlm_recover_done_wait error %d " , error ) ;
2006-01-18 12:30:29 +03:00
goto fail ;
}
dlm_clear_members_gone ( ls ) ;
2007-05-18 17:59:31 +04:00
dlm_adjust_timeouts ( ls ) ;
2011-04-05 22:16:24 +04:00
dlm_callback_resume ( ls ) ;
2006-01-18 12:30:29 +03:00
error = enable_locking ( ls , rv - > seq ) ;
if ( error ) {
2011-10-14 21:34:58 +04:00
log_debug ( ls , " enable_locking error %d " , error ) ;
2006-01-18 12:30:29 +03:00
goto fail ;
}
error = dlm_process_requestqueue ( ls ) ;
if ( error ) {
2011-10-14 21:34:58 +04:00
log_debug ( ls , " dlm_process_requestqueue error %d " , error ) ;
2006-01-18 12:30:29 +03:00
goto fail ;
}
error = dlm_recover_waiters_post ( ls ) ;
if ( error ) {
2011-10-14 21:34:58 +04:00
log_debug ( ls , " dlm_recover_waiters_post error %d " , error ) ;
2006-01-18 12:30:29 +03:00
goto fail ;
}
dlm: fixes for nodir mode
The "nodir" mode (statically assign master nodes instead
of using the resource directory) has always been highly
experimental, and never seriously used. This commit
fixes a number of problems, making nodir much more usable.
- Major change to recovery: recover all locks and restart
all in-progress operations after recovery. In some
cases it's not possible to know which in-progess locks
to recover, so recover all. (Most require recovery
in nodir mode anyway since rehashing changes most
master nodes.)
- Change the way nodir mode is enabled, from a command
line mount arg passed through gfs2, into a sysfs
file managed by dlm_controld, consistent with the
other config settings.
- Allow recovering MSTCPY locks on an rsb that has not
yet been turned into a master copy.
- Ignore RCOM_LOCK and RCOM_LOCK_REPLY recovery messages
from a previous, aborted recovery cycle. Base this
on the local recovery status not being in the state
where any nodes should be sending LOCK messages for the
current recovery cycle.
- Hold rsb lock around dlm_purge_mstcpy_locks() because it
may run concurrently with dlm_recover_master_copy().
- Maintain highbast on process-copy lkb's (in addition to
the master as is usual), because the lkb can switch
back and forth between being a master and being a
process copy as the master node changes in recovery.
- When recovering MSTCPY locks, flag rsb's that have
non-empty convert or waiting queues for granting
at the end of recovery. (Rename flag from LOCKS_PURGED
to RECOVER_GRANT and similar for the recovery function,
because it's not only resources with purged locks
that need grant a grant attempt.)
- Replace a couple of unnecessary assertion panics with
error messages.
Signed-off-by: David Teigland <teigland@redhat.com>
2012-04-27 00:54:29 +04:00
dlm_recover_grant ( ls ) ;
2006-01-18 12:30:29 +03:00
2012-04-24 01:36:01 +04:00
log_debug ( ls , " dlm_recover %llu generation %u done: %u ms " ,
2011-11-02 23:30:58 +04:00
( unsigned long long ) rv - > seq , ls - > ls_generation ,
2006-01-18 12:30:29 +03:00
jiffies_to_msecs ( jiffies - start ) ) ;
2006-01-20 11:47:07 +03:00
mutex_unlock ( & ls - > ls_recoverd_active ) ;
2006-01-18 12:30:29 +03:00
2011-11-02 23:30:58 +04:00
dlm_lsop_recover_done ( ls ) ;
2006-01-18 12:30:29 +03:00
return 0 ;
fail :
dlm_release_root_list ( ls ) ;
2012-04-24 01:36:01 +04:00
log_debug ( ls , " dlm_recover %llu error %d " ,
2006-11-29 17:33:48 +03:00
( unsigned long long ) rv - > seq , error ) ;
2006-01-20 11:47:07 +03:00
mutex_unlock ( & ls - > ls_recoverd_active ) ;
2006-01-18 12:30:29 +03:00
return error ;
}
2006-10-31 20:56:08 +03:00
/* The dlm_ls_start() that created the rv we take here may already have been
stopped via dlm_ls_stop ( ) ; in that case we need to leave the RECOVERY_STOP
flag set . */
2006-01-18 12:30:29 +03:00
static void do_ls_recovery ( struct dlm_ls * ls )
{
struct dlm_recover * rv = NULL ;
spin_lock ( & ls - > ls_recover_lock ) ;
rv = ls - > ls_recover_args ;
ls - > ls_recover_args = NULL ;
2006-10-31 20:56:08 +03:00
if ( rv & & ls - > ls_recover_seq = = rv - > seq )
2012-08-02 20:08:21 +04:00
clear_bit ( LSFL_RECOVER_STOP , & ls - > ls_flags ) ;
2006-01-18 12:30:29 +03:00
spin_unlock ( & ls - > ls_recover_lock ) ;
if ( rv ) {
ls_recover ( ls , rv ) ;
2011-11-02 23:30:58 +04:00
kfree ( rv - > nodes ) ;
2006-01-18 12:30:29 +03:00
kfree ( rv ) ;
}
}
static int dlm_recoverd ( void * arg )
{
struct dlm_ls * ls ;
ls = dlm_find_lockspace_local ( arg ) ;
2006-08-24 23:47:20 +04:00
if ( ! ls ) {
log_print ( " dlm_recoverd: no lockspace %p " , arg ) ;
return - 1 ;
}
2006-01-18 12:30:29 +03:00
2012-08-02 20:08:21 +04:00
down_write ( & ls - > ls_in_recovery ) ;
set_bit ( LSFL_RECOVER_LOCK , & ls - > ls_flags ) ;
wake_up ( & ls - > ls_recover_lock_wait ) ;
2006-01-18 12:30:29 +03:00
while ( ! kthread_should_stop ( ) ) {
set_current_state ( TASK_INTERRUPTIBLE ) ;
2012-08-02 20:08:21 +04:00
if ( ! test_bit ( LSFL_RECOVER_WORK , & ls - > ls_flags ) & &
! test_bit ( LSFL_RECOVER_DOWN , & ls - > ls_flags ) )
2006-01-18 12:30:29 +03:00
schedule ( ) ;
set_current_state ( TASK_RUNNING ) ;
2012-08-02 20:08:21 +04:00
if ( test_and_clear_bit ( LSFL_RECOVER_DOWN , & ls - > ls_flags ) ) {
down_write ( & ls - > ls_in_recovery ) ;
set_bit ( LSFL_RECOVER_LOCK , & ls - > ls_flags ) ;
wake_up ( & ls - > ls_recover_lock_wait ) ;
}
if ( test_and_clear_bit ( LSFL_RECOVER_WORK , & ls - > ls_flags ) )
2006-01-18 12:30:29 +03:00
do_ls_recovery ( ls ) ;
}
2012-08-02 20:08:21 +04:00
if ( test_bit ( LSFL_RECOVER_LOCK , & ls - > ls_flags ) )
up_write ( & ls - > ls_in_recovery ) ;
2006-01-18 12:30:29 +03:00
dlm_put_lockspace ( ls ) ;
return 0 ;
}
int dlm_recoverd_start ( struct dlm_ls * ls )
{
struct task_struct * p ;
int error = 0 ;
p = kthread_run ( dlm_recoverd , ls , " dlm_recoverd " ) ;
if ( IS_ERR ( p ) )
error = PTR_ERR ( p ) ;
else
ls - > ls_recoverd_task = p ;
return error ;
}
void dlm_recoverd_stop ( struct dlm_ls * ls )
{
kthread_stop ( ls - > ls_recoverd_task ) ;
}
void dlm_recoverd_suspend ( struct dlm_ls * ls )
{
2006-08-09 02:06:07 +04:00
wake_up ( & ls - > ls_wait_general ) ;
2006-01-20 11:47:07 +03:00
mutex_lock ( & ls - > ls_recoverd_active ) ;
2006-01-18 12:30:29 +03:00
}
void dlm_recoverd_resume ( struct dlm_ls * ls )
{
2006-01-20 11:47:07 +03:00
mutex_unlock ( & ls - > ls_recoverd_active ) ;
2006-01-18 12:30:29 +03:00
}