2019-05-28 19:57:20 +03:00
// SPDX-License-Identifier: GPL-2.0-only
2006-01-18 12:30:29 +03:00
/******************************************************************************
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
* *
* * Copyright ( C ) Sistina Software , Inc . 1997 - 2003 All rights reserved .
* * Copyright ( C ) 2004 - 2005 Red Hat , Inc . All rights reserved .
* *
* *
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
# include "dlm_internal.h"
# include "lockspace.h"
# include "dir.h"
# include "config.h"
# include "ast.h"
# include "memory.h"
# include "rcom.h"
# include "lock.h"
# include "lowcomms.h"
# include "member.h"
# include "recover.h"
/*
* Recovery waiting routines : these functions wait for a particular reply from
* a remote node , or for the remote node to report a certain status . They need
* to abort if the lockspace is stopped indicating a node has failed ( perhaps
* the one being waited for ) .
*/
/*
* Wait until given function returns non - zero or lockspace is stopped
* ( LS_RECOVERY_STOP set due to failure of a node in ls_nodes ) . When another
* function thinks it could have completed the waited - on task , it should wake
* up ls_wait_general to get an immediate response rather than waiting for the
2012-06-05 20:23:21 +04:00
* timeout . This uses a timeout so it can check periodically if the wait
* should abort due to node failure ( which doesn ' t cause a wake_up ) .
* This should only be called by the dlm_recoverd thread .
2006-01-18 12:30:29 +03:00
*/
int dlm_wait_function ( struct dlm_ls * ls , int ( * testfn ) ( struct dlm_ls * ls ) )
{
int error = 0 ;
2012-06-05 20:23:21 +04:00
int rv ;
2006-01-18 12:30:29 +03:00
2012-06-05 20:23:21 +04:00
while ( 1 ) {
rv = wait_event_timeout ( ls - > ls_wait_general ,
testfn ( ls ) | | dlm_recovery_stopped ( ls ) ,
dlm_config . ci_recover_timer * HZ ) ;
if ( rv )
break ;
2017-09-12 11:56:08 +03:00
if ( test_bit ( LSFL_RCOM_WAIT , & ls - > ls_flags ) ) {
log_debug ( ls , " dlm_wait_function timed out " ) ;
return - ETIMEDOUT ;
}
2012-06-05 20:23:21 +04:00
}
2006-01-18 12:30:29 +03:00
if ( dlm_recovery_stopped ( ls ) ) {
log_debug ( ls , " dlm_wait_function aborted " ) ;
error = - EINTR ;
}
return error ;
}
/*
* An efficient way for all nodes to wait for all others to have a certain
* status . The node with the lowest nodeid polls all the others for their
* status ( wait_status_all ) and all the others poll the node with the low id
* for its accumulated result ( wait_status_low ) . When all nodes have set
* status flag X , then status flag X_ALL will be set on the low nodeid .
*/
uint32_t dlm_recover_status ( struct dlm_ls * ls )
{
uint32_t status ;
spin_lock ( & ls - > ls_recover_lock ) ;
status = ls - > ls_recover_status ;
spin_unlock ( & ls - > ls_recover_lock ) ;
return status ;
}
2011-10-20 22:26:28 +04:00
/*
 * OR the given bits into ls->ls_recover_status.  No locking is done here;
 * the callers visible in this file hold ls->ls_recover_lock around the call.
 */
static void _set_recover_status(struct dlm_ls *ls, uint32_t status)
{
	ls->ls_recover_status = ls->ls_recover_status | status;
}
2006-01-18 12:30:29 +03:00
/*
 * Locked variant of _set_recover_status(): sets the given bits in
 * ls->ls_recover_status while holding ls->ls_recover_lock.
 */
void dlm_set_recover_status(struct dlm_ls *ls, uint32_t status)
{
	spin_lock(&ls->ls_recover_lock);

	_set_recover_status(ls, status);

	spin_unlock(&ls->ls_recover_lock);
}
2011-10-20 22:26:28 +04:00
/*
 * Poll each member on ls->ls_nodes, one after the other, with
 * dlm_rcom_status() until rc_result in ls->ls_recover_buf has a bit of
 * wait_status set for that member.
 *
 * When save_slots is non-zero, dlm_slot_save() is called after every
 * successful status request.  Between retries the sleep grows by 20 ms
 * up to 1000 ms, and starts again from zero for the next member.
 *
 * Returns 0 when all members reported the status, -EINTR when recovery is
 * stopped, or the error returned by dlm_rcom_status().
 */
static int wait_status_all(struct dlm_ls *ls, uint32_t wait_status,
			   int save_slots)
{
	struct dlm_rcom *reply = ls->ls_recover_buf;
	struct dlm_member *node;
	int rv;

	list_for_each_entry(node, &ls->ls_nodes, list) {
		int backoff_ms = 0;

		while (1) {
			if (dlm_recovery_stopped(ls))
				return -EINTR;

			rv = dlm_rcom_status(ls, node->nodeid, 0);
			if (rv)
				return rv;

			if (save_slots)
				dlm_slot_save(ls, reply, node);

			if (reply->rc_result & wait_status)
				break;

			if (backoff_ms < 1000)
				backoff_ms += 20;
			msleep(backoff_ms);
		}
	}

	return 0;
}
2011-10-20 22:26:28 +04:00
/*
 * Poll the node ls->ls_low_nodeid with dlm_rcom_status(), passing
 * status_flags, until rc_result in ls->ls_recover_buf has a bit of
 * wait_status set.  The sleep between retries grows by 20 ms up to 1000 ms.
 *
 * Returns 0 once the status is seen, -EINTR when recovery is stopped, or
 * the error returned by dlm_rcom_status().
 */
static int wait_status_low(struct dlm_ls *ls, uint32_t wait_status,
			   uint32_t status_flags)
{
	struct dlm_rcom *reply = ls->ls_recover_buf;
	int low_nodeid = ls->ls_low_nodeid;
	int backoff_ms = 0;
	int rv;

	while (1) {
		if (dlm_recovery_stopped(ls))
			return -EINTR;

		rv = dlm_rcom_status(ls, low_nodeid, status_flags);
		if (rv)
			return rv;

		if (reply->rc_result & wait_status)
			return 0;

		if (backoff_ms < 1000)
			backoff_ms += 20;
		msleep(backoff_ms);
	}
}
/*
 * Wait for recovery status flag "status" across the lockspace.  The
 * matching "all nodes" flag is taken to be status << 1.
 *
 * On the low nodeid: poll every member for "status" and, on success, set
 * the "all" flag locally.  On any other node: poll the low nodeid for the
 * "all" flag.
 */
static int wait_status(struct dlm_ls *ls, uint32_t status)
{
	const uint32_t all_flag = status << 1;
	int rv;

	if (ls->ls_low_nodeid != dlm_our_nodeid())
		return wait_status_low(ls, all_flag, 0);

	rv = wait_status_all(ls, status, 0);
	if (rv)
		return rv;

	dlm_set_recover_status(ls, all_flag);
	return 0;
}
int dlm_recover_members_wait ( struct dlm_ls * ls )
{
2011-10-20 22:26:28 +04:00
struct dlm_member * memb ;
struct dlm_slot * slots ;
int num_slots , slots_size ;
int error , rv ;
uint32_t gen ;
list_for_each_entry ( memb , & ls - > ls_nodes , list ) {
memb - > slot = - 1 ;
memb - > generation = 0 ;
}
if ( ls - > ls_low_nodeid = = dlm_our_nodeid ( ) ) {
error = wait_status_all ( ls , DLM_RS_NODES , 1 ) ;
if ( error )
goto out ;
/* slots array is sparse, slots_size may be > num_slots */
rv = dlm_slots_assign ( ls , & num_slots , & slots_size , & slots , & gen ) ;
if ( ! rv ) {
spin_lock ( & ls - > ls_recover_lock ) ;
_set_recover_status ( ls , DLM_RS_NODES_ALL ) ;
ls - > ls_num_slots = num_slots ;
ls - > ls_slots_size = slots_size ;
ls - > ls_slots = slots ;
ls - > ls_generation = gen ;
spin_unlock ( & ls - > ls_recover_lock ) ;
} else {
dlm_set_recover_status ( ls , DLM_RS_NODES_ALL ) ;
}
} else {
error = wait_status_low ( ls , DLM_RS_NODES_ALL , DLM_RSF_NEED_SLOTS ) ;
if ( error )
goto out ;
dlm_slots_copy_in ( ls ) ;
}
out :
return error ;
2006-01-18 12:30:29 +03:00
}
/* Wait for the DLM_RS_DIR recovery status; see wait_status(). */
int dlm_recover_directory_wait(struct dlm_ls *ls)
{
	int rv = wait_status(ls, DLM_RS_DIR);

	return rv;
}
/* Wait for the DLM_RS_LOCKS recovery status; see wait_status(). */
int dlm_recover_locks_wait(struct dlm_ls *ls)
{
	int rv = wait_status(ls, DLM_RS_LOCKS);

	return rv;
}
/* Wait for the DLM_RS_DONE recovery status; see wait_status(). */
int dlm_recover_done_wait(struct dlm_ls *ls)
{
	int rv = wait_status(ls, DLM_RS_DONE);

	return rv;
}
/*
* The recover_list contains all the rsb ' s for which we ' ve requested the new
* master nodeid . As replies are returned from the resource directories the
* rsb ' s are removed from the list . When the list is empty we ' re done .
*
* The recover_list is later similarly used for all rsb ' s for which we ' ve sent
* new lkb ' s and need to receive new corresponding lkid ' s .
*
* We use the address of the rsb struct as a simple local identifier for the
* rsb so we can match an rcom reply with the rsb it was sent for .
*/
static int recover_list_empty ( struct dlm_ls * ls )
{
int empty ;
spin_lock ( & ls - > ls_recover_list_lock ) ;
empty = list_empty ( & ls - > ls_recover_list ) ;
spin_unlock ( & ls - > ls_recover_list_lock ) ;
return empty ;
}
static void recover_list_add ( struct dlm_rsb * r )
{
struct dlm_ls * ls = r - > res_ls ;
spin_lock ( & ls - > ls_recover_list_lock ) ;
if ( list_empty ( & r - > res_recover_list ) ) {
list_add_tail ( & r - > res_recover_list , & ls - > ls_recover_list ) ;
ls - > ls_recover_list_count + + ;
dlm_hold_rsb ( r ) ;
}
spin_unlock ( & ls - > ls_recover_list_lock ) ;
}
static void recover_list_del ( struct dlm_rsb * r )
{
struct dlm_ls * ls = r - > res_ls ;
spin_lock ( & ls - > ls_recover_list_lock ) ;
list_del_init ( & r - > res_recover_list ) ;
ls - > ls_recover_list_count - - ;
spin_unlock ( & ls - > ls_recover_list_lock ) ;
dlm_put_rsb ( r ) ;
}
static void recover_list_clear ( struct dlm_ls * ls )
{
struct dlm_rsb * r , * s ;
spin_lock ( & ls - > ls_recover_list_lock ) ;
list_for_each_entry_safe ( r , s , & ls - > ls_recover_list , res_recover_list ) {
list_del_init ( & r - > res_recover_list ) ;
2006-11-02 18:49:02 +03:00
r - > res_recover_locks_count = 0 ;
2006-01-18 12:30:29 +03:00
dlm_put_rsb ( r ) ;
ls - > ls_recover_list_count - - ;
}
if ( ls - > ls_recover_list_count ! = 0 ) {
log_error ( ls , " warning: recover_list_count %d " ,
ls - > ls_recover_list_count ) ;
ls - > ls_recover_list_count = 0 ;
}
spin_unlock ( & ls - > ls_recover_list_lock ) ;
}
2012-05-16 01:07:49 +04:00
static int recover_idr_empty ( struct dlm_ls * ls )
{
int empty = 1 ;
spin_lock ( & ls - > ls_recover_idr_lock ) ;
if ( ls - > ls_recover_list_count )
empty = 0 ;
spin_unlock ( & ls - > ls_recover_idr_lock ) ;
return empty ;
}
static int recover_idr_add ( struct dlm_rsb * r )
{
struct dlm_ls * ls = r - > res_ls ;
2013-02-28 05:04:49 +04:00
int rv ;
2012-05-16 01:07:49 +04:00
2013-02-28 05:04:49 +04:00
idr_preload ( GFP_NOFS ) ;
2012-05-16 01:07:49 +04:00
spin_lock ( & ls - > ls_recover_idr_lock ) ;
if ( r - > res_id ) {
2013-02-28 05:04:49 +04:00
rv = - 1 ;
goto out_unlock ;
2012-05-16 01:07:49 +04:00
}
2013-02-28 05:04:49 +04:00
rv = idr_alloc ( & ls - > ls_recover_idr , r , 1 , 0 , GFP_NOWAIT ) ;
if ( rv < 0 )
goto out_unlock ;
r - > res_id = rv ;
2012-05-16 01:07:49 +04:00
ls - > ls_recover_list_count + + ;
dlm_hold_rsb ( r ) ;
2013-02-28 05:04:49 +04:00
rv = 0 ;
out_unlock :
2012-05-16 01:07:49 +04:00
spin_unlock ( & ls - > ls_recover_idr_lock ) ;
2013-02-28 05:04:49 +04:00
idr_preload_end ( ) ;
return rv ;
2012-05-16 01:07:49 +04:00
}
static void recover_idr_del ( struct dlm_rsb * r )
{
struct dlm_ls * ls = r - > res_ls ;
spin_lock ( & ls - > ls_recover_idr_lock ) ;
idr_remove ( & ls - > ls_recover_idr , r - > res_id ) ;
r - > res_id = 0 ;
ls - > ls_recover_list_count - - ;
spin_unlock ( & ls - > ls_recover_idr_lock ) ;
dlm_put_rsb ( r ) ;
}
/*
 * Look up the rsb registered under "id" in ls->ls_recover_idr, holding
 * ls_recover_idr_lock for the lookup.  Returns whatever idr_find() returns.
 * Note that id is narrowed to int before the lookup.
 */
static struct dlm_rsb *recover_idr_find(struct dlm_ls *ls, uint64_t id)
{
	struct dlm_rsb *found;

	spin_lock(&ls->ls_recover_idr_lock);
	found = idr_find(&ls->ls_recover_idr, (int)id);
	spin_unlock(&ls->ls_recover_idr_lock);

	return found;
}
2013-02-28 05:03:44 +04:00
static void recover_idr_clear ( struct dlm_ls * ls )
2012-05-16 01:07:49 +04:00
{
2013-02-28 05:03:44 +04:00
struct dlm_rsb * r ;
int id ;
2012-05-16 01:07:49 +04:00
2013-02-28 05:03:44 +04:00
spin_lock ( & ls - > ls_recover_idr_lock ) ;
2012-05-16 01:07:49 +04:00
2013-02-28 05:03:44 +04:00
idr_for_each_entry ( & ls - > ls_recover_idr , r , id ) {
2013-02-28 05:03:45 +04:00
idr_remove ( & ls - > ls_recover_idr , id ) ;
2013-02-28 05:03:44 +04:00
r - > res_id = 0 ;
r - > res_recover_locks_count = 0 ;
ls - > ls_recover_list_count - - ;
2012-05-16 01:07:49 +04:00
2013-02-28 05:03:44 +04:00
dlm_put_rsb ( r ) ;
}
2012-05-16 01:07:49 +04:00
if ( ls - > ls_recover_list_count ! = 0 ) {
log_error ( ls , " warning: recover_list_count %d " ,
ls - > ls_recover_list_count ) ;
ls - > ls_recover_list_count = 0 ;
}
spin_unlock ( & ls - > ls_recover_idr_lock ) ;
}
2006-01-18 12:30:29 +03:00
/* Master recovery: find new master node for rsb's that were
mastered on nodes that have been removed .
dlm_recover_masters
recover_master
dlm_send_rcom_lookup - > receive_rcom_lookup
dlm_dir_lookup
receive_rcom_lookup_reply < -
dlm_recover_master_reply
set_new_master
set_master_lkbs
set_lock_master
*/
/*
* Set the lock master for all LKBs in a lock queue
* If we are the new master of the rsb , we may have received new
* MSTCPY locks from other nodes already which we need to ignore
* when setting the new nodeid .
*/
static void set_lock_master ( struct list_head * queue , int nodeid )
{
struct dlm_lkb * lkb ;
dlm: fixes for nodir mode
The "nodir" mode (statically assign master nodes instead
of using the resource directory) has always been highly
experimental, and never seriously used. This commit
fixes a number of problems, making nodir much more usable.
- Major change to recovery: recover all locks and restart
all in-progress operations after recovery. In some
cases it's not possible to know which in-progess locks
to recover, so recover all. (Most require recovery
in nodir mode anyway since rehashing changes most
master nodes.)
- Change the way nodir mode is enabled, from a command
line mount arg passed through gfs2, into a sysfs
file managed by dlm_controld, consistent with the
other config settings.
- Allow recovering MSTCPY locks on an rsb that has not
yet been turned into a master copy.
- Ignore RCOM_LOCK and RCOM_LOCK_REPLY recovery messages
from a previous, aborted recovery cycle. Base this
on the local recovery status not being in the state
where any nodes should be sending LOCK messages for the
current recovery cycle.
- Hold rsb lock around dlm_purge_mstcpy_locks() because it
may run concurrently with dlm_recover_master_copy().
- Maintain highbast on process-copy lkb's (in addition to
the master as is usual), because the lkb can switch
back and forth between being a master and being a
process copy as the master node changes in recovery.
- When recovering MSTCPY locks, flag rsb's that have
non-empty convert or waiting queues for granting
at the end of recovery. (Rename flag from LOCKS_PURGED
to RECOVER_GRANT and similar for the recovery function,
because it's not only resources with purged locks
that need grant a grant attempt.)
- Replace a couple of unnecessary assertion panics with
error messages.
Signed-off-by: David Teigland <teigland@redhat.com>
2012-04-27 00:54:29 +04:00
list_for_each_entry ( lkb , queue , lkb_statequeue ) {
if ( ! ( lkb - > lkb_flags & DLM_IFL_MSTCPY ) ) {
2006-01-18 12:30:29 +03:00
lkb - > lkb_nodeid = nodeid ;
dlm: fixes for nodir mode
The "nodir" mode (statically assign master nodes instead
of using the resource directory) has always been highly
experimental, and never seriously used. This commit
fixes a number of problems, making nodir much more usable.
- Major change to recovery: recover all locks and restart
all in-progress operations after recovery. In some
cases it's not possible to know which in-progess locks
to recover, so recover all. (Most require recovery
in nodir mode anyway since rehashing changes most
master nodes.)
- Change the way nodir mode is enabled, from a command
line mount arg passed through gfs2, into a sysfs
file managed by dlm_controld, consistent with the
other config settings.
- Allow recovering MSTCPY locks on an rsb that has not
yet been turned into a master copy.
- Ignore RCOM_LOCK and RCOM_LOCK_REPLY recovery messages
from a previous, aborted recovery cycle. Base this
on the local recovery status not being in the state
where any nodes should be sending LOCK messages for the
current recovery cycle.
- Hold rsb lock around dlm_purge_mstcpy_locks() because it
may run concurrently with dlm_recover_master_copy().
- Maintain highbast on process-copy lkb's (in addition to
the master as is usual), because the lkb can switch
back and forth between being a master and being a
process copy as the master node changes in recovery.
- When recovering MSTCPY locks, flag rsb's that have
non-empty convert or waiting queues for granting
at the end of recovery. (Rename flag from LOCKS_PURGED
to RECOVER_GRANT and similar for the recovery function,
because it's not only resources with purged locks
that need grant a grant attempt.)
- Replace a couple of unnecessary assertion panics with
error messages.
Signed-off-by: David Teigland <teigland@redhat.com>
2012-04-27 00:54:29 +04:00
lkb - > lkb_remid = 0 ;
}
}
2006-01-18 12:30:29 +03:00
}
static void set_master_lkbs ( struct dlm_rsb * r )
{
set_lock_master ( & r - > res_grantqueue , r - > res_nodeid ) ;
set_lock_master ( & r - > res_convertqueue , r - > res_nodeid ) ;
set_lock_master ( & r - > res_waitqueue , r - > res_nodeid ) ;
}
/*
2011-03-31 05:57:33 +04:00
* Propagate the new master nodeid to locks
2006-01-18 12:30:29 +03:00
* The NEW_MASTER flag tells dlm_recover_locks ( ) which rsb ' s to consider .
dlm: fixes for nodir mode
The "nodir" mode (statically assign master nodes instead
of using the resource directory) has always been highly
experimental, and never seriously used. This commit
fixes a number of problems, making nodir much more usable.
- Major change to recovery: recover all locks and restart
all in-progress operations after recovery. In some
cases it's not possible to know which in-progess locks
to recover, so recover all. (Most require recovery
in nodir mode anyway since rehashing changes most
master nodes.)
- Change the way nodir mode is enabled, from a command
line mount arg passed through gfs2, into a sysfs
file managed by dlm_controld, consistent with the
other config settings.
- Allow recovering MSTCPY locks on an rsb that has not
yet been turned into a master copy.
- Ignore RCOM_LOCK and RCOM_LOCK_REPLY recovery messages
from a previous, aborted recovery cycle. Base this
on the local recovery status not being in the state
where any nodes should be sending LOCK messages for the
current recovery cycle.
- Hold rsb lock around dlm_purge_mstcpy_locks() because it
may run concurrently with dlm_recover_master_copy().
- Maintain highbast on process-copy lkb's (in addition to
the master as is usual), because the lkb can switch
back and forth between being a master and being a
process copy as the master node changes in recovery.
- When recovering MSTCPY locks, flag rsb's that have
non-empty convert or waiting queues for granting
at the end of recovery. (Rename flag from LOCKS_PURGED
to RECOVER_GRANT and similar for the recovery function,
because it's not only resources with purged locks
that need grant a grant attempt.)
- Replace a couple of unnecessary assertion panics with
error messages.
Signed-off-by: David Teigland <teigland@redhat.com>
2012-04-27 00:54:29 +04:00
* The NEW_MASTER2 flag tells recover_lvb ( ) and recover_grant ( ) which
2006-07-25 22:53:33 +04:00
* rsb ' s to consider .
2006-01-18 12:30:29 +03:00
*/
2012-05-10 19:18:07 +04:00
static void set_new_master ( struct dlm_rsb * r )
2006-01-18 12:30:29 +03:00
{
set_master_lkbs ( r ) ;
rsb_set_flag ( r , RSB_NEW_MASTER ) ;
rsb_set_flag ( r , RSB_NEW_MASTER2 ) ;
}
/*
* We do async lookups on rsb ' s that need new masters . The rsb ' s
* waiting for a lookup reply are kept on the recover_list .
2012-05-10 19:18:07 +04:00
*
* Another node recovering the master may have sent us a rcom lookup ,
* and our dlm_master_lookup ( ) set it as the new master , along with
* NEW_MASTER so that we ' ll recover it here ( this implies dir_nodeid
* equals our_nodeid below ) .
2006-01-18 12:30:29 +03:00
*/
2012-05-10 19:18:07 +04:00
static int recover_master ( struct dlm_rsb * r , unsigned int * count )
2006-01-18 12:30:29 +03:00
{
struct dlm_ls * ls = r - > res_ls ;
2012-05-10 19:18:07 +04:00
int our_nodeid , dir_nodeid ;
int is_removed = 0 ;
int error ;
if ( is_master ( r ) )
return 0 ;
is_removed = dlm_is_removed ( ls , r - > res_nodeid ) ;
if ( ! is_removed & & ! rsb_flag ( r , RSB_NEW_MASTER ) )
return 0 ;
our_nodeid = dlm_our_nodeid ( ) ;
dir_nodeid = dlm_dir_nodeid ( r ) ;
2006-01-18 12:30:29 +03:00
if ( dir_nodeid = = our_nodeid ) {
2012-05-10 19:18:07 +04:00
if ( is_removed ) {
r - > res_master_nodeid = our_nodeid ;
r - > res_nodeid = 0 ;
}
2006-01-18 12:30:29 +03:00
2012-05-10 19:18:07 +04:00
/* set master of lkbs to ourself when is_removed, or to
another new master which we set along with NEW_MASTER
in dlm_master_lookup */
set_new_master ( r ) ;
error = 0 ;
2006-01-18 12:30:29 +03:00
} else {
2012-05-16 01:07:49 +04:00
recover_idr_add ( r ) ;
2006-01-18 12:30:29 +03:00
error = dlm_send_rcom_lookup ( r , dir_nodeid ) ;
}
2012-05-10 19:18:07 +04:00
( * count ) + + ;
2006-01-18 12:30:29 +03:00
return error ;
}
/*
dlm: fixes for nodir mode
The "nodir" mode (statically assign master nodes instead
of using the resource directory) has always been highly
experimental, and never seriously used. This commit
fixes a number of problems, making nodir much more usable.
- Major change to recovery: recover all locks and restart
all in-progress operations after recovery. In some
cases it's not possible to know which in-progess locks
to recover, so recover all. (Most require recovery
in nodir mode anyway since rehashing changes most
master nodes.)
- Change the way nodir mode is enabled, from a command
line mount arg passed through gfs2, into a sysfs
file managed by dlm_controld, consistent with the
other config settings.
- Allow recovering MSTCPY locks on an rsb that has not
yet been turned into a master copy.
- Ignore RCOM_LOCK and RCOM_LOCK_REPLY recovery messages
from a previous, aborted recovery cycle. Base this
on the local recovery status not being in the state
where any nodes should be sending LOCK messages for the
current recovery cycle.
- Hold rsb lock around dlm_purge_mstcpy_locks() because it
may run concurrently with dlm_recover_master_copy().
- Maintain highbast on process-copy lkb's (in addition to
the master as is usual), because the lkb can switch
back and forth between being a master and being a
process copy as the master node changes in recovery.
- When recovering MSTCPY locks, flag rsb's that have
non-empty convert or waiting queues for granting
at the end of recovery. (Rename flag from LOCKS_PURGED
to RECOVER_GRANT and similar for the recovery function,
because it's not only resources with purged locks
that need grant a grant attempt.)
- Replace a couple of unnecessary assertion panics with
error messages.
Signed-off-by: David Teigland <teigland@redhat.com>
2012-04-27 00:54:29 +04:00
* All MSTCPY locks are purged and rebuilt , even if the master stayed the same .
* This is necessary because recovery can be started , aborted and restarted ,
* causing the master nodeid to briefly change during the aborted recovery , and
* change back to the original value in the second recovery . The MSTCPY locks
* may or may not have been purged during the aborted recovery . Another node
* with an outstanding request in waiters list and a request reply saved in the
* requestqueue , cannot know whether it should ignore the reply and resend the
* request , or accept the reply and complete the request . It must do the
* former if the remote node purged MSTCPY locks , and it must do the latter if
* the remote node did not . This is solved by always purging MSTCPY locks , in
* which case , the request reply would always be ignored and the request
* resent .
2006-01-18 12:30:29 +03:00
*/
2012-05-10 19:18:07 +04:00
static int recover_master_static ( struct dlm_rsb * r , unsigned int * count )
2006-01-18 12:30:29 +03:00
{
dlm: fixes for nodir mode
The "nodir" mode (statically assign master nodes instead
of using the resource directory) has always been highly
experimental, and never seriously used. This commit
fixes a number of problems, making nodir much more usable.
- Major change to recovery: recover all locks and restart
all in-progress operations after recovery. In some
cases it's not possible to know which in-progess locks
to recover, so recover all. (Most require recovery
in nodir mode anyway since rehashing changes most
master nodes.)
- Change the way nodir mode is enabled, from a command
line mount arg passed through gfs2, into a sysfs
file managed by dlm_controld, consistent with the
other config settings.
- Allow recovering MSTCPY locks on an rsb that has not
yet been turned into a master copy.
- Ignore RCOM_LOCK and RCOM_LOCK_REPLY recovery messages
from a previous, aborted recovery cycle. Base this
on the local recovery status not being in the state
where any nodes should be sending LOCK messages for the
current recovery cycle.
- Hold rsb lock around dlm_purge_mstcpy_locks() because it
may run concurrently with dlm_recover_master_copy().
- Maintain highbast on process-copy lkb's (in addition to
the master as is usual), because the lkb can switch
back and forth between being a master and being a
process copy as the master node changes in recovery.
- When recovering MSTCPY locks, flag rsb's that have
non-empty convert or waiting queues for granting
at the end of recovery. (Rename flag from LOCKS_PURGED
to RECOVER_GRANT and similar for the recovery function,
because it's not only resources with purged locks
that need grant a grant attempt.)
- Replace a couple of unnecessary assertion panics with
error messages.
Signed-off-by: David Teigland <teigland@redhat.com>
2012-04-27 00:54:29 +04:00
int dir_nodeid = dlm_dir_nodeid ( r ) ;
int new_master = dir_nodeid ;
2006-01-18 12:30:29 +03:00
dlm: fixes for nodir mode
The "nodir" mode (statically assign master nodes instead
of using the resource directory) has always been highly
experimental, and never seriously used. This commit
fixes a number of problems, making nodir much more usable.
- Major change to recovery: recover all locks and restart
all in-progress operations after recovery. In some
cases it's not possible to know which in-progess locks
to recover, so recover all. (Most require recovery
in nodir mode anyway since rehashing changes most
master nodes.)
- Change the way nodir mode is enabled, from a command
line mount arg passed through gfs2, into a sysfs
file managed by dlm_controld, consistent with the
other config settings.
- Allow recovering MSTCPY locks on an rsb that has not
yet been turned into a master copy.
- Ignore RCOM_LOCK and RCOM_LOCK_REPLY recovery messages
from a previous, aborted recovery cycle. Base this
on the local recovery status not being in the state
where any nodes should be sending LOCK messages for the
current recovery cycle.
- Hold rsb lock around dlm_purge_mstcpy_locks() because it
may run concurrently with dlm_recover_master_copy().
- Maintain highbast on process-copy lkb's (in addition to
the master as is usual), because the lkb can switch
back and forth between being a master and being a
process copy as the master node changes in recovery.
- When recovering MSTCPY locks, flag rsb's that have
non-empty convert or waiting queues for granting
at the end of recovery. (Rename flag from LOCKS_PURGED
to RECOVER_GRANT and similar for the recovery function,
because it's not only resources with purged locks
that need grant a grant attempt.)
- Replace a couple of unnecessary assertion panics with
error messages.
Signed-off-by: David Teigland <teigland@redhat.com>
2012-04-27 00:54:29 +04:00
if ( dir_nodeid = = dlm_our_nodeid ( ) )
new_master = 0 ;
2006-01-18 12:30:29 +03:00
dlm: fixes for nodir mode
The "nodir" mode (statically assign master nodes instead
of using the resource directory) has always been highly
experimental, and never seriously used. This commit
fixes a number of problems, making nodir much more usable.
- Major change to recovery: recover all locks and restart
all in-progress operations after recovery. In some
cases it's not possible to know which in-progess locks
to recover, so recover all. (Most require recovery
in nodir mode anyway since rehashing changes most
master nodes.)
- Change the way nodir mode is enabled, from a command
line mount arg passed through gfs2, into a sysfs
file managed by dlm_controld, consistent with the
other config settings.
- Allow recovering MSTCPY locks on an rsb that has not
yet been turned into a master copy.
- Ignore RCOM_LOCK and RCOM_LOCK_REPLY recovery messages
from a previous, aborted recovery cycle. Base this
on the local recovery status not being in the state
where any nodes should be sending LOCK messages for the
current recovery cycle.
- Hold rsb lock around dlm_purge_mstcpy_locks() because it
may run concurrently with dlm_recover_master_copy().
- Maintain highbast on process-copy lkb's (in addition to
the master as is usual), because the lkb can switch
back and forth between being a master and being a
process copy as the master node changes in recovery.
- When recovering MSTCPY locks, flag rsb's that have
non-empty convert or waiting queues for granting
at the end of recovery. (Rename flag from LOCKS_PURGED
to RECOVER_GRANT and similar for the recovery function,
because it's not only resources with purged locks
that need grant a grant attempt.)
- Replace a couple of unnecessary assertion panics with
error messages.
Signed-off-by: David Teigland <teigland@redhat.com>
2012-04-27 00:54:29 +04:00
dlm_purge_mstcpy_locks ( r ) ;
2012-05-10 19:18:07 +04:00
r - > res_master_nodeid = dir_nodeid ;
r - > res_nodeid = new_master ;
set_new_master ( r ) ;
( * count ) + + ;
return 0 ;
2006-01-18 12:30:29 +03:00
}
/*
* Go through local root resources and for each rsb which has a master which
* has departed , get the new master nodeid from the directory . The dir will
* assign mastery to the first node to look up the new master . That means
* we ' ll discover in this lookup if we ' re the new master of any rsb ' s .
*
* We fire off all the dir lookup requests individually and asynchronously to
* the correct dir node .
*/
int dlm_recover_masters ( struct dlm_ls * ls )
{
struct dlm_rsb * r ;
2012-05-10 19:18:07 +04:00
unsigned int total = 0 ;
unsigned int count = 0 ;
int nodir = dlm_no_directory ( ls ) ;
int error ;
2006-01-18 12:30:29 +03:00
2014-02-14 21:54:44 +04:00
log_rinfo ( ls , " dlm_recover_masters " ) ;
2006-01-18 12:30:29 +03:00
down_read ( & ls - > ls_root_sem ) ;
list_for_each_entry ( r , & ls - > ls_root_list , res_root_list ) {
if ( dlm_recovery_stopped ( ls ) ) {
up_read ( & ls - > ls_root_sem ) ;
error = - EINTR ;
goto out ;
}
2012-05-10 19:18:07 +04:00
lock_rsb ( r ) ;
if ( nodir )
error = recover_master_static ( r , & count ) ;
else
error = recover_master ( r , & count ) ;
unlock_rsb ( r ) ;
cond_resched ( ) ;
total + + ;
2006-01-18 12:30:29 +03:00
2012-05-10 19:18:07 +04:00
if ( error ) {
up_read ( & ls - > ls_root_sem ) ;
goto out ;
}
2006-01-18 12:30:29 +03:00
}
up_read ( & ls - > ls_root_sem ) ;
2014-02-14 21:54:44 +04:00
log_rinfo ( ls , " dlm_recover_masters %u of %u " , count , total ) ;
2006-01-18 12:30:29 +03:00
2012-05-16 01:07:49 +04:00
error = dlm_wait_function ( ls , & recover_idr_empty ) ;
2006-01-18 12:30:29 +03:00
out :
if ( error )
2012-05-16 01:07:49 +04:00
recover_idr_clear ( ls ) ;
2006-01-18 12:30:29 +03:00
return error ;
}
int dlm_recover_master_reply ( struct dlm_ls * ls , struct dlm_rcom * rc )
{
struct dlm_rsb * r ;
2012-05-10 19:18:07 +04:00
int ret_nodeid , new_master ;
2006-01-18 12:30:29 +03:00
2012-05-16 01:07:49 +04:00
r = recover_idr_find ( ls , rc - > rc_id ) ;
2006-01-18 12:30:29 +03:00
if ( ! r ) {
2006-01-20 11:47:07 +03:00
log_error ( ls , " dlm_recover_master_reply no id %llx " ,
2006-05-24 17:21:30 +04:00
( unsigned long long ) rc - > rc_id ) ;
2006-01-18 12:30:29 +03:00
goto out ;
}
2012-05-10 19:18:07 +04:00
ret_nodeid = rc - > rc_result ;
if ( ret_nodeid = = dlm_our_nodeid ( ) )
new_master = 0 ;
else
new_master = ret_nodeid ;
2006-01-18 12:30:29 +03:00
dlm: fixes for nodir mode
The "nodir" mode (statically assign master nodes instead
of using the resource directory) has always been highly
experimental, and never seriously used. This commit
fixes a number of problems, making nodir much more usable.
- Major change to recovery: recover all locks and restart
all in-progress operations after recovery. In some
cases it's not possible to know which in-progess locks
to recover, so recover all. (Most require recovery
in nodir mode anyway since rehashing changes most
master nodes.)
- Change the way nodir mode is enabled, from a command
line mount arg passed through gfs2, into a sysfs
file managed by dlm_controld, consistent with the
other config settings.
- Allow recovering MSTCPY locks on an rsb that has not
yet been turned into a master copy.
- Ignore RCOM_LOCK and RCOM_LOCK_REPLY recovery messages
from a previous, aborted recovery cycle. Base this
on the local recovery status not being in the state
where any nodes should be sending LOCK messages for the
current recovery cycle.
- Hold rsb lock around dlm_purge_mstcpy_locks() because it
may run concurrently with dlm_recover_master_copy().
- Maintain highbast on process-copy lkb's (in addition to
the master as is usual), because the lkb can switch
back and forth between being a master and being a
process copy as the master node changes in recovery.
- When recovering MSTCPY locks, flag rsb's that have
non-empty convert or waiting queues for granting
at the end of recovery. (Rename flag from LOCKS_PURGED
to RECOVER_GRANT and similar for the recovery function,
because it's not only resources with purged locks
that need grant a grant attempt.)
- Replace a couple of unnecessary assertion panics with
error messages.
Signed-off-by: David Teigland <teigland@redhat.com>
2012-04-27 00:54:29 +04:00
lock_rsb ( r ) ;
2012-05-10 19:18:07 +04:00
r - > res_master_nodeid = ret_nodeid ;
r - > res_nodeid = new_master ;
set_new_master ( r ) ;
dlm: fixes for nodir mode
The "nodir" mode (statically assign master nodes instead
of using the resource directory) has always been highly
experimental, and never seriously used. This commit
fixes a number of problems, making nodir much more usable.
- Major change to recovery: recover all locks and restart
all in-progress operations after recovery. In some
cases it's not possible to know which in-progess locks
to recover, so recover all. (Most require recovery
in nodir mode anyway since rehashing changes most
master nodes.)
- Change the way nodir mode is enabled, from a command
line mount arg passed through gfs2, into a sysfs
file managed by dlm_controld, consistent with the
other config settings.
- Allow recovering MSTCPY locks on an rsb that has not
yet been turned into a master copy.
- Ignore RCOM_LOCK and RCOM_LOCK_REPLY recovery messages
from a previous, aborted recovery cycle. Base this
on the local recovery status not being in the state
where any nodes should be sending LOCK messages for the
current recovery cycle.
- Hold rsb lock around dlm_purge_mstcpy_locks() because it
may run concurrently with dlm_recover_master_copy().
- Maintain highbast on process-copy lkb's (in addition to
the master as is usual), because the lkb can switch
back and forth between being a master and being a
process copy as the master node changes in recovery.
- When recovering MSTCPY locks, flag rsb's that have
non-empty convert or waiting queues for granting
at the end of recovery. (Rename flag from LOCKS_PURGED
to RECOVER_GRANT and similar for the recovery function,
because it's not only resources with purged locks
that need grant a grant attempt.)
- Replace a couple of unnecessary assertion panics with
error messages.
Signed-off-by: David Teigland <teigland@redhat.com>
2012-04-27 00:54:29 +04:00
unlock_rsb ( r ) ;
2012-05-16 01:07:49 +04:00
recover_idr_del ( r ) ;
2006-01-18 12:30:29 +03:00
2012-05-16 01:07:49 +04:00
if ( recover_idr_empty ( ls ) )
2006-01-18 12:30:29 +03:00
wake_up ( & ls - > ls_wait_general ) ;
out :
return 0 ;
}
/* Lock recovery: rebuild the process-copy locks we hold on a
remastered rsb on the new rsb master .
dlm_recover_locks
recover_locks
recover_locks_queue
dlm_send_rcom_lock - > receive_rcom_lock
dlm_recover_master_copy
receive_rcom_lock_reply < -
dlm_recover_process_copy
*/
/*
 * Send every lkb on one queue of the rsb to the new master.
 *
 * res_recover_locks_count is incremented once per lkb that was sent; when an
 * equal number of replies has arrived, recovery for the rsb is done.
 *
 * Stops at the first lkb that fails to send and returns that error.  Returns
 * 0 when every lkb on the queue was sent (or the queue was empty).
 */
static int recover_locks_queue(struct dlm_rsb *r, struct list_head *head)
{
	struct dlm_lkb *cur;
	int rv;

	list_for_each_entry(cur, head, lkb_statequeue) {
		rv = dlm_send_rcom_lock(r, cur);
		if (rv)
			return rv;

		r->res_recover_locks_count++;
	}

	return 0;
}
static int recover_locks ( struct dlm_rsb * r )
{
int error = 0 ;
lock_rsb ( r ) ;
2006-08-18 20:54:25 +04:00
DLM_ASSERT ( ! r - > res_recover_locks_count , dlm_dump_rsb ( r ) ; ) ;
2006-01-18 12:30:29 +03:00
error = recover_locks_queue ( r , & r - > res_grantqueue ) ;
if ( error )
goto out ;
error = recover_locks_queue ( r , & r - > res_convertqueue ) ;
if ( error )
goto out ;
error = recover_locks_queue ( r , & r - > res_waitqueue ) ;
if ( error )
goto out ;
if ( r - > res_recover_locks_count )
recover_list_add ( r ) ;
else
rsb_clear_flag ( r , RSB_NEW_MASTER ) ;
out :
unlock_rsb ( r ) ;
return error ;
}
int dlm_recover_locks ( struct dlm_ls * ls )
{
struct dlm_rsb * r ;
int error , count = 0 ;
down_read ( & ls - > ls_root_sem ) ;
list_for_each_entry ( r , & ls - > ls_root_list , res_root_list ) {
if ( is_master ( r ) ) {
rsb_clear_flag ( r , RSB_NEW_MASTER ) ;
continue ;
}
if ( ! rsb_flag ( r , RSB_NEW_MASTER ) )
continue ;
if ( dlm_recovery_stopped ( ls ) ) {
error = - EINTR ;
up_read ( & ls - > ls_root_sem ) ;
goto out ;
}
error = recover_locks ( r ) ;
if ( error ) {
up_read ( & ls - > ls_root_sem ) ;
goto out ;
}
count + = r - > res_recover_locks_count ;
}
up_read ( & ls - > ls_root_sem ) ;
2014-02-14 21:54:44 +04:00
log_rinfo ( ls , " dlm_recover_locks %d out " , count ) ;
2006-01-18 12:30:29 +03:00
error = dlm_wait_function ( ls , & recover_list_empty ) ;
out :
if ( error )
recover_list_clear ( ls ) ;
return error ;
}
void dlm_recovered_lock ( struct dlm_rsb * r )
{
2006-08-18 20:54:25 +04:00
DLM_ASSERT ( rsb_flag ( r , RSB_NEW_MASTER ) , dlm_dump_rsb ( r ) ; ) ;
2006-01-18 12:30:29 +03:00
r - > res_recover_locks_count - - ;
if ( ! r - > res_recover_locks_count ) {
rsb_clear_flag ( r , RSB_NEW_MASTER ) ;
recover_list_del ( r ) ;
}
if ( recover_list_empty ( r - > res_ls ) )
wake_up ( & r - > res_ls - > ls_wait_general ) ;
}
/*
* The lvb needs to be recovered on all master rsb ' s . This includes setting
* the VALNOTVALID flag if necessary , and determining the correct lvb contents
* based on the lvb ' s of the locks held on the rsb .
*
2012-11-16 01:01:51 +04:00
* RSB_VALNOTVALID is set in two cases :
*
* 1. we are master , but not new , and we purged an EX / PW lock held by a
* failed node ( in dlm_recover_purge which set RSB_RECOVER_LVB_INVAL )
*
* 2. we are a new master , and there are only NL / CR locks left .
* ( We could probably improve this by only invaliding in this way when
* the previous master left uncleanly . VMS docs mention that . )
2006-01-18 12:30:29 +03:00
*
* The LVB contents are only considered for changing when this is a new master
* of the rsb ( NEW_MASTER2 ) . Then , the rsb ' s lvb is taken from any lkb with
* mode > CR . If no lkb ' s exist with mode above CR , the lvb contents are taken
* from the lkb with the largest lvb sequence number .
*/
static void recover_lvb ( struct dlm_rsb * r )
{
struct dlm_lkb * lkb , * high_lkb = NULL ;
uint32_t high_seq = 0 ;
2006-01-20 11:47:07 +03:00
int lock_lvb_exists = 0 ;
int big_lock_exists = 0 ;
2006-01-18 12:30:29 +03:00
int lvblen = r - > res_ls - > ls_lvblen ;
2012-11-16 01:01:51 +04:00
if ( ! rsb_flag ( r , RSB_NEW_MASTER2 ) & &
rsb_flag ( r , RSB_RECOVER_LVB_INVAL ) ) {
/* case 1 above */
rsb_set_flag ( r , RSB_VALNOTVALID ) ;
return ;
}
if ( ! rsb_flag ( r , RSB_NEW_MASTER2 ) )
return ;
/* we are the new master, so figure out if VALNOTVALID should
be set , and set the rsb lvb from the best lkb available . */
2006-01-18 12:30:29 +03:00
list_for_each_entry ( lkb , & r - > res_grantqueue , lkb_statequeue ) {
if ( ! ( lkb - > lkb_exflags & DLM_LKF_VALBLK ) )
continue ;
2006-01-20 11:47:07 +03:00
lock_lvb_exists = 1 ;
2006-01-18 12:30:29 +03:00
if ( lkb - > lkb_grmode > DLM_LOCK_CR ) {
2006-01-20 11:47:07 +03:00
big_lock_exists = 1 ;
2006-01-18 12:30:29 +03:00
goto setflag ;
}
if ( ( ( int ) lkb - > lkb_lvbseq - ( int ) high_seq ) > = 0 ) {
high_lkb = lkb ;
high_seq = lkb - > lkb_lvbseq ;
}
}
list_for_each_entry ( lkb , & r - > res_convertqueue , lkb_statequeue ) {
if ( ! ( lkb - > lkb_exflags & DLM_LKF_VALBLK ) )
continue ;
2006-01-20 11:47:07 +03:00
lock_lvb_exists = 1 ;
2006-01-18 12:30:29 +03:00
if ( lkb - > lkb_grmode > DLM_LOCK_CR ) {
2006-01-20 11:47:07 +03:00
big_lock_exists = 1 ;
2006-01-18 12:30:29 +03:00
goto setflag ;
}
if ( ( ( int ) lkb - > lkb_lvbseq - ( int ) high_seq ) > = 0 ) {
high_lkb = lkb ;
high_seq = lkb - > lkb_lvbseq ;
}
}
setflag :
if ( ! lock_lvb_exists )
goto out ;
2012-11-16 01:01:51 +04:00
/* lvb is invalidated if only NL/CR locks remain */
2006-01-18 12:30:29 +03:00
if ( ! big_lock_exists )
rsb_set_flag ( r , RSB_VALNOTVALID ) ;
if ( ! r - > res_lvbptr ) {
2007-11-07 18:06:49 +03:00
r - > res_lvbptr = dlm_allocate_lvb ( r - > res_ls ) ;
2006-01-18 12:30:29 +03:00
if ( ! r - > res_lvbptr )
goto out ;
}
if ( big_lock_exists ) {
r - > res_lvbseq = lkb - > lkb_lvbseq ;
memcpy ( r - > res_lvbptr , lkb - > lkb_lvbptr , lvblen ) ;
} else if ( high_lkb ) {
r - > res_lvbseq = high_lkb - > lkb_lvbseq ;
memcpy ( r - > res_lvbptr , high_lkb - > lkb_lvbptr , lvblen ) ;
} else {
r - > res_lvbseq = 0 ;
memset ( r - > res_lvbptr , 0 , lvblen ) ;
}
out :
return ;
}
/* All master rsb's flagged RECOVER_CONVERT need to be looked at. The locks
converting PR - > CW or CW - > PR need to have their lkb_grmode set . */
static void recover_conversion ( struct dlm_rsb * r )
{
2012-06-06 00:55:19 +04:00
struct dlm_ls * ls = r - > res_ls ;
2006-01-18 12:30:29 +03:00
struct dlm_lkb * lkb ;
int grmode = - 1 ;
list_for_each_entry ( lkb , & r - > res_grantqueue , lkb_statequeue ) {
if ( lkb - > lkb_grmode = = DLM_LOCK_PR | |
lkb - > lkb_grmode = = DLM_LOCK_CW ) {
grmode = lkb - > lkb_grmode ;
break ;
}
}
list_for_each_entry ( lkb , & r - > res_convertqueue , lkb_statequeue ) {
if ( lkb - > lkb_grmode ! = DLM_LOCK_IV )
continue ;
2012-06-06 00:55:19 +04:00
if ( grmode = = - 1 ) {
log_debug ( ls , " recover_conversion %x set gr to rq %d " ,
lkb - > lkb_id , lkb - > lkb_rqmode ) ;
2006-01-18 12:30:29 +03:00
lkb - > lkb_grmode = lkb - > lkb_rqmode ;
2012-06-06 00:55:19 +04:00
} else {
log_debug ( ls , " recover_conversion %x set gr %d " ,
lkb - > lkb_id , grmode ) ;
2006-01-18 12:30:29 +03:00
lkb - > lkb_grmode = grmode ;
2012-06-06 00:55:19 +04:00
}
2006-01-18 12:30:29 +03:00
}
}
2006-07-25 22:53:33 +04:00
/* We've become the new master for this rsb and waiting/converting locks may
dlm: fixes for nodir mode
The "nodir" mode (statically assign master nodes instead
of using the resource directory) has always been highly
experimental, and never seriously used. This commit
fixes a number of problems, making nodir much more usable.
- Major change to recovery: recover all locks and restart
all in-progress operations after recovery. In some
cases it's not possible to know which in-progess locks
to recover, so recover all. (Most require recovery
in nodir mode anyway since rehashing changes most
master nodes.)
- Change the way nodir mode is enabled, from a command
line mount arg passed through gfs2, into a sysfs
file managed by dlm_controld, consistent with the
other config settings.
- Allow recovering MSTCPY locks on an rsb that has not
yet been turned into a master copy.
- Ignore RCOM_LOCK and RCOM_LOCK_REPLY recovery messages
from a previous, aborted recovery cycle. Base this
on the local recovery status not being in the state
where any nodes should be sending LOCK messages for the
current recovery cycle.
- Hold rsb lock around dlm_purge_mstcpy_locks() because it
may run concurrently with dlm_recover_master_copy().
- Maintain highbast on process-copy lkb's (in addition to
the master as is usual), because the lkb can switch
back and forth between being a master and being a
process copy as the master node changes in recovery.
- When recovering MSTCPY locks, flag rsb's that have
non-empty convert or waiting queues for granting
at the end of recovery. (Rename flag from LOCKS_PURGED
to RECOVER_GRANT and similar for the recovery function,
because it's not only resources with purged locks
that need grant a grant attempt.)
- Replace a couple of unnecessary assertion panics with
error messages.
Signed-off-by: David Teigland <teigland@redhat.com>
2012-04-27 00:54:29 +04:00
need to be granted in dlm_recover_grant ( ) due to locks that may have
2006-07-25 22:53:33 +04:00
existed from a removed node . */
dlm: fixes for nodir mode
The "nodir" mode (statically assign master nodes instead
of using the resource directory) has always been highly
experimental, and never seriously used. This commit
fixes a number of problems, making nodir much more usable.
- Major change to recovery: recover all locks and restart
all in-progress operations after recovery. In some
cases it's not possible to know which in-progess locks
to recover, so recover all. (Most require recovery
in nodir mode anyway since rehashing changes most
master nodes.)
- Change the way nodir mode is enabled, from a command
line mount arg passed through gfs2, into a sysfs
file managed by dlm_controld, consistent with the
other config settings.
- Allow recovering MSTCPY locks on an rsb that has not
yet been turned into a master copy.
- Ignore RCOM_LOCK and RCOM_LOCK_REPLY recovery messages
from a previous, aborted recovery cycle. Base this
on the local recovery status not being in the state
where any nodes should be sending LOCK messages for the
current recovery cycle.
- Hold rsb lock around dlm_purge_mstcpy_locks() because it
may run concurrently with dlm_recover_master_copy().
- Maintain highbast on process-copy lkb's (in addition to
the master as is usual), because the lkb can switch
back and forth between being a master and being a
process copy as the master node changes in recovery.
- When recovering MSTCPY locks, flag rsb's that have
non-empty convert or waiting queues for granting
at the end of recovery. (Rename flag from LOCKS_PURGED
to RECOVER_GRANT and similar for the recovery function,
because it's not only resources with purged locks
that need grant a grant attempt.)
- Replace a couple of unnecessary assertion panics with
error messages.
Signed-off-by: David Teigland <teigland@redhat.com>
2012-04-27 00:54:29 +04:00
static void recover_grant ( struct dlm_rsb * r )
2006-07-25 22:53:33 +04:00
{
if ( ! list_empty ( & r - > res_waitqueue ) | | ! list_empty ( & r - > res_convertqueue ) )
dlm: fixes for nodir mode
The "nodir" mode (statically assign master nodes instead
of using the resource directory) has always been highly
experimental, and never seriously used. This commit
fixes a number of problems, making nodir much more usable.
- Major change to recovery: recover all locks and restart
all in-progress operations after recovery. In some
cases it's not possible to know which in-progess locks
to recover, so recover all. (Most require recovery
in nodir mode anyway since rehashing changes most
master nodes.)
- Change the way nodir mode is enabled, from a command
line mount arg passed through gfs2, into a sysfs
file managed by dlm_controld, consistent with the
other config settings.
- Allow recovering MSTCPY locks on an rsb that has not
yet been turned into a master copy.
- Ignore RCOM_LOCK and RCOM_LOCK_REPLY recovery messages
from a previous, aborted recovery cycle. Base this
on the local recovery status not being in the state
where any nodes should be sending LOCK messages for the
current recovery cycle.
- Hold rsb lock around dlm_purge_mstcpy_locks() because it
may run concurrently with dlm_recover_master_copy().
- Maintain highbast on process-copy lkb's (in addition to
the master as is usual), because the lkb can switch
back and forth between being a master and being a
process copy as the master node changes in recovery.
- When recovering MSTCPY locks, flag rsb's that have
non-empty convert or waiting queues for granting
at the end of recovery. (Rename flag from LOCKS_PURGED
to RECOVER_GRANT and similar for the recovery function,
because it's not only resources with purged locks
that need grant a grant attempt.)
- Replace a couple of unnecessary assertion panics with
error messages.
Signed-off-by: David Teigland <teigland@redhat.com>
2012-04-27 00:54:29 +04:00
rsb_set_flag ( r , RSB_RECOVER_GRANT ) ;
2006-07-25 22:53:33 +04:00
}
2006-01-18 12:30:29 +03:00
void dlm_recover_rsbs ( struct dlm_ls * ls )
{
struct dlm_rsb * r ;
dlm: fixes for nodir mode
The "nodir" mode (statically assign master nodes instead
of using the resource directory) has always been highly
experimental, and never seriously used. This commit
fixes a number of problems, making nodir much more usable.
- Major change to recovery: recover all locks and restart
all in-progress operations after recovery. In some
cases it's not possible to know which in-progess locks
to recover, so recover all. (Most require recovery
in nodir mode anyway since rehashing changes most
master nodes.)
- Change the way nodir mode is enabled, from a command
line mount arg passed through gfs2, into a sysfs
file managed by dlm_controld, consistent with the
other config settings.
- Allow recovering MSTCPY locks on an rsb that has not
yet been turned into a master copy.
- Ignore RCOM_LOCK and RCOM_LOCK_REPLY recovery messages
from a previous, aborted recovery cycle. Base this
on the local recovery status not being in the state
where any nodes should be sending LOCK messages for the
current recovery cycle.
- Hold rsb lock around dlm_purge_mstcpy_locks() because it
may run concurrently with dlm_recover_master_copy().
- Maintain highbast on process-copy lkb's (in addition to
the master as is usual), because the lkb can switch
back and forth between being a master and being a
process copy as the master node changes in recovery.
- When recovering MSTCPY locks, flag rsb's that have
non-empty convert or waiting queues for granting
at the end of recovery. (Rename flag from LOCKS_PURGED
to RECOVER_GRANT and similar for the recovery function,
because it's not only resources with purged locks
that need grant a grant attempt.)
- Replace a couple of unnecessary assertion panics with
error messages.
Signed-off-by: David Teigland <teigland@redhat.com>
2012-04-27 00:54:29 +04:00
unsigned int count = 0 ;
2006-01-18 12:30:29 +03:00
down_read ( & ls - > ls_root_sem ) ;
list_for_each_entry ( r , & ls - > ls_root_list , res_root_list ) {
lock_rsb ( r ) ;
if ( is_master ( r ) ) {
if ( rsb_flag ( r , RSB_RECOVER_CONVERT ) )
recover_conversion ( r ) ;
2012-11-16 01:01:51 +04:00
/* recover lvb before granting locks so the updated
lvb / VALNOTVALID is presented in the completion */
recover_lvb ( r ) ;
2006-07-25 22:53:33 +04:00
if ( rsb_flag ( r , RSB_NEW_MASTER2 ) )
dlm: fixes for nodir mode
The "nodir" mode (statically assign master nodes instead
of using the resource directory) has always been highly
experimental, and never seriously used. This commit
fixes a number of problems, making nodir much more usable.
- Major change to recovery: recover all locks and restart
all in-progress operations after recovery. In some
cases it's not possible to know which in-progess locks
to recover, so recover all. (Most require recovery
in nodir mode anyway since rehashing changes most
master nodes.)
- Change the way nodir mode is enabled, from a command
line mount arg passed through gfs2, into a sysfs
file managed by dlm_controld, consistent with the
other config settings.
- Allow recovering MSTCPY locks on an rsb that has not
yet been turned into a master copy.
- Ignore RCOM_LOCK and RCOM_LOCK_REPLY recovery messages
from a previous, aborted recovery cycle. Base this
on the local recovery status not being in the state
where any nodes should be sending LOCK messages for the
current recovery cycle.
- Hold rsb lock around dlm_purge_mstcpy_locks() because it
may run concurrently with dlm_recover_master_copy().
- Maintain highbast on process-copy lkb's (in addition to
the master as is usual), because the lkb can switch
back and forth between being a master and being a
process copy as the master node changes in recovery.
- When recovering MSTCPY locks, flag rsb's that have
non-empty convert or waiting queues for granting
at the end of recovery. (Rename flag from LOCKS_PURGED
to RECOVER_GRANT and similar for the recovery function,
because it's not only resources with purged locks
that need grant a grant attempt.)
- Replace a couple of unnecessary assertion panics with
error messages.
Signed-off-by: David Teigland <teigland@redhat.com>
2012-04-27 00:54:29 +04:00
recover_grant ( r ) ;
2006-01-18 12:30:29 +03:00
count + + ;
2012-11-16 01:01:51 +04:00
} else {
rsb_clear_flag ( r , RSB_VALNOTVALID ) ;
2006-01-18 12:30:29 +03:00
}
rsb_clear_flag ( r , RSB_RECOVER_CONVERT ) ;
2012-11-16 01:01:51 +04:00
rsb_clear_flag ( r , RSB_RECOVER_LVB_INVAL ) ;
2006-07-25 22:53:33 +04:00
rsb_clear_flag ( r , RSB_NEW_MASTER2 ) ;
2006-01-18 12:30:29 +03:00
unlock_rsb ( r ) ;
}
up_read ( & ls - > ls_root_sem ) ;
dlm: fixes for nodir mode
The "nodir" mode (statically assign master nodes instead
of using the resource directory) has always been highly
experimental, and never seriously used. This commit
fixes a number of problems, making nodir much more usable.
- Major change to recovery: recover all locks and restart
all in-progress operations after recovery. In some
cases it's not possible to know which in-progess locks
to recover, so recover all. (Most require recovery
in nodir mode anyway since rehashing changes most
master nodes.)
- Change the way nodir mode is enabled, from a command
line mount arg passed through gfs2, into a sysfs
file managed by dlm_controld, consistent with the
other config settings.
- Allow recovering MSTCPY locks on an rsb that has not
yet been turned into a master copy.
- Ignore RCOM_LOCK and RCOM_LOCK_REPLY recovery messages
from a previous, aborted recovery cycle. Base this
on the local recovery status not being in the state
where any nodes should be sending LOCK messages for the
current recovery cycle.
- Hold rsb lock around dlm_purge_mstcpy_locks() because it
may run concurrently with dlm_recover_master_copy().
- Maintain highbast on process-copy lkb's (in addition to
the master as is usual), because the lkb can switch
back and forth between being a master and being a
process copy as the master node changes in recovery.
- When recovering MSTCPY locks, flag rsb's that have
non-empty convert or waiting queues for granting
at the end of recovery. (Rename flag from LOCKS_PURGED
to RECOVER_GRANT and similar for the recovery function,
because it's not only resources with purged locks
that need grant a grant attempt.)
- Replace a couple of unnecessary assertion panics with
error messages.
Signed-off-by: David Teigland <teigland@redhat.com>
2012-04-27 00:54:29 +04:00
if ( count )
2014-02-14 21:54:44 +04:00
log_rinfo ( ls , " dlm_recover_rsbs %d done " , count ) ;
2006-01-18 12:30:29 +03:00
}
/* Create a single list of all root rsb's to be used during recovery */
int dlm_create_root_list ( struct dlm_ls * ls )
{
2011-10-27 00:24:55 +04:00
struct rb_node * n ;
2006-01-18 12:30:29 +03:00
struct dlm_rsb * r ;
int i , error = 0 ;
down_write ( & ls - > ls_root_sem ) ;
if ( ! list_empty ( & ls - > ls_root_list ) ) {
log_error ( ls , " root list not empty " ) ;
error = - EINVAL ;
goto out ;
}
for ( i = 0 ; i < ls - > ls_rsbtbl_size ; i + + ) {
2009-01-08 01:50:41 +03:00
spin_lock ( & ls - > ls_rsbtbl [ i ] . lock ) ;
2011-10-27 00:24:55 +04:00
for ( n = rb_first ( & ls - > ls_rsbtbl [ i ] . keep ) ; n ; n = rb_next ( n ) ) {
r = rb_entry ( n , struct dlm_rsb , res_hashnode ) ;
2006-01-18 12:30:29 +03:00
list_add ( & r - > res_root_list , & ls - > ls_root_list ) ;
dlm_hold_rsb ( r ) ;
}
2008-01-16 22:02:31 +03:00
2012-05-10 19:18:07 +04:00
if ( ! RB_EMPTY_ROOT ( & ls - > ls_rsbtbl [ i ] . toss ) )
log_error ( ls , " dlm_create_root_list toss not empty " ) ;
2009-01-08 01:50:41 +03:00
spin_unlock ( & ls - > ls_rsbtbl [ i ] . lock ) ;
2006-01-18 12:30:29 +03:00
}
out :
up_write ( & ls - > ls_root_sem ) ;
return error ;
}
void dlm_release_root_list ( struct dlm_ls * ls )
{
struct dlm_rsb * r , * safe ;
down_write ( & ls - > ls_root_sem ) ;
list_for_each_entry_safe ( r , safe , & ls - > ls_root_list , res_root_list ) {
list_del_init ( & r - > res_root_list ) ;
dlm_put_rsb ( r ) ;
}
up_write ( & ls - > ls_root_sem ) ;
}
2012-05-10 19:18:07 +04:00
void dlm_clear_toss ( struct dlm_ls * ls )
2006-01-18 12:30:29 +03:00
{
2011-10-27 00:24:55 +04:00
struct rb_node * n , * next ;
2012-05-10 19:18:07 +04:00
struct dlm_rsb * r ;
unsigned int count = 0 ;
2006-01-18 12:30:29 +03:00
int i ;
for ( i = 0 ; i < ls - > ls_rsbtbl_size ; i + + ) {
2009-01-08 01:50:41 +03:00
spin_lock ( & ls - > ls_rsbtbl [ i ] . lock ) ;
2011-10-27 00:24:55 +04:00
for ( n = rb_first ( & ls - > ls_rsbtbl [ i ] . toss ) ; n ; n = next ) {
2012-05-10 19:18:07 +04:00
next = rb_next ( n ) ;
r = rb_entry ( n , struct dlm_rsb , res_hashnode ) ;
rb_erase ( n , & ls - > ls_rsbtbl [ i ] . toss ) ;
dlm_free_rsb ( r ) ;
count + + ;
2006-01-18 12:30:29 +03:00
}
2009-01-08 01:50:41 +03:00
spin_unlock ( & ls - > ls_rsbtbl [ i ] . lock ) ;
2006-01-18 12:30:29 +03:00
}
2012-05-10 19:18:07 +04:00
if ( count )
2014-02-14 21:54:44 +04:00
log_rinfo ( ls , " dlm_clear_toss %u done " , count ) ;
2006-01-18 12:30:29 +03:00
}