/******************************************************************************
*******************************************************************************
**
**  Copyright (C) Sistina Software, Inc.  1997-2003  All rights reserved.
**  Copyright (C) 2005-2008 Red Hat, Inc.  All rights reserved.
**
**  This copyrighted material is made available to anyone wishing to use,
**  modify, copy, or redistribute it subject to the terms and conditions
**  of the GNU General Public License v.2.
**
*******************************************************************************
******************************************************************************/
# include "dlm_internal.h"
# include "lockspace.h"
# include "member.h"
# include "lowcomms.h"
# include "midcomms.h"
# include "rcom.h"
# include "recover.h"
# include "dir.h"
# include "config.h"
# include "memory.h"
# include "lock.h"
# include "util.h"
static int rcom_response ( struct dlm_ls * ls )
{
return test_bit ( LSFL_RCOM_READY , & ls - > ls_flags ) ;
}
static int create_rcom ( struct dlm_ls * ls , int to_nodeid , int type , int len ,
struct dlm_rcom * * rc_ret , struct dlm_mhandle * * mh_ret )
{
struct dlm_rcom * rc ;
struct dlm_mhandle * mh ;
char * mb ;
int mb_len = sizeof ( struct dlm_rcom ) + len ;
2009-12-01 01:34:43 +03:00
mh = dlm_lowcomms_get_buffer ( to_nodeid , mb_len , GFP_NOFS , & mb ) ;
2006-01-18 12:30:29 +03:00
if ( ! mh ) {
log_print ( " create_rcom to %d type %d len %d ENOBUFS " ,
to_nodeid , type , len ) ;
return - ENOBUFS ;
}
memset ( mb , 0 , mb_len ) ;
rc = ( struct dlm_rcom * ) mb ;
rc - > rc_header . h_version = ( DLM_HEADER_MAJOR | DLM_HEADER_MINOR ) ;
rc - > rc_header . h_lockspace = ls - > ls_global_id ;
rc - > rc_header . h_nodeid = dlm_our_nodeid ( ) ;
rc - > rc_header . h_length = mb_len ;
rc - > rc_header . h_cmd = DLM_RCOM ;
rc - > rc_type = type ;
2006-12-13 19:37:16 +03:00
spin_lock ( & ls - > ls_recover_lock ) ;
rc - > rc_seq = ls - > ls_recover_seq ;
spin_unlock ( & ls - > ls_recover_lock ) ;
2006-01-18 12:30:29 +03:00
* mh_ret = mh ;
* rc_ret = rc ;
return 0 ;
}
/* Finish and transmit a message built by create_rcom(): run it through
   dlm_rcom_out() and then commit the lowcomms buffer that holds it.
   The lockspace argument is not used here. */
static void send_rcom(struct dlm_ls *ls, struct dlm_mhandle *mh,
		      struct dlm_rcom *rc)
{
	/* NOTE(review): dlm_rcom_out() presumably converts the message to
	   wire byte order -- confirm against its definition */
	dlm_rcom_out(rc);

	dlm_lowcomms_commit_buffer(mh);
}
/* Fill in the payload of a status request: the caller's flags, stored
   little-endian.  The lockspace argument is not used here. */
static void set_rcom_status(struct dlm_ls *ls, struct rcom_status *rs,
			    uint32_t flags)
{
	rs->rs_flags = cpu_to_le32(flags);
}
/* When replying to a status request, a node also sends back its
configuration values . The requesting node then checks that the remote
node is configured the same way as itself . */
2011-10-20 22:26:28 +04:00
static void set_rcom_config ( struct dlm_ls * ls , struct rcom_config * rf ,
uint32_t num_slots )
2006-01-18 12:30:29 +03:00
{
2008-01-25 10:34:00 +03:00
rf - > rf_lvblen = cpu_to_le32 ( ls - > ls_lvblen ) ;
rf - > rf_lsflags = cpu_to_le32 ( ls - > ls_exflags ) ;
2011-10-20 22:26:28 +04:00
rf - > rf_our_slot = cpu_to_le16 ( ls - > ls_slot ) ;
rf - > rf_num_slots = cpu_to_le16 ( num_slots ) ;
rf - > rf_generation = cpu_to_le32 ( ls - > ls_generation ) ;
2006-01-18 12:30:29 +03:00
}
2011-10-20 22:26:28 +04:00
static int check_rcom_config ( struct dlm_ls * ls , struct dlm_rcom * rc , int nodeid )
2006-01-18 12:30:29 +03:00
{
2006-12-13 19:37:55 +03:00
struct rcom_config * rf = ( struct rcom_config * ) rc - > rc_buf ;
if ( ( rc - > rc_header . h_version & 0xFFFF0000 ) ! = DLM_HEADER_MAJOR ) {
log_error ( ls , " version mismatch: %x nodeid %d: %x " ,
DLM_HEADER_MAJOR | DLM_HEADER_MINOR , nodeid ,
rc - > rc_header . h_version ) ;
2007-05-18 18:03:35 +04:00
return - EPROTO ;
2006-12-13 19:37:55 +03:00
}
2008-01-25 10:34:00 +03:00
if ( le32_to_cpu ( rf - > rf_lvblen ) ! = ls - > ls_lvblen | |
le32_to_cpu ( rf - > rf_lsflags ) ! = ls - > ls_exflags ) {
2006-01-18 12:30:29 +03:00
log_error ( ls , " config mismatch: %d,%x nodeid %d: %d,%x " ,
2008-01-25 10:34:00 +03:00
ls - > ls_lvblen , ls - > ls_exflags , nodeid ,
le32_to_cpu ( rf - > rf_lvblen ) ,
le32_to_cpu ( rf - > rf_lsflags ) ) ;
2007-05-18 18:03:35 +04:00
return - EPROTO ;
2006-01-18 12:30:29 +03:00
}
return 0 ;
}
2006-11-27 22:19:28 +03:00
/* Open the window for a synchronous reply: under ls_rcom_spin, advance
   ls_rcom_seq, hand the new value back through *new_seq and set
   LSFL_RCOM_WAIT.  receive_sync_reply() only accepts a reply whose rc_id
   equals this sequence number while the WAIT bit is set. */
static void allow_sync_reply(struct dlm_ls *ls, uint64_t *new_seq)
{
	spin_lock(&ls->ls_rcom_spin);

	ls->ls_rcom_seq++;
	*new_seq = ls->ls_rcom_seq;
	set_bit(LSFL_RCOM_WAIT, &ls->ls_flags);

	spin_unlock(&ls->ls_rcom_spin);
}
static void disallow_sync_reply ( struct dlm_ls * ls )
{
spin_lock ( & ls - > ls_rcom_spin ) ;
clear_bit ( LSFL_RCOM_WAIT , & ls - > ls_flags ) ;
clear_bit ( LSFL_RCOM_READY , & ls - > ls_flags ) ;
spin_unlock ( & ls - > ls_rcom_spin ) ;
}
2011-10-20 22:26:28 +04:00
/*
 * low nodeid gathers one slot value at a time from each node.
 * it sets need_slots=0, and saves rf_our_slot returned from each
 * rcom_config.
 *
 * other nodes gather all slot values at once from the low nodeid.
 * they set need_slots=1, and ignore the rf_our_slot returned from each
 * rcom_config.  they use the rf_num_slots returned from the low
 * node's rcom_config.
 *
 * Ask "nodeid" for its recovery status and wait for the answer.  The
 * answer is left in ls->ls_recover_buf; the caller reads rc_result there
 * for the remote recovery status.  For our own nodeid nothing is sent and
 * rc_result is filled from dlm_recover_status().
 *
 * Returns 0 on success, otherwise the error from create_rcom(),
 * dlm_wait_function() or check_rcom_config().
 */
int dlm_rcom_status(struct dlm_ls *ls, int nodeid, uint32_t status_flags)
{
	struct dlm_rcom *rc;
	struct dlm_mhandle *mh;
	int error;

	ls->ls_recover_nodeid = nodeid;

	/* asking ourselves: no message needed */
	if (nodeid == dlm_our_nodeid()) {
		rc = ls->ls_recover_buf;
		rc->rc_result = dlm_recover_status(ls);
		return 0;
	}

	error = create_rcom(ls, nodeid, DLM_RCOM_STATUS,
			    sizeof(struct rcom_status), &rc, &mh);
	if (error)
		return error;

	set_rcom_status(ls, (struct rcom_status *)rc->rc_buf, status_flags);

	/* arm the reply window and clear the landing buffer before sending */
	allow_sync_reply(ls, &rc->rc_id);
	memset(ls->ls_recover_buf, 0, dlm_config.ci_buffer_size);

	send_rcom(ls, mh, rc);

	error = dlm_wait_function(ls, &rcom_response);
	disallow_sync_reply(ls);
	if (error)
		return error;

	/* from here on rc refers to the received reply */
	rc = ls->ls_recover_buf;

	if (rc->rc_result != -ESRCH)
		return check_rcom_config(ls, rc, nodeid);

	/* we pretend the remote lockspace exists with 0 status */
	log_debug(ls, " remote node %d not ready ", nodeid);
	rc->rc_result = 0;
	return 0;
}
static void receive_rcom_status ( struct dlm_ls * ls , struct dlm_rcom * rc_in )
{
struct dlm_rcom * rc ;
struct dlm_mhandle * mh ;
2011-10-20 22:26:28 +04:00
struct rcom_status * rs ;
uint32_t status ;
int nodeid = rc_in - > rc_header . h_nodeid ;
int len = sizeof ( struct rcom_config ) ;
int num_slots = 0 ;
int error ;
if ( ! dlm_slots_version ( & rc_in - > rc_header ) ) {
status = dlm_recover_status ( ls ) ;
goto do_create ;
}
rs = ( struct rcom_status * ) rc_in - > rc_buf ;
2006-01-18 12:30:29 +03:00
2014-10-15 00:10:48 +04:00
if ( ! ( le32_to_cpu ( rs - > rs_flags ) & DLM_RSF_NEED_SLOTS ) ) {
2011-10-20 22:26:28 +04:00
status = dlm_recover_status ( ls ) ;
goto do_create ;
}
spin_lock ( & ls - > ls_recover_lock ) ;
status = ls - > ls_recover_status ;
num_slots = ls - > ls_num_slots ;
spin_unlock ( & ls - > ls_recover_lock ) ;
len + = num_slots * sizeof ( struct rcom_slot ) ;
do_create :
2006-01-18 12:30:29 +03:00
error = create_rcom ( ls , nodeid , DLM_RCOM_STATUS_REPLY ,
2011-10-20 22:26:28 +04:00
len , & rc , & mh ) ;
2006-01-18 12:30:29 +03:00
if ( error )
return ;
2011-10-20 22:26:28 +04:00
2006-08-09 20:20:15 +04:00
rc - > rc_id = rc_in - > rc_id ;
2006-12-13 19:37:16 +03:00
rc - > rc_seq_reply = rc_in - > rc_seq ;
2011-10-20 22:26:28 +04:00
rc - > rc_result = status ;
set_rcom_config ( ls , ( struct rcom_config * ) rc - > rc_buf , num_slots ) ;
if ( ! num_slots )
goto do_send ;
spin_lock ( & ls - > ls_recover_lock ) ;
if ( ls - > ls_num_slots ! = num_slots ) {
spin_unlock ( & ls - > ls_recover_lock ) ;
log_debug ( ls , " receive_rcom_status num_slots %d to %d " ,
num_slots , ls - > ls_num_slots ) ;
rc - > rc_result = 0 ;
set_rcom_config ( ls , ( struct rcom_config * ) rc - > rc_buf , 0 ) ;
goto do_send ;
}
dlm_slots_copy_out ( ls , rc ) ;
spin_unlock ( & ls - > ls_recover_lock ) ;
2006-01-18 12:30:29 +03:00
2011-10-20 22:26:28 +04:00
do_send :
2006-01-18 12:30:29 +03:00
send_rcom ( ls , mh , rc ) ;
}
2006-08-09 20:20:15 +04:00
static void receive_sync_reply ( struct dlm_ls * ls , struct dlm_rcom * rc_in )
2006-01-18 12:30:29 +03:00
{
2006-11-27 22:19:28 +03:00
spin_lock ( & ls - > ls_rcom_spin ) ;
if ( ! test_bit ( LSFL_RCOM_WAIT , & ls - > ls_flags ) | |
rc_in - > rc_id ! = ls - > ls_rcom_seq ) {
log_debug ( ls , " reject reply %d from %d seq %llx expect %llx " ,
rc_in - > rc_type , rc_in - > rc_header . h_nodeid ,
2006-11-29 17:33:48 +03:00
( unsigned long long ) rc_in - > rc_id ,
( unsigned long long ) ls - > ls_rcom_seq ) ;
2006-11-27 22:19:28 +03:00
goto out ;
2006-08-09 20:20:15 +04:00
}
2006-01-18 12:30:29 +03:00
memcpy ( ls - > ls_recover_buf , rc_in , rc_in - > rc_header . h_length ) ;
set_bit ( LSFL_RCOM_READY , & ls - > ls_flags ) ;
2006-11-27 22:19:28 +03:00
clear_bit ( LSFL_RCOM_WAIT , & ls - > ls_flags ) ;
2006-01-18 12:30:29 +03:00
wake_up ( & ls - > ls_wait_general ) ;
2006-11-27 22:19:28 +03:00
out :
spin_unlock ( & ls - > ls_rcom_spin ) ;
2006-01-18 12:30:29 +03:00
}
int dlm_rcom_names ( struct dlm_ls * ls , int nodeid , char * last_name , int last_len )
{
struct dlm_rcom * rc ;
struct dlm_mhandle * mh ;
2008-01-25 11:01:51 +03:00
int error = 0 ;
2006-01-18 12:30:29 +03:00
2006-08-09 02:08:42 +04:00
ls - > ls_recover_nodeid = nodeid ;
2006-01-18 12:30:29 +03:00
error = create_rcom ( ls , nodeid , DLM_RCOM_NAMES , last_len , & rc , & mh ) ;
if ( error )
goto out ;
memcpy ( rc - > rc_buf , last_name , last_len ) ;
2006-11-27 22:19:28 +03:00
allow_sync_reply ( ls , & rc - > rc_id ) ;
2007-01-09 18:41:48 +03:00
memset ( ls - > ls_recover_buf , 0 , dlm_config . ci_buffer_size ) ;
2006-01-18 12:30:29 +03:00
send_rcom ( ls , mh , rc ) ;
error = dlm_wait_function ( ls , & rcom_response ) ;
2006-11-27 22:19:28 +03:00
disallow_sync_reply ( ls ) ;
2006-01-18 12:30:29 +03:00
out :
return error ;
}
static void receive_rcom_names ( struct dlm_ls * ls , struct dlm_rcom * rc_in )
{
struct dlm_rcom * rc ;
struct dlm_mhandle * mh ;
2006-12-13 19:37:16 +03:00
int error , inlen , outlen , nodeid ;
2006-01-18 12:30:29 +03:00
nodeid = rc_in - > rc_header . h_nodeid ;
inlen = rc_in - > rc_header . h_length - sizeof ( struct dlm_rcom ) ;
2007-01-09 18:41:48 +03:00
outlen = dlm_config . ci_buffer_size - sizeof ( struct dlm_rcom ) ;
2006-01-18 12:30:29 +03:00
error = create_rcom ( ls , nodeid , DLM_RCOM_NAMES_REPLY , outlen , & rc , & mh ) ;
if ( error )
return ;
2006-08-09 20:20:15 +04:00
rc - > rc_id = rc_in - > rc_id ;
2006-12-13 19:37:16 +03:00
rc - > rc_seq_reply = rc_in - > rc_seq ;
2006-01-18 12:30:29 +03:00
dlm_copy_master_names ( ls , rc_in - > rc_buf , inlen , rc - > rc_buf , outlen ,
nodeid ) ;
send_rcom ( ls , mh , rc ) ;
}
int dlm_send_rcom_lookup ( struct dlm_rsb * r , int dir_nodeid )
{
struct dlm_rcom * rc ;
struct dlm_mhandle * mh ;
struct dlm_ls * ls = r - > res_ls ;
int error ;
error = create_rcom ( ls , dir_nodeid , DLM_RCOM_LOOKUP , r - > res_length ,
& rc , & mh ) ;
if ( error )
goto out ;
memcpy ( rc - > rc_buf , r - > res_name , r - > res_length ) ;
2012-05-16 01:07:49 +04:00
rc - > rc_id = ( unsigned long ) r - > res_id ;
2006-01-18 12:30:29 +03:00
send_rcom ( ls , mh , rc ) ;
out :
return error ;
}
2012-05-10 19:18:07 +04:00
int dlm_send_rcom_lookup_dump ( struct dlm_rsb * r , int to_nodeid )
{
struct dlm_rcom * rc ;
struct dlm_mhandle * mh ;
struct dlm_ls * ls = r - > res_ls ;
int error ;
error = create_rcom ( ls , to_nodeid , DLM_RCOM_LOOKUP , r - > res_length ,
& rc , & mh ) ;
if ( error )
goto out ;
memcpy ( rc - > rc_buf , r - > res_name , r - > res_length ) ;
rc - > rc_id = 0xFFFFFFFF ;
send_rcom ( ls , mh , rc ) ;
out :
return error ;
}
2006-01-18 12:30:29 +03:00
static void receive_rcom_lookup ( struct dlm_ls * ls , struct dlm_rcom * rc_in )
{
struct dlm_rcom * rc ;
struct dlm_mhandle * mh ;
int error , ret_nodeid , nodeid = rc_in - > rc_header . h_nodeid ;
int len = rc_in - > rc_header . h_length - sizeof ( struct dlm_rcom ) ;
error = create_rcom ( ls , nodeid , DLM_RCOM_LOOKUP_REPLY , 0 , & rc , & mh ) ;
if ( error )
return ;
2012-05-10 19:18:07 +04:00
if ( rc_in - > rc_id = = 0xFFFFFFFF ) {
log_error ( ls , " receive_rcom_lookup dump from %d " , nodeid ) ;
dlm_dump_rsb_name ( ls , rc_in - > rc_buf , len ) ;
return ;
}
error = dlm_master_lookup ( ls , nodeid , rc_in - > rc_buf , len ,
DLM_LU_RECOVER_MASTER , & ret_nodeid , NULL ) ;
2006-01-18 12:30:29 +03:00
if ( error )
ret_nodeid = error ;
rc - > rc_result = ret_nodeid ;
rc - > rc_id = rc_in - > rc_id ;
2006-12-13 19:37:16 +03:00
rc - > rc_seq_reply = rc_in - > rc_seq ;
2006-01-18 12:30:29 +03:00
send_rcom ( ls , mh , rc ) ;
}
/* A DLM_RCOM_LOOKUP_REPLY is handed straight to
   dlm_recover_master_reply(). */
static void receive_rcom_lookup_reply(struct dlm_ls *ls,
				      struct dlm_rcom *rc_in)
{
	dlm_recover_master_reply(ls, rc_in);
}
static void pack_rcom_lock ( struct dlm_rsb * r , struct dlm_lkb * lkb ,
struct rcom_lock * rl )
{
memset ( rl , 0 , sizeof ( * rl ) ) ;
2008-01-25 10:08:26 +03:00
rl - > rl_ownpid = cpu_to_le32 ( lkb - > lkb_ownpid ) ;
rl - > rl_lkid = cpu_to_le32 ( lkb - > lkb_id ) ;
rl - > rl_exflags = cpu_to_le32 ( lkb - > lkb_exflags ) ;
rl - > rl_flags = cpu_to_le32 ( lkb - > lkb_flags ) ;
rl - > rl_lvbseq = cpu_to_le32 ( lkb - > lkb_lvbseq ) ;
2006-01-18 12:30:29 +03:00
rl - > rl_rqmode = lkb - > lkb_rqmode ;
rl - > rl_grmode = lkb - > lkb_grmode ;
rl - > rl_status = lkb - > lkb_status ;
2008-01-25 10:08:26 +03:00
rl - > rl_wait_type = cpu_to_le16 ( lkb - > lkb_wait_type ) ;
2006-01-18 12:30:29 +03:00
2008-02-06 09:35:45 +03:00
if ( lkb - > lkb_bastfn )
2011-02-21 23:58:21 +03:00
rl - > rl_asts | = DLM_CB_BAST ;
2008-02-06 09:35:45 +03:00
if ( lkb - > lkb_astfn )
2011-02-21 23:58:21 +03:00
rl - > rl_asts | = DLM_CB_CAST ;
2006-01-18 12:30:29 +03:00
2008-01-25 10:08:26 +03:00
rl - > rl_namelen = cpu_to_le16 ( r - > res_length ) ;
2006-01-18 12:30:29 +03:00
memcpy ( rl - > rl_name , r - > res_name , r - > res_length ) ;
/* FIXME: might we have an lvb without DLM_LKF_VALBLK set ?
If so , receive_rcom_lock_args ( ) won ' t take this copy . */
if ( lkb - > lkb_lvbptr )
memcpy ( rl - > rl_lvb , lkb - > lkb_lvbptr , r - > res_ls - > ls_lvblen ) ;
}
int dlm_send_rcom_lock ( struct dlm_rsb * r , struct dlm_lkb * lkb )
{
struct dlm_ls * ls = r - > res_ls ;
struct dlm_rcom * rc ;
struct dlm_mhandle * mh ;
struct rcom_lock * rl ;
int error , len = sizeof ( struct rcom_lock ) ;
if ( lkb - > lkb_lvbptr )
len + = ls - > ls_lvblen ;
error = create_rcom ( ls , r - > res_nodeid , DLM_RCOM_LOCK , len , & rc , & mh ) ;
if ( error )
goto out ;
rl = ( struct rcom_lock * ) rc - > rc_buf ;
pack_rcom_lock ( r , lkb , rl ) ;
rc - > rc_id = ( unsigned long ) r ;
send_rcom ( ls , mh , rc ) ;
out :
return error ;
}
2008-01-26 03:55:09 +03:00
/* needs at least dlm_rcom + rcom_lock */
2006-01-18 12:30:29 +03:00
static void receive_rcom_lock ( struct dlm_ls * ls , struct dlm_rcom * rc_in )
{
struct dlm_rcom * rc ;
struct dlm_mhandle * mh ;
int error , nodeid = rc_in - > rc_header . h_nodeid ;
dlm_recover_master_copy ( ls , rc_in ) ;
error = create_rcom ( ls , nodeid , DLM_RCOM_LOCK_REPLY ,
sizeof ( struct rcom_lock ) , & rc , & mh ) ;
if ( error )
return ;
/* We send back the same rcom_lock struct we received, but
dlm_recover_master_copy ( ) has filled in rl_remid and rl_result */
memcpy ( rc - > rc_buf , rc_in - > rc_buf , sizeof ( struct rcom_lock ) ) ;
rc - > rc_id = rc_in - > rc_id ;
2006-12-13 19:37:16 +03:00
rc - > rc_seq_reply = rc_in - > rc_seq ;
2006-01-18 12:30:29 +03:00
send_rcom ( ls , mh , rc ) ;
}
2007-09-28 00:53:38 +04:00
/* If the lockspace doesn't exist then still send a status message
back ; it ' s possible that it just doesn ' t have its global_id yet . */
int dlm_send_ls_not_ready ( int nodeid , struct dlm_rcom * rc_in )
2006-01-18 12:30:29 +03:00
{
struct dlm_rcom * rc ;
2006-11-27 22:18:41 +03:00
struct rcom_config * rf ;
2006-01-18 12:30:29 +03:00
struct dlm_mhandle * mh ;
char * mb ;
2006-11-27 22:18:41 +03:00
int mb_len = sizeof ( struct dlm_rcom ) + sizeof ( struct rcom_config ) ;
2006-01-18 12:30:29 +03:00
2007-07-13 23:49:06 +04:00
mh = dlm_lowcomms_get_buffer ( nodeid , mb_len , GFP_NOFS , & mb ) ;
2006-01-18 12:30:29 +03:00
if ( ! mh )
return - ENOBUFS ;
memset ( mb , 0 , mb_len ) ;
rc = ( struct dlm_rcom * ) mb ;
rc - > rc_header . h_version = ( DLM_HEADER_MAJOR | DLM_HEADER_MINOR ) ;
rc - > rc_header . h_lockspace = rc_in - > rc_header . h_lockspace ;
rc - > rc_header . h_nodeid = dlm_our_nodeid ( ) ;
rc - > rc_header . h_length = mb_len ;
rc - > rc_header . h_cmd = DLM_RCOM ;
rc - > rc_type = DLM_RCOM_STATUS_REPLY ;
2006-08-23 21:50:54 +04:00
rc - > rc_id = rc_in - > rc_id ;
2006-12-13 19:37:16 +03:00
rc - > rc_seq_reply = rc_in - > rc_seq ;
2006-01-18 12:30:29 +03:00
rc - > rc_result = - ESRCH ;
2006-11-27 22:18:41 +03:00
rf = ( struct rcom_config * ) rc - > rc_buf ;
2008-01-25 10:34:00 +03:00
rf - > rf_lvblen = cpu_to_le32 ( ~ 0U ) ;
2006-11-27 22:18:41 +03:00
2006-01-18 12:30:29 +03:00
dlm_rcom_out ( rc ) ;
dlm_lowcomms_commit_buffer ( mh ) ;
return 0 ;
}
2012-05-10 19:18:07 +04:00
/*
 * Ignore messages for stage Y before we set
 * recover_status bit for stage X:
 *
 * recover_status = 0
 *
 * dlm_recover_members()
 * - send nothing
 * - recv nothing
 * - ignore NAMES, NAMES_REPLY
 * - ignore LOOKUP, LOOKUP_REPLY
 * - ignore LOCK, LOCK_REPLY
 *
 * recover_status |= NODES
 *
 * dlm_recover_members_wait()
 *
 * dlm_recover_directory()
 * - send NAMES
 * - recv NAMES_REPLY
 * - ignore LOOKUP, LOOKUP_REPLY
 * - ignore LOCK, LOCK_REPLY
 *
 * recover_status |= DIR
 *
 * dlm_recover_directory_wait()
 *
 * dlm_recover_masters()
 * - send LOOKUP
 * - recv LOOKUP_REPLY
 *
 * dlm_recover_locks()
 * - send LOCKS
 * - recv LOCKS_REPLY
 *
 * recover_status |= LOCKS
 *
 * dlm_recover_locks_wait()
 *
 * recover_status |= DONE
 */
/* Called by dlm_recv; corresponds to dlm_receive_message() but special
recovery - only comms are sent through here . */
void dlm_receive_rcom ( struct dlm_ls * ls , struct dlm_rcom * rc , int nodeid )
2006-12-13 19:37:16 +03:00
{
2012-04-23 22:58:42 +04:00
int lock_size = sizeof ( struct dlm_rcom ) + sizeof ( struct rcom_lock ) ;
2012-05-10 19:18:07 +04:00
int stop , reply = 0 , names = 0 , lookup = 0 , lock = 0 ;
dlm: fixes for nodir mode
The "nodir" mode (statically assign master nodes instead
of using the resource directory) has always been highly
experimental, and never seriously used. This commit
fixes a number of problems, making nodir much more usable.
- Major change to recovery: recover all locks and restart
all in-progress operations after recovery. In some
cases it's not possible to know which in-progess locks
to recover, so recover all. (Most require recovery
in nodir mode anyway since rehashing changes most
master nodes.)
- Change the way nodir mode is enabled, from a command
line mount arg passed through gfs2, into a sysfs
file managed by dlm_controld, consistent with the
other config settings.
- Allow recovering MSTCPY locks on an rsb that has not
yet been turned into a master copy.
- Ignore RCOM_LOCK and RCOM_LOCK_REPLY recovery messages
from a previous, aborted recovery cycle. Base this
on the local recovery status not being in the state
where any nodes should be sending LOCK messages for the
current recovery cycle.
- Hold rsb lock around dlm_purge_mstcpy_locks() because it
may run concurrently with dlm_recover_master_copy().
- Maintain highbast on process-copy lkb's (in addition to
the master as is usual), because the lkb can switch
back and forth between being a master and being a
process copy as the master node changes in recovery.
- When recovering MSTCPY locks, flag rsb's that have
non-empty convert or waiting queues for granting
at the end of recovery. (Rename flag from LOCKS_PURGED
to RECOVER_GRANT and similar for the recovery function,
because it's not only resources with purged locks
that need grant a grant attempt.)
- Replace a couple of unnecessary assertion panics with
error messages.
Signed-off-by: David Teigland <teigland@redhat.com>
2012-04-27 00:54:29 +04:00
uint32_t status ;
2006-12-13 19:37:16 +03:00
uint64_t seq ;
switch ( rc - > rc_type ) {
2012-05-10 19:18:07 +04:00
case DLM_RCOM_STATUS_REPLY :
reply = 1 ;
break ;
case DLM_RCOM_NAMES :
names = 1 ;
break ;
case DLM_RCOM_NAMES_REPLY :
names = 1 ;
reply = 1 ;
break ;
case DLM_RCOM_LOOKUP :
lookup = 1 ;
break ;
case DLM_RCOM_LOOKUP_REPLY :
lookup = 1 ;
reply = 1 ;
break ;
dlm: fixes for nodir mode
The "nodir" mode (statically assign master nodes instead
of using the resource directory) has always been highly
experimental, and never seriously used. This commit
fixes a number of problems, making nodir much more usable.
- Major change to recovery: recover all locks and restart
all in-progress operations after recovery. In some
cases it's not possible to know which in-progess locks
to recover, so recover all. (Most require recovery
in nodir mode anyway since rehashing changes most
master nodes.)
- Change the way nodir mode is enabled, from a command
line mount arg passed through gfs2, into a sysfs
file managed by dlm_controld, consistent with the
other config settings.
- Allow recovering MSTCPY locks on an rsb that has not
yet been turned into a master copy.
- Ignore RCOM_LOCK and RCOM_LOCK_REPLY recovery messages
from a previous, aborted recovery cycle. Base this
on the local recovery status not being in the state
where any nodes should be sending LOCK messages for the
current recovery cycle.
- Hold rsb lock around dlm_purge_mstcpy_locks() because it
may run concurrently with dlm_recover_master_copy().
- Maintain highbast on process-copy lkb's (in addition to
the master as is usual), because the lkb can switch
back and forth between being a master and being a
process copy as the master node changes in recovery.
- When recovering MSTCPY locks, flag rsb's that have
non-empty convert or waiting queues for granting
at the end of recovery. (Rename flag from LOCKS_PURGED
to RECOVER_GRANT and similar for the recovery function,
because it's not only resources with purged locks
that need grant a grant attempt.)
- Replace a couple of unnecessary assertion panics with
error messages.
Signed-off-by: David Teigland <teigland@redhat.com>
2012-04-27 00:54:29 +04:00
case DLM_RCOM_LOCK :
lock = 1 ;
break ;
case DLM_RCOM_LOCK_REPLY :
lock = 1 ;
reply = 1 ;
break ;
2012-04-23 22:58:42 +04:00
} ;
2006-12-13 19:37:16 +03:00
2012-04-23 22:58:42 +04:00
spin_lock ( & ls - > ls_recover_lock ) ;
dlm: fixes for nodir mode
The "nodir" mode (statically assign master nodes instead
of using the resource directory) has always been highly
experimental, and never seriously used. This commit
fixes a number of problems, making nodir much more usable.
- Major change to recovery: recover all locks and restart
all in-progress operations after recovery. In some
cases it's not possible to know which in-progess locks
to recover, so recover all. (Most require recovery
in nodir mode anyway since rehashing changes most
master nodes.)
- Change the way nodir mode is enabled, from a command
line mount arg passed through gfs2, into a sysfs
file managed by dlm_controld, consistent with the
other config settings.
- Allow recovering MSTCPY locks on an rsb that has not
yet been turned into a master copy.
- Ignore RCOM_LOCK and RCOM_LOCK_REPLY recovery messages
from a previous, aborted recovery cycle. Base this
on the local recovery status not being in the state
where any nodes should be sending LOCK messages for the
current recovery cycle.
- Hold rsb lock around dlm_purge_mstcpy_locks() because it
may run concurrently with dlm_recover_master_copy().
- Maintain highbast on process-copy lkb's (in addition to
the master as is usual), because the lkb can switch
back and forth between being a master and being a
process copy as the master node changes in recovery.
- When recovering MSTCPY locks, flag rsb's that have
non-empty convert or waiting queues for granting
at the end of recovery. (Rename flag from LOCKS_PURGED
to RECOVER_GRANT and similar for the recovery function,
because it's not only resources with purged locks
that need grant a grant attempt.)
- Replace a couple of unnecessary assertion panics with
error messages.
Signed-off-by: David Teigland <teigland@redhat.com>
2012-04-27 00:54:29 +04:00
status = ls - > ls_recover_status ;
2012-08-02 20:08:21 +04:00
stop = test_bit ( LSFL_RECOVER_STOP , & ls - > ls_flags ) ;
2012-04-23 22:58:42 +04:00
seq = ls - > ls_recover_seq ;
spin_unlock ( & ls - > ls_recover_lock ) ;
2008-01-26 03:55:09 +03:00
2012-05-10 19:18:07 +04:00
if ( stop & & ( rc - > rc_type ! = DLM_RCOM_STATUS ) )
goto ignore ;
if ( reply & & ( rc - > rc_seq_reply ! = seq ) )
goto ignore ;
if ( ! ( status & DLM_RS_NODES ) & & ( names | | lookup | | lock ) )
goto ignore ;
if ( ! ( status & DLM_RS_DIR ) & & ( lookup | | lock ) )
goto ignore ;
2006-01-18 12:30:29 +03:00
switch ( rc - > rc_type ) {
case DLM_RCOM_STATUS :
receive_rcom_status ( ls , rc ) ;
break ;
case DLM_RCOM_NAMES :
receive_rcom_names ( ls , rc ) ;
break ;
case DLM_RCOM_LOOKUP :
receive_rcom_lookup ( ls , rc ) ;
break ;
case DLM_RCOM_LOCK :
2008-01-26 03:55:09 +03:00
if ( rc - > rc_header . h_length < lock_size )
goto Eshort ;
2006-01-18 12:30:29 +03:00
receive_rcom_lock ( ls , rc ) ;
break ;
case DLM_RCOM_STATUS_REPLY :
2008-01-29 23:52:10 +03:00
receive_sync_reply ( ls , rc ) ;
2006-01-18 12:30:29 +03:00
break ;
case DLM_RCOM_NAMES_REPLY :
2008-01-29 23:52:10 +03:00
receive_sync_reply ( ls , rc ) ;
2006-01-18 12:30:29 +03:00
break ;
case DLM_RCOM_LOOKUP_REPLY :
receive_rcom_lookup_reply ( ls , rc ) ;
break ;
case DLM_RCOM_LOCK_REPLY :
2008-01-26 03:55:09 +03:00
if ( rc - > rc_header . h_length < lock_size )
goto Eshort ;
2008-01-29 23:52:10 +03:00
dlm_recover_process_copy ( ls , rc ) ;
2006-01-18 12:30:29 +03:00
break ;
default :
2008-01-29 23:52:10 +03:00
log_error ( ls , " receive_rcom bad type %d " , rc - > rc_type ) ;
2006-01-18 12:30:29 +03:00
}
2012-05-10 19:18:07 +04:00
return ;
ignore :
log_limit ( ls , " dlm_receive_rcom ignore msg %d "
" from %d %llu %llu recover seq %llu sts %x gen %u " ,
rc - > rc_type ,
nodeid ,
( unsigned long long ) rc - > rc_seq ,
( unsigned long long ) rc - > rc_seq_reply ,
( unsigned long long ) seq ,
status , ls - > ls_generation ) ;
2007-09-28 00:53:38 +04:00
return ;
2008-01-26 03:55:09 +03:00
Eshort :
2012-05-10 19:18:07 +04:00
log_error ( ls , " recovery message %d from %d is too short " ,
rc - > rc_type , nodeid ) ;
2006-01-18 12:30:29 +03:00
}