2019-05-28 09:57:20 -07:00
// SPDX-License-Identifier: GPL-2.0-only
2006-01-18 09:30:29 +00:00
/******************************************************************************
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
* *
* * Copyright ( C ) Sistina Software , Inc . 1997 - 2003 All rights reserved .
2021-05-21 15:08:46 -04:00
* * Copyright ( C ) 2004 - 2021 Red Hat , Inc . All rights reserved .
2006-01-18 09:30:29 +00:00
* *
* *
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
/*
* midcomms . c
*
2021-05-21 15:08:46 -04:00
* This is the appallingly named "mid-level" comms layer. It takes care of
* delivering "reliable" communication at the application layer, on top of
* the underlying lowcomms transport layer.
2006-01-18 09:30:29 +00:00
*
2021-05-21 15:08:46 -04:00
* How it works :
2006-01-18 09:30:29 +00:00
*
2021-05-21 15:08:46 -04:00
* Each node keeps track of all sent DLM messages in send_queue, together
* with a sequence number. The receiver will send a DLM_ACK message back for
* every DLM message received at the other side. If a reconnect happens in
* lowcomms we will send all unacknowledged dlm messages again. The receiving
* side might drop any already received message by comparing sequence
* numbers.
*
* How version detection works :
*
* Due to the fact that dlm has pre-configured node addresses on every side,
* it is in its nature that every side connects at start to transmit dlm
* messages, which ends in a race. However, DLM_RCOM_NAMES, DLM_RCOM_STATUS
* and their replies are the first messages which are exchanged. For
* backwards compatibility these messages are not covered by the midcomms
* re-transmission layer. These messages have their own re-transmission
* handling in the dlm application layer. The version field of every node
* will be set on these RCOM messages as soon as they arrive and the node
* isn't yet part of the nodes hash. There also exists logic to detect a
* version mismatch if something weird is going on or the first message
* isn't an expected one.
*
* Termination :
*
* The midcomms layer does a 4-way handshake for termination at the DLM
* protocol level, like TCP supports it with half-closed socket support.
* SCTP doesn't support half-closed sockets, so we do it in the DLM layer.
* Also, a socket shutdown() can be interrupted, e.g. by a tcp reset itself.
* Additionally, there exists the othercon paradigm in lowcomms which cannot
* easily be changed without breaking backwards compatibility. A node cannot
* send anything to another node once a DLM_FIN message was sent. There
* exists additional logic to print a warning if DLM wants to do it. There
* exists a state handling like RFC 793 but reduced to termination only.
* The "member removal event" describes that the cluster manager removed
* the node from its internal lists; at this point DLM does not send any
* message to the other node. There exist two cases:
*
* 1. The cluster member was removed and we received a FIN
* OR
* 2. We received a FIN but the member was not removed yet
*
* One of these cases will do the CLOSE_WAIT to LAST_ACK change .
*
*
*                              +---------+
*                              | CLOSED  |
*                              +---------+
*                                   | add member/receive RCOM version
*                                   |            detection msg
*                                   V
*                              +---------+
*                              |  ESTAB  |
*                              +---------+
*                      CLOSE     |     |   rcv FIN
*                     -------    |     |   -------
* +---------+         snd FIN   /       \  snd ACK            +---------+
* |  FIN    |<-----------------            ------------------>|  CLOSE  |
* | WAIT-1  |------------------                               |   WAIT  |
* +---------+         rcv FIN   \                             +---------+
* | rcv ACK of FIN    -------    |                          CLOSE  | member
* | --------------    snd ACK    |                         ------- | removal
* V        x                     V                         snd FIN V event
* +---------+               +---------+                       +---------+
* |FINWAIT-2|               | CLOSING |                       | LAST-ACK|
* +---------+               +---------+                       +---------+
* |                rcv ACK of FIN |                 rcv ACK of FIN |
* |  rcv FIN       -------------- |                 -------------- |
* |  -------              x       V                        x       V
*  \ snd ACK                 +---------+                      +---------+
*   ------------------------>| CLOSED  |                      | CLOSED  |
*                            +---------+                      +---------+
*
* NOTE: any state can be interrupted by midcomms_close() and the state will
* be switched to CLOSED in case of fencing. There also exists some timeout
* handling when we receive the version detection RCOM messages, which was
* derived from observation.
*
* Future improvements :
*
* There exist some known issues/improvements of the dlm handling. Some
* of them should be done in a next major dlm version bump which makes
* it incompatible with previous versions.
*
* Unaligned memory access :
*
* There exist cases where the dlm message buffer length is not aligned
* to 8 bytes. However, it seems nobody has detected any problem with it.
* This can be fixed in the next major version bump of dlm.
*
* Version detection :
*
* The version detection and how it's done is related to backwards
* compatibility. There exist better ways to handle it. However, this
* should be changed in the next major version bump of dlm.
*
* Tail Size checking :
*
* There exists a message tail payload in e.g. DLM_MSG, however we don't
* check it against the message length yet with regard to the receive
* buffer length. That needs to be validated.
*
* Fencing bad nodes :
*
* At timeout places or weird sequence number behaviours we should send
* a fencing request to the cluster manager .
2006-01-18 09:30:29 +00:00
*/
2021-05-21 15:08:46 -04:00
/* Debug switch to enable a 5 seconds sleep while waiting for a termination.
 * This can be useful to test fencing while a termination is running.
 * It requires a setup with gfs2 as the only dlm user, so that the
 * last umount will terminate the connection.
 *
 * How to test: while umount blocks for those 5 seconds, just press the
 * reset button. With a lot of dropping, the termination process can take
 * several seconds.
 */
# define DLM_DEBUG_FENCE_TERMINATION 0
2022-10-27 16:45:15 -04:00
# include <trace/events/dlm.h>
2021-05-21 15:08:46 -04:00
# include <net/tcp.h>
2006-01-18 09:30:29 +00:00
# include "dlm_internal.h"
# include "lowcomms.h"
# include "config.h"
2021-11-30 14:47:18 -05:00
# include "memory.h"
2006-01-18 09:30:29 +00:00
# include "lock.h"
2021-05-21 15:08:46 -04:00
# include "util.h"
2006-01-18 09:30:29 +00:00
# include "midcomms.h"
2021-05-21 15:08:46 -04:00
/* init value for sequence numbers for testing purpose only e.g. overflows;
 * midcomms_node_reset() stores it into both seq_next and seq_send.
 */
# define DLM_SEQ_INIT 0
2023-01-12 17:18:46 -05:00
/* 5 seconds wait to sync ending of dlm */
# define DLM_SHUTDOWN_TIMEOUT msecs_to_jiffies(5000)
2021-05-21 15:08:46 -04:00
/* value of midcomms_node.version as long as no version was detected */
# define DLM_VERSION_NOT_SET 0
2023-05-29 17:44:40 -04:00
/* NOTE(review): presumably the ulp_delivered threshold used on the send
 * path; it is not referenced directly in this part of the file - confirm.
 */
# define DLM_SEND_ACK_BACK_MSG_THRESHOLD 32
/* threshold passed to dlm_send_ack_threshold() on the receive path: an ack
 * is sent back once ulp_delivered exceeds this value.
 */
# define DLM_RECV_ACK_BACK_MSG_THRESHOLD (DLM_SEND_ACK_BACK_MSG_THRESHOLD * 8)
2021-05-21 15:08:46 -04:00
struct midcomms_node {
int nodeid ;
uint32_t version ;
2023-05-29 17:44:39 -04:00
atomic_t seq_send ;
atomic_t seq_next ;
2021-05-21 15:08:46 -04:00
/* These queues are unbound because we cannot drop any message in dlm.
* We could send a fence signal for a specific node to the cluster
* manager if queues hits some maximum value , however this handling
* not supported yet .
*/
struct list_head send_queue ;
spinlock_t send_queue_lock ;
atomic_t send_queue_cnt ;
# define DLM_NODE_FLAG_CLOSE 1
# define DLM_NODE_FLAG_STOP_TX 2
# define DLM_NODE_FLAG_STOP_RX 3
2023-05-29 17:44:40 -04:00
atomic_t ulp_delivered ;
2021-05-21 15:08:46 -04:00
unsigned long flags ;
wait_queue_head_t shutdown_wait ;
/* dlm tcp termination state */
# define DLM_CLOSED 1
# define DLM_ESTABLISHED 2
# define DLM_FIN_WAIT1 3
# define DLM_FIN_WAIT2 4
# define DLM_CLOSE_WAIT 5
# define DLM_LAST_ACK 6
# define DLM_CLOSING 7
int state ;
spinlock_t state_lock ;
/* counts how many lockspaces are using this node
* this refcount is necessary to determine if the
* node wants to disconnect .
*/
int users ;
2021-05-21 15:08:47 -04:00
/* not protected by srcu, node_hash lifetime */
void * debugfs ;
2021-05-21 15:08:46 -04:00
struct hlist_node hlist ;
struct rcu_head rcu ;
} ;
struct dlm_mhandle {
2022-10-27 16:45:14 -04:00
const union dlm_packet * inner_p ;
2021-05-21 15:08:46 -04:00
struct midcomms_node * node ;
struct dlm_opts * opts ;
struct dlm_msg * msg ;
bool committed ;
uint32_t seq ;
void ( * ack_rcv ) ( struct midcomms_node * node ) ;
/* get_mhandle/commit srcu idx exchange */
int idx ;
struct list_head list ;
struct rcu_head rcu ;
} ;
/* hash buckets of all known midcomms nodes, indexed by nodeid_hash(nodeid) */
static struct hlist_head node_hash [ CONN_HASH_SIZE ] ;
/* taken (with bh disabled) around insertions into node_hash */
static DEFINE_SPINLOCK ( nodes_lock ) ;
/* read side protection for node_hash lookups and the nodes found by them */
DEFINE_STATIC_SRCU ( nodes_srcu ) ;
/* This mutex prevents midcomms_close() from running concurrently with
 * stop() or remove(). Invalid memory accesses were observed when
 * DLM_DEBUG_FENCE_TERMINATION was enabled and machines were reset;
 * without it we can end up with a double deletion in the nodes
 * data structure.
 */
static DEFINE_MUTEX ( close_lock ) ;
2021-11-30 14:47:18 -05:00
struct kmem_cache * dlm_midcomms_cache_create ( void )
{
2024-03-28 11:48:34 -04:00
return KMEM_CACHE ( dlm_mhandle , 0 ) ;
2021-11-30 14:47:18 -05:00
}
2021-05-21 15:08:46 -04:00
static inline const char * dlm_state_str ( int state )
2021-05-21 15:08:41 -04:00
{
2021-05-21 15:08:46 -04:00
switch ( state ) {
case DLM_CLOSED :
return " CLOSED " ;
case DLM_ESTABLISHED :
return " ESTABLISHED " ;
case DLM_FIN_WAIT1 :
return " FIN_WAIT1 " ;
case DLM_FIN_WAIT2 :
return " FIN_WAIT2 " ;
case DLM_CLOSE_WAIT :
return " CLOSE_WAIT " ;
case DLM_LAST_ACK :
return " LAST_ACK " ;
case DLM_CLOSING :
return " CLOSING " ;
default :
return " UNKNOWN " ;
}
2021-05-21 15:08:41 -04:00
}
2021-05-21 15:08:47 -04:00
const char * dlm_midcomms_state ( struct midcomms_node * node )
{
return dlm_state_str ( node - > state ) ;
}
unsigned long dlm_midcomms_flags ( struct midcomms_node * node )
{
return node - > flags ;
}
int dlm_midcomms_send_queue_cnt ( struct midcomms_node * node )
{
return atomic_read ( & node - > send_queue_cnt ) ;
}
uint32_t dlm_midcomms_version ( struct midcomms_node * node )
{
return node - > version ;
}
2021-05-21 15:08:46 -04:00
static struct midcomms_node * __find_node ( int nodeid , int r )
2021-05-21 15:08:41 -04:00
{
2021-05-21 15:08:46 -04:00
struct midcomms_node * node ;
hlist_for_each_entry_rcu ( node , & node_hash [ r ] , hlist ) {
if ( node - > nodeid = = nodeid )
return node ;
}
return NULL ;
2021-05-21 15:08:41 -04:00
}
2021-05-21 15:08:46 -04:00
static void dlm_mhandle_release ( struct rcu_head * rcu )
{
struct dlm_mhandle * mh = container_of ( rcu , struct dlm_mhandle , rcu ) ;
2021-05-21 15:08:41 -04:00
2021-05-21 15:08:46 -04:00
dlm_lowcomms_put_msg ( mh - > msg ) ;
2021-11-30 14:47:18 -05:00
dlm_free_mhandle ( mh ) ;
2021-05-21 15:08:46 -04:00
}
2021-05-21 15:08:41 -04:00
2021-06-11 12:55:40 -04:00
static void dlm_mhandle_delete ( struct midcomms_node * node ,
struct dlm_mhandle * mh )
{
list_del_rcu ( & mh - > list ) ;
atomic_dec ( & node - > send_queue_cnt ) ;
call_rcu ( & mh - > rcu , dlm_mhandle_release ) ;
}
2021-05-21 15:08:46 -04:00
static void dlm_send_queue_flush ( struct midcomms_node * node )
2021-05-21 15:08:41 -04:00
{
2021-05-21 15:08:46 -04:00
struct dlm_mhandle * mh ;
pr_debug ( " flush midcomms send queue of node %d \n " , node - > nodeid ) ;
rcu_read_lock ( ) ;
2022-11-17 17:11:57 -05:00
spin_lock_bh ( & node - > send_queue_lock ) ;
2021-05-21 15:08:46 -04:00
list_for_each_entry_rcu ( mh , & node - > send_queue , list ) {
2021-06-11 12:55:40 -04:00
dlm_mhandle_delete ( node , mh ) ;
2021-05-21 15:08:46 -04:00
}
2022-11-17 17:11:57 -05:00
spin_unlock_bh ( & node - > send_queue_lock ) ;
2021-05-21 15:08:46 -04:00
rcu_read_unlock ( ) ;
2021-05-21 15:08:41 -04:00
}
2021-05-21 15:08:46 -04:00
static void midcomms_node_reset ( struct midcomms_node * node )
2021-05-21 15:08:41 -04:00
{
2021-05-21 15:08:46 -04:00
pr_debug ( " reset node %d \n " , node - > nodeid ) ;
2023-05-29 17:44:39 -04:00
atomic_set ( & node - > seq_next , DLM_SEQ_INIT ) ;
atomic_set ( & node - > seq_send , DLM_SEQ_INIT ) ;
2023-05-29 17:44:40 -04:00
atomic_set ( & node - > ulp_delivered , 0 ) ;
2021-05-21 15:08:46 -04:00
node - > version = DLM_VERSION_NOT_SET ;
node - > flags = 0 ;
dlm_send_queue_flush ( node ) ;
node - > state = DLM_CLOSED ;
wake_up ( & node - > shutdown_wait ) ;
2021-05-21 15:08:41 -04:00
}
2023-08-01 14:09:49 -04:00
/* Look up a node by nodeid only; the hash bucket is derived from the
 * nodeid. Returns NULL if the node is unknown.
 */
static struct midcomms_node *nodeid2node(int nodeid)
{
	int bucket = nodeid_hash(nodeid);

	return __find_node(nodeid, bucket);
}
int dlm_midcomms_addr ( int nodeid , struct sockaddr_storage * addr , int len )
{
2023-10-10 18:04:44 -04:00
int ret , idx , r = nodeid_hash ( nodeid ) ;
2023-08-01 14:09:49 -04:00
struct midcomms_node * node ;
2021-05-21 15:08:46 -04:00
2023-08-01 14:09:49 -04:00
ret = dlm_lowcomms_addr ( nodeid , addr , len ) ;
if ( ret )
return ret ;
2021-05-21 15:08:46 -04:00
2023-10-10 18:04:44 -04:00
idx = srcu_read_lock ( & nodes_srcu ) ;
node = __find_node ( nodeid , r ) ;
if ( node ) {
srcu_read_unlock ( & nodes_srcu , idx ) ;
return 0 ;
}
srcu_read_unlock ( & nodes_srcu , idx ) ;
2023-08-01 14:09:49 -04:00
node = kmalloc ( sizeof ( * node ) , GFP_NOFS ) ;
2021-05-21 15:08:46 -04:00
if ( ! node )
2023-08-01 14:09:49 -04:00
return - ENOMEM ;
2021-05-21 15:08:46 -04:00
node - > nodeid = nodeid ;
spin_lock_init ( & node - > state_lock ) ;
spin_lock_init ( & node - > send_queue_lock ) ;
atomic_set ( & node - > send_queue_cnt , 0 ) ;
INIT_LIST_HEAD ( & node - > send_queue ) ;
init_waitqueue_head ( & node - > shutdown_wait ) ;
node - > users = 0 ;
midcomms_node_reset ( node ) ;
2024-04-02 15:18:09 -04:00
spin_lock_bh ( & nodes_lock ) ;
2021-05-21 15:08:46 -04:00
hlist_add_head_rcu ( & node - > hlist , & node_hash [ r ] ) ;
2024-04-02 15:18:09 -04:00
spin_unlock_bh ( & nodes_lock ) ;
2021-05-21 15:08:47 -04:00
node - > debugfs = dlm_create_debug_comms_file ( nodeid , node ) ;
2023-08-01 14:09:49 -04:00
return 0 ;
2021-05-21 15:08:46 -04:00
}
/* Send a bare DLM_ACK header carrying @seq to @nodeid.
 *
 * The ack goes directly through lowcomms, it is not tracked in a send
 * queue. Returns 0 on success or -ENOMEM if no message could be
 * allocated.
 */
static int dlm_send_ack(int nodeid, uint32_t seq)
{
	int hdr_len = sizeof(struct dlm_header);
	struct dlm_header *ack;
	struct dlm_msg *msg;
	char *buf;

	msg = dlm_lowcomms_new_msg(nodeid, hdr_len, &buf, NULL, NULL);
	if (!msg)
		return -ENOMEM;

	ack = (struct dlm_header *)buf;

	ack->h_cmd = DLM_ACK;
	ack->h_version = cpu_to_le32(DLM_HEADER_MAJOR | DLM_HEADER_MINOR);
	ack->h_nodeid = cpu_to_le32(dlm_our_nodeid());
	ack->h_length = cpu_to_le16(hdr_len);
	ack->u.h_seq = cpu_to_le32(seq);

	dlm_lowcomms_commit_msg(msg);
	dlm_lowcomms_put_msg(msg);

	return 0;
}
2023-05-29 17:44:40 -04:00
/* Send an ack carrying the current seq_next back to @node once more than
 * @threshold messages were delivered since the counter was last reset.
 *
 * The counter is reset with a cmpxchg so that only the one caller which
 * wins the reset sends the ack.
 */
static void dlm_send_ack_threshold(struct midcomms_node *node,
				   uint32_t threshold)
{
	uint32_t delivered;

	/* let only send one user trigger threshold to send ack back */
	for (;;) {
		delivered = atomic_read(&node->ulp_delivered);

		/* abort if threshold is not reached */
		if (!(delivered > threshold))
			return;

		/* try to reset ulp_delivered counter */
		if (atomic_cmpxchg(&node->ulp_delivered, delivered, 0) == delivered)
			break;
	}

	dlm_send_ack(node->nodeid, atomic_read(&node->seq_next));
}
2021-05-21 15:08:46 -04:00
static int dlm_send_fin ( struct midcomms_node * node ,
void ( * ack_rcv ) ( struct midcomms_node * node ) )
{
int mb_len = sizeof ( struct dlm_header ) ;
struct dlm_header * m_header ;
struct dlm_mhandle * mh ;
char * ppc ;
2024-04-02 15:17:57 -04:00
mh = dlm_midcomms_get_mhandle ( node - > nodeid , mb_len , & ppc ) ;
2021-05-21 15:08:46 -04:00
if ( ! mh )
return - ENOMEM ;
2023-01-12 17:10:34 -05:00
set_bit ( DLM_NODE_FLAG_STOP_TX , & node - > flags ) ;
2021-05-21 15:08:46 -04:00
mh - > ack_rcv = ack_rcv ;
m_header = ( struct dlm_header * ) ppc ;
2022-04-04 16:06:39 -04:00
m_header - > h_version = cpu_to_le32 ( DLM_HEADER_MAJOR | DLM_HEADER_MINOR ) ;
m_header - > h_nodeid = cpu_to_le32 ( dlm_our_nodeid ( ) ) ;
m_header - > h_length = cpu_to_le16 ( mb_len ) ;
2021-05-21 15:08:46 -04:00
m_header - > h_cmd = DLM_FIN ;
pr_debug ( " sending fin msg to node %d \n " , node - > nodeid ) ;
2022-10-27 16:45:15 -04:00
dlm_midcomms_commit_mhandle ( mh , NULL , 0 ) ;
2021-05-21 15:08:46 -04:00
return 0 ;
}
/* Handle an ack for @seq received from @node: every queued message with
 * a sequence number before @seq counts as acknowledged.
 *
 * This is done in two passes over the send queue. The first pass runs
 * the ack_rcv callbacks without send_queue_lock held, the second pass
 * removes the acknowledged handles under the lock.
 */
static void dlm_receive_ack(struct midcomms_node *node, uint32_t seq)
{
	struct dlm_mhandle *entry;

	rcu_read_lock();

	list_for_each_entry_rcu(entry, &node->send_queue, list) {
		/* send queue should be ordered */
		if (!before(entry->seq, seq))
			break;

		if (entry->ack_rcv)
			entry->ack_rcv(node);
	}

	spin_lock_bh(&node->send_queue_lock);
	list_for_each_entry_rcu(entry, &node->send_queue, list) {
		/* send queue should be ordered */
		if (!before(entry->seq, seq))
			break;

		dlm_mhandle_delete(node, entry);
	}
	spin_unlock_bh(&node->send_queue_lock);

	rcu_read_unlock();
}
static void dlm_pas_fin_ack_rcv ( struct midcomms_node * node )
{
2024-04-02 15:18:09 -04:00
spin_lock_bh ( & node - > state_lock ) ;
2021-05-21 15:08:46 -04:00
pr_debug ( " receive passive fin ack from node %d with state %s \n " ,
node - > nodeid , dlm_state_str ( node - > state ) ) ;
switch ( node - > state ) {
case DLM_LAST_ACK :
/* DLM_CLOSED */
midcomms_node_reset ( node ) ;
break ;
case DLM_CLOSED :
/* not valid but somehow we got what we want */
wake_up ( & node - > shutdown_wait ) ;
break ;
default :
2024-04-02 15:18:09 -04:00
spin_unlock_bh ( & node - > state_lock ) ;
2023-01-12 17:18:47 -05:00
log_print ( " %s: unexpected state: %d " ,
2021-05-21 15:08:46 -04:00
__func__ , node - > state ) ;
2022-10-27 16:45:27 -04:00
WARN_ON_ONCE ( 1 ) ;
2021-05-21 15:08:46 -04:00
return ;
}
2024-04-02 15:18:09 -04:00
spin_unlock_bh ( & node - > state_lock ) ;
2021-05-21 15:08:46 -04:00
}
2023-08-01 14:09:48 -04:00
/* Emit the receive tracepoint matching the command of @p. Only DLM_MSG
 * and DLM_RCOM packets are traced, everything else is ignored.
 */
static void dlm_receive_buffer_3_2_trace(uint32_t seq,
					 const union dlm_packet *p)
{
	if (p->header.h_cmd == DLM_MSG)
		trace_dlm_recv_message(dlm_our_nodeid(), seq, &p->message);
	else if (p->header.h_cmd == DLM_RCOM)
		trace_dlm_recv_rcom(dlm_our_nodeid(), seq, &p->rcom);
}
2023-08-01 14:09:48 -04:00
/* Process the inner packet @p of a DLM_OPTS message which was received
 * from @node with sequence number @seq.
 *
 * Only the packet carrying the expected sequence number (node->seq_next)
 * is processed; seq_next is advanced with a cmpxchg so that one and only
 * one caller can claim a given sequence number. A DLM_FIN drives the
 * termination state machine, every other command is delivered to
 * dlm_receive_buffer().
 *
 * Packets with an unexpected sequence number are ignored. If the sequence
 * number lies before the expected one, the current seq_next is acked
 * again so the sender can drop the already received message.
 */
static void dlm_midcomms_receive_buffer(const union dlm_packet *p,
					struct midcomms_node *node,
					uint32_t seq)
{
	bool is_expected_seq;
	uint32_t oval, nval;

	/* claim @seq by moving seq_next one ahead, retry on contention */
	do {
		oval = atomic_read(&node->seq_next);
		is_expected_seq = (oval == seq);
		if (!is_expected_seq)
			break;

		nval = oval + 1;
	} while (atomic_cmpxchg(&node->seq_next, oval, nval) != oval);

	if (is_expected_seq) {
		switch (p->header.h_cmd) {
		case DLM_FIN:
			spin_lock_bh(&node->state_lock);
			pr_debug(" receive fin msg from node %d with state %s \n ",
				 node->nodeid, dlm_state_str(node->state));

			switch (node->state) {
			case DLM_ESTABLISHED:
				dlm_send_ack(node->nodeid, nval);

				/* passive shutdown DLM_LAST_ACK case 1
				 * additional we check if the node is used by
				 * cluster manager events at all.
				 */
				if (node->users == 0) {
					node->state = DLM_LAST_ACK;
					pr_debug(" switch node %d to state %s case 1 \n ",
						 node->nodeid, dlm_state_str(node->state));
					set_bit(DLM_NODE_FLAG_STOP_RX, &node->flags);
					dlm_send_fin(node, dlm_pas_fin_ack_rcv);
				} else {
					node->state = DLM_CLOSE_WAIT;
					pr_debug(" switch node %d to state %s \n ",
						 node->nodeid, dlm_state_str(node->state));
				}
				break;
			case DLM_FIN_WAIT1:
				dlm_send_ack(node->nodeid, nval);
				node->state = DLM_CLOSING;
				set_bit(DLM_NODE_FLAG_STOP_RX, &node->flags);
				pr_debug(" switch node %d to state %s \n ",
					 node->nodeid, dlm_state_str(node->state));
				break;
			case DLM_FIN_WAIT2:
				dlm_send_ack(node->nodeid, nval);
				midcomms_node_reset(node);
				pr_debug(" switch node %d to state %s \n ",
					 node->nodeid, dlm_state_str(node->state));
				break;
			case DLM_LAST_ACK:
				/* probably remove_member caught it, do nothing */
				break;
			default:
				/* drop the lock before complaining */
				spin_unlock_bh(&node->state_lock);
				log_print(" %s: unexpected state: %d ",
					  __func__, node->state);
				WARN_ON_ONCE(1);
				return;
			}
			spin_unlock_bh(&node->state_lock);
			break;
		default:
			WARN_ON_ONCE(test_bit(DLM_NODE_FLAG_STOP_RX, &node->flags));
			dlm_receive_buffer_3_2_trace(seq, p);
			dlm_receive_buffer(p, node->nodeid);

			atomic_inc(&node->ulp_delivered);
			/* unlikely case to send ack back when we don't transmit */
			dlm_send_ack_threshold(node, DLM_RECV_ACK_BACK_MSG_THRESHOLD);
			break;
		}
	} else {
		/* retry to ack message which we already have by sending back
		 * current node->seq_next number as ack.
		 *
		 * before() compares with wrap-around semantics, like
		 * dlm_receive_ack() does; a plain '<' would stop re-acking
		 * duplicates once the 32 bit sequence number wrapped.
		 */
		if (before(seq, oval))
			dlm_send_ack(node->nodeid, oval);

		log_print_ratelimited(" ignore dlm msg because seq mismatch, seq: %u, expected: %u, nodeid: %d ",
				      seq, oval, node->nodeid);
	}
}
2023-08-01 14:09:48 -04:00
/* Check that a DLM_OPTS message of @msglen bytes is large enough to hold
 * the opts header, its options and the inner header announced by
 * o_nextcmd.
 *
 * @p:      the received DLM_OPTS packet
 * @msglen: length taken from the outer header
 * @nodeid: sender, only used for logging
 *
 * Returns 0 if the lengths are plausible and -1 if the message has to be
 * skipped (too small or unsupported o_nextcmd).
 */
static int dlm_opts_check_msglen(const union dlm_packet *p, uint16_t msglen,
				 int nodeid)
{
	int len = msglen;

	/* we only trust outer header msglen because
	 * it's checked against receive buffer length.
	 */
	if (len < sizeof(struct dlm_opts))
		return -1;
	len -= sizeof(struct dlm_opts);

	if (len < le16_to_cpu(p->opts.o_optlen))
		return -1;
	len -= le16_to_cpu(p->opts.o_optlen);

	/* len is now what remains for the inner message */
	switch (p->opts.o_nextcmd) {
	case DLM_FIN:
		if (len < sizeof(struct dlm_header)) {
			log_print(" fin too small: %d, will skip this message from node %d ",
				  len, nodeid);
			return -1;
		}

		break;
	case DLM_MSG:
		if (len < sizeof(struct dlm_message)) {
			/* report the remaining length which was compared,
			 * like the fin and rcom cases do
			 */
			log_print(" msg too small: %d, will skip this message from node %d ",
				  len, nodeid);
			return -1;
		}

		break;
	case DLM_RCOM:
		if (len < sizeof(struct dlm_rcom)) {
			log_print(" rcom msg too small: %d, will skip this message from node %d ",
				  len, nodeid);
			return -1;
		}

		break;
	default:
		log_print(" unsupported o_nextcmd received: %u, will skip this message from node %d ",
			  p->opts.o_nextcmd, nodeid);
		return -1;
	}

	return 0;
}
2023-08-01 14:09:48 -04:00
static void dlm_midcomms_receive_buffer_3_2 ( const union dlm_packet * p , int nodeid )
2021-05-21 15:08:46 -04:00
{
uint16_t msglen = le16_to_cpu ( p - > header . h_length ) ;
struct midcomms_node * node ;
uint32_t seq ;
int ret , idx ;
idx = srcu_read_lock ( & nodes_srcu ) ;
2023-08-01 14:09:49 -04:00
node = nodeid2node ( nodeid ) ;
if ( WARN_ON_ONCE ( ! node ) )
goto out ;
switch ( node - > version ) {
case DLM_VERSION_NOT_SET :
node - > version = DLM_VERSION_3_2 ;
wake_up ( & node - > shutdown_wait ) ;
log_print ( " version 0x%08x for node %d detected " , DLM_VERSION_3_2 ,
node - > nodeid ) ;
spin_lock ( & node - > state_lock ) ;
switch ( node - > state ) {
case DLM_CLOSED :
node - > state = DLM_ESTABLISHED ;
pr_debug ( " switch node %d to state %s \n " ,
node - > nodeid , dlm_state_str ( node - > state ) ) ;
break ;
default :
break ;
}
spin_unlock ( & node - > state_lock ) ;
break ;
case DLM_VERSION_3_2 :
break ;
default :
log_print_ratelimited ( " version mismatch detected, assumed 0x%08x but node %d has 0x%08x " ,
DLM_VERSION_3_2 , node - > nodeid , node - > version ) ;
2021-05-21 15:08:46 -04:00
goto out ;
2023-08-01 14:09:49 -04:00
}
2021-05-21 15:08:46 -04:00
switch ( p - > header . h_cmd ) {
case DLM_RCOM :
/* these rcom message we use to determine version.
* they have their own retransmission handling and
* are the first messages of dlm .
*
* length already checked .
*/
2022-04-04 16:06:42 -04:00
switch ( p - > rcom . rc_type ) {
case cpu_to_le32 ( DLM_RCOM_NAMES ) :
2021-05-21 15:08:46 -04:00
fallthrough ;
2022-04-04 16:06:42 -04:00
case cpu_to_le32 ( DLM_RCOM_NAMES_REPLY ) :
2021-05-21 15:08:46 -04:00
fallthrough ;
2022-04-04 16:06:42 -04:00
case cpu_to_le32 ( DLM_RCOM_STATUS ) :
2021-05-21 15:08:46 -04:00
fallthrough ;
2022-04-04 16:06:42 -04:00
case cpu_to_le32 ( DLM_RCOM_STATUS_REPLY ) :
2021-05-21 15:08:46 -04:00
break ;
default :
log_print ( " unsupported rcom type received: %u, will skip this message from node %d " ,
le32_to_cpu ( p - > rcom . rc_type ) , nodeid ) ;
goto out ;
}
2022-10-27 16:45:27 -04:00
WARN_ON_ONCE ( test_bit ( DLM_NODE_FLAG_STOP_RX , & node - > flags ) ) ;
2021-05-21 15:08:46 -04:00
dlm_receive_buffer ( p , nodeid ) ;
break ;
case DLM_OPTS :
seq = le32_to_cpu ( p - > header . u . h_seq ) ;
ret = dlm_opts_check_msglen ( p , msglen , nodeid ) ;
if ( ret < 0 ) {
log_print ( " opts msg too small: %u, will skip this message from node %d " ,
msglen , nodeid ) ;
goto out ;
}
p = ( union dlm_packet * ) ( ( unsigned char * ) p - > opts . o_opts +
le16_to_cpu ( p - > opts . o_optlen ) ) ;
/* recheck inner msglen just if it's not garbage */
msglen = le16_to_cpu ( p - > header . h_length ) ;
switch ( p - > header . h_cmd ) {
case DLM_RCOM :
if ( msglen < sizeof ( struct dlm_rcom ) ) {
log_print ( " inner rcom msg too small: %u, will skip this message from node %d " ,
msglen , nodeid ) ;
goto out ;
}
break ;
case DLM_MSG :
if ( msglen < sizeof ( struct dlm_message ) ) {
log_print ( " inner msg too small: %u, will skip this message from node %d " ,
msglen , nodeid ) ;
goto out ;
}
break ;
case DLM_FIN :
if ( msglen < sizeof ( struct dlm_header ) ) {
log_print ( " inner fin too small: %u, will skip this message from node %d " ,
msglen , nodeid ) ;
goto out ;
}
break ;
default :
log_print ( " unsupported inner h_cmd received: %u, will skip this message from node %d " ,
msglen , nodeid ) ;
goto out ;
}
dlm_midcomms_receive_buffer ( p , node , seq ) ;
break ;
case DLM_ACK :
seq = le32_to_cpu ( p - > header . u . h_seq ) ;
dlm_receive_ack ( node , seq ) ;
break ;
default :
log_print ( " unsupported h_cmd received: %u, will skip this message from node %d " ,
p - > header . h_cmd , nodeid ) ;
break ;
}
out :
srcu_read_unlock ( & nodes_srcu , idx ) ;
}
2023-08-01 14:09:49 -04:00
static void dlm_midcomms_receive_buffer_3_1 ( const union dlm_packet * p , int nodeid )
2021-05-21 15:08:41 -04:00
{
2023-08-01 14:09:49 -04:00
uint16_t msglen = le16_to_cpu ( p - > header . h_length ) ;
struct midcomms_node * node ;
int idx ;
idx = srcu_read_lock ( & nodes_srcu ) ;
node = nodeid2node ( nodeid ) ;
if ( WARN_ON_ONCE ( ! node ) ) {
srcu_read_unlock ( & nodes_srcu , idx ) ;
return ;
}
2021-05-21 15:08:46 -04:00
switch ( node - > version ) {
case DLM_VERSION_NOT_SET :
node - > version = DLM_VERSION_3_1 ;
2023-01-12 17:18:44 -05:00
wake_up ( & node - > shutdown_wait ) ;
2021-05-21 15:08:46 -04:00
log_print ( " version 0x%08x for node %d detected " , DLM_VERSION_3_1 ,
node - > nodeid ) ;
break ;
case DLM_VERSION_3_1 :
break ;
default :
log_print_ratelimited ( " version mismatch detected, assumed 0x%08x but node %d has 0x%08x " ,
DLM_VERSION_3_1 , node - > nodeid , node - > version ) ;
srcu_read_unlock ( & nodes_srcu , idx ) ;
return ;
}
srcu_read_unlock ( & nodes_srcu , idx ) ;
switch ( p - > header . h_cmd ) {
case DLM_RCOM :
/* length already checked */
break ;
case DLM_MSG :
if ( msglen < sizeof ( struct dlm_message ) ) {
log_print ( " msg too small: %u, will skip this message from node %d " ,
msglen , nodeid ) ;
return ;
}
break ;
default :
log_print ( " unsupported h_cmd received: %u, will skip this message from node %d " ,
p - > header . h_cmd , nodeid ) ;
return ;
}
dlm_receive_buffer ( p , nodeid ) ;
2021-05-21 15:08:41 -04:00
}
2022-11-17 17:11:57 -05:00
int dlm_validate_incoming_buffer ( int nodeid , unsigned char * buf , int len )
2006-01-18 09:30:29 +00:00
{
2020-09-24 10:31:26 -04:00
const unsigned char * ptr = buf ;
const struct dlm_header * hd ;
2006-01-18 09:30:29 +00:00
uint16_t msglen ;
2020-09-24 10:31:26 -04:00
int ret = 0 ;
2006-01-18 09:30:29 +00:00
2020-09-24 10:31:26 -04:00
while ( len > = sizeof ( struct dlm_header ) ) {
hd = ( struct dlm_header * ) ptr ;
2021-06-02 09:45:20 -04:00
/* no message should be more than DLM_MAX_SOCKET_BUFSIZE or
2021-03-01 17:05:18 -05:00
* less than dlm_header size .
*
* Some messages does not have a 8 byte length boundary yet
* which can occur in a unaligned memory access of some dlm
* messages . However this problem need to be fixed at the
* sending side , for now it seems nobody run into architecture
* related issues yet but it slows down some processing .
* Fixing this issue should be scheduled in future by doing
* the next major version bump .
2020-09-24 10:31:26 -04:00
*/
2021-03-01 17:05:18 -05:00
msglen = le16_to_cpu ( hd - > h_length ) ;
2021-06-02 09:45:20 -04:00
if ( msglen > DLM_MAX_SOCKET_BUFSIZE | |
2021-03-01 17:05:17 -05:00
msglen < sizeof ( struct dlm_header ) ) {
log_print ( " received invalid length header: %u from node %d, will abort message parsing " ,
msglen , nodeid ) ;
2020-09-24 10:31:26 -04:00
return - EBADMSG ;
2006-01-18 09:30:29 +00:00
}
2020-09-24 10:31:26 -04:00
/* caller will take care that leftover
* will be parsed next call with more data
*/
2006-01-18 09:30:29 +00:00
if ( msglen > len )
break ;
2022-11-17 17:11:57 -05:00
ret + = msglen ;
len - = msglen ;
ptr + = msglen ;
}
return ret ;
}
/*
* Called from the low - level comms layer to process a buffer of
* commands .
*/
int dlm_process_incoming_buffer ( int nodeid , unsigned char * buf , int len )
{
const unsigned char * ptr = buf ;
const struct dlm_header * hd ;
uint16_t msglen ;
int ret = 0 ;
while ( len > = sizeof ( struct dlm_header ) ) {
hd = ( struct dlm_header * ) ptr ;
msglen = le16_to_cpu ( hd - > h_length ) ;
if ( msglen > len )
break ;
2021-11-02 15:17:11 -04:00
switch ( hd - > h_version ) {
case cpu_to_le32 ( DLM_VERSION_3_1 ) :
2023-08-01 14:09:48 -04:00
dlm_midcomms_receive_buffer_3_1 ( ( const union dlm_packet * ) ptr , nodeid ) ;
2020-09-24 10:31:26 -04:00
break ;
2021-11-02 15:17:11 -04:00
case cpu_to_le32 ( DLM_VERSION_3_2 ) :
2023-08-01 14:09:48 -04:00
dlm_midcomms_receive_buffer_3_2 ( ( const union dlm_packet * ) ptr , nodeid ) ;
2020-09-24 10:31:26 -04:00
break ;
default :
2021-05-21 15:08:46 -04:00
log_print ( " received invalid version header: %u from node %d, will skip this message " ,
le32_to_cpu ( hd - > h_version ) , nodeid ) ;
break ;
2020-09-24 10:31:26 -04:00
}
2006-01-18 09:30:29 +00:00
ret + = msglen ;
len - = msglen ;
2020-09-24 10:31:26 -04:00
ptr + = msglen ;
2006-01-18 09:30:29 +00:00
}
2020-09-24 10:31:26 -04:00
return ret ;
2006-01-18 09:30:29 +00:00
}
2021-05-21 15:08:46 -04:00
void dlm_midcomms_unack_msg_resend ( int nodeid )
{
struct midcomms_node * node ;
struct dlm_mhandle * mh ;
int idx , ret ;
idx = srcu_read_lock ( & nodes_srcu ) ;
2023-08-01 14:09:49 -04:00
node = nodeid2node ( nodeid ) ;
if ( WARN_ON_ONCE ( ! node ) ) {
2021-05-21 15:08:46 -04:00
srcu_read_unlock ( & nodes_srcu , idx ) ;
return ;
}
/* old protocol, we don't support to retransmit on failure */
switch ( node - > version ) {
case DLM_VERSION_3_2 :
break ;
default :
srcu_read_unlock ( & nodes_srcu , idx ) ;
return ;
}
rcu_read_lock ( ) ;
list_for_each_entry_rcu ( mh , & node - > send_queue , list ) {
if ( ! mh - > committed )
continue ;
ret = dlm_lowcomms_resend_msg ( mh - > msg ) ;
if ( ! ret )
log_print_ratelimited ( " retransmit dlm msg, seq %u, nodeid %d " ,
mh - > seq , node - > nodeid ) ;
}
rcu_read_unlock ( ) ;
srcu_read_unlock ( & nodes_srcu , idx ) ;
}
static void dlm_fill_opts_header ( struct dlm_opts * opts , uint16_t inner_len ,
uint32_t seq )
{
opts - > o_header . h_cmd = DLM_OPTS ;
2022-04-04 16:06:39 -04:00
opts - > o_header . h_version = cpu_to_le32 ( DLM_HEADER_MAJOR | DLM_HEADER_MINOR ) ;
opts - > o_header . h_nodeid = cpu_to_le32 ( dlm_our_nodeid ( ) ) ;
opts - > o_header . h_length = cpu_to_le16 ( DLM_MIDCOMMS_OPT_LEN + inner_len ) ;
opts - > o_header . u . h_seq = cpu_to_le32 ( seq ) ;
2021-05-21 15:08:46 -04:00
}
2021-11-02 15:17:19 -04:00
static void midcomms_new_msg_cb ( void * data )
2021-05-21 15:08:46 -04:00
{
2021-11-02 15:17:19 -04:00
struct dlm_mhandle * mh = data ;
2021-05-21 15:08:46 -04:00
atomic_inc ( & mh - > node - > send_queue_cnt ) ;
2022-11-17 17:11:57 -05:00
spin_lock_bh ( & mh - > node - > send_queue_lock ) ;
2021-05-21 15:08:46 -04:00
list_add_tail_rcu ( & mh - > list , & mh - > node - > send_queue ) ;
2022-11-17 17:11:57 -05:00
spin_unlock_bh ( & mh - > node - > send_queue_lock ) ;
2021-05-21 15:08:46 -04:00
2023-05-29 17:44:39 -04:00
mh - > seq = atomic_fetch_inc ( & mh - > node - > seq_send ) ;
2021-05-21 15:08:46 -04:00
}
static struct dlm_msg * dlm_midcomms_get_msg_3_2 ( struct dlm_mhandle * mh , int nodeid ,
2024-04-02 15:17:57 -04:00
int len , char * * ppc )
2021-05-21 15:08:46 -04:00
{
struct dlm_opts * opts ;
struct dlm_msg * msg ;
msg = dlm_lowcomms_new_msg ( nodeid , len + DLM_MIDCOMMS_OPT_LEN ,
2024-04-02 15:17:57 -04:00
ppc , midcomms_new_msg_cb , mh ) ;
2021-05-21 15:08:46 -04:00
if ( ! msg )
return NULL ;
opts = ( struct dlm_opts * ) * ppc ;
mh - > opts = opts ;
/* add possible options here */
dlm_fill_opts_header ( opts , len , mh - > seq ) ;
* ppc + = sizeof ( * opts ) ;
2022-10-27 16:45:14 -04:00
mh - > inner_p = ( const union dlm_packet * ) * ppc ;
2021-05-21 15:08:46 -04:00
return msg ;
}
2022-04-04 16:06:37 -04:00
/* avoid false positive for nodes_srcu, unlock happens in
* dlm_midcomms_commit_mhandle which is a must call if success
*/
# ifndef __CHECKER__
2024-04-02 15:17:57 -04:00
struct dlm_mhandle * dlm_midcomms_get_mhandle ( int nodeid , int len , char * * ppc )
2021-05-21 15:08:46 -04:00
{
struct midcomms_node * node ;
struct dlm_mhandle * mh ;
struct dlm_msg * msg ;
int idx ;
idx = srcu_read_lock ( & nodes_srcu ) ;
2023-08-01 14:09:49 -04:00
node = nodeid2node ( nodeid ) ;
if ( WARN_ON_ONCE ( ! node ) )
2021-05-21 15:08:46 -04:00
goto err ;
/* this is a bug, however we going on and hope it will be resolved */
2022-10-27 16:45:27 -04:00
WARN_ON_ONCE ( test_bit ( DLM_NODE_FLAG_STOP_TX , & node - > flags ) ) ;
2021-05-21 15:08:46 -04:00
2024-04-02 15:17:57 -04:00
mh = dlm_allocate_mhandle ( ) ;
2021-05-21 15:08:46 -04:00
if ( ! mh )
goto err ;
2021-11-30 14:47:18 -05:00
mh - > committed = false ;
mh - > ack_rcv = NULL ;
2021-05-21 15:08:46 -04:00
mh - > idx = idx ;
mh - > node = node ;
switch ( node - > version ) {
case DLM_VERSION_3_1 :
2024-04-02 15:17:57 -04:00
msg = dlm_lowcomms_new_msg ( nodeid , len , ppc , NULL , NULL ) ;
2021-05-21 15:08:46 -04:00
if ( ! msg ) {
2021-11-30 14:47:18 -05:00
dlm_free_mhandle ( mh ) ;
2021-05-21 15:08:46 -04:00
goto err ;
}
break ;
case DLM_VERSION_3_2 :
2023-10-10 18:04:47 -04:00
/* send ack back if necessary */
dlm_send_ack_threshold ( node , DLM_SEND_ACK_BACK_MSG_THRESHOLD ) ;
2024-04-02 15:17:57 -04:00
msg = dlm_midcomms_get_msg_3_2 ( mh , nodeid , len , ppc ) ;
2021-05-21 15:08:46 -04:00
if ( ! msg ) {
2021-11-30 14:47:18 -05:00
dlm_free_mhandle ( mh ) ;
2021-05-21 15:08:46 -04:00
goto err ;
}
break ;
default :
2021-11-30 14:47:18 -05:00
dlm_free_mhandle ( mh ) ;
2022-10-27 16:45:27 -04:00
WARN_ON_ONCE ( 1 ) ;
2021-05-21 15:08:46 -04:00
goto err ;
}
mh - > msg = msg ;
/* keep in mind that is a must to call
* dlm_midcomms_commit_msg ( ) which releases
* nodes_srcu using mh - > idx which is assumed
* here that the application will call it .
*/
return mh ;
err :
srcu_read_unlock ( & nodes_srcu , idx ) ;
return NULL ;
}
2022-04-04 16:06:37 -04:00
# endif
2021-05-21 15:08:46 -04:00
2022-10-27 16:45:15 -04:00
static void dlm_midcomms_commit_msg_3_2_trace ( const struct dlm_mhandle * mh ,
const void * name , int namelen )
{
switch ( mh - > inner_p - > header . h_cmd ) {
case DLM_MSG :
2022-11-17 17:11:45 -05:00
trace_dlm_send_message ( mh - > node - > nodeid , mh - > seq ,
& mh - > inner_p - > message ,
2022-10-27 16:45:15 -04:00
name , namelen ) ;
break ;
case DLM_RCOM :
2022-11-17 17:11:45 -05:00
trace_dlm_send_rcom ( mh - > node - > nodeid , mh - > seq ,
& mh - > inner_p - > rcom ) ;
2022-10-27 16:45:15 -04:00
break ;
default :
/* nothing to trace */
break ;
}
}
static void dlm_midcomms_commit_msg_3_2 ( struct dlm_mhandle * mh ,
const void * name , int namelen )
2021-05-21 15:08:46 -04:00
{
/* nexthdr chain for fast lookup */
2022-10-27 16:45:14 -04:00
mh - > opts - > o_nextcmd = mh - > inner_p - > header . h_cmd ;
2021-05-21 15:08:46 -04:00
mh - > committed = true ;
2022-10-27 16:45:15 -04:00
dlm_midcomms_commit_msg_3_2_trace ( mh , name , namelen ) ;
2021-05-21 15:08:46 -04:00
dlm_lowcomms_commit_msg ( mh - > msg ) ;
}
2022-04-04 16:06:37 -04:00
/* avoid false positive for nodes_srcu, lock was happen in
* dlm_midcomms_get_mhandle
*/
# ifndef __CHECKER__
2022-10-27 16:45:15 -04:00
void dlm_midcomms_commit_mhandle ( struct dlm_mhandle * mh ,
const void * name , int namelen )
2021-05-21 15:08:46 -04:00
{
2022-10-27 16:45:15 -04:00
2021-05-21 15:08:46 -04:00
switch ( mh - > node - > version ) {
case DLM_VERSION_3_1 :
srcu_read_unlock ( & nodes_srcu , mh - > idx ) ;
dlm_lowcomms_commit_msg ( mh - > msg ) ;
dlm_lowcomms_put_msg ( mh - > msg ) ;
/* mh is not part of rcu list in this case */
2021-11-30 14:47:18 -05:00
dlm_free_mhandle ( mh ) ;
2021-05-21 15:08:46 -04:00
break ;
case DLM_VERSION_3_2 :
2023-01-12 17:10:32 -05:00
/* held rcu read lock here, because we sending the
* dlm message out , when we do that we could receive
* an ack back which releases the mhandle and we
* get a use after free .
*/
rcu_read_lock ( ) ;
2022-10-27 16:45:15 -04:00
dlm_midcomms_commit_msg_3_2 ( mh , name , namelen ) ;
2021-05-21 15:08:46 -04:00
srcu_read_unlock ( & nodes_srcu , mh - > idx ) ;
2023-01-12 17:10:32 -05:00
rcu_read_unlock ( ) ;
2021-05-21 15:08:46 -04:00
break ;
default :
srcu_read_unlock ( & nodes_srcu , mh - > idx ) ;
2022-10-27 16:45:27 -04:00
WARN_ON_ONCE ( 1 ) ;
2021-05-21 15:08:46 -04:00
break ;
}
}
2022-04-04 16:06:37 -04:00
# endif
2021-05-21 15:08:46 -04:00
/*
 * Start the comms stack. This is a thin wrapper: the work is done by
 * dlm_lowcomms_start() and its return value is passed through unchanged.
 */
int dlm_midcomms_start ( void )
2022-11-17 17:11:46 -05:00
{
return dlm_lowcomms_start ( ) ;
}
/*
 * Stop the comms stack. This is a thin wrapper around dlm_lowcomms_stop().
 */
void dlm_midcomms_stop ( void )
{
dlm_lowcomms_stop ( ) ;
}
void dlm_midcomms_init ( void )
2021-05-21 15:08:46 -04:00
{
int i ;
for ( i = 0 ; i < CONN_HASH_SIZE ; i + + )
INIT_HLIST_HEAD ( & node_hash [ i ] ) ;
2022-11-17 17:11:46 -05:00
dlm_lowcomms_init ( ) ;
}
2023-08-01 14:09:49 -04:00
/*
 * SRCU callback that frees a midcomms node. It is queued with call_srcu()
 * on nodes_srcu by dlm_midcomms_exit() and dlm_midcomms_close().
 *
 * @rcu: the rcu head embedded in the struct midcomms_node to release
 */
static void midcomms_node_release ( struct rcu_head * rcu )
{
struct midcomms_node * node = container_of ( rcu , struct midcomms_node , rcu ) ;
/* a non-zero send queue counter at this point is unexpected, warn once */
WARN_ON_ONCE ( atomic_read ( & node - > send_queue_cnt ) ) ;
/* flush the send queue before the node memory is given back */
dlm_send_queue_flush ( node ) ;
kfree ( node ) ;
}
2022-11-17 17:11:46 -05:00
void dlm_midcomms_exit ( void )
{
2023-08-01 14:09:49 -04:00
struct midcomms_node * node ;
int i , idx ;
idx = srcu_read_lock ( & nodes_srcu ) ;
for ( i = 0 ; i < CONN_HASH_SIZE ; i + + ) {
hlist_for_each_entry_rcu ( node , & node_hash [ i ] , hlist ) {
dlm_delete_debug_comms_file ( node - > debugfs ) ;
spin_lock ( & nodes_lock ) ;
hlist_del_rcu ( & node - > hlist ) ;
spin_unlock ( & nodes_lock ) ;
call_srcu ( & nodes_srcu , & node - > rcu , midcomms_node_release ) ;
}
}
srcu_read_unlock ( & nodes_srcu , idx ) ;
2022-11-17 17:11:46 -05:00
dlm_lowcomms_exit ( ) ;
2021-05-21 15:08:46 -04:00
}
static void dlm_act_fin_ack_rcv ( struct midcomms_node * node )
{
2024-04-02 15:18:09 -04:00
spin_lock_bh ( & node - > state_lock ) ;
2021-05-21 15:08:46 -04:00
pr_debug ( " receive active fin ack from node %d with state %s \n " ,
node - > nodeid , dlm_state_str ( node - > state ) ) ;
switch ( node - > state ) {
case DLM_FIN_WAIT1 :
node - > state = DLM_FIN_WAIT2 ;
pr_debug ( " switch node %d to state %s \n " ,
node - > nodeid , dlm_state_str ( node - > state ) ) ;
break ;
case DLM_CLOSING :
midcomms_node_reset ( node ) ;
pr_debug ( " switch node %d to state %s \n " ,
node - > nodeid , dlm_state_str ( node - > state ) ) ;
break ;
case DLM_CLOSED :
/* not valid but somehow we got what we want */
wake_up ( & node - > shutdown_wait ) ;
break ;
default :
2024-04-02 15:18:09 -04:00
spin_unlock_bh ( & node - > state_lock ) ;
2023-01-12 17:18:47 -05:00
log_print ( " %s: unexpected state: %d " ,
2021-05-21 15:08:46 -04:00
__func__ , node - > state ) ;
2022-10-27 16:45:27 -04:00
WARN_ON_ONCE ( 1 ) ;
2021-05-21 15:08:46 -04:00
return ;
}
2024-04-02 15:18:09 -04:00
spin_unlock_bh ( & node - > state_lock ) ;
2021-05-21 15:08:46 -04:00
}
void dlm_midcomms_add_member ( int nodeid )
{
struct midcomms_node * node ;
int idx ;
idx = srcu_read_lock ( & nodes_srcu ) ;
2023-08-01 14:09:49 -04:00
node = nodeid2node ( nodeid ) ;
if ( WARN_ON_ONCE ( ! node ) ) {
2021-05-21 15:08:46 -04:00
srcu_read_unlock ( & nodes_srcu , idx ) ;
return ;
}
2024-04-02 15:18:09 -04:00
spin_lock_bh ( & node - > state_lock ) ;
2021-05-21 15:08:46 -04:00
if ( ! node - > users ) {
pr_debug ( " receive add member from node %d with state %s \n " ,
node - > nodeid , dlm_state_str ( node - > state ) ) ;
switch ( node - > state ) {
case DLM_ESTABLISHED :
break ;
case DLM_CLOSED :
node - > state = DLM_ESTABLISHED ;
pr_debug ( " switch node %d to state %s \n " ,
node - > nodeid , dlm_state_str ( node - > state ) ) ;
break ;
default :
/* some invalid state passive shutdown
* was failed , we try to reset and
* hope it will go on .
*/
2021-05-26 09:53:39 +01:00
log_print ( " reset node %d because shutdown stuck " ,
2021-05-21 15:08:46 -04:00
node - > nodeid ) ;
midcomms_node_reset ( node ) ;
node - > state = DLM_ESTABLISHED ;
break ;
}
}
node - > users + + ;
2021-11-02 15:17:09 -04:00
pr_debug ( " node %d users inc count %d \n " , nodeid , node - > users ) ;
2024-04-02 15:18:09 -04:00
spin_unlock_bh ( & node - > state_lock ) ;
2021-05-21 15:08:46 -04:00
srcu_read_unlock ( & nodes_srcu , idx ) ;
}
void dlm_midcomms_remove_member ( int nodeid )
{
struct midcomms_node * node ;
int idx ;
idx = srcu_read_lock ( & nodes_srcu ) ;
2023-08-01 14:09:49 -04:00
node = nodeid2node ( nodeid ) ;
2023-10-10 18:04:45 -04:00
/* in case of dlm_midcomms_close() removes node */
if ( ! node ) {
2021-05-21 15:08:46 -04:00
srcu_read_unlock ( & nodes_srcu , idx ) ;
return ;
}
2024-04-02 15:18:09 -04:00
spin_lock_bh ( & node - > state_lock ) ;
2023-10-10 18:04:45 -04:00
/* case of dlm_midcomms_addr() created node but
* was not added before because dlm_midcomms_close ( )
* removed the node
*/
if ( ! node - > users ) {
2024-04-02 15:18:09 -04:00
spin_unlock_bh ( & node - > state_lock ) ;
2023-10-10 18:04:45 -04:00
srcu_read_unlock ( & nodes_srcu , idx ) ;
return ;
}
2021-05-21 15:08:46 -04:00
node - > users - - ;
2021-11-02 15:17:09 -04:00
pr_debug ( " node %d users dec count %d \n " , nodeid , node - > users ) ;
2021-05-21 15:08:46 -04:00
/* hitting users count to zero means the
* other side is running dlm_midcomms_stop ( )
* we meet us to have a clean disconnect .
*/
if ( node - > users = = 0 ) {
pr_debug ( " receive remove member from node %d with state %s \n " ,
node - > nodeid , dlm_state_str ( node - > state ) ) ;
switch ( node - > state ) {
case DLM_ESTABLISHED :
break ;
case DLM_CLOSE_WAIT :
/* passive shutdown DLM_LAST_ACK case 2 */
node - > state = DLM_LAST_ACK ;
pr_debug ( " switch node %d to state %s case 2 \n " ,
node - > nodeid , dlm_state_str ( node - > state ) ) ;
2023-01-12 17:10:36 -05:00
set_bit ( DLM_NODE_FLAG_STOP_RX , & node - > flags ) ;
dlm_send_fin ( node , dlm_pas_fin_ack_rcv ) ;
break ;
2021-05-21 15:08:46 -04:00
case DLM_LAST_ACK :
/* probably receive fin caught it, do nothing */
break ;
case DLM_CLOSED :
/* already gone, do nothing */
break ;
default :
2023-01-12 17:18:47 -05:00
log_print ( " %s: unexpected state: %d " ,
2021-05-21 15:08:46 -04:00
__func__ , node - > state ) ;
break ;
}
}
2024-04-02 15:18:09 -04:00
spin_unlock_bh ( & node - > state_lock ) ;
2021-05-21 15:08:46 -04:00
srcu_read_unlock ( & nodes_srcu , idx ) ;
}
2023-01-12 17:18:44 -05:00
void dlm_midcomms_version_wait ( void )
{
struct midcomms_node * node ;
int i , idx , ret ;
idx = srcu_read_lock ( & nodes_srcu ) ;
for ( i = 0 ; i < CONN_HASH_SIZE ; i + + ) {
hlist_for_each_entry_rcu ( node , & node_hash [ i ] , hlist ) {
ret = wait_event_timeout ( node - > shutdown_wait ,
node - > version ! = DLM_VERSION_NOT_SET | |
node - > state = = DLM_CLOSED | |
test_bit ( DLM_NODE_FLAG_CLOSE , & node - > flags ) ,
DLM_SHUTDOWN_TIMEOUT ) ;
if ( ! ret | | test_bit ( DLM_NODE_FLAG_CLOSE , & node - > flags ) )
pr_debug ( " version wait timed out for node %d with state %s \n " ,
node - > nodeid , dlm_state_str ( node - > state ) ) ;
}
}
srcu_read_unlock ( & nodes_srcu , idx ) ;
}
2021-05-21 15:08:46 -04:00
static void midcomms_shutdown ( struct midcomms_node * node )
{
int ret ;
/* old protocol, we don't wait for pending operations */
switch ( node - > version ) {
case DLM_VERSION_3_2 :
break ;
default :
return ;
}
2024-04-02 15:18:09 -04:00
spin_lock_bh ( & node - > state_lock ) ;
2021-05-21 15:08:46 -04:00
pr_debug ( " receive active shutdown for node %d with state %s \n " ,
node - > nodeid , dlm_state_str ( node - > state ) ) ;
switch ( node - > state ) {
case DLM_ESTABLISHED :
node - > state = DLM_FIN_WAIT1 ;
pr_debug ( " switch node %d to state %s case 2 \n " ,
node - > nodeid , dlm_state_str ( node - > state ) ) ;
2023-01-12 17:10:36 -05:00
dlm_send_fin ( node , dlm_act_fin_ack_rcv ) ;
2021-05-21 15:08:46 -04:00
break ;
case DLM_CLOSED :
/* we have what we want */
2023-01-12 17:18:42 -05:00
break ;
2021-05-21 15:08:46 -04:00
default :
/* busy to enter DLM_FIN_WAIT1, wait until passive
* done in shutdown_wait to enter DLM_CLOSED .
*/
break ;
}
2024-04-02 15:18:09 -04:00
spin_unlock_bh ( & node - > state_lock ) ;
2021-05-21 15:08:46 -04:00
2023-01-12 17:10:36 -05:00
if ( DLM_DEBUG_FENCE_TERMINATION )
msleep ( 5000 ) ;
2021-05-21 15:08:46 -04:00
/* wait for other side dlm + fin */
ret = wait_event_timeout ( node - > shutdown_wait ,
node - > state = = DLM_CLOSED | |
test_bit ( DLM_NODE_FLAG_CLOSE , & node - > flags ) ,
DLM_SHUTDOWN_TIMEOUT ) ;
2023-08-01 14:09:49 -04:00
if ( ! ret )
2021-05-21 15:08:46 -04:00
pr_debug ( " active shutdown timed out for node %d with state %s \n " ,
node - > nodeid , dlm_state_str ( node - > state ) ) ;
2023-01-12 17:18:42 -05:00
else
pr_debug ( " active shutdown done for node %d with state %s \n " ,
node - > nodeid , dlm_state_str ( node - > state ) ) ;
2021-05-21 15:08:46 -04:00
}
void dlm_midcomms_shutdown ( void )
{
struct midcomms_node * node ;
int i , idx ;
mutex_lock ( & close_lock ) ;
idx = srcu_read_lock ( & nodes_srcu ) ;
for ( i = 0 ; i < CONN_HASH_SIZE ; i + + ) {
hlist_for_each_entry_rcu ( node , & node_hash [ i ] , hlist ) {
midcomms_shutdown ( node ) ;
}
}
2023-01-12 17:18:42 -05:00
dlm_lowcomms_shutdown ( ) ;
2023-10-10 18:04:46 -04:00
for ( i = 0 ; i < CONN_HASH_SIZE ; i + + ) {
hlist_for_each_entry_rcu ( node , & node_hash [ i ] , hlist ) {
midcomms_node_reset ( node ) ;
}
}
srcu_read_unlock ( & nodes_srcu , idx ) ;
mutex_unlock ( & close_lock ) ;
2021-05-21 15:08:46 -04:00
}
int dlm_midcomms_close ( int nodeid )
{
struct midcomms_node * node ;
int idx , ret ;
idx = srcu_read_lock ( & nodes_srcu ) ;
/* Abort pending close/remove operation */
2023-08-01 14:09:49 -04:00
node = nodeid2node ( nodeid ) ;
2021-05-21 15:08:46 -04:00
if ( node ) {
/* let shutdown waiters leave */
set_bit ( DLM_NODE_FLAG_CLOSE , & node - > flags ) ;
wake_up ( & node - > shutdown_wait ) ;
}
srcu_read_unlock ( & nodes_srcu , idx ) ;
synchronize_srcu ( & nodes_srcu ) ;
mutex_lock ( & close_lock ) ;
2023-08-01 14:09:44 -04:00
idx = srcu_read_lock ( & nodes_srcu ) ;
2023-08-01 14:09:49 -04:00
node = nodeid2node ( nodeid ) ;
2021-05-21 15:08:46 -04:00
if ( ! node ) {
srcu_read_unlock ( & nodes_srcu , idx ) ;
2023-08-01 14:09:44 -04:00
mutex_unlock ( & close_lock ) ;
2021-05-21 15:08:46 -04:00
return dlm_lowcomms_close ( nodeid ) ;
}
ret = dlm_lowcomms_close ( nodeid ) ;
2023-08-01 14:09:49 -04:00
dlm_delete_debug_comms_file ( node - > debugfs ) ;
2024-04-02 15:18:09 -04:00
spin_lock_bh ( & nodes_lock ) ;
2023-08-01 14:09:49 -04:00
hlist_del_rcu ( & node - > hlist ) ;
2024-04-02 15:18:09 -04:00
spin_unlock_bh ( & nodes_lock ) ;
2021-05-21 15:08:46 -04:00
srcu_read_unlock ( & nodes_srcu , idx ) ;
2023-08-01 14:09:49 -04:00
/* wait that all readers left until flush send queue */
synchronize_srcu ( & nodes_srcu ) ;
/* drop all pending dlm messages, this is fine as
* this function get called when the node is fenced
*/
dlm_send_queue_flush ( node ) ;
call_srcu ( & nodes_srcu , & node - > rcu , midcomms_node_release ) ;
2021-05-21 15:08:46 -04:00
mutex_unlock ( & close_lock ) ;
return ret ;
}
2021-11-02 15:17:20 -04:00
/* debug functionality to send raw dlm msg from user space
 *
 * Context passed as callback data from dlm_midcomms_rawmsg_send() to
 * midcomms_new_rawmsg_cb().
 */
struct dlm_rawmsg_data {
/* node the raw message is sent to */
struct midcomms_node * node ;
/* raw message bytes; the callback reads them as a struct dlm_header */
void * buf ;
} ;
static void midcomms_new_rawmsg_cb ( void * data )
{
struct dlm_rawmsg_data * rd = data ;
struct dlm_header * h = rd - > buf ;
switch ( h - > h_version ) {
case cpu_to_le32 ( DLM_VERSION_3_1 ) :
break ;
default :
switch ( h - > h_cmd ) {
case DLM_OPTS :
if ( ! h - > u . h_seq )
2023-05-29 17:44:39 -04:00
h - > u . h_seq = cpu_to_le32 ( atomic_fetch_inc ( & rd - > node - > seq_send ) ) ;
2021-11-02 15:17:20 -04:00
break ;
default :
break ;
}
break ;
}
}
int dlm_midcomms_rawmsg_send ( struct midcomms_node * node , void * buf ,
int buflen )
{
struct dlm_rawmsg_data rd ;
struct dlm_msg * msg ;
char * msgbuf ;
rd . node = node ;
rd . buf = buf ;
2024-04-02 15:17:57 -04:00
msg = dlm_lowcomms_new_msg ( node - > nodeid , buflen , & msgbuf ,
midcomms_new_rawmsg_cb , & rd ) ;
2021-11-02 15:17:20 -04:00
if ( ! msg )
return - ENOMEM ;
memcpy ( msgbuf , buf , buflen ) ;
dlm_lowcomms_commit_msg ( msg ) ;
return 0 ;
}