// SPDX-License-Identifier: GPL-2.0-only
/******************************************************************************
*******************************************************************************
**
**  Copyright (C) 2005-2007 Red Hat, Inc.  All rights reserved.
**
**
*******************************************************************************
******************************************************************************/
# include "dlm_internal.h"
# include "member.h"
# include "lock.h"
# include "dir.h"
# include "config.h"
# include "requestqueue.h"
2022-04-04 16:06:41 -04:00
# include "util.h"
2006-01-18 09:30:29 +00:00
struct rq_entry {
struct list_head list ;
2012-04-23 16:36:01 -05:00
uint32_t recover_seq ;
2006-01-18 09:30:29 +00:00
int nodeid ;
2008-01-25 00:28:28 -05:00
struct dlm_message request ;
2006-01-18 09:30:29 +00:00
} ;
/*
* Requests received while the lockspace is in recovery get added to the
* request queue and processed when recovery is complete . This happens when
* the lockspace is suspended on some nodes before it is on others , or the
* lockspace is enabled on some while still suspended on others .
*/
2023-08-01 14:09:48 -04:00
void dlm_add_requestqueue ( struct dlm_ls * ls , int nodeid ,
const struct dlm_message * ms )
2006-01-18 09:30:29 +00:00
{
struct rq_entry * e ;
2022-04-04 16:06:39 -04:00
int length = le16_to_cpu ( ms - > m_header . h_length ) -
sizeof ( struct dlm_message ) ;
2006-01-18 09:30:29 +00:00
2009-11-30 16:34:43 -06:00
e = kmalloc ( sizeof ( struct rq_entry ) + length , GFP_NOFS ) ;
2006-01-18 09:30:29 +00:00
if ( ! e ) {
2007-09-27 15:53:38 -05:00
log_print ( " dlm_add_requestqueue: out of memory len %d " , length ) ;
return ;
2006-01-18 09:30:29 +00:00
}
2012-04-23 16:36:01 -05:00
e - > recover_seq = ls - > ls_recover_seq & 0xFFFFFFFF ;
2006-01-18 09:30:29 +00:00
e - > nodeid = nodeid ;
2022-11-17 17:11:40 -05:00
memcpy ( & e - > request , ms , sizeof ( * ms ) ) ;
memcpy ( & e - > request . m_extra , ms - > m_extra , length ) ;
2006-01-18 09:30:29 +00:00
2021-11-02 15:17:17 -04:00
atomic_inc ( & ls - > ls_requestqueue_cnt ) ;
2006-01-20 08:47:07 +00:00
mutex_lock ( & ls - > ls_requestqueue_mutex ) ;
2007-09-27 15:53:38 -05:00
list_add_tail ( & e - > list , & ls - > ls_requestqueue ) ;
2006-01-20 08:47:07 +00:00
mutex_unlock ( & ls - > ls_requestqueue_mutex ) ;
2006-01-18 09:30:29 +00:00
}
2007-09-27 15:53:38 -05:00
/*
* Called by dlm_recoverd to process normal messages saved while recovery was
* happening . Normal locking has been enabled before this is called . dlm_recv
* upon receiving a message , will wait for all saved messages to be drained
* here before processing the message it got . If a new dlm_ls_stop ( ) arrives
* while we ' re processing these saved messages , it may block trying to suspend
* dlm_recv if dlm_recv is waiting for us in dlm_wait_requestqueue . In that
* case , we don ' t abort since locking_stopped is still 0. If dlm_recv is not
* waiting for us , then this processing may be aborted due to locking_stopped .
*/
2006-01-18 09:30:29 +00:00
int dlm_process_requestqueue ( struct dlm_ls * ls )
{
struct rq_entry * e ;
dlm: fixes for nodir mode
The "nodir" mode (statically assign master nodes instead
of using the resource directory) has always been highly
experimental, and never seriously used. This commit
fixes a number of problems, making nodir much more usable.
- Major change to recovery: recover all locks and restart
all in-progress operations after recovery. In some
cases it's not possible to know which in-progess locks
to recover, so recover all. (Most require recovery
in nodir mode anyway since rehashing changes most
master nodes.)
- Change the way nodir mode is enabled, from a command
line mount arg passed through gfs2, into a sysfs
file managed by dlm_controld, consistent with the
other config settings.
- Allow recovering MSTCPY locks on an rsb that has not
yet been turned into a master copy.
- Ignore RCOM_LOCK and RCOM_LOCK_REPLY recovery messages
from a previous, aborted recovery cycle. Base this
on the local recovery status not being in the state
where any nodes should be sending LOCK messages for the
current recovery cycle.
- Hold rsb lock around dlm_purge_mstcpy_locks() because it
may run concurrently with dlm_recover_master_copy().
- Maintain highbast on process-copy lkb's (in addition to
the master as is usual), because the lkb can switch
back and forth between being a master and being a
process copy as the master node changes in recovery.
- When recovering MSTCPY locks, flag rsb's that have
non-empty convert or waiting queues for granting
at the end of recovery. (Rename flag from LOCKS_PURGED
to RECOVER_GRANT and similar for the recovery function,
because it's not only resources with purged locks
that need grant a grant attempt.)
- Replace a couple of unnecessary assertion panics with
error messages.
Signed-off-by: David Teigland <teigland@redhat.com>
2012-04-26 15:54:29 -05:00
struct dlm_message * ms ;
2006-01-18 09:30:29 +00:00
int error = 0 ;
2006-01-20 08:47:07 +00:00
mutex_lock ( & ls - > ls_requestqueue_mutex ) ;
2006-01-18 09:30:29 +00:00
for ( ; ; ) {
if ( list_empty ( & ls - > ls_requestqueue ) ) {
2006-01-20 08:47:07 +00:00
mutex_unlock ( & ls - > ls_requestqueue_mutex ) ;
2006-01-18 09:30:29 +00:00
error = 0 ;
break ;
}
e = list_entry ( ls - > ls_requestqueue . next , struct rq_entry , list ) ;
2006-01-20 08:47:07 +00:00
mutex_unlock ( & ls - > ls_requestqueue_mutex ) ;
2006-01-18 09:30:29 +00:00
dlm: fixes for nodir mode
The "nodir" mode (statically assign master nodes instead
of using the resource directory) has always been highly
experimental, and never seriously used. This commit
fixes a number of problems, making nodir much more usable.
- Major change to recovery: recover all locks and restart
all in-progress operations after recovery. In some
cases it's not possible to know which in-progess locks
to recover, so recover all. (Most require recovery
in nodir mode anyway since rehashing changes most
master nodes.)
- Change the way nodir mode is enabled, from a command
line mount arg passed through gfs2, into a sysfs
file managed by dlm_controld, consistent with the
other config settings.
- Allow recovering MSTCPY locks on an rsb that has not
yet been turned into a master copy.
- Ignore RCOM_LOCK and RCOM_LOCK_REPLY recovery messages
from a previous, aborted recovery cycle. Base this
on the local recovery status not being in the state
where any nodes should be sending LOCK messages for the
current recovery cycle.
- Hold rsb lock around dlm_purge_mstcpy_locks() because it
may run concurrently with dlm_recover_master_copy().
- Maintain highbast on process-copy lkb's (in addition to
the master as is usual), because the lkb can switch
back and forth between being a master and being a
process copy as the master node changes in recovery.
- When recovering MSTCPY locks, flag rsb's that have
non-empty convert or waiting queues for granting
at the end of recovery. (Rename flag from LOCKS_PURGED
to RECOVER_GRANT and similar for the recovery function,
because it's not only resources with purged locks
that need grant a grant attempt.)
- Replace a couple of unnecessary assertion panics with
error messages.
Signed-off-by: David Teigland <teigland@redhat.com>
2012-04-26 15:54:29 -05:00
ms = & e - > request ;
log_limit ( ls , " dlm_process_requestqueue msg %d from %d "
" lkid %x remid %x result %d seq %u " ,
2022-04-04 16:06:41 -04:00
le32_to_cpu ( ms - > m_type ) ,
le32_to_cpu ( ms - > m_header . h_nodeid ) ,
le32_to_cpu ( ms - > m_lkid ) , le32_to_cpu ( ms - > m_remid ) ,
from_dlm_errno ( le32_to_cpu ( ms - > m_result ) ) ,
dlm: fixes for nodir mode
The "nodir" mode (statically assign master nodes instead
of using the resource directory) has always been highly
experimental, and never seriously used. This commit
fixes a number of problems, making nodir much more usable.
- Major change to recovery: recover all locks and restart
all in-progress operations after recovery. In some
cases it's not possible to know which in-progess locks
to recover, so recover all. (Most require recovery
in nodir mode anyway since rehashing changes most
master nodes.)
- Change the way nodir mode is enabled, from a command
line mount arg passed through gfs2, into a sysfs
file managed by dlm_controld, consistent with the
other config settings.
- Allow recovering MSTCPY locks on an rsb that has not
yet been turned into a master copy.
- Ignore RCOM_LOCK and RCOM_LOCK_REPLY recovery messages
from a previous, aborted recovery cycle. Base this
on the local recovery status not being in the state
where any nodes should be sending LOCK messages for the
current recovery cycle.
- Hold rsb lock around dlm_purge_mstcpy_locks() because it
may run concurrently with dlm_recover_master_copy().
- Maintain highbast on process-copy lkb's (in addition to
the master as is usual), because the lkb can switch
back and forth between being a master and being a
process copy as the master node changes in recovery.
- When recovering MSTCPY locks, flag rsb's that have
non-empty convert or waiting queues for granting
at the end of recovery. (Rename flag from LOCKS_PURGED
to RECOVER_GRANT and similar for the recovery function,
because it's not only resources with purged locks
that need grant a grant attempt.)
- Replace a couple of unnecessary assertion panics with
error messages.
Signed-off-by: David Teigland <teigland@redhat.com>
2012-04-26 15:54:29 -05:00
e - > recover_seq ) ;
2012-04-23 16:36:01 -05:00
dlm_receive_message_saved ( ls , & e - > request , e - > recover_seq ) ;
2006-01-18 09:30:29 +00:00
2006-01-20 08:47:07 +00:00
mutex_lock ( & ls - > ls_requestqueue_mutex ) ;
2006-01-18 09:30:29 +00:00
list_del ( & e - > list ) ;
2021-11-02 15:17:17 -04:00
if ( atomic_dec_and_test ( & ls - > ls_requestqueue_cnt ) )
wake_up ( & ls - > ls_requestqueue_wait ) ;
2006-01-18 09:30:29 +00:00
kfree ( e ) ;
if ( dlm_locking_stopped ( ls ) ) {
log_debug ( ls , " process_requestqueue abort running " ) ;
2006-01-20 08:47:07 +00:00
mutex_unlock ( & ls - > ls_requestqueue_mutex ) ;
2006-01-18 09:30:29 +00:00
error = - EINTR ;
break ;
}
schedule ( ) ;
}
return error ;
}
/*
* After recovery is done , locking is resumed and dlm_recoverd takes all the
2007-09-27 15:53:38 -05:00
* saved requests and processes them as they would have been by dlm_recv . At
* the same time , dlm_recv will start receiving new requests from remote nodes .
* We want to delay dlm_recv processing new requests until dlm_recoverd has
* finished processing the old saved requests . We don ' t check for locking
* stopped here because dlm_ls_stop won ' t stop locking until it ' s suspended us
* ( dlm_recv ) .
2006-01-18 09:30:29 +00:00
*/
void dlm_wait_requestqueue ( struct dlm_ls * ls )
{
2021-11-02 15:17:17 -04:00
wait_event ( ls - > ls_requestqueue_wait ,
atomic_read ( & ls - > ls_requestqueue_cnt ) = = 0 ) ;
2006-01-18 09:30:29 +00:00
}
static int purge_request ( struct dlm_ls * ls , struct dlm_message * ms , int nodeid )
{
2022-04-04 16:06:41 -04:00
__le32 type = ms - > m_type ;
2006-01-18 09:30:29 +00:00
2006-11-27 11:31:22 -06:00
/* the ls is being cleaned up and freed by release_lockspace */
2021-11-02 15:17:18 -04:00
if ( ! atomic_read ( & ls - > ls_count ) )
2006-11-27 11:31:22 -06:00
return 1 ;
2006-01-18 09:30:29 +00:00
if ( dlm_is_removed ( ls , nodeid ) )
return 1 ;
/* directory operations are always purged because the directory is
always rebuilt during recovery and the lookups resent */
2022-04-04 16:06:41 -04:00
if ( type = = cpu_to_le32 ( DLM_MSG_REMOVE ) | |
type = = cpu_to_le32 ( DLM_MSG_LOOKUP ) | |
type = = cpu_to_le32 ( DLM_MSG_LOOKUP_REPLY ) )
2006-01-18 09:30:29 +00:00
return 1 ;
if ( ! dlm_no_directory ( ls ) )
return 0 ;
dlm: fixes for nodir mode
The "nodir" mode (statically assign master nodes instead
of using the resource directory) has always been highly
experimental, and never seriously used. This commit
fixes a number of problems, making nodir much more usable.
- Major change to recovery: recover all locks and restart
all in-progress operations after recovery. In some
cases it's not possible to know which in-progess locks
to recover, so recover all. (Most require recovery
in nodir mode anyway since rehashing changes most
master nodes.)
- Change the way nodir mode is enabled, from a command
line mount arg passed through gfs2, into a sysfs
file managed by dlm_controld, consistent with the
other config settings.
- Allow recovering MSTCPY locks on an rsb that has not
yet been turned into a master copy.
- Ignore RCOM_LOCK and RCOM_LOCK_REPLY recovery messages
from a previous, aborted recovery cycle. Base this
on the local recovery status not being in the state
where any nodes should be sending LOCK messages for the
current recovery cycle.
- Hold rsb lock around dlm_purge_mstcpy_locks() because it
may run concurrently with dlm_recover_master_copy().
- Maintain highbast on process-copy lkb's (in addition to
the master as is usual), because the lkb can switch
back and forth between being a master and being a
process copy as the master node changes in recovery.
- When recovering MSTCPY locks, flag rsb's that have
non-empty convert or waiting queues for granting
at the end of recovery. (Rename flag from LOCKS_PURGED
to RECOVER_GRANT and similar for the recovery function,
because it's not only resources with purged locks
that need grant a grant attempt.)
- Replace a couple of unnecessary assertion panics with
error messages.
Signed-off-by: David Teigland <teigland@redhat.com>
2012-04-26 15:54:29 -05:00
return 1 ;
2006-01-18 09:30:29 +00:00
}
void dlm_purge_requestqueue ( struct dlm_ls * ls )
{
struct dlm_message * ms ;
struct rq_entry * e , * safe ;
2006-01-20 08:47:07 +00:00
mutex_lock ( & ls - > ls_requestqueue_mutex ) ;
2006-01-18 09:30:29 +00:00
list_for_each_entry_safe ( e , safe , & ls - > ls_requestqueue , list ) {
2008-01-25 00:28:28 -05:00
ms = & e - > request ;
2006-01-18 09:30:29 +00:00
if ( purge_request ( ls , ms , e - > nodeid ) ) {
list_del ( & e - > list ) ;
2021-11-02 15:17:17 -04:00
if ( atomic_dec_and_test ( & ls - > ls_requestqueue_cnt ) )
wake_up ( & ls - > ls_requestqueue_wait ) ;
2006-01-18 09:30:29 +00:00
kfree ( e ) ;
}
}
2006-01-20 08:47:07 +00:00
mutex_unlock ( & ls - > ls_requestqueue_mutex ) ;
2006-01-18 09:30:29 +00:00
}