2006-01-18 12:30:29 +03:00
/******************************************************************************
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
* *
* * Copyright ( C ) Sistina Software , Inc . 1997 - 2003 All rights reserved .
2011-11-02 23:30:58 +04:00
* * Copyright ( C ) 2004 - 2011 Red Hat , Inc . All rights reserved .
2006-01-18 12:30:29 +03:00
* *
* * This copyrighted material is made available to anyone wishing to use ,
* * modify , copy , or redistribute it subject to the terms and conditions
* * of the GNU General Public License v .2 .
* *
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
# ifndef __DLM_INTERNAL_DOT_H__
# define __DLM_INTERNAL_DOT_H__
/*
* This is the main header file to be included in each DLM source file .
*/
# include <linux/module.h>
# include <linux/slab.h>
# include <linux/sched.h>
# include <linux/types.h>
# include <linux/ctype.h>
# include <linux/spinlock.h>
# include <linux/vmalloc.h>
# include <linux/list.h>
# include <linux/errno.h>
# include <linux/random.h>
# include <linux/delay.h>
# include <linux/socket.h>
# include <linux/kthread.h>
# include <linux/kobject.h>
# include <linux/kref.h>
# include <linux/kernel.h>
# include <linux/jhash.h>
2006-07-13 01:44:04 +04:00
# include <linux/miscdevice.h>
2006-01-20 11:47:07 +03:00
# include <linux/mutex.h>
2011-07-07 02:00:54 +04:00
# include <linux/idr.h>
2012-04-23 22:58:42 +04:00
# include <linux/ratelimit.h>
2006-01-18 12:30:29 +03:00
# include <asm/uaccess.h>
# include <linux/dlm.h>
2007-01-09 18:44:01 +03:00
# include "config.h"
2006-01-18 12:30:29 +03:00
/* Size of the temp buffer midcomms allocates on the stack.
We try to make this large enough so most messages fit .
FIXME : should sctp make this unnecessary ? */
# define DLM_INBUF_LEN 148
/*
 * Forward declarations: the structures below refer to one another by
 * pointer before their full definitions appear (e.g. dlm_lkb holds a
 * struct dlm_rsb pointer ahead of the dlm_rsb definition).
 */
struct dlm_ls ;
struct dlm_lkb ;
struct dlm_rsb ;
struct dlm_member ;
struct dlm_rsbtable ;
struct dlm_recover ;
struct dlm_header ;
struct dlm_message ;
struct dlm_rcom ;
struct dlm_mhandle ;
/*
 * log_print - print an error-level message carrying the dlm prefix.
 * @fmt:  format string literal; it is concatenated with the prefix and
 *        the trailing newline, so it must be a literal.
 * @args: optional printk arguments matching @fmt.
 */
#define log_print(fmt, args...) \
	printk(KERN_ERR " dlm: " fmt " \n " , ##args)
/*
 * log_error - print an error-level message tagged with the lockspace name.
 * @ls:   pointer to an object with an ls_name member (struct dlm_ls here).
 * @fmt:  format string literal, concatenated with the prefix.
 * @args: optional printk arguments matching @fmt.
 */
#define log_error(ls, fmt, args...) \
	printk(KERN_ERR " dlm: %s: " fmt " \n " , (ls)->ls_name , ##args)
2014-02-14 21:54:44 +04:00
/*
 * log_rinfo - print an info-level message tagged with the lockspace name.
 * @ls:   pointer to an object with an ls_name member (struct dlm_ls here).
 * @fmt:  format string literal, concatenated with the prefix.
 * @args: optional printk arguments matching @fmt.
 *
 * The expansion deliberately carries no trailing semicolon: the caller
 * supplies it, so "if (c) log_rinfo(...); else ..." parses correctly.
 */
#define log_rinfo(ls, fmt, args...) \
	printk(KERN_INFO " dlm: %s: " fmt " \n " , (ls)->ls_name , ##args)
2006-01-18 12:30:29 +03:00
2007-01-09 18:44:01 +03:00
/*
 * log_debug - print a debug-level message tagged with the lockspace name,
 * but only when dlm_config.ci_log_debug is non-zero.
 * @ls:   pointer to an object with an ls_name member (struct dlm_ls here).
 * @fmt:  format string literal, concatenated with the prefix.
 * @args: optional printk arguments matching @fmt.
 */
#define log_debug(ls, fmt, args...) \
do { \
	if (dlm_config.ci_log_debug) \
		printk(KERN_DEBUG " dlm: %s: " fmt " \n " , \
		       (ls)->ls_name , ##args); \
} while (0)
2006-01-18 12:30:29 +03:00
2012-04-23 22:58:42 +04:00
/*
 * log_limit - like log_debug, but emitted through printk_ratelimited().
 * Nothing is printed unless dlm_config.ci_log_debug is non-zero.
 * @ls:   pointer to an object with an ls_name member (struct dlm_ls here).
 * @fmt:  format string literal, concatenated with the prefix.
 * @args: optional printk arguments matching @fmt.
 */
#define log_limit(ls, fmt, args...) \
do { \
	if (dlm_config.ci_log_debug) \
		printk_ratelimited(KERN_DEBUG " dlm: %s: " fmt " \n " , \
				   (ls)->ls_name , ##args); \
} while (0)
2006-01-18 12:30:29 +03:00
/*
 * DLM_ASSERT(x, do): when expression x evaluates false, print the line,
 * file, stringified expression and current jiffies, execute the caller's
 * "do" statement(s), print a blank line, then call BUG() and panic().
 *
 * NOTE(review): this expands to a bare { } block rather than
 * do { } while (0), so "if (c) DLM_ASSERT(...); else ..." will not parse.
 * The macro parameter is literally named "do", so it would have to be
 * renamed before the body could be wrapped in do/while.
 * NOTE(review): panic() follows BUG(); presumably it is only reached when
 * BUG() returns - confirm for the configurations that matter.
 */
# define DLM_ASSERT(x, do) \
{ \
if ( ! ( x ) ) \
{ \
printk ( KERN_ERR " \n DLM: Assertion failed on line %d of file %s \n " \
" DLM: assertion: \" %s \" \n " \
" DLM: time = %lu \n " , \
__LINE__ , __FILE__ , # x , jiffies ) ; \
{ do } \
printk ( " \n " ) ; \
BUG ( ) ; \
panic ( " DLM: Record message above and reboot. \n " ) ; \
} \
}
2013-01-07 21:03:42 +04:00
# define DLM_RTF_SHRINK 0x00000001
2006-01-18 12:30:29 +03:00
struct dlm_rsbtable {
2011-10-27 00:24:55 +04:00
struct rb_root keep ;
struct rb_root toss ;
2009-01-08 01:50:41 +03:00
spinlock_t lock ;
2013-01-07 21:03:42 +04:00
uint32_t flags ;
2006-01-18 12:30:29 +03:00
} ;
/*
* Lockspace member ( per node in a ls )
*/
/* One entry per node that is a member of a lockspace. */
struct dlm_member {
	struct list_head	list;
	int			nodeid;
	int			weight;
	int			slot;
	int			slot_prev;
	int			comm_seq;
	uint32_t		generation;
};
2006-01-18 12:30:29 +03:00
/*
* Save and manage recovery state for a lockspace .
*/
/*
 * Arguments for one recovery run: an array of nodes_count
 * dlm_config_node entries plus a 64-bit sequence number.
 */
struct dlm_recover {
	struct list_head	list;
	struct dlm_config_node	*nodes;
	int			nodes_count;
	uint64_t		seq;
};
/*
* Pass input args to second stage locking function .
*/
/*
 * Bundle of caller-supplied lock arguments handed to the second stage
 * locking function: flags, completion/blocking ast callbacks and their
 * parameter, requested mode, status block and timeout.
 */
struct dlm_args {
	uint32_t		flags;
	void			(*astfn) (void *astparam);
	void			*astparam;
	void			(*bastfn) (void *astparam, int mode);
	int			mode;
	struct dlm_lksb		*lksb;
	unsigned long		timeout;
};
/*
* Lock block
*
* A lock can be one of three types:
*
* local copy      lock is mastered locally
*                 (lkb_nodeid is zero and DLM_IFL_MSTCPY is not set)
* process copy    lock is mastered on a remote node
*                 (lkb_nodeid is non-zero and DLM_IFL_MSTCPY is not set)
* master copy     master node's copy of a lock owned by remote node
*                 (lkb_nodeid is non-zero and DLM_IFL_MSTCPY is set)
*
* lkb_exflags : a copy of the most recent flags arg provided to dlm_lock or
* dlm_unlock . The dlm does not modify these or use any private flags in
* this field ; it only contains DLM_LKF_ flags from dlm . h . These flags
* are sent as - is to the remote master when the lock is remote .
*
* lkb_flags : internal dlm flags ( DLM_IFL_ prefix ) from dlm_internal . h .
* Some internal flags are shared between the master and process nodes ;
* these shared flags are kept in the lower two bytes . One of these
* flags set on the master copy will be propagated to the process copy
* and v . v . Other internal flags are private to the master or process
* node ( e . g . DLM_IFL_MSTCPY ) . These are kept in the high two bytes .
*
* lkb_sbflags : status block flags . These flags are copied directly into
* the caller ' s lksb . sb_flags prior to the dlm_lock / dlm_unlock completion
* ast . All defined in dlm . h with DLM_SBF_ prefix .
*
* lkb_status : the lock status indicates which rsb queue the lock is
* on , grant , convert , or wait . DLM_LKSTS_ WAITING / GRANTED / CONVERT
*
* lkb_wait_type : the dlm message type ( DLM_MSG_ prefix ) for which a
* reply is needed . Only set when the lkb is on the lockspace waiters
* list awaiting a reply from a remote node .
*
* lkb_nodeid : when the lkb is a local copy , nodeid is 0 ; when the lkb
* is a master copy , nodeid specifies the remote lock holder , when the
* lkb is a process copy , the nodeid specifies the lock master .
*/
/* lkb_status */

#define DLM_LKSTS_WAITING	1
#define DLM_LKSTS_GRANTED	2
#define DLM_LKSTS_CONVERT	3

/*
 * lkb_flags
 *
 * Values in the high two bytes are private to the local node; values in
 * the low two bytes are shared between the master and process copies.
 */

#define DLM_IFL_MSTCPY		0x00010000
#define DLM_IFL_RESEND		0x00020000
#define DLM_IFL_DEAD		0x00040000
#define DLM_IFL_OVERLAP_UNLOCK	0x00080000
#define DLM_IFL_OVERLAP_CANCEL	0x00100000
#define DLM_IFL_ENDOFLIFE	0x00200000
#define DLM_IFL_WATCH_TIMEWARN	0x00400000
#define DLM_IFL_TIMEOUT_CANCEL	0x00800000
#define DLM_IFL_DEADLOCK_CANCEL	0x01000000
#define DLM_IFL_STUB_MS		0x02000000 /* magic number for m_flags */
#define DLM_IFL_USER		0x00000001
#define DLM_IFL_ORPHAN		0x00000002
2006-01-18 12:30:29 +03:00
2011-02-21 23:58:21 +03:00
/* number of entries in dlm_lkb.lkb_callbacks[] */
# define DLM_CALLBACKS_SIZE 6
/* bits for dlm_callback.flags */
# define DLM_CB_CAST 0x00000001
# define DLM_CB_BAST 0x00000002
# define DLM_CB_SKIP 0x00000004
/* One saved callback record; an lkb stores an array of these plus the last cast and last bast. */
struct dlm_callback {
uint64_t seq ;
uint32_t flags ; /* DLM_CB_ bits defined above */
int sb_status ; /* copy to lksb status */
uint8_t sb_flags ; /* copy to lksb flags */
int8_t mode ; /* rq mode of bast, gr mode of cast */
} ;
2006-01-18 12:30:29 +03:00
struct dlm_lkb {
struct dlm_rsb * lkb_resource ; /* the rsb */
struct kref lkb_ref ;
int lkb_nodeid ; /* copied from rsb */
int lkb_ownpid ; /* pid of lock owner */
uint32_t lkb_id ; /* our lock ID */
uint32_t lkb_remid ; /* lock ID on remote partner */
uint32_t lkb_exflags ; /* external flags from caller */
uint32_t lkb_sbflags ; /* lksb flags */
uint32_t lkb_flags ; /* internal flags */
uint32_t lkb_lvbseq ; /* lvb sequence number */
int8_t lkb_status ; /* granted, waiting, convert */
int8_t lkb_rqmode ; /* requested lock mode */
int8_t lkb_grmode ; /* granted lock mode */
int8_t lkb_highbast ; /* highest mode bast sent for */
2010-02-24 20:08:18 +03:00
2006-01-18 12:30:29 +03:00
int8_t lkb_wait_type ; /* type of reply waiting for */
2007-03-28 18:56:46 +04:00
int8_t lkb_wait_count ;
2011-03-28 23:17:26 +04:00
int lkb_wait_nodeid ; /* for debugging */
2006-01-18 12:30:29 +03:00
struct list_head lkb_statequeue ; /* rsb g/c/w list */
struct list_head lkb_rsb_lookup ; /* waiting for rsb lookup */
struct list_head lkb_wait_reply ; /* waiting for remote reply */
2006-07-13 01:44:04 +04:00
struct list_head lkb_ownqueue ; /* list of locks for a process */
2007-05-18 17:59:31 +04:00
struct list_head lkb_time_list ;
2008-12-09 23:12:21 +03:00
ktime_t lkb_timestamp ;
2011-03-28 23:17:26 +04:00
ktime_t lkb_wait_time ;
2007-05-18 17:59:31 +04:00
unsigned long lkb_timeout_cs ;
2006-01-18 12:30:29 +03:00
2011-04-05 22:16:24 +04:00
struct mutex lkb_cb_mutex ;
struct work_struct lkb_cb_work ;
struct list_head lkb_cb_list ; /* for ls_cb_delay or proc->asts */
2011-02-21 23:58:21 +03:00
struct dlm_callback lkb_callbacks [ DLM_CALLBACKS_SIZE ] ;
struct dlm_callback lkb_last_cast ;
struct dlm_callback lkb_last_bast ;
ktime_t lkb_last_cast_time ; /* for debugging */
ktime_t lkb_last_bast_time ; /* for debugging */
dlm: fixes for nodir mode
The "nodir" mode (statically assign master nodes instead
of using the resource directory) has always been highly
experimental, and never seriously used. This commit
fixes a number of problems, making nodir much more usable.
- Major change to recovery: recover all locks and restart
all in-progress operations after recovery. In some
cases it's not possible to know which in-progess locks
to recover, so recover all. (Most require recovery
in nodir mode anyway since rehashing changes most
master nodes.)
- Change the way nodir mode is enabled, from a command
line mount arg passed through gfs2, into a sysfs
file managed by dlm_controld, consistent with the
other config settings.
- Allow recovering MSTCPY locks on an rsb that has not
yet been turned into a master copy.
- Ignore RCOM_LOCK and RCOM_LOCK_REPLY recovery messages
from a previous, aborted recovery cycle. Base this
on the local recovery status not being in the state
where any nodes should be sending LOCK messages for the
current recovery cycle.
- Hold rsb lock around dlm_purge_mstcpy_locks() because it
may run concurrently with dlm_recover_master_copy().
- Maintain highbast on process-copy lkb's (in addition to
the master as is usual), because the lkb can switch
back and forth between being a master and being a
process copy as the master node changes in recovery.
- When recovering MSTCPY locks, flag rsb's that have
non-empty convert or waiting queues for granting
at the end of recovery. (Rename flag from LOCKS_PURGED
to RECOVER_GRANT and similar for the recovery function,
because it's not only resources with purged locks
that need grant a grant attempt.)
- Replace a couple of unnecessary assertion panics with
error messages.
Signed-off-by: David Teigland <teigland@redhat.com>
2012-04-27 00:54:29 +04:00
uint64_t lkb_recover_seq ; /* from ls_recover_seq */
2006-01-18 12:30:29 +03:00
char * lkb_lvbptr ;
struct dlm_lksb * lkb_lksb ; /* caller's status block */
2008-02-06 09:35:45 +03:00
void ( * lkb_astfn ) ( void * astparam ) ;
void ( * lkb_bastfn ) ( void * astparam , int mode ) ;
2008-02-07 08:27:04 +03:00
union {
void * lkb_astparam ; /* caller's ast arg */
struct dlm_user_args * lkb_ua ;
} ;
2006-01-18 12:30:29 +03:00
} ;
2012-05-10 19:18:07 +04:00
/*
* res_master_nodeid is " normal " : 0 is unset / invalid , non - zero is the real
* nodeid , even when nodeid is our_nodeid .
*
* res_nodeid is " odd " : - 1 is unset / invalid , zero means our_nodeid ,
* greater than zero when another nodeid .
*
* ( TODO : remove res_nodeid and only use res_master_nodeid )
*/
2006-01-18 12:30:29 +03:00
struct dlm_rsb {
struct dlm_ls * res_ls ; /* the lockspace */
struct kref res_ref ;
2006-01-20 11:47:07 +03:00
struct mutex res_mutex ;
2006-01-18 12:30:29 +03:00
unsigned long res_flags ;
int res_length ; /* length of rsb name */
int res_nodeid ;
2012-05-10 19:18:07 +04:00
int res_master_nodeid ;
int res_dir_nodeid ;
2012-05-16 01:07:49 +04:00
int res_id ; /* for ls_recover_idr */
2006-01-18 12:30:29 +03:00
uint32_t res_lvbseq ;
uint32_t res_hash ;
uint32_t res_bucket ; /* rsbtbl */
unsigned long res_toss_time ;
uint32_t res_first_lkid ;
struct list_head res_lookup ; /* lkbs waiting on first */
2011-10-27 00:24:55 +04:00
union {
struct list_head res_hashchain ;
struct rb_node res_hashnode ; /* rsbtbl */
} ;
2006-01-18 12:30:29 +03:00
struct list_head res_grantqueue ;
struct list_head res_convertqueue ;
struct list_head res_waitqueue ;
struct list_head res_root_list ; /* used for recovery */
struct list_head res_recover_list ; /* used for recovery */
int res_recover_locks_count ;
char * res_lvbptr ;
2011-07-07 23:05:03 +04:00
char res_name [ DLM_RESNAME_MAXLEN + 1 ] ;
2006-01-18 12:30:29 +03:00
} ;
2012-05-10 19:18:07 +04:00
/* dlm_master_lookup() flags */

#define DLM_LU_RECOVER_DIR	1
#define DLM_LU_RECOVER_MASTER	2

/* dlm_master_lookup() results */

#define DLM_LU_MATCH		1
#define DLM_LU_ADD		2

/* find_rsb() flags */

#define R_REQUEST		0x00000001
#define R_RECEIVE_REQUEST	0x00000002
#define R_RECEIVE_RECOVER	0x00000004
/*
 * rsb_flags: bit numbers (not masks) within dlm_rsb.res_flags, used via
 * rsb_set_flag() / rsb_clear_flag() / rsb_flag().
 */

enum rsb_flags {
	RSB_MASTER_UNCERTAIN,
	RSB_VALNOTVALID,
	RSB_VALNOTVALID_PREV,
	RSB_NEW_MASTER,
	RSB_NEW_MASTER2,
	RSB_RECOVER_CONVERT,
	RSB_RECOVER_GRANT,
	RSB_RECOVER_LVB_INVAL,
};
static inline void rsb_set_flag ( struct dlm_rsb * r , enum rsb_flags flag )
{
__set_bit ( flag , & r - > res_flags ) ;
}
static inline void rsb_clear_flag ( struct dlm_rsb * r , enum rsb_flags flag )
{
__clear_bit ( flag , & r - > res_flags ) ;
}
static inline int rsb_flag ( struct dlm_rsb * r , enum rsb_flags flag )
{
return test_bit ( flag , & r - > res_flags ) ;
}
/* dlm_header is first element of all structs sent between nodes */

#define DLM_HEADER_MAJOR	0x00030000
#define DLM_HEADER_MINOR	0x00000001

#define DLM_HEADER_SLOTS	0x00000001

/* dlm_header.h_cmd values */
#define DLM_MSG			1
#define DLM_RCOM		2
/* Common leading header of dlm_message and dlm_rcom. */
struct dlm_header {
	uint32_t		h_version;
	uint32_t		h_lockspace;
	uint32_t		h_nodeid;	/* nodeid of sender */
	uint16_t		h_length;
	uint8_t			h_cmd;		/* DLM_MSG, DLM_RCOM */
	uint8_t			h_pad;
};
/* dlm_message.m_type values */
#define DLM_MSG_REQUEST		1
#define DLM_MSG_CONVERT		2
#define DLM_MSG_UNLOCK		3
#define DLM_MSG_CANCEL		4
#define DLM_MSG_REQUEST_REPLY	5
#define DLM_MSG_CONVERT_REPLY	6
#define DLM_MSG_UNLOCK_REPLY	7
#define DLM_MSG_CANCEL_REPLY	8
#define DLM_MSG_GRANT		9
#define DLM_MSG_BAST		10
#define DLM_MSG_LOOKUP		11
#define DLM_MSG_REMOVE		12
#define DLM_MSG_LOOKUP_REPLY	13
#define DLM_MSG_PURGE		14
2006-01-18 12:30:29 +03:00
/*
 * Locking message: a dlm_header followed by fixed fields and a
 * variable-length tail (m_extra) that carries a name or an lvb.
 */
struct dlm_message {
	struct dlm_header	m_header;
	uint32_t		m_type;		/* DLM_MSG_ */
	uint32_t		m_nodeid;
	uint32_t		m_pid;
	uint32_t		m_lkid;		/* lkid on sender */
	uint32_t		m_remid;	/* lkid on receiver */
	uint32_t		m_parent_lkid;
	uint32_t		m_parent_remid;
	uint32_t		m_exflags;
	uint32_t		m_sbflags;
	uint32_t		m_flags;
	uint32_t		m_lvbseq;
	uint32_t		m_hash;
	int			m_status;
	int			m_grmode;
	int			m_rqmode;
	int			m_bastmode;
	int			m_asts;
	int			m_result;	/* 0 or -EXXX */
	char			m_extra[0];	/* name or lvb */
};
/* ls_recover_status bits (DLM_RS_) */
#define DLM_RS_NODES		0x00000001
#define DLM_RS_NODES_ALL	0x00000002
#define DLM_RS_DIR		0x00000004
#define DLM_RS_DIR_ALL		0x00000008
#define DLM_RS_LOCKS		0x00000010
#define DLM_RS_LOCKS_ALL	0x00000020
#define DLM_RS_DONE		0x00000040
#define DLM_RS_DONE_ALL		0x00000080

/* dlm_rcom.rc_type values (DLM_RCOM_) */
#define DLM_RCOM_STATUS		1
#define DLM_RCOM_NAMES		2
#define DLM_RCOM_LOOKUP		3
#define DLM_RCOM_LOCK		4
#define DLM_RCOM_STATUS_REPLY	5
#define DLM_RCOM_NAMES_REPLY	6
#define DLM_RCOM_LOOKUP_REPLY	7
#define DLM_RCOM_LOCK_REPLY	8
/*
 * Recovery message: a dlm_header followed by fixed fields and a
 * variable-length payload in rc_buf.
 */
struct dlm_rcom {
	struct dlm_header	rc_header;
	uint32_t		rc_type;	/* DLM_RCOM_ */
	int			rc_result;	/* multi-purpose */
	uint64_t		rc_id;		/* match reply with request */
	uint64_t		rc_seq;		/* sender's ls_recover_seq */
	uint64_t		rc_seq_reply;	/* remote ls_recover_seq */
	char			rc_buf[0];
};
2008-01-25 08:58:46 +03:00
/* Overlay of the two message kinds; both start with a dlm_header. */
union dlm_packet {
	struct dlm_header	header;		/* common to other two */
	struct dlm_message	message;
	struct dlm_rcom		rcom;
};
2011-10-20 22:26:28 +04:00
/* rcom_status.rs_flags bit - NOTE(review): inferred from the DLM_RSF_ / rs_flags naming, confirm */
#define DLM_RSF_NEED_SLOTS	0x00000001

/* RCOM_STATUS data */
struct rcom_status {
	__le32			rs_flags;
	__le32			rs_unused1;
	__le64			rs_unused2;
};
/* RCOM_STATUS_REPLY data */
2006-01-18 12:30:29 +03:00
struct rcom_config {
2008-01-25 10:34:00 +03:00
__le32 rf_lvblen ;
__le32 rf_lsflags ;
2011-10-20 22:26:28 +04:00
/* DLM_HEADER_SLOTS adds: */
__le32 rf_flags ;
__le16 rf_our_slot ;
__le16 rf_num_slots ;
__le32 rf_generation ;
__le32 rf_unused1 ;
__le64 rf_unused2 ;
} ;
/* One nodeid/slot pair; all fields are little-endian typed. */
struct rcom_slot {
	__le32			ro_nodeid;
	__le16			ro_slot;
	__le16			ro_unused1;
	__le64			ro_unused2;
};
struct rcom_lock {
2008-01-25 10:08:26 +03:00
__le32 rl_ownpid ;
__le32 rl_lkid ;
__le32 rl_remid ;
__le32 rl_parent_lkid ;
__le32 rl_parent_remid ;
__le32 rl_exflags ;
__le32 rl_flags ;
__le32 rl_lvbseq ;
__le32 rl_result ;
2006-01-18 12:30:29 +03:00
int8_t rl_rqmode ;
int8_t rl_grmode ;
int8_t rl_status ;
int8_t rl_asts ;
2008-01-25 10:08:26 +03:00
__le16 rl_wait_type ;
__le16 rl_namelen ;
2006-01-18 12:30:29 +03:00
char rl_name [ DLM_RESNAME_MAXLEN ] ;
char rl_lvb [ 0 ] ;
} ;
2012-06-14 21:17:32 +04:00
/*
* The max number of resources per rsbtbl bucket that shrink will attempt
* to remove in each iteration .
*/
# define DLM_REMOVE_NAMES_MAX 8
2006-01-18 12:30:29 +03:00
struct dlm_ls {
struct list_head ls_list ; /* list of lockspaces */
2006-07-13 01:44:04 +04:00
dlm_lockspace_t * ls_local_handle ;
2006-01-18 12:30:29 +03:00
uint32_t ls_global_id ; /* global unique lockspace ID */
2011-10-20 22:26:28 +04:00
uint32_t ls_generation ;
2006-01-18 12:30:29 +03:00
uint32_t ls_exflags ;
int ls_lvblen ;
2008-08-06 22:30:24 +04:00
int ls_count ; /* refcount of processes in
the dlm using this ls */
int ls_create_count ; /* create/release refcount */
2006-01-18 12:30:29 +03:00
unsigned long ls_flags ; /* LSFL_ */
2008-08-18 23:03:25 +04:00
unsigned long ls_scan_time ;
2006-01-18 12:30:29 +03:00
struct kobject ls_kobj ;
2011-07-07 02:00:54 +04:00
struct idr ls_lkbidr ;
spinlock_t ls_lkbidr_spin ;
2006-01-18 12:30:29 +03:00
struct dlm_rsbtable * ls_rsbtbl ;
uint32_t ls_rsbtbl_size ;
2006-01-20 11:47:07 +03:00
struct mutex ls_waiters_mutex ;
2006-01-18 12:30:29 +03:00
struct list_head ls_waiters ; /* lkbs needing a reply */
2007-03-28 18:56:46 +04:00
struct mutex ls_orphans_mutex ;
struct list_head ls_orphans ;
2007-05-18 17:59:31 +04:00
struct mutex ls_timeout_mutex ;
struct list_head ls_timeout ;
2011-07-07 23:05:03 +04:00
spinlock_t ls_new_rsb_spin ;
int ls_new_rsb_count ;
struct list_head ls_new_rsb ; /* new rsb structs */
2012-06-14 21:17:32 +04:00
spinlock_t ls_remove_spin ;
char ls_remove_name [ DLM_RESNAME_MAXLEN + 1 ] ;
char * ls_remove_names [ DLM_REMOVE_NAMES_MAX ] ;
int ls_remove_len ;
int ls_remove_lens [ DLM_REMOVE_NAMES_MAX ] ;
2006-01-18 12:30:29 +03:00
struct list_head ls_nodes ; /* current nodes in ls */
struct list_head ls_nodes_gone ; /* dead node list, recovery */
int ls_num_nodes ; /* number of nodes in ls */
int ls_low_nodeid ;
int ls_total_weight ;
int * ls_node_array ;
2011-10-20 22:26:28 +04:00
int ls_slot ;
int ls_num_slots ;
int ls_slots_size ;
struct dlm_slot * ls_slots ;
2006-01-18 12:30:29 +03:00
struct dlm_rsb ls_stub_rsb ; /* for returning errors */
struct dlm_lkb ls_stub_lkb ; /* for returning errors */
struct dlm_message ls_stub_ms ; /* for faking a reply */
2006-07-25 22:44:31 +04:00
struct dentry * ls_debug_rsb_dentry ; /* debugfs */
struct dentry * ls_debug_waiters_dentry ; /* debugfs */
2007-07-06 18:47:08 +04:00
struct dentry * ls_debug_locks_dentry ; /* debugfs */
2008-12-16 23:53:23 +03:00
struct dentry * ls_debug_all_dentry ; /* debugfs */
2012-05-10 19:18:07 +04:00
struct dentry * ls_debug_toss_dentry ; /* debugfs */
2006-01-18 12:30:29 +03:00
wait_queue_head_t ls_uevent_wait ; /* user part of join/leave */
int ls_uevent_result ;
2007-05-18 18:03:35 +04:00
struct completion ls_members_done ;
int ls_members_result ;
2006-01-18 12:30:29 +03:00
2006-07-13 01:44:04 +04:00
struct miscdevice ls_device ;
2011-04-05 22:16:24 +04:00
struct workqueue_struct * ls_callback_wq ;
2006-01-18 12:30:29 +03:00
/* recovery related */
2011-04-05 22:16:24 +04:00
struct mutex ls_cb_mutex ;
struct list_head ls_cb_delay ; /* save for queue_work later */
2006-01-18 12:30:29 +03:00
struct timer_list ls_timer ;
struct task_struct * ls_recoverd_task ;
2006-01-20 11:47:07 +03:00
struct mutex ls_recoverd_active ;
2006-01-18 12:30:29 +03:00
spinlock_t ls_recover_lock ;
2007-05-18 17:59:31 +04:00
unsigned long ls_recover_begin ; /* jiffies timestamp */
2006-01-18 12:30:29 +03:00
uint32_t ls_recover_status ; /* DLM_RS_ */
uint64_t ls_recover_seq ;
struct dlm_recover * ls_recover_args ;
struct rw_semaphore ls_in_recovery ; /* block local requests */
2007-09-28 00:53:38 +04:00
struct rw_semaphore ls_recv_active ; /* block dlm_recv */
2006-01-18 12:30:29 +03:00
struct list_head ls_requestqueue ; /* queue remote requests */
2006-01-20 11:47:07 +03:00
struct mutex ls_requestqueue_mutex ;
2008-01-25 11:01:51 +03:00
struct dlm_rcom * ls_recover_buf ;
2006-08-09 02:08:42 +04:00
int ls_recover_nodeid ; /* for debugging */
2012-05-10 19:18:07 +04:00
unsigned int ls_recover_dir_sent_res ; /* for log info */
unsigned int ls_recover_dir_sent_msg ; /* for log info */
dlm: fixes for nodir mode
The "nodir" mode (statically assign master nodes instead
of using the resource directory) has always been highly
experimental, and never seriously used. This commit
fixes a number of problems, making nodir much more usable.
- Major change to recovery: recover all locks and restart
all in-progress operations after recovery. In some
cases it's not possible to know which in-progess locks
to recover, so recover all. (Most require recovery
in nodir mode anyway since rehashing changes most
master nodes.)
- Change the way nodir mode is enabled, from a command
line mount arg passed through gfs2, into a sysfs
file managed by dlm_controld, consistent with the
other config settings.
- Allow recovering MSTCPY locks on an rsb that has not
yet been turned into a master copy.
- Ignore RCOM_LOCK and RCOM_LOCK_REPLY recovery messages
from a previous, aborted recovery cycle. Base this
on the local recovery status not being in the state
where any nodes should be sending LOCK messages for the
current recovery cycle.
- Hold rsb lock around dlm_purge_mstcpy_locks() because it
may run concurrently with dlm_recover_master_copy().
- Maintain highbast on process-copy lkb's (in addition to
the master as is usual), because the lkb can switch
back and forth between being a master and being a
process copy as the master node changes in recovery.
- When recovering MSTCPY locks, flag rsb's that have
non-empty convert or waiting queues for granting
at the end of recovery. (Rename flag from LOCKS_PURGED
to RECOVER_GRANT and similar for the recovery function,
because it's not only resources with purged locks
that need grant a grant attempt.)
- Replace a couple of unnecessary assertion panics with
error messages.
Signed-off-by: David Teigland <teigland@redhat.com>
2012-04-27 00:54:29 +04:00
unsigned int ls_recover_locks_in ; /* for log info */
2006-08-09 20:20:15 +04:00
uint64_t ls_rcom_seq ;
2006-11-27 22:19:28 +03:00
spinlock_t ls_rcom_spin ;
2006-01-18 12:30:29 +03:00
struct list_head ls_recover_list ;
spinlock_t ls_recover_list_lock ;
int ls_recover_list_count ;
2012-05-16 01:07:49 +04:00
struct idr ls_recover_idr ;
spinlock_t ls_recover_idr_lock ;
2006-01-18 12:30:29 +03:00
wait_queue_head_t ls_wait_general ;
2012-08-02 20:08:21 +04:00
wait_queue_head_t ls_recover_lock_wait ;
2006-07-13 01:44:04 +04:00
struct mutex ls_clear_proc_locks ;
2006-01-18 12:30:29 +03:00
struct list_head ls_root_list ; /* root resources */
struct rw_semaphore ls_root_sem ; /* protect root_list */
2011-11-02 23:30:58 +04:00
const struct dlm_lockspace_ops * ls_ops ;
void * ls_ops_arg ;
2006-01-18 12:30:29 +03:00
int ls_namelen ;
char ls_name [ 1 ] ;
} ;
2012-08-02 20:08:21 +04:00
/*
* LSFL_RECOVER_STOP - dlm_ls_stop ( ) sets this to tell dlm recovery routines
* that they should abort what they ' re doing so new recovery can be started .
*
* LSFL_RECOVER_DOWN - dlm_ls_stop ( ) sets this to tell dlm_recoverd that it
* should do down_write ( ) on the in_recovery rw_semaphore . ( doing down_write
* within dlm_ls_stop causes complaints about the lock acquired / released
* in different contexts . )
*
* LSFL_RECOVER_LOCK - dlm_recoverd holds the in_recovery rw_semaphore .
* It sets this after it is done with down_write ( ) on the in_recovery
* rw_semaphore and clears it after it has released the rw_semaphore .
*
* LSFL_RECOVER_WORK - dlm_ls_start ( ) sets this to tell dlm_recoverd that it
* should begin recovery of the lockspace .
*
* LSFL_RUNNING - set when normal locking activity is enabled .
* dlm_ls_stop ( ) clears this to tell dlm locking routines that they should
* quit what they are doing so recovery can run . dlm_recoverd sets
* this after recovery is finished .
*/
/* Bit numbers (not masks) within dlm_ls.ls_flags. */
#define LSFL_RECOVER_STOP	0
#define LSFL_RECOVER_DOWN	1
#define LSFL_RECOVER_LOCK	2
#define LSFL_RECOVER_WORK	3
#define LSFL_RUNNING		4

#define LSFL_RCOM_READY		5
#define LSFL_RCOM_WAIT		6
#define LSFL_UEVENT_WAIT	7
#define LSFL_TIMEWARN		8
#define LSFL_CB_DELAY		9
#define LSFL_NODIR		10
2006-01-18 12:30:29 +03:00
2006-07-13 01:44:04 +04:00
/* much of this is just saving user space pointers associated with the
   lock that we pass back to the user lib with an ast */

struct dlm_user_args {
	struct dlm_user_proc	*proc; /* each process that opens the lockspace
					  device has private data
					  (dlm_user_proc) on the struct file,
					  the process's locks point back to it*/
	struct dlm_lksb		lksb;
	struct dlm_lksb __user	*user_lksb;
	void __user		*castparam;
	void __user		*castaddr;
	void __user		*bastparam;
	void __user		*bastaddr;
	uint64_t		xid;
};
# define DLM_PROC_FLAGS_CLOSING 1
# define DLM_PROC_FLAGS_COMPAT 2
/* locks list is kept so we can remove all a process's locks when it
exits ( or orphan those that are persistent ) */
struct dlm_user_proc {
dlm_lockspace_t * lockspace ;
unsigned long flags ; /* DLM_PROC_FLAGS */
struct list_head asts ;
spinlock_t asts_spin ;
struct list_head locks ;
spinlock_t locks_spin ;
2007-01-15 19:34:52 +03:00
struct list_head unlocking ;
2006-07-13 01:44:04 +04:00
wait_queue_head_t wait ;
} ;
2006-01-18 12:30:29 +03:00
static inline int dlm_locking_stopped ( struct dlm_ls * ls )
{
return ! test_bit ( LSFL_RUNNING , & ls - > ls_flags ) ;
}
static inline int dlm_recovery_stopped ( struct dlm_ls * ls )
{
2012-08-02 20:08:21 +04:00
return test_bit ( LSFL_RECOVER_STOP , & ls - > ls_flags ) ;
2006-01-18 12:30:29 +03:00
}
static inline int dlm_no_directory ( struct dlm_ls * ls )
{
dlm: fixes for nodir mode
The "nodir" mode (statically assign master nodes instead
of using the resource directory) has always been highly
experimental, and never seriously used. This commit
fixes a number of problems, making nodir much more usable.
- Major change to recovery: recover all locks and restart
all in-progress operations after recovery. In some
cases it's not possible to know which in-progess locks
to recover, so recover all. (Most require recovery
in nodir mode anyway since rehashing changes most
master nodes.)
- Change the way nodir mode is enabled, from a command
line mount arg passed through gfs2, into a sysfs
file managed by dlm_controld, consistent with the
other config settings.
- Allow recovering MSTCPY locks on an rsb that has not
yet been turned into a master copy.
- Ignore RCOM_LOCK and RCOM_LOCK_REPLY recovery messages
from a previous, aborted recovery cycle. Base this
on the local recovery status not being in the state
where any nodes should be sending LOCK messages for the
current recovery cycle.
- Hold rsb lock around dlm_purge_mstcpy_locks() because it
may run concurrently with dlm_recover_master_copy().
- Maintain highbast on process-copy lkb's (in addition to
the master as is usual), because the lkb can switch
back and forth between being a master and being a
process copy as the master node changes in recovery.
- When recovering MSTCPY locks, flag rsb's that have
non-empty convert or waiting queues for granting
at the end of recovery. (Rename flag from LOCKS_PURGED
to RECOVER_GRANT and similar for the recovery function,
because it's not only resources with purged locks
that need grant a grant attempt.)
- Replace a couple of unnecessary assertion panics with
error messages.
Signed-off-by: David Teigland <teigland@redhat.com>
2012-04-27 00:54:29 +04:00
return test_bit ( LSFL_NODIR , & ls - > ls_flags ) ;
2006-01-18 12:30:29 +03:00
}
2007-11-03 03:04:30 +03:00
/* netlink, timeout-warning and plock entry points (defined elsewhere) */
int dlm_netlink_init(void);
void dlm_netlink_exit(void);
void dlm_timeout_warn(struct dlm_lkb *lkb);
int dlm_plock_init(void);
void dlm_plock_exit(void);
2007-11-03 03:04:30 +03:00
/*
 * debugfs hooks: real declarations when CONFIG_DLM_DEBUG is defined,
 * otherwise no-op inline stubs (the int-returning ones return 0).
 */
#ifdef CONFIG_DLM_DEBUG
int dlm_register_debugfs(void);
void dlm_unregister_debugfs(void);
int dlm_create_debug_file(struct dlm_ls *ls);
void dlm_delete_debug_file(struct dlm_ls *ls);
#else
static inline int dlm_register_debugfs(void)
{
	return 0;
}

static inline void dlm_unregister_debugfs(void)
{
}

static inline int dlm_create_debug_file(struct dlm_ls *ls)
{
	return 0;
}

static inline void dlm_delete_debug_file(struct dlm_ls *ls)
{
}
#endif
2006-01-18 12:30:29 +03:00
# endif /* __DLM_INTERNAL_DOT_H__ */