2019-05-31 11:09:56 +03:00
/* SPDX-License-Identifier: GPL-2.0-only */
2006-01-16 19:50:04 +03:00
/*
* Copyright ( C ) Sistina Software , Inc . 1997 - 2003 All rights reserved .
2006-05-18 23:09:15 +04:00
* Copyright ( C ) 2004 - 2006 Red Hat , Inc . All rights reserved .
2006-01-16 19:50:04 +03:00
*/
# ifndef __GLOCK_DOT_H__
# define __GLOCK_DOT_H__
Detach sched.h from mm.h
The first thing mm.h does is include sched.h, solely for the can_do_mlock() inline
function, which dereferences "current". By dealing with can_do_mlock(),
mm.h can be detached from sched.h, which is good; see below for why.
This patch
a) removes unconditional inclusion of sched.h from mm.h
b) makes can_do_mlock() normal function in mm/mlock.c
c) exports can_do_mlock() to not break compilation
d) adds sched.h inclusions back to files that were getting it indirectly.
e) adds less bloated headers to some files (asm/signal.h, jiffies.h) that were
getting them indirectly
Net result is:
a) mm.h users would get less code to open, read, preprocess, parse, ... if
they don't need sched.h
b) sched.h stops being dependency for significant number of files:
on x86_64 allmodconfig touching sched.h results in recompile of 4083 files,
after patch it's only 3744 (-8.3%).
Cross-compile tested on
all arm defconfigs, all mips defconfigs, all powerpc defconfigs,
alpha alpha-up
arm
i386 i386-up i386-defconfig i386-allnoconfig
ia64 ia64-up
m68k
mips
parisc parisc-up
powerpc powerpc-up
s390 s390-up
sparc sparc-up
sparc64 sparc64-up
um-x86_64
x86_64 x86_64-up x86_64-defconfig x86_64-allnoconfig
as well as my two usual configs.
Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2007-05-21 01:22:52 +04:00
# include <linux/sched.h>
2009-01-12 13:43:39 +03:00
# include <linux/parser.h>
2006-09-05 18:39:21 +04:00
# include "incore.h"
2017-07-18 19:35:04 +03:00
# include "util.h"
2006-09-05 18:39:21 +04:00
2009-01-12 13:43:39 +03:00
/* Options for hostdata parser */
/*
 * Option identifiers; the enumerators are numbered consecutively
 * starting at zero, with Opt_err last.
 */
enum {
	Opt_jid   = 0,
	Opt_id    = 1,
	Opt_first = 2,
	Opt_nodir = 3,
	Opt_err   = 4,
};
/*
* lm_lockname types
*/
/* Lock name type codes: contiguous values 0x00 through 0x09. */
#define LM_TYPE_RESERVED	0x00
#define LM_TYPE_NONDISK		0x01
#define LM_TYPE_INODE		0x02
#define LM_TYPE_RGRP		0x03
#define LM_TYPE_META		0x04
#define LM_TYPE_IOPEN		0x05
#define LM_TYPE_FLOCK		0x06
#define LM_TYPE_PLOCK		0x07
#define LM_TYPE_QUOTA		0x08
#define LM_TYPE_JOURNAL		0x09
/*
* lm_lock ( ) states
*
* SHARED is compatible with SHARED , not with DEFERRED or EX .
* DEFERRED is compatible with DEFERRED , not with SHARED or EX .
*/
/* Lock states; all four values fit in two bits (0..3). */
#define LM_ST_UNLOCKED		0
#define LM_ST_EXCLUSIVE		1
#define LM_ST_DEFERRED		2
#define LM_ST_SHARED		3
/*
* lm_lock ( ) flags
*
* LM_FLAG_TRY
* Don't wait to acquire the lock if it can't be granted immediately.
*
* LM_FLAG_TRY_1CB
* Send one blocking callback if TRY is set and the lock is not granted .
*
* LM_FLAG_NOEXP
* GFS sets this flag on lock requests it makes while doing journal recovery .
* These special requests should not be blocked due to the recovery like
* ordinary locks would be .
*
* LM_FLAG_ANY
* A SHARED request may also be granted in DEFERRED , or a DEFERRED request may
* also be granted in SHARED . The preferred state is whichever is compatible
* with other granted locks , or the specified state if no other locks exist .
*
2018-04-18 23:58:19 +03:00
* LM_FLAG_NODE_SCOPE
* This holder agrees to share the lock within this node . In other words ,
* the glock is held in EX mode according to DLM , but local holders on the
* same node can share it .
2009-01-12 13:43:39 +03:00
*/
2015-07-24 17:45:43 +03:00
/*
 * Flag bits. Every value is a distinct single bit, so LM_FLAG_* and GL_*
 * values can be OR-ed together without colliding. Bit 0x0010 is not
 * assigned in this list.
 */
#define LM_FLAG_TRY		0x0001
#define LM_FLAG_TRY_1CB		0x0002
#define LM_FLAG_NOEXP		0x0004
#define LM_FLAG_ANY		0x0008
#define LM_FLAG_NODE_SCOPE	0x0020
#define GL_ASYNC		0x0040
#define GL_EXACT		0x0080
#define GL_SKIP			0x0100
#define GL_NOPID		0x0200
#define GL_NOCACHE		0x0400
2009-01-12 13:43:39 +03:00
/*
2010-11-29 15:50:38 +03:00
* lm_async_cb return flags
2009-01-12 13:43:39 +03:00
*
* LM_OUT_ST_MASK
* Masks the lower two bits of lock state in the returned value .
*
* LM_OUT_CANCELED
* The lock request was canceled .
*
*/
/*
 * Return-flag layout: the low two bits carry the lock state, the next two
 * bits are the error and canceled indications.
 */
#define LM_OUT_ST_MASK		0x00000003
#define LM_OUT_CANCELED		0x00000008
#define LM_OUT_ERROR		0x00000004
2009-01-12 13:43:39 +03:00
/*
* lm_recovery_done ( ) messages
*/
/* Recovery-done message codes. */
#define LM_RD_GAVEUP		308
#define LM_RD_SUCCESS		309

/* NOTE(review): presumably one of the GLR_* results of an enqueue — confirm. */
#define GLR_TRYFAILED		13
2011-06-15 19:41:48 +04:00
/*
 * Glock hold-time tuning constants, each of type long.
 * NOTE(review): most are derived from HZ, so the unit is presumably
 * jiffies — confirm against the users.
 *
 * Every expansion is wrapped in an outer pair of parentheses so that it is
 * a primary expression. A bare cast such as `(long)(10)` mis-parses in
 * contexts like `sizeof GL_GLOCK_MIN_HOLD`, which would be read as
 * `sizeof(long)` followed by a stray `(10)`.
 */
#define GL_GLOCK_MAX_HOLD	((long)(HZ / 5))
#define GL_GLOCK_DFT_HOLD	((long)(HZ / 5))
#define GL_GLOCK_MIN_HOLD	((long)(10))
#define GL_GLOCK_HOLD_INCR	((long)(HZ / 20))
#define GL_GLOCK_HOLD_DECR	((long)(HZ / 40))
2009-01-12 13:43:39 +03:00
struct lm_lockops {
const char * lm_proto_name ;
2012-01-10 02:18:05 +04:00
int ( * lm_mount ) ( struct gfs2_sbd * sdp , const char * table ) ;
void ( * lm_first_done ) ( struct gfs2_sbd * sdp ) ;
void ( * lm_recovery_result ) ( struct gfs2_sbd * sdp , unsigned int jid ,
unsigned int result ) ;
void ( * lm_unmount ) ( struct gfs2_sbd * sdp ) ;
2009-01-12 13:43:39 +03:00
void ( * lm_withdraw ) ( struct gfs2_sbd * sdp ) ;
2011-01-19 12:30:01 +03:00
void ( * lm_put_lock ) ( struct gfs2_glock * gl ) ;
2010-11-29 15:50:38 +03:00
int ( * lm_lock ) ( struct gfs2_glock * gl , unsigned int req_state ,
unsigned int flags ) ;
2009-01-12 13:43:39 +03:00
void ( * lm_cancel ) ( struct gfs2_glock * gl ) ;
const match_table_t * lm_tokens ;
} ;
2022-05-08 13:06:30 +03:00
/*
 * struct gfs2_glock_aspace - a glock bundled with an address space.
 *
 * The glock is embedded (not pointed to), so the enclosing object can be
 * recovered from a glock pointer with container_of(), as
 * gfs2_glock2aspace() does.
 */
struct gfs2_glock_aspace {
	struct gfs2_glock glock;	/* embedded glock */
	struct address_space mapping;	/* address space paired with it */
};
2008-02-22 19:07:18 +03:00
static inline struct gfs2_holder * gfs2_glock_is_locked_by_me ( struct gfs2_glock * gl )
2006-01-16 19:50:04 +03:00
{
struct gfs2_holder * gh ;
2008-02-07 11:13:19 +03:00
struct pid * pid ;
2006-01-16 19:50:04 +03:00
/* Look in glock's list of holders for one with current task as owner */
2015-10-29 18:58:09 +03:00
spin_lock ( & gl - > gl_lockref . lock ) ;
2008-02-07 11:13:19 +03:00
pid = task_pid ( current ) ;
2006-01-16 19:50:04 +03:00
list_for_each_entry ( gh , & gl - > gl_holders , gh_list ) {
2008-05-21 20:03:22 +04:00
if ( ! test_bit ( HIF_HOLDER , & gh - > gh_iflags ) )
break ;
2008-02-22 19:07:18 +03:00
if ( gh - > gh_owner_pid = = pid )
goto out ;
2006-01-16 19:50:04 +03:00
}
2008-02-22 19:07:18 +03:00
gh = NULL ;
out :
2015-10-29 18:58:09 +03:00
spin_unlock ( & gl - > gl_lockref . lock ) ;
2006-01-16 19:50:04 +03:00
2008-02-22 19:07:18 +03:00
return gh ;
2006-01-16 19:50:04 +03:00
}
static inline int gfs2_glock_is_held_excl ( struct gfs2_glock * gl )
{
2006-09-04 17:49:55 +04:00
return gl - > gl_state = = LM_ST_EXCLUSIVE ;
2006-01-16 19:50:04 +03:00
}
static inline int gfs2_glock_is_held_dfrd ( struct gfs2_glock * gl )
{
2006-09-04 17:49:55 +04:00
return gl - > gl_state = = LM_ST_DEFERRED ;
2006-01-16 19:50:04 +03:00
}
static inline int gfs2_glock_is_held_shrd ( struct gfs2_glock * gl )
{
2006-09-04 17:49:55 +04:00
return gl - > gl_state = = LM_ST_SHARED ;
2006-01-16 19:50:04 +03:00
}
2009-12-08 15:12:13 +03:00
static inline struct address_space * gfs2_glock2aspace ( struct gfs2_glock * gl )
{
2022-05-08 13:06:30 +03:00
if ( gl - > gl_ops - > go_flags & GLOF_ASPACE ) {
struct gfs2_glock_aspace * gla =
container_of ( gl , struct gfs2_glock_aspace , glock ) ;
return & gla - > mapping ;
}
2009-12-08 15:12:13 +03:00
return NULL ;
}
2012-10-15 13:57:02 +04:00
extern int gfs2_glock_get ( struct gfs2_sbd * sdp , u64 number ,
const struct gfs2_glock_operations * glops ,
int create , struct gfs2_glock * * glp ) ;
2022-12-02 20:00:15 +03:00
extern struct gfs2_glock * gfs2_glock_hold ( struct gfs2_glock * gl ) ;
2012-10-15 13:57:02 +04:00
extern void gfs2_glock_put ( struct gfs2_glock * gl ) ;
2017-08-01 19:45:23 +03:00
extern void gfs2_glock_queue_put ( struct gfs2_glock * gl ) ;
2021-09-30 21:49:36 +03:00
extern void __gfs2_holder_init ( struct gfs2_glock * gl , unsigned int state ,
u16 flags , struct gfs2_holder * gh ,
unsigned long ip ) ;
/**
 * gfs2_holder_init - initialize a holder, recording the caller's address
 * @gl: the glock the holder is for
 * @state: the requested state
 * @flags: the modifier flags
 * @gh: the holder structure to initialize
 *
 * Thin wrapper that forwards to __gfs2_holder_init() with _RET_IP_ as the
 * ip argument.
 */
static inline void gfs2_holder_init(struct gfs2_glock *gl, unsigned int state,
				    u16 flags, struct gfs2_holder *gh)
{
	__gfs2_holder_init(gl, state, flags, gh, _RET_IP_);
}
2015-07-24 17:45:43 +03:00
/* Holder re-initialization, teardown, enqueue and poll prototypes. */
void gfs2_holder_reinit(unsigned int state, u16 flags,
			struct gfs2_holder *gh);
void gfs2_holder_uninit(struct gfs2_holder *gh);
int gfs2_glock_nq(struct gfs2_holder *gh);
int gfs2_glock_poll(struct gfs2_holder *gh);
gfs2: fix GL_SKIP node_scope problems
Before this patch, when a glock was locked, the very first holder on the
queue would unlock the lockref and call the go_instantiate glops function
(if one existed), unless GL_SKIP was specified. When we introduced the new
node-scope concept, we allowed multiple holders to lock glocks in EX mode
and share the lock.
But node-scope introduced a new problem: if the first holder has GL_SKIP
and the next one does NOT, since it is not the first holder on the queue,
the go_instantiate op was not called. Eventually the GL_SKIP holder may
call the instantiate sub-function (e.g. gfs2_rgrp_bh_get) but there was
still a window of time in which another non-GL_SKIP holder assumes the
instantiate function had been called by the first holder. In the case of
rgrp glocks, this led to a NULL pointer dereference on the buffer_heads.
This patch tries to fix the problem by introducing two new glock flags:
GLF_INSTANTIATE_NEEDED, which keeps track of when the instantiate function
needs to be called to "fill in" or "read in" the object before it is
referenced.
GLF_INSTANTIATE_IN_PROG which is used to determine when a process is
in the process of reading in the object. Whenever a function needs to
reference the object, it checks the GLF_INSTANTIATE_NEEDED flag, and if
set, it sets GLF_INSTANTIATE_IN_PROG and calls the glops "go_instantiate"
function.
As before, the gl_lockref spin_lock is unlocked during the IO operation,
which may take a relatively long amount of time to complete. While
unlocked, if another process determines go_instantiate is still needed,
it sees GLF_INSTANTIATE_IN_PROG is set, and waits for the go_instantiate
glop operation to be completed. Once GLF_INSTANTIATE_IN_PROG is cleared,
it needs to check GLF_INSTANTIATE_NEEDED again because the other process's
go_instantiate operation may not have been successful.
Functions that previously called the instantiate sub-functions now call
directly into gfs2_instantiate so the new bits are managed properly.
Signed-off-by: Bob Peterson <rpeterso@redhat.com>
Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
2021-10-06 17:29:18 +03:00
/* Instantiate, readiness-check and wait prototypes; each takes a holder. */
int gfs2_instantiate(struct gfs2_holder *gh);
int gfs2_glock_holder_ready(struct gfs2_holder *gh);
int gfs2_glock_wait(struct gfs2_holder *gh);
gfs2: Use async glocks for rename
Because s_vfs_rename_mutex is not cluster-wide, multiple nodes can
reverse the roles of which directories are "old" and which are "new" for
the purposes of rename. This can cause deadlocks where two nodes end up
waiting for each other.
There can be several layers of directory dependencies across many nodes.
This patch fixes the problem by acquiring all gfs2_rename's inode glocks
asynchronously and waiting for all glocks to be acquired. That way all
inodes are locked regardless of the order.
The timeout value for multiple asynchronous glocks is calculated to be
the total of the individual wait times for each glock times two.
Since gfs2_exchange is very similar to gfs2_rename, both functions are
patched in the same way.
A new async glock wait queue, sd_async_glock_wait, keeps a list of
waiters for these events. If gfs2's holder_wake function detects an
async holder, it wakes up any waiters for the event. The waiter only
tests whether any of its requests are still pending.
Since the glocks are sent to dlm asynchronously, the wait function needs
to check to see which glocks, if any, were granted.
If a glock is granted by dlm (and therefore held), its minimum hold time
is checked and adjusted as necessary, as other glock grants do.
If the event times out, all glocks held thus far must be dequeued to
resolve any existing deadlocks. Then, if there are any outstanding
locking requests, we need to loop around and wait for dlm to respond to
those requests too. After we release all requests, we return -ESTALE to
the caller (vfs rename) which loops around and retries the request.
Node1 Node2
--------- ---------
1. Enqueue A Enqueue B
2. Enqueue B Enqueue A
3. A granted
6. B granted
7. Wait for B
8. Wait for A
9. A times out (since Node 1 holds A)
10. Dequeue B (since it was granted)
11. Wait for all requests from DLM
12. B Granted (since Node2 released it in step 10)
13. Rename
14. Dequeue A
15. DLM Grants A
16. Dequeue A (due to the timeout and since we
no longer have B held for our task).
17. Dequeue B
18. Return -ESTALE to vfs
19. VFS retries the operation, goto step 1.
This release-all-locks / acquire-all-locks may slow rename / exchange
down as both nodes struggle in the same way and do the same thing.
However, this will only happen when there is contention for the same
inodes, which ought to be rare.
Signed-off-by: Bob Peterson <rpeterso@redhat.com>
Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
2019-08-30 20:31:02 +03:00
extern int gfs2_glock_async_wait ( unsigned int num_gh , struct gfs2_holder * ghs ) ;
2012-10-15 13:57:02 +04:00
extern void gfs2_glock_dq ( struct gfs2_holder * gh ) ;
extern void gfs2_glock_dq_wait ( struct gfs2_holder * gh ) ;
extern void gfs2_glock_dq_uninit ( struct gfs2_holder * gh ) ;
extern int gfs2_glock_nq_num ( struct gfs2_sbd * sdp , u64 number ,
const struct gfs2_glock_operations * glops ,
2015-07-24 17:45:43 +03:00
unsigned int state , u16 flags ,
2012-10-15 13:57:02 +04:00
struct gfs2_holder * gh ) ;
extern int gfs2_glock_nq_m ( unsigned int num_gh , struct gfs2_holder * ghs ) ;
extern void gfs2_glock_dq_m ( unsigned int num_gh , struct gfs2_holder * ghs ) ;
2019-05-09 17:21:48 +03:00
extern void gfs2_dump_glock ( struct seq_file * seq , struct gfs2_glock * gl ,
bool fsid ) ;
/*
 * GLOCK_BUG_ON - if @x is true, dump glock @gl and then BUG().
 * The dump is called with a NULL seq_file and fsid == true.
 */
#define GLOCK_BUG_ON(gl,x)					\
	do {							\
		if (unlikely(x)) {				\
			gfs2_dump_glock(NULL, gl, true);	\
			BUG();					\
		}						\
	} while (0)
2020-05-23 16:13:50 +03:00
/*
 * gfs2_glock_assert_warn - if @x is false, dump glock @gl and then hand the
 * same condition to gfs2_assert_warn() for the glock's superblock.
 * Note that @x appears twice in the expansion, so it should be free of
 * side effects.
 */
#define gfs2_glock_assert_warn(gl, x)					\
	do {								\
		if (unlikely(!(x))) {					\
			gfs2_dump_glock(NULL, gl, true);		\
			gfs2_assert_warn((gl)->gl_name.ln_sbd, (x));	\
		}							\
	} while (0)
/*
 * gfs2_glock_assert_withdraw - if @x is false, dump glock @gl and then hand
 * the same condition to gfs2_assert_withdraw() for the glock's superblock.
 * Note that @x appears twice in the expansion, so it should be free of
 * side effects.
 */
#define gfs2_glock_assert_withdraw(gl, x)				\
	do {								\
		if (unlikely(!(x))) {					\
			gfs2_dump_glock(NULL, gl, true);		\
			gfs2_assert_withdraw((gl)->gl_name.ln_sbd, (x)); \
		}							\
	} while (0)
2012-10-15 13:57:02 +04:00
/*
 * printf-style output helper; argument 2 is the format string and the
 * variadic arguments start at position 3 (checked via __printf).
 */
__printf(2, 3)
void gfs2_print_dbg(struct seq_file *seq, const char *fmt, ...);
2006-01-16 19:50:04 +03:00
2006-03-29 23:36:49 +04:00
/**
 * gfs2_glock_nq_init - initialize a holder and enqueue it on a glock
 * @gl: the glock
 * @state: the state we're requesting
 * @flags: the modifier flags
 * @gh: the holder structure
 *
 * The holder is initialized with the caller's return address. If the
 * enqueue fails, the holder is uninitialized again before returning, so
 * the caller only owns an initialized holder on success.
 *
 * Returns: 0, GLR_*, or errno
 */
static inline int gfs2_glock_nq_init(struct gfs2_glock *gl,
				     unsigned int state, u16 flags,
				     struct gfs2_holder *gh)
{
	int ret;

	__gfs2_holder_init(gl, state, flags, gh, _RET_IP_);

	ret = gfs2_glock_nq(gh);
	if (!ret)
		return 0;

	/* Enqueue failed: undo the initialization above. */
	gfs2_holder_uninit(gh);
	return ret;
}
2011-01-19 12:30:01 +03:00
extern void gfs2_glock_cb ( struct gfs2_glock * gl , unsigned int state ) ;
extern void gfs2_glock_complete ( struct gfs2_glock * gl , int ret ) ;
2022-12-21 02:52:51 +03:00
extern bool gfs2_queue_try_to_evict ( struct gfs2_glock * gl ) ;
2020-01-16 22:12:26 +03:00
extern void gfs2_cancel_delete_work ( struct gfs2_glock * gl ) ;
extern void gfs2_flush_delete_work ( struct gfs2_sbd * sdp ) ;
2011-01-19 12:30:01 +03:00
extern void gfs2_gl_hash_clear ( struct gfs2_sbd * sdp ) ;
2022-08-18 21:32:37 +03:00
extern void gfs2_gl_dq_holders ( struct gfs2_sbd * sdp ) ;
2011-01-19 12:30:01 +03:00
extern void gfs2_glock_thaw ( struct gfs2_sbd * sdp ) ;
2011-03-30 19:33:25 +04:00
extern void gfs2_glock_add_to_lru ( struct gfs2_glock * gl ) ;
2011-03-09 13:58:04 +03:00
extern void gfs2_glock_free ( struct gfs2_glock * gl ) ;
2011-01-19 12:30:01 +03:00
extern int __init gfs2_glock_init ( void ) ;
extern void gfs2_glock_exit ( void ) ;
2019-01-22 18:21:51 +03:00
extern void gfs2_create_debugfs_file ( struct gfs2_sbd * sdp ) ;
2011-01-19 12:30:01 +03:00
extern void gfs2_delete_debugfs_file ( struct gfs2_sbd * sdp ) ;
2019-01-22 18:21:51 +03:00
extern void gfs2_register_debugfs ( void ) ;
2011-01-19 12:30:01 +03:00
extern void gfs2_unregister_debugfs ( void ) ;
2006-09-07 22:40:21 +04:00
2022-12-05 16:44:37 +03:00
/* Set and clear the object associated with a glock. */
void glock_set_object(struct gfs2_glock *gl, void *object);
void glock_clear_object(struct gfs2_glock *gl, void *object);

/* Object declaration: `extern` is required here (unlike on functions). */
extern const struct lm_lockops gfs2_dlm_ops;
2016-06-17 15:31:27 +03:00
static inline void gfs2_holder_mark_uninitialized ( struct gfs2_holder * gh )
{
gh - > gh_gl = NULL ;
}
static inline bool gfs2_holder_initialized ( struct gfs2_holder * gh )
{
return gh - > gh_gl ;
}
gfs2: Use async glocks for rename
Because s_vfs_rename_mutex is not cluster-wide, multiple nodes can
reverse the roles of which directories are "old" and which are "new" for
the purposes of rename. This can cause deadlocks where two nodes end up
waiting for each other.
There can be several layers of directory dependencies across many nodes.
This patch fixes the problem by acquiring all gfs2_rename's inode glocks
asynchronously and waiting for all glocks to be acquired. That way all
inodes are locked regardless of the order.
The timeout value for multiple asynchronous glocks is calculated to be
the total of the individual wait times for each glock times two.
Since gfs2_exchange is very similar to gfs2_rename, both functions are
patched in the same way.
A new async glock wait queue, sd_async_glock_wait, keeps a list of
waiters for these events. If gfs2's holder_wake function detects an
async holder, it wakes up any waiters for the event. The waiter only
tests whether any of its requests are still pending.
Since the glocks are sent to dlm asynchronously, the wait function needs
to check to see which glocks, if any, were granted.
If a glock is granted by dlm (and therefore held), its minimum hold time
is checked and adjusted as necessary, as other glock grants do.
If the event times out, all glocks held thus far must be dequeued to
resolve any existing deadlocks. Then, if there are any outstanding
locking requests, we need to loop around and wait for dlm to respond to
those requests too. After we release all requests, we return -ESTALE to
the caller (vfs rename) which loops around and retries the request.
Node1 Node2
--------- ---------
1. Enqueue A Enqueue B
2. Enqueue B Enqueue A
3. A granted
6. B granted
7. Wait for B
8. Wait for A
9. A times out (since Node 1 holds A)
10. Dequeue B (since it was granted)
11. Wait for all requests from DLM
12. B Granted (since Node2 released it in step 10)
13. Rename
14. Dequeue A
15. DLM Grants A
16. Dequeue A (due to the timeout and since we
no longer have B held for our task).
17. Dequeue B
18. Return -ESTALE to vfs
19. VFS retries the operation, goto step 1.
This release-all-locks / acquire-all-locks may slow rename / exchange
down as both nodes struggle in the same way and do the same thing.
However, this will only happen when there is contention for the same
inodes, which ought to be rare.
Signed-off-by: Bob Peterson <rpeterso@redhat.com>
Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
2019-08-30 20:31:02 +03:00
static inline bool gfs2_holder_queued ( struct gfs2_holder * gh )
{
return ! list_empty ( & gh - > gh_list ) ;
}
2020-01-13 23:21:49 +03:00
/* Record and query a deleted inode generation on a glock. */
void gfs2_inode_remember_delete(struct gfs2_glock *gl, u64 generation);
bool gfs2_inode_already_deleted(struct gfs2_glock *gl, u64 generation);
2006-01-16 19:50:04 +03:00
# endif /* __GLOCK_DOT_H__ */