2006-01-16 19:50:04 +03:00
/*
* Copyright ( C ) Sistina Software , Inc . 1997 - 2003 All rights reserved .
2006-05-18 23:09:15 +04:00
* Copyright ( C ) 2004 - 2006 Red Hat , Inc . All rights reserved .
2006-01-16 19:50:04 +03:00
*
* This copyrighted material is made available to anyone wishing to use ,
* modify , copy , or redistribute it subject to the terms and conditions
2006-09-01 19:05:15 +04:00
* of the GNU General Public License version 2.
2006-01-16 19:50:04 +03:00
*/
# include <linux/slab.h>
# include <linux/spinlock.h>
# include <linux/completion.h>
# include <linux/buffer_head.h>
2006-02-28 01:23:27 +03:00
# include <linux/gfs2_ondisk.h>
2006-09-19 09:56:29 +04:00
# include <linux/lm_interface.h>
2006-01-16 19:50:04 +03:00
# include "gfs2.h"
2006-02-28 01:23:27 +03:00
# include "incore.h"
2006-01-16 19:50:04 +03:00
# include "bmap.h"
# include "glock.h"
# include "glops.h"
# include "inode.h"
# include "log.h"
# include "meta_io.h"
# include "recovery.h"
# include "rgrp.h"
2006-02-28 01:23:27 +03:00
# include "util.h"
2006-10-03 19:10:41 +04:00
# include "trans.h"
2006-01-16 19:50:04 +03:00
2006-10-03 19:10:41 +04:00
/**
* ail_empty_gl - remove all buffers for a given lock from the AIL
* @ gl : the glock
*
* None of the buffers should be dirty , locked , or pinned .
*/
static void gfs2_ail_empty_gl ( struct gfs2_glock * gl )
{
struct gfs2_sbd * sdp = gl - > gl_sbd ;
unsigned int blocks ;
struct list_head * head = & gl - > gl_ail_list ;
struct gfs2_bufdata * bd ;
struct buffer_head * bh ;
int error ;
blocks = atomic_read ( & gl - > gl_ail_count ) ;
if ( ! blocks )
return ;
error = gfs2_trans_begin ( sdp , 0 , blocks ) ;
if ( gfs2_assert_withdraw ( sdp , ! error ) )
return ;
gfs2_log_lock ( sdp ) ;
while ( ! list_empty ( head ) ) {
bd = list_entry ( head - > next , struct gfs2_bufdata ,
bd_ail_gl_list ) ;
bh = bd - > bd_bh ;
2007-10-15 19:29:05 +04:00
gfs2_remove_from_ail ( bd ) ;
2007-09-03 14:01:33 +04:00
bd - > bd_bh = NULL ;
bh - > b_private = NULL ;
bd - > bd_blkno = bh - > b_blocknr ;
gfs2_assert_withdraw ( sdp , ! buffer_busy ( bh ) ) ;
gfs2_trans_add_revoke ( sdp , bd ) ;
2006-10-03 19:10:41 +04:00
}
gfs2_assert_withdraw ( sdp , ! atomic_read ( & gl - > gl_ail_count ) ) ;
gfs2_log_unlock ( sdp ) ;
gfs2_trans_end ( sdp ) ;
gfs2_log_flush ( sdp , NULL ) ;
}
2006-07-26 19:27:10 +04:00
/**
* gfs2_pte_inval - Sync and invalidate all PTEs associated with a glock
* @ gl : the glock
*
*/
static void gfs2_pte_inval ( struct gfs2_glock * gl )
{
struct gfs2_inode * ip ;
struct inode * inode ;
ip = gl - > gl_object ;
inode = & ip - > i_inode ;
2006-11-01 20:22:46 +03:00
if ( ! ip | | ! S_ISREG ( inode - > i_mode ) )
2006-07-26 19:27:10 +04:00
return ;
unmap_shared_mapping_range ( inode - > i_mapping , 0 , 0 ) ;
if ( test_bit ( GIF_SW_PAGED , & ip - > i_flags ) )
set_bit ( GLF_DIRTY , & gl - > gl_flags ) ;
}
2006-01-16 19:50:04 +03:00
/**
* meta_go_sync - sync out the metadata for this glock
* @ gl : the glock
*
* Called when demoting or unlocking an EX glock . We must flush
* to disk all dirty buffers / pages relating to this glock , and must not
* not return to caller to demote / unlock the glock until I / O is complete .
*/
2006-11-20 18:37:45 +03:00
static void meta_go_sync ( struct gfs2_glock * gl )
2006-01-16 19:50:04 +03:00
{
2007-01-22 20:15:34 +03:00
if ( gl - > gl_state ! = LM_ST_EXCLUSIVE )
return ;
2006-01-16 19:50:04 +03:00
if ( test_and_clear_bit ( GLF_DIRTY , & gl - > gl_flags ) ) {
2006-04-07 19:17:32 +04:00
gfs2_log_flush ( gl - > gl_sbd , gl ) ;
2006-09-22 01:05:23 +04:00
gfs2_meta_sync ( gl ) ;
2006-11-20 18:37:45 +03:00
gfs2_ail_empty_gl ( gl ) ;
2006-01-16 19:50:04 +03:00
}
}
/**
* meta_go_inval - invalidate the metadata for this glock
* @ gl : the glock
* @ flags :
*
*/
static void meta_go_inval ( struct gfs2_glock * gl , int flags )
{
if ( ! ( flags & DIO_METADATA ) )
return ;
gfs2_meta_inval ( gl ) ;
gl - > gl_vn + + ;
}
2007-01-22 20:15:34 +03:00
/**
* inode_go_sync - Sync the dirty data and / or metadata for an inode glock
* @ gl : the glock protecting the inode
*
*/
static void inode_go_sync ( struct gfs2_glock * gl )
{
struct gfs2_inode * ip = gl - > gl_object ;
if ( ip & & ! S_ISREG ( ip - > i_inode . i_mode ) )
ip = NULL ;
if ( test_bit ( GLF_DIRTY , & gl - > gl_flags ) ) {
2007-08-16 19:03:57 +04:00
if ( ip & & ! gfs2_is_jdata ( ip ) )
2007-01-22 20:15:34 +03:00
filemap_fdatawrite ( ip - > i_inode . i_mapping ) ;
[GFS2] flush the glock completely in inode_go_sync
Fix for bz #231910
When filemap_fdatawrite() is called on the inode mapping in data=ordered mode,
it will add the glock to the log. In inode_go_sync(), if you do the
gfs2_log_flush() before this, after the filemap_fdatawrite() call, the glock
and its associated data buffers will be on the log again. This means you can
demote a lock from exclusive, without having it flushed from the log. The
attached patch simply moves the gfs2_log_flush up to after the
filemap_fdatawrite() call.
Originally, I tried moving the gfs2_log_flush to after gfs2_meta_sync(), but
that caused me to trip the following assert.
GFS2: fsid=cypher-36:test.0: fatal: assertion "!buffer_busy(bh)" failed
GFS2: fsid=cypher-36:test.0: function = gfs2_ail_empty_gl, file = fs/gfs2/glops.c, line = 61
It appears that gfs2_log_flush() puts some of the glocks buffers in the busy
state and the filemap_fdatawrite() call is necessary to flush them. This makes
me worry slightly that a related problem could happen because of moving the
gfs2_log_flush() after the initial filemap_fdatawrite(), but I assume that
gfs2_ail_empty_gl() would catch that case as well.
Signed-off-by: Benjamin E. Marzinski <bmarzins@redhat.com>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
2007-05-02 18:44:03 +04:00
gfs2_log_flush ( gl - > gl_sbd , gl ) ;
2007-08-16 19:03:57 +04:00
if ( ip & & gfs2_is_jdata ( ip ) )
filemap_fdatawrite ( ip - > i_inode . i_mapping ) ;
2007-01-22 20:15:34 +03:00
gfs2_meta_sync ( gl ) ;
if ( ip ) {
struct address_space * mapping = ip - > i_inode . i_mapping ;
int error = filemap_fdatawait ( mapping ) ;
2007-05-08 11:23:25 +04:00
mapping_set_error ( mapping , error ) ;
2007-01-22 20:15:34 +03:00
}
clear_bit ( GLF_DIRTY , & gl - > gl_flags ) ;
gfs2_ail_empty_gl ( gl ) ;
}
}
2006-01-16 19:50:04 +03:00
/**
* inode_go_xmote_th - promote / demote a glock
* @ gl : the glock
* @ state : the requested state
* @ flags :
*
*/
2007-01-22 20:15:34 +03:00
static void inode_go_xmote_th ( struct gfs2_glock * gl )
2006-01-16 19:50:04 +03:00
{
if ( gl - > gl_state ! = LM_ST_UNLOCKED )
gfs2_pte_inval ( gl ) ;
2007-01-22 20:15:34 +03:00
if ( gl - > gl_state = = LM_ST_EXCLUSIVE )
inode_go_sync ( gl ) ;
2006-01-16 19:50:04 +03:00
}
/**
* inode_go_xmote_bh - After promoting / demoting a glock
* @ gl : the glock
*
*/
static void inode_go_xmote_bh ( struct gfs2_glock * gl )
{
struct gfs2_holder * gh = gl - > gl_req_gh ;
struct buffer_head * bh ;
int error ;
if ( gl - > gl_state ! = LM_ST_UNLOCKED & &
( ! gh | | ! ( gh - > gh_flags & GL_SKIP ) ) ) {
2006-09-22 01:05:23 +04:00
error = gfs2_meta_read ( gl , gl - > gl_name . ln_number , 0 , & bh ) ;
2006-01-16 19:50:04 +03:00
if ( ! error )
brelse ( bh ) ;
}
}
/**
* inode_go_drop_th - unlock a glock
* @ gl : the glock
*
* Invoked from rq_demote ( ) .
* Another node needs the lock in EXCLUSIVE mode , or lock ( unused for too long )
* is being purged from our node ' s glock cache ; we ' re dropping lock .
*/
static void inode_go_drop_th ( struct gfs2_glock * gl )
{
gfs2_pte_inval ( gl ) ;
2007-01-22 20:15:34 +03:00
if ( gl - > gl_state = = LM_ST_EXCLUSIVE )
inode_go_sync ( gl ) ;
2006-01-16 19:50:04 +03:00
}
/**
* inode_go_inval - prepare a inode glock to be released
* @ gl : the glock
* @ flags :
*
*/
static void inode_go_inval ( struct gfs2_glock * gl , int flags )
{
2006-11-23 18:51:34 +03:00
struct gfs2_inode * ip = gl - > gl_object ;
2006-01-16 19:50:04 +03:00
int meta = ( flags & DIO_METADATA ) ;
if ( meta ) {
gfs2_meta_inval ( gl ) ;
2006-11-23 18:51:34 +03:00
if ( ip )
set_bit ( GIF_INVALID , & ip - > i_flags ) ;
}
2007-10-15 18:40:33 +04:00
if ( ip & & S_ISREG ( ip - > i_inode . i_mode ) )
2006-11-23 18:51:34 +03:00
truncate_inode_pages ( ip - > i_inode . i_mapping , 0 ) ;
2006-01-16 19:50:04 +03:00
}
/**
* inode_go_demote_ok - Check to see if it ' s ok to unlock an inode glock
* @ gl : the glock
*
* Returns : 1 if it ' s ok
*/
static int inode_go_demote_ok ( struct gfs2_glock * gl )
{
struct gfs2_sbd * sdp = gl - > gl_sbd ;
int demote = 0 ;
2006-02-28 01:23:27 +03:00
if ( ! gl - > gl_object & & ! gl - > gl_aspace - > i_mapping - > nrpages )
2006-01-16 19:50:04 +03:00
demote = 1 ;
else if ( ! sdp - > sd_args . ar_localcaching & &
time_after_eq ( jiffies , gl - > gl_stamp +
gfs2_tune_get ( sdp , gt_demote_secs ) * HZ ) )
demote = 1 ;
return demote ;
}
/**
* inode_go_lock - operation done after an inode lock is locked by a process
* @ gl : the glock
* @ flags :
*
* Returns : errno
*/
static int inode_go_lock ( struct gfs2_holder * gh )
{
struct gfs2_glock * gl = gh - > gh_gl ;
2006-02-28 01:23:27 +03:00
struct gfs2_inode * ip = gl - > gl_object ;
2006-01-16 19:50:04 +03:00
int error = 0 ;
if ( ! ip )
return 0 ;
2006-11-02 00:05:38 +03:00
if ( test_bit ( GIF_INVALID , & ip - > i_flags ) ) {
2006-01-16 19:50:04 +03:00
error = gfs2_inode_refresh ( ip ) ;
if ( error )
return error ;
}
if ( ( ip - > i_di . di_flags & GFS2_DIF_TRUNC_IN_PROG ) & &
( gl - > gl_state = = LM_ST_EXCLUSIVE ) & &
2007-01-22 20:10:39 +03:00
( gh - > gh_state = = LM_ST_EXCLUSIVE ) )
2006-01-16 19:50:04 +03:00
error = gfs2_truncatei_resume ( ip ) ;
return error ;
}
/**
* rgrp_go_demote_ok - Check to see if it ' s ok to unlock a RG ' s glock
* @ gl : the glock
*
* Returns : 1 if it ' s ok
*/
static int rgrp_go_demote_ok ( struct gfs2_glock * gl )
{
return ! gl - > gl_aspace - > i_mapping - > nrpages ;
}
/**
* rgrp_go_lock - operation done after an rgrp lock is locked by
* a first holder on this node .
* @ gl : the glock
* @ flags :
*
* Returns : errno
*/
static int rgrp_go_lock ( struct gfs2_holder * gh )
{
2006-02-28 01:23:27 +03:00
return gfs2_rgrp_bh_get ( gh - > gh_gl - > gl_object ) ;
2006-01-16 19:50:04 +03:00
}
/**
* rgrp_go_unlock - operation done before an rgrp lock is unlocked by
* a last holder on this node .
* @ gl : the glock
* @ flags :
*
*/
static void rgrp_go_unlock ( struct gfs2_holder * gh )
{
2006-02-28 01:23:27 +03:00
gfs2_rgrp_bh_put ( gh - > gh_gl - > gl_object ) ;
2006-01-16 19:50:04 +03:00
}
/**
* trans_go_xmote_th - promote / demote the transaction glock
* @ gl : the glock
* @ state : the requested state
* @ flags :
*
*/
2007-01-22 20:15:34 +03:00
static void trans_go_xmote_th ( struct gfs2_glock * gl )
2006-01-16 19:50:04 +03:00
{
struct gfs2_sbd * sdp = gl - > gl_sbd ;
if ( gl - > gl_state ! = LM_ST_UNLOCKED & &
test_bit ( SDF_JOURNAL_LIVE , & sdp - > sd_flags ) ) {
gfs2_meta_syncfs ( sdp ) ;
gfs2_log_shutdown ( sdp ) ;
}
}
/**
* trans_go_xmote_bh - After promoting / demoting the transaction glock
* @ gl : the glock
*
*/
static void trans_go_xmote_bh ( struct gfs2_glock * gl )
{
struct gfs2_sbd * sdp = gl - > gl_sbd ;
2006-06-14 23:32:57 +04:00
struct gfs2_inode * ip = GFS2_I ( sdp - > sd_jdesc - > jd_inode ) ;
2006-02-28 01:23:27 +03:00
struct gfs2_glock * j_gl = ip - > i_gl ;
2006-10-14 05:47:13 +04:00
struct gfs2_log_header_host head ;
2006-01-16 19:50:04 +03:00
int error ;
if ( gl - > gl_state ! = LM_ST_UNLOCKED & &
test_bit ( SDF_JOURNAL_LIVE , & sdp - > sd_flags ) ) {
2006-11-20 18:37:45 +03:00
j_gl - > gl_ops - > go_inval ( j_gl , DIO_METADATA ) ;
2006-01-16 19:50:04 +03:00
error = gfs2_find_jhead ( sdp - > sd_jdesc , & head ) ;
if ( error )
gfs2_consist ( sdp ) ;
if ( ! ( head . lh_flags & GFS2_LOG_HEAD_UNMOUNT ) )
gfs2_consist ( sdp ) ;
/* Initialize some head of the log stuff */
if ( ! test_bit ( SDF_SHUTDOWN , & sdp - > sd_flags ) ) {
sdp - > sd_log_sequence = head . lh_sequence + 1 ;
gfs2_log_pointers_init ( sdp , head . lh_blkno ) ;
}
}
}
/**
* trans_go_drop_th - unlock the transaction glock
* @ gl : the glock
*
* We want to sync the device even with localcaching . Remember
* that localcaching journal replay only marks buffers dirty .
*/
static void trans_go_drop_th ( struct gfs2_glock * gl )
{
struct gfs2_sbd * sdp = gl - > gl_sbd ;
if ( test_bit ( SDF_JOURNAL_LIVE , & sdp - > sd_flags ) ) {
gfs2_meta_syncfs ( sdp ) ;
gfs2_log_shutdown ( sdp ) ;
}
}
/**
* quota_go_demote_ok - Check to see if it ' s ok to unlock a quota glock
* @ gl : the glock
*
* Returns : 1 if it ' s ok
*/
static int quota_go_demote_ok ( struct gfs2_glock * gl )
{
return ! atomic_read ( & gl - > gl_lvb_count ) ;
}
2006-08-30 17:30:00 +04:00
const struct gfs2_glock_operations gfs2_meta_glops = {
2007-01-22 20:15:34 +03:00
. go_xmote_th = meta_go_sync ,
. go_drop_th = meta_go_sync ,
2006-09-05 18:53:09 +04:00
. go_type = LM_TYPE_META ,
2006-01-16 19:50:04 +03:00
} ;
2006-08-30 17:30:00 +04:00
const struct gfs2_glock_operations gfs2_inode_glops = {
2006-01-16 19:50:04 +03:00
. go_xmote_th = inode_go_xmote_th ,
. go_xmote_bh = inode_go_xmote_bh ,
. go_drop_th = inode_go_drop_th ,
. go_inval = inode_go_inval ,
. go_demote_ok = inode_go_demote_ok ,
. go_lock = inode_go_lock ,
2006-09-05 18:53:09 +04:00
. go_type = LM_TYPE_INODE ,
[GFS2] delay glock demote for a minimum hold time
When a lot of IO, with some distributed mmap IO, is run on a GFS2 filesystem in
a cluster, it will deadlock. The reason is that do_no_page() will repeatedly
call gfs2_sharewrite_nopage(), because each node keeps giving up the glock
too early, and is forced to call unmap_mapping_range(). This bumps the
mapping->truncate_count sequence count, forcing do_no_page() to retry. This
patch institutes a minimum glock hold time a tenth a second. This insures
that even in heavy contention cases, the node has enough time to get some
useful work done before it gives up the glock.
A second issue is that when gfs2_glock_dq() is called from within a page fault
to demote a lock, and the associated page needs to be written out, it will
try to acqire a lock on it, but it has already been locked at a higher level.
This patch puts makes gfs2_glock_dq() use the work queue as well, to avoid this
issue. This is the same patch as Steve Whitehouse originally proposed to fix
this issue, execpt that gfs2_glock_dq() now grabs a reference to the glock
before it queues up the work on it.
Signed-off-by: Benjamin E. Marzinski <bmarzins@redhat.com>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
2007-08-23 22:19:05 +04:00
. go_min_hold_time = HZ / 10 ,
2006-01-16 19:50:04 +03:00
} ;
2006-08-30 17:30:00 +04:00
const struct gfs2_glock_operations gfs2_rgrp_glops = {
2007-02-28 17:03:00 +03:00
. go_xmote_th = meta_go_sync ,
. go_drop_th = meta_go_sync ,
2006-01-16 19:50:04 +03:00
. go_inval = meta_go_inval ,
. go_demote_ok = rgrp_go_demote_ok ,
. go_lock = rgrp_go_lock ,
. go_unlock = rgrp_go_unlock ,
2006-09-05 18:53:09 +04:00
. go_type = LM_TYPE_RGRP ,
[GFS2] delay glock demote for a minimum hold time
When a lot of IO, with some distributed mmap IO, is run on a GFS2 filesystem in
a cluster, it will deadlock. The reason is that do_no_page() will repeatedly
call gfs2_sharewrite_nopage(), because each node keeps giving up the glock
too early, and is forced to call unmap_mapping_range(). This bumps the
mapping->truncate_count sequence count, forcing do_no_page() to retry. This
patch institutes a minimum glock hold time a tenth a second. This insures
that even in heavy contention cases, the node has enough time to get some
useful work done before it gives up the glock.
A second issue is that when gfs2_glock_dq() is called from within a page fault
to demote a lock, and the associated page needs to be written out, it will
try to acqire a lock on it, but it has already been locked at a higher level.
This patch puts makes gfs2_glock_dq() use the work queue as well, to avoid this
issue. This is the same patch as Steve Whitehouse originally proposed to fix
this issue, execpt that gfs2_glock_dq() now grabs a reference to the glock
before it queues up the work on it.
Signed-off-by: Benjamin E. Marzinski <bmarzins@redhat.com>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
2007-08-23 22:19:05 +04:00
. go_min_hold_time = HZ / 10 ,
2006-01-16 19:50:04 +03:00
} ;
2006-08-30 17:30:00 +04:00
const struct gfs2_glock_operations gfs2_trans_glops = {
2006-01-16 19:50:04 +03:00
. go_xmote_th = trans_go_xmote_th ,
. go_xmote_bh = trans_go_xmote_bh ,
. go_drop_th = trans_go_drop_th ,
2006-09-05 18:53:09 +04:00
. go_type = LM_TYPE_NONDISK ,
2006-01-16 19:50:04 +03:00
} ;
2006-08-30 17:30:00 +04:00
const struct gfs2_glock_operations gfs2_iopen_glops = {
2006-09-05 18:53:09 +04:00
. go_type = LM_TYPE_IOPEN ,
2006-01-16 19:50:04 +03:00
} ;
2006-08-30 17:30:00 +04:00
const struct gfs2_glock_operations gfs2_flock_glops = {
2006-09-05 18:53:09 +04:00
. go_type = LM_TYPE_FLOCK ,
2006-01-16 19:50:04 +03:00
} ;
2006-08-30 17:30:00 +04:00
const struct gfs2_glock_operations gfs2_nondisk_glops = {
2006-09-05 18:53:09 +04:00
. go_type = LM_TYPE_NONDISK ,
2006-01-16 19:50:04 +03:00
} ;
2006-08-30 17:30:00 +04:00
const struct gfs2_glock_operations gfs2_quota_glops = {
2006-01-16 19:50:04 +03:00
. go_demote_ok = quota_go_demote_ok ,
2006-09-05 18:53:09 +04:00
. go_type = LM_TYPE_QUOTA ,
2006-01-16 19:50:04 +03:00
} ;
2006-08-30 17:30:00 +04:00
const struct gfs2_glock_operations gfs2_journal_glops = {
2006-09-05 18:53:09 +04:00
. go_type = LM_TYPE_JOURNAL ,
2006-01-16 19:50:04 +03:00
} ;