2006-01-16 19:50:04 +03:00
/*
* Copyright ( C ) Sistina Software , Inc . 1997 - 2003 All rights reserved .
2008-01-31 19:31:39 +03:00
* Copyright ( C ) 2004 - 2008 Red Hat , Inc . All rights reserved .
2006-01-16 19:50:04 +03:00
*
* This copyrighted material is made available to anyone wishing to use ,
* modify , copy , or redistribute it subject to the terms and conditions
2006-09-01 19:05:15 +04:00
* of the GNU General Public License version 2.
2006-01-16 19:50:04 +03:00
*/
# include <linux/slab.h>
# include <linux/spinlock.h>
# include <linux/completion.h>
# include <linux/buffer_head.h>
2006-02-28 01:23:27 +03:00
# include <linux/gfs2_ondisk.h>
2008-05-21 20:03:22 +04:00
# include <linux/bio.h>
2006-01-16 19:50:04 +03:00
# include "gfs2.h"
2006-02-28 01:23:27 +03:00
# include "incore.h"
2006-01-16 19:50:04 +03:00
# include "bmap.h"
# include "glock.h"
# include "glops.h"
# include "inode.h"
# include "log.h"
# include "meta_io.h"
# include "recovery.h"
# include "rgrp.h"
2006-02-28 01:23:27 +03:00
# include "util.h"
2006-10-03 19:10:41 +04:00
# include "trans.h"
2006-01-16 19:50:04 +03:00
2006-10-03 19:10:41 +04:00
/**
* ail_empty_gl - remove all buffers for a given lock from the AIL
* @ gl : the glock
*
* None of the buffers should be dirty , locked , or pinned .
*/
static void gfs2_ail_empty_gl ( struct gfs2_glock * gl )
{
struct gfs2_sbd * sdp = gl - > gl_sbd ;
struct list_head * head = & gl - > gl_ail_list ;
struct gfs2_bufdata * bd ;
struct buffer_head * bh ;
2009-02-05 13:12:38 +03:00
struct gfs2_trans tr ;
2006-10-03 19:10:41 +04:00
2009-02-05 13:12:38 +03:00
memset ( & tr , 0 , sizeof ( tr ) ) ;
tr . tr_revokes = atomic_read ( & gl - > gl_ail_count ) ;
2006-10-03 19:10:41 +04:00
2009-02-05 13:12:38 +03:00
if ( ! tr . tr_revokes )
2006-10-03 19:10:41 +04:00
return ;
2009-02-05 13:12:38 +03:00
/* A shortened, inline version of gfs2_trans_begin() */
tr . tr_reserved = 1 + gfs2_struct2blk ( sdp , tr . tr_revokes , sizeof ( u64 ) ) ;
tr . tr_ip = ( unsigned long ) __builtin_return_address ( 0 ) ;
INIT_LIST_HEAD ( & tr . tr_list_buf ) ;
gfs2_log_reserve ( sdp , tr . tr_reserved ) ;
BUG_ON ( current - > journal_info ) ;
current - > journal_info = & tr ;
2006-10-03 19:10:41 +04:00
gfs2_log_lock ( sdp ) ;
while ( ! list_empty ( head ) ) {
bd = list_entry ( head - > next , struct gfs2_bufdata ,
bd_ail_gl_list ) ;
bh = bd - > bd_bh ;
2007-10-15 19:29:05 +04:00
gfs2_remove_from_ail ( bd ) ;
2007-09-03 14:01:33 +04:00
bd - > bd_bh = NULL ;
bh - > b_private = NULL ;
bd - > bd_blkno = bh - > b_blocknr ;
gfs2_assert_withdraw ( sdp , ! buffer_busy ( bh ) ) ;
gfs2_trans_add_revoke ( sdp , bd ) ;
2006-10-03 19:10:41 +04:00
}
gfs2_assert_withdraw ( sdp , ! atomic_read ( & gl - > gl_ail_count ) ) ;
gfs2_log_unlock ( sdp ) ;
gfs2_trans_end ( sdp ) ;
gfs2_log_flush ( sdp , NULL ) ;
}
2006-07-26 19:27:10 +04:00
/**
* gfs2_pte_inval - Sync and invalidate all PTEs associated with a glock
* @ gl : the glock
*
*/
static void gfs2_pte_inval ( struct gfs2_glock * gl )
{
struct gfs2_inode * ip ;
struct inode * inode ;
ip = gl - > gl_object ;
inode = & ip - > i_inode ;
2006-11-01 20:22:46 +03:00
if ( ! ip | | ! S_ISREG ( inode - > i_mode ) )
2006-07-26 19:27:10 +04:00
return ;
unmap_shared_mapping_range ( inode - > i_mapping , 0 , 0 ) ;
if ( test_bit ( GIF_SW_PAGED , & ip - > i_flags ) )
set_bit ( GLF_DIRTY , & gl - > gl_flags ) ;
}
2006-01-16 19:50:04 +03:00
/**
* meta_go_sync - sync out the metadata for this glock
* @ gl : the glock
*
* Called when demoting or unlocking an EX glock . We must flush
* to disk all dirty buffers / pages relating to this glock , and must not
* not return to caller to demote / unlock the glock until I / O is complete .
*/
2006-11-20 18:37:45 +03:00
static void meta_go_sync ( struct gfs2_glock * gl )
2006-01-16 19:50:04 +03:00
{
2007-01-22 20:15:34 +03:00
if ( gl - > gl_state ! = LM_ST_EXCLUSIVE )
return ;
2006-01-16 19:50:04 +03:00
if ( test_and_clear_bit ( GLF_DIRTY , & gl - > gl_flags ) ) {
2006-04-07 19:17:32 +04:00
gfs2_log_flush ( gl - > gl_sbd , gl ) ;
2006-09-22 01:05:23 +04:00
gfs2_meta_sync ( gl ) ;
2006-11-20 18:37:45 +03:00
gfs2_ail_empty_gl ( gl ) ;
2006-01-16 19:50:04 +03:00
}
}
/**
* meta_go_inval - invalidate the metadata for this glock
* @ gl : the glock
* @ flags :
*
*/
static void meta_go_inval ( struct gfs2_glock * gl , int flags )
{
if ( ! ( flags & DIO_METADATA ) )
return ;
gfs2_meta_inval ( gl ) ;
2008-01-31 19:31:39 +03:00
if ( gl - > gl_object = = GFS2_I ( gl - > gl_sbd - > sd_rindex ) )
gl - > gl_sbd - > sd_rindex_uptodate = 0 ;
else if ( gl - > gl_ops = = & gfs2_rgrp_glops & & gl - > gl_object ) {
struct gfs2_rgrpd * rgd = ( struct gfs2_rgrpd * ) gl - > gl_object ;
rgd - > rd_flags & = ~ GFS2_RDF_UPTODATE ;
}
2006-01-16 19:50:04 +03:00
}
2007-01-22 20:15:34 +03:00
/**
* inode_go_sync - Sync the dirty data and / or metadata for an inode glock
* @ gl : the glock protecting the inode
*
*/
static void inode_go_sync ( struct gfs2_glock * gl )
{
struct gfs2_inode * ip = gl - > gl_object ;
2007-11-02 11:39:34 +03:00
struct address_space * metamapping = gl - > gl_aspace - > i_mapping ;
int error ;
if ( gl - > gl_state ! = LM_ST_UNLOCKED )
gfs2_pte_inval ( gl ) ;
if ( gl - > gl_state ! = LM_ST_EXCLUSIVE )
return ;
2007-01-22 20:15:34 +03:00
if ( ip & & ! S_ISREG ( ip - > i_inode . i_mode ) )
ip = NULL ;
if ( test_bit ( GLF_DIRTY , & gl - > gl_flags ) ) {
[GFS2] flush the glock completely in inode_go_sync
Fix for bz #231910
When filemap_fdatawrite() is called on the inode mapping in data=ordered mode,
it will add the glock to the log. In inode_go_sync(), if you do the
gfs2_log_flush() before this, after the filemap_fdatawrite() call, the glock
and its associated data buffers will be on the log again. This means you can
demote a lock from exclusive, without having it flushed from the log. The
attached patch simply moves the gfs2_log_flush up to after the
filemap_fdatawrite() call.
Originally, I tried moving the gfs2_log_flush to after gfs2_meta_sync(), but
that caused me to trip the following assert.
GFS2: fsid=cypher-36:test.0: fatal: assertion "!buffer_busy(bh)" failed
GFS2: fsid=cypher-36:test.0: function = gfs2_ail_empty_gl, file = fs/gfs2/glops.c, line = 61
It appears that gfs2_log_flush() puts some of the glocks buffers in the busy
state and the filemap_fdatawrite() call is necessary to flush them. This makes
me worry slightly that a related problem could happen because of moving the
gfs2_log_flush() after the initial filemap_fdatawrite(), but I assume that
gfs2_ail_empty_gl() would catch that case as well.
Signed-off-by: Benjamin E. Marzinski <bmarzins@redhat.com>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
2007-05-02 18:44:03 +04:00
gfs2_log_flush ( gl - > gl_sbd , gl ) ;
2007-11-02 11:39:34 +03:00
filemap_fdatawrite ( metamapping ) ;
2007-01-22 20:15:34 +03:00
if ( ip ) {
struct address_space * mapping = ip - > i_inode . i_mapping ;
2007-11-02 11:39:34 +03:00
filemap_fdatawrite ( mapping ) ;
error = filemap_fdatawait ( mapping ) ;
2007-05-08 11:23:25 +04:00
mapping_set_error ( mapping , error ) ;
2007-01-22 20:15:34 +03:00
}
2007-11-02 11:39:34 +03:00
error = filemap_fdatawait ( metamapping ) ;
mapping_set_error ( metamapping , error ) ;
2007-01-22 20:15:34 +03:00
clear_bit ( GLF_DIRTY , & gl - > gl_flags ) ;
gfs2_ail_empty_gl ( gl ) ;
}
}
2006-01-16 19:50:04 +03:00
/**
* inode_go_inval - prepare a inode glock to be released
* @ gl : the glock
* @ flags :
*
*/
static void inode_go_inval ( struct gfs2_glock * gl , int flags )
{
2006-11-23 18:51:34 +03:00
struct gfs2_inode * ip = gl - > gl_object ;
2006-01-16 19:50:04 +03:00
int meta = ( flags & DIO_METADATA ) ;
if ( meta ) {
gfs2_meta_inval ( gl ) ;
2006-11-23 18:51:34 +03:00
if ( ip )
set_bit ( GIF_INVALID , & ip - > i_flags ) ;
}
2007-10-15 18:40:33 +04:00
if ( ip & & S_ISREG ( ip - > i_inode . i_mode ) )
2006-11-23 18:51:34 +03:00
truncate_inode_pages ( ip - > i_inode . i_mapping , 0 ) ;
2006-01-16 19:50:04 +03:00
}
/**
* inode_go_demote_ok - Check to see if it ' s ok to unlock an inode glock
* @ gl : the glock
*
* Returns : 1 if it ' s ok
*/
2008-11-20 16:39:47 +03:00
static int inode_go_demote_ok ( const struct gfs2_glock * gl )
2006-01-16 19:50:04 +03:00
{
struct gfs2_sbd * sdp = gl - > gl_sbd ;
2008-11-20 16:39:47 +03:00
if ( sdp - > sd_jindex = = gl - > gl_object | | sdp - > sd_rindex = = gl - > gl_object )
return 0 ;
return 1 ;
2006-01-16 19:50:04 +03:00
}
/**
* inode_go_lock - operation done after an inode lock is locked by a process
* @ gl : the glock
* @ flags :
*
* Returns : errno
*/
static int inode_go_lock ( struct gfs2_holder * gh )
{
struct gfs2_glock * gl = gh - > gh_gl ;
2008-11-18 16:38:48 +03:00
struct gfs2_sbd * sdp = gl - > gl_sbd ;
2006-02-28 01:23:27 +03:00
struct gfs2_inode * ip = gl - > gl_object ;
2006-01-16 19:50:04 +03:00
int error = 0 ;
2008-04-29 21:35:48 +04:00
if ( ! ip | | ( gh - > gh_flags & GL_SKIP ) )
2006-01-16 19:50:04 +03:00
return 0 ;
2006-11-02 00:05:38 +03:00
if ( test_bit ( GIF_INVALID , & ip - > i_flags ) ) {
2006-01-16 19:50:04 +03:00
error = gfs2_inode_refresh ( ip ) ;
if ( error )
return error ;
}
2008-11-04 13:05:22 +03:00
if ( ( ip - > i_diskflags & GFS2_DIF_TRUNC_IN_PROG ) & &
2006-01-16 19:50:04 +03:00
( gl - > gl_state = = LM_ST_EXCLUSIVE ) & &
2008-11-18 16:38:48 +03:00
( gh - > gh_state = = LM_ST_EXCLUSIVE ) ) {
spin_lock ( & sdp - > sd_trunc_lock ) ;
if ( list_empty ( & ip - > i_trunc_list ) )
list_add ( & sdp - > sd_trunc_list , & ip - > i_trunc_list ) ;
spin_unlock ( & sdp - > sd_trunc_lock ) ;
wake_up ( & sdp - > sd_quota_wait ) ;
return 1 ;
}
2006-01-16 19:50:04 +03:00
return error ;
}
2008-05-21 20:03:22 +04:00
/**
* inode_go_dump - print information about an inode
* @ seq : The iterator
* @ ip : the inode
*
* Returns : 0 on success , - ENOBUFS when we run out of space
*/
static int inode_go_dump ( struct seq_file * seq , const struct gfs2_glock * gl )
{
const struct gfs2_inode * ip = gl - > gl_object ;
if ( ip = = NULL )
return 0 ;
2008-11-10 13:10:12 +03:00
gfs2_print_dbg ( seq , " I: n:%llu/%llu t:%u f:0x%02lx d:0x%08x s:%llu/%llu \n " ,
2008-05-21 20:03:22 +04:00
( unsigned long long ) ip - > i_no_formal_ino ,
( unsigned long long ) ip - > i_no_addr ,
2008-11-10 13:10:12 +03:00
IF2DT ( ip - > i_inode . i_mode ) , ip - > i_flags ,
( unsigned int ) ip - > i_diskflags ,
( unsigned long long ) ip - > i_inode . i_size ,
( unsigned long long ) ip - > i_disksize ) ;
2008-05-21 20:03:22 +04:00
return 0 ;
}
2006-01-16 19:50:04 +03:00
/**
* rgrp_go_demote_ok - Check to see if it ' s ok to unlock a RG ' s glock
* @ gl : the glock
*
* Returns : 1 if it ' s ok
*/
2008-11-20 16:39:47 +03:00
static int rgrp_go_demote_ok ( const struct gfs2_glock * gl )
2006-01-16 19:50:04 +03:00
{
return ! gl - > gl_aspace - > i_mapping - > nrpages ;
}
/**
* rgrp_go_lock - operation done after an rgrp lock is locked by
* a first holder on this node .
* @ gl : the glock
* @ flags :
*
* Returns : errno
*/
static int rgrp_go_lock ( struct gfs2_holder * gh )
{
2006-02-28 01:23:27 +03:00
return gfs2_rgrp_bh_get ( gh - > gh_gl - > gl_object ) ;
2006-01-16 19:50:04 +03:00
}
/**
* rgrp_go_unlock - operation done before an rgrp lock is unlocked by
* a last holder on this node .
* @ gl : the glock
* @ flags :
*
*/
static void rgrp_go_unlock ( struct gfs2_holder * gh )
{
2006-02-28 01:23:27 +03:00
gfs2_rgrp_bh_put ( gh - > gh_gl - > gl_object ) ;
2006-01-16 19:50:04 +03:00
}
2008-05-21 20:03:22 +04:00
/**
* rgrp_go_dump - print out an rgrp
* @ seq : The iterator
* @ gl : The glock in question
*
*/
static int rgrp_go_dump ( struct seq_file * seq , const struct gfs2_glock * gl )
{
const struct gfs2_rgrpd * rgd = gl - > gl_object ;
if ( rgd = = NULL )
return 0 ;
2008-11-10 13:10:12 +03:00
gfs2_print_dbg ( seq , " R: n:%llu f:%02x b:%u/%u i:%u \n " ,
( unsigned long long ) rgd - > rd_addr , rgd - > rd_flags ,
rgd - > rd_free , rgd - > rd_free_clone , rgd - > rd_dinodes ) ;
2008-05-21 20:03:22 +04:00
return 0 ;
}
2006-01-16 19:50:04 +03:00
/**
2007-11-02 11:39:34 +03:00
* trans_go_sync - promote / demote the transaction glock
2006-01-16 19:50:04 +03:00
* @ gl : the glock
* @ state : the requested state
* @ flags :
*
*/
2007-11-02 11:39:34 +03:00
static void trans_go_sync ( struct gfs2_glock * gl )
2006-01-16 19:50:04 +03:00
{
struct gfs2_sbd * sdp = gl - > gl_sbd ;
if ( gl - > gl_state ! = LM_ST_UNLOCKED & &
test_bit ( SDF_JOURNAL_LIVE , & sdp - > sd_flags ) ) {
gfs2_meta_syncfs ( sdp ) ;
gfs2_log_shutdown ( sdp ) ;
}
}
/**
* trans_go_xmote_bh - After promoting / demoting the transaction glock
* @ gl : the glock
*
*/
2008-05-21 20:03:22 +04:00
static int trans_go_xmote_bh ( struct gfs2_glock * gl , struct gfs2_holder * gh )
2006-01-16 19:50:04 +03:00
{
struct gfs2_sbd * sdp = gl - > gl_sbd ;
2006-06-14 23:32:57 +04:00
struct gfs2_inode * ip = GFS2_I ( sdp - > sd_jdesc - > jd_inode ) ;
2006-02-28 01:23:27 +03:00
struct gfs2_glock * j_gl = ip - > i_gl ;
2006-10-14 05:47:13 +04:00
struct gfs2_log_header_host head ;
2006-01-16 19:50:04 +03:00
int error ;
2008-05-21 20:03:22 +04:00
if ( test_bit ( SDF_JOURNAL_LIVE , & sdp - > sd_flags ) ) {
2006-11-20 18:37:45 +03:00
j_gl - > gl_ops - > go_inval ( j_gl , DIO_METADATA ) ;
2006-01-16 19:50:04 +03:00
error = gfs2_find_jhead ( sdp - > sd_jdesc , & head ) ;
if ( error )
gfs2_consist ( sdp ) ;
if ( ! ( head . lh_flags & GFS2_LOG_HEAD_UNMOUNT ) )
gfs2_consist ( sdp ) ;
/* Initialize some head of the log stuff */
if ( ! test_bit ( SDF_SHUTDOWN , & sdp - > sd_flags ) ) {
sdp - > sd_log_sequence = head . lh_sequence + 1 ;
gfs2_log_pointers_init ( sdp , head . lh_blkno ) ;
}
}
2008-05-21 20:03:22 +04:00
return 0 ;
2006-01-16 19:50:04 +03:00
}
2008-11-20 16:39:47 +03:00
/**
 * trans_go_demote_ok - demote check for the transaction glock
 * @gl: the glock (not examined)
 *
 * Returns: 0 always
 */
static int trans_go_demote_ok(const struct gfs2_glock *gl)
{
	/* The answer does not depend on the glock that was passed in. */
	return 0;
}
2006-08-30 17:30:00 +04:00
const struct gfs2_glock_operations gfs2_meta_glops = {
2007-01-22 20:15:34 +03:00
. go_xmote_th = meta_go_sync ,
2006-09-05 18:53:09 +04:00
. go_type = LM_TYPE_META ,
2006-01-16 19:50:04 +03:00
} ;
2006-08-30 17:30:00 +04:00
const struct gfs2_glock_operations gfs2_inode_glops = {
2007-11-02 11:39:34 +03:00
. go_xmote_th = inode_go_sync ,
2006-01-16 19:50:04 +03:00
. go_inval = inode_go_inval ,
. go_demote_ok = inode_go_demote_ok ,
. go_lock = inode_go_lock ,
2008-05-21 20:03:22 +04:00
. go_dump = inode_go_dump ,
2006-09-05 18:53:09 +04:00
. go_type = LM_TYPE_INODE ,
2008-05-21 20:03:22 +04:00
. go_min_hold_time = HZ / 5 ,
2006-01-16 19:50:04 +03:00
} ;
2006-08-30 17:30:00 +04:00
const struct gfs2_glock_operations gfs2_rgrp_glops = {
2007-02-28 17:03:00 +03:00
. go_xmote_th = meta_go_sync ,
2006-01-16 19:50:04 +03:00
. go_inval = meta_go_inval ,
. go_demote_ok = rgrp_go_demote_ok ,
. go_lock = rgrp_go_lock ,
. go_unlock = rgrp_go_unlock ,
2008-05-21 20:03:22 +04:00
. go_dump = rgrp_go_dump ,
2006-09-05 18:53:09 +04:00
. go_type = LM_TYPE_RGRP ,
2008-05-21 20:03:22 +04:00
. go_min_hold_time = HZ / 5 ,
2006-01-16 19:50:04 +03:00
} ;
2006-08-30 17:30:00 +04:00
const struct gfs2_glock_operations gfs2_trans_glops = {
2007-11-02 11:39:34 +03:00
. go_xmote_th = trans_go_sync ,
2006-01-16 19:50:04 +03:00
. go_xmote_bh = trans_go_xmote_bh ,
2008-11-20 16:39:47 +03:00
. go_demote_ok = trans_go_demote_ok ,
2006-09-05 18:53:09 +04:00
. go_type = LM_TYPE_NONDISK ,
2006-01-16 19:50:04 +03:00
} ;
2006-08-30 17:30:00 +04:00
const struct gfs2_glock_operations gfs2_iopen_glops = {
2006-09-05 18:53:09 +04:00
. go_type = LM_TYPE_IOPEN ,
2006-01-16 19:50:04 +03:00
} ;
2006-08-30 17:30:00 +04:00
const struct gfs2_glock_operations gfs2_flock_glops = {
2006-09-05 18:53:09 +04:00
. go_type = LM_TYPE_FLOCK ,
2006-01-16 19:50:04 +03:00
} ;
2006-08-30 17:30:00 +04:00
const struct gfs2_glock_operations gfs2_nondisk_glops = {
2006-09-05 18:53:09 +04:00
. go_type = LM_TYPE_NONDISK ,
2006-01-16 19:50:04 +03:00
} ;
2006-08-30 17:30:00 +04:00
const struct gfs2_glock_operations gfs2_quota_glops = {
2006-09-05 18:53:09 +04:00
. go_type = LM_TYPE_QUOTA ,
2006-01-16 19:50:04 +03:00
} ;
2006-08-30 17:30:00 +04:00
const struct gfs2_glock_operations gfs2_journal_glops = {
2006-09-05 18:53:09 +04:00
. go_type = LM_TYPE_JOURNAL ,
2006-01-16 19:50:04 +03:00
} ;
GFS2: Add a "demote a glock" interface to sysfs
This adds a sysfs file called demote_rq to GFS2's
per-filesystem directory. It's possible to use this
file to demote arbitrary glocks in exactly the same
way as if a request had come in from a remote node.
This is intended for testing issues relating to caching
of data under glocks. Despite that, the interface is
generic enough to send requests to any type of glock,
but be careful as it's not always safe to send an
arbitrary message to an arbitrary glock. For that reason
and to prevent DoS, this interface is restricted to root
only.
The messages look like this:
<type>:<glocknumber> <mode>
Example:
echo -n "2:13324 EX" >/sys/fs/gfs2/unity:myfs/demote_rq
Which means "please demote inode glock (type 2) number 13324 so that
I can get an EX (exclusive) lock". The lock modes are those which
would normally be sent by a remote node in its callback so if you
want to unlock a glock, you use EX, to demote to shared, use SH or PR
(depending on whether you like GFS2 or DLM lock modes better!).
If the glock doesn't exist, you'll get -ENOENT returned. If the
arguments don't make sense, you'll get -EINVAL returned.
The plan is that this interface will be used in combination with
the blktrace patch which I recently posted for comments although
it is, of course, still useful in its own right.
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
2009-02-12 16:31:58 +03:00
/*
 * Lookup table from lock type (LM_TYPE_*) to the glock operations used for
 * that type.
 *
 * LM_TYPE_NONDISK appears exactly once. The transaction glock's operations
 * (gfs2_trans_glops) share that type, but listing them here as well only
 * produced an initializer that was silently overridden by the later
 * gfs2_nondisk_glops entry, so the effective contents are unchanged.
 */
const struct gfs2_glock_operations *gfs2_glops_list[] = {
	[LM_TYPE_META] = &gfs2_meta_glops,
	[LM_TYPE_INODE] = &gfs2_inode_glops,
	[LM_TYPE_RGRP] = &gfs2_rgrp_glops,
	[LM_TYPE_IOPEN] = &gfs2_iopen_glops,
	[LM_TYPE_FLOCK] = &gfs2_flock_glops,
	[LM_TYPE_NONDISK] = &gfs2_nondisk_glops,
	[LM_TYPE_QUOTA] = &gfs2_quota_glops,
	[LM_TYPE_JOURNAL] = &gfs2_journal_glops,
};