2005-12-16 01:31:24 +03:00
/* -*- mode: c; c-basic-offset: 8; -*-
* vim : noexpandtab sw = 8 ts = 8 sts = 0 :
*
* dcache . c
*
* dentry cache handling code
*
* Copyright ( C ) 2002 , 2004 Oracle . All rights reserved .
*
* This program is free software ; you can redistribute it and / or
* modify it under the terms of the GNU General Public
* License as published by the Free Software Foundation ; either
* version 2 of the License , or ( at your option ) any later version .
*
* This program is distributed in the hope that it will be useful ,
* but WITHOUT ANY WARRANTY ; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE . See the GNU
* General Public License for more details .
*
* You should have received a copy of the GNU General Public
* License along with this program ; if not , write to the
* Free Software Foundation , Inc . , 59 Temple Place - Suite 330 ,
* Boston , MA 021110 - 1307 , USA .
*/
# include <linux/fs.h>
# include <linux/types.h>
# include <linux/slab.h>
# include <linux/namei.h>
# define MLOG_MASK_PREFIX ML_DCACHE
# include <cluster/masklog.h>
# include "ocfs2.h"
# include "alloc.h"
# include "dcache.h"
2006-09-09 01:43:18 +04:00
# include "dlmglue.h"
2005-12-16 01:31:24 +03:00
# include "file.h"
# include "inode.h"
2009-01-13 01:20:31 +03:00
# include "super.h"
2005-12-16 01:31:24 +03:00
2006-09-09 01:43:18 +04:00
2005-12-16 01:31:24 +03:00
static int ocfs2_dentry_revalidate ( struct dentry * dentry ,
struct nameidata * nd )
{
struct inode * inode = dentry - > d_inode ;
int ret = 0 ; /* if all else fails, just return false */
2006-09-09 01:43:18 +04:00
struct ocfs2_super * osb = OCFS2_SB ( dentry - > d_sb ) ;
2005-12-16 01:31:24 +03:00
mlog_entry ( " (0x%p, '%.*s') \n " , dentry ,
dentry - > d_name . len , dentry - > d_name . name ) ;
/* Never trust a negative dentry - force a new lookup. */
if ( inode = = NULL ) {
mlog ( 0 , " negative dentry: %.*s \n " , dentry - > d_name . len ,
dentry - > d_name . name ) ;
goto bail ;
}
BUG_ON ( ! osb ) ;
2006-09-09 01:43:18 +04:00
if ( inode = = osb - > root_inode | | is_bad_inode ( inode ) )
goto bail ;
spin_lock ( & OCFS2_I ( inode ) - > ip_lock ) ;
/* did we or someone else delete this inode? */
if ( OCFS2_I ( inode ) - > ip_flags & OCFS2_INODE_DELETED ) {
2005-12-16 01:31:24 +03:00
spin_unlock ( & OCFS2_I ( inode ) - > ip_lock ) ;
2006-09-09 01:43:18 +04:00
mlog ( 0 , " inode (%llu) deleted, returning false \n " ,
( unsigned long long ) OCFS2_I ( inode ) - > ip_blkno ) ;
goto bail ;
}
spin_unlock ( & OCFS2_I ( inode ) - > ip_lock ) ;
2005-12-16 01:31:24 +03:00
2006-09-09 01:43:18 +04:00
/*
* We don ' t need a cluster lock to test this because once an
* inode nlink hits zero , it never goes back .
*/
if ( inode - > i_nlink = = 0 ) {
mlog ( 0 , " Inode %llu orphaned, returning false "
" dir = %d \n " ,
( unsigned long long ) OCFS2_I ( inode ) - > ip_blkno ,
S_ISDIR ( inode - > i_mode ) ) ;
goto bail ;
2005-12-16 01:31:24 +03:00
}
ret = 1 ;
bail :
mlog_exit ( ret ) ;
return ret ;
}
2006-09-09 01:43:18 +04:00
/*
 * Test whether @dentry is a usable alias living in the directory whose
 * block number is @parent_blkno.
 *
 * @skip_unhashed: when non-zero, unhashed dentries never match.
 *
 * Returns 1 on a match, 0 otherwise.
 */
static int ocfs2_match_dentry(struct dentry *dentry,
			      u64 parent_blkno,
			      int skip_unhashed)
{
	struct inode *dir;

	/*
	 * ocfs2_lookup() does a d_splice_alias() _before_ attaching to
	 * the lock data, so dentries without d_fsdata are skipped here;
	 * otherwise ocfs2_dentry_attach_lock() would get its original
	 * dentry back.
	 */
	if (!dentry->d_fsdata || !dentry->d_parent)
		return 0;

	if (skip_unhashed && d_unhashed(dentry))
		return 0;

	dir = dentry->d_parent->d_inode;

	/* Negative parent dentry? */
	if (!dir)
		return 0;

	/* Anything else means the name lives in a different directory. */
	return OCFS2_I(dir)->ip_blkno == parent_blkno;
}
/*
 * Walk the alias list of @inode and look for a dentry whose parent
 * directory has block number @parent_blkno.
 *
 * ocfs2_dentry_attach_lock() wants to find _any_ alias, as it is
 * looking for a dentry_lock reference.  The downconvert thread is
 * looking to unhash aliases, so it is allowed to skip those which
 * already have that property (@skip_unhashed != 0).
 *
 * The walk runs under dcache_lock.  Returns the first matching dentry
 * after calling dget_locked() on it, or NULL if nothing matches.
 */
struct dentry *ocfs2_find_local_alias(struct inode *inode,
				      u64 parent_blkno,
				      int skip_unhashed)
{
	struct list_head *pos;
	struct dentry *candidate;
	struct dentry *found = NULL;

	spin_lock(&dcache_lock);

	list_for_each(pos, &inode->i_dentry) {
		candidate = list_entry(pos, struct dentry, d_alias);

		if (!ocfs2_match_dentry(candidate, parent_blkno,
					skip_unhashed))
			continue;

		mlog(0, " dentry found: %.*s \n ",
		     candidate->d_name.len, candidate->d_name.name);

		dget_locked(candidate);
		found = candidate;
		break;
	}

	spin_unlock(&dcache_lock);

	return found;
}
2006-09-09 01:14:34 +04:00
/*
 * In this file, taken around updates of dentry->d_fsdata and of the
 * dl_count of the dentry lock it points to.
 */
DEFINE_SPINLOCK ( dentry_attach_lock ) ;
2006-09-09 01:43:18 +04:00
/*
* Attach this dentry to a cluster lock .
*
* Dentry locks cover all links in a given directory to a particular
* inode . We do this so that ocfs2 can build a lock name which all
* nodes in the cluster can agree on at all times . Shoving full names
* in the cluster lock won ' t work due to size restrictions . Covering
* links inside of a directory is a good compromise because it still
* allows us to use the parent directory lock to synchronize
* operations .
*
* Call this function with the parent dir semaphore and the parent dir
* cluster lock held .
*
* The dir semaphore will protect us from having to worry about
* concurrent processes on our node trying to attach a lock at the
* same time .
*
* The dir cluster lock ( held at either PR or EX mode ) protects us
* from unlink and rename on other nodes .
*
* A dput ( ) can happen asynchronously due to pruning , so we cover
* attaching and detaching the dentry lock with a
* dentry_attach_lock .
*
* A node which has done lookup on a name retains a protected read
* lock until final dput . If the user requests and unlink or rename ,
* the protected read is upgraded to an exclusive lock . Other nodes
* who have seen the dentry will then be informed that they need to
* downgrade their lock , which will involve d_delete on the
* dentry . This happens in ocfs2_dentry_convert_worker ( ) .
*/
int ocfs2_dentry_attach_lock ( struct dentry * dentry ,
struct inode * inode ,
2006-09-22 03:51:28 +04:00
u64 parent_blkno )
2006-09-09 01:43:18 +04:00
{
int ret ;
struct dentry * alias ;
struct ocfs2_dentry_lock * dl = dentry - > d_fsdata ;
2006-09-22 03:51:28 +04:00
mlog ( 0 , " Attach \" %.*s \" , parent %llu, fsdata: %p \n " ,
2006-09-09 01:43:18 +04:00
dentry - > d_name . len , dentry - > d_name . name ,
2006-09-22 03:51:28 +04:00
( unsigned long long ) parent_blkno , dl ) ;
2006-09-09 01:43:18 +04:00
/*
* Negative dentry . We ignore these for now .
*
* XXX : Could we can improve ocfs2_dentry_revalidate ( ) by
* tracking these ?
*/
if ( ! inode )
return 0 ;
if ( dl ) {
mlog_bug_on_msg ( dl - > dl_parent_blkno ! = parent_blkno ,
" \" %.*s \" : old parent: %llu, new: %llu \n " ,
dentry - > d_name . len , dentry - > d_name . name ,
( unsigned long long ) parent_blkno ,
( unsigned long long ) dl - > dl_parent_blkno ) ;
return 0 ;
}
alias = ocfs2_find_local_alias ( inode , parent_blkno , 0 ) ;
if ( alias ) {
/*
* Great , an alias exists , which means we must have a
* dentry lock already . We can just grab the lock off
* the alias and add it to the list .
*
* We ' re depending here on the fact that this dentry
* was found and exists in the dcache and so must have
* a reference to the dentry_lock because we can ' t
* race creates . Final dput ( ) cannot happen on it
* since we have it pinned , so our reference is safe .
*/
dl = alias - > d_fsdata ;
2006-09-22 03:51:28 +04:00
mlog_bug_on_msg ( ! dl , " parent %llu, ino %llu \n " ,
2006-09-09 01:43:18 +04:00
( unsigned long long ) parent_blkno ,
2006-09-22 03:51:28 +04:00
( unsigned long long ) OCFS2_I ( inode ) - > ip_blkno ) ;
2006-09-09 01:43:18 +04:00
mlog_bug_on_msg ( dl - > dl_parent_blkno ! = parent_blkno ,
" \" %.*s \" : old parent: %llu, new: %llu \n " ,
dentry - > d_name . len , dentry - > d_name . name ,
( unsigned long long ) parent_blkno ,
( unsigned long long ) dl - > dl_parent_blkno ) ;
mlog ( 0 , " Found: %s \n " , dl - > dl_lockres . l_name ) ;
goto out_attach ;
}
/*
* There are no other aliases
*/
dl = kmalloc ( sizeof ( * dl ) , GFP_NOFS ) ;
if ( ! dl ) {
ret = - ENOMEM ;
mlog_errno ( ret ) ;
return ret ;
}
dl - > dl_count = 0 ;
/*
* Does this have to happen below , for all attaches , in case
2007-09-25 02:56:19 +04:00
* the struct inode gets blown away by the downconvert thread ?
2006-09-09 01:43:18 +04:00
*/
dl - > dl_inode = igrab ( inode ) ;
dl - > dl_parent_blkno = parent_blkno ;
ocfs2_dentry_lock_res_init ( dl , parent_blkno , inode ) ;
out_attach :
spin_lock ( & dentry_attach_lock ) ;
dentry - > d_fsdata = dl ;
dl - > dl_count + + ;
spin_unlock ( & dentry_attach_lock ) ;
/*
* This actually gets us our PRMODE level lock . From now on ,
* we ' ll have a notification if one of these names is
* destroyed on another node .
*/
ret = ocfs2_dentry_lock ( dentry , 0 ) ;
2006-09-22 03:51:28 +04:00
if ( ! ret )
ocfs2_dentry_unlock ( dentry , 0 ) ;
else
2006-09-09 01:43:18 +04:00
mlog_errno ( ret ) ;
2009-04-21 08:34:18 +04:00
/*
* In case of error , manually free the allocation and do the iput ( ) .
* We need to do this because error here means no d_instantiate ( ) ,
* which means iput ( ) will not be called during dput ( dentry ) .
*/
if ( ret < 0 & & ! alias ) {
ocfs2_lock_res_free ( & dl - > dl_lockres ) ;
BUG_ON ( dl - > dl_count ! = 1 ) ;
spin_lock ( & dentry_attach_lock ) ;
dentry - > d_fsdata = NULL ;
spin_unlock ( & dentry_attach_lock ) ;
kfree ( dl ) ;
iput ( inode ) ;
}
2006-09-09 01:43:18 +04:00
dput ( alias ) ;
return ret ;
}
2009-07-20 14:12:36 +04:00
/*
 * In this file, held whenever osb->dentry_lock_list (the chain of
 * dentry locks linked through dl_next) is read or modified.
 */
DEFINE_SPINLOCK ( dentry_list_lock ) ;
2009-01-13 01:20:31 +03:00
/*
 * Upper bound on the number of dentry locks dropped in one go.  The
 * limit exists so that we don't starve other users of ocfs2_wq.
 */
# define DL_INODE_DROP_COUNT 64
/* Drop inode references from dentry locks */
2009-07-20 14:12:36 +04:00
static void __ocfs2_drop_dl_inodes ( struct ocfs2_super * osb , int drop_count )
2009-01-13 01:20:31 +03:00
{
struct ocfs2_dentry_lock * dl ;
spin_lock ( & dentry_list_lock ) ;
2009-07-20 14:12:36 +04:00
while ( osb - > dentry_lock_list & & ( drop_count < 0 | | drop_count - - ) ) {
2009-01-13 01:20:31 +03:00
dl = osb - > dentry_lock_list ;
osb - > dentry_lock_list = dl - > dl_next ;
spin_unlock ( & dentry_list_lock ) ;
iput ( dl - > dl_inode ) ;
kfree ( dl ) ;
spin_lock ( & dentry_list_lock ) ;
}
2009-07-20 14:12:36 +04:00
spin_unlock ( & dentry_list_lock ) ;
}
void ocfs2_drop_dl_inodes ( struct work_struct * work )
{
struct ocfs2_super * osb = container_of ( work , struct ocfs2_super ,
dentry_lock_work ) ;
__ocfs2_drop_dl_inodes ( osb , DL_INODE_DROP_COUNT ) ;
/*
* Don ' t queue dropping if umount is in progress . We flush the
* list in ocfs2_dismount_volume
*/
spin_lock ( & dentry_list_lock ) ;
if ( osb - > dentry_lock_list & &
! ocfs2_test_osb_flag ( osb , OCFS2_OSB_DROP_DENTRY_LOCK_IMMED ) )
2009-01-13 01:20:31 +03:00
queue_work ( ocfs2_wq , & osb - > dentry_lock_work ) ;
spin_unlock ( & dentry_list_lock ) ;
}
2009-07-20 14:12:36 +04:00
/*
 * Flush the whole list of pending dentry locks: every entry on
 * osb->dentry_lock_list is processed, with no per-call limit.
 */
void ocfs2_drop_all_dl_inodes(struct ocfs2_super *osb)
{
	/* A negative count tells the helper not to stop early. */
	int no_limit = -1;

	__ocfs2_drop_dl_inodes(osb, no_limit);
}
2006-09-09 01:43:18 +04:00
/*
* ocfs2_dentry_iput ( ) and friends .
*
* At this point , our particular dentry is detached from the inodes
* alias list , so there ' s no way that the locking code can find it .
*
* The interesting stuff happens when we determine that our lock needs
* to go away because this is the last subdir alias in the
* system . This function needs to handle a couple things :
*
* 1 ) Synchronizing lock shutdown with the downconvert threads . This
* is already handled for us via the lockres release drop function
* called in ocfs2_release_dentry_lock ( )
*
* 2 ) A race may occur when we ' re doing our lock shutdown and
* another process wants to create a new dentry lock . Right now we
* let them race , which means that for a very short while , this
* node might have two locks on a lock resource. This should not be
* a problem though, because one of them is in the process of being
* thrown out.
*/
static void ocfs2_drop_dentry_lock ( struct ocfs2_super * osb ,
struct ocfs2_dentry_lock * dl )
{
ocfs2_simple_drop_lockres ( osb , & dl - > dl_lockres ) ;
ocfs2_lock_res_free ( & dl - > dl_lockres ) ;
2009-01-13 01:20:31 +03:00
/* We leave dropping of inode reference to ocfs2_wq as that can
* possibly lead to inode deletion which gets tricky */
spin_lock ( & dentry_list_lock ) ;
2009-07-20 14:12:36 +04:00
if ( ! osb - > dentry_lock_list & &
! ocfs2_test_osb_flag ( osb , OCFS2_OSB_DROP_DENTRY_LOCK_IMMED ) )
2009-01-13 01:20:31 +03:00
queue_work ( ocfs2_wq , & osb - > dentry_lock_work ) ;
dl - > dl_next = osb - > dentry_lock_list ;
osb - > dentry_lock_list = dl ;
spin_unlock ( & dentry_list_lock ) ;
2006-09-09 01:43:18 +04:00
}
void ocfs2_dentry_lock_put ( struct ocfs2_super * osb ,
struct ocfs2_dentry_lock * dl )
{
2009-01-13 01:20:31 +03:00
int unlock ;
2006-09-09 01:43:18 +04:00
BUG_ON ( dl - > dl_count = = 0 ) ;
spin_lock ( & dentry_attach_lock ) ;
dl - > dl_count - - ;
unlock = ! dl - > dl_count ;
spin_unlock ( & dentry_attach_lock ) ;
if ( unlock )
ocfs2_drop_dentry_lock ( osb , dl ) ;
}
static void ocfs2_dentry_iput ( struct dentry * dentry , struct inode * inode )
{
struct ocfs2_dentry_lock * dl = dentry - > d_fsdata ;
2007-11-08 03:35:14 +03:00
if ( ! dl ) {
/*
* No dentry lock is ok if we ' re disconnected or
* unhashed .
*/
if ( ! ( dentry - > d_flags & DCACHE_DISCONNECTED ) & &
! d_unhashed ( dentry ) ) {
unsigned long long ino = 0ULL ;
if ( inode )
ino = ( unsigned long long ) OCFS2_I ( inode ) - > ip_blkno ;
mlog ( ML_ERROR , " Dentry is missing cluster lock. "
" inode: %llu, d_flags: 0x%x, d_name: %.*s \n " ,
ino , dentry - > d_flags , dentry - > d_name . len ,
dentry - > d_name . name ) ;
}
2006-09-09 01:43:18 +04:00
goto out ;
2007-11-08 03:35:14 +03:00
}
2006-09-09 01:43:18 +04:00
mlog_bug_on_msg ( dl - > dl_count = = 0 , " dentry: %.*s, count: %u \n " ,
dentry - > d_name . len , dentry - > d_name . name ,
dl - > dl_count ) ;
ocfs2_dentry_lock_put ( OCFS2_SB ( dentry - > d_sb ) , dl ) ;
out :
iput ( inode ) ;
}
/*
* d_move ( ) , but keep the locks in sync .
*
* When we are done , " dentry " will have the parent dir and name of
* " target " , which will be thrown away .
*
* We manually update the lock of " dentry " if need be .
*
* " target " doesn ' t have it ' s dentry lock touched - we allow the later
* dput ( ) to handle this for us .
*
* This is called during ocfs2_rename ( ) , while holding parent
* directory locks . The dentries have already been deleted on other
* nodes via ocfs2_remote_dentry_delete ( ) .
*
2007-10-20 01:10:43 +04:00
* Normally , the VFS handles the d_move ( ) for the file system , after
2006-09-09 01:43:18 +04:00
* the - > rename ( ) callback . OCFS2 wants to handle this internally , so
* the new lock can be created atomically with respect to the cluster .
*/
void ocfs2_dentry_move ( struct dentry * dentry , struct dentry * target ,
struct inode * old_dir , struct inode * new_dir )
{
int ret ;
struct ocfs2_super * osb = OCFS2_SB ( old_dir - > i_sb ) ;
struct inode * inode = dentry - > d_inode ;
/*
* Move within the same directory , so the actual lock info won ' t
* change .
*
* XXX : Is there any advantage to dropping the lock here ?
*/
if ( old_dir = = new_dir )
2006-09-09 01:22:54 +04:00
goto out_move ;
2006-09-09 01:43:18 +04:00
ocfs2_dentry_lock_put ( osb , dentry - > d_fsdata ) ;
dentry - > d_fsdata = NULL ;
2006-09-22 03:51:28 +04:00
ret = ocfs2_dentry_attach_lock ( dentry , inode , OCFS2_I ( new_dir ) - > ip_blkno ) ;
2006-09-09 01:43:18 +04:00
if ( ret )
mlog_errno ( ret ) ;
2006-09-09 01:22:54 +04:00
out_move :
d_move ( dentry , target ) ;
2006-09-09 01:43:18 +04:00
}
2009-02-20 09:00:26 +03:00
const struct dentry_operations ocfs2_dentry_ops = {
2005-12-16 01:31:24 +03:00
. d_revalidate = ocfs2_dentry_revalidate ,
2006-09-09 01:43:18 +04:00
. d_iput = ocfs2_dentry_iput ,
2005-12-16 01:31:24 +03:00
} ;