2019-05-24 13:04:05 +03:00
// SPDX-License-Identifier: GPL-2.0-or-later
2005-12-16 01:31:24 +03:00
/* -*- mode: c; c-basic-offset: 8; -*-
* vim : noexpandtab sw = 8 ts = 8 sts = 0 :
*
* dcache . c
*
* dentry cache handling code
*
* Copyright ( C ) 2002 , 2004 Oracle . All rights reserved .
*/
# include <linux/fs.h>
# include <linux/types.h>
# include <linux/slab.h>
# include <linux/namei.h>
# include <cluster/masklog.h>
# include "ocfs2.h"
# include "alloc.h"
# include "dcache.h"
2006-09-09 01:43:18 +04:00
# include "dlmglue.h"
2005-12-16 01:31:24 +03:00
# include "file.h"
# include "inode.h"
2011-02-23 17:51:49 +03:00
# include "ocfs2_trace.h"
2005-12-16 01:31:24 +03:00
2010-06-28 19:04:32 +04:00
void ocfs2_dentry_attach_gen ( struct dentry * dentry )
{
unsigned long gen =
2015-03-18 01:25:59 +03:00
OCFS2_I ( d_inode ( dentry - > d_parent ) ) - > ip_dir_lock_gen ;
BUG_ON ( d_inode ( dentry ) ) ;
2010-06-28 19:04:32 +04:00
dentry - > d_fsdata = ( void * ) gen ;
}
2006-09-09 01:43:18 +04:00
2012-06-11 00:03:43 +04:00
static int ocfs2_dentry_revalidate ( struct dentry * dentry , unsigned int flags )
2005-12-16 01:31:24 +03:00
{
2011-01-07 09:49:57 +03:00
struct inode * inode ;
2005-12-16 01:31:24 +03:00
int ret = 0 ; /* if all else fails, just return false */
2011-01-07 09:49:57 +03:00
struct ocfs2_super * osb ;
2012-06-11 00:03:43 +04:00
if ( flags & LOOKUP_RCU )
2011-01-07 09:49:57 +03:00
return - ECHILD ;
2015-03-18 01:25:59 +03:00
inode = d_inode ( dentry ) ;
2011-01-07 09:49:57 +03:00
osb = OCFS2_SB ( dentry - > d_sb ) ;
2005-12-16 01:31:24 +03:00
2011-02-23 17:51:49 +03:00
trace_ocfs2_dentry_revalidate ( dentry , dentry - > d_name . len ,
dentry - > d_name . name ) ;
2005-12-16 01:31:24 +03:00
2010-06-28 19:04:32 +04:00
/* For a negative dentry -
* check the generation number of the parent and compare with the
* one stored in the inode .
*/
2005-12-16 01:31:24 +03:00
if ( inode = = NULL ) {
2010-06-28 19:04:32 +04:00
unsigned long gen = ( unsigned long ) dentry - > d_fsdata ;
2013-09-29 22:59:30 +04:00
unsigned long pgen ;
spin_lock ( & dentry - > d_lock ) ;
2015-03-18 01:25:59 +03:00
pgen = OCFS2_I ( d_inode ( dentry - > d_parent ) ) - > ip_dir_lock_gen ;
2013-09-29 22:59:30 +04:00
spin_unlock ( & dentry - > d_lock ) ;
2011-02-23 17:51:49 +03:00
trace_ocfs2_dentry_revalidate_negative ( dentry - > d_name . len ,
dentry - > d_name . name ,
pgen , gen ) ;
2010-06-28 19:04:32 +04:00
if ( gen ! = pgen )
goto bail ;
goto valid ;
2005-12-16 01:31:24 +03:00
}
BUG_ON ( ! osb ) ;
2006-09-09 01:43:18 +04:00
if ( inode = = osb - > root_inode | | is_bad_inode ( inode ) )
goto bail ;
spin_lock ( & OCFS2_I ( inode ) - > ip_lock ) ;
/* did we or someone else delete this inode? */
if ( OCFS2_I ( inode ) - > ip_flags & OCFS2_INODE_DELETED ) {
2005-12-16 01:31:24 +03:00
spin_unlock ( & OCFS2_I ( inode ) - > ip_lock ) ;
2011-02-23 17:51:49 +03:00
trace_ocfs2_dentry_revalidate_delete (
( unsigned long long ) OCFS2_I ( inode ) - > ip_blkno ) ;
2006-09-09 01:43:18 +04:00
goto bail ;
}
spin_unlock ( & OCFS2_I ( inode ) - > ip_lock ) ;
2005-12-16 01:31:24 +03:00
2006-09-09 01:43:18 +04:00
/*
* We don ' t need a cluster lock to test this because once an
* inode nlink hits zero , it never goes back .
*/
if ( inode - > i_nlink = = 0 ) {
2011-02-23 17:51:49 +03:00
trace_ocfs2_dentry_revalidate_orphaned (
( unsigned long long ) OCFS2_I ( inode ) - > ip_blkno ,
S_ISDIR ( inode - > i_mode ) ) ;
2006-09-09 01:43:18 +04:00
goto bail ;
2005-12-16 01:31:24 +03:00
}
2009-08-27 10:46:56 +04:00
/*
* If the last lookup failed to create dentry lock , let us
* redo it .
*/
if ( ! dentry - > d_fsdata ) {
2011-02-23 17:51:49 +03:00
trace_ocfs2_dentry_revalidate_nofsdata (
( unsigned long long ) OCFS2_I ( inode ) - > ip_blkno ) ;
2009-08-27 10:46:56 +04:00
goto bail ;
}
2010-06-28 19:04:32 +04:00
valid :
2005-12-16 01:31:24 +03:00
ret = 1 ;
bail :
2011-02-23 17:51:49 +03:00
trace_ocfs2_dentry_revalidate_ret ( ret ) ;
2005-12-16 01:31:24 +03:00
return ret ;
}
2006-09-09 01:43:18 +04:00
/*
 * Decide whether @dentry is a usable alias living in the directory whose
 * block number is @parent_blkno.
 *
 * Returns 1 on a match and 0 otherwise.  When @skip_unhashed is non-zero,
 * unhashed dentries never match.
 */
static int ocfs2_match_dentry(struct dentry *dentry,
			      u64 parent_blkno,
			      int skip_unhashed)
{
	struct inode *dir;

	/*
	 * ocfs2_lookup() does a d_splice_alias() _before_ attaching to the
	 * lock data, so dentries without d_fsdata are skipped here;
	 * otherwise ocfs2_dentry_attach_lock() would get its original
	 * dentry back.  A dentry without a parent cannot match either.
	 */
	if (!dentry->d_fsdata || !dentry->d_parent)
		return 0;

	if (skip_unhashed && d_unhashed(dentry))
		return 0;

	/* A negative parent dentry has no directory inode to compare. */
	dir = d_inode(dentry->d_parent);
	if (!dir)
		return 0;

	/* Match only if the name lives in the requested directory. */
	return OCFS2_I(dir)->ip_blkno == parent_blkno;
}
/*
 * Scan the alias list of @inode for a dentry whose parent directory has
 * block number @parent_blkno.
 *
 * ocfs2_dentry_attach_lock() wants to find _any_ alias, as it is looking
 * for a dentry_lock reference.  The downconvert thread is looking to
 * unhash aliases, so @skip_unhashed lets it skip any that already have
 * that property.
 *
 * Returns the first matching dentry with an extra reference taken
 * (dget_dlock()), or NULL when no alias matches.
 */
struct dentry *ocfs2_find_local_alias(struct inode *inode,
				      u64 parent_blkno,
				      int skip_unhashed)
{
	struct dentry *alias;
	struct dentry *found = NULL;

	spin_lock(&inode->i_lock);
	hlist_for_each_entry(alias, &inode->i_dentry, d_u.d_alias) {
		spin_lock(&alias->d_lock);
		if (ocfs2_match_dentry(alias, parent_blkno, skip_unhashed)) {
			trace_ocfs2_find_local_alias(alias->d_name.len,
						     alias->d_name.name);

			/* Pin the alias while its d_lock is still held. */
			dget_dlock(alias);
			found = alias;
		}
		spin_unlock(&alias->d_lock);

		if (found)
			break;
	}
	spin_unlock(&inode->i_lock);

	return found;
}
2006-09-09 01:14:34 +04:00
/*
 * Spinlock taken around updates of dentry->d_fsdata and of a dentry
 * lock's dl_count in ocfs2_dentry_attach_lock() and
 * ocfs2_dentry_lock_put().
 */
DEFINE_SPINLOCK ( dentry_attach_lock ) ;
2006-09-09 01:43:18 +04:00
/*
* Attach this dentry to a cluster lock .
*
* Dentry locks cover all links in a given directory to a particular
* inode . We do this so that ocfs2 can build a lock name which all
* nodes in the cluster can agree on at all times . Shoving full names
* in the cluster lock won ' t work due to size restrictions . Covering
* links inside of a directory is a good compromise because it still
* allows us to use the parent directory lock to synchronize
* operations .
*
* Call this function with the parent dir semaphore and the parent dir
* cluster lock held .
*
* The dir semaphore will protect us from having to worry about
* concurrent processes on our node trying to attach a lock at the
* same time .
*
* The dir cluster lock ( held at either PR or EX mode ) protects us
* from unlink and rename on other nodes .
*
* A dput ( ) can happen asynchronously due to pruning , so we cover
* attaching and detaching the dentry lock with a
* dentry_attach_lock .
*
* A node which has done lookup on a name retains a protected read
* lock until final dput . If the user requests an unlink or rename ,
* the protected read is upgraded to an exclusive lock . Other nodes
* who have seen the dentry will then be informed that they need to
* downgrade their lock , which will involve d_delete on the
* dentry . This happens in ocfs2_dentry_convert_worker ( ) .
*/
int ocfs2_dentry_attach_lock ( struct dentry * dentry ,
struct inode * inode ,
2006-09-22 03:51:28 +04:00
u64 parent_blkno )
2006-09-09 01:43:18 +04:00
{
int ret ;
struct dentry * alias ;
struct ocfs2_dentry_lock * dl = dentry - > d_fsdata ;
2011-02-23 17:51:49 +03:00
trace_ocfs2_dentry_attach_lock ( dentry - > d_name . len , dentry - > d_name . name ,
( unsigned long long ) parent_blkno , dl ) ;
2006-09-09 01:43:18 +04:00
/*
* Negative dentry . We ignore these for now .
*
* XXX : Could we can improve ocfs2_dentry_revalidate ( ) by
* tracking these ?
*/
if ( ! inode )
return 0 ;
2015-03-18 01:25:59 +03:00
if ( d_really_is_negative ( dentry ) & & dentry - > d_fsdata ) {
2010-06-28 19:04:32 +04:00
/* Converting a negative dentry to positive
Clear dentry - > d_fsdata */
dentry - > d_fsdata = dl = NULL ;
}
2006-09-09 01:43:18 +04:00
if ( dl ) {
mlog_bug_on_msg ( dl - > dl_parent_blkno ! = parent_blkno ,
2014-10-22 04:11:25 +04:00
" \" %pd \" : old parent: %llu, new: %llu \n " ,
dentry ,
2006-09-09 01:43:18 +04:00
( unsigned long long ) parent_blkno ,
( unsigned long long ) dl - > dl_parent_blkno ) ;
return 0 ;
}
alias = ocfs2_find_local_alias ( inode , parent_blkno , 0 ) ;
if ( alias ) {
/*
* Great , an alias exists , which means we must have a
* dentry lock already . We can just grab the lock off
* the alias and add it to the list .
*
* We ' re depending here on the fact that this dentry
* was found and exists in the dcache and so must have
* a reference to the dentry_lock because we can ' t
* race creates . Final dput ( ) cannot happen on it
* since we have it pinned , so our reference is safe .
*/
dl = alias - > d_fsdata ;
2006-09-22 03:51:28 +04:00
mlog_bug_on_msg ( ! dl , " parent %llu, ino %llu \n " ,
2006-09-09 01:43:18 +04:00
( unsigned long long ) parent_blkno ,
2006-09-22 03:51:28 +04:00
( unsigned long long ) OCFS2_I ( inode ) - > ip_blkno ) ;
2006-09-09 01:43:18 +04:00
mlog_bug_on_msg ( dl - > dl_parent_blkno ! = parent_blkno ,
2014-10-22 04:11:25 +04:00
" \" %pd \" : old parent: %llu, new: %llu \n " ,
dentry ,
2006-09-09 01:43:18 +04:00
( unsigned long long ) parent_blkno ,
( unsigned long long ) dl - > dl_parent_blkno ) ;
2011-02-23 17:51:49 +03:00
trace_ocfs2_dentry_attach_lock_found ( dl - > dl_lockres . l_name ,
( unsigned long long ) parent_blkno ,
( unsigned long long ) OCFS2_I ( inode ) - > ip_blkno ) ;
2006-09-09 01:43:18 +04:00
goto out_attach ;
}
/*
* There are no other aliases
*/
dl = kmalloc ( sizeof ( * dl ) , GFP_NOFS ) ;
if ( ! dl ) {
ret = - ENOMEM ;
mlog_errno ( ret ) ;
return ret ;
}
dl - > dl_count = 0 ;
/*
* Does this have to happen below , for all attaches , in case
2007-09-25 02:56:19 +04:00
* the struct inode gets blown away by the downconvert thread ?
2006-09-09 01:43:18 +04:00
*/
dl - > dl_inode = igrab ( inode ) ;
dl - > dl_parent_blkno = parent_blkno ;
ocfs2_dentry_lock_res_init ( dl , parent_blkno , inode ) ;
out_attach :
spin_lock ( & dentry_attach_lock ) ;
fs/ocfs2: fix race in ocfs2_dentry_attach_lock()
ocfs2_dentry_attach_lock() can be executed in parallel threads against the
same dentry. Make that race safe. The race is like this:
thread A thread B
(A1) enter ocfs2_dentry_attach_lock,
seeing dentry->d_fsdata is NULL,
and no alias found by
ocfs2_find_local_alias, so kmalloc
a new ocfs2_dentry_lock structure
to local variable "dl", dl1
.....
(B1) enter ocfs2_dentry_attach_lock,
seeing dentry->d_fsdata is NULL,
and no alias found by
ocfs2_find_local_alias so kmalloc
a new ocfs2_dentry_lock structure
to local variable "dl", dl2.
......
(A2) set dentry->d_fsdata with dl1,
call ocfs2_dentry_lock() and increase
dl1->dl_lockres.l_ro_holders to 1 on
success.
......
(B2) set dentry->d_fsdata with dl2
call ocfs2_dentry_lock() and increase
dl2->dl_lockres.l_ro_holders to 1 on
success.
......
(A3) call ocfs2_dentry_unlock()
and decrease
dl2->dl_lockres.l_ro_holders to 0
on success.
....
(B3) call ocfs2_dentry_unlock(),
decreasing
dl2->dl_lockres.l_ro_holders, but
see it's zero now, panic
Link: http://lkml.kernel.org/r/20190529174636.22364-1-wen.gang.wang@oracle.com
Signed-off-by: Wengang Wang <wen.gang.wang@oracle.com>
Reported-by: Daniel Sobe <daniel.sobe@nxp.com>
Tested-by: Daniel Sobe <daniel.sobe@nxp.com>
Reviewed-by: Changwei Ge <gechangwei@live.cn>
Reviewed-by: Joseph Qi <joseph.qi@linux.alibaba.com>
Cc: Mark Fasheh <mark@fasheh.com>
Cc: Joel Becker <jlbec@evilplan.org>
Cc: Junxiao Bi <junxiao.bi@oracle.com>
Cc: Gang He <ghe@suse.com>
Cc: Jun Piao <piaojun@huawei.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2019-06-14 01:56:01 +03:00
if ( unlikely ( dentry - > d_fsdata & & ! alias ) ) {
/* d_fsdata is set by a racing thread which is doing
* the same thing as this thread is doing . Leave the racing
* thread going ahead and we return here .
*/
spin_unlock ( & dentry_attach_lock ) ;
iput ( dl - > dl_inode ) ;
ocfs2_lock_res_free ( & dl - > dl_lockres ) ;
kfree ( dl ) ;
return 0 ;
}
2006-09-09 01:43:18 +04:00
dentry - > d_fsdata = dl ;
dl - > dl_count + + ;
spin_unlock ( & dentry_attach_lock ) ;
/*
* This actually gets us our PRMODE level lock . From now on ,
* we ' ll have a notification if one of these names is
* destroyed on another node .
*/
ret = ocfs2_dentry_lock ( dentry , 0 ) ;
2006-09-22 03:51:28 +04:00
if ( ! ret )
ocfs2_dentry_unlock ( dentry , 0 ) ;
else
2006-09-09 01:43:18 +04:00
mlog_errno ( ret ) ;
2009-04-21 08:34:18 +04:00
/*
* In case of error , manually free the allocation and do the iput ( ) .
* We need to do this because error here means no d_instantiate ( ) ,
* which means iput ( ) will not be called during dput ( dentry ) .
*/
if ( ret < 0 & & ! alias ) {
ocfs2_lock_res_free ( & dl - > dl_lockres ) ;
BUG_ON ( dl - > dl_count ! = 1 ) ;
spin_lock ( & dentry_attach_lock ) ;
dentry - > d_fsdata = NULL ;
spin_unlock ( & dentry_attach_lock ) ;
kfree ( dl ) ;
iput ( inode ) ;
}
2006-09-09 01:43:18 +04:00
dput ( alias ) ;
return ret ;
}
/*
* ocfs2_dentry_iput ( ) and friends .
*
* At this point , our particular dentry is detached from the inodes
* alias list , so there ' s no way that the locking code can find it .
*
* The interesting stuff happens when we determine that our lock needs
* to go away because this is the last subdir alias in the
* system . This function needs to handle a couple things :
*
* 1 ) Synchronizing lock shutdown with the downconvert threads . This
* is already handled for us via the lockres release drop function
* called in ocfs2_release_dentry_lock ( )
*
* 2 ) A race may occur when we ' re doing our lock shutdown and
* another process wants to create a new dentry lock . Right now we
* let them race , which means that for a very short while , this
* node might have two locks on a lock resource . This should not be a
* problem though because one of them is in the process of being
* thrown out .
*/
static void ocfs2_drop_dentry_lock ( struct ocfs2_super * osb ,
struct ocfs2_dentry_lock * dl )
{
2014-04-04 01:46:59 +04:00
iput ( dl - > dl_inode ) ;
2006-09-09 01:43:18 +04:00
ocfs2_simple_drop_lockres ( osb , & dl - > dl_lockres ) ;
ocfs2_lock_res_free ( & dl - > dl_lockres ) ;
2014-04-04 01:46:59 +04:00
kfree ( dl ) ;
2006-09-09 01:43:18 +04:00
}
void ocfs2_dentry_lock_put ( struct ocfs2_super * osb ,
struct ocfs2_dentry_lock * dl )
{
2014-04-04 01:46:59 +04:00
int unlock = 0 ;
2006-09-09 01:43:18 +04:00
BUG_ON ( dl - > dl_count = = 0 ) ;
spin_lock ( & dentry_attach_lock ) ;
dl - > dl_count - - ;
unlock = ! dl - > dl_count ;
spin_unlock ( & dentry_attach_lock ) ;
if ( unlock )
ocfs2_drop_dentry_lock ( osb , dl ) ;
}
static void ocfs2_dentry_iput ( struct dentry * dentry , struct inode * inode )
{
struct ocfs2_dentry_lock * dl = dentry - > d_fsdata ;
2007-11-08 03:35:14 +03:00
if ( ! dl ) {
/*
* No dentry lock is ok if we ' re disconnected or
* unhashed .
*/
if ( ! ( dentry - > d_flags & DCACHE_DISCONNECTED ) & &
! d_unhashed ( dentry ) ) {
unsigned long long ino = 0ULL ;
if ( inode )
ino = ( unsigned long long ) OCFS2_I ( inode ) - > ip_blkno ;
mlog ( ML_ERROR , " Dentry is missing cluster lock. "
2014-10-22 04:11:25 +04:00
" inode: %llu, d_flags: 0x%x, d_name: %pd \n " ,
ino , dentry - > d_flags , dentry ) ;
2007-11-08 03:35:14 +03:00
}
2006-09-09 01:43:18 +04:00
goto out ;
2007-11-08 03:35:14 +03:00
}
2006-09-09 01:43:18 +04:00
2014-10-22 04:11:25 +04:00
mlog_bug_on_msg ( dl - > dl_count = = 0 , " dentry: %pd, count: %u \n " ,
dentry , dl - > dl_count ) ;
2006-09-09 01:43:18 +04:00
ocfs2_dentry_lock_put ( OCFS2_SB ( dentry - > d_sb ) , dl ) ;
out :
iput ( inode ) ;
}
/*
* d_move ( ) , but keep the locks in sync .
*
* When we are done , " dentry " will have the parent dir and name of
* " target " , which will be thrown away .
*
* We manually update the lock of " dentry " if need be .
*
* " target " doesn ' t have its dentry lock touched - we allow the later
* dput ( ) to handle this for us .
*
* This is called during ocfs2_rename ( ) , while holding parent
* directory locks . The dentries have already been deleted on other
* nodes via ocfs2_remote_dentry_delete ( ) .
*
2007-10-20 01:10:43 +04:00
* Normally , the VFS handles the d_move ( ) for the file system , after
2006-09-09 01:43:18 +04:00
* the - > rename ( ) callback . OCFS2 wants to handle this internally , so
* the new lock can be created atomically with respect to the cluster .
*/
void ocfs2_dentry_move ( struct dentry * dentry , struct dentry * target ,
struct inode * old_dir , struct inode * new_dir )
{
int ret ;
struct ocfs2_super * osb = OCFS2_SB ( old_dir - > i_sb ) ;
2015-03-18 01:25:59 +03:00
struct inode * inode = d_inode ( dentry ) ;
2006-09-09 01:43:18 +04:00
/*
* Move within the same directory , so the actual lock info won ' t
* change .
*
* XXX : Is there any advantage to dropping the lock here ?
*/
if ( old_dir = = new_dir )
2006-09-09 01:22:54 +04:00
goto out_move ;
2006-09-09 01:43:18 +04:00
ocfs2_dentry_lock_put ( osb , dentry - > d_fsdata ) ;
dentry - > d_fsdata = NULL ;
2006-09-22 03:51:28 +04:00
ret = ocfs2_dentry_attach_lock ( dentry , inode , OCFS2_I ( new_dir ) - > ip_blkno ) ;
2006-09-09 01:43:18 +04:00
if ( ret )
mlog_errno ( ret ) ;
2006-09-09 01:22:54 +04:00
out_move :
d_move ( dentry , target ) ;
2006-09-09 01:43:18 +04:00
}
2009-02-20 09:00:26 +03:00
const struct dentry_operations ocfs2_dentry_ops = {
2005-12-16 01:31:24 +03:00
. d_revalidate = ocfs2_dentry_revalidate ,
2006-09-09 01:43:18 +04:00
. d_iput = ocfs2_dentry_iput ,
2005-12-16 01:31:24 +03:00
} ;