2005-12-16 01:31:24 +03:00
/* -*- mode: c; c-basic-offset: 8; -*-
* vim : noexpandtab sw = 8 ts = 8 sts = 0 :
*
* inode . c
*
* vfs ' aops , fops , dops and iops
*
* Copyright ( C ) 2002 , 2004 Oracle . All rights reserved .
*
* This program is free software ; you can redistribute it and / or
* modify it under the terms of the GNU General Public
* License as published by the Free Software Foundation ; either
* version 2 of the License , or ( at your option ) any later version .
*
* This program is distributed in the hope that it will be useful ,
* but WITHOUT ANY WARRANTY ; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE . See the GNU
* General Public License for more details .
*
* You should have received a copy of the GNU General Public
* License along with this program ; if not , write to the
* Free Software Foundation , Inc . , 59 Temple Place - Suite 330 ,
* Boston , MA 021110 - 1307 , USA .
*/
# include <linux/fs.h>
# include <linux/types.h>
# include <linux/slab.h>
# include <linux/highmem.h>
# include <linux/pagemap.h>
2008-10-09 21:38:40 +04:00
# include <linux/quotaops.h>
2005-12-16 01:31:24 +03:00
# include <asm/byteorder.h>
# define MLOG_MASK_PREFIX ML_INODE
# include <cluster/masklog.h>
# include "ocfs2.h"
# include "alloc.h"
2008-11-13 03:27:44 +03:00
# include "dir.h"
2008-10-18 01:55:01 +04:00
# include "blockcheck.h"
2005-12-16 01:31:24 +03:00
# include "dlmglue.h"
# include "extent_map.h"
# include "file.h"
2006-02-23 04:35:08 +03:00
# include "heartbeat.h"
2005-12-16 01:31:24 +03:00
# include "inode.h"
# include "journal.h"
# include "namei.h"
# include "suballoc.h"
# include "super.h"
# include "symlink.h"
# include "sysfile.h"
# include "uptodate.h"
2008-08-18 13:11:00 +04:00
# include "xattr.h"
2009-08-18 07:43:49 +04:00
# include "refcounttree.h"
2005-12-16 01:31:24 +03:00
# include "buffer_head_io.h"
/*
 * Lookup key passed as the opaque argument to ocfs2_find_actor() and
 * ocfs2_init_locked_inode(), and on to ocfs2_read_locked_inode().
 */
struct ocfs2_find_inode_args
{
/* Disk block number of the inode; ocfs2_find_actor() matches it against ip_blkno. */
u64 fi_blkno ;
/* Inode number derived from fi_blkno with ino_from_blkno(). */
unsigned long fi_ino ;
/* OCFS2_FI_FLAG_* bits examined by ocfs2_read_locked_inode(). */
unsigned int fi_flags ;
2008-01-11 02:11:45 +03:00
/* System inode type; when non-zero it selects the i_mutex lockdep class. */
unsigned int fi_sysfile_type ;
2005-12-16 01:31:24 +03:00
} ;
2008-01-11 02:11:45 +03:00
/*
 * One lockdep class key per system inode type.  ocfs2_init_locked_inode()
 * indexes this array by fi_sysfile_type to classify the inode's i_mutex.
 */
static struct lock_class_key ocfs2_sysfile_lock_key [ NUM_SYSTEM_INODES ] ;
2005-12-16 01:31:24 +03:00
/* Forward declarations of file-local helpers that are defined further down. */
static int ocfs2_read_locked_inode ( struct inode * inode ,
struct ocfs2_find_inode_args * args ) ;
static int ocfs2_init_locked_inode ( struct inode * inode , void * opaque ) ;
static int ocfs2_find_actor ( struct inode * inode , void * opaque ) ;
static int ocfs2_truncate_for_delete ( struct ocfs2_super * osb ,
struct inode * inode ,
struct buffer_head * fe_bh ) ;
2006-07-04 04:27:12 +04:00
void ocfs2_set_inode_flags ( struct inode * inode )
{
unsigned int flags = OCFS2_I ( inode ) - > ip_attr ;
inode - > i_flags & = ~ ( S_IMMUTABLE |
S_SYNC | S_APPEND | S_NOATIME | S_DIRSYNC ) ;
if ( flags & OCFS2_IMMUTABLE_FL )
inode - > i_flags | = S_IMMUTABLE ;
if ( flags & OCFS2_SYNC_FL )
inode - > i_flags | = S_SYNC ;
if ( flags & OCFS2_APPEND_FL )
inode - > i_flags | = S_APPEND ;
if ( flags & OCFS2_NOATIME_FL )
inode - > i_flags | = S_NOATIME ;
if ( flags & OCFS2_DIRSYNC_FL )
inode - > i_flags | = S_DIRSYNC ;
}
2007-04-27 22:08:01 +04:00
/* Propagate flags from i_flags to OCFS2_I(inode)->ip_attr */
void ocfs2_get_inode_flags ( struct ocfs2_inode_info * oi )
{
unsigned int flags = oi - > vfs_inode . i_flags ;
oi - > ip_attr & = ~ ( OCFS2_SYNC_FL | OCFS2_APPEND_FL |
OCFS2_IMMUTABLE_FL | OCFS2_NOATIME_FL | OCFS2_DIRSYNC_FL ) ;
if ( flags & S_SYNC )
oi - > ip_attr | = OCFS2_SYNC_FL ;
if ( flags & S_APPEND )
oi - > ip_attr | = OCFS2_APPEND_FL ;
if ( flags & S_IMMUTABLE )
oi - > ip_attr | = OCFS2_IMMUTABLE_FL ;
if ( flags & S_NOATIME )
oi - > ip_attr | = OCFS2_NOATIME_FL ;
if ( flags & S_DIRSYNC )
oi - > ip_attr | = OCFS2_DIRSYNC_FL ;
}
2009-03-06 16:29:10 +03:00
/*
 * Look up the inode for disk block @blkno on @sb through ilookup5(),
 * using ocfs2_find_actor() as the match callback.  The lookup key
 * carries no flags and no system file type.
 *
 * Returns whatever ilookup5() returns.
 */
struct inode *ocfs2_ilookup(struct super_block *sb, u64 blkno)
{
	struct ocfs2_find_inode_args key = {
		.fi_blkno	 = blkno,
		.fi_ino		 = ino_from_blkno(sb, blkno),
		.fi_flags	 = 0,
		.fi_sysfile_type = 0,
	};

	return ilookup5(sb, blkno, ocfs2_find_actor, &key);
}
2008-01-11 02:11:45 +03:00
/*
 * Obtain the in-memory inode for disk block @blkno.
 *
 * @flags and @sysfile_type are carried in the lookup key handed to
 * iget5_locked(); a freshly allocated (I_NEW) inode is filled in by
 * ocfs2_read_locked_inode() and then unlocked.
 *
 * Returns the inode on success, or an ERR_PTR():
 *   -EINVAL  @blkno is zero
 *   -ENOMEM  iget5_locked() returned NULL
 *   -ESTALE  the inode ended up marked bad
 */
struct inode *ocfs2_iget(struct ocfs2_super *osb, u64 blkno, unsigned flags,
			 int sysfile_type)
{
	struct super_block *sb = osb->sb;
	struct ocfs2_find_inode_args args;
	struct inode *inode;

	mlog_entry(" (blkno = %llu) \n ", (unsigned long long)blkno);

	/* Block zero can never hold an inode. */
	if (blkno == 0) {
		inode = ERR_PTR(-EINVAL);
		mlog_errno(PTR_ERR(inode));
		return inode;
	}

	args.fi_blkno = blkno;
	args.fi_flags = flags;
	args.fi_ino = ino_from_blkno(sb, blkno);
	args.fi_sysfile_type = sysfile_type;

	inode = iget5_locked(sb, args.fi_ino, ocfs2_find_actor,
			     ocfs2_init_locked_inode, &args);
	if (inode == NULL) {
		inode = ERR_PTR(-ENOMEM);
		mlog_errno(PTR_ERR(inode));
		return inode;
	}

	/*
	 * The inode was *not* in the inode cache, so we do our own
	 * read of it and unlock it afterwards.  A failed read is
	 * detected below through is_bad_inode().
	 */
	if (inode->i_state & I_NEW) {
		mlog(0, " Inode was not in inode cache, reading it. \n ");
		ocfs2_read_locked_inode(inode, &args);
		unlock_new_inode(inode);
	}

	if (is_bad_inode(inode)) {
		iput(inode);
		return ERR_PTR(-ESTALE);
	}

	mlog(0, " returning inode with number %llu \n ",
	     (unsigned long long)OCFS2_I(inode)->ip_blkno);
	mlog_exit_ptr(inode);

	return inode;
}
/*
* here ' s how inodes get read from disk :
* iget5_locked - > find_actor - > OCFS2_FIND_ACTOR
* found ? : return the in - memory inode
* not found ? : get_new_inode - > OCFS2_INIT_LOCKED_INODE
*/
static int ocfs2_find_actor ( struct inode * inode , void * opaque )
{
struct ocfs2_find_inode_args * args = NULL ;
struct ocfs2_inode_info * oi = OCFS2_I ( inode ) ;
int ret = 0 ;
mlog_entry ( " (0x%p, %lu, 0x%p) \n " , inode , inode - > i_ino , opaque ) ;
args = opaque ;
mlog_bug_on_msg ( ! inode , " No inode in find actor! \n " ) ;
if ( oi - > ip_blkno ! = args - > fi_blkno )
goto bail ;
ret = 1 ;
bail :
mlog_exit ( ret ) ;
return ret ;
}
/*
* initialize the new inode , but don ' t do anything that would cause
* us to sleep .
* return 0 on success , 1 on failure
*/
static int ocfs2_init_locked_inode ( struct inode * inode , void * opaque )
{
struct ocfs2_find_inode_args * args = opaque ;
2009-06-04 17:26:50 +04:00
static struct lock_class_key ocfs2_quota_ip_alloc_sem_key ,
ocfs2_file_ip_alloc_sem_key ;
2005-12-16 01:31:24 +03:00
mlog_entry ( " inode = %p, opaque = %p \n " , inode , opaque ) ;
inode - > i_ino = args - > fi_ino ;
OCFS2_I ( inode ) - > ip_blkno = args - > fi_blkno ;
2008-01-11 02:11:45 +03:00
if ( args - > fi_sysfile_type ! = 0 )
lockdep_set_class ( & inode - > i_mutex ,
& ocfs2_sysfile_lock_key [ args - > fi_sysfile_type ] ) ;
2009-06-04 17:26:50 +04:00
if ( args - > fi_sysfile_type = = USER_QUOTA_SYSTEM_INODE | |
args - > fi_sysfile_type = = GROUP_QUOTA_SYSTEM_INODE | |
args - > fi_sysfile_type = = LOCAL_USER_QUOTA_SYSTEM_INODE | |
args - > fi_sysfile_type = = LOCAL_GROUP_QUOTA_SYSTEM_INODE )
lockdep_set_class ( & OCFS2_I ( inode ) - > ip_alloc_sem ,
& ocfs2_quota_ip_alloc_sem_key ) ;
else
lockdep_set_class ( & OCFS2_I ( inode ) - > ip_alloc_sem ,
& ocfs2_file_ip_alloc_sem_key ) ;
2005-12-16 01:31:24 +03:00
mlog_exit ( 0 ) ;
return 0 ;
}
2008-11-14 01:49:11 +03:00
void ocfs2_populate_inode ( struct inode * inode , struct ocfs2_dinode * fe ,
int create_ino )
2005-12-16 01:31:24 +03:00
{
struct super_block * sb ;
struct ocfs2_super * osb ;
2008-07-22 01:29:16 +04:00
int use_plocks = 1 ;
2005-12-16 01:31:24 +03:00
2006-03-03 21:24:33 +03:00
mlog_entry ( " (0x%p, size:%llu) \n " , inode ,
2007-04-28 03:01:25 +04:00
( unsigned long long ) le64_to_cpu ( fe - > i_size ) ) ;
2005-12-16 01:31:24 +03:00
sb = inode - > i_sb ;
osb = OCFS2_SB ( sb ) ;
2008-07-22 01:29:16 +04:00
if ( ( osb - > s_mount_opt & OCFS2_MOUNT_LOCALFLOCKS ) | |
ocfs2_mount_local ( osb ) | | ! ocfs2_stack_supports_plocks ( ) )
use_plocks = 0 ;
2008-11-14 01:49:11 +03:00
/*
* These have all been checked by ocfs2_read_inode_block ( ) or set
* by ocfs2_mknod_locked ( ) , so a failure is a code bug .
*/
BUG_ON ( ! OCFS2_IS_VALID_DINODE ( fe ) ) ; /* This means that read_inode
cannot create a superblock
inode today . change if
that is needed . */
BUG_ON ( ! ( fe - > i_flags & cpu_to_le32 ( OCFS2_VALID_FL ) ) ) ;
BUG_ON ( le32_to_cpu ( fe - > i_fs_generation ) ! = osb - > fs_generation ) ;
2005-12-16 01:31:24 +03:00
2007-03-23 02:53:23 +03:00
OCFS2_I ( inode ) - > ip_clusters = le32_to_cpu ( fe - > i_clusters ) ;
OCFS2_I ( inode ) - > ip_attr = le32_to_cpu ( fe - > i_attr ) ;
2007-09-08 00:58:15 +04:00
OCFS2_I ( inode ) - > ip_dyn_features = le16_to_cpu ( fe - > i_dyn_features ) ;
2007-03-23 02:53:23 +03:00
2005-12-16 01:31:24 +03:00
inode - > i_version = 1 ;
inode - > i_generation = le32_to_cpu ( fe - > i_generation ) ;
inode - > i_rdev = huge_decode_dev ( le64_to_cpu ( fe - > id1 . dev1 . i_rdev ) ) ;
inode - > i_mode = le16_to_cpu ( fe - > i_mode ) ;
inode - > i_uid = le32_to_cpu ( fe - > i_uid ) ;
inode - > i_gid = le32_to_cpu ( fe - > i_gid ) ;
/* Fast symlinks will have i_size but no allocated clusters. */
if ( S_ISLNK ( inode - > i_mode ) & & ! fe - > i_clusters )
inode - > i_blocks = 0 ;
else
2007-03-23 02:53:23 +03:00
inode - > i_blocks = ocfs2_inode_sector_count ( inode ) ;
2005-12-16 01:31:24 +03:00
inode - > i_mapping - > a_ops = & ocfs2_aops ;
inode - > i_atime . tv_sec = le64_to_cpu ( fe - > i_atime ) ;
inode - > i_atime . tv_nsec = le32_to_cpu ( fe - > i_atime_nsec ) ;
inode - > i_mtime . tv_sec = le64_to_cpu ( fe - > i_mtime ) ;
inode - > i_mtime . tv_nsec = le32_to_cpu ( fe - > i_mtime_nsec ) ;
inode - > i_ctime . tv_sec = le64_to_cpu ( fe - > i_ctime ) ;
inode - > i_ctime . tv_nsec = le32_to_cpu ( fe - > i_ctime_nsec ) ;
if ( OCFS2_I ( inode ) - > ip_blkno ! = le64_to_cpu ( fe - > i_blkno ) )
mlog ( ML_ERROR ,
2006-03-03 21:24:33 +03:00
" ip_blkno %llu != i_blkno %llu! \n " ,
( unsigned long long ) OCFS2_I ( inode ) - > ip_blkno ,
2007-04-28 03:01:25 +04:00
( unsigned long long ) le64_to_cpu ( fe - > i_blkno ) ) ;
2005-12-16 01:31:24 +03:00
2008-11-21 04:54:57 +03:00
inode - > i_nlink = ocfs2_read_links_count ( fe ) ;
2005-12-16 01:31:24 +03:00
2008-08-21 20:22:30 +04:00
if ( fe - > i_flags & cpu_to_le32 ( OCFS2_SYSTEM_FL ) ) {
2006-09-23 04:28:19 +04:00
OCFS2_I ( inode ) - > ip_flags | = OCFS2_INODE_SYSTEM_FILE ;
2008-08-21 20:22:30 +04:00
inode - > i_flags | = S_NOQUOTA ;
}
2006-09-23 04:28:19 +04:00
2005-12-16 01:31:24 +03:00
if ( fe - > i_flags & cpu_to_le32 ( OCFS2_LOCAL_ALLOC_FL ) ) {
OCFS2_I ( inode ) - > ip_flags | = OCFS2_INODE_BITMAP ;
mlog ( 0 , " local alloc inode: i_ino=%lu \n " , inode - > i_ino ) ;
} else if ( fe - > i_flags & cpu_to_le32 ( OCFS2_BITMAP_FL ) ) {
OCFS2_I ( inode ) - > ip_flags | = OCFS2_INODE_BITMAP ;
2008-08-20 17:43:36 +04:00
} else if ( fe - > i_flags & cpu_to_le32 ( OCFS2_QUOTA_FL ) ) {
inode - > i_flags | = S_NOQUOTA ;
2005-12-16 01:31:24 +03:00
} else if ( fe - > i_flags & cpu_to_le32 ( OCFS2_SUPER_BLOCK_FL ) ) {
mlog ( 0 , " superblock inode: i_ino=%lu \n " , inode - > i_ino ) ;
/* we can't actually hit this as read_inode can't
* handle superblocks today ; - ) */
BUG ( ) ;
}
switch ( inode - > i_mode & S_IFMT ) {
case S_IFREG :
2008-07-22 01:29:16 +04:00
if ( use_plocks )
inode - > i_fop = & ocfs2_fops ;
else
inode - > i_fop = & ocfs2_fops_no_plocks ;
2005-12-16 01:31:24 +03:00
inode - > i_op = & ocfs2_file_iops ;
i_size_write ( inode , le64_to_cpu ( fe - > i_size ) ) ;
break ;
case S_IFDIR :
inode - > i_op = & ocfs2_dir_iops ;
2008-07-22 01:29:16 +04:00
if ( use_plocks )
inode - > i_fop = & ocfs2_dops ;
else
inode - > i_fop = & ocfs2_dops_no_plocks ;
2005-12-16 01:31:24 +03:00
i_size_write ( inode , le64_to_cpu ( fe - > i_size ) ) ;
break ;
case S_IFLNK :
if ( ocfs2_inode_is_fast_symlink ( inode ) )
inode - > i_op = & ocfs2_fast_symlink_inode_operations ;
else
inode - > i_op = & ocfs2_symlink_inode_operations ;
i_size_write ( inode , le64_to_cpu ( fe - > i_size ) ) ;
break ;
default :
inode - > i_op = & ocfs2_special_file_iops ;
init_special_inode ( inode , inode - > i_mode ,
inode - > i_rdev ) ;
break ;
}
2006-09-23 04:28:19 +04:00
if ( create_ino ) {
inode - > i_ino = ino_from_blkno ( inode - > i_sb ,
le64_to_cpu ( fe - > i_blkno ) ) ;
/*
* If we ever want to create system files from kernel ,
* the generation argument to
* ocfs2_inode_lock_res_init ( ) will have to change .
*/
2007-04-28 03:01:25 +04:00
BUG_ON ( le32_to_cpu ( fe - > i_flags ) & OCFS2_SYSTEM_FL ) ;
2006-09-23 04:28:19 +04:00
2007-10-19 02:30:42 +04:00
ocfs2_inode_lock_res_init ( & OCFS2_I ( inode ) - > ip_inode_lockres ,
2006-09-23 04:28:19 +04:00
OCFS2_LOCK_TYPE_META , 0 , inode ) ;
2007-03-21 02:01:38 +03:00
ocfs2_inode_lock_res_init ( & OCFS2_I ( inode ) - > ip_open_lockres ,
OCFS2_LOCK_TYPE_OPEN , 0 , inode ) ;
2006-09-23 04:28:19 +04:00
}
2005-12-16 01:31:24 +03:00
ocfs2_inode_lock_res_init ( & OCFS2_I ( inode ) - > ip_rw_lockres ,
2006-09-23 04:28:19 +04:00
OCFS2_LOCK_TYPE_RW , inode - > i_generation ,
inode ) ;
2006-07-04 04:27:12 +04:00
ocfs2_set_inode_flags ( inode ) ;
2009-02-24 19:53:23 +03:00
OCFS2_I ( inode ) - > ip_last_used_slot = 0 ;
OCFS2_I ( inode ) - > ip_last_used_group = 0 ;
2008-11-14 01:49:11 +03:00
mlog_exit_void ( ) ;
2005-12-16 01:31:24 +03:00
}
static int ocfs2_read_locked_inode ( struct inode * inode ,
struct ocfs2_find_inode_args * args )
{
struct super_block * sb ;
struct ocfs2_super * osb ;
struct ocfs2_dinode * fe ;
struct buffer_head * bh = NULL ;
2006-09-23 04:28:19 +04:00
int status , can_lock ;
u32 generation = 0 ;
2005-12-16 01:31:24 +03:00
mlog_entry ( " (0x%p, 0x%p) \n " , inode , args ) ;
status = - EINVAL ;
if ( inode = = NULL | | inode - > i_sb = = NULL ) {
mlog ( ML_ERROR , " bad inode \n " ) ;
2006-09-23 04:28:19 +04:00
return status ;
2005-12-16 01:31:24 +03:00
}
sb = inode - > i_sb ;
osb = OCFS2_SB ( sb ) ;
if ( ! args ) {
mlog ( ML_ERROR , " bad inode args \n " ) ;
make_bad_inode ( inode ) ;
2006-09-23 04:28:19 +04:00
return status ;
}
/*
* To improve performance of cold - cache inode stats , we take
* the cluster lock here if possible .
*
* Generally , OCFS2 never trusts the contents of an inode
* unless it ' s holding a cluster lock , so taking it here isn ' t
* a correctness issue as much as it is a performance
* improvement .
*
* There are three times when taking the lock is not a good idea :
*
* 1 ) During startup , before we have initialized the DLM .
*
* 2 ) If we are reading certain system files which never get
* cluster locks ( local alloc , truncate log ) .
*
* 3 ) If the process doing the iget ( ) is responsible for
* orphan dir recovery . We ' re holding the orphan dir lock and
* can get into a deadlock with another process on another
* node in - > delete_inode ( ) .
*
* # 1 and # 2 can be simply solved by never taking the lock
* here for system files ( which are the only type we read
* during mount ) . It ' s a heavier approach , but our main
* concern is user - accesible files anyway .
*
* # 3 works itself out because we ' ll eventually take the
* cluster lock before trusting anything anyway .
*/
can_lock = ! ( args - > fi_flags & OCFS2_FI_FLAG_SYSFILE )
2007-03-21 02:01:38 +03:00
& & ! ( args - > fi_flags & OCFS2_FI_FLAG_ORPHAN_RECOVERY )
2006-12-06 04:56:35 +03:00
& & ! ocfs2_mount_local ( osb ) ;
2006-09-23 04:28:19 +04:00
/*
* To maintain backwards compatibility with older versions of
* ocfs2 - tools , we still store the generation value for system
* files . The only ones that actually matter to userspace are
* the journals , but it ' s easier and inexpensive to just flag
* all system files similarly .
*/
if ( args - > fi_flags & OCFS2_FI_FLAG_SYSFILE )
generation = osb - > fs_generation ;
2007-10-19 02:30:42 +04:00
ocfs2_inode_lock_res_init ( & OCFS2_I ( inode ) - > ip_inode_lockres ,
2006-09-23 04:28:19 +04:00
OCFS2_LOCK_TYPE_META ,
generation , inode ) ;
2007-03-21 02:01:38 +03:00
ocfs2_inode_lock_res_init ( & OCFS2_I ( inode ) - > ip_open_lockres ,
OCFS2_LOCK_TYPE_OPEN ,
0 , inode ) ;
2006-09-23 04:28:19 +04:00
if ( can_lock ) {
2007-03-21 02:01:38 +03:00
status = ocfs2_open_lock ( inode ) ;
if ( status ) {
make_bad_inode ( inode ) ;
mlog_errno ( status ) ;
return status ;
}
2007-10-19 02:30:42 +04:00
status = ocfs2_inode_lock ( inode , NULL , 0 ) ;
2006-09-23 04:28:19 +04:00
if ( status ) {
make_bad_inode ( inode ) ;
mlog_errno ( status ) ;
return status ;
}
2005-12-16 01:31:24 +03:00
}
2007-03-21 02:01:38 +03:00
if ( args - > fi_flags & OCFS2_FI_FLAG_ORPHAN_RECOVERY ) {
status = ocfs2_try_open_lock ( inode , 0 ) ;
if ( status ) {
2010-01-26 03:57:38 +03:00
make_bad_inode ( inode ) ;
2007-03-21 02:01:38 +03:00
return status ;
}
}
2008-11-14 01:49:11 +03:00
if ( can_lock ) {
status = ocfs2_read_inode_block_full ( inode , & bh ,
OCFS2_BH_IGNORE_CACHE ) ;
} else {
2008-10-10 04:20:29 +04:00
status = ocfs2_read_blocks_sync ( osb , args - > fi_blkno , 1 , & bh ) ;
2008-11-14 01:49:11 +03:00
if ( ! status )
status = ocfs2_validate_inode_block ( osb - > sb , bh ) ;
}
2005-12-16 01:31:24 +03:00
if ( status < 0 ) {
mlog_errno ( status ) ;
goto bail ;
}
2006-09-23 04:28:19 +04:00
status = - EINVAL ;
2005-12-16 01:31:24 +03:00
fe = ( struct ocfs2_dinode * ) bh - > b_data ;
2006-09-23 04:28:19 +04:00
/*
* This is a code bug . Right now the caller needs to
* understand whether it is asking for a system file inode or
* not so the proper lock names can be built .
*/
mlog_bug_on_msg ( ! ! ( fe - > i_flags & cpu_to_le32 ( OCFS2_SYSTEM_FL ) ) ! =
! ! ( args - > fi_flags & OCFS2_FI_FLAG_SYSFILE ) ,
" Inode %llu: system file state is ambigous \n " ,
( unsigned long long ) args - > fi_blkno ) ;
2005-12-16 01:31:24 +03:00
if ( S_ISCHR ( le16_to_cpu ( fe - > i_mode ) ) | |
S_ISBLK ( le16_to_cpu ( fe - > i_mode ) ) )
2008-11-14 01:49:11 +03:00
inode - > i_rdev = huge_decode_dev ( le64_to_cpu ( fe - > id1 . dev1 . i_rdev ) ) ;
2005-12-16 01:31:24 +03:00
2008-11-14 01:49:11 +03:00
ocfs2_populate_inode ( inode , fe , 0 ) ;
2005-12-16 01:31:24 +03:00
BUG_ON ( args - > fi_blkno ! = le64_to_cpu ( fe - > i_blkno ) ) ;
status = 0 ;
bail :
2006-09-23 04:28:19 +04:00
if ( can_lock )
2007-10-19 02:30:42 +04:00
ocfs2_inode_unlock ( inode , 0 ) ;
2006-09-23 04:28:19 +04:00
if ( status < 0 )
make_bad_inode ( inode ) ;
2005-12-16 01:31:24 +03:00
if ( args & & bh )
brelse ( bh ) ;
mlog_exit ( status ) ;
return status ;
}
void ocfs2_sync_blockdev ( struct super_block * sb )
{
sync_blockdev ( sb - > s_bdev ) ;
}
static int ocfs2_truncate_for_delete ( struct ocfs2_super * osb ,
struct inode * inode ,
struct buffer_head * fe_bh )
{
int status = 0 ;
struct ocfs2_truncate_context * tc = NULL ;
struct ocfs2_dinode * fe ;
2007-02-16 22:46:50 +03:00
handle_t * handle = NULL ;
2005-12-16 01:31:24 +03:00
mlog_entry_void ( ) ;
fe = ( struct ocfs2_dinode * ) fe_bh - > b_data ;
2007-09-08 01:46:51 +04:00
/*
* This check will also skip truncate of inodes with inline
* data and fast symlinks .
*/
2007-01-17 23:53:31 +03:00
if ( fe - > i_clusters ) {
2008-09-04 07:03:41 +04:00
if ( ocfs2_should_order_data ( inode ) )
ocfs2_begin_ordered_truncate ( inode , 0 ) ;
2007-02-16 22:46:50 +03:00
handle = ocfs2_start_trans ( osb , OCFS2_INODE_UPDATE_CREDITS ) ;
if ( IS_ERR ( handle ) ) {
status = PTR_ERR ( handle ) ;
mlog_errno ( status ) ;
goto out ;
}
2009-02-13 03:41:25 +03:00
status = ocfs2_journal_access_di ( handle , INODE_CACHE ( inode ) ,
fe_bh ,
2008-10-18 06:25:01 +04:00
OCFS2_JOURNAL_ACCESS_WRITE ) ;
2007-02-16 22:46:50 +03:00
if ( status < 0 ) {
mlog_errno ( status ) ;
goto out ;
}
i_size_write ( inode , 0 ) ;
status = ocfs2_mark_inode_dirty ( handle , inode , fe_bh ) ;
if ( status < 0 ) {
mlog_errno ( status ) ;
goto out ;
}
ocfs2_commit_trans ( osb , handle ) ;
handle = NULL ;
2007-01-17 23:53:31 +03:00
status = ocfs2_prepare_truncate ( osb , inode , fe_bh , & tc ) ;
if ( status < 0 ) {
mlog_errno ( status ) ;
goto out ;
}
2005-12-16 01:31:24 +03:00
2007-01-17 23:53:31 +03:00
status = ocfs2_commit_truncate ( osb , inode , fe_bh , tc ) ;
if ( status < 0 ) {
mlog_errno ( status ) ;
goto out ;
}
2005-12-16 01:31:24 +03:00
}
2007-02-16 22:46:50 +03:00
out :
if ( handle )
ocfs2_commit_trans ( osb , handle ) ;
2005-12-16 01:31:24 +03:00
mlog_exit ( status ) ;
return status ;
}
static int ocfs2_remove_inode ( struct inode * inode ,
struct buffer_head * di_bh ,
struct inode * orphan_dir_inode ,
struct buffer_head * orphan_dir_bh )
{
int status ;
struct inode * inode_alloc_inode = NULL ;
struct buffer_head * inode_alloc_bh = NULL ;
2006-10-10 05:11:45 +04:00
handle_t * handle ;
2005-12-16 01:31:24 +03:00
struct ocfs2_super * osb = OCFS2_SB ( inode - > i_sb ) ;
struct ocfs2_dinode * di = ( struct ocfs2_dinode * ) di_bh - > b_data ;
inode_alloc_inode =
ocfs2_get_system_file_inode ( osb , INODE_ALLOC_SYSTEM_INODE ,
le16_to_cpu ( di - > i_suballoc_slot ) ) ;
if ( ! inode_alloc_inode ) {
status = - EEXIST ;
mlog_errno ( status ) ;
goto bail ;
}
2006-01-10 02:59:24 +03:00
mutex_lock ( & inode_alloc_inode - > i_mutex ) ;
2007-10-19 02:30:42 +04:00
status = ocfs2_inode_lock ( inode_alloc_inode , & inode_alloc_bh , 1 ) ;
2005-12-16 01:31:24 +03:00
if ( status < 0 ) {
2006-01-10 02:59:24 +03:00
mutex_unlock ( & inode_alloc_inode - > i_mutex ) ;
2005-12-16 01:31:24 +03:00
mlog_errno ( status ) ;
goto bail ;
}
2008-10-09 21:38:40 +04:00
handle = ocfs2_start_trans ( osb , OCFS2_DELETE_INODE_CREDITS +
2008-11-13 03:27:44 +03:00
ocfs2_quota_trans_credits ( inode - > i_sb ) ) ;
2005-12-16 01:31:24 +03:00
if ( IS_ERR ( handle ) ) {
status = PTR_ERR ( handle ) ;
mlog_errno ( status ) ;
goto bail_unlock ;
}
status = ocfs2_orphan_del ( osb , handle , orphan_dir_inode , inode ,
orphan_dir_bh ) ;
if ( status < 0 ) {
mlog_errno ( status ) ;
goto bail_commit ;
}
/* set the inodes dtime */
2009-02-13 03:41:25 +03:00
status = ocfs2_journal_access_di ( handle , INODE_CACHE ( inode ) , di_bh ,
2008-10-18 06:25:01 +04:00
OCFS2_JOURNAL_ACCESS_WRITE ) ;
2005-12-16 01:31:24 +03:00
if ( status < 0 ) {
mlog_errno ( status ) ;
goto bail_commit ;
}
di - > i_dtime = cpu_to_le64 ( CURRENT_TIME . tv_sec ) ;
2007-12-25 17:52:59 +03:00
di - > i_flags & = cpu_to_le32 ( ~ ( OCFS2_VALID_FL | OCFS2_ORPHANED_FL ) ) ;
2005-12-16 01:31:24 +03:00
status = ocfs2_journal_dirty ( handle , di_bh ) ;
if ( status < 0 ) {
mlog_errno ( status ) ;
goto bail_commit ;
}
2009-02-11 07:00:41 +03:00
ocfs2_remove_from_cache ( INODE_CACHE ( inode ) , di_bh ) ;
2010-03-03 17:05:01 +03:00
dquot_free_inode ( inode ) ;
2005-12-16 01:31:24 +03:00
status = ocfs2_free_dinode ( handle , inode_alloc_inode ,
inode_alloc_bh , di ) ;
if ( status < 0 )
mlog_errno ( status ) ;
bail_commit :
2006-10-10 03:48:10 +04:00
ocfs2_commit_trans ( osb , handle ) ;
2005-12-16 01:31:24 +03:00
bail_unlock :
2007-10-19 02:30:42 +04:00
ocfs2_inode_unlock ( inode_alloc_inode , 1 ) ;
2006-01-10 02:59:24 +03:00
mutex_unlock ( & inode_alloc_inode - > i_mutex ) ;
2005-12-16 01:31:24 +03:00
brelse ( inode_alloc_bh ) ;
bail :
iput ( inode_alloc_inode ) ;
return status ;
}
2010-01-26 03:57:38 +03:00
/*
2006-02-23 04:35:08 +03:00
* Serialize with orphan dir recovery . If the process doing
* recovery on this orphan dir does an iget ( ) with the dir
* i_mutex held , we ' ll deadlock here . Instead we detect this
* and exit early - recovery will wipe this inode for us .
*/
static int ocfs2_check_orphan_recovery_state ( struct ocfs2_super * osb ,
int slot )
{
int ret = 0 ;
spin_lock ( & osb - > osb_lock ) ;
if ( ocfs2_node_map_test_bit ( osb , & osb - > osb_recovering_orphan_dirs , slot ) ) {
mlog ( 0 , " Recovery is happening on orphan dir %d, will skip "
" this inode \n " , slot ) ;
ret = - EDEADLK ;
goto out ;
}
/* This signals to the orphan recovery process that it should
* wait for us to handle the wipe . */
osb - > osb_orphan_wipes [ slot ] + + ;
out :
spin_unlock ( & osb - > osb_lock ) ;
return ret ;
}
static void ocfs2_signal_wipe_completion ( struct ocfs2_super * osb ,
int slot )
{
spin_lock ( & osb - > osb_lock ) ;
osb - > osb_orphan_wipes [ slot ] - - ;
spin_unlock ( & osb - > osb_lock ) ;
wake_up ( & osb - > osb_wipe_event ) ;
}
2005-12-16 01:31:24 +03:00
static int ocfs2_wipe_inode ( struct inode * inode ,
struct buffer_head * di_bh )
{
int status , orphaned_slot ;
struct inode * orphan_dir_inode = NULL ;
struct buffer_head * orphan_dir_bh = NULL ;
struct ocfs2_super * osb = OCFS2_SB ( inode - > i_sb ) ;
2007-03-21 02:01:38 +03:00
struct ocfs2_dinode * di ;
2005-12-16 01:31:24 +03:00
2007-03-21 02:01:38 +03:00
di = ( struct ocfs2_dinode * ) di_bh - > b_data ;
orphaned_slot = le16_to_cpu ( di - > i_orphaned_slot ) ;
2006-02-23 04:35:08 +03:00
status = ocfs2_check_orphan_recovery_state ( osb , orphaned_slot ) ;
if ( status )
return status ;
2005-12-16 01:31:24 +03:00
orphan_dir_inode = ocfs2_get_system_file_inode ( osb ,
ORPHAN_DIR_SYSTEM_INODE ,
orphaned_slot ) ;
if ( ! orphan_dir_inode ) {
status = - EEXIST ;
mlog_errno ( status ) ;
goto bail ;
}
/* Lock the orphan dir. The lock will be held for the entire
* delete_inode operation . We do this now to avoid races with
* recovery completion on other nodes . */
2006-01-10 02:59:24 +03:00
mutex_lock ( & orphan_dir_inode - > i_mutex ) ;
2007-10-19 02:30:42 +04:00
status = ocfs2_inode_lock ( orphan_dir_inode , & orphan_dir_bh , 1 ) ;
2005-12-16 01:31:24 +03:00
if ( status < 0 ) {
2006-01-10 02:59:24 +03:00
mutex_unlock ( & orphan_dir_inode - > i_mutex ) ;
2005-12-16 01:31:24 +03:00
mlog_errno ( status ) ;
goto bail ;
}
/* we do this while holding the orphan dir lock because we
2007-09-25 02:56:19 +04:00
* don ' t want recovery being run from another node to try an
* inode delete underneath us - - this will result in two nodes
2005-12-16 01:31:24 +03:00
* truncating the same file ! */
status = ocfs2_truncate_for_delete ( osb , inode , di_bh ) ;
if ( status < 0 ) {
mlog_errno ( status ) ;
goto bail_unlock_dir ;
}
2008-11-13 03:27:44 +03:00
/* Remove any dir index tree */
if ( S_ISDIR ( inode - > i_mode ) ) {
status = ocfs2_dx_dir_truncate ( inode , di_bh ) ;
if ( status ) {
mlog_errno ( status ) ;
goto bail_unlock_dir ;
}
}
2008-08-18 13:11:00 +04:00
/*Free extended attribute resources associated with this inode.*/
status = ocfs2_xattr_remove ( inode , di_bh ) ;
if ( status < 0 ) {
mlog_errno ( status ) ;
goto bail_unlock_dir ;
}
2009-08-18 07:43:49 +04:00
status = ocfs2_remove_refcount_tree ( inode , di_bh ) ;
if ( status < 0 ) {
mlog_errno ( status ) ;
goto bail_unlock_dir ;
}
2005-12-16 01:31:24 +03:00
status = ocfs2_remove_inode ( inode , di_bh , orphan_dir_inode ,
orphan_dir_bh ) ;
if ( status < 0 )
mlog_errno ( status ) ;
bail_unlock_dir :
2007-10-19 02:30:42 +04:00
ocfs2_inode_unlock ( orphan_dir_inode , 1 ) ;
2006-01-10 02:59:24 +03:00
mutex_unlock ( & orphan_dir_inode - > i_mutex ) ;
2005-12-16 01:31:24 +03:00
brelse ( orphan_dir_bh ) ;
bail :
iput ( orphan_dir_inode ) ;
2006-02-23 04:35:08 +03:00
ocfs2_signal_wipe_completion ( osb , orphaned_slot ) ;
2005-12-16 01:31:24 +03:00
return status ;
}
/* There is a series of simple checks that should be done before a
2007-09-25 02:56:19 +04:00
* trylock is even considered . Encapsulate those in this function . */
2005-12-16 01:31:24 +03:00
static int ocfs2_inode_is_valid_to_delete ( struct inode * inode )
{
int ret = 0 ;
struct ocfs2_inode_info * oi = OCFS2_I ( inode ) ;
struct ocfs2_super * osb = OCFS2_SB ( inode - > i_sb ) ;
/* We shouldn't be getting here for the root directory
* inode . . */
if ( inode = = osb - > root_inode ) {
mlog ( ML_ERROR , " Skipping delete of root inode. \n " ) ;
goto bail ;
}
2007-09-25 02:56:19 +04:00
/* If we're coming from downconvert_thread we can't go into our own
2005-12-16 01:31:24 +03:00
* voting [ hello , deadlock city ! ] , so unforuntately we just
* have to skip deleting this guy . That ' s OK though because
* the node who ' s doing the actual deleting should handle it
* anyway . */
2007-09-25 02:56:19 +04:00
if ( current = = osb - > dc_task ) {
2005-12-16 01:31:24 +03:00
mlog ( 0 , " Skipping delete of %lu because we're currently "
2007-09-25 02:56:19 +04:00
" in downconvert \n " , inode - > i_ino ) ;
2005-12-16 01:31:24 +03:00
goto bail ;
}
spin_lock ( & oi - > ip_lock ) ;
/* OCFS2 *never* deletes system files. This should technically
* never get here as system file inodes should always have a
* positive link count . */
if ( oi - > ip_flags & OCFS2_INODE_SYSTEM_FILE ) {
2006-03-03 21:24:33 +03:00
mlog ( ML_ERROR , " Skipping delete of system file %llu \n " ,
( unsigned long long ) oi - > ip_blkno ) ;
2005-12-16 01:31:24 +03:00
goto bail_unlock ;
}
2007-09-25 02:56:19 +04:00
/* If we have allowd wipe of this inode for another node, it
* will be marked here so we can safely skip it . Recovery will
* cleanup any inodes we might inadvertantly skip here . */
2005-12-16 01:31:24 +03:00
if ( oi - > ip_flags & OCFS2_INODE_SKIP_DELETE ) {
mlog ( 0 , " Skipping delete of %lu because another node "
" has done this for us. \n " , inode - > i_ino ) ;
goto bail_unlock ;
}
ret = 1 ;
bail_unlock :
spin_unlock ( & oi - > ip_lock ) ;
bail :
return ret ;
}
/* Query the cluster to determine whether we should wipe an inode from
* disk or not .
*
* Requires the inode to have the cluster lock . */
static int ocfs2_query_inode_wipe ( struct inode * inode ,
struct buffer_head * di_bh ,
int * wipe )
{
int status = 0 ;
struct ocfs2_inode_info * oi = OCFS2_I ( inode ) ;
struct ocfs2_dinode * di ;
* wipe = 0 ;
/* While we were waiting for the cluster lock in
* ocfs2_delete_inode , another node might have asked to delete
* the inode . Recheck our flags to catch this . */
if ( ! ocfs2_inode_is_valid_to_delete ( inode ) ) {
2006-03-03 21:24:33 +03:00
mlog ( 0 , " Skipping delete of %llu because flags changed \n " ,
( unsigned long long ) oi - > ip_blkno ) ;
2005-12-16 01:31:24 +03:00
goto bail ;
}
/* Now that we have an up to date inode, we can double check
* the link count . */
if ( inode - > i_nlink ) {
2006-03-03 21:24:33 +03:00
mlog ( 0 , " Skipping delete of %llu because nlink = %u \n " ,
( unsigned long long ) oi - > ip_blkno , inode - > i_nlink ) ;
2005-12-16 01:31:24 +03:00
goto bail ;
}
/* Do some basic inode verification... */
di = ( struct ocfs2_dinode * ) di_bh - > b_data ;
if ( ! ( di - > i_flags & cpu_to_le32 ( OCFS2_ORPHANED_FL ) ) ) {
/* for lack of a better error? */
status = - EEXIST ;
mlog ( ML_ERROR ,
2006-03-03 21:24:33 +03:00
" Inode %llu (on-disk %llu) not orphaned! "
2005-12-16 01:31:24 +03:00
" Disk flags 0x%x, inode flags 0x%x \n " ,
2006-03-03 21:24:33 +03:00
( unsigned long long ) oi - > ip_blkno ,
2007-04-28 03:01:25 +04:00
( unsigned long long ) le64_to_cpu ( di - > i_blkno ) ,
le32_to_cpu ( di - > i_flags ) , oi - > ip_flags ) ;
2005-12-16 01:31:24 +03:00
goto bail ;
}
/* has someone already deleted us?! baaad... */
if ( di - > i_dtime ) {
status = - EEXIST ;
mlog_errno ( status ) ;
goto bail ;
}
2007-03-21 03:17:54 +03:00
/*
* This is how ocfs2 determines whether an inode is still live
* within the cluster . Every node takes a shared read lock on
* the inode open lock in ocfs2_read_locked_inode ( ) . When we
* get to - > delete_inode ( ) , each node tries to convert it ' s
* lock to an exclusive . Trylocks are serialized by the inode
* meta data lock . If the upconvert suceeds , we know the inode
* is no longer live and can be deleted .
*
* Though we call this with the meta data lock held , the
* trylock keeps us from ABBA deadlock .
*/
status = ocfs2_try_open_lock ( inode , 1 ) ;
2007-03-21 02:01:38 +03:00
if ( status = = - EAGAIN ) {
2005-12-16 01:31:24 +03:00
status = 0 ;
2007-11-20 04:53:34 +03:00
mlog ( 0 , " Skipping delete of %llu because it is in use on "
2006-03-03 21:24:33 +03:00
" other nodes \n " , ( unsigned long long ) oi - > ip_blkno ) ;
2005-12-16 01:31:24 +03:00
goto bail ;
}
if ( status < 0 ) {
mlog_errno ( status ) ;
goto bail ;
}
2007-03-21 02:01:38 +03:00
* wipe = 1 ;
mlog ( 0 , " Inode %llu is ok to wipe from orphan dir %u \n " ,
( unsigned long long ) oi - > ip_blkno ,
le16_to_cpu ( di - > i_orphaned_slot ) ) ;
2005-12-16 01:31:24 +03:00
bail :
return status ;
}
/* Support function for ocfs2_delete_inode. Will help us keep the
* inode data in a consistent state for clear_inode . Always truncates
* pages , optionally sync ' s them first . */
static void ocfs2_cleanup_delete_inode ( struct inode * inode ,
int sync_data )
{
2006-03-03 21:24:33 +03:00
mlog ( 0 , " Cleanup inode %llu, sync = %d \n " ,
( unsigned long long ) OCFS2_I ( inode ) - > ip_blkno , sync_data ) ;
2005-12-16 01:31:24 +03:00
if ( sync_data )
write_inode_now ( inode , 1 ) ;
truncate_inode_pages ( & inode - > i_data , 0 ) ;
}
void ocfs2_delete_inode ( struct inode * inode )
{
int wipe , status ;
sigset_t blocked , oldset ;
struct buffer_head * di_bh = NULL ;
mlog_entry ( " (inode->i_ino = %lu) \n " , inode - > i_ino ) ;
2008-10-09 21:38:40 +04:00
/* When we fail in read_inode() we mark inode as bad. The second test
* catches the case when inode allocation fails before allocating
* a block for inode . */
if ( is_bad_inode ( inode ) | | ! OCFS2_I ( inode ) - > ip_blkno ) {
2005-12-16 01:31:24 +03:00
mlog ( 0 , " Skipping delete of bad inode \n " ) ;
goto bail ;
}
2010-03-03 17:05:06 +03:00
vfs_dq_init ( inode ) ;
2005-12-16 01:31:24 +03:00
if ( ! ocfs2_inode_is_valid_to_delete ( inode ) ) {
/* It's probably not necessary to truncate_inode_pages
* here but we do it for safety anyway ( it will most
* likely be a no - op anyway ) */
ocfs2_cleanup_delete_inode ( inode , 0 ) ;
goto bail ;
}
/* We want to block signals in delete_inode as the lock and
* messaging paths may return us - ERESTARTSYS . Which would
* cause us to exit early , resulting in inodes being orphaned
* forever . */
sigfillset ( & blocked ) ;
status = sigprocmask ( SIG_BLOCK , & blocked , & oldset ) ;
if ( status < 0 ) {
mlog_errno ( status ) ;
ocfs2_cleanup_delete_inode ( inode , 1 ) ;
goto bail ;
}
2009-03-06 16:29:10 +03:00
/*
* Synchronize us against ocfs2_get_dentry . We take this in
* shared mode so that all nodes can still concurrently
* process deletes .
*/
status = ocfs2_nfs_sync_lock ( OCFS2_SB ( inode - > i_sb ) , 0 ) ;
if ( status < 0 ) {
mlog ( ML_ERROR , " getting nfs sync lock(PR) failed %d \n " , status ) ;
ocfs2_cleanup_delete_inode ( inode , 0 ) ;
goto bail_unblock ;
}
2005-12-16 01:31:24 +03:00
/* Lock down the inode. This gives us an up to date view of
* it ' s metadata ( for verification ) , and allows us to
2007-09-25 02:56:19 +04:00
* serialize delete_inode on multiple nodes .
2005-12-16 01:31:24 +03:00
*
* Even though we might be doing a truncate , we don ' t take the
* allocation lock here as it won ' t be needed - nobody will
* have the file open .
*/
2007-10-19 02:30:42 +04:00
status = ocfs2_inode_lock ( inode , & di_bh , 1 ) ;
2005-12-16 01:31:24 +03:00
if ( status < 0 ) {
if ( status ! = - ENOENT )
mlog_errno ( status ) ;
ocfs2_cleanup_delete_inode ( inode , 0 ) ;
2009-03-06 16:29:10 +03:00
goto bail_unlock_nfs_sync ;
2005-12-16 01:31:24 +03:00
}
/* Query the cluster. This will be the final decision made
* before we go ahead and wipe the inode . */
status = ocfs2_query_inode_wipe ( inode , di_bh , & wipe ) ;
if ( ! wipe | | status < 0 ) {
2007-09-25 02:56:19 +04:00
/* Error and remote inode busy both mean we won't be
2005-12-16 01:31:24 +03:00
* removing the inode , so they take almost the same
* path . */
if ( status < 0 )
mlog_errno ( status ) ;
2007-09-25 02:56:19 +04:00
/* Someone in the cluster has disallowed a wipe of
* this inode , or it was never completely
* orphaned . Write out the pages and exit now . */
2005-12-16 01:31:24 +03:00
ocfs2_cleanup_delete_inode ( inode , 1 ) ;
goto bail_unlock_inode ;
}
ocfs2_cleanup_delete_inode ( inode , 0 ) ;
status = ocfs2_wipe_inode ( inode , di_bh ) ;
if ( status < 0 ) {
2006-02-23 04:35:08 +03:00
if ( status ! = - EDEADLK )
mlog_errno ( status ) ;
2005-12-16 01:31:24 +03:00
goto bail_unlock_inode ;
}
2006-09-23 04:28:19 +04:00
/*
* Mark the inode as successfully deleted .
*
* This is important for ocfs2_clear_inode ( ) as it will check
* this flag and skip any checkpointing work
*
* ocfs2_stuff_meta_lvb ( ) also uses this flag to invalidate
* the LVB for other nodes .
*/
2005-12-16 01:31:24 +03:00
OCFS2_I ( inode ) - > ip_flags | = OCFS2_INODE_DELETED ;
bail_unlock_inode :
2007-10-19 02:30:42 +04:00
ocfs2_inode_unlock ( inode , 1 ) ;
2005-12-16 01:31:24 +03:00
brelse ( di_bh ) ;
2009-03-06 16:29:10 +03:00
bail_unlock_nfs_sync :
ocfs2_nfs_sync_unlock ( OCFS2_SB ( inode - > i_sb ) , 0 ) ;
2005-12-16 01:31:24 +03:00
bail_unblock :
status = sigprocmask ( SIG_SETMASK , & oldset , NULL ) ;
if ( status < 0 )
mlog_errno ( status ) ;
bail :
clear_inode ( inode ) ;
mlog_exit_void ( ) ;
}
void ocfs2_clear_inode ( struct inode * inode )
{
int status ;
struct ocfs2_inode_info * oi = OCFS2_I ( inode ) ;
mlog_entry_void ( ) ;
if ( ! inode )
goto bail ;
2006-03-03 21:24:33 +03:00
mlog ( 0 , " Clearing inode: %llu, nlink = %u \n " ,
( unsigned long long ) OCFS2_I ( inode ) - > ip_blkno , inode - > i_nlink ) ;
2005-12-16 01:31:24 +03:00
mlog_bug_on_msg ( OCFS2_SB ( inode - > i_sb ) = = NULL ,
" Inode=%lu \n " , inode - > i_ino ) ;
2010-03-03 17:05:05 +03:00
dquot_drop ( inode ) ;
2010-03-03 17:05:04 +03:00
2007-09-25 02:56:19 +04:00
/* To preven remote deletes we hold open lock before, now it
* is time to unlock PR and EX open locks . */
2007-03-21 02:01:38 +03:00
ocfs2_open_unlock ( inode ) ;
2005-12-16 01:31:24 +03:00
/* Do these before all the other work so that we don't bounce
2007-09-25 02:56:19 +04:00
* the downconvert thread while waiting to destroy the locks . */
2005-12-16 01:31:24 +03:00
ocfs2_mark_lockres_freeing ( & oi - > ip_rw_lockres ) ;
2007-10-19 02:30:42 +04:00
ocfs2_mark_lockres_freeing ( & oi - > ip_inode_lockres ) ;
2007-03-21 02:01:38 +03:00
ocfs2_mark_lockres_freeing ( & oi - > ip_open_lockres ) ;
2005-12-16 01:31:24 +03:00
/* We very well may get a clear_inode before all an inodes
* metadata has hit disk . Of course , we can ' t drop any cluster
* locks until the journal has finished with it . The only
* exception here are successfully wiped inodes - their
* metadata can now be considered to be part of the system
* inodes from which it came . */
if ( ! ( OCFS2_I ( inode ) - > ip_flags & OCFS2_INODE_DELETED ) )
ocfs2_checkpoint_inode ( inode ) ;
mlog_bug_on_msg ( ! list_empty ( & oi - > ip_io_markers ) ,
2006-03-03 21:24:33 +03:00
" Clear inode of %llu, inode has io markers \n " ,
( unsigned long long ) oi - > ip_blkno ) ;
2005-12-16 01:31:24 +03:00
2007-04-24 05:53:12 +04:00
ocfs2_extent_map_trunc ( inode , 0 ) ;
2005-12-16 01:31:24 +03:00
status = ocfs2_drop_inode_locks ( inode ) ;
if ( status < 0 )
mlog_errno ( status ) ;
ocfs2_lock_res_free ( & oi - > ip_rw_lockres ) ;
2007-10-19 02:30:42 +04:00
ocfs2_lock_res_free ( & oi - > ip_inode_lockres ) ;
2007-03-21 02:01:38 +03:00
ocfs2_lock_res_free ( & oi - > ip_open_lockres ) ;
2005-12-16 01:31:24 +03:00
2009-02-13 02:24:40 +03:00
ocfs2_metadata_cache_exit ( INODE_CACHE ( inode ) ) ;
2005-12-16 01:31:24 +03:00
2009-02-11 07:00:41 +03:00
mlog_bug_on_msg ( INODE_CACHE ( inode ) - > ci_num_cached ,
2006-03-03 21:24:33 +03:00
" Clear inode of %llu, inode has %u cache items \n " ,
2009-02-11 07:00:41 +03:00
( unsigned long long ) oi - > ip_blkno ,
INODE_CACHE ( inode ) - > ci_num_cached ) ;
2005-12-16 01:31:24 +03:00
2009-02-11 07:00:41 +03:00
mlog_bug_on_msg ( ! ( INODE_CACHE ( inode ) - > ci_flags & OCFS2_CACHE_FL_INLINE ) ,
2006-03-03 21:24:33 +03:00
" Clear inode of %llu, inode has a bad flag \n " ,
( unsigned long long ) oi - > ip_blkno ) ;
2005-12-16 01:31:24 +03:00
mlog_bug_on_msg ( spin_is_locked ( & oi - > ip_lock ) ,
2006-03-03 21:24:33 +03:00
" Clear inode of %llu, inode is locked \n " ,
( unsigned long long ) oi - > ip_blkno ) ;
2005-12-16 01:31:24 +03:00
2006-01-11 02:41:43 +03:00
mlog_bug_on_msg ( ! mutex_trylock ( & oi - > ip_io_mutex ) ,
2006-03-03 21:24:33 +03:00
" Clear inode of %llu, io_mutex is locked \n " ,
( unsigned long long ) oi - > ip_blkno ) ;
2006-01-11 02:41:43 +03:00
mutex_unlock ( & oi - > ip_io_mutex ) ;
2005-12-16 01:31:24 +03:00
/*
* down_trylock ( ) returns 0 , down_write_trylock ( ) returns 1
* kernel 1 , world 0
*/
mlog_bug_on_msg ( ! down_write_trylock ( & oi - > ip_alloc_sem ) ,
2006-03-03 21:24:33 +03:00
" Clear inode of %llu, alloc_sem is locked \n " ,
( unsigned long long ) oi - > ip_blkno ) ;
2005-12-16 01:31:24 +03:00
up_write ( & oi - > ip_alloc_sem ) ;
mlog_bug_on_msg ( oi - > ip_open_count ,
2006-03-03 21:24:33 +03:00
" Clear inode of %llu has open count %d \n " ,
( unsigned long long ) oi - > ip_blkno , oi - > ip_open_count ) ;
2005-12-16 01:31:24 +03:00
/* Clear all other flags. */
2009-02-11 03:05:07 +03:00
oi - > ip_flags = 0 ;
2005-12-16 01:31:24 +03:00
oi - > ip_dir_start_lookup = 0 ;
oi - > ip_blkno = 0ULL ;
2008-10-23 00:24:29 +04:00
/*
* ip_jinode is used to track txns against this inode . We ensure that
* the journal is flushed before journal shutdown . Thus it is safe to
* have inodes get cleaned up after journal shutdown .
*/
2008-09-04 07:03:41 +04:00
jbd2_journal_release_jbd_inode ( OCFS2_SB ( inode - > i_sb ) - > journal - > j_journal ,
& oi - > ip_jinode ) ;
2005-12-16 01:31:24 +03:00
bail :
mlog_exit_void ( ) ;
}
/* Called under inode_lock, with no more references on the
* struct inode , so it ' s safe here to check the flags field
* and to manipulate i_nlink without any other locks . */
void ocfs2_drop_inode ( struct inode * inode )
{
struct ocfs2_inode_info * oi = OCFS2_I ( inode ) ;
mlog_entry_void ( ) ;
2006-03-03 21:24:33 +03:00
mlog ( 0 , " Drop inode %llu, nlink = %u, ip_flags = 0x%x \n " ,
( unsigned long long ) oi - > ip_blkno , inode - > i_nlink , oi - > ip_flags ) ;
2005-12-16 01:31:24 +03:00
2006-09-09 01:21:03 +04:00
if ( oi - > ip_flags & OCFS2_INODE_MAYBE_ORPHANED )
generic_delete_inode ( inode ) ;
else
generic_drop_inode ( inode ) ;
2005-12-16 01:31:24 +03:00
mlog_exit_void ( ) ;
}
/*
* This is called from our getattr .
*/
int ocfs2_inode_revalidate ( struct dentry * dentry )
{
struct inode * inode = dentry - > d_inode ;
int status = 0 ;
2006-03-03 21:24:33 +03:00
mlog_entry ( " (inode = 0x%p, ino = %llu) \n " , inode ,
inode ? ( unsigned long long ) OCFS2_I ( inode ) - > ip_blkno : 0ULL ) ;
2005-12-16 01:31:24 +03:00
if ( ! inode ) {
mlog ( 0 , " eep, no inode! \n " ) ;
status = - ENOENT ;
goto bail ;
}
spin_lock ( & OCFS2_I ( inode ) - > ip_lock ) ;
if ( OCFS2_I ( inode ) - > ip_flags & OCFS2_INODE_DELETED ) {
spin_unlock ( & OCFS2_I ( inode ) - > ip_lock ) ;
mlog ( 0 , " inode deleted! \n " ) ;
status = - ENOENT ;
goto bail ;
}
spin_unlock ( & OCFS2_I ( inode ) - > ip_lock ) ;
2007-10-19 02:30:42 +04:00
/* Let ocfs2_inode_lock do the work of updating our struct
2005-12-16 01:31:24 +03:00
* inode for us . */
2007-10-19 02:30:42 +04:00
status = ocfs2_inode_lock ( inode , NULL , 0 ) ;
2005-12-16 01:31:24 +03:00
if ( status < 0 ) {
if ( status ! = - ENOENT )
mlog_errno ( status ) ;
goto bail ;
}
2007-10-19 02:30:42 +04:00
ocfs2_inode_unlock ( inode , 0 ) ;
2005-12-16 01:31:24 +03:00
bail :
mlog_exit ( status ) ;
return status ;
}
/*
* Updates a disk inode from a
* struct inode .
* Only takes ip_lock .
*/
2006-10-10 05:11:45 +04:00
int ocfs2_mark_inode_dirty ( handle_t * handle ,
2005-12-16 01:31:24 +03:00
struct inode * inode ,
struct buffer_head * bh )
{
int status ;
struct ocfs2_dinode * fe = ( struct ocfs2_dinode * ) bh - > b_data ;
2006-03-03 21:24:33 +03:00
mlog_entry ( " (inode %llu) \n " ,
( unsigned long long ) OCFS2_I ( inode ) - > ip_blkno ) ;
2005-12-16 01:31:24 +03:00
2009-02-13 03:41:25 +03:00
status = ocfs2_journal_access_di ( handle , INODE_CACHE ( inode ) , bh ,
2008-10-18 06:25:01 +04:00
OCFS2_JOURNAL_ACCESS_WRITE ) ;
2005-12-16 01:31:24 +03:00
if ( status < 0 ) {
mlog_errno ( status ) ;
goto leave ;
}
spin_lock ( & OCFS2_I ( inode ) - > ip_lock ) ;
fe - > i_clusters = cpu_to_le32 ( OCFS2_I ( inode ) - > ip_clusters ) ;
2007-04-27 22:08:01 +04:00
ocfs2_get_inode_flags ( OCFS2_I ( inode ) ) ;
2006-07-04 04:27:12 +04:00
fe - > i_attr = cpu_to_le32 ( OCFS2_I ( inode ) - > ip_attr ) ;
2007-09-08 00:58:15 +04:00
fe - > i_dyn_features = cpu_to_le16 ( OCFS2_I ( inode ) - > ip_dyn_features ) ;
2005-12-16 01:31:24 +03:00
spin_unlock ( & OCFS2_I ( inode ) - > ip_lock ) ;
fe - > i_size = cpu_to_le64 ( i_size_read ( inode ) ) ;
2008-11-21 04:54:57 +03:00
ocfs2_set_links_count ( fe , inode - > i_nlink ) ;
2005-12-16 01:31:24 +03:00
fe - > i_uid = cpu_to_le32 ( inode - > i_uid ) ;
fe - > i_gid = cpu_to_le32 ( inode - > i_gid ) ;
fe - > i_mode = cpu_to_le16 ( inode - > i_mode ) ;
fe - > i_atime = cpu_to_le64 ( inode - > i_atime . tv_sec ) ;
fe - > i_atime_nsec = cpu_to_le32 ( inode - > i_atime . tv_nsec ) ;
fe - > i_ctime = cpu_to_le64 ( inode - > i_ctime . tv_sec ) ;
fe - > i_ctime_nsec = cpu_to_le32 ( inode - > i_ctime . tv_nsec ) ;
fe - > i_mtime = cpu_to_le64 ( inode - > i_mtime . tv_sec ) ;
fe - > i_mtime_nsec = cpu_to_le32 ( inode - > i_mtime . tv_nsec ) ;
status = ocfs2_journal_dirty ( handle , bh ) ;
if ( status < 0 )
mlog_errno ( status ) ;
status = 0 ;
leave :
mlog_exit ( status ) ;
return status ;
}
/*
*
* Updates a struct inode from a disk inode .
* does no i / o , only takes ip_lock .
*/
void ocfs2_refresh_inode ( struct inode * inode ,
struct ocfs2_dinode * fe )
{
spin_lock ( & OCFS2_I ( inode ) - > ip_lock ) ;
OCFS2_I ( inode ) - > ip_clusters = le32_to_cpu ( fe - > i_clusters ) ;
2006-07-04 04:27:12 +04:00
OCFS2_I ( inode ) - > ip_attr = le32_to_cpu ( fe - > i_attr ) ;
2007-09-08 00:58:15 +04:00
OCFS2_I ( inode ) - > ip_dyn_features = le16_to_cpu ( fe - > i_dyn_features ) ;
2006-07-04 04:27:12 +04:00
ocfs2_set_inode_flags ( inode ) ;
2005-12-16 01:31:24 +03:00
i_size_write ( inode , le64_to_cpu ( fe - > i_size ) ) ;
2008-11-21 04:54:57 +03:00
inode - > i_nlink = ocfs2_read_links_count ( fe ) ;
2005-12-16 01:31:24 +03:00
inode - > i_uid = le32_to_cpu ( fe - > i_uid ) ;
inode - > i_gid = le32_to_cpu ( fe - > i_gid ) ;
inode - > i_mode = le16_to_cpu ( fe - > i_mode ) ;
if ( S_ISLNK ( inode - > i_mode ) & & le32_to_cpu ( fe - > i_clusters ) = = 0 )
inode - > i_blocks = 0 ;
else
2007-03-23 02:53:23 +03:00
inode - > i_blocks = ocfs2_inode_sector_count ( inode ) ;
2005-12-16 01:31:24 +03:00
inode - > i_atime . tv_sec = le64_to_cpu ( fe - > i_atime ) ;
inode - > i_atime . tv_nsec = le32_to_cpu ( fe - > i_atime_nsec ) ;
inode - > i_mtime . tv_sec = le64_to_cpu ( fe - > i_mtime ) ;
inode - > i_mtime . tv_nsec = le32_to_cpu ( fe - > i_mtime_nsec ) ;
inode - > i_ctime . tv_sec = le64_to_cpu ( fe - > i_ctime ) ;
inode - > i_ctime . tv_nsec = le32_to_cpu ( fe - > i_ctime_nsec ) ;
spin_unlock ( & OCFS2_I ( inode ) - > ip_lock ) ;
}
2008-11-14 01:49:11 +03:00
int ocfs2_validate_inode_block ( struct super_block * sb ,
struct buffer_head * bh )
{
2008-10-18 01:55:01 +04:00
int rc ;
2008-11-14 01:49:11 +03:00
struct ocfs2_dinode * di = ( struct ocfs2_dinode * ) bh - > b_data ;
2008-11-14 01:49:19 +03:00
mlog ( 0 , " Validating dinode %llu \n " ,
( unsigned long long ) bh - > b_blocknr ) ;
2008-11-14 01:49:11 +03:00
BUG_ON ( ! buffer_uptodate ( bh ) ) ;
2008-10-18 01:55:01 +04:00
/*
* If the ecc fails , we return the error but otherwise
* leave the filesystem running . We know any error is
* local to this block .
*/
rc = ocfs2_validate_meta_ecc ( sb , bh - > b_data , & di - > i_check ) ;
2008-10-18 06:25:01 +04:00
if ( rc ) {
mlog ( ML_ERROR , " Checksum failed for dinode %llu \n " ,
( unsigned long long ) bh - > b_blocknr ) ;
2008-10-18 01:55:01 +04:00
goto bail ;
2008-10-18 06:25:01 +04:00
}
2008-10-18 01:55:01 +04:00
/*
* Errors after here are fatal .
*/
rc = - EINVAL ;
2008-11-14 01:49:11 +03:00
if ( ! OCFS2_IS_VALID_DINODE ( di ) ) {
ocfs2_error ( sb , " Invalid dinode #%llu: signature = %.*s \n " ,
( unsigned long long ) bh - > b_blocknr , 7 ,
di - > i_signature ) ;
goto bail ;
}
if ( le64_to_cpu ( di - > i_blkno ) ! = bh - > b_blocknr ) {
ocfs2_error ( sb , " Invalid dinode #%llu: i_blkno is %llu \n " ,
( unsigned long long ) bh - > b_blocknr ,
( unsigned long long ) le64_to_cpu ( di - > i_blkno ) ) ;
goto bail ;
}
if ( ! ( di - > i_flags & cpu_to_le32 ( OCFS2_VALID_FL ) ) ) {
ocfs2_error ( sb ,
" Invalid dinode #%llu: OCFS2_VALID_FL not set \n " ,
( unsigned long long ) bh - > b_blocknr ) ;
goto bail ;
}
if ( le32_to_cpu ( di - > i_fs_generation ) ! =
OCFS2_SB ( sb ) - > fs_generation ) {
ocfs2_error ( sb ,
" Invalid dinode #%llu: fs_generation is %u \n " ,
( unsigned long long ) bh - > b_blocknr ,
le32_to_cpu ( di - > i_fs_generation ) ) ;
goto bail ;
}
rc = 0 ;
bail :
return rc ;
}
int ocfs2_read_inode_block_full ( struct inode * inode , struct buffer_head * * bh ,
int flags )
{
int rc ;
struct buffer_head * tmp = * bh ;
2009-02-11 07:00:41 +03:00
rc = ocfs2_read_blocks ( INODE_CACHE ( inode ) , OCFS2_I ( inode ) - > ip_blkno ,
1 , & tmp , flags , ocfs2_validate_inode_block ) ;
2008-11-14 01:49:11 +03:00
/* If ocfs2_read_blocks() got us a new bh, pass it up. */
2008-11-14 01:49:19 +03:00
if ( ! rc & & ! * bh )
2008-11-14 01:49:11 +03:00
* bh = tmp ;
return rc ;
}
/* Read the dinode block of @inode with no special read flags. */
int ocfs2_read_inode_block(struct inode *inode, struct buffer_head **bh)
{
	int no_flags = 0;

	return ocfs2_read_inode_block_full(inode, bh, no_flags);
}
2009-02-11 06:00:37 +03:00
static u64 ocfs2_inode_cache_owner ( struct ocfs2_caching_info * ci )
{
struct ocfs2_inode_info * oi = cache_info_to_inode ( ci ) ;
return oi - > ip_blkno ;
}
2009-02-11 07:00:41 +03:00
static struct super_block * ocfs2_inode_cache_get_super ( struct ocfs2_caching_info * ci )
{
struct ocfs2_inode_info * oi = cache_info_to_inode ( ci ) ;
return oi - > vfs_inode . i_sb ;
}
2009-02-11 06:00:37 +03:00
static void ocfs2_inode_cache_lock ( struct ocfs2_caching_info * ci )
{
struct ocfs2_inode_info * oi = cache_info_to_inode ( ci ) ;
spin_lock ( & oi - > ip_lock ) ;
}
static void ocfs2_inode_cache_unlock ( struct ocfs2_caching_info * ci )
{
struct ocfs2_inode_info * oi = cache_info_to_inode ( ci ) ;
spin_unlock ( & oi - > ip_lock ) ;
}
static void ocfs2_inode_cache_io_lock ( struct ocfs2_caching_info * ci )
{
struct ocfs2_inode_info * oi = cache_info_to_inode ( ci ) ;
mutex_lock ( & oi - > ip_io_mutex ) ;
}
static void ocfs2_inode_cache_io_unlock ( struct ocfs2_caching_info * ci )
{
struct ocfs2_inode_info * oi = cache_info_to_inode ( ci ) ;
mutex_unlock ( & oi - > ip_io_mutex ) ;
}
const struct ocfs2_caching_operations ocfs2_inode_caching_ops = {
. co_owner = ocfs2_inode_cache_owner ,
2009-02-11 07:00:41 +03:00
. co_get_super = ocfs2_inode_cache_get_super ,
2009-02-11 06:00:37 +03:00
. co_cache_lock = ocfs2_inode_cache_lock ,
. co_cache_unlock = ocfs2_inode_cache_unlock ,
. co_io_lock = ocfs2_inode_cache_io_lock ,
. co_io_unlock = ocfs2_inode_cache_io_unlock ,
} ;