2005-12-16 01:31:24 +03:00
/* -*- mode: c; c-basic-offset: 8; -*-
* vim : noexpandtab sw = 8 ts = 8 sts = 0 :
*
* dir . c
*
* Creates , reads , walks and deletes directory - nodes
*
* Copyright ( C ) 2002 , 2004 Oracle . All rights reserved .
*
* Portions of this code from linux / fs / ext3 / dir . c
*
* Copyright ( C ) 1992 , 1993 , 1994 , 1995
* Remy Card ( card @ masi . ibp . fr )
* Laboratoire MASI - Institut Blaise Pascal
* Universite Pierre et Marie Curie ( Paris VI )
*
* from
*
* linux / fs / minix / dir . c
*
* Copyright ( C ) 1991 , 1992 Linus Torvalds
*
* This program is free software ; you can redistribute it and / or
* modify it under the terms of the GNU General Public
* License as published by the Free Software Foundation ; either
* version 2 of the License , or ( at your option ) any later version .
*
* This program is distributed in the hope that it will be useful ,
* but WITHOUT ANY WARRANTY ; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE . See the GNU
* General Public License for more details .
*
* You should have received a copy of the GNU General Public
* License along with this program ; if not , write to the
* Free Software Foundation , Inc . , 59 Temple Place - Suite 330 ,
* Boston , MA 02111 - 1307 , USA .
*/
# include <linux/fs.h>
# include <linux/types.h>
# include <linux/slab.h>
# include <linux/highmem.h>
# define MLOG_MASK_PREFIX ML_NAMEI
# include <cluster/masklog.h>
# include "ocfs2.h"
# include "alloc.h"
# include "dir.h"
# include "dlmglue.h"
# include "extent_map.h"
# include "file.h"
# include "inode.h"
# include "journal.h"
# include "namei.h"
# include "suballoc.h"
2007-09-08 05:21:26 +04:00
# include "super.h"
2005-12-16 01:31:24 +03:00
# include "uptodate.h"
# include "buffer_head_io.h"
2007-09-08 05:21:26 +04:00
# define NAMEI_RA_CHUNKS 2
# define NAMEI_RA_BLOCKS 4
# define NAMEI_RA_SIZE (NAMEI_RA_CHUNKS * NAMEI_RA_BLOCKS)
# define NAMEI_RA_INDEX(c,b) (((c) * NAMEI_RA_BLOCKS) + (b))
2005-12-16 01:31:24 +03:00
/*
 * Translates the file_type byte of a directory entry into the DT_* code
 * passed to filldir().  The readdir paths in this file only index it after
 * checking file_type < OCFS2_FT_MAX.  It is a pure lookup table, so it is
 * declared const.
 */
static const unsigned char ocfs2_filetype_table[] = {
	DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK
};
/* Forward declaration of the directory extension routine. */
static int ocfs2_extend_dir(struct ocfs2_super *osb, struct inode *dir,
			    struct buffer_head *parent_fe_bh,
			    unsigned int blocks_wanted,
			    struct buffer_head **new_de_bh);
2007-09-08 05:21:26 +04:00
/*
 * Forward declaration; ocfs2_fill_new_dir_el() calls this with a NULL
 * meta_ac to obtain a new block for a directory.
 */
static int ocfs2_do_extend_dir(struct super_block *sb, handle_t *handle,
			       struct inode *dir,
			       struct buffer_head *parent_fe_bh,
			       struct ocfs2_alloc_context *data_ac,
			       struct ocfs2_alloc_context *meta_ac,
			       struct buffer_head **new_bh);
2008-10-10 04:20:32 +04:00
static struct buffer_head * ocfs2_bread ( struct inode * inode ,
int block , int * err , int reada )
{
struct buffer_head * bh = NULL ;
int tmperr ;
u64 p_blkno ;
2008-10-10 04:20:34 +04:00
int readflags = 0 ;
2008-10-10 04:20:32 +04:00
if ( reada )
readflags | = OCFS2_BH_READAHEAD ;
if ( ( ( u64 ) block < < inode - > i_sb - > s_blocksize_bits ) > =
i_size_read ( inode ) ) {
BUG_ON ( ! reada ) ;
return NULL ;
}
down_read ( & OCFS2_I ( inode ) - > ip_alloc_sem ) ;
tmperr = ocfs2_extent_map_get_blocks ( inode , block , & p_blkno , NULL ,
NULL ) ;
up_read ( & OCFS2_I ( inode ) - > ip_alloc_sem ) ;
if ( tmperr < 0 ) {
mlog_errno ( tmperr ) ;
goto fail ;
}
tmperr = ocfs2_read_blocks ( inode , p_blkno , 1 , & bh , readflags ) ;
if ( tmperr < 0 )
goto fail ;
tmperr = 0 ;
* err = 0 ;
return bh ;
fail :
brelse ( bh ) ;
bh = NULL ;
* err = - EIO ;
return NULL ;
}
2007-09-13 00:01:18 +04:00
/*
* bh passed here can be an inode block or a dir data block , depending
* on the inode inline data flag .
*/
2007-09-11 04:50:51 +04:00
static int ocfs2_check_dir_entry ( struct inode * dir ,
struct ocfs2_dir_entry * de ,
struct buffer_head * bh ,
unsigned long offset )
2007-09-08 05:21:26 +04:00
{
const char * error_msg = NULL ;
const int rlen = le16_to_cpu ( de - > rec_len ) ;
if ( rlen < OCFS2_DIR_REC_LEN ( 1 ) )
error_msg = " rec_len is smaller than minimal " ;
else if ( rlen % 4 ! = 0 )
error_msg = " rec_len % 4 != 0 " ;
else if ( rlen < OCFS2_DIR_REC_LEN ( de - > name_len ) )
error_msg = " rec_len is too small for name_len " ;
else if ( ( ( char * ) de - bh - > b_data ) + rlen > dir - > i_sb - > s_blocksize )
error_msg = " directory entry across blocks " ;
if ( error_msg ! = NULL )
mlog ( ML_ERROR , " bad entry in directory #%llu: %s - "
" offset=%lu, inode=%llu, rec_len=%d, name_len=%d \n " ,
( unsigned long long ) OCFS2_I ( dir ) - > ip_blkno , error_msg ,
offset , ( unsigned long long ) le64_to_cpu ( de - > inode ) , rlen ,
de - > name_len ) ;
return error_msg = = NULL ? 1 : 0 ;
}
static inline int ocfs2_match ( int len ,
const char * const name ,
struct ocfs2_dir_entry * de )
{
if ( len ! = de - > name_len )
return 0 ;
if ( ! de - > inode )
return 0 ;
return ! memcmp ( name , de - > name , len ) ;
}
/*
* Returns 0 if not found , - 1 on failure , and 1 on success
*/
static int inline ocfs2_search_dirblock ( struct buffer_head * bh ,
struct inode * dir ,
const char * name , int namelen ,
unsigned long offset ,
2007-09-13 00:01:18 +04:00
char * first_de ,
unsigned int bytes ,
2007-09-08 05:21:26 +04:00
struct ocfs2_dir_entry * * res_dir )
{
struct ocfs2_dir_entry * de ;
char * dlimit , * de_buf ;
int de_len ;
int ret = 0 ;
mlog_entry_void ( ) ;
2007-09-13 00:01:18 +04:00
de_buf = first_de ;
dlimit = de_buf + bytes ;
2007-09-08 05:21:26 +04:00
while ( de_buf < dlimit ) {
/* this code is executed quadratically often */
/* do minimal checking `by hand' */
de = ( struct ocfs2_dir_entry * ) de_buf ;
if ( de_buf + namelen < = dlimit & &
ocfs2_match ( namelen , name , de ) ) {
/* found a match - just to be sure, do a full check */
if ( ! ocfs2_check_dir_entry ( dir , de , bh , offset ) ) {
ret = - 1 ;
goto bail ;
}
* res_dir = de ;
ret = 1 ;
goto bail ;
}
/* prevent looping on a bad block */
de_len = le16_to_cpu ( de - > rec_len ) ;
if ( de_len < = 0 ) {
ret = - 1 ;
goto bail ;
}
de_buf + = de_len ;
offset + = de_len ;
}
bail :
mlog_exit ( ret ) ;
return ret ;
}
2007-09-13 00:01:18 +04:00
static struct buffer_head * ocfs2_find_entry_id ( const char * name ,
int namelen ,
struct inode * dir ,
struct ocfs2_dir_entry * * res_dir )
{
int ret , found ;
struct buffer_head * di_bh = NULL ;
struct ocfs2_dinode * di ;
struct ocfs2_inline_data * data ;
2008-10-10 04:20:31 +04:00
ret = ocfs2_read_block ( dir , OCFS2_I ( dir ) - > ip_blkno , & di_bh ) ;
2007-09-13 00:01:18 +04:00
if ( ret ) {
mlog_errno ( ret ) ;
goto out ;
}
di = ( struct ocfs2_dinode * ) di_bh - > b_data ;
data = & di - > id2 . i_data ;
found = ocfs2_search_dirblock ( di_bh , dir , name , namelen , 0 ,
data - > id_data , i_size_read ( dir ) , res_dir ) ;
if ( found = = 1 )
return di_bh ;
brelse ( di_bh ) ;
out :
return NULL ;
}
2007-10-24 20:23:27 +04:00
static struct buffer_head * ocfs2_find_entry_el ( const char * name , int namelen ,
struct inode * dir ,
struct ocfs2_dir_entry * * res_dir )
2007-09-08 05:21:26 +04:00
{
struct super_block * sb ;
struct buffer_head * bh_use [ NAMEI_RA_SIZE ] ;
struct buffer_head * bh , * ret = NULL ;
unsigned long start , block , b ;
int ra_max = 0 ; /* Number of bh's in the readahead
buffer , bh_use [ ] */
int ra_ptr = 0 ; /* Current index into readahead
buffer */
int num = 0 ;
int nblocks , i , err ;
mlog_entry_void ( ) ;
sb = dir - > i_sb ;
nblocks = i_size_read ( dir ) > > sb - > s_blocksize_bits ;
start = OCFS2_I ( dir ) - > ip_dir_start_lookup ;
if ( start > = nblocks )
start = 0 ;
block = start ;
restart :
do {
/*
* We deal with the read - ahead logic here .
*/
if ( ra_ptr > = ra_max ) {
/* Refill the readahead buffer */
ra_ptr = 0 ;
b = block ;
for ( ra_max = 0 ; ra_max < NAMEI_RA_SIZE ; ra_max + + ) {
/*
* Terminate if we reach the end of the
* directory and must wrap , or if our
* search has finished at this block .
*/
if ( b > = nblocks | | ( num & & block = = start ) ) {
bh_use [ ra_max ] = NULL ;
break ;
}
num + + ;
bh = ocfs2_bread ( dir , b + + , & err , 1 ) ;
bh_use [ ra_max ] = bh ;
}
}
if ( ( bh = bh_use [ ra_ptr + + ] ) = = NULL )
goto next ;
2008-10-10 04:20:33 +04:00
if ( ocfs2_read_block ( dir , block , & bh ) ) {
/* read error, skip block & hope for the best.
* ocfs2_read_block ( ) has released the bh . */
2007-09-08 05:21:26 +04:00
ocfs2_error ( dir - > i_sb , " reading directory %llu, "
" offset %lu \n " ,
( unsigned long long ) OCFS2_I ( dir ) - > ip_blkno ,
block ) ;
goto next ;
}
i = ocfs2_search_dirblock ( bh , dir , name , namelen ,
block < < sb - > s_blocksize_bits ,
2007-09-13 00:01:18 +04:00
bh - > b_data , sb - > s_blocksize ,
2007-09-08 05:21:26 +04:00
res_dir ) ;
if ( i = = 1 ) {
OCFS2_I ( dir ) - > ip_dir_start_lookup = block ;
ret = bh ;
goto cleanup_and_exit ;
} else {
brelse ( bh ) ;
if ( i < 0 )
goto cleanup_and_exit ;
}
next :
if ( + + block > = nblocks )
block = 0 ;
} while ( block ! = start ) ;
/*
* If the directory has grown while we were searching , then
* search the last part of the directory before giving up .
*/
block = nblocks ;
nblocks = i_size_read ( dir ) > > sb - > s_blocksize_bits ;
if ( block < nblocks ) {
start = 0 ;
goto restart ;
}
cleanup_and_exit :
/* Clean up the read-ahead blocks */
for ( ; ra_ptr < ra_max ; ra_ptr + + )
brelse ( bh_use [ ra_ptr ] ) ;
mlog_exit_ptr ( ret ) ;
return ret ;
}
2007-09-13 00:01:18 +04:00
/*
* Try to find an entry of the provided name within ' dir ' .
*
* If nothing was found , NULL is returned . Otherwise , a buffer_head
* and pointer to the dir entry are passed back .
*
* Caller can NOT assume anything about the contents of the
* buffer_head - it is passed back only so that it can be passed into
* any one of the manipulation functions ( add entry , delete entry ,
* etc ) . As an example , bh in the extent directory case is a data
* block , in the inline - data case it actually points to an inode .
*/
struct buffer_head * ocfs2_find_entry ( const char * name , int namelen ,
struct inode * dir ,
struct ocfs2_dir_entry * * res_dir )
{
* res_dir = NULL ;
if ( OCFS2_I ( dir ) - > ip_dyn_features & OCFS2_INLINE_DATA_FL )
return ocfs2_find_entry_id ( name , namelen , dir , res_dir ) ;
return ocfs2_find_entry_el ( name , namelen , dir , res_dir ) ;
}
2007-09-14 03:33:54 +04:00
/*
* Update inode number and type of a previously found directory entry .
*/
2007-09-12 04:21:56 +04:00
int ocfs2_update_entry ( struct inode * dir , handle_t * handle ,
struct buffer_head * de_bh , struct ocfs2_dir_entry * de ,
struct inode * new_entry_inode )
{
int ret ;
2007-09-14 03:33:54 +04:00
/*
* The same code works fine for both inline - data and extent
* based directories , so no need to split this up .
*/
2007-09-12 04:21:56 +04:00
ret = ocfs2_journal_access ( handle , dir , de_bh ,
OCFS2_JOURNAL_ACCESS_WRITE ) ;
if ( ret ) {
mlog_errno ( ret ) ;
goto out ;
}
de - > inode = cpu_to_le64 ( OCFS2_I ( new_entry_inode ) - > ip_blkno ) ;
ocfs2_set_de_type ( de , new_entry_inode - > i_mode ) ;
ocfs2_journal_dirty ( handle , de_bh ) ;
out :
return ret ;
}
2007-09-14 03:33:54 +04:00
/*
 * Remove de_del from the entry area [first_de, first_de + bytes) of bh.
 *
 * The area is walked from its start and every entry is validated on the
 * way.  When de_del is reached its rec_len is added to the preceding
 * entry; if it is the first entry its inode field is cleared instead.  The
 * directory's i_version is bumped so that readers revalidate.
 *
 * Returns the journal dirty status once the entry has been removed, -EIO if
 * an entry fails validation or journal access fails, and -ENOENT if de_del
 * is not found in the area.
 */
static int __ocfs2_delete_entry(handle_t *handle, struct inode *dir,
				struct ocfs2_dir_entry *de_del,
				struct buffer_head *bh, char *first_de,
				unsigned int bytes)
{
	struct ocfs2_dir_entry *cur = (struct ocfs2_dir_entry *)first_de;
	struct ocfs2_dir_entry *prev = NULL;
	int pos = 0;
	int status = -ENOENT;

	mlog_entry(" (0x%p, 0x%p, 0x%p, 0x%p) \n ", handle, dir, de_del, bh);

	while (pos < bytes) {
		if (!ocfs2_check_dir_entry(dir, cur, bh, pos)) {
			status = -EIO;
			mlog_errno(status);
			goto bail;
		}

		if (cur != de_del) {
			/* Not the one: step to the next entry. */
			pos += le16_to_cpu(cur->rec_len);
			prev = cur;
			cur = (struct ocfs2_dir_entry *)
				((char *)cur + le16_to_cpu(cur->rec_len));
			continue;
		}

		status = ocfs2_journal_access(handle, dir, bh,
					      OCFS2_JOURNAL_ACCESS_WRITE);
		if (status < 0) {
			status = -EIO;
			mlog_errno(status);
			goto bail;
		}

		if (prev)
			le16_add_cpu(&prev->rec_len,
				     le16_to_cpu(cur->rec_len));
		else
			cur->inode = 0;

		dir->i_version++;
		status = ocfs2_journal_dirty(handle, bh);
		goto bail;
	}

bail:
	mlog_exit(status);
	return status;
}
2007-09-14 03:33:54 +04:00
/*
 * Delete de_del from an inline-data directory.
 *
 * The inode block is read to locate the inline entry area, which is then
 * handed to __ocfs2_delete_entry() together with the caller's bh.
 * Returns the read error, or the result of __ocfs2_delete_entry().
 */
static inline int ocfs2_delete_entry_id(handle_t *handle,
					struct inode *dir,
					struct ocfs2_dir_entry *de_del,
					struct buffer_head *bh)
{
	struct buffer_head *inode_bh = NULL;
	struct ocfs2_inline_data *idata;
	struct ocfs2_dinode *fe;
	int status;

	status = ocfs2_read_block(dir, OCFS2_I(dir)->ip_blkno, &inode_bh);
	if (status) {
		mlog_errno(status);
		return status;
	}

	fe = (struct ocfs2_dinode *)inode_bh->b_data;
	idata = &fe->id2.i_data;

	status = __ocfs2_delete_entry(handle, dir, de_del, bh,
				      idata->id_data, i_size_read(dir));

	brelse(inode_bh);
	return status;
}
/*
 * Delete de_del from a directory data block: the entry area is the whole
 * buffer, b_data for b_size bytes.
 */
static inline int ocfs2_delete_entry_el(handle_t *handle,
					struct inode *dir,
					struct ocfs2_dir_entry *de_del,
					struct buffer_head *bh)
{
	char *first_de = bh->b_data;
	unsigned int bytes = bh->b_size;

	return __ocfs2_delete_entry(handle, dir, de_del, bh, first_de, bytes);
}
/*
 * ocfs2_delete_entry deletes a directory entry by merging it with the
 * previous entry.  The inline-data or data-block variant is chosen from
 * the directory's ip_dyn_features.
 */
int ocfs2_delete_entry(handle_t *handle,
		       struct inode *dir,
		       struct ocfs2_dir_entry *de_del,
		       struct buffer_head *bh)
{
	return (OCFS2_I(dir)->ip_dyn_features & OCFS2_INLINE_DATA_FL) ?
		ocfs2_delete_entry_id(handle, dir, de_del, bh) :
		ocfs2_delete_entry_el(handle, dir, de_del, bh);
}
2007-09-14 03:29:01 +04:00
/*
* Check whether ' de ' has enough room to hold an entry of
* ' new_rec_len ' bytes .
*/
static inline int ocfs2_dirent_would_fit ( struct ocfs2_dir_entry * de ,
unsigned int new_rec_len )
{
unsigned int de_really_used ;
/* Check whether this is an empty record with enough space */
if ( le64_to_cpu ( de - > inode ) = = 0 & &
le16_to_cpu ( de - > rec_len ) > = new_rec_len )
return 1 ;
/*
* Record might have free space at the end which we can
* use .
*/
de_really_used = OCFS2_DIR_REC_LEN ( de - > name_len ) ;
if ( le16_to_cpu ( de - > rec_len ) > = ( de_really_used + new_rec_len ) )
return 1 ;
return 0 ;
}
2007-09-08 05:21:26 +04:00
/* we don't always have a dentry for what we want to add, so people
 * like orphan dir can call this instead.
 *
 * The entry is placed in insert_bh.  For an inline-data directory
 * insert_bh must be parent_fe_bh, and the inode's inline area
 * (i_size bytes) is scanned instead of a whole block.
 *
 * @handle:       journal handle for the modification
 * @dir:          directory receiving the entry
 * @name:         entry name, namelen bytes (must be non-empty)
 * @inode:        inode supplying the entry's file type; only used when
 *                blkno is non-zero
 * @blkno:        block number stored in the entry; 0 leaves the inode field
 *                zero and the type OCFS2_FT_UNKNOWN
 * @parent_fe_bh: buffer holding the directory's inode
 * @insert_bh:    buffer whose entry area is searched for room
 *
 * Returns 0 on success, -EINVAL for an empty name, -EEXIST if the name is
 * already present, -ENOENT if a corrupt entry is found, or the error from
 * marking the inode dirty or from the journal access/dirty calls.
 */
int __ocfs2_add_entry(handle_t *handle,
		      struct inode *dir,
		      const char *name, int namelen,
		      struct inode *inode, u64 blkno,
		      struct buffer_head *parent_fe_bh,
		      struct buffer_head *insert_bh)
{
	unsigned long offset;
	unsigned short rec_len;
	struct ocfs2_dir_entry *de, *de1;
	struct ocfs2_dinode *di = (struct ocfs2_dinode *)parent_fe_bh->b_data;
	struct super_block *sb = dir->i_sb;
	int retval;
	unsigned int size = sb->s_blocksize;
	char *data_start = insert_bh->b_data;

	mlog_entry_void();

	if (!namelen) {
		/* Leave through bail so mlog_entry/mlog_exit stay paired. */
		retval = -EINVAL;
		goto bail;
	}

	if (OCFS2_I(dir)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
		data_start = di->id2.i_data.id_data;
		size = i_size_read(dir);

		BUG_ON(insert_bh != parent_fe_bh);
	}

	rec_len = OCFS2_DIR_REC_LEN(namelen);
	offset = 0;
	de = (struct ocfs2_dir_entry *)data_start;

	/* The loop is only left through one of the 'goto bail' exits. */
	for (;;) {
		BUG_ON((char *)de >= (size + data_start));

		/* These checks should've already been passed by the
		 * prepare function, but I guess we can leave them
		 * here anyway. */
		if (!ocfs2_check_dir_entry(dir, de, insert_bh, offset)) {
			retval = -ENOENT;
			goto bail;
		}
		if (ocfs2_match(namelen, name, de)) {
			retval = -EEXIST;
			goto bail;
		}

		if (ocfs2_dirent_would_fit(de, rec_len)) {
			dir->i_mtime = dir->i_ctime = CURRENT_TIME;
			retval = ocfs2_mark_inode_dirty(handle, dir,
							parent_fe_bh);
			if (retval < 0) {
				mlog_errno(retval);
				goto bail;
			}

			/*
			 * Do not touch the buffer unless the journal has
			 * granted write access to it.
			 */
			retval = ocfs2_journal_access(handle, dir, insert_bh,
						OCFS2_JOURNAL_ACCESS_WRITE);
			if (retval < 0) {
				mlog_errno(retval);
				goto bail;
			}

			/* By now the buffer is marked for journaling */
			if (le64_to_cpu(de->inode)) {
				/*
				 * Entry in use: shrink it to what its name
				 * needs and put the new record in the slack
				 * behind it.
				 */
				de1 = (struct ocfs2_dir_entry *)((char *)de +
					OCFS2_DIR_REC_LEN(de->name_len));
				de1->rec_len =
					cpu_to_le16(le16_to_cpu(de->rec_len) -
					OCFS2_DIR_REC_LEN(de->name_len));
				de->rec_len = cpu_to_le16(
					OCFS2_DIR_REC_LEN(de->name_len));
				de = de1;
			}

			de->file_type = OCFS2_FT_UNKNOWN;
			if (blkno) {
				de->inode = cpu_to_le64(blkno);
				ocfs2_set_de_type(de, inode->i_mode);
			} else
				de->inode = 0;
			de->name_len = namelen;
			memcpy(de->name, name, namelen);

			dir->i_version++;
			retval = ocfs2_journal_dirty(handle, insert_bh);
			if (retval < 0)
				mlog_errno(retval);
			goto bail;
		}

		offset += le16_to_cpu(de->rec_len);
		de = (struct ocfs2_dir_entry *)
			((char *)de + le16_to_cpu(de->rec_len));
	}

bail:
	mlog_exit(retval);
	return retval;
}
2007-09-13 00:01:18 +04:00
static int ocfs2_dir_foreach_blk_id ( struct inode * inode ,
2007-10-17 10:27:21 +04:00
u64 * f_version ,
2007-09-13 00:01:18 +04:00
loff_t * f_pos , void * priv ,
2007-09-25 01:25:27 +04:00
filldir_t filldir , int * filldir_err )
2007-09-13 00:01:18 +04:00
{
int ret , i , filldir_ret ;
unsigned long offset = * f_pos ;
struct buffer_head * di_bh = NULL ;
struct ocfs2_dinode * di ;
struct ocfs2_inline_data * data ;
struct ocfs2_dir_entry * de ;
2008-10-10 04:20:31 +04:00
ret = ocfs2_read_block ( inode , OCFS2_I ( inode ) - > ip_blkno , & di_bh ) ;
2007-09-13 00:01:18 +04:00
if ( ret ) {
mlog ( ML_ERROR , " Unable to read inode block for dir %llu \n " ,
( unsigned long long ) OCFS2_I ( inode ) - > ip_blkno ) ;
goto out ;
}
di = ( struct ocfs2_dinode * ) di_bh - > b_data ;
data = & di - > id2 . i_data ;
while ( * f_pos < i_size_read ( inode ) ) {
revalidate :
/* If the dir block has changed since the last call to
* readdir ( 2 ) , then we might be pointing to an invalid
* dirent right now . Scan from the start of the block
* to make sure . */
if ( * f_version ! = inode - > i_version ) {
for ( i = 0 ; i < i_size_read ( inode ) & & i < offset ; ) {
de = ( struct ocfs2_dir_entry * )
( data - > id_data + i ) ;
/* It's too expensive to do a full
* dirent test each time round this
* loop , but we do have to test at
* least that it is non - zero . A
* failure will be detected in the
* dirent test below . */
if ( le16_to_cpu ( de - > rec_len ) <
OCFS2_DIR_REC_LEN ( 1 ) )
break ;
i + = le16_to_cpu ( de - > rec_len ) ;
}
* f_pos = offset = i ;
* f_version = inode - > i_version ;
}
de = ( struct ocfs2_dir_entry * ) ( data - > id_data + * f_pos ) ;
if ( ! ocfs2_check_dir_entry ( inode , de , di_bh , * f_pos ) ) {
/* On error, skip the f_pos to the end. */
* f_pos = i_size_read ( inode ) ;
goto out ;
}
offset + = le16_to_cpu ( de - > rec_len ) ;
if ( le64_to_cpu ( de - > inode ) ) {
/* We might block in the next section
* if the data destination is
* currently swapped out . So , use a
* version stamp to detect whether or
* not the directory has been modified
* during the copy operation .
*/
2007-10-17 10:27:21 +04:00
u64 version = * f_version ;
2007-09-13 00:01:18 +04:00
unsigned char d_type = DT_UNKNOWN ;
if ( de - > file_type < OCFS2_FT_MAX )
d_type = ocfs2_filetype_table [ de - > file_type ] ;
filldir_ret = filldir ( priv , de - > name ,
de - > name_len ,
* f_pos ,
le64_to_cpu ( de - > inode ) ,
d_type ) ;
2007-09-25 01:25:27 +04:00
if ( filldir_ret ) {
if ( filldir_err )
* filldir_err = filldir_ret ;
2007-09-13 00:01:18 +04:00
break ;
2007-09-25 01:25:27 +04:00
}
2007-09-13 00:01:18 +04:00
if ( version ! = * f_version )
goto revalidate ;
}
* f_pos + = le16_to_cpu ( de - > rec_len ) ;
}
out :
brelse ( di_bh ) ;
return 0 ;
}
static int ocfs2_dir_foreach_blk_el ( struct inode * inode ,
2007-10-17 10:27:21 +04:00
u64 * f_version ,
2007-09-13 00:01:18 +04:00
loff_t * f_pos , void * priv ,
2007-09-25 01:25:27 +04:00
filldir_t filldir , int * filldir_err )
2005-12-16 01:31:24 +03:00
{
int error = 0 ;
2006-04-22 00:49:02 +04:00
unsigned long offset , blk , last_ra_blk = 0 ;
int i , stored ;
2005-12-16 01:31:24 +03:00
struct buffer_head * bh , * tmp ;
struct ocfs2_dir_entry * de ;
int err ;
struct super_block * sb = inode - > i_sb ;
2006-04-22 00:49:02 +04:00
unsigned int ra_sectors = 16 ;
2005-12-16 01:31:24 +03:00
stored = 0 ;
bh = NULL ;
2007-09-11 04:17:52 +04:00
offset = ( * f_pos ) & ( sb - > s_blocksize - 1 ) ;
2005-12-16 01:31:24 +03:00
2007-09-11 04:17:52 +04:00
while ( ! error & & ! stored & & * f_pos < i_size_read ( inode ) ) {
blk = ( * f_pos ) > > sb - > s_blocksize_bits ;
2005-12-16 01:31:24 +03:00
bh = ocfs2_bread ( inode , blk , & err , 0 ) ;
if ( ! bh ) {
2006-03-03 21:24:33 +03:00
mlog ( ML_ERROR ,
" directory #%llu contains a hole at offset %lld \n " ,
( unsigned long long ) OCFS2_I ( inode ) - > ip_blkno ,
2007-09-11 04:17:52 +04:00
* f_pos ) ;
* f_pos + = sb - > s_blocksize - offset ;
2005-12-16 01:31:24 +03:00
continue ;
}
2006-04-22 00:49:02 +04:00
/* The idea here is to begin with 8k read-ahead and to stay
* 4 k ahead of our current position .
*
* TODO : Use the pagecache for this . We just need to
* make sure it ' s cluster - safe . . . */
if ( ! last_ra_blk
| | ( ( ( last_ra_blk - blk ) < < 9 ) < = ( ra_sectors / 2 ) ) ) {
for ( i = ra_sectors > > ( sb - > s_blocksize_bits - 9 ) ;
2005-12-16 01:31:24 +03:00
i > 0 ; i - - ) {
tmp = ocfs2_bread ( inode , + + blk , & err , 1 ) ;
2008-10-08 01:25:16 +04:00
brelse ( tmp ) ;
2005-12-16 01:31:24 +03:00
}
2006-04-22 00:49:02 +04:00
last_ra_blk = blk ;
ra_sectors = 8 ;
2005-12-16 01:31:24 +03:00
}
revalidate :
/* If the dir block has changed since the last call to
* readdir ( 2 ) , then we might be pointing to an invalid
* dirent right now . Scan from the start of the block
* to make sure . */
2007-09-11 04:17:52 +04:00
if ( * f_version ! = inode - > i_version ) {
2005-12-16 01:31:24 +03:00
for ( i = 0 ; i < sb - > s_blocksize & & i < offset ; ) {
de = ( struct ocfs2_dir_entry * ) ( bh - > b_data + i ) ;
/* It's too expensive to do a full
* dirent test each time round this
* loop , but we do have to test at
* least that it is non - zero . A
* failure will be detected in the
* dirent test below . */
if ( le16_to_cpu ( de - > rec_len ) <
OCFS2_DIR_REC_LEN ( 1 ) )
break ;
i + = le16_to_cpu ( de - > rec_len ) ;
}
offset = i ;
2007-09-11 04:17:52 +04:00
* f_pos = ( ( * f_pos ) & ~ ( sb - > s_blocksize - 1 ) )
2005-12-16 01:31:24 +03:00
| offset ;
2007-09-11 04:17:52 +04:00
* f_version = inode - > i_version ;
2005-12-16 01:31:24 +03:00
}
2007-09-11 04:17:52 +04:00
while ( ! error & & * f_pos < i_size_read ( inode )
2005-12-16 01:31:24 +03:00
& & offset < sb - > s_blocksize ) {
de = ( struct ocfs2_dir_entry * ) ( bh - > b_data + offset ) ;
if ( ! ocfs2_check_dir_entry ( inode , de , bh , offset ) ) {
/* On error, skip the f_pos to the
next block . */
2007-09-11 04:17:52 +04:00
* f_pos = ( ( * f_pos ) | ( sb - > s_blocksize - 1 ) ) + 1 ;
2005-12-16 01:31:24 +03:00
brelse ( bh ) ;
2007-09-11 04:17:52 +04:00
goto out ;
2005-12-16 01:31:24 +03:00
}
offset + = le16_to_cpu ( de - > rec_len ) ;
if ( le64_to_cpu ( de - > inode ) ) {
/* We might block in the next section
* if the data destination is
* currently swapped out . So , use a
* version stamp to detect whether or
* not the directory has been modified
* during the copy operation .
*/
2007-09-11 04:17:52 +04:00
unsigned long version = * f_version ;
2005-12-16 01:31:24 +03:00
unsigned char d_type = DT_UNKNOWN ;
if ( de - > file_type < OCFS2_FT_MAX )
d_type = ocfs2_filetype_table [ de - > file_type ] ;
2007-09-11 04:17:52 +04:00
error = filldir ( priv , de - > name ,
2005-12-16 01:31:24 +03:00
de - > name_len ,
2007-09-11 04:17:52 +04:00
* f_pos ,
2007-09-11 04:30:26 +04:00
le64_to_cpu ( de - > inode ) ,
2005-12-16 01:31:24 +03:00
d_type ) ;
2007-09-25 01:25:27 +04:00
if ( error ) {
if ( filldir_err )
* filldir_err = error ;
2005-12-16 01:31:24 +03:00
break ;
2007-09-25 01:25:27 +04:00
}
2007-09-11 04:17:52 +04:00
if ( version ! = * f_version )
2005-12-16 01:31:24 +03:00
goto revalidate ;
stored + + ;
}
2007-09-11 04:17:52 +04:00
* f_pos + = le16_to_cpu ( de - > rec_len ) ;
2005-12-16 01:31:24 +03:00
}
offset = 0 ;
brelse ( bh ) ;
}
stored = 0 ;
2007-09-11 04:17:52 +04:00
out :
return stored ;
}
2007-10-17 10:27:21 +04:00
static int ocfs2_dir_foreach_blk ( struct inode * inode , u64 * f_version ,
2007-09-25 01:25:27 +04:00
loff_t * f_pos , void * priv , filldir_t filldir ,
int * filldir_err )
2007-09-13 00:01:18 +04:00
{
if ( OCFS2_I ( inode ) - > ip_dyn_features & OCFS2_INLINE_DATA_FL )
return ocfs2_dir_foreach_blk_id ( inode , f_version , f_pos , priv ,
2007-09-25 01:25:27 +04:00
filldir , filldir_err ) ;
2007-09-13 00:01:18 +04:00
2007-09-25 01:25:27 +04:00
return ocfs2_dir_foreach_blk_el ( inode , f_version , f_pos , priv , filldir ,
filldir_err ) ;
2007-09-13 00:01:18 +04:00
}
2007-09-11 04:50:51 +04:00
/*
 * This is intended to be called from inside other kernel functions,
 * so we fake some arguments: the version stamp is a local copy of
 * inode->i_version.
 *
 * ocfs2_dir_foreach_blk() is called repeatedly until *f_pos reaches i_size,
 * it returns non-zero, or filldir asks to stop.
 *
 * Always returns 0.
 * NOTE(review): a positive result is converted to -EIO just before the
 * return, but that value is then discarded.  Confirm whether callers are
 * meant to see it.
 */
int ocfs2_dir_foreach(struct inode *inode, loff_t *f_pos, void *priv,
		      filldir_t filldir)
{
	u64 version = inode->i_version;
	int filldir_err = 0;
	int ret = 0;

	while (!ret && !filldir_err && *f_pos < i_size_read(inode))
		ret = ocfs2_dir_foreach_blk(inode, &version, f_pos, priv,
					    filldir, &filldir_err);

	if (ret > 0)
		ret = -EIO;

	return 0;
}
2007-09-11 04:17:52 +04:00
/*
 * ocfs2_readdir()
 *
 * readdir entry point: takes the inode lock, runs one
 * ocfs2_dir_foreach_blk() pass from filp->f_pos, and drops the lock.
 * Returns the locking error if the lock could not be taken, otherwise the
 * result of the pass.
 */
int ocfs2_readdir(struct file *filp, void *dirent, filldir_t filldir)
{
	struct inode *inode = filp->f_path.dentry->d_inode;
	int lock_level = 0;
	int error;

	mlog_entry(" dirino=%llu \n ",
		   (unsigned long long)OCFS2_I(inode)->ip_blkno);

	error = ocfs2_inode_lock_atime(inode, filp->f_vfsmnt, &lock_level);
	if (error >= 0 && lock_level) {
		/* We release EX lock which used to update atime
		 * and get PR lock again to reduce contention
		 * on commonly accessed directories. */
		ocfs2_inode_unlock(inode, 1);
		lock_level = 0;
		error = ocfs2_inode_lock(inode, NULL, 0);
	}

	if (error < 0) {
		if (error != -ENOENT)
			mlog_errno(error);
		/* we haven't got any yet, so propagate the error. */
		goto bail_nolock;
	}

	error = ocfs2_dir_foreach_blk(inode, &filp->f_version, &filp->f_pos,
				      dirent, filldir, NULL);

	ocfs2_inode_unlock(inode, lock_level);

bail_nolock:
	mlog_exit(error);

	return error;
}
/*
 * NOTE: this should always be called with parent dir i_mutex taken.
 *
 * Look up name in the directory 'inode'.  On success 0 is returned, *blkno
 * holds the entry's inode number, and *dirent_bh / *dirent reference the
 * entry.  If the name is not found -ENOENT is returned and both out
 * pointers are NULL.
 */
int ocfs2_find_files_on_disk(const char *name,
			     int namelen,
			     u64 *blkno,
			     struct inode *inode,
			     struct buffer_head **dirent_bh,
			     struct ocfs2_dir_entry **dirent)
{
	int status = -ENOENT;

	mlog_entry(" (name=%.*s, blkno=%p, inode=%p, dirent_bh=%p, dirent=%p) \n ",
		   namelen, name, blkno, inode, dirent_bh, dirent);

	*dirent_bh = ocfs2_find_entry(name, namelen, inode, dirent);
	if (*dirent_bh && *dirent) {
		*blkno = le64_to_cpu((*dirent)->inode);
		status = 0;
	} else {
		/* Nothing usable came back: hand out clean NULLs. */
		*dirent = NULL;
		brelse(*dirent_bh);
		*dirent_bh = NULL;
	}

	mlog_exit(status);
	return status;
}
2007-09-12 02:22:06 +04:00
/*
 * Convenience function for callers which just want the block number
 * mapped to a name and don't require the full dirent info, etc.
 *
 * Returns the result of ocfs2_find_files_on_disk(); the buffer it hands
 * back is released here.
 */
int ocfs2_lookup_ino_from_name(struct inode *dir, const char *name,
			       int namelen, u64 *blkno)
{
	struct ocfs2_dir_entry *entry = NULL;
	struct buffer_head *entry_bh = NULL;
	int status;

	status = ocfs2_find_files_on_disk(name, namelen, blkno, dir,
					  &entry_bh, &entry);
	brelse(entry_bh);

	return status;
}
2005-12-16 01:31:24 +03:00
/* Check for a name within a directory.
*
* Return 0 if the name does not exist
* Return - EEXIST if the directory contains the name
*
2006-01-10 02:59:24 +03:00
* Callers should have i_mutex + a cluster lock on dir
2005-12-16 01:31:24 +03:00
*/
int ocfs2_check_dir_for_entry ( struct inode * dir ,
const char * name ,
int namelen )
{
int ret ;
struct buffer_head * dirent_bh = NULL ;
struct ocfs2_dir_entry * dirent = NULL ;
2006-03-03 21:24:33 +03:00
mlog_entry ( " dir %llu, name '%.*s' \n " ,
( unsigned long long ) OCFS2_I ( dir ) - > ip_blkno , namelen , name ) ;
2005-12-16 01:31:24 +03:00
ret = - EEXIST ;
dirent_bh = ocfs2_find_entry ( name , namelen , dir , & dirent ) ;
if ( dirent_bh )
goto bail ;
ret = 0 ;
bail :
2008-10-08 01:25:16 +04:00
brelse ( dirent_bh ) ;
2005-12-16 01:31:24 +03:00
mlog_exit ( ret ) ;
return ret ;
}
2007-09-12 22:19:00 +04:00
/* State collected by ocfs2_empty_dir_filldir() while a directory is walked. */
struct ocfs2_empty_dir_priv {
	unsigned int seen_dot;		/* "." found at position 0 */
	unsigned int seen_dot_dot;	/* ".." found right behind "." */
	unsigned int seen_other;	/* any other entry was found */
};
/*
 * filldir callback used by ocfs2_empty_dir().
 *
 * Records in the ocfs2_empty_dir_priv passed as priv whether "." and ".."
 * were seen at their expected positions.  Any other entry sets seen_other.
 *
 * Returns 0 to continue the walk, or 1 to stop it as soon as an entry
 * other than "." / ".." is found.
 */
static int ocfs2_empty_dir_filldir(void *priv, const char *name, int name_len,
				   loff_t pos, u64 ino, unsigned type)
{
	struct ocfs2_empty_dir_priv *p = priv;

	/*
	 * Check the positions of "." and ".." records to be sure
	 * they're in the correct place.
	 *
	 * The literals must be exactly "." and ".."; with surrounding
	 * blanks the comparisons would test against a space and never match.
	 */
	if (name_len == 1 && !strncmp(".", name, 1) && pos == 0) {
		p->seen_dot = 1;
		return 0;
	}

	if (name_len == 2 && !strncmp("..", name, 2) &&
	    pos == OCFS2_DIR_REC_LEN(1)) {
		p->seen_dot_dot = 1;
		return 0;
	}

	p->seen_other = 1;
	return 1;
}
2005-12-16 01:31:24 +03:00
/*
* routine to check that the specified directory is empty ( for rmdir )
2007-09-12 22:19:00 +04:00
*
* Returns 1 if dir is empty , zero otherwise .
2005-12-16 01:31:24 +03:00
*/
int ocfs2_empty_dir ( struct inode * inode )
{
2007-09-12 22:19:00 +04:00
int ret ;
loff_t start = 0 ;
struct ocfs2_empty_dir_priv priv ;
2005-12-16 01:31:24 +03:00
2007-09-12 22:19:00 +04:00
memset ( & priv , 0 , sizeof ( priv ) ) ;
2005-12-16 01:31:24 +03:00
2007-09-12 22:19:00 +04:00
ret = ocfs2_dir_foreach ( inode , & start , & priv , ocfs2_empty_dir_filldir ) ;
if ( ret )
mlog_errno ( ret ) ;
if ( ! priv . seen_dot | | ! priv . seen_dot_dot ) {
mlog ( ML_ERROR , " bad directory (dir #%llu) - no `.' or `..' \n " ,
2006-03-03 21:24:33 +03:00
( unsigned long long ) OCFS2_I ( inode ) - > ip_blkno ) ;
2007-09-12 22:19:00 +04:00
/*
* XXX : Is it really safe to allow an unlink to continue ?
*/
2005-12-16 01:31:24 +03:00
return 1 ;
}
2007-09-12 22:19:00 +04:00
return ! priv . seen_other ;
2005-12-16 01:31:24 +03:00
}
2007-09-14 03:33:54 +04:00
static void ocfs2_fill_initial_dirents ( struct inode * inode ,
struct inode * parent ,
char * start , unsigned int size )
{
struct ocfs2_dir_entry * de = ( struct ocfs2_dir_entry * ) start ;
de - > inode = cpu_to_le64 ( OCFS2_I ( inode ) - > ip_blkno ) ;
de - > name_len = 1 ;
de - > rec_len =
cpu_to_le16 ( OCFS2_DIR_REC_LEN ( de - > name_len ) ) ;
strcpy ( de - > name , " . " ) ;
ocfs2_set_de_type ( de , S_IFDIR ) ;
de = ( struct ocfs2_dir_entry * ) ( ( char * ) de + le16_to_cpu ( de - > rec_len ) ) ;
de - > inode = cpu_to_le64 ( OCFS2_I ( parent ) - > ip_blkno ) ;
de - > rec_len = cpu_to_le16 ( size - OCFS2_DIR_REC_LEN ( 1 ) ) ;
de - > name_len = 2 ;
strcpy ( de - > name , " .. " ) ;
ocfs2_set_de_type ( de , S_IFDIR ) ;
}
/*
 * Populate a new inline-data directory with its "." and ".." records.
 *
 * This works together with code in ocfs2_mknod_locked() which sets
 * the inline-data flag and initializes the inline-data section.
 *
 * The records are written directly into the inline-data area of the
 * dinode held in 'di_bh', after taking journal write access to it.
 * The inode's i_size is set to the inline area's id_count and i_nlink
 * to 2 before the inode is marked dirty.
 *
 * 'osb' is not used here; it is kept so the signature parallels
 * ocfs2_fill_new_dir_el().
 *
 * Returns 0 on success, otherwise the error reported by the journal
 * or inode-dirty helper that failed.
 */
static int ocfs2_fill_new_dir_id(struct ocfs2_super *osb,
				 handle_t *handle,
				 struct inode *parent,
				 struct inode *inode,
				 struct buffer_head *di_bh)
{
	int ret;
	struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
	struct ocfs2_inline_data *data = &di->id2.i_data;
	unsigned int size = le16_to_cpu(data->id_count);

	ret = ocfs2_journal_access(handle, inode, di_bh,
				   OCFS2_JOURNAL_ACCESS_WRITE);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	ocfs2_fill_initial_dirents(inode, parent, data->id_data, size);

	/*
	 * Capture the result: without the assignment the check below
	 * would only re-test the (zero) value left by
	 * ocfs2_journal_access() and a failure here would go unnoticed.
	 */
	ret = ocfs2_journal_dirty(handle, di_bh);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	i_size_write(inode, size);
	inode->i_nlink = 2;
	inode->i_blocks = ocfs2_inode_sector_count(inode);

	ret = ocfs2_mark_inode_dirty(handle, inode, di_bh);
	if (ret < 0)
		mlog_errno(ret);

out:
	return ret;
}
/*
 * Populate a new extent-based directory with its "." and ".." records.
 *
 * The first directory block is obtained through ocfs2_do_extend_dir(),
 * zeroed, filled with the two initial records and journaled.  The
 * inode's i_size is then set to one block and i_nlink to 2, and the
 * inode is marked dirty against 'fe_bh'.
 *
 * Returns 0 on success or the negative status of the step that failed.
 */
static int ocfs2_fill_new_dir_el(struct ocfs2_super *osb,
				 handle_t *handle,
				 struct inode *parent,
				 struct inode *inode,
				 struct buffer_head *fe_bh,
				 struct ocfs2_alloc_context *data_ac)
{
	struct buffer_head *block_bh = NULL;
	int err;

	mlog_entry_void();

	err = ocfs2_do_extend_dir(osb->sb, handle, inode, fe_bh,
				  data_ac, NULL, &block_bh);
	if (err < 0) {
		mlog_errno(err);
		goto out;
	}

	ocfs2_set_new_buffer_uptodate(inode, block_bh);

	err = ocfs2_journal_access(handle, inode, block_bh,
				   OCFS2_JOURNAL_ACCESS_CREATE);
	if (err < 0) {
		mlog_errno(err);
		goto out;
	}

	/* Start from a clean block, then write "." and "..". */
	memset(block_bh->b_data, 0, osb->sb->s_blocksize);
	ocfs2_fill_initial_dirents(inode, parent, block_bh->b_data,
				   osb->sb->s_blocksize);

	err = ocfs2_journal_dirty(handle, block_bh);
	if (err < 0) {
		mlog_errno(err);
		goto out;
	}

	i_size_write(inode, inode->i_sb->s_blocksize);
	inode->i_nlink = 2;
	inode->i_blocks = ocfs2_inode_sector_count(inode);

	err = ocfs2_mark_inode_dirty(handle, inode, fe_bh);
	if (err < 0) {
		mlog_errno(err);
		goto out;
	}

	/* Any non-negative result is reported as plain success. */
	err = 0;
out:
	brelse(block_bh);

	mlog_exit(err);
	return err;
}
2007-09-14 03:33:54 +04:00
/*
 * Write the initial "." and ".." records of a newly created directory.
 *
 * Dispatches on the inode's OCFS2_INLINE_DATA_FL flag: inline-data
 * directories are filled in place inside the dinode, all others get
 * their first block through the extent-based path (which uses
 * 'data_ac').
 *
 * It is a bug to call this without 'data_ac' on a volume that does not
 * support inline data.
 *
 * Returns whatever the selected helper returns.
 */
int ocfs2_fill_new_dir(struct ocfs2_super *osb,
		       handle_t *handle,
		       struct inode *parent,
		       struct inode *inode,
		       struct buffer_head *fe_bh,
		       struct ocfs2_alloc_context *data_ac)
{
	int is_inline;

	BUG_ON(!ocfs2_supports_inline_data(osb) && data_ac == NULL);

	is_inline = !!(OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL);
	if (!is_inline)
		return ocfs2_fill_new_dir_el(osb, handle, parent, inode,
					     fe_bh, data_ac);

	return ocfs2_fill_new_dir_id(osb, handle, parent, inode, fe_bh);
}
static void ocfs2_expand_last_dirent ( char * start , unsigned int old_size ,
unsigned int new_size )
{
struct ocfs2_dir_entry * de ;
struct ocfs2_dir_entry * prev_de ;
char * de_buf , * limit ;
unsigned int bytes = new_size - old_size ;
limit = start + old_size ;
de_buf = start ;
de = ( struct ocfs2_dir_entry * ) de_buf ;
do {
prev_de = de ;
de_buf + = le16_to_cpu ( de - > rec_len ) ;
de = ( struct ocfs2_dir_entry * ) de_buf ;
} while ( de_buf < limit ) ;
le16_add_cpu ( & prev_de - > rec_len , bytes ) ;
}
/*
* We allocate enough clusters to fulfill " blocks_wanted " , but set
* i_size to exactly one block . Ocfs2_extend_dir ( ) will handle the
* rest automatically for us .
*
* * first_block_bh is a pointer to the 1 st data block allocated to the
* directory .
*/
static int ocfs2_expand_inline_dir ( struct inode * dir , struct buffer_head * di_bh ,
unsigned int blocks_wanted ,
struct buffer_head * * first_block_bh )
{
int ret , credits = OCFS2_INLINE_TO_EXTENTS_CREDITS ;
u32 alloc , bit_off , len ;
struct super_block * sb = dir - > i_sb ;
u64 blkno , bytes = blocks_wanted < < sb - > s_blocksize_bits ;
struct ocfs2_super * osb = OCFS2_SB ( dir - > i_sb ) ;
struct ocfs2_inode_info * oi = OCFS2_I ( dir ) ;
struct ocfs2_alloc_context * data_ac ;
struct buffer_head * dirdata_bh = NULL ;
struct ocfs2_dinode * di = ( struct ocfs2_dinode * ) di_bh - > b_data ;
handle_t * handle ;
2008-08-21 06:36:33 +04:00
struct ocfs2_extent_tree et ;
2008-08-22 23:46:09 +04:00
ocfs2_init_dinode_extent_tree ( & et , dir , di_bh ) ;
2007-09-14 03:33:54 +04:00
alloc = ocfs2_clusters_for_bytes ( sb , bytes ) ;
/*
* We should never need more than 2 clusters for this -
* maximum dirent size is far less than one block . In fact ,
* the only time we ' d need more than one cluster is if
* blocksize = = clustersize and the dirent won ' t fit in the
* extra space that the expansion to a single block gives . As
* of today , that only happens on 4 k / 4 k file systems .
*/
BUG_ON ( alloc > 2 ) ;
ret = ocfs2_reserve_clusters ( osb , alloc , & data_ac ) ;
if ( ret ) {
mlog_errno ( ret ) ;
goto out ;
}
down_write ( & oi - > ip_alloc_sem ) ;
/*
2008-02-03 18:33:42 +03:00
* Prepare for worst case allocation scenario of two separate
2007-09-14 03:33:54 +04:00
* extents .
*/
if ( alloc = = 2 )
credits + = OCFS2_SUBALLOC_ALLOC ;
handle = ocfs2_start_trans ( osb , credits ) ;
if ( IS_ERR ( handle ) ) {
ret = PTR_ERR ( handle ) ;
mlog_errno ( ret ) ;
goto out_sem ;
}
/*
* Try to claim as many clusters as the bitmap can give though
* if we only get one now , that ' s enough to continue . The rest
* will be claimed after the conversion to extents .
*/
ret = ocfs2_claim_clusters ( osb , handle , data_ac , 1 , & bit_off , & len ) ;
if ( ret ) {
mlog_errno ( ret ) ;
goto out_commit ;
}
/*
* Operations are carefully ordered so that we set up the new
* data block first . The conversion from inline data to
* extents follows .
*/
blkno = ocfs2_clusters_to_blocks ( dir - > i_sb , bit_off ) ;
dirdata_bh = sb_getblk ( sb , blkno ) ;
if ( ! dirdata_bh ) {
ret = - EIO ;
mlog_errno ( ret ) ;
goto out_commit ;
}
ocfs2_set_new_buffer_uptodate ( dir , dirdata_bh ) ;
ret = ocfs2_journal_access ( handle , dir , dirdata_bh ,
OCFS2_JOURNAL_ACCESS_CREATE ) ;
if ( ret ) {
mlog_errno ( ret ) ;
goto out_commit ;
}
memcpy ( dirdata_bh - > b_data , di - > id2 . i_data . id_data , i_size_read ( dir ) ) ;
memset ( dirdata_bh - > b_data + i_size_read ( dir ) , 0 ,
sb - > s_blocksize - i_size_read ( dir ) ) ;
ocfs2_expand_last_dirent ( dirdata_bh - > b_data , i_size_read ( dir ) ,
sb - > s_blocksize ) ;
ret = ocfs2_journal_dirty ( handle , dirdata_bh ) ;
if ( ret ) {
mlog_errno ( ret ) ;
goto out_commit ;
}
/*
* Set extent , i_size , etc on the directory . After this , the
* inode should contain the same exact dirents as before and
* be fully accessible from system calls .
*
* We let the later dirent insert modify c / mtime - to the user
* the data hasn ' t changed .
*/
ret = ocfs2_journal_access ( handle , dir , di_bh ,
OCFS2_JOURNAL_ACCESS_CREATE ) ;
if ( ret ) {
mlog_errno ( ret ) ;
goto out_commit ;
}
spin_lock ( & oi - > ip_lock ) ;
oi - > ip_dyn_features & = ~ OCFS2_INLINE_DATA_FL ;
di - > i_dyn_features = cpu_to_le16 ( oi - > ip_dyn_features ) ;
spin_unlock ( & oi - > ip_lock ) ;
ocfs2_dinode_new_extent_list ( dir , di ) ;
i_size_write ( dir , sb - > s_blocksize ) ;
dir - > i_mtime = dir - > i_ctime = CURRENT_TIME ;
di - > i_size = cpu_to_le64 ( sb - > s_blocksize ) ;
di - > i_ctime = di - > i_mtime = cpu_to_le64 ( dir - > i_ctime . tv_sec ) ;
di - > i_ctime_nsec = di - > i_mtime_nsec = cpu_to_le32 ( dir - > i_ctime . tv_nsec ) ;
/*
* This should never fail as our extent list is empty and all
* related blocks have been journaled already .
*/
2008-08-21 06:36:33 +04:00
ret = ocfs2_insert_extent ( osb , handle , dir , & et , 0 , blkno , len ,
0 , NULL ) ;
2007-09-14 03:33:54 +04:00
if ( ret ) {
mlog_errno ( ret ) ;
2008-08-21 10:14:27 +04:00
goto out_commit ;
2007-09-14 03:33:54 +04:00
}
2008-08-05 22:32:46 +04:00
/*
* Set i_blocks after the extent insert for the most up to
* date ip_clusters value .
*/
dir - > i_blocks = ocfs2_inode_sector_count ( dir ) ;
2007-09-14 03:33:54 +04:00
ret = ocfs2_journal_dirty ( handle , di_bh ) ;
if ( ret ) {
mlog_errno ( ret ) ;
goto out_commit ;
}
/*
* We asked for two clusters , but only got one in the 1 st
* pass . Claim the 2 nd cluster as a separate extent .
*/
if ( alloc > len ) {
ret = ocfs2_claim_clusters ( osb , handle , data_ac , 1 , & bit_off ,
& len ) ;
if ( ret ) {
mlog_errno ( ret ) ;
goto out_commit ;
}
blkno = ocfs2_clusters_to_blocks ( dir - > i_sb , bit_off ) ;
2008-08-21 06:36:33 +04:00
ret = ocfs2_insert_extent ( osb , handle , dir , & et , 1 ,
blkno , len , 0 , NULL ) ;
2007-09-14 03:33:54 +04:00
if ( ret ) {
mlog_errno ( ret ) ;
2008-08-21 10:14:27 +04:00
goto out_commit ;
2007-09-14 03:33:54 +04:00
}
}
* first_block_bh = dirdata_bh ;
dirdata_bh = NULL ;
out_commit :
ocfs2_commit_trans ( osb , handle ) ;
out_sem :
up_write ( & oi - > ip_alloc_sem ) ;
out :
if ( data_ac )
ocfs2_free_alloc_context ( data_ac ) ;
brelse ( dirdata_bh ) ;
return ret ;
}
2005-12-16 01:31:24 +03:00
/* returns a bh of the 1st new block in the allocation. */
2007-09-08 05:21:26 +04:00
static int ocfs2_do_extend_dir ( struct super_block * sb ,
handle_t * handle ,
struct inode * dir ,
struct buffer_head * parent_fe_bh ,
struct ocfs2_alloc_context * data_ac ,
struct ocfs2_alloc_context * meta_ac ,
struct buffer_head * * new_bh )
2005-12-16 01:31:24 +03:00
{
int status ;
int extend ;
2007-03-23 02:53:23 +03:00
u64 p_blkno , v_blkno ;
2005-12-16 01:31:24 +03:00
spin_lock ( & OCFS2_I ( dir ) - > ip_lock ) ;
extend = ( i_size_read ( dir ) = = ocfs2_clusters_to_bytes ( sb , OCFS2_I ( dir ) - > ip_clusters ) ) ;
spin_unlock ( & OCFS2_I ( dir ) - > ip_lock ) ;
if ( extend ) {
2007-01-16 22:32:23 +03:00
u32 offset = OCFS2_I ( dir ) - > ip_clusters ;
2008-08-18 13:38:45 +04:00
status = ocfs2_add_inode_data ( OCFS2_SB ( sb ) , dir , & offset ,
1 , 0 , parent_fe_bh , handle ,
data_ac , meta_ac , NULL ) ;
2005-12-16 01:31:24 +03:00
BUG_ON ( status = = - EAGAIN ) ;
if ( status < 0 ) {
mlog_errno ( status ) ;
goto bail ;
}
}
2007-03-23 02:53:23 +03:00
v_blkno = ocfs2_blocks_for_bytes ( sb , i_size_read ( dir ) ) ;
status = ocfs2_extent_map_get_blocks ( dir , v_blkno , & p_blkno , NULL , NULL ) ;
2005-12-16 01:31:24 +03:00
if ( status < 0 ) {
mlog_errno ( status ) ;
goto bail ;
}
* new_bh = sb_getblk ( sb , p_blkno ) ;
if ( ! * new_bh ) {
status = - EIO ;
mlog_errno ( status ) ;
goto bail ;
}
status = 0 ;
bail :
mlog_exit ( status ) ;
return status ;
}
2007-09-14 03:33:54 +04:00
/*
* Assumes you already have a cluster lock on the directory .
*
* ' blocks_wanted ' is only used if we have an inline directory which
* is to be turned into an extent based one . The size of the dirent to
* insert might be larger than the space gained by growing to just one
* block , so we may have to grow the inode by two blocks in that case .
*/
2005-12-16 01:31:24 +03:00
static int ocfs2_extend_dir ( struct ocfs2_super * osb ,
struct inode * dir ,
struct buffer_head * parent_fe_bh ,
2007-09-14 03:33:54 +04:00
unsigned int blocks_wanted ,
2005-12-16 01:31:24 +03:00
struct buffer_head * * new_de_bh )
{
int status = 0 ;
2007-03-29 05:27:07 +04:00
int credits , num_free_extents , drop_alloc_sem = 0 ;
2005-12-16 01:31:24 +03:00
loff_t dir_i_size ;
struct ocfs2_dinode * fe = ( struct ocfs2_dinode * ) parent_fe_bh - > b_data ;
2008-08-18 13:38:43 +04:00
struct ocfs2_extent_list * el = & fe - > id2 . i_list ;
2005-12-16 01:31:24 +03:00
struct ocfs2_alloc_context * data_ac = NULL ;
struct ocfs2_alloc_context * meta_ac = NULL ;
2006-10-10 05:11:45 +04:00
handle_t * handle = NULL ;
2005-12-16 01:31:24 +03:00
struct buffer_head * new_bh = NULL ;
struct ocfs2_dir_entry * de ;
struct super_block * sb = osb - > sb ;
2008-08-21 06:36:33 +04:00
struct ocfs2_extent_tree et ;
2005-12-16 01:31:24 +03:00
mlog_entry_void ( ) ;
2007-09-14 03:33:54 +04:00
if ( OCFS2_I ( dir ) - > ip_dyn_features & OCFS2_INLINE_DATA_FL ) {
status = ocfs2_expand_inline_dir ( dir , parent_fe_bh ,
blocks_wanted , & new_bh ) ;
if ( status ) {
mlog_errno ( status ) ;
goto bail ;
}
if ( blocks_wanted = = 1 ) {
/*
* If the new dirent will fit inside the space
* created by pushing out to one block , then
* we can complete the operation
* here . Otherwise we have to expand i_size
* and format the 2 nd block below .
*/
BUG_ON ( new_bh = = NULL ) ;
goto bail_bh ;
}
/*
* Get rid of ' new_bh ' - we want to format the 2 nd
* data block and return that instead .
*/
brelse ( new_bh ) ;
new_bh = NULL ;
dir_i_size = i_size_read ( dir ) ;
credits = OCFS2_SIMPLE_DIR_EXTEND_CREDITS ;
goto do_extend ;
}
2005-12-16 01:31:24 +03:00
dir_i_size = i_size_read ( dir ) ;
2006-03-03 21:24:33 +03:00
mlog ( 0 , " extending dir %llu (i_size = %lld) \n " ,
( unsigned long long ) OCFS2_I ( dir ) - > ip_blkno , dir_i_size ) ;
2005-12-16 01:31:24 +03:00
/* dir->i_size is always block aligned. */
spin_lock ( & OCFS2_I ( dir ) - > ip_lock ) ;
if ( dir_i_size = = ocfs2_clusters_to_bytes ( sb , OCFS2_I ( dir ) - > ip_clusters ) ) {
spin_unlock ( & OCFS2_I ( dir ) - > ip_lock ) ;
2008-08-22 23:46:09 +04:00
ocfs2_init_dinode_extent_tree ( & et , dir , parent_fe_bh ) ;
2008-08-21 06:36:33 +04:00
num_free_extents = ocfs2_num_free_extents ( osb , dir , & et ) ;
2005-12-16 01:31:24 +03:00
if ( num_free_extents < 0 ) {
status = num_free_extents ;
mlog_errno ( status ) ;
goto bail ;
}
if ( ! num_free_extents ) {
2008-08-18 13:38:43 +04:00
status = ocfs2_reserve_new_metadata ( osb , el , & meta_ac ) ;
2005-12-16 01:31:24 +03:00
if ( status < 0 ) {
if ( status ! = - ENOSPC )
mlog_errno ( status ) ;
goto bail ;
}
}
2006-10-07 05:34:35 +04:00
status = ocfs2_reserve_clusters ( osb , 1 , & data_ac ) ;
2005-12-16 01:31:24 +03:00
if ( status < 0 ) {
if ( status ! = - ENOSPC )
mlog_errno ( status ) ;
goto bail ;
}
2008-08-18 13:38:43 +04:00
credits = ocfs2_calc_extend_credits ( sb , el , 1 ) ;
2005-12-16 01:31:24 +03:00
} else {
spin_unlock ( & OCFS2_I ( dir ) - > ip_lock ) ;
credits = OCFS2_SIMPLE_DIR_EXTEND_CREDITS ;
}
2007-09-14 03:33:54 +04:00
do_extend :
2007-03-29 05:27:07 +04:00
down_write ( & OCFS2_I ( dir ) - > ip_alloc_sem ) ;
drop_alloc_sem = 1 ;
2006-10-10 04:26:22 +04:00
handle = ocfs2_start_trans ( osb , credits ) ;
2005-12-16 01:31:24 +03:00
if ( IS_ERR ( handle ) ) {
status = PTR_ERR ( handle ) ;
handle = NULL ;
mlog_errno ( status ) ;
goto bail ;
}
status = ocfs2_do_extend_dir ( osb - > sb , handle , dir , parent_fe_bh ,
data_ac , meta_ac , & new_bh ) ;
if ( status < 0 ) {
mlog_errno ( status ) ;
goto bail ;
}
ocfs2_set_new_buffer_uptodate ( dir , new_bh ) ;
status = ocfs2_journal_access ( handle , dir , new_bh ,
OCFS2_JOURNAL_ACCESS_CREATE ) ;
if ( status < 0 ) {
mlog_errno ( status ) ;
goto bail ;
}
memset ( new_bh - > b_data , 0 , sb - > s_blocksize ) ;
de = ( struct ocfs2_dir_entry * ) new_bh - > b_data ;
de - > inode = 0 ;
de - > rec_len = cpu_to_le16 ( sb - > s_blocksize ) ;
status = ocfs2_journal_dirty ( handle , new_bh ) ;
if ( status < 0 ) {
mlog_errno ( status ) ;
goto bail ;
}
dir_i_size + = dir - > i_sb - > s_blocksize ;
i_size_write ( dir , dir_i_size ) ;
2007-03-23 02:53:23 +03:00
dir - > i_blocks = ocfs2_inode_sector_count ( dir ) ;
2005-12-16 01:31:24 +03:00
status = ocfs2_mark_inode_dirty ( handle , dir , parent_fe_bh ) ;
if ( status < 0 ) {
mlog_errno ( status ) ;
goto bail ;
}
2007-09-14 03:33:54 +04:00
bail_bh :
2005-12-16 01:31:24 +03:00
* new_de_bh = new_bh ;
get_bh ( * new_de_bh ) ;
bail :
2007-03-29 05:27:07 +04:00
if ( drop_alloc_sem )
up_write ( & OCFS2_I ( dir ) - > ip_alloc_sem ) ;
2005-12-16 01:31:24 +03:00
if ( handle )
2006-10-10 03:48:10 +04:00
ocfs2_commit_trans ( osb , handle ) ;
2005-12-16 01:31:24 +03:00
if ( data_ac )
ocfs2_free_alloc_context ( data_ac ) ;
if ( meta_ac )
ocfs2_free_alloc_context ( meta_ac ) ;
2008-10-08 01:25:16 +04:00
brelse ( new_bh ) ;
2005-12-16 01:31:24 +03:00
mlog_exit ( status ) ;
return status ;
}
2007-09-14 03:33:54 +04:00
static int ocfs2_find_dir_space_id ( struct inode * dir , struct buffer_head * di_bh ,
const char * name , int namelen ,
struct buffer_head * * ret_de_bh ,
unsigned int * blocks_wanted )
2005-12-16 01:31:24 +03:00
{
2007-09-14 03:33:54 +04:00
int ret ;
struct ocfs2_dinode * di = ( struct ocfs2_dinode * ) di_bh - > b_data ;
struct ocfs2_dir_entry * de , * last_de = NULL ;
char * de_buf , * limit ;
unsigned long offset = 0 ;
unsigned int rec_len , new_rec_len ;
de_buf = di - > id2 . i_data . id_data ;
limit = de_buf + i_size_read ( dir ) ;
rec_len = OCFS2_DIR_REC_LEN ( namelen ) ;
2005-12-16 01:31:24 +03:00
2007-09-14 03:33:54 +04:00
while ( de_buf < limit ) {
de = ( struct ocfs2_dir_entry * ) de_buf ;
2005-12-16 01:31:24 +03:00
2007-09-14 03:33:54 +04:00
if ( ! ocfs2_check_dir_entry ( dir , de , di_bh , offset ) ) {
ret = - ENOENT ;
goto out ;
}
if ( ocfs2_match ( namelen , name , de ) ) {
ret = - EEXIST ;
goto out ;
}
if ( ocfs2_dirent_would_fit ( de , rec_len ) ) {
/* Ok, we found a spot. Return this bh and let
* the caller actually fill it in . */
* ret_de_bh = di_bh ;
get_bh ( * ret_de_bh ) ;
ret = 0 ;
goto out ;
}
2005-12-16 01:31:24 +03:00
2007-09-14 03:33:54 +04:00
last_de = de ;
de_buf + = le16_to_cpu ( de - > rec_len ) ;
offset + = le16_to_cpu ( de - > rec_len ) ;
}
2005-12-16 01:31:24 +03:00
2007-09-14 03:33:54 +04:00
/*
* We ' re going to require expansion of the directory - figure
* out how many blocks we ' ll need so that a place for the
* dirent can be found .
*/
* blocks_wanted = 1 ;
new_rec_len = le16_to_cpu ( last_de - > rec_len ) + ( dir - > i_sb - > s_blocksize - i_size_read ( dir ) ) ;
if ( new_rec_len < ( rec_len + OCFS2_DIR_REC_LEN ( last_de - > name_len ) ) )
* blocks_wanted = 2 ;
2005-12-16 01:31:24 +03:00
2007-09-14 03:33:54 +04:00
ret = - ENOSPC ;
out :
return ret ;
}
static int ocfs2_find_dir_space_el ( struct inode * dir , const char * name ,
int namelen , struct buffer_head * * ret_de_bh )
{
unsigned long offset ;
struct buffer_head * bh = NULL ;
unsigned short rec_len ;
struct ocfs2_dir_entry * de ;
struct super_block * sb = dir - > i_sb ;
int status ;
2005-12-16 01:31:24 +03:00
bh = ocfs2_bread ( dir , 0 , & status , 0 ) ;
if ( ! bh ) {
mlog_errno ( status ) ;
goto bail ;
}
rec_len = OCFS2_DIR_REC_LEN ( namelen ) ;
offset = 0 ;
de = ( struct ocfs2_dir_entry * ) bh - > b_data ;
while ( 1 ) {
if ( ( char * ) de > = sb - > s_blocksize + bh - > b_data ) {
brelse ( bh ) ;
bh = NULL ;
if ( i_size_read ( dir ) < = offset ) {
2007-09-14 03:33:54 +04:00
/*
* Caller will have to expand this
* directory .
*/
status = - ENOSPC ;
2005-12-16 01:31:24 +03:00
goto bail ;
}
bh = ocfs2_bread ( dir ,
offset > > sb - > s_blocksize_bits ,
& status ,
0 ) ;
if ( ! bh ) {
mlog_errno ( status ) ;
goto bail ;
}
/* move to next block */
de = ( struct ocfs2_dir_entry * ) bh - > b_data ;
}
if ( ! ocfs2_check_dir_entry ( dir , de , bh , offset ) ) {
status = - ENOENT ;
goto bail ;
}
if ( ocfs2_match ( namelen , name , de ) ) {
status = - EEXIST ;
goto bail ;
}
2007-09-14 03:29:01 +04:00
if ( ocfs2_dirent_would_fit ( de , rec_len ) ) {
2005-12-16 01:31:24 +03:00
/* Ok, we found a spot. Return this bh and let
* the caller actually fill it in . */
* ret_de_bh = bh ;
get_bh ( * ret_de_bh ) ;
status = 0 ;
goto bail ;
}
offset + = le16_to_cpu ( de - > rec_len ) ;
de = ( struct ocfs2_dir_entry * ) ( ( char * ) de + le16_to_cpu ( de - > rec_len ) ) ;
}
status = 0 ;
bail :
2008-10-08 01:25:16 +04:00
brelse ( bh ) ;
2005-12-16 01:31:24 +03:00
mlog_exit ( status ) ;
return status ;
}
2007-09-14 03:33:54 +04:00
int ocfs2_prepare_dir_for_insert ( struct ocfs2_super * osb ,
struct inode * dir ,
struct buffer_head * parent_fe_bh ,
const char * name ,
int namelen ,
struct buffer_head * * ret_de_bh )
{
int ret ;
unsigned int blocks_wanted = 1 ;
struct buffer_head * bh = NULL ;
mlog ( 0 , " getting ready to insert namelen %d into dir %llu \n " ,
namelen , ( unsigned long long ) OCFS2_I ( dir ) - > ip_blkno ) ;
* ret_de_bh = NULL ;
if ( ! namelen ) {
ret = - EINVAL ;
mlog_errno ( ret ) ;
goto out ;
}
if ( OCFS2_I ( dir ) - > ip_dyn_features & OCFS2_INLINE_DATA_FL ) {
ret = ocfs2_find_dir_space_id ( dir , parent_fe_bh , name ,
namelen , & bh , & blocks_wanted ) ;
} else
ret = ocfs2_find_dir_space_el ( dir , name , namelen , & bh ) ;
if ( ret & & ret ! = - ENOSPC ) {
mlog_errno ( ret ) ;
goto out ;
}
if ( ret = = - ENOSPC ) {
/*
* We have to expand the directory to add this name .
*/
BUG_ON ( bh ) ;
ret = ocfs2_extend_dir ( osb , dir , parent_fe_bh , blocks_wanted ,
& bh ) ;
if ( ret ) {
if ( ret ! = - ENOSPC )
mlog_errno ( ret ) ;
goto out ;
}
BUG_ON ( ! bh ) ;
}
* ret_de_bh = bh ;
bh = NULL ;
out :
2008-10-08 01:25:16 +04:00
brelse ( bh ) ;
2007-09-14 03:33:54 +04:00
return ret ;
}