2006-10-11 12:20:50 +04:00
/*
2006-10-11 12:20:53 +04:00
* linux / fs / ext4 / ialloc . c
2006-10-11 12:20:50 +04:00
*
* Copyright ( C ) 1992 , 1993 , 1994 , 1995
* Remy Card ( card @ masi . ibp . fr )
* Laboratoire MASI - Institut Blaise Pascal
* Universite Pierre et Marie Curie ( Paris VI )
*
* BSD ufs - inspired inode and directory allocation by
* Stephen Tweedie ( sct @ redhat . com ) , 1993
* Big - endian to little - endian byte - swapping / bitmaps by
* David S . Miller ( davem @ caip . rutgers . edu ) , 1995
*/
# include <linux/time.h>
# include <linux/fs.h>
2006-10-11 12:21:01 +04:00
# include <linux/jbd2.h>
2006-10-11 12:20:53 +04:00
# include <linux/ext4_fs.h>
2006-10-11 12:21:01 +04:00
# include <linux/ext4_jbd2.h>
2006-10-11 12:20:50 +04:00
# include <linux/stat.h>
# include <linux/string.h>
# include <linux/quotaops.h>
# include <linux/buffer_head.h>
# include <linux/random.h>
# include <linux/bitops.h>
2006-10-11 12:21:05 +04:00
# include <linux/blkdev.h>
2006-10-11 12:20:50 +04:00
# include <asm/byteorder.h>
# include "xattr.h"
# include "acl.h"
/*
* ialloc . c contains the inodes allocation and deallocation routines
*/
/*
* The free inodes are managed by bitmaps . A file system contains several
* blocks groups . Each group contains 1 bitmap block for blocks , 1 bitmap
* block for inodes , N blocks for the inode table and data blocks .
*
* The file system contains group descriptors which are located after the
* super block . Each descriptor contains the number of the bitmap block and
* the free blocks count in the block .
*/
/*
* Read the inode allocation bitmap for a given block_group , reading
* into the specified slot in the superblock ' s bitmap cache .
*
* Return buffer_head of bitmap on success or NULL .
*/
static struct buffer_head *
read_inode_bitmap ( struct super_block * sb , unsigned long block_group )
{
2006-10-11 12:20:53 +04:00
struct ext4_group_desc * desc ;
2006-10-11 12:20:50 +04:00
struct buffer_head * bh = NULL ;
2006-10-11 12:20:53 +04:00
desc = ext4_get_group_desc ( sb , block_group , NULL ) ;
2006-10-11 12:20:50 +04:00
if ( ! desc )
goto error_out ;
2006-10-11 12:21:15 +04:00
bh = sb_bread ( sb , ext4_inode_bitmap ( sb , desc ) ) ;
2006-10-11 12:20:50 +04:00
if ( ! bh )
2006-10-11 12:20:53 +04:00
ext4_error ( sb , " read_inode_bitmap " ,
2006-10-11 12:20:50 +04:00
" Cannot read inode bitmap - "
2006-10-11 12:21:10 +04:00
" block_group = %lu, inode_bitmap = %llu " ,
2006-10-11 12:21:15 +04:00
block_group , ext4_inode_bitmap ( sb , desc ) ) ;
2006-10-11 12:20:50 +04:00
error_out :
return bh ;
}
/*
* NOTE ! When we get the inode , we ' re the only people
* that have access to it , and as such there are no
* race conditions we have to worry about . The inode
* is not on the hash - lists , and it cannot be reached
* through the filesystem because the directory entry
* has been deleted earlier .
*
* HOWEVER : we must make sure that we get no aliases ,
* which means that we have to call " clear_inode() "
* _before_ we mark the inode not in use in the inode
* bitmaps . Otherwise a newly created file might use
* the same inode number ( not actually the same pointer
* though ) , and then we ' d have two inodes sharing the
* same inode number and space on the harddisk .
*/
2006-10-11 12:20:53 +04:00
/*
 * Release the on-disk inode backing @inode.
 *
 * @handle: journal handle under which the bitmap and group descriptor
 *          buffers are modified.
 * @inode:  in-core inode to release; it must have at most one reference
 *          and a zero link count, otherwise the function only logs a
 *          message and returns.
 *
 * The function drops the inode's extended attributes and quota charge,
 * calls clear_inode(), clears the inode's bit in its group's inode bitmap
 * and, if the bit was set, bumps the group's free-inode count (and lowers
 * its directory count for directories) together with the per-cpu
 * counters.  Any journalling error is reported via ext4_std_error().
 */
void ext4_free_inode(handle_t *handle, struct inode *inode)
{
	struct super_block *sb = inode->i_sb;
	int is_directory;
	unsigned long ino;
	struct buffer_head *bitmap_bh = NULL;
	struct buffer_head *bh2 = NULL;
	unsigned long block_group;
	unsigned long bit;
	struct ext4_group_desc *gdp;
	struct ext4_super_block *es;
	struct ext4_sb_info *sbi;
	int fatal = 0, err;

	if (atomic_read(&inode->i_count) > 1) {
		printk(" ext4_free_inode: inode has count=%d \n ",
		       atomic_read(&inode->i_count));
		return;
	}
	if (inode->i_nlink) {
		printk(" ext4_free_inode: inode has nlink=%d \n ",
		       inode->i_nlink);
		return;
	}
	if (!sb) {
		printk(" ext4_free_inode: inode on nonexistent device \n ");
		return;
	}
	sbi = EXT4_SB(sb);

	ino = inode->i_ino;
	ext4_debug(" freeing inode %lu \n ", ino);

	/*
	 * Note: we must free any quota before locking the superblock,
	 * as writing the quota to disk may need the lock as well.
	 */
	DQUOT_INIT(inode);
	ext4_xattr_delete_inode(handle, inode);
	DQUOT_FREE_INODE(inode);
	DQUOT_DROP(inode);

	/* Remember the type now: the inode is cleared before it is used below */
	is_directory = S_ISDIR(inode->i_mode);

	/* Do this BEFORE marking the inode not in use or returning an error */
	clear_inode(inode);

	es = EXT4_SB(sb)->s_es;
	if (ino < EXT4_FIRST_INO(sb) || ino > le32_to_cpu(es->s_inodes_count)) {
		ext4_error(sb, " ext4_free_inode ",
			   " reserved or nonexistent inode %lu ", ino);
		goto error_return;
	}

	/* Inode numbers start at 1; locate the group and the bit within it */
	block_group = (ino - 1) / EXT4_INODES_PER_GROUP(sb);
	bit = (ino - 1) % EXT4_INODES_PER_GROUP(sb);
	bitmap_bh = read_inode_bitmap(sb, block_group);
	if (!bitmap_bh)
		goto error_return;

	BUFFER_TRACE(bitmap_bh, " get_write_access ");
	fatal = ext4_journal_get_write_access(handle, bitmap_bh);
	if (fatal)
		goto error_return;

	/* Ok, now we can actually update the inode bitmaps.. */
	if (!ext4_clear_bit_atomic(sb_bgl_lock(sbi, block_group),
				   bit, bitmap_bh->b_data))
		ext4_error(sb, " ext4_free_inode ",
			   " bit already cleared for inode %lu ", ino);
	else {
		gdp = ext4_get_group_desc(sb, block_group, &bh2);

		/*
		 * Only touch bh2 when a descriptor came back.  Nothing visible
		 * here guarantees that ext4_get_group_desc() stores a buffer
		 * through its out-parameter on failure, so journalling bh2
		 * unconditionally could operate on a pointer that was never
		 * set.  A failed lookup leaves 'fatal' unchanged.
		 */
		if (gdp) {
			BUFFER_TRACE(bh2, " get_write_access ");
			fatal = ext4_journal_get_write_access(handle, bh2);
			if (fatal)
				goto error_return;

			spin_lock(sb_bgl_lock(sbi, block_group));
			gdp->bg_free_inodes_count = cpu_to_le16(
				le16_to_cpu(gdp->bg_free_inodes_count) + 1);
			if (is_directory)
				gdp->bg_used_dirs_count = cpu_to_le16(
					le16_to_cpu(gdp->bg_used_dirs_count) - 1);
			spin_unlock(sb_bgl_lock(sbi, block_group));
			percpu_counter_inc(&sbi->s_freeinodes_counter);
			if (is_directory)
				percpu_counter_dec(&sbi->s_dirs_counter);

			BUFFER_TRACE(bh2, " call ext4_journal_dirty_metadata ");
			err = ext4_journal_dirty_metadata(handle, bh2);
			if (!fatal)
				fatal = err;
		}
	}
	BUFFER_TRACE(bitmap_bh, " call ext4_journal_dirty_metadata ");
	err = ext4_journal_dirty_metadata(handle, bitmap_bh);
	/* Keep the first error seen */
	if (!fatal)
		fatal = err;
	sb->s_dirt = 1;
error_return:
	brelse(bitmap_bh);
	ext4_std_error(sb, fatal);
}
/*
* There are two policies for allocating an inode . If the new inode is
* a directory , then a forward search is made for a block group with both
* free space and a low directory - to - inode ratio ; if that fails , then of
* the groups with above - average free space , that group with the fewest
* directories already is chosen .
*
 * For other inodes, search forward from the parent directory's block
 * group to find a free inode.
*/
static int find_group_dir ( struct super_block * sb , struct inode * parent )
{
2006-10-11 12:20:53 +04:00
int ngroups = EXT4_SB ( sb ) - > s_groups_count ;
2006-10-11 12:20:50 +04:00
unsigned int freei , avefreei ;
2006-10-11 12:20:53 +04:00
struct ext4_group_desc * desc , * best_desc = NULL ;
2006-10-11 12:20:50 +04:00
int group , best_group = - 1 ;
2006-10-11 12:20:53 +04:00
freei = percpu_counter_read_positive ( & EXT4_SB ( sb ) - > s_freeinodes_counter ) ;
2006-10-11 12:20:50 +04:00
avefreei = freei / ngroups ;
for ( group = 0 ; group < ngroups ; group + + ) {
2007-10-17 10:26:30 +04:00
desc = ext4_get_group_desc ( sb , group , NULL ) ;
2006-10-11 12:20:50 +04:00
if ( ! desc | | ! desc - > bg_free_inodes_count )
continue ;
if ( le16_to_cpu ( desc - > bg_free_inodes_count ) < avefreei )
continue ;
if ( ! best_desc | |
( le16_to_cpu ( desc - > bg_free_blocks_count ) >
le16_to_cpu ( best_desc - > bg_free_blocks_count ) ) ) {
best_group = group ;
best_desc = desc ;
}
}
return best_group ;
}
/*
* Orlov ' s allocator for directories .
*
* We always try to spread first - level directories .
*
* If there are blockgroups with both free inodes and free blocks counts
* not worse than average we return one with smallest directory count .
* Otherwise we simply return a random group .
*
* For the rest rules look so :
*
* It ' s OK to put directory into a group unless
* it has too many directories already ( max_dirs ) or
* it has too few free inodes left ( min_inodes ) or
* it has too few free blocks left ( min_blocks ) or
* it ' s already running too large debt ( max_debt ) .
 * Parent's group is preferred, if it doesn't satisfy these
* conditions we search cyclically through the rest . If none
* of the groups look good we just look for a group with more
* free inodes than average ( starting at parent ' s group ) .
*
* Debt is incremented each time we allocate a directory and decremented
* when we allocate an inode , within 0 - - 255.
*/
# define INODE_COST 64
# define BLOCK_COST 256
static int find_group_orlov ( struct super_block * sb , struct inode * parent )
{
2006-10-11 12:20:53 +04:00
int parent_group = EXT4_I ( parent ) - > i_block_group ;
struct ext4_sb_info * sbi = EXT4_SB ( sb ) ;
struct ext4_super_block * es = sbi - > s_es ;
2006-10-11 12:20:50 +04:00
int ngroups = sbi - > s_groups_count ;
2006-10-11 12:20:53 +04:00
int inodes_per_group = EXT4_INODES_PER_GROUP ( sb ) ;
2006-10-11 12:20:50 +04:00
unsigned int freei , avefreei ;
2006-10-11 12:20:53 +04:00
ext4_fsblk_t freeb , avefreeb ;
ext4_fsblk_t blocks_per_dir ;
2006-10-11 12:20:50 +04:00
unsigned int ndirs ;
int max_debt , max_dirs , min_inodes ;
2006-10-11 12:20:53 +04:00
ext4_grpblk_t min_blocks ;
2006-10-11 12:20:50 +04:00
int group = - 1 , i ;
2006-10-11 12:20:53 +04:00
struct ext4_group_desc * desc ;
2006-10-11 12:20:50 +04:00
freei = percpu_counter_read_positive ( & sbi - > s_freeinodes_counter ) ;
avefreei = freei / ngroups ;
freeb = percpu_counter_read_positive ( & sbi - > s_freeblocks_counter ) ;
2006-10-11 12:21:05 +04:00
avefreeb = freeb ;
2006-10-11 12:21:19 +04:00
do_div ( avefreeb , ngroups ) ;
2006-10-11 12:20:50 +04:00
ndirs = percpu_counter_read_positive ( & sbi - > s_dirs_counter ) ;
if ( ( parent = = sb - > s_root - > d_inode ) | |
2006-10-11 12:20:53 +04:00
( EXT4_I ( parent ) - > i_flags & EXT4_TOPDIR_FL ) ) {
2006-10-11 12:20:50 +04:00
int best_ndir = inodes_per_group ;
int best_group = - 1 ;
get_random_bytes ( & group , sizeof ( group ) ) ;
parent_group = ( unsigned ) group % ngroups ;
for ( i = 0 ; i < ngroups ; i + + ) {
group = ( parent_group + i ) % ngroups ;
2007-10-17 10:26:30 +04:00
desc = ext4_get_group_desc ( sb , group , NULL ) ;
2006-10-11 12:20:50 +04:00
if ( ! desc | | ! desc - > bg_free_inodes_count )
continue ;
if ( le16_to_cpu ( desc - > bg_used_dirs_count ) > = best_ndir )
continue ;
if ( le16_to_cpu ( desc - > bg_free_inodes_count ) < avefreei )
continue ;
if ( le16_to_cpu ( desc - > bg_free_blocks_count ) < avefreeb )
continue ;
best_group = group ;
best_ndir = le16_to_cpu ( desc - > bg_used_dirs_count ) ;
}
if ( best_group > = 0 )
return best_group ;
goto fallback ;
}
2006-10-11 12:21:10 +04:00
blocks_per_dir = ext4_blocks_count ( es ) - freeb ;
2006-10-11 12:21:19 +04:00
do_div ( blocks_per_dir , ndirs ) ;
2006-10-11 12:20:50 +04:00
max_dirs = ndirs / ngroups + inodes_per_group / 16 ;
min_inodes = avefreei - inodes_per_group / 4 ;
2006-10-11 12:20:53 +04:00
min_blocks = avefreeb - EXT4_BLOCKS_PER_GROUP ( sb ) / 4 ;
2006-10-11 12:20:50 +04:00
2006-10-11 12:21:05 +04:00
max_debt = EXT4_BLOCKS_PER_GROUP ( sb ) ;
2006-10-11 12:21:19 +04:00
max_debt / = max_t ( int , blocks_per_dir , BLOCK_COST ) ;
2006-10-11 12:20:50 +04:00
if ( max_debt * INODE_COST > inodes_per_group )
max_debt = inodes_per_group / INODE_COST ;
if ( max_debt > 255 )
max_debt = 255 ;
if ( max_debt = = 0 )
max_debt = 1 ;
for ( i = 0 ; i < ngroups ; i + + ) {
group = ( parent_group + i ) % ngroups ;
2007-10-17 10:26:30 +04:00
desc = ext4_get_group_desc ( sb , group , NULL ) ;
2006-10-11 12:20:50 +04:00
if ( ! desc | | ! desc - > bg_free_inodes_count )
continue ;
if ( le16_to_cpu ( desc - > bg_used_dirs_count ) > = max_dirs )
continue ;
if ( le16_to_cpu ( desc - > bg_free_inodes_count ) < min_inodes )
continue ;
if ( le16_to_cpu ( desc - > bg_free_blocks_count ) < min_blocks )
continue ;
return group ;
}
fallback :
for ( i = 0 ; i < ngroups ; i + + ) {
group = ( parent_group + i ) % ngroups ;
2007-10-17 10:26:30 +04:00
desc = ext4_get_group_desc ( sb , group , NULL ) ;
2006-10-11 12:20:50 +04:00
if ( ! desc | | ! desc - > bg_free_inodes_count )
continue ;
if ( le16_to_cpu ( desc - > bg_free_inodes_count ) > = avefreei )
return group ;
}
if ( avefreei ) {
/*
* The free - inodes counter is approximate , and for really small
* filesystems the above test can fail to find any blockgroups
*/
avefreei = 0 ;
goto fallback ;
}
return - 1 ;
}
static int find_group_other ( struct super_block * sb , struct inode * parent )
{
2006-10-11 12:20:53 +04:00
int parent_group = EXT4_I ( parent ) - > i_block_group ;
int ngroups = EXT4_SB ( sb ) - > s_groups_count ;
struct ext4_group_desc * desc ;
2006-10-11 12:20:50 +04:00
int group , i ;
/*
* Try to place the inode in its parent directory
*/
group = parent_group ;
2007-10-17 10:26:30 +04:00
desc = ext4_get_group_desc ( sb , group , NULL ) ;
2006-10-11 12:20:50 +04:00
if ( desc & & le16_to_cpu ( desc - > bg_free_inodes_count ) & &
le16_to_cpu ( desc - > bg_free_blocks_count ) )
return group ;
/*
* We ' re going to place this inode in a different blockgroup from its
* parent . We want to cause files in a common directory to all land in
* the same blockgroup . But we want files which are in a different
* directory which shares a blockgroup with our parent to land in a
* different blockgroup .
*
* So add our directory ' s i_ino into the starting point for the hash .
*/
group = ( group + parent - > i_ino ) % ngroups ;
/*
* Use a quadratic hash to find a group with a free inode and some free
* blocks .
*/
for ( i = 1 ; i < ngroups ; i < < = 1 ) {
group + = i ;
if ( group > = ngroups )
group - = ngroups ;
2007-10-17 10:26:30 +04:00
desc = ext4_get_group_desc ( sb , group , NULL ) ;
2006-10-11 12:20:50 +04:00
if ( desc & & le16_to_cpu ( desc - > bg_free_inodes_count ) & &
le16_to_cpu ( desc - > bg_free_blocks_count ) )
return group ;
}
/*
* That failed : try linear search for a free inode , even if that group
* has no free blocks .
*/
group = parent_group ;
for ( i = 0 ; i < ngroups ; i + + ) {
if ( + + group > = ngroups )
group = 0 ;
2007-10-17 10:26:30 +04:00
desc = ext4_get_group_desc ( sb , group , NULL ) ;
2006-10-11 12:20:50 +04:00
if ( desc & & le16_to_cpu ( desc - > bg_free_inodes_count ) )
return group ;
}
return - 1 ;
}
/*
* There are two policies for allocating an inode . If the new inode is
* a directory , then a forward search is made for a block group with both
* free space and a low directory - to - inode ratio ; if that fails , then of
* the groups with above - average free space , that group with the fewest
* directories already is chosen .
*
* For other inodes , search forward from the parent directory ' s block
* group to find a free inode .
*/
2006-10-11 12:20:53 +04:00
/*
 * Allocate a new inode in the filesystem of @dir.
 *
 * @handle: journal handle under which bitmap, group descriptor and
 *          (possibly) superblock buffers are modified.
 * @dir:    directory the new inode is created in; must be non-NULL and
 *          still linked.
 * @mode:   file mode of the new inode; selects the group-search policy
 *          and is stored in i_mode (with S_ISGID added for directories
 *          created in a set-gid directory).
 *
 * Returns the initialised in-core inode on success, or an ERR_PTR():
 *   -EPERM   @dir is NULL or has a zero link count
 *   -ENOMEM  new_inode() failed
 *   -ENOSPC  no group with a free inode was found
 *   -EIO     group descriptor / bitmap could not be read, or the chosen
 *            inode number is reserved or out of range
 *   -EDQUOT  the quota allocation failed
 *   or whatever error a journalling, ACL or security-init call returned.
 */
struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode)
{
	struct super_block *sb;
	struct buffer_head *bitmap_bh = NULL;
	struct buffer_head *bh2;
	int group;
	unsigned long ino = 0;
	struct inode *inode;
	struct ext4_group_desc *gdp = NULL;
	struct ext4_super_block *es;
	struct ext4_inode_info *ei;
	struct ext4_sb_info *sbi;
	int err = 0;
	struct inode *ret;
	int i;

	/* Cannot create files in a deleted directory */
	if (!dir || !dir->i_nlink)
		return ERR_PTR(-EPERM);

	sb = dir->i_sb;
	inode = new_inode(sb);
	if (!inode)
		return ERR_PTR(-ENOMEM);
	ei = EXT4_I(inode);

	sbi = EXT4_SB(sb);
	es = sbi->s_es;

	/* Pick the starting group according to inode type and mount options */
	if (S_ISDIR(mode)) {
		if (test_opt(sb, OLDALLOC))
			group = find_group_dir(sb, dir);
		else
			group = find_group_orlov(sb, dir);
	} else
		group = find_group_other(sb, dir);

	err = -ENOSPC;
	if (group == -1)
		goto out;

	for (i = 0; i < sbi->s_groups_count; i++) {
		err = -EIO;

		gdp = ext4_get_group_desc(sb, group, &bh2);
		if (!gdp)
			goto fail;

		/* Drop the previous group's bitmap before reading this one */
		brelse(bitmap_bh);
		bitmap_bh = read_inode_bitmap(sb, group);
		if (!bitmap_bh)
			goto fail;

		ino = 0;

repeat_in_this_group:
		ino = ext4_find_next_zero_bit((unsigned long *)
				bitmap_bh->b_data, EXT4_INODES_PER_GROUP(sb), ino);
		if (ino < EXT4_INODES_PER_GROUP(sb)) {

			BUFFER_TRACE(bitmap_bh, " get_write_access ");
			err = ext4_journal_get_write_access(handle, bitmap_bh);
			if (err)
				goto fail;

			if (!ext4_set_bit_atomic(sb_bgl_lock(sbi, group),
						 ino, bitmap_bh->b_data)) {
				/* we won it */
				BUFFER_TRACE(bitmap_bh,
					" call ext4_journal_dirty_metadata ");
				err = ext4_journal_dirty_metadata(handle,
								  bitmap_bh);
				if (err)
					goto fail;
				goto got;
			}
			/* we lost it */
			jbd2_journal_release_buffer(handle, bitmap_bh);

			if (++ino < EXT4_INODES_PER_GROUP(sb))
				goto repeat_in_this_group;
		}

		/*
		 * This case is possible in concurrent environment.  It is very
		 * rare.  We cannot repeat the find_group_xxx() call because
		 * that will simply return the same blockgroup, because the
		 * group descriptor metadata has not yet been updated.
		 * So we just go onto the next blockgroup.
		 */
		if (++group == sbi->s_groups_count)
			group = 0;
	}
	err = -ENOSPC;
	goto out;

got:
	/* Convert the bit index within the group into an inode number */
	ino += group * EXT4_INODES_PER_GROUP(sb) + 1;
	if (ino < EXT4_FIRST_INO(sb) || ino > le32_to_cpu(es->s_inodes_count)) {
		ext4_error(sb, " ext4_new_inode ",
			   " reserved inode or inode > inodes count - "
			   " block_group = %d, inode=%lu ", group, ino);
		err = -EIO;
		goto fail;
	}

	BUFFER_TRACE(bh2, " get_write_access ");
	err = ext4_journal_get_write_access(handle, bh2);
	if (err)
		goto fail;
	spin_lock(sb_bgl_lock(sbi, group));
	gdp->bg_free_inodes_count =
		cpu_to_le16(le16_to_cpu(gdp->bg_free_inodes_count) - 1);
	if (S_ISDIR(mode)) {
		gdp->bg_used_dirs_count =
			cpu_to_le16(le16_to_cpu(gdp->bg_used_dirs_count) + 1);
	}
	spin_unlock(sb_bgl_lock(sbi, group));
	BUFFER_TRACE(bh2, " call ext4_journal_dirty_metadata ");
	err = ext4_journal_dirty_metadata(handle, bh2);
	if (err)
		goto fail;

	percpu_counter_dec(&sbi->s_freeinodes_counter);
	if (S_ISDIR(mode))
		percpu_counter_inc(&sbi->s_dirs_counter);
	sb->s_dirt = 1;

	/* Ownership: group comes from the directory for GRPID or set-gid dirs */
	inode->i_uid = current->fsuid;
	if (test_opt(sb, GRPID))
		inode->i_gid = dir->i_gid;
	else if (dir->i_mode & S_ISGID) {
		inode->i_gid = dir->i_gid;
		if (S_ISDIR(mode))
			mode |= S_ISGID;
	} else
		inode->i_gid = current->fsgid;
	inode->i_mode = mode;

	inode->i_ino = ino;
	/* Start with no blocks accounted to the inode */
	inode->i_blocks = 0;
	inode->i_mtime = inode->i_atime = inode->i_ctime = ei->i_crtime =
						ext4_current_time(inode);

	memset(ei->i_data, 0, sizeof(ei->i_data));
	ei->i_dir_start_lookup = 0;
	ei->i_disksize = 0;

	/* Inherit the directory's flags, minus those that must not propagate */
	ei->i_flags = EXT4_I(dir)->i_flags & ~EXT4_INDEX_FL;
	if (S_ISLNK(mode))
		ei->i_flags &= ~(EXT4_IMMUTABLE_FL | EXT4_APPEND_FL);
	/* dirsync only applies to directories */
	if (!S_ISDIR(mode))
		ei->i_flags &= ~EXT4_DIRSYNC_FL;
#ifdef EXT4_FRAGMENTS
	ei->i_faddr = 0;
	ei->i_frag_no = 0;
	ei->i_frag_size = 0;
#endif
	ei->i_file_acl = 0;
	ei->i_dir_acl = 0;
	ei->i_dtime = 0;
	ei->i_block_alloc_info = NULL;
	ei->i_block_group = group;

	ext4_set_inode_flags(inode);
	if (IS_DIRSYNC(inode))
		handle->h_sync = 1;
	insert_inode_hash(inode);
	spin_lock(&sbi->s_next_gen_lock);
	inode->i_generation = sbi->s_next_generation++;
	spin_unlock(&sbi->s_next_gen_lock);

	ei->i_state = EXT4_STATE_NEW;

	ei->i_extra_isize = EXT4_SB(sb)->s_want_extra_isize;

	ret = inode;
	if (DQUOT_ALLOC_INODE(inode)) {
		err = -EDQUOT;
		goto fail_drop;
	}

	err = ext4_init_acl(handle, inode, dir);
	if (err)
		goto fail_free_drop;

	err = ext4_init_security(handle, inode, dir);
	if (err)
		goto fail_free_drop;

	err = ext4_mark_inode_dirty(handle, inode);
	if (err) {
		ext4_std_error(sb, err);
		goto fail_free_drop;
	}
	if (test_opt(sb, EXTENTS)) {
		EXT4_I(inode)->i_flags |= EXT4_EXTENTS_FL;
		ext4_ext_tree_init(handle, inode);
		if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_EXTENTS)) {
			/*
			 * From here on the inode is hashed and its quota is
			 * charged, so failures must unwind through
			 * fail_free_drop like the other late failures above;
			 * the plain 'fail' path would skip the quota release
			 * and leave i_nlink non-zero.
			 */
			err = ext4_journal_get_write_access(handle,
							    EXT4_SB(sb)->s_sbh);
			if (err) {
				ext4_std_error(sb, err);
				goto fail_free_drop;
			}
			EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_EXTENTS);
			BUFFER_TRACE(EXT4_SB(sb)->s_sbh, " call ext4_journal_dirty_metadata ");
			err = ext4_journal_dirty_metadata(handle,
							  EXT4_SB(sb)->s_sbh);
			/* Do not report success if the superblock was not journalled */
			if (err) {
				ext4_std_error(sb, err);
				goto fail_free_drop;
			}
		}
	}

	ext4_debug(" allocating inode %lu \n ", inode->i_ino);
	goto really_out;
fail:
	ext4_std_error(sb, err);
out:
	iput(inode);
	ret = ERR_PTR(err);
really_out:
	brelse(bitmap_bh);
	return ret;

fail_free_drop:
	DQUOT_FREE_INODE(inode);

fail_drop:
	DQUOT_DROP(inode);
	inode->i_flags |= S_NOQUOTA;
	/* Zero the link count before the final iput() */
	inode->i_nlink = 0;
	iput(inode);
	brelse(bitmap_bh);
	return ERR_PTR(err);
}
/* Verify that we are loading a valid orphan from disk */
2006-10-11 12:20:53 +04:00
struct inode * ext4_orphan_get ( struct super_block * sb , unsigned long ino )
2006-10-11 12:20:50 +04:00
{
2006-10-11 12:20:53 +04:00
unsigned long max_ino = le32_to_cpu ( EXT4_SB ( sb ) - > s_es - > s_inodes_count ) ;
2006-10-11 12:20:50 +04:00
unsigned long block_group ;
int bit ;
struct buffer_head * bitmap_bh = NULL ;
struct inode * inode = NULL ;
/* Error cases - e2fsck has already cleaned up for us */
if ( ino > max_ino ) {
2006-10-11 12:20:53 +04:00
ext4_warning ( sb , __FUNCTION__ ,
2006-10-11 12:20:50 +04:00
" bad orphan ino %lu! e2fsck was run? " , ino ) ;
goto out ;
}
2006-10-11 12:20:53 +04:00
block_group = ( ino - 1 ) / EXT4_INODES_PER_GROUP ( sb ) ;
bit = ( ino - 1 ) % EXT4_INODES_PER_GROUP ( sb ) ;
2006-10-11 12:20:50 +04:00
bitmap_bh = read_inode_bitmap ( sb , block_group ) ;
if ( ! bitmap_bh ) {
2006-10-11 12:20:53 +04:00
ext4_warning ( sb , __FUNCTION__ ,
2006-10-11 12:20:50 +04:00
" inode bitmap error for orphan %lu " , ino ) ;
goto out ;
}
/* Having the inode bit set should be a 100% indicator that this
* is a valid orphan ( no e2fsck run on fs ) . Orphans also include
* inodes that were being truncated , so we can ' t check i_nlink = = 0.
*/
2006-10-11 12:20:53 +04:00
if ( ! ext4_test_bit ( bit , bitmap_bh - > b_data ) | |
2006-10-11 12:20:50 +04:00
! ( inode = iget ( sb , ino ) ) | | is_bad_inode ( inode ) | |
NEXT_ORPHAN ( inode ) > max_ino ) {
2006-10-11 12:20:53 +04:00
ext4_warning ( sb , __FUNCTION__ ,
2006-10-11 12:20:50 +04:00
" bad orphan inode %lu! e2fsck was run? " , ino ) ;
2006-10-11 12:20:53 +04:00
printk ( KERN_NOTICE " ext4_test_bit(bit=%d, block=%llu) = %d \n " ,
2006-10-11 12:20:50 +04:00
bit , ( unsigned long long ) bitmap_bh - > b_blocknr ,
2006-10-11 12:20:53 +04:00
ext4_test_bit ( bit , bitmap_bh - > b_data ) ) ;
2006-10-11 12:20:50 +04:00
printk ( KERN_NOTICE " inode=%p \n " , inode ) ;
if ( inode ) {
printk ( KERN_NOTICE " is_bad_inode(inode)=%d \n " ,
is_bad_inode ( inode ) ) ;
printk ( KERN_NOTICE " NEXT_ORPHAN(inode)=%u \n " ,
NEXT_ORPHAN ( inode ) ) ;
printk ( KERN_NOTICE " max_ino=%lu \n " , max_ino ) ;
}
/* Avoid freeing blocks if we got a bad deleted inode */
if ( inode & & inode - > i_nlink = = 0 )
inode - > i_blocks = 0 ;
iput ( inode ) ;
inode = NULL ;
}
out :
brelse ( bitmap_bh ) ;
return inode ;
}
2006-10-11 12:20:53 +04:00
unsigned long ext4_count_free_inodes ( struct super_block * sb )
2006-10-11 12:20:50 +04:00
{
unsigned long desc_count ;
2006-10-11 12:20:53 +04:00
struct ext4_group_desc * gdp ;
2006-10-11 12:20:50 +04:00
int i ;
2006-10-11 12:20:53 +04:00
# ifdef EXT4FS_DEBUG
struct ext4_super_block * es ;
2006-10-11 12:20:50 +04:00
unsigned long bitmap_count , x ;
struct buffer_head * bitmap_bh = NULL ;
2006-10-11 12:20:53 +04:00
es = EXT4_SB ( sb ) - > s_es ;
2006-10-11 12:20:50 +04:00
desc_count = 0 ;
bitmap_count = 0 ;
gdp = NULL ;
2006-10-11 12:20:53 +04:00
for ( i = 0 ; i < EXT4_SB ( sb ) - > s_groups_count ; i + + ) {
gdp = ext4_get_group_desc ( sb , i , NULL ) ;
2006-10-11 12:20:50 +04:00
if ( ! gdp )
continue ;
desc_count + = le16_to_cpu ( gdp - > bg_free_inodes_count ) ;
brelse ( bitmap_bh ) ;
bitmap_bh = read_inode_bitmap ( sb , i ) ;
if ( ! bitmap_bh )
continue ;
2006-10-11 12:20:53 +04:00
x = ext4_count_free ( bitmap_bh , EXT4_INODES_PER_GROUP ( sb ) / 8 ) ;
2006-10-11 12:20:50 +04:00
printk ( " group %d: stored = %d, counted = %lu \n " ,
i , le16_to_cpu ( gdp - > bg_free_inodes_count ) , x ) ;
bitmap_count + = x ;
}
brelse ( bitmap_bh ) ;
2006-10-11 12:20:53 +04:00
printk ( " ext4_count_free_inodes: stored = %u, computed = %lu, %lu \n " ,
2006-10-11 12:20:50 +04:00
le32_to_cpu ( es - > s_free_inodes_count ) , desc_count , bitmap_count ) ;
return desc_count ;
# else
desc_count = 0 ;
2006-10-11 12:20:53 +04:00
for ( i = 0 ; i < EXT4_SB ( sb ) - > s_groups_count ; i + + ) {
gdp = ext4_get_group_desc ( sb , i , NULL ) ;
2006-10-11 12:20:50 +04:00
if ( ! gdp )
continue ;
desc_count + = le16_to_cpu ( gdp - > bg_free_inodes_count ) ;
cond_resched ( ) ;
}
return desc_count ;
# endif
}
/* Called at mount-time, super-block is locked */
2006-10-11 12:20:53 +04:00
unsigned long ext4_count_dirs ( struct super_block * sb )
2006-10-11 12:20:50 +04:00
{
unsigned long count = 0 ;
int i ;
2006-10-11 12:20:53 +04:00
for ( i = 0 ; i < EXT4_SB ( sb ) - > s_groups_count ; i + + ) {
struct ext4_group_desc * gdp = ext4_get_group_desc ( sb , i , NULL ) ;
2006-10-11 12:20:50 +04:00
if ( ! gdp )
continue ;
count + = le16_to_cpu ( gdp - > bg_used_dirs_count ) ;
}
return count ;
}