/* -*- mode: c; c-basic-offset: 8; -*-
 * vim: noexpandtab sw=8 ts=8 sts=0:
 *
 * file.c
 *
 * File open, close, extend, truncate
 *
 * Copyright (C) 2002, 2004 Oracle.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
 */

#include <linux/capability.h>
#include <linux/fs.h>
#include <linux/types.h>
#include <linux/slab.h>
#include <linux/highmem.h>
#include <linux/pagemap.h>
#include <linux/uio.h>
#include <linux/sched.h>
#include <linux/splice.h>
#include <linux/mount.h>
#include <linux/writeback.h>
#include <linux/falloc.h>

#define MLOG_MASK_PREFIX ML_INODE
#include <cluster/masklog.h>

#include "ocfs2.h"

#include "alloc.h"
#include "aops.h"
#include "dir.h"
#include "dlmglue.h"
#include "extent_map.h"
#include "file.h"
#include "sysfile.h"
#include "inode.h"
#include "ioctl.h"
#include "journal.h"
#include "locks.h"
#include "mmap.h"
#include "suballoc.h"
#include "super.h"

#include "buffer_head_io.h"
static int ocfs2_sync_inode(struct inode *inode)
{
        filemap_fdatawrite(inode->i_mapping);
        return sync_mapping_buffers(inode->i_mapping);
}

static int ocfs2_init_file_private(struct inode *inode, struct file *file)
{
        struct ocfs2_file_private *fp;

        fp = kzalloc(sizeof(struct ocfs2_file_private), GFP_KERNEL);
        if (!fp)
                return -ENOMEM;

        fp->fp_file = file;
        mutex_init(&fp->fp_mutex);
        ocfs2_file_lock_res_init(&fp->fp_flock, fp);
        file->private_data = fp;

        return 0;
}

static void ocfs2_free_file_private(struct inode *inode, struct file *file)
{
        struct ocfs2_file_private *fp = file->private_data;
        struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);

        if (fp) {
                ocfs2_simple_drop_lockres(osb, &fp->fp_flock);
                ocfs2_lock_res_free(&fp->fp_flock);
                kfree(fp);
                file->private_data = NULL;
        }
}
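
/*
 * File open/release. ip_open_count tracks how many local opens the inode
 * has so that the O_DIRECT open flag can be cleared on the last release.
 */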
static int ocfs2_file_open(struct inode *inode, struct file *file)
{
        int status;
        int mode = file->f_flags;
        struct ocfs2_inode_info *oi = OCFS2_I(inode);

        mlog_entry("(0x%p, 0x%p, '%.*s')\n", inode, file,
                   file->f_path.dentry->d_name.len, file->f_path.dentry->d_name.name);

        spin_lock(&oi->ip_lock);

        /* Check that the inode hasn't been wiped from disk by another
         * node. If it hasn't then we're safe as long as we hold the
         * spin lock until our increment of open count. */
        if (OCFS2_I(inode)->ip_flags & OCFS2_INODE_DELETED) {
                spin_unlock(&oi->ip_lock);

                status = -ENOENT;
                goto leave;
        }

        if (mode & O_DIRECT)
                oi->ip_flags |= OCFS2_INODE_OPEN_DIRECT;

        oi->ip_open_count++;
        spin_unlock(&oi->ip_lock);

        status = ocfs2_init_file_private(inode, file);
        if (status) {
                /*
                 * We want to set open count back if we're failing the
                 * open.
                 */
                spin_lock(&oi->ip_lock);
                oi->ip_open_count--;
                spin_unlock(&oi->ip_lock);
        }

leave:
        mlog_exit(status);
        return status;
}

static int ocfs2_file_release(struct inode *inode, struct file *file)
{
        struct ocfs2_inode_info *oi = OCFS2_I(inode);

        mlog_entry("(0x%p, 0x%p, '%.*s')\n", inode, file,
                   file->f_path.dentry->d_name.len,
                   file->f_path.dentry->d_name.name);

        spin_lock(&oi->ip_lock);
        if (!--oi->ip_open_count)
                oi->ip_flags &= ~OCFS2_INODE_OPEN_DIRECT;
        spin_unlock(&oi->ip_lock);

        ocfs2_free_file_private(inode, file);

        mlog_exit(0);

        return 0;
}

static int ocfs2_dir_open(struct inode *inode, struct file *file)
{
        return ocfs2_init_file_private(inode, file);
}

static int ocfs2_dir_release(struct inode *inode, struct file *file)
{
        ocfs2_free_file_private(inode, file);
        return 0;
}
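
/*
 * fsync/fdatasync: flush the inode's data pages, then force a commit of
 * the journal so that metadata changes reach disk as well.
 */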
static int ocfs2_sync_file(struct file *file,
                           struct dentry *dentry,
                           int datasync)
{
        int err = 0;
        journal_t *journal;
        struct inode *inode = dentry->d_inode;
        struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);

        mlog_entry("(0x%p, 0x%p, %d, '%.*s')\n", file, dentry, datasync,
                   dentry->d_name.len, dentry->d_name.name);

        err = ocfs2_sync_inode(dentry->d_inode);
        if (err)
                goto bail;

        journal = osb->journal->j_journal;
        err = journal_force_commit(journal);

bail:
        mlog_exit(err);

        return (err < 0) ? -EIO : 0;
}
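
/*
 * Decide whether an atime update is worth a disk transaction. Honors the
 * noatime/nodiratime/relatime mount flags and the s_atime_quantum mount
 * option, and never updates atime on a read-only filesystem.
 */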
int ocfs2_should_update_atime(struct inode *inode,
                              struct vfsmount *vfsmnt)
{
        struct timespec now;
        struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);

        if (ocfs2_is_hard_readonly(osb) || ocfs2_is_soft_readonly(osb))
                return 0;

        if ((inode->i_flags & S_NOATIME) ||
            ((inode->i_sb->s_flags & MS_NODIRATIME) && S_ISDIR(inode->i_mode)))
                return 0;

        /*
         * We can be called with no vfsmnt structure - NFSD will
         * sometimes do this.
         *
         * Note that our action here is different than touch_atime() -
         * if we can't tell whether this is a noatime mount, then we
         * don't know whether to trust the value of s_atime_quantum.
         */
        if (vfsmnt == NULL)
                return 0;

        if ((vfsmnt->mnt_flags & MNT_NOATIME) ||
            ((vfsmnt->mnt_flags & MNT_NODIRATIME) && S_ISDIR(inode->i_mode)))
                return 0;

        if (vfsmnt->mnt_flags & MNT_RELATIME) {
                if ((timespec_compare(&inode->i_atime, &inode->i_mtime) <= 0) ||
                    (timespec_compare(&inode->i_atime, &inode->i_ctime) <= 0))
                        return 1;

                return 0;
        }

        now = CURRENT_TIME;
        if ((now.tv_sec - inode->i_atime.tv_sec <= osb->s_atime_quantum))
                return 0;
        else
                return 1;
}

int ocfs2_update_inode_atime(struct inode *inode,
                             struct buffer_head *bh)
{
        int ret;
        struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
        handle_t *handle;
        struct ocfs2_dinode *di = (struct ocfs2_dinode *) bh->b_data;

        mlog_entry_void();

        handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
        if (handle == NULL) {
                ret = -ENOMEM;
                mlog_errno(ret);
                goto out;
        }

        ret = ocfs2_journal_access(handle, inode, bh,
                                   OCFS2_JOURNAL_ACCESS_WRITE);
        if (ret) {
                mlog_errno(ret);
                goto out_commit;
        }

        /*
         * Don't use ocfs2_mark_inode_dirty() here as we don't always
         * have i_mutex to guard against concurrent changes to other
         * inode fields.
         */
        inode->i_atime = CURRENT_TIME;
        di->i_atime = cpu_to_le64(inode->i_atime.tv_sec);
        di->i_atime_nsec = cpu_to_le32(inode->i_atime.tv_nsec);

        ret = ocfs2_journal_dirty(handle, bh);
        if (ret < 0)
                mlog_errno(ret);

out_commit:
        ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
out:
        mlog_exit(ret);
        return ret;
}
static int ocfs2_set_inode_size(handle_t *handle,
                                struct inode *inode,
                                struct buffer_head *fe_bh,
                                u64 new_i_size)
{
        int status;

        mlog_entry_void();
        i_size_write(inode, new_i_size);
        inode->i_blocks = ocfs2_inode_sector_count(inode);
        inode->i_ctime = inode->i_mtime = CURRENT_TIME;

        status = ocfs2_mark_inode_dirty(handle, inode, fe_bh);
        if (status < 0) {
                mlog_errno(status);
                goto bail;
        }

bail:
        mlog_exit(status);
        return status;
}

static int ocfs2_simple_size_update(struct inode *inode,
                                    struct buffer_head *di_bh,
                                    u64 new_i_size)
{
        int ret;
        struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
        handle_t *handle = NULL;

        handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
        if (handle == NULL) {
                ret = -ENOMEM;
                mlog_errno(ret);
                goto out;
        }

        ret = ocfs2_set_inode_size(handle, inode, di_bh,
                                   new_i_size);
        if (ret < 0)
                mlog_errno(ret);

        ocfs2_commit_trans(osb, handle);
out:
        return ret;
}
static int ocfs2_orphan_for_truncate(struct ocfs2_super *osb,
                                     struct inode *inode,
                                     struct buffer_head *fe_bh,
                                     u64 new_i_size)
{
        int status;
        handle_t *handle;
        struct ocfs2_dinode *di;
        u64 cluster_bytes;

        mlog_entry_void();

        /* TODO: This needs to actually orphan the inode in this
         * transaction. */
        handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
        if (IS_ERR(handle)) {
                status = PTR_ERR(handle);
                mlog_errno(status);
                goto out;
        }

        status = ocfs2_journal_access(handle, inode, fe_bh,
                                      OCFS2_JOURNAL_ACCESS_WRITE);
        if (status < 0) {
                mlog_errno(status);
                goto out_commit;
        }

        /*
         * Do this before setting i_size.
         */
        cluster_bytes = ocfs2_align_bytes_to_clusters(inode->i_sb, new_i_size);
        status = ocfs2_zero_range_for_truncate(inode, handle, new_i_size,
                                               cluster_bytes);
        if (status) {
                mlog_errno(status);
                goto out_commit;
        }

        i_size_write(inode, new_i_size);
        inode->i_ctime = inode->i_mtime = CURRENT_TIME;

        di = (struct ocfs2_dinode *) fe_bh->b_data;
        di->i_size = cpu_to_le64(new_i_size);
        di->i_ctime = di->i_mtime = cpu_to_le64(inode->i_ctime.tv_sec);
        di->i_ctime_nsec = di->i_mtime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec);

        status = ocfs2_journal_dirty(handle, fe_bh);
        if (status < 0)
                mlog_errno(status);

out_commit:
        ocfs2_commit_trans(osb, handle);
out:
        mlog_exit(status);
        return status;
}
static int ocfs2_truncate_file(struct inode *inode,
                               struct buffer_head *di_bh,
                               u64 new_i_size)
{
        int status = 0;
        struct ocfs2_dinode *fe = NULL;
        struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
        struct ocfs2_truncate_context *tc = NULL;

        mlog_entry("(inode = %llu, new_i_size = %llu\n",
                   (unsigned long long)OCFS2_I(inode)->ip_blkno,
                   (unsigned long long)new_i_size);

        fe = (struct ocfs2_dinode *) di_bh->b_data;
        if (!OCFS2_IS_VALID_DINODE(fe)) {
                OCFS2_RO_ON_INVALID_DINODE(inode->i_sb, fe);
                status = -EIO;
                goto bail;
        }

        mlog_bug_on_msg(le64_to_cpu(fe->i_size) != i_size_read(inode),
                        "Inode %llu, inode i_size = %lld != di "
                        "i_size = %llu, i_flags = 0x%x\n",
                        (unsigned long long)OCFS2_I(inode)->ip_blkno,
                        i_size_read(inode),
                        (unsigned long long)le64_to_cpu(fe->i_size),
                        le32_to_cpu(fe->i_flags));

        if (new_i_size > le64_to_cpu(fe->i_size)) {
                mlog(0, "asked to truncate file with size (%llu) to size (%llu)!\n",
                     (unsigned long long)le64_to_cpu(fe->i_size),
                     (unsigned long long)new_i_size);
                status = -EINVAL;
                mlog_errno(status);
                goto bail;
        }

        mlog(0, "inode %llu, i_size = %llu, new_i_size = %llu\n",
             (unsigned long long)le64_to_cpu(fe->i_blkno),
             (unsigned long long)le64_to_cpu(fe->i_size),
             (unsigned long long)new_i_size);

        /* lets handle the simple truncate cases before doing any more
         * cluster locking. */
        if (new_i_size == le64_to_cpu(fe->i_size))
                goto bail;

        down_write(&OCFS2_I(inode)->ip_alloc_sem);

        /*
         * The inode lock forced other nodes to sync and drop their
         * pages, which (correctly) happens even if we have a truncate
         * without allocation change - ocfs2 cluster sizes can be much
         * greater than page size, so we have to truncate them
         * anyway.
         */
        unmap_mapping_range(inode->i_mapping, new_i_size + PAGE_SIZE - 1, 0, 1);
        truncate_inode_pages(inode->i_mapping, new_i_size);

        if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
                status = ocfs2_truncate_inline(inode, di_bh, new_i_size,
                                               i_size_read(inode), 1);
                if (status)
                        mlog_errno(status);

                goto bail_unlock_sem;
        }

        /* alright, we're going to need to do a full blown alloc size
         * change. Orphan the inode so that recovery can complete the
         * truncate if necessary. This does the task of marking
         * i_size. */
        status = ocfs2_orphan_for_truncate(osb, inode, di_bh, new_i_size);
        if (status < 0) {
                mlog_errno(status);
                goto bail_unlock_sem;
        }

        status = ocfs2_prepare_truncate(osb, inode, di_bh, &tc);
        if (status < 0) {
                mlog_errno(status);
                goto bail_unlock_sem;
        }

        status = ocfs2_commit_truncate(osb, inode, di_bh, tc);
        if (status < 0) {
                mlog_errno(status);
                goto bail_unlock_sem;
        }

        /* TODO: orphan dir cleanup here. */
bail_unlock_sem:
        up_write(&OCFS2_I(inode)->ip_alloc_sem);

bail:
        mlog_exit(status);
        return status;
}
/*
 * extend allocation only here.
 * we'll update all the disk stuff, and oip->alloc_size
 *
 * expect stuff to be locked, a transaction started and enough data /
 * metadata reservations in the contexts.
 *
 * Will return -EAGAIN, and a reason if a restart is needed.
 * If passed in, *reason will always be set, even in error.
 */
int ocfs2_do_extend_allocation(struct ocfs2_super *osb,
                               struct inode *inode,
                               u32 *logical_offset,
                               u32 clusters_to_add,
                               int mark_unwritten,
                               struct buffer_head *fe_bh,
                               handle_t *handle,
                               struct ocfs2_alloc_context *data_ac,
                               struct ocfs2_alloc_context *meta_ac,
                               enum ocfs2_alloc_restarted *reason_ret)
{
        int status = 0;
        int free_extents;
        struct ocfs2_dinode *fe = (struct ocfs2_dinode *) fe_bh->b_data;
        enum ocfs2_alloc_restarted reason = RESTART_NONE;
        u32 bit_off, num_bits;
        u64 block;
        u8 flags = 0;

        BUG_ON(!clusters_to_add);

        if (mark_unwritten)
                flags = OCFS2_EXT_UNWRITTEN;

        free_extents = ocfs2_num_free_extents(osb, inode, fe_bh);
        if (free_extents < 0) {
                status = free_extents;
                mlog_errno(status);
                goto leave;
        }

        /* there are two cases which could cause us to EAGAIN in the
         * we-need-more-metadata case:
         * 1) we haven't reserved *any*
         * 2) we are so fragmented, we've needed to add metadata too
         *    many times. */
        if (!free_extents && !meta_ac) {
                mlog(0, "we haven't reserved any metadata!\n");
                status = -EAGAIN;
                reason = RESTART_META;
                goto leave;
        } else if ((!free_extents)
                   && (ocfs2_alloc_context_bits_left(meta_ac)
                       < ocfs2_extend_meta_needed(&fe->id2.i_list))) {
                mlog(0, "filesystem is really fragmented...\n");
                status = -EAGAIN;
                reason = RESTART_META;
                goto leave;
        }

        status = __ocfs2_claim_clusters(osb, handle, data_ac, 1,
                                        clusters_to_add, &bit_off, &num_bits);
        if (status < 0) {
                if (status != -ENOSPC)
                        mlog_errno(status);
                goto leave;
        }

        BUG_ON(num_bits > clusters_to_add);

        /* reserve our write early -- insert_extent may update the inode */
        status = ocfs2_journal_access(handle, inode, fe_bh,
                                      OCFS2_JOURNAL_ACCESS_WRITE);
        if (status < 0) {
                mlog_errno(status);
                goto leave;
        }

        block = ocfs2_clusters_to_blocks(osb->sb, bit_off);
        mlog(0, "Allocating %u clusters at block %u for inode %llu\n",
             num_bits, bit_off, (unsigned long long)OCFS2_I(inode)->ip_blkno);
        status = ocfs2_insert_extent(osb, handle, inode, fe_bh,
                                     *logical_offset, block, num_bits,
                                     flags, meta_ac);
        if (status < 0) {
                mlog_errno(status);
                goto leave;
        }

        status = ocfs2_journal_dirty(handle, fe_bh);
        if (status < 0) {
                mlog_errno(status);
                goto leave;
        }

        clusters_to_add -= num_bits;
        *logical_offset += num_bits;

        if (clusters_to_add) {
                mlog(0, "need to alloc once more, clusters = %u, wanted = "
                     "%u\n", fe->i_clusters, clusters_to_add);
                status = -EAGAIN;
                reason = RESTART_TRANS;
        }

leave:
        mlog_exit(status);
        if (reason_ret)
                *reason_ret = reason;
        return status;
}
/*
 * For a given allocation, determine which allocators will need to be
 * accessed, and lock them, reserving the appropriate number of bits.
 *
 * Sparse file systems call this from ocfs2_write_begin_nolock()
 * and ocfs2_allocate_unwritten_extents().
 *
 * File systems which don't support holes call this from
 * ocfs2_extend_allocation().
 */
int ocfs2_lock_allocators(struct inode *inode, struct buffer_head *di_bh,
                          u32 clusters_to_add, u32 extents_to_split,
                          struct ocfs2_alloc_context **data_ac,
                          struct ocfs2_alloc_context **meta_ac)
{
        int ret = 0, num_free_extents;
        unsigned int max_recs_needed = clusters_to_add + 2 * extents_to_split;
        struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
        struct ocfs2_dinode *di = (struct ocfs2_dinode *) di_bh->b_data;

        *meta_ac = NULL;
        if (data_ac)
                *data_ac = NULL;

        BUG_ON(clusters_to_add != 0 && data_ac == NULL);

        mlog(0, "extend inode %llu, i_size = %lld, di->i_clusters = %u, "
             "clusters_to_add = %u, extents_to_split = %u\n",
             (unsigned long long)OCFS2_I(inode)->ip_blkno, (long long)i_size_read(inode),
             le32_to_cpu(di->i_clusters), clusters_to_add, extents_to_split);

        num_free_extents = ocfs2_num_free_extents(osb, inode, di_bh);
        if (num_free_extents < 0) {
                ret = num_free_extents;
                mlog_errno(ret);
                goto out;
        }

        /*
         * Sparse allocation file systems need to be more conservative
         * with reserving room for expansion - the actual allocation
         * happens while we've got a journal handle open so re-taking
         * a cluster lock (because we ran out of room for another
         * extent) will violate ordering rules.
         *
         * Most of the time we'll only be seeing this 1 cluster at a time
         * anyway.
         *
         * Always lock for any unwritten extents - we might want to
         * add blocks during a split.
         */
        if (!num_free_extents ||
            (ocfs2_sparse_alloc(osb) && num_free_extents < max_recs_needed)) {
                ret = ocfs2_reserve_new_metadata(osb, &di->id2.i_list, meta_ac);
                if (ret < 0) {
                        if (ret != -ENOSPC)
                                mlog_errno(ret);
                        goto out;
                }
        }

        if (clusters_to_add == 0)
                goto out;

        ret = ocfs2_reserve_clusters(osb, clusters_to_add, data_ac);
        if (ret < 0) {
                if (ret != -ENOSPC)
                        mlog_errno(ret);
                goto out;
        }

out:
        if (ret) {
                if (*meta_ac) {
                        ocfs2_free_alloc_context(*meta_ac);
                        *meta_ac = NULL;
                }

                /*
                 * We cannot have an error and a non null *data_ac.
                 */
        }

        return ret;
}
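
/*
 * Add clusters_to_add clusters of allocation to the inode, starting at
 * logical cluster logical_start. Loops, restarting the transaction or
 * the whole function as directed by ocfs2_do_extend_allocation(), until
 * the full request has been satisfied.
 */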
static int __ocfs2_extend_allocation(struct inode *inode, u32 logical_start,
                                     u32 clusters_to_add, int mark_unwritten)
{
        int status = 0;
        int restart_func = 0;
        int credits;
        u32 prev_clusters;
        struct buffer_head *bh = NULL;
        struct ocfs2_dinode *fe = NULL;
        handle_t *handle = NULL;
        struct ocfs2_alloc_context *data_ac = NULL;
        struct ocfs2_alloc_context *meta_ac = NULL;
        enum ocfs2_alloc_restarted why;
        struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);

        mlog_entry("(clusters_to_add = %u)\n", clusters_to_add);

        /*
         * This function only exists for file systems which don't
         * support holes.
         */
        BUG_ON(mark_unwritten && !ocfs2_sparse_alloc(osb));

        status = ocfs2_read_block(osb, OCFS2_I(inode)->ip_blkno, &bh,
                                  OCFS2_BH_CACHED, inode);
        if (status < 0) {
                mlog_errno(status);
                goto leave;
        }

        fe = (struct ocfs2_dinode *) bh->b_data;
        if (!OCFS2_IS_VALID_DINODE(fe)) {
                OCFS2_RO_ON_INVALID_DINODE(inode->i_sb, fe);
                status = -EIO;
                goto leave;
        }

restart_all:
        BUG_ON(le32_to_cpu(fe->i_clusters) != OCFS2_I(inode)->ip_clusters);

        status = ocfs2_lock_allocators(inode, bh, clusters_to_add, 0, &data_ac,
                                       &meta_ac);
        if (status) {
                mlog_errno(status);
                goto leave;
        }

        credits = ocfs2_calc_extend_credits(osb->sb, &fe->id2.i_list,
                                            clusters_to_add);
        handle = ocfs2_start_trans(osb, credits);
        if (IS_ERR(handle)) {
                status = PTR_ERR(handle);
                handle = NULL;
                mlog_errno(status);
                goto leave;
        }

restarted_transaction:
        /* reserve a write to the file entry early on - so that if we
         * run out of credits in the allocation path, we can still
         * update i_size. */
        status = ocfs2_journal_access(handle, inode, bh,
                                      OCFS2_JOURNAL_ACCESS_WRITE);
        if (status < 0) {
                mlog_errno(status);
                goto leave;
        }

        prev_clusters = OCFS2_I(inode)->ip_clusters;

        status = ocfs2_do_extend_allocation(osb,
                                            inode,
                                            &logical_start,
                                            clusters_to_add,
                                            mark_unwritten,
                                            bh,
                                            handle,
                                            data_ac,
                                            meta_ac,
                                            &why);
        if ((status < 0) && (status != -EAGAIN)) {
                if (status != -ENOSPC)
                        mlog_errno(status);
                goto leave;
        }

        status = ocfs2_journal_dirty(handle, bh);
        if (status < 0) {
                mlog_errno(status);
                goto leave;
        }

        spin_lock(&OCFS2_I(inode)->ip_lock);
        clusters_to_add -= (OCFS2_I(inode)->ip_clusters - prev_clusters);
        spin_unlock(&OCFS2_I(inode)->ip_lock);

        if (why != RESTART_NONE && clusters_to_add) {
                if (why == RESTART_META) {
                        mlog(0, "restarting function.\n");
                        restart_func = 1;
                } else {
                        BUG_ON(why != RESTART_TRANS);

                        mlog(0, "restarting transaction.\n");
                        /* TODO: This can be more intelligent. */
                        credits = ocfs2_calc_extend_credits(osb->sb,
                                                            &fe->id2.i_list,
                                                            clusters_to_add);
                        status = ocfs2_extend_trans(handle, credits);
                        if (status < 0) {
                                /* handle still has to be committed at
                                 * this point. */
                                status = -ENOMEM;
                                mlog_errno(status);
                                goto leave;
                        }
                        goto restarted_transaction;
                }
        }

        mlog(0, "fe: i_clusters = %u, i_size=%llu\n",
             le32_to_cpu(fe->i_clusters),
             (unsigned long long)le64_to_cpu(fe->i_size));
        mlog(0, "inode: ip_clusters=%u, i_size=%lld\n",
             OCFS2_I(inode)->ip_clusters, (long long)i_size_read(inode));

leave:
        if (handle) {
                ocfs2_commit_trans(osb, handle);
                handle = NULL;
        }
        if (data_ac) {
                ocfs2_free_alloc_context(data_ac);
                data_ac = NULL;
        }
        if (meta_ac) {
                ocfs2_free_alloc_context(meta_ac);
                meta_ac = NULL;
        }
        if ((!status) && restart_func) {
                restart_func = 0;
                goto restart_all;
        }
        if (bh) {
                brelse(bh);
                bh = NULL;
        }

        mlog_exit(status);
        return status;
}
/* Some parts of this taken from generic_cont_expand, which turned out
 * to be too fragile to do exactly what we need without us having to
 * worry about recursive locking in ->prepare_write() and
 * ->commit_write(). */
static int ocfs2_write_zero_page(struct inode *inode,
                                 u64 size)
{
        struct address_space *mapping = inode->i_mapping;
        struct page *page;
        unsigned long index;
        unsigned int offset;
        handle_t *handle = NULL;
        int ret;

        offset = (size & (PAGE_CACHE_SIZE - 1)); /* Within page */
        /* ugh.  in prepare/commit_write, if from==to==start of block, we
        ** skip the prepare.  make sure we never send an offset for the start
        ** of a block
        */
        if ((offset & (inode->i_sb->s_blocksize - 1)) == 0) {
                offset++;
        }
        index = size >> PAGE_CACHE_SHIFT;

        page = grab_cache_page(mapping, index);
        if (!page) {
                ret = -ENOMEM;
                mlog_errno(ret);
                goto out;
        }

        ret = ocfs2_prepare_write_nolock(inode, page, offset, offset);
        if (ret < 0) {
                mlog_errno(ret);
                goto out_unlock;
        }

        if (ocfs2_should_order_data(inode)) {
                handle = ocfs2_start_walk_page_trans(inode, page, offset,
                                                     offset);
                if (IS_ERR(handle)) {
                        ret = PTR_ERR(handle);
                        handle = NULL;
                        goto out_unlock;
                }
        }

        /* must not update i_size! */
        ret = block_commit_write(page, offset, offset);
        if (ret < 0)
                mlog_errno(ret);
        else
                ret = 0;

        if (handle)
                ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
out_unlock:
        unlock_page(page);
        page_cache_release(page);
out:
        return ret;
}
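
/*
 * Zero the region between the block-aligned i_size and zero_to_size, one
 * block at a time, so a later size extension doesn't expose stale data.
 */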
static int ocfs2_zero_extend(struct inode *inode,
                             u64 zero_to_size)
{
        int ret = 0;
        u64 start_off;
        struct super_block *sb = inode->i_sb;

        start_off = ocfs2_align_bytes_to_blocks(sb, i_size_read(inode));
        while (start_off < zero_to_size) {
                ret = ocfs2_write_zero_page(inode, start_off);
                if (ret < 0) {
                        mlog_errno(ret);
                        goto out;
                }

                start_off += sb->s_blocksize;

                /*
                 * Very large extends have the potential to lock up
                 * the cpu for extended periods of time.
                 */
                cond_resched();
        }

out:
        return ret;
}

int ocfs2_extend_no_holes(struct inode *inode, u64 new_i_size, u64 zero_to)
{
        int ret;
        u32 clusters_to_add;
        struct ocfs2_inode_info *oi = OCFS2_I(inode);

        clusters_to_add = ocfs2_clusters_for_bytes(inode->i_sb, new_i_size);
        if (clusters_to_add < oi->ip_clusters)
                clusters_to_add = 0;
        else
                clusters_to_add -= oi->ip_clusters;

        if (clusters_to_add) {
                ret = __ocfs2_extend_allocation(inode, oi->ip_clusters,
                                                clusters_to_add, 0);
                if (ret) {
                        mlog_errno(ret);
                        goto out;
                }
        }

        /*
         * Call this even if we don't add any clusters to the tree. We
         * still need to zero the area between the old i_size and the
         * new i_size.
         */
        ret = ocfs2_zero_extend(inode, zero_to);
        if (ret < 0)
                mlog_errno(ret);

out:
        return ret;
}
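
/*
 * Extend a file to new_i_size. Inline-data inodes are either grown in
 * place or converted to extents; filesystems without sparse file support
 * also allocate and zero the new region here. The i_size update itself
 * happens at the end via ocfs2_simple_size_update().
 */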
static int ocfs2_extend_file(struct inode *inode,
                             struct buffer_head *di_bh,
                             u64 new_i_size)
{
        int ret = 0;
        struct ocfs2_inode_info *oi = OCFS2_I(inode);

        BUG_ON(!di_bh);

        /* setattr sometimes calls us like this. */
        if (new_i_size == 0)
                goto out;

        if (i_size_read(inode) == new_i_size)
                goto out;
        BUG_ON(new_i_size < i_size_read(inode));

        /*
         * Fall through for converting inline data, even if the fs
         * supports sparse files.
         *
         * The check for inline data here is legal - nobody can add
         * the feature since we have i_mutex. We must check it again
         * after acquiring ip_alloc_sem though, as paths like mmap
         * might have raced us to converting the inode to extents.
         */
        if (!(oi->ip_dyn_features & OCFS2_INLINE_DATA_FL)
            && ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb)))
                goto out_update_size;

        /*
         * The alloc sem blocks people in read/write from reading our
         * allocation until we're done changing it. We depend on
         * i_mutex to block other extend/truncate calls while we're
         * here.
         */
        down_write(&oi->ip_alloc_sem);

        if (oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
                /*
                 * We can optimize small extends by keeping the inode's
                 * inline data.
                 */
                if (ocfs2_size_fits_inline_data(di_bh, new_i_size)) {
                        up_write(&oi->ip_alloc_sem);
                        goto out_update_size;
                }

                ret = ocfs2_convert_inline_data_to_extents(inode, di_bh);
                if (ret) {
                        up_write(&oi->ip_alloc_sem);

                        mlog_errno(ret);
                        goto out;
                }
        }

        if (!ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb)))
                ret = ocfs2_extend_no_holes(inode, new_i_size, new_i_size);

        up_write(&oi->ip_alloc_sem);

        if (ret < 0) {
                mlog_errno(ret);
                goto out;
        }

out_update_size:
        ret = ocfs2_simple_size_update(inode, di_bh, new_i_size);
        if (ret < 0)
                mlog_errno(ret);

out:
        return ret;
}
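
/*
 * setattr entry point. Size changes are serialized against other nodes
 * via the rw lock and the cluster inode lock before being dispatched to
 * ocfs2_truncate_file() or ocfs2_extend_file().
 */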
int ocfs2_setattr(struct dentry *dentry, struct iattr *attr)
{
        int status = 0, size_change;
        struct inode *inode = dentry->d_inode;
        struct super_block *sb = inode->i_sb;
        struct ocfs2_super *osb = OCFS2_SB(sb);
        struct buffer_head *bh = NULL;
        handle_t *handle = NULL;

        mlog_entry("(0x%p, '%.*s')\n", dentry,
                   dentry->d_name.len, dentry->d_name.name);

        /* ensuring we don't even attempt to truncate a symlink */
        if (S_ISLNK(inode->i_mode))
                attr->ia_valid &= ~ATTR_SIZE;

        if (attr->ia_valid & ATTR_MODE)
                mlog(0, "mode change: %d\n", attr->ia_mode);
        if (attr->ia_valid & ATTR_UID)
                mlog(0, "uid change: %d\n", attr->ia_uid);
        if (attr->ia_valid & ATTR_GID)
                mlog(0, "gid change: %d\n", attr->ia_gid);
        if (attr->ia_valid & ATTR_SIZE)
                mlog(0, "size change...\n");
        if (attr->ia_valid & (ATTR_ATIME | ATTR_MTIME | ATTR_CTIME))
                mlog(0, "time change...\n");

#define OCFS2_VALID_ATTRS (ATTR_ATIME | ATTR_MTIME | ATTR_CTIME | ATTR_SIZE \
                           | ATTR_GID | ATTR_UID | ATTR_MODE)
        if (!(attr->ia_valid & OCFS2_VALID_ATTRS)) {
                mlog(0, "can't handle attrs: 0x%x\n", attr->ia_valid);
                return 0;
        }

        status = inode_change_ok(inode, attr);
        if (status)
                return status;

        size_change = S_ISREG(inode->i_mode) && attr->ia_valid & ATTR_SIZE;
        if (size_change) {
                status = ocfs2_rw_lock(inode, 1);
                if (status < 0) {
                        mlog_errno(status);
                        goto bail;
                }
        }

        status = ocfs2_inode_lock(inode, &bh, 1);
        if (status < 0) {
                if (status != -ENOENT)
                        mlog_errno(status);
                goto bail_unlock_rw;
        }

        if (size_change && attr->ia_size != i_size_read(inode)) {
                if (attr->ia_size > sb->s_maxbytes) {
                        status = -EFBIG;
                        goto bail_unlock;
                }

                if (i_size_read(inode) > attr->ia_size)
                        status = ocfs2_truncate_file(inode, bh, attr->ia_size);
                else
                        status = ocfs2_extend_file(inode, bh, attr->ia_size);
                if (status < 0) {
                        if (status != -ENOSPC)
                                mlog_errno(status);
                        status = -ENOSPC;
                        goto bail_unlock;
                }
        }

        handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
        if (IS_ERR(handle)) {
                status = PTR_ERR(handle);
                mlog_errno(status);
                goto bail_unlock;
        }

        /*
         * This will intentionally not wind up calling vmtruncate(),
         * since all the work for a size change has been done above.
         * Otherwise, we could get into problems with truncate as
         * ip_alloc_sem is used there to protect against i_size
         * changes.
         */
        status = inode_setattr(inode, attr);
        if (status < 0) {
                mlog_errno(status);
                goto bail_commit;
        }

        status = ocfs2_mark_inode_dirty(handle, inode, bh);
        if (status < 0)
                mlog_errno(status);

bail_commit:
        ocfs2_commit_trans(osb, handle);
bail_unlock:
        ocfs2_inode_unlock(inode, 1);
bail_unlock_rw:
        if (size_change)
                ocfs2_rw_unlock(inode, 1);
bail:
        if (bh)
                brelse(bh);

        mlog_exit(status);
        return status;
}
int ocfs2_getattr(struct vfsmount *mnt,
                  struct dentry *dentry,
                  struct kstat *stat)
{
        struct inode *inode = dentry->d_inode;
        struct super_block *sb = dentry->d_inode->i_sb;
        struct ocfs2_super *osb = sb->s_fs_info;
        int err;

        mlog_entry_void();

        err = ocfs2_inode_revalidate(dentry);
        if (err) {
                if (err != -ENOENT)
                        mlog_errno(err);
                goto bail;
        }

        generic_fillattr(inode, stat);

        /* We set the blksize from the cluster size for performance */
        stat->blksize = osb->s_clustersize;

bail:
        mlog_exit(err);

        return err;
}

int ocfs2_permission(struct inode *inode, int mask)
{
        int ret;

        mlog_entry_void();

        ret = ocfs2_inode_lock(inode, NULL, 0);
        if (ret) {
                if (ret != -ENOENT)
                        mlog_errno(ret);
                goto out;
        }

        ret = generic_permission(inode, mask, NULL);

        ocfs2_inode_unlock(inode, 0);
out:
        mlog_exit(ret);
        return ret;
}
static int __ocfs2_write_remove_suid(struct inode *inode,
                                     struct buffer_head *bh)
{
        int ret;
        handle_t *handle;
        struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
        struct ocfs2_dinode *di;

        mlog_entry("(Inode %llu, mode 0%o)\n",
                   (unsigned long long)OCFS2_I(inode)->ip_blkno, inode->i_mode);

        handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
        if (handle == NULL) {
                ret = -ENOMEM;
                mlog_errno(ret);
                goto out;
        }

        ret = ocfs2_journal_access(handle, inode, bh,
                                   OCFS2_JOURNAL_ACCESS_WRITE);
        if (ret < 0) {
                mlog_errno(ret);
                goto out_trans;
        }

        inode->i_mode &= ~S_ISUID;
        if ((inode->i_mode & S_ISGID) && (inode->i_mode & S_IXGRP))
                inode->i_mode &= ~S_ISGID;

        di = (struct ocfs2_dinode *) bh->b_data;
        di->i_mode = cpu_to_le16(inode->i_mode);

        ret = ocfs2_journal_dirty(handle, bh);
        if (ret < 0)
                mlog_errno(ret);

out_trans:
        ocfs2_commit_trans(osb, handle);
out:
        mlog_exit(ret);
        return ret;
}
/*
 * Will look for holes and unwritten extents in the range starting at
 * pos for count bytes (inclusive).
 */
static int ocfs2_check_range_for_holes(struct inode *inode, loff_t pos,
                                       size_t count)
{
        int ret = 0;
        unsigned int extent_flags;
        u32 cpos, clusters, extent_len, phys_cpos;
        struct super_block *sb = inode->i_sb;

        cpos = pos >> OCFS2_SB(sb)->s_clustersize_bits;
        clusters = ocfs2_clusters_for_bytes(sb, pos + count) - cpos;

        while (clusters) {
                ret = ocfs2_get_clusters(inode, cpos, &phys_cpos, &extent_len,
                                         &extent_flags);
                if (ret < 0) {
                        mlog_errno(ret);
                        goto out;
                }

                if (phys_cpos == 0 || (extent_flags & OCFS2_EXT_UNWRITTEN)) {
                        ret = 1;
                        break;
                }

                if (extent_len > clusters)
                        extent_len = clusters;

                clusters -= extent_len;
                cpos += extent_len;
        }
out:
        return ret;
}

static int ocfs2_write_remove_suid(struct inode *inode)
{
        int ret;
        struct buffer_head *bh = NULL;
        struct ocfs2_inode_info *oi = OCFS2_I(inode);

        ret = ocfs2_read_block(OCFS2_SB(inode->i_sb),
                               oi->ip_blkno, &bh, OCFS2_BH_CACHED, inode);
        if (ret < 0) {
                mlog_errno(ret);
                goto out;
        }

        ret = __ocfs2_write_remove_suid(inode, bh);
out:
        brelse(bh);
        return ret;
}
/*
 * Allocate enough extents to cover the region starting at byte offset
 * start for len bytes. Existing extents are skipped, any extents
 * added are marked as "unwritten".
 */
static int ocfs2_allocate_unwritten_extents(struct inode *inode,
                                            u64 start, u64 len)
{
        int ret;
        u32 cpos, phys_cpos, clusters, alloc_size;
        u64 end = start + len;
        struct buffer_head *di_bh = NULL;

        if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
                ret = ocfs2_read_block(OCFS2_SB(inode->i_sb),
                                       OCFS2_I(inode)->ip_blkno, &di_bh,
                                       OCFS2_BH_CACHED, inode);
                if (ret) {
                        mlog_errno(ret);
                        goto out;
                }

                /*
                 * Nothing to do if the requested reservation range
                 * fits within the inode.
                 */
                if (ocfs2_size_fits_inline_data(di_bh, end))
                        goto out;

                ret = ocfs2_convert_inline_data_to_extents(inode, di_bh);
                if (ret) {
                        mlog_errno(ret);
                        goto out;
                }
        }

        /*
         * We consider both start and len to be inclusive.
         */
        cpos = start >> OCFS2_SB(inode->i_sb)->s_clustersize_bits;
        clusters = ocfs2_clusters_for_bytes(inode->i_sb, start + len);
        clusters -= cpos;

        while (clusters) {
                ret = ocfs2_get_clusters(inode, cpos, &phys_cpos,
                                         &alloc_size, NULL);
                if (ret) {
                        mlog_errno(ret);
                        goto out;
                }

                /*
                 * Hole or existing extent len can be arbitrary, so
                 * cap it to our own allocation request.
                 */
                if (alloc_size > clusters)
                        alloc_size = clusters;

                if (phys_cpos) {
                        /*
                         * We already have an allocation at this
                         * region so we can safely skip it.
                         */
                        goto next;
                }

                ret = __ocfs2_extend_allocation(inode, cpos, alloc_size, 1);
                if (ret) {
                        if (ret != -ENOSPC)
                                mlog_errno(ret);
                        goto out;
                }

next:
                cpos += alloc_size;
                clusters -= alloc_size;
        }

        ret = 0;
out:
        brelse(di_bh);
        return ret;
}
static int __ocfs2_remove_inode_range(struct inode *inode,
                                      struct buffer_head *di_bh,
                                      u32 cpos, u32 phys_cpos, u32 len,
                                      struct ocfs2_cached_dealloc_ctxt *dealloc)
{
        int ret;
        u64 phys_blkno = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos);
        struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
        struct inode *tl_inode = osb->osb_tl_inode;
        handle_t *handle;
        struct ocfs2_alloc_context *meta_ac = NULL;
        struct ocfs2_dinode *di = (struct ocfs2_dinode *) di_bh->b_data;

        ret = ocfs2_lock_allocators(inode, di_bh, 0, 1, NULL, &meta_ac);
        if (ret) {
                mlog_errno(ret);
                return ret;
        }

        mutex_lock(&tl_inode->i_mutex);

        if (ocfs2_truncate_log_needs_flush(osb)) {
                ret = __ocfs2_flush_truncate_log(osb);
                if (ret < 0) {
                        mlog_errno(ret);
                        goto out;
                }
        }

        handle = ocfs2_start_trans(osb, OCFS2_REMOVE_EXTENT_CREDITS);
        if (handle == NULL) {
                ret = -ENOMEM;
                mlog_errno(ret);
                goto out;
        }

        ret = ocfs2_journal_access(handle, inode, di_bh,
                                   OCFS2_JOURNAL_ACCESS_WRITE);
        if (ret) {
                mlog_errno(ret);
                goto out;
        }

        ret = ocfs2_remove_extent(inode, di_bh, cpos, len, handle, meta_ac,
                                  dealloc);
        if (ret) {
                mlog_errno(ret);
                goto out_commit;
        }

        OCFS2_I(inode)->ip_clusters -= len;
        di->i_clusters = cpu_to_le32(OCFS2_I(inode)->ip_clusters);

        ret = ocfs2_journal_dirty(handle, di_bh);
        if (ret) {
                mlog_errno(ret);
                goto out_commit;
        }

        ret = ocfs2_truncate_log_append(osb, handle, phys_blkno, len);
        if (ret)
                mlog_errno(ret);

out_commit:
        ocfs2_commit_trans(osb, handle);
out:
        mutex_unlock(&tl_inode->i_mutex);

        if (meta_ac)
                ocfs2_free_alloc_context(meta_ac);

        return ret;
}
/*
 * Truncate a byte range, avoiding pages within partial clusters. This
 * preserves those pages for the zeroing code to write to.
 */
static void ocfs2_truncate_cluster_pages(struct inode *inode, u64 byte_start,
                                         u64 byte_len)
{
        struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
        loff_t start, end;
        struct address_space *mapping = inode->i_mapping;

        start = (loff_t)ocfs2_align_bytes_to_clusters(inode->i_sb, byte_start);
        end = byte_start + byte_len;
        end = end & ~(osb->s_clustersize - 1);

        if (start < end) {
                unmap_mapping_range(mapping, start, end - start, 0);
                truncate_inode_pages_range(mapping, start, end - 1);
        }
}

static int ocfs2_zero_partial_clusters(struct inode *inode,
                                       u64 start, u64 len)
{
        int ret = 0;
        u64 tmpend, end = start + len;
        struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
        unsigned int csize = osb->s_clustersize;
        handle_t *handle;

        /*
         * The "start" and "end" values are NOT necessarily part of
         * the range whose allocation is being deleted. Rather, this
         * is what the user passed in with the request. We must zero
         * partial clusters here. There's no need to worry about
         * physical allocation - the zeroing code knows to skip holes.
         */
        mlog(0, "byte start: %llu, end: %llu\n",
             (unsigned long long)start, (unsigned long long)end);

        /*
         * If both edges are on a cluster boundary then there's no
         * zeroing required as the region is part of the allocation to
         * be truncated.
         */
        if ((start & (csize - 1)) == 0 && (end & (csize - 1)) == 0)
                goto out;

        handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
        if (handle == NULL) {
                ret = -ENOMEM;
                mlog_errno(ret);
                goto out;
        }

        /*
         * We want to get the byte offset of the end of the 1st cluster.
         */
        tmpend = (u64)osb->s_clustersize + (start & ~(osb->s_clustersize - 1));
        if (tmpend > end)
                tmpend = end;

        mlog(0, "1st range: start: %llu, tmpend: %llu\n",
             (unsigned long long)start, (unsigned long long)tmpend);

        ret = ocfs2_zero_range_for_truncate(inode, handle, start, tmpend);
        if (ret)
                mlog_errno(ret);

        if (tmpend < end) {
                /*
                 * This may make start and end equal, but the zeroing
                 * code will skip any work in that case so there's no
                 * need to catch it up here.
                 */
                start = end & ~(osb->s_clustersize - 1);

                mlog(0, "2nd range: start: %llu, end: %llu\n",
                     (unsigned long long)start, (unsigned long long)end);

                ret = ocfs2_zero_range_for_truncate(inode, handle, start, end);
                if (ret)
                        mlog_errno(ret);
        }

        ocfs2_commit_trans(osb, handle);
out:
        return ret;
}
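
/*
 * Punch a hole: zero the partial clusters at either end of the byte
 * range, then remove any whole clusters it covers, handing the freed
 * allocation to the truncate log and the dealloc context. Inline-data
 * inodes are handled by ocfs2_truncate_inline() instead.
 */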
static int ocfs2_remove_inode_range(struct inode *inode,
                                    struct buffer_head *di_bh, u64 byte_start,
                                    u64 byte_len)
{
        int ret = 0;
        u32 trunc_start, trunc_len, cpos, phys_cpos, alloc_size;
        struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
        struct ocfs2_cached_dealloc_ctxt dealloc;
        struct address_space *mapping = inode->i_mapping;

        ocfs2_init_dealloc_ctxt(&dealloc);

        if (byte_len == 0)
                return 0;

        if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
                ret = ocfs2_truncate_inline(inode, di_bh, byte_start,
                                            byte_start + byte_len, 0);
                if (ret) {
                        mlog_errno(ret);
                        goto out;
                }

                /*
                 * There's no need to get fancy with the page cache
                 * truncate of an inline-data inode. We're talking
                 * about less than a page here, which will be cached
                 * in the dinode buffer anyway.
                 */
                unmap_mapping_range(mapping, 0, 0, 0);
                truncate_inode_pages(mapping, 0);
                goto out;
        }

        trunc_start = ocfs2_clusters_for_bytes(osb->sb, byte_start);
        trunc_len = (byte_start + byte_len) >> osb->s_clustersize_bits;
        if (trunc_len >= trunc_start)
                trunc_len -= trunc_start;
        else
                trunc_len = 0;

        mlog(0, "Inode: %llu, start: %llu, len: %llu, cstart: %u, clen: %u\n",
             (unsigned long long)OCFS2_I(inode)->ip_blkno,
             (unsigned long long)byte_start,
             (unsigned long long)byte_len, trunc_start, trunc_len);

        ret = ocfs2_zero_partial_clusters(inode, byte_start, byte_len);
        if (ret) {
                mlog_errno(ret);
                goto out;
        }

        cpos = trunc_start;
        while (trunc_len) {
                ret = ocfs2_get_clusters(inode, cpos, &phys_cpos,
                                         &alloc_size, NULL);
                if (ret) {
                        mlog_errno(ret);
                        goto out;
                }

                if (alloc_size > trunc_len)
                        alloc_size = trunc_len;

                /* Only do work for non-holes */
                if (phys_cpos != 0) {
                        ret = __ocfs2_remove_inode_range(inode, di_bh, cpos,
                                                         phys_cpos, alloc_size,
                                                         &dealloc);
                        if (ret) {
                                mlog_errno(ret);
                                goto out;
                        }
                }

                cpos += alloc_size;
                trunc_len -= alloc_size;
        }

        ocfs2_truncate_cluster_pages(inode, byte_start, byte_len);

out:
        ocfs2_schedule_truncate_log_flush(osb, 1);
        ocfs2_run_deallocs(osb, &dealloc);

        return ret;
}
/*
 * Parts of this function taken from xfs_change_file_space()
 */
static int __ocfs2_change_file_space(struct file *file, struct inode *inode,
                                     loff_t f_pos, unsigned int cmd,
                                     struct ocfs2_space_resv *sr,
                                     int change_size)
{
        int ret;
        s64 llen;
        loff_t size;
        struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
        struct buffer_head *di_bh = NULL;
        handle_t *handle;
        unsigned long long max_off = inode->i_sb->s_maxbytes;

        if (ocfs2_is_hard_readonly(osb) || ocfs2_is_soft_readonly(osb))
                return -EROFS;

        mutex_lock(&inode->i_mutex);

        /*
         * This prevents concurrent writes on other nodes
         */
        ret = ocfs2_rw_lock(inode, 1);
        if (ret) {
                mlog_errno(ret);
                goto out;
        }

        ret = ocfs2_inode_lock(inode, &di_bh, 1);
        if (ret) {
                mlog_errno(ret);
                goto out_rw_unlock;
        }

        if (inode->i_flags & (S_IMMUTABLE|S_APPEND)) {
                ret = -EPERM;
                goto out_inode_unlock;
        }

        switch (sr->l_whence) {
        case 0: /*SEEK_SET*/
                break;
        case 1: /*SEEK_CUR*/
                sr->l_start += f_pos;
                break;
        case 2: /*SEEK_END*/
                sr->l_start += i_size_read(inode);
                break;
        default:
                ret = -EINVAL;
                goto out_inode_unlock;
        }
        sr->l_whence = 0;

        llen = sr->l_len > 0 ? sr->l_len - 1 : sr->l_len;

        if (sr->l_start < 0
            || sr->l_start > max_off
            || (sr->l_start + llen) < 0
            || (sr->l_start + llen) > max_off) {
                ret = -EINVAL;
                goto out_inode_unlock;
        }
        size = sr->l_start + sr->l_len;

        if (cmd == OCFS2_IOC_RESVSP || cmd == OCFS2_IOC_RESVSP64) {
                if (sr->l_len <= 0) {
                        ret = -EINVAL;
                        goto out_inode_unlock;
                }
        }

        if (file && should_remove_suid(file->f_path.dentry)) {
                ret = __ocfs2_write_remove_suid(inode, di_bh);
                if (ret) {
                        mlog_errno(ret);
                        goto out_inode_unlock;
                }
        }

        down_write(&OCFS2_I(inode)->ip_alloc_sem);
        switch (cmd) {
        case OCFS2_IOC_RESVSP:
        case OCFS2_IOC_RESVSP64:
                /*
                 * This takes unsigned offsets, but the signed ones we
                 * pass have been checked against overflow above.
                 */
                ret = ocfs2_allocate_unwritten_extents(inode, sr->l_start,
                                                       sr->l_len);
                break;
        case OCFS2_IOC_UNRESVSP:
        case OCFS2_IOC_UNRESVSP64:
                ret = ocfs2_remove_inode_range(inode, di_bh, sr->l_start,
                                               sr->l_len);
                break;
        default:
                ret = -EINVAL;
        }
        up_write(&OCFS2_I(inode)->ip_alloc_sem);
        if (ret) {
                mlog_errno(ret);
                goto out_inode_unlock;
        }

        /*
         * We update c/mtime for these changes
         */
        handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
        if (IS_ERR(handle)) {
                ret = PTR_ERR(handle);
                mlog_errno(ret);
                goto out_inode_unlock;
        }

        if (change_size && i_size_read(inode) < size)
                i_size_write(inode, size);

        inode->i_ctime = inode->i_mtime = CURRENT_TIME;
        ret = ocfs2_mark_inode_dirty(handle, inode, di_bh);
        if (ret < 0)
                mlog_errno(ret);

        ocfs2_commit_trans(osb, handle);

out_inode_unlock:
        brelse(di_bh);
        ocfs2_inode_unlock(inode, 1);

out_rw_unlock:
        ocfs2_rw_unlock(inode, 1);

out:
        mutex_unlock(&inode->i_mutex);
        return ret;
}

int ocfs2_change_file_space(struct file *file, unsigned int cmd,
                            struct ocfs2_space_resv *sr)
{
        struct inode *inode = file->f_path.dentry->d_inode;
        struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);

        if ((cmd == OCFS2_IOC_RESVSP || cmd == OCFS2_IOC_RESVSP64) &&
            !ocfs2_writes_unwritten_extents(osb))
                return -ENOTTY;
        else if ((cmd == OCFS2_IOC_UNRESVSP || cmd == OCFS2_IOC_UNRESVSP64) &&
                 !ocfs2_sparse_alloc(osb))
                return -ENOTTY;

        if (!S_ISREG(inode->i_mode))
                return -EINVAL;

        if (!(file->f_mode & FMODE_WRITE))
                return -EBADF;

        return __ocfs2_change_file_space(file, inode, file->f_pos, cmd, sr, 0);
}
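
/*
 * fallocate() backend: translate the request into an ocfs2_space_resv
 * and reuse the RESVSP64 path. FALLOC_FL_KEEP_SIZE controls whether
 * i_size is allowed to grow.
 */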
static long ocfs2_fallocate(struct inode *inode, int mode, loff_t offset,
                            loff_t len)
{
        struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
        struct ocfs2_space_resv sr;
        int change_size = 1;

        if (!ocfs2_writes_unwritten_extents(osb))
                return -EOPNOTSUPP;

        if (S_ISDIR(inode->i_mode))
                return -ENODEV;

        if (mode & FALLOC_FL_KEEP_SIZE)
                change_size = 0;

        sr.l_whence = 0;
        sr.l_start = (s64)offset;
        sr.l_len = (s64)len;

        return __ocfs2_change_file_space(NULL, inode, offset,
                                         OCFS2_IOC_RESVSP64, &sr, change_size);
}
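
/*
 * Called before a write begins. Takes the cluster inode lock (upgrading
 * to exclusive when needed), clears suid/sgid bits, resolves the final
 * write position for O_APPEND, and decides whether an O_DIRECT write can
 * proceed or must fall back to buffered I/O.
 */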
static int ocfs2_prepare_inode_for_write ( struct dentry * dentry ,
loff_t * ppos ,
size_t count ,
2007-02-10 07:24:12 +03:00
int appending ,
int * direct_io )
2005-12-16 01:31:24 +03:00
{
2007-08-29 04:13:23 +04:00
int ret = 0 , meta_level = 0 ;
2006-10-18 05:29:52 +04:00
struct inode * inode = dentry - > d_inode ;
2007-08-29 04:13:23 +04:00
loff_t saved_pos , end ;
2005-12-16 01:31:24 +03:00
/*
2007-08-29 04:13:23 +04:00
* We start with a read level meta lock and only jump to an ex
* if we need to make modifications here .
2005-12-16 01:31:24 +03:00
*/
for ( ; ; ) {
2007-10-19 02:30:42 +04:00
ret = ocfs2_inode_lock ( inode , NULL , meta_level ) ;
2005-12-16 01:31:24 +03:00
if ( ret < 0 ) {
meta_level = - 1 ;
mlog_errno ( ret ) ;
goto out ;
}
/* Clear suid / sgid if necessary. We do this here
* instead of later in the write path because
* remove_suid ( ) calls - > setattr without any hint that
* we may have already done our cluster locking . Since
* ocfs2_setattr ( ) * must * take cluster locks to
* proceeed , this will lead us to recursively lock the
* inode . There ' s also the dinode i_size state which
* can be lost via setattr during extending writes ( we
* set inode - > i_size at the end of a write . */
2006-10-18 05:29:52 +04:00
if ( should_remove_suid ( dentry ) ) {
2005-12-16 01:31:24 +03:00
if ( meta_level = = 0 ) {
2007-10-19 02:30:42 +04:00
ocfs2_inode_unlock ( inode , meta_level ) ;
2005-12-16 01:31:24 +03:00
meta_level = 1 ;
continue ;
}
ret = ocfs2_write_remove_suid ( inode ) ;
if ( ret < 0 ) {
mlog_errno ( ret ) ;
2006-10-18 05:29:52 +04:00
goto out_unlock ;
2005-12-16 01:31:24 +03:00
}
}
/* work on a copy of ppos until we're sure that we won't have
* to recalculate it due to relocking . */
2006-10-18 05:29:52 +04:00
if ( appending ) {
2005-12-16 01:31:24 +03:00
saved_pos = i_size_read ( inode ) ;
mlog ( 0 , " O_APPEND: inode->i_size=%llu \n " , saved_pos ) ;
} else {
2006-10-18 05:29:52 +04:00
saved_pos = * ppos ;
2005-12-16 01:31:24 +03:00
}
2007-01-17 23:53:31 +03:00
2007-08-29 04:13:23 +04:00
end = saved_pos + count ;
2007-02-10 07:24:12 +03:00
2007-08-29 04:13:23 +04:00
/*
* Skip the O_DIRECT checks if we don ' t need
* them .
*/
if ( ! direct_io | | ! ( * direct_io ) )
2007-02-10 07:24:12 +03:00
break ;
2007-09-08 01:46:51 +04:00
/*
* There ' s no sane way to do direct writes to an inode
* with inline data .
*/
if ( OCFS2_I ( inode ) - > ip_dyn_features & OCFS2_INLINE_DATA_FL ) {
* direct_io = 0 ;
break ;
}
2007-01-17 23:53:31 +03:00
/*
2007-08-29 04:13:23 +04:00
* Allowing concurrent direct writes means
* i_size changes wouldn ' t be synchronized , so
* one node could wind up truncating another
* nodes writes .
2007-01-17 23:53:31 +03:00
*/
2007-08-29 04:13:23 +04:00
if ( end > i_size_read ( inode ) ) {
* direct_io = 0 ;
2005-12-16 01:31:24 +03:00
break ;
}
2007-08-29 04:13:23 +04:00
/*
* We don ' t fill holes during direct io , so
* check for them here . If any are found , the
* caller will have to retake some cluster
* locks and initiate the io as buffered .
*/
ret = ocfs2_check_range_for_holes ( inode , saved_pos , count ) ;
if ( ret = = 1 ) {
* direct_io = 0 ;
ret = 0 ;
} else if ( ret < 0 )
mlog_errno ( ret ) ;
2005-12-16 01:31:24 +03:00
break ;
}
2006-10-18 05:29:52 +04:00
if ( appending )
* ppos = saved_pos ;
out_unlock :
2007-10-19 02:30:42 +04:00
ocfs2_inode_unlock ( inode , meta_level ) ;
2006-10-18 05:29:52 +04:00
out :
return ret ;
}
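
/*
 * Caller pattern for ocfs2_prepare_inode_for_write(), as used by the
 * write paths below:
 *
 *	mutex_lock(&inode->i_mutex);
 *	ocfs2_rw_lock(inode, rw_level);
 *	can_do_direct = direct_io;
 *	ret = ocfs2_prepare_inode_for_write(dentry, ppos, count,
 *					    appending, &can_do_direct);
 *	if (direct_io && !can_do_direct)
 *		drop the locks and retry the setup as a buffered write;
 *
 * Passing a NULL direct_io pointer (as the splice write path does)
 * skips the O_DIRECT suitability checks entirely.
 */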

static ssize_t ocfs2_file_aio_write(struct kiocb *iocb,
				    const struct iovec *iov,
				    unsigned long nr_segs,
				    loff_t pos)
{
	int ret, direct_io, appending, rw_level, have_alloc_sem = 0;
	int can_do_direct;
	ssize_t written = 0;
	size_t ocount;		/* original count */
	size_t count;		/* after file limit checks */
	loff_t old_size, *ppos = &iocb->ki_pos;
	u32 old_clusters;
	struct file *file = iocb->ki_filp;
	struct inode *inode = file->f_path.dentry->d_inode;
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);

	mlog_entry("(0x%p, %u, '%.*s')\n", file,
		   (unsigned int)nr_segs,
		   file->f_path.dentry->d_name.len,
		   file->f_path.dentry->d_name.name);

	if (iocb->ki_left == 0)
		return 0;

	vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);

	appending = file->f_flags & O_APPEND ? 1 : 0;
	direct_io = file->f_flags & O_DIRECT ? 1 : 0;

	mutex_lock(&inode->i_mutex);

relock:
	/* to match setattr's i_mutex -> i_alloc_sem -> rw_lock ordering */
	if (direct_io) {
		down_read(&inode->i_alloc_sem);
		have_alloc_sem = 1;
	}

	/* concurrent O_DIRECT writes are allowed */
	rw_level = !direct_io;
	ret = ocfs2_rw_lock(inode, rw_level);
	if (ret < 0) {
		mlog_errno(ret);
		goto out_sems;
	}

	can_do_direct = direct_io;
	ret = ocfs2_prepare_inode_for_write(file->f_path.dentry, ppos,
					    iocb->ki_left, appending,
					    &can_do_direct);
	if (ret < 0) {
		mlog_errno(ret);
		goto out;
	}

	/*
	 * We can't complete the direct I/O as requested, fall back to
	 * buffered I/O.
	 */
	if (direct_io && !can_do_direct) {
		ocfs2_rw_unlock(inode, rw_level);
		up_read(&inode->i_alloc_sem);

		have_alloc_sem = 0;
		rw_level = -1;

		direct_io = 0;
		goto relock;
	}

	/*
	 * To later detect whether a journal commit for sync writes is
	 * necessary, we sample i_size, and cluster count here.
	 */
	old_size = i_size_read(inode);
	old_clusters = OCFS2_I(inode)->ip_clusters;

	/* communicate with ocfs2_dio_end_io */
	ocfs2_iocb_set_rw_locked(iocb, rw_level);

	if (direct_io) {
		ret = generic_segment_checks(iov, &nr_segs, &ocount,
					     VERIFY_READ);
		if (ret)
			goto out_dio;

		ret = generic_write_checks(file, ppos, &count,
					   S_ISBLK(inode->i_mode));
		if (ret)
			goto out_dio;

		written = generic_file_direct_write(iocb, iov, &nr_segs, *ppos,
						    ppos, count, ocount);
		if (written < 0) {
			ret = written;
			goto out_dio;
		}
	} else {
		written = generic_file_aio_write_nolock(iocb, iov, nr_segs,
							*ppos);
	}

out_dio:
	/* buffered aio wouldn't have proper lock coverage today */
	BUG_ON(ret == -EIOCBQUEUED && !(file->f_flags & O_DIRECT));

	if ((file->f_flags & O_SYNC && !direct_io) || IS_SYNC(inode)) {
		/*
		 * The generic write paths have handled getting data
		 * to disk, but since we don't make use of the dirty
		 * inode list, a manual journal commit is necessary
		 * here.
		 */
		if (old_size != i_size_read(inode) ||
		    old_clusters != OCFS2_I(inode)->ip_clusters) {
			ret = journal_force_commit(osb->journal->j_journal);
			if (ret < 0)
				written = ret;
		}
	}

	/*
	 * Deep in g_f_a_w_n()->ocfs2_direct_IO we pass in an ocfs2_dio_end_io
	 * function pointer which is called when o_direct io completes so that
	 * it can unlock our rw lock.  (It's the clustered equivalent of
	 * i_alloc_sem; it protects truncate from racing with pending ios.)
	 * Unfortunately there are error cases which call end_io and others
	 * that don't, so we don't have to unlock the rw_lock if either an
	 * async dio is going to do it in the future or an end_io after an
	 * error has already done it.
	 */
	if (ret == -EIOCBQUEUED || !ocfs2_iocb_is_rw_locked(iocb)) {
		rw_level = -1;
		have_alloc_sem = 0;
	}

out:
	if (rw_level != -1)
		ocfs2_rw_unlock(inode, rw_level);

out_sems:
	if (have_alloc_sem)
		up_read(&inode->i_alloc_sem);

	mutex_unlock(&inode->i_mutex);

	mlog_exit(ret);
	return written ? written : ret;
}
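
/*
 * Splice data from a pipe into an ocfs2 file.  Unlike the aio write
 * path above, this always takes the rw lock exclusively (level 1) and
 * never attempts direct I/O, which is why
 * ocfs2_prepare_inode_for_write() is called with a NULL direct_io
 * argument here.
 */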
static ssize_t ocfs2_file_splice_write(struct pipe_inode_info *pipe,
				       struct file *out,
				       loff_t *ppos,
				       size_t len,
				       unsigned int flags)
{
	int ret;
	struct inode *inode = out->f_path.dentry->d_inode;

	mlog_entry("(0x%p, 0x%p, %u, '%.*s')\n", out, pipe,
		   (unsigned int)len,
		   out->f_path.dentry->d_name.len,
		   out->f_path.dentry->d_name.name);

	inode_double_lock(inode, pipe->inode);

	ret = ocfs2_rw_lock(inode, 1);
	if (ret < 0) {
		mlog_errno(ret);
		goto out;
	}

	ret = ocfs2_prepare_inode_for_write(out->f_path.dentry, ppos, len, 0,
					    NULL);
	if (ret < 0) {
		mlog_errno(ret);
		goto out_unlock;
	}

	ret = generic_file_splice_write_nolock(pipe, out, ppos, len, flags);

out_unlock:
	ocfs2_rw_unlock(inode, 1);
out:
	inode_double_unlock(inode, pipe->inode);

	mlog_exit(ret);
	return ret;
}

static ssize_t ocfs2_file_splice_read(struct file *in,
				      loff_t *ppos,
				      struct pipe_inode_info *pipe,
				      size_t len,
				      unsigned int flags)
{
	int ret = 0;
	struct inode *inode = in->f_path.dentry->d_inode;

	mlog_entry("(0x%p, 0x%p, %u, '%.*s')\n", in, pipe,
		   (unsigned int)len,
		   in->f_path.dentry->d_name.len,
		   in->f_path.dentry->d_name.name);

	/*
	 * See the comment in ocfs2_file_aio_read()
	 */
	ret = ocfs2_inode_lock(inode, NULL, 0);
	if (ret < 0) {
		mlog_errno(ret);
		goto bail;
	}
	ocfs2_inode_unlock(inode, 0);

	ret = generic_file_splice_read(in, ppos, pipe, len, flags);

bail:
	mlog_exit(ret);
	return ret;
}
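
/*
 * Read path.  Buffered reads protect themselves via ->readpage(), so
 * only O_DIRECT readers take i_alloc_sem and a shared rw lock to keep
 * pending direct reads from racing with truncate; see the comments in
 * the function body for the details.
 */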
static ssize_t ocfs2_file_aio_read(struct kiocb *iocb,
				   const struct iovec *iov,
				   unsigned long nr_segs,
				   loff_t pos)
{
	int ret = 0, rw_level = -1, have_alloc_sem = 0, lock_level = 0;
	struct file *filp = iocb->ki_filp;
	struct inode *inode = filp->f_path.dentry->d_inode;

	mlog_entry("(0x%p, %u, '%.*s')\n", filp,
		   (unsigned int)nr_segs,
		   filp->f_path.dentry->d_name.len,
		   filp->f_path.dentry->d_name.name);

	if (!inode) {
		ret = -EINVAL;
		mlog_errno(ret);
		goto bail;
	}

	/*
	 * buffered reads protect themselves in ->readpage().  O_DIRECT reads
	 * need locks to protect pending reads from racing with truncate.
	 */
	if (filp->f_flags & O_DIRECT) {
		down_read(&inode->i_alloc_sem);
		have_alloc_sem = 1;

		ret = ocfs2_rw_lock(inode, 0);
		if (ret < 0) {
			mlog_errno(ret);
			goto bail;
		}
		rw_level = 0;
		/* communicate with ocfs2_dio_end_io */
		ocfs2_iocb_set_rw_locked(iocb, rw_level);
	}

	/*
	 * We're fine letting folks race truncates and extending
	 * writes with read across the cluster, just like they can
	 * locally. Hence no rw_lock during read.
	 *
	 * Take and drop the meta data lock to update inode fields
	 * like i_size. This allows the checks down below
	 * generic_file_aio_read() a chance of actually working.
	 */
	ret = ocfs2_inode_lock_atime(inode, filp->f_vfsmnt, &lock_level);
	if (ret < 0) {
		mlog_errno(ret);
		goto bail;
	}
	ocfs2_inode_unlock(inode, lock_level);

	ret = generic_file_aio_read(iocb, iov, nr_segs, iocb->ki_pos);
	if (ret == -EINVAL)
		mlog(0, "generic_file_aio_read returned -EINVAL\n");

	/* buffered aio wouldn't have proper lock coverage today */
	BUG_ON(ret == -EIOCBQUEUED && !(filp->f_flags & O_DIRECT));

	/* see ocfs2_file_aio_write */
	if (ret == -EIOCBQUEUED || !ocfs2_iocb_is_rw_locked(iocb)) {
		rw_level = -1;
		have_alloc_sem = 0;
	}

bail:
	if (have_alloc_sem)
		up_read(&inode->i_alloc_sem);
	if (rw_level != -1)
		ocfs2_rw_unlock(inode, rw_level);
	mlog_exit(ret);

	return ret;
}

const struct inode_operations ocfs2_file_iops = {
	.setattr	= ocfs2_setattr,
	.getattr	= ocfs2_getattr,
	.permission	= ocfs2_permission,
	.fallocate	= ocfs2_fallocate,
	.fiemap		= ocfs2_fiemap,
};

const struct inode_operations ocfs2_special_file_iops = {
	.setattr	= ocfs2_setattr,
	.getattr	= ocfs2_getattr,
	.permission	= ocfs2_permission,
};

/*
 * Other than ->lock, keep ocfs2_fops and ocfs2_dops in sync with
 * ocfs2_fops_no_plocks and ocfs2_dops_no_plocks!
 */
const struct file_operations ocfs2_fops = {
	.llseek		= generic_file_llseek,
	.read		= do_sync_read,
	.write		= do_sync_write,
	.mmap		= ocfs2_mmap,
	.fsync		= ocfs2_sync_file,
	.release	= ocfs2_file_release,
	.open		= ocfs2_file_open,
	.aio_read	= ocfs2_file_aio_read,
	.aio_write	= ocfs2_file_aio_write,
	.unlocked_ioctl	= ocfs2_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl	= ocfs2_compat_ioctl,
#endif
	.lock		= ocfs2_lock,
	.flock		= ocfs2_flock,
	.splice_read	= ocfs2_file_splice_read,
	.splice_write	= ocfs2_file_splice_write,
};

const struct file_operations ocfs2_dops = {
	.llseek		= generic_file_llseek,
	.read		= generic_read_dir,
	.readdir	= ocfs2_readdir,
	.fsync		= ocfs2_sync_file,
	.release	= ocfs2_dir_release,
	.open		= ocfs2_dir_open,
	.unlocked_ioctl	= ocfs2_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl	= ocfs2_compat_ioctl,
#endif
	.lock		= ocfs2_lock,
	.flock		= ocfs2_flock,
};

/*
 * POSIX-lockless variants of our file_operations.
 *
 * These will be used if the underlying cluster stack does not support
 * posix file locking, if the user passes the "localflocks" mount
 * option, or if we have a local-only fs.
 *
 * ocfs2_flock is in here because all stacks handle UNIX file locks,
 * so we still want it in the case of no stack support for
 * plocks. Internally, it will do the right thing when asked to ignore
 * the cluster.
 */
const struct file_operations ocfs2_fops_no_plocks = {
	.llseek		= generic_file_llseek,
	.read		= do_sync_read,
	.write		= do_sync_write,
	.mmap		= ocfs2_mmap,
	.fsync		= ocfs2_sync_file,
	.release	= ocfs2_file_release,
	.open		= ocfs2_file_open,
	.aio_read	= ocfs2_file_aio_read,
	.aio_write	= ocfs2_file_aio_write,
	.unlocked_ioctl	= ocfs2_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl	= ocfs2_compat_ioctl,
#endif
	.flock		= ocfs2_flock,
	.splice_read	= ocfs2_file_splice_read,
	.splice_write	= ocfs2_file_splice_write,
};

const struct file_operations ocfs2_dops_no_plocks = {
	.llseek		= generic_file_llseek,
	.read		= generic_read_dir,
	.readdir	= ocfs2_readdir,
	.fsync		= ocfs2_sync_file,
	.release	= ocfs2_dir_release,
	.open		= ocfs2_dir_open,
	.unlocked_ioctl	= ocfs2_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl	= ocfs2_compat_ioctl,
#endif
	.flock		= ocfs2_flock,
};