2012-11-29 08:28:09 +04:00
/*
2012-11-02 12:09:44 +04:00
* fs / f2fs / file . c
*
* Copyright ( c ) 2012 Samsung Electronics Co . , Ltd .
* http : //www.samsung.com/
*
* This program is free software ; you can redistribute it and / or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation .
*/
# include <linux/fs.h>
# include <linux/f2fs_fs.h>
# include <linux/stat.h>
# include <linux/buffer_head.h>
# include <linux/writeback.h>
2013-03-16 06:13:04 +04:00
# include <linux/blkdev.h>
2012-11-02 12:09:44 +04:00
# include <linux/falloc.h>
# include <linux/types.h>
2013-02-04 18:41:41 +04:00
# include <linux/compat.h>
2012-11-02 12:09:44 +04:00
# include <linux/uaccess.h>
# include <linux/mount.h>
2014-04-28 13:12:36 +04:00
# include <linux/pagevec.h>
2016-05-21 03:01:00 +03:00
# include <linux/uuid.h>
2016-07-09 01:16:47 +03:00
# include <linux/file.h>
2012-11-02 12:09:44 +04:00
# include "f2fs.h"
# include "node.h"
# include "segment.h"
# include "xattr.h"
# include "acl.h"
2015-07-10 13:08:10 +03:00
# include "gc.h"
2014-12-18 07:04:08 +03:00
# include "trace.h"
2013-04-19 20:28:40 +04:00
# include <trace/events/f2fs.h>
2012-11-02 12:09:44 +04:00
static int f2fs_vm_page_mkwrite ( struct vm_area_struct * vma ,
struct vm_fault * vmf )
{
struct page * page = vmf - > page ;
2013-02-28 01:59:05 +04:00
struct inode * inode = file_inode ( vma - > vm_file ) ;
2014-09-03 02:31:18 +04:00
struct f2fs_sb_info * sbi = F2FS_I_SB ( inode ) ;
2012-11-02 12:09:44 +04:00
struct dnode_of_data dn ;
f2fs: use rw_sem instead of fs_lock(locks mutex)
The fs_locks is used to block other ops(ex, recovery) when doing checkpoint.
And each other operate routine(besides checkpoint) needs to acquire a fs_lock,
there is a terrible problem here, if these are too many concurrency threads acquiring
fs_lock, so that they will block each other and may lead to some performance problem,
but this is not the phenomenon we want to see.
Though there are some optimization patches introduced to enhance the usage of fs_lock,
but the thorough solution is using a *rw_sem* to replace the fs_lock.
Checkpoint routine takes write_sem, and other ops take read_sem, so that we can block
other ops(ex, recovery) when doing checkpoint, and other ops will not disturb each other,
this can avoid the problem described above completely.
Because of the weakness of rw_sem, the above change may introduce a potential problem
that the checkpoint thread might get starved if other threads are intensively locking
the read semaphore for I/O.(Pointed out by Xu Jin)
In order to avoid this, a wait_list is introduced, the appending read semaphore ops
will be dropped into the wait_list if checkpoint thread is waiting for write semaphore,
and will be waked up when checkpoint thread gives up write semaphore.
Thanks to Kim's previous review and test, and will be very glad to see other guys'
performance tests about this patch.
V2:
-fix the potential starvation problem.
-use more suitable func name suggested by Xu Jin.
Signed-off-by: Gu Zheng <guz.fnst@cn.fujitsu.com>
[Jaegeuk Kim: adjust minor coding standard]
Signed-off-by: Jaegeuk Kim <jaegeuk.kim@samsung.com>
2013-09-27 14:08:30 +04:00
int err ;
2012-11-02 12:09:44 +04:00
sb_start_pagefault ( inode - > i_sb ) ;
2014-10-24 06:48:09 +04:00
f2fs_bug_on ( sbi , f2fs_has_inline_data ( inode ) ) ;
2014-08-08 03:32:25 +04:00
2012-11-02 12:09:44 +04:00
/* block allocation */
f2fs: use rw_sem instead of fs_lock(locks mutex)
The fs_locks is used to block other ops(ex, recovery) when doing checkpoint.
And each other operate routine(besides checkpoint) needs to acquire a fs_lock,
there is a terrible problem here, if these are too many concurrency threads acquiring
fs_lock, so that they will block each other and may lead to some performance problem,
but this is not the phenomenon we want to see.
Though there are some optimization patches introduced to enhance the usage of fs_lock,
but the thorough solution is using a *rw_sem* to replace the fs_lock.
Checkpoint routine takes write_sem, and other ops take read_sem, so that we can block
other ops(ex, recovery) when doing checkpoint, and other ops will not disturb each other,
this can avoid the problem described above completely.
Because of the weakness of rw_sem, the above change may introduce a potential problem
that the checkpoint thread might get starved if other threads are intensively locking
the read semaphore for I/O.(Pointed out by Xu Jin)
In order to avoid this, a wait_list is introduced, the appending read semaphore ops
will be dropped into the wait_list if checkpoint thread is waiting for write semaphore,
and will be waked up when checkpoint thread gives up write semaphore.
Thanks to Kim's previous review and test, and will be very glad to see other guys'
performance tests about this patch.
V2:
-fix the potential starvation problem.
-use more suitable func name suggested by Xu Jin.
Signed-off-by: Gu Zheng <guz.fnst@cn.fujitsu.com>
[Jaegeuk Kim: adjust minor coding standard]
Signed-off-by: Jaegeuk Kim <jaegeuk.kim@samsung.com>
2013-09-27 14:08:30 +04:00
f2fs_lock_op ( sbi ) ;
2014-10-24 06:48:09 +04:00
set_new_dnode ( & dn , inode , NULL , NULL , 0 ) ;
2013-11-10 19:13:18 +04:00
err = f2fs_reserve_block ( & dn , page - > index ) ;
2014-10-18 07:33:55 +04:00
if ( err ) {
f2fs_unlock_op ( sbi ) ;
2013-11-10 19:13:18 +04:00
goto out ;
2014-10-18 07:33:55 +04:00
}
f2fs_put_dnode ( & dn ) ;
f2fs_unlock_op ( sbi ) ;
2012-11-02 12:09:44 +04:00
2016-01-08 01:15:04 +03:00
f2fs_balance_fs ( sbi , dn . node_changed ) ;
2015-12-23 00:23:35 +03:00
2013-04-28 04:04:18 +04:00
file_update_time ( vma - > vm_file ) ;
2012-11-02 12:09:44 +04:00
lock_page ( page ) ;
2013-12-06 10:00:58 +04:00
if ( unlikely ( page - > mapping ! = inode - > i_mapping | |
2013-04-28 04:04:18 +04:00
page_offset ( page ) > i_size_read ( inode ) | |
2013-12-06 10:00:58 +04:00
! PageUptodate ( page ) ) ) {
2012-11-02 12:09:44 +04:00
unlock_page ( page ) ;
err = - EFAULT ;
goto out ;
}
/*
* check to see if the page is mapped already ( no holes )
*/
if ( PageMappedToDisk ( page ) )
2013-04-28 04:04:18 +04:00
goto mapped ;
2012-11-02 12:09:44 +04:00
/* page is wholly or partially inside EOF */
mm, fs: get rid of PAGE_CACHE_* and page_cache_{get,release} macros
PAGE_CACHE_{SIZE,SHIFT,MASK,ALIGN} macros were introduced *long* time
ago with promise that one day it will be possible to implement page
cache with bigger chunks than PAGE_SIZE.
This promise never materialized. And unlikely will.
We have many places where PAGE_CACHE_SIZE assumed to be equal to
PAGE_SIZE. And it's constant source of confusion on whether
PAGE_CACHE_* or PAGE_* constant should be used in a particular case,
especially on the border between fs and mm.
Global switching to PAGE_CACHE_SIZE != PAGE_SIZE would cause to much
breakage to be doable.
Let's stop pretending that pages in page cache are special. They are
not.
The changes are pretty straight-forward:
- <foo> << (PAGE_CACHE_SHIFT - PAGE_SHIFT) -> <foo>;
- <foo> >> (PAGE_CACHE_SHIFT - PAGE_SHIFT) -> <foo>;
- PAGE_CACHE_{SIZE,SHIFT,MASK,ALIGN} -> PAGE_{SIZE,SHIFT,MASK,ALIGN};
- page_cache_get() -> get_page();
- page_cache_release() -> put_page();
This patch contains automated changes generated with coccinelle using
script below. For some reason, coccinelle doesn't patch header files.
I've called spatch for them manually.
The only adjustment after coccinelle is revert of changes to
PAGE_CAHCE_ALIGN definition: we are going to drop it later.
There are few places in the code where coccinelle didn't reach. I'll
fix them manually in a separate patch. Comments and documentation also
will be addressed with the separate patch.
virtual patch
@@
expression E;
@@
- E << (PAGE_CACHE_SHIFT - PAGE_SHIFT)
+ E
@@
expression E;
@@
- E >> (PAGE_CACHE_SHIFT - PAGE_SHIFT)
+ E
@@
@@
- PAGE_CACHE_SHIFT
+ PAGE_SHIFT
@@
@@
- PAGE_CACHE_SIZE
+ PAGE_SIZE
@@
@@
- PAGE_CACHE_MASK
+ PAGE_MASK
@@
expression E;
@@
- PAGE_CACHE_ALIGN(E)
+ PAGE_ALIGN(E)
@@
expression E;
@@
- page_cache_get(E)
+ get_page(E)
@@
expression E;
@@
- page_cache_release(E)
+ put_page(E)
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2016-04-01 15:29:47 +03:00
if ( ( ( loff_t ) ( page - > index + 1 ) < < PAGE_SHIFT ) >
2015-09-11 09:43:52 +03:00
i_size_read ( inode ) ) {
2012-11-02 12:09:44 +04:00
unsigned offset ;
mm, fs: get rid of PAGE_CACHE_* and page_cache_{get,release} macros
PAGE_CACHE_{SIZE,SHIFT,MASK,ALIGN} macros were introduced *long* time
ago with promise that one day it will be possible to implement page
cache with bigger chunks than PAGE_SIZE.
This promise never materialized. And unlikely will.
We have many places where PAGE_CACHE_SIZE assumed to be equal to
PAGE_SIZE. And it's constant source of confusion on whether
PAGE_CACHE_* or PAGE_* constant should be used in a particular case,
especially on the border between fs and mm.
Global switching to PAGE_CACHE_SIZE != PAGE_SIZE would cause to much
breakage to be doable.
Let's stop pretending that pages in page cache are special. They are
not.
The changes are pretty straight-forward:
- <foo> << (PAGE_CACHE_SHIFT - PAGE_SHIFT) -> <foo>;
- <foo> >> (PAGE_CACHE_SHIFT - PAGE_SHIFT) -> <foo>;
- PAGE_CACHE_{SIZE,SHIFT,MASK,ALIGN} -> PAGE_{SIZE,SHIFT,MASK,ALIGN};
- page_cache_get() -> get_page();
- page_cache_release() -> put_page();
This patch contains automated changes generated with coccinelle using
script below. For some reason, coccinelle doesn't patch header files.
I've called spatch for them manually.
The only adjustment after coccinelle is revert of changes to
PAGE_CAHCE_ALIGN definition: we are going to drop it later.
There are few places in the code where coccinelle didn't reach. I'll
fix them manually in a separate patch. Comments and documentation also
will be addressed with the separate patch.
virtual patch
@@
expression E;
@@
- E << (PAGE_CACHE_SHIFT - PAGE_SHIFT)
+ E
@@
expression E;
@@
- E >> (PAGE_CACHE_SHIFT - PAGE_SHIFT)
+ E
@@
@@
- PAGE_CACHE_SHIFT
+ PAGE_SHIFT
@@
@@
- PAGE_CACHE_SIZE
+ PAGE_SIZE
@@
@@
- PAGE_CACHE_MASK
+ PAGE_MASK
@@
expression E;
@@
- PAGE_CACHE_ALIGN(E)
+ PAGE_ALIGN(E)
@@
expression E;
@@
- page_cache_get(E)
+ get_page(E)
@@
expression E;
@@
- page_cache_release(E)
+ put_page(E)
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2016-04-01 15:29:47 +03:00
offset = i_size_read ( inode ) & ~ PAGE_MASK ;
zero_user_segment ( page , offset , PAGE_SIZE ) ;
2012-11-02 12:09:44 +04:00
}
set_page_dirty ( page ) ;
2016-07-01 04:49:15 +03:00
if ( ! PageUptodate ( page ) )
SetPageUptodate ( page ) ;
2012-11-02 12:09:44 +04:00
2013-10-25 09:26:31 +04:00
trace_f2fs_vm_page_mkwrite ( page , DATA ) ;
2013-04-28 04:04:18 +04:00
mapped :
/* fill the page */
2016-01-20 18:43:51 +03:00
f2fs_wait_on_page_writeback ( page , DATA , false ) ;
2015-10-08 08:27:34 +03:00
/* wait for GCed encrypted page writeback */
if ( f2fs_encrypted_inode ( inode ) & & S_ISREG ( inode - > i_mode ) )
f2fs_wait_on_encrypted_page_writeback ( sbi , dn . data_blkaddr ) ;
2015-07-13 12:43:19 +03:00
/* if gced page is attached, don't write to cold segment */
clear_cold_data ( page ) ;
2012-11-02 12:09:44 +04:00
out :
sb_end_pagefault ( inode - > i_sb ) ;
2016-01-09 03:57:48 +03:00
f2fs_update_time ( sbi , REQ_TIME ) ;
2012-11-02 12:09:44 +04:00
return block_page_mkwrite_return ( err ) ;
}
static const struct vm_operations_struct f2fs_file_vm_ops = {
2013-01-17 13:37:41 +04:00
. fault = filemap_fault ,
2014-04-08 02:37:19 +04:00
. map_pages = filemap_map_pages ,
2013-01-17 13:37:41 +04:00
. page_mkwrite = f2fs_vm_page_mkwrite ,
2012-11-02 12:09:44 +04:00
} ;
2013-06-14 03:52:35 +04:00
/*
 * Look up the parent inode number of @inode through any of its dentry
 * aliases.
 *
 * Returns 1 and stores the parent ino in *@pino on success. Returns 0,
 * leaving *@pino untouched, when no alias is found or when
 * update_dent_inode() reports a non-zero result.
 */
static int get_parent_ino(struct inode *inode, nid_t *pino)
{
	struct dentry *alias;
	int found = 0;

	/* hold an inode reference only while searching for an alias */
	inode = igrab(inode);
	alias = d_find_any_alias(inode);
	iput(inode);
	if (!alias)
		return 0;

	if (!update_dent_inode(inode, inode, &alias->d_name)) {
		*pino = parent_ino(alias);
		found = 1;
	}

	dput(alias);
	return found;
}
2014-08-20 14:37:35 +04:00
static inline bool need_do_checkpoint ( struct inode * inode )
{
2014-09-03 02:31:18 +04:00
struct f2fs_sb_info * sbi = F2FS_I_SB ( inode ) ;
2014-08-20 14:37:35 +04:00
bool need_cp = false ;
if ( ! S_ISREG ( inode - > i_mode ) | | inode - > i_nlink ! = 1 )
need_cp = true ;
2016-08-30 04:23:45 +03:00
else if ( is_sbi_flag_set ( sbi , SBI_NEED_CP ) )
2015-04-30 03:02:18 +03:00
need_cp = true ;
2014-08-20 14:37:35 +04:00
else if ( file_wrong_pino ( inode ) )
need_cp = true ;
else if ( ! space_for_roll_forward ( sbi ) )
need_cp = true ;
else if ( ! is_checkpointed_node ( sbi , F2FS_I ( inode ) - > i_pino ) )
need_cp = true ;
else if ( F2FS_I ( inode ) - > xattr_ver = = cur_cp_version ( F2FS_CKPT ( sbi ) ) )
need_cp = true ;
2014-10-31 08:47:03 +03:00
else if ( test_opt ( sbi , FASTBOOT ) )
need_cp = true ;
2014-11-06 07:05:53 +03:00
else if ( sbi - > active_logs = = 2 )
need_cp = true ;
2014-08-20 14:37:35 +04:00
return need_cp ;
}
2014-12-08 09:29:40 +03:00
/*
 * Report whether the inode block of @ino still needs to be written:
 * true when its cached node page exists and is dirty, or when
 * need_inode_block_update() says so.
 */
static bool need_inode_page_update(struct f2fs_sb_info *sbi, nid_t ino)
{
	struct page *ipage = find_get_page(NODE_MAPPING(sbi), ino);
	bool pending;

	/* But we need to avoid that there are some inode updates */
	pending = (ipage && PageDirty(ipage)) ||
			need_inode_block_update(sbi, ino);

	/* drop the reference taken by find_get_page(), without unlocking */
	f2fs_put_page(ipage, 0);
	return pending;
}
2014-12-08 09:29:41 +03:00
static void try_to_fix_pino ( struct inode * inode )
{
struct f2fs_inode_info * fi = F2FS_I ( inode ) ;
nid_t pino ;
down_write ( & fi - > i_sem ) ;
fi - > xattr_ver = 0 ;
if ( file_wrong_pino ( inode ) & & inode - > i_nlink = = 1 & &
get_parent_ino ( inode , & pino ) ) {
2016-05-20 19:52:20 +03:00
f2fs_i_pino_write ( inode , pino ) ;
2014-12-08 09:29:41 +03:00
file_got_pino ( inode ) ;
}
2016-05-21 02:32:49 +03:00
up_write ( & fi - > i_sem ) ;
2014-12-08 09:29:41 +03:00
}
2016-04-15 19:43:17 +03:00
/*
 * Common implementation behind f2fs_sync_file().
 *
 * @file:     file being synced
 * @start:    first byte of the range handed to filemap_write_and_wait_range()
 * @end:      last byte of that range
 * @datasync: non-zero for fdatasync semantics
 * @atomic:   forwarded unchanged to fsync_node_pages()
 *
 * Returns 0 on success or a negative errno. On a read-only filesystem the
 * function returns 0 without doing anything.
 */
static int f2fs_do_sync_file(struct file *file, loff_t start, loff_t end,
						int datasync, bool atomic)
{
	struct inode *inode = file->f_mapping->host;
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	nid_t ino = inode->i_ino;
	struct writeback_control wbc = {
		.sync_mode = WB_SYNC_ALL,
		.nr_to_write = LONG_MAX,
		.for_reclaim = 0,
	};
	bool need_cp = false;
	int ret = 0;

	if (unlikely(f2fs_readonly(inode->i_sb)))
		return 0;

	trace_f2fs_sync_file_enter(inode);

	/* if fdatasync is triggered, let's do in-place-update */
	if (datasync || get_dirty_pages(inode) <= SM_I(sbi)->min_fsync_blocks)
		set_inode_flag(inode, FI_NEED_IPU);
	ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
	clear_inode_flag(inode, FI_NEED_IPU);

	if (ret) {
		/* data writeback failed: report it without touching nodes */
		trace_f2fs_sync_file_exit(inode, need_cp, datasync, ret);
		return ret;
	}

	if (!datasync && !f2fs_skip_inode_update(inode)) {
		/* if the inode is dirty, let's recover all the time */
		f2fs_write_inode(inode, NULL);
	} else if (!is_inode_flag_set(inode, FI_APPEND_WRITE) &&
			!exist_written_data(sbi, ino, APPEND_INO) &&
			/* it may call write_inode just prior to fsync */
			!need_inode_page_update(sbi, ino)) {
		/*
		 * if there is no written data, don't waste time to write
		 * recovery info.
		 */
		if (is_inode_flag_set(inode, FI_UPDATE_WRITE) ||
				exist_written_data(sbi, ino, UPDATE_INO))
			goto flush_out;
		goto out;
	}

	/*
	 * Both of fdatasync() and fsync() are able to be recovered from
	 * sudden-power-off.
	 */
	down_read(&F2FS_I(inode)->i_sem);
	need_cp = need_do_checkpoint(inode);
	up_read(&F2FS_I(inode)->i_sem);

	if (need_cp) {
		/* all the dirty node pages should be flushed for POR */
		ret = f2fs_sync_fs(inode->i_sb, 1);

		/*
		 * We've secured consistency through sync_fs. Following pino
		 * will be used only for fsynced inodes after checkpoint.
		 */
		try_to_fix_pino(inode);
		clear_inode_flag(inode, FI_APPEND_WRITE);
		clear_inode_flag(inode, FI_UPDATE_WRITE);
		goto out;
	}

	/*
	 * Flush the node pages, and repeat for as long as the inode block
	 * still needs an update afterwards.
	 */
	for (;;) {
		ret = fsync_node_pages(sbi, inode, &wbc, atomic);
		if (ret)
			goto out;

		/* if cp_error was enabled, we should avoid infinite loop */
		if (unlikely(f2fs_cp_error(sbi))) {
			ret = -EIO;
			goto out;
		}

		if (!need_inode_block_update(sbi, ino))
			break;

		f2fs_mark_inode_dirty_sync(inode);
		f2fs_write_inode(inode, NULL);
	}

	ret = wait_on_node_pages_writeback(sbi, ino);
	if (ret)
		goto out;

	/* once recovery info is written, don't need to track this */
	remove_ino_entry(sbi, ino, APPEND_INO);
	clear_inode_flag(inode, FI_APPEND_WRITE);
flush_out:
	remove_ino_entry(sbi, ino, UPDATE_INO);
	clear_inode_flag(inode, FI_UPDATE_WRITE);
	ret = f2fs_issue_flush(sbi);
	f2fs_update_time(sbi, REQ_TIME);
out:
	trace_f2fs_sync_file_exit(inode, need_cp, datasync, ret);
	f2fs_trace_ios(NULL, 1);
	return ret;
}
2016-04-15 19:43:17 +03:00
/*
 * fsync entry point: delegates to f2fs_do_sync_file() with the atomic
 * argument set to false.
 */
int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
{
	const bool atomic = false;

	return f2fs_do_sync_file(file, start, end, datasync, atomic);
}
2014-04-28 13:12:36 +04:00
/*
 * Return the index of the first page tagged dirty in @mapping at or after
 * @pgofs. Only meaningful for SEEK_DATA: any other @whence yields 0.
 * ULONG_MAX is returned when no dirty page is found.
 */
static pgoff_t __get_first_dirty_index(struct address_space *mapping,
						pgoff_t pgofs, int whence)
{
	struct pagevec pvec;
	pgoff_t first = ULONG_MAX;

	if (whence != SEEK_DATA)
		return 0;

	/* find first dirty page index; a single page is all we ask for */
	pagevec_init(&pvec, 0);
	if (pagevec_lookup_tag(&pvec, mapping, &pgofs,
					PAGECACHE_TAG_DIRTY, 1))
		first = pvec.pages[0]->index;
	pagevec_release(&pvec);

	return first;
}
/*
 * Tell whether the block at page offset @pgofs satisfies the seek request.
 *
 * SEEK_DATA: a NEW_ADDR block counts only when @pgofs equals @dirty; any
 *            other address counts unless it is NULL_ADDR.
 * SEEK_HOLE: only a NULL_ADDR block counts.
 * Any other @whence never matches.
 */
static bool __found_offset(block_t blkaddr, pgoff_t dirty, pgoff_t pgofs,
							int whence)
{
	if (whence == SEEK_DATA) {
		if (blkaddr == NEW_ADDR)
			return dirty == pgofs;
		return blkaddr != NULL_ADDR;
	}

	if (whence == SEEK_HOLE)
		return blkaddr == NULL_ADDR;

	return false;
}
2014-04-23 10:10:24 +04:00
/*
 * SEEK_DATA / SEEK_HOLE implementation.
 *
 * Walks the dnode blocks of the file starting at @offset, under the inode
 * lock, until __found_offset() accepts a block address. Returns the new
 * file position set through vfs_setpos(), or -ENXIO when @offset is at or
 * beyond i_size, when a dnode lookup fails with an error other than
 * -ENOENT, or when SEEK_DATA runs past i_size without finding data.
 */
static loff_t f2fs_seek_block(struct file *file, loff_t offset, int whence)
{
	struct inode *inode = file->f_mapping->host;
	loff_t maxbytes = inode->i_sb->s_maxbytes;
	struct dnode_of_data dn;
	pgoff_t pgofs, end_offset, dirty;
	loff_t data_ofs = offset;
	loff_t isize;
	int err = 0;

	inode_lock(inode);

	isize = i_size_read(inode);
	if (offset >= isize)
		goto fail;

	/* handle inline data case */
	if (f2fs_has_inline_data(inode) || f2fs_has_inline_dentry(inode)) {
		if (whence == SEEK_HOLE)
			data_ofs = isize;
		goto found;
	}

	pgofs = (pgoff_t)(offset >> PAGE_SHIFT);

	dirty = __get_first_dirty_index(inode->i_mapping, pgofs, whence);

	for (; data_ofs < isize; data_ofs = (loff_t)pgofs << PAGE_SHIFT) {
		set_new_dnode(&dn, inode, NULL, NULL, 0);
		err = get_dnode_of_data(&dn, pgofs, LOOKUP_NODE);
		if (err == -ENOENT) {
			/* direct node does not exist */
			if (whence != SEEK_DATA)
				goto found;
			pgofs = get_next_page_offset(&dn, pgofs);
			continue;
		}
		if (err)
			goto fail;

		end_offset = ADDRS_PER_PAGE(dn.node_page, inode);

		/* find data/hole in dnode block */
		while (dn.ofs_in_node < end_offset) {
			block_t blkaddr = datablock_addr(dn.node_page,
							dn.ofs_in_node);

			if (__found_offset(blkaddr, dirty, pgofs, whence)) {
				f2fs_put_dnode(&dn);
				goto found;
			}

			/* advance to the next slot and matching byte offset */
			dn.ofs_in_node++;
			pgofs++;
			data_ofs = (loff_t)pgofs << PAGE_SHIFT;
		}
		f2fs_put_dnode(&dn);
	}

	if (whence == SEEK_DATA)
		goto fail;
found:
	/* a hole is never reported beyond the end of the file */
	if (whence == SEEK_HOLE && data_ofs > isize)
		data_ofs = isize;
	inode_unlock(inode);
	return vfs_setpos(file, data_ofs, maxbytes);
fail:
	inode_unlock(inode);
	return -ENXIO;
}
/*
 * llseek handler. SEEK_SET/SEEK_CUR/SEEK_END go to
 * generic_file_llseek_size(); SEEK_DATA/SEEK_HOLE go to f2fs_seek_block()
 * after rejecting negative offsets with -ENXIO. Any other @whence yields
 * -EINVAL.
 */
static loff_t f2fs_llseek(struct file *file, loff_t offset, int whence)
{
	struct inode *inode = file->f_mapping->host;
	loff_t maxbytes = inode->i_sb->s_maxbytes;

	if (whence == SEEK_SET || whence == SEEK_CUR || whence == SEEK_END)
		return generic_file_llseek_size(file, offset, whence,
						maxbytes, i_size_read(inode));

	if (whence == SEEK_DATA || whence == SEEK_HOLE) {
		if (offset < 0)
			return -ENXIO;
		return f2fs_seek_block(file, offset, whence);
	}

	return -EINVAL;
}
2012-11-02 12:09:44 +04:00
static int f2fs_file_mmap ( struct file * file , struct vm_area_struct * vma )
{
2014-10-24 06:48:09 +04:00
struct inode * inode = file_inode ( file ) ;
2015-12-22 22:09:35 +03:00
int err ;
2014-10-24 06:48:09 +04:00
2015-04-22 06:39:58 +03:00
if ( f2fs_encrypted_inode ( inode ) ) {
2015-05-16 02:26:10 +03:00
err = fscrypt_get_encryption_info ( inode ) ;
2015-04-22 06:39:58 +03:00
if ( err )
return 0 ;
2016-02-14 13:58:35 +03:00
if ( ! f2fs_encrypted_inode ( inode ) )
return - ENOKEY ;
2015-04-22 06:39:58 +03:00
}
2014-10-24 06:48:09 +04:00
/* we don't need to use inline_data strictly */
2015-12-22 22:09:35 +03:00
err = f2fs_convert_inline_inode ( inode ) ;
if ( err )
return err ;
2014-10-24 06:48:09 +04:00
2012-11-02 12:09:44 +04:00
file_accessed ( file ) ;
vma - > vm_ops = & f2fs_file_vm_ops ;
return 0 ;
}
2015-04-22 06:39:58 +03:00
static int f2fs_file_open ( struct inode * inode , struct file * filp )
{
int ret = generic_file_open ( inode , filp ) ;
2016-04-12 01:15:38 +03:00
struct dentry * dir ;
2015-04-22 06:39:58 +03:00
if ( ! ret & & f2fs_encrypted_inode ( inode ) ) {
2015-05-16 02:26:10 +03:00
ret = fscrypt_get_encryption_info ( inode ) ;
2015-04-22 06:39:58 +03:00
if ( ret )
2016-02-14 13:58:35 +03:00
return - EACCES ;
2015-05-16 02:26:10 +03:00
if ( ! fscrypt_has_encryption_key ( inode ) )
2016-02-14 13:58:35 +03:00
return - ENOKEY ;
2015-04-22 06:39:58 +03:00
}
2016-04-12 01:15:38 +03:00
dir = dget_parent ( file_dentry ( filp ) ) ;
if ( f2fs_encrypted_inode ( d_inode ( dir ) ) & &
! fscrypt_has_permitted_context ( d_inode ( dir ) , inode ) ) {
dput ( dir ) ;
2016-02-23 20:21:37 +03:00
return - EPERM ;
2016-04-12 01:15:38 +03:00
}
dput ( dir ) ;
2015-04-22 06:39:58 +03:00
return ret ;
}
2013-05-22 03:02:02 +04:00
int truncate_data_blocks_range ( struct dnode_of_data * dn , int count )
2012-11-02 12:09:44 +04:00
{
2014-09-03 02:31:18 +04:00
struct f2fs_sb_info * sbi = F2FS_I_SB ( dn - > inode ) ;
2012-11-02 12:09:44 +04:00
struct f2fs_node * raw_node ;
f2fs: update extent tree in batches
This patch introduce a new helper f2fs_update_extent_tree_range which can
do extent mapping update at a specified range.
The main idea is:
1) punch all mapping info in extent node(s) which are at a specified range;
2) try to merge new extent mapping with adjacent node, or failing that,
insert the mapping into extent tree as a new node.
In order to see the benefit, I add a function for stating time stamping
count as below:
uint64_t rdtsc(void)
{
uint32_t lo, hi;
__asm__ __volatile__ ("rdtsc" : "=a" (lo), "=d" (hi));
return (uint64_t)hi << 32 | lo;
}
My test environment is: ubuntu, intel i7-3770, 16G memory, 256g micron ssd.
truncation path: update extent cache from truncate_data_blocks_range
non-truncataion path: update extent cache from other paths
total: all update paths
a) Removing 128MB file which has one extent node mapping whole range of
file:
1. dd if=/dev/zero of=/mnt/f2fs/128M bs=1M count=128
2. sync
3. rm /mnt/f2fs/128M
Before:
total count average
truncation: 7651022 32768 233.49
Patched:
total count average
truncation: 3321 33 100.64
b) fsstress:
fsstress -d /mnt/f2fs -l 5 -n 100 -p 20
Test times: 5 times.
Before:
total count average
truncation: 5812480.6 20911.6 277.95
non-truncation: 7783845.6 13440.8 579.12
total: 13596326.2 34352.4 395.79
Patched:
total count average
truncation: 1281283.0 3041.6 421.25
non-truncation: 7355844.4 13662.8 538.38
total: 8637127.4 16704.4 517.06
1) For the updates in truncation path:
- we can see updating in batches leads total tsc and update count reducing
explicitly;
- besides, for a single batched updating, punching multiple extent nodes
in a loop, result in executing more operations, so our average tsc
increase intensively.
2) For the updates in non-truncation path:
- there is a little improvement, that is because for the scenario that we
just need to update in the head or tail of extent node, new interface
optimize to update info in extent node directly, rather than removing
original extent node for updating and then inserting that updated one
into cache as new node.
Signed-off-by: Chao Yu <chao2.yu@samsung.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2015-08-26 15:34:48 +03:00
int nr_free = 0 , ofs = dn - > ofs_in_node , len = count ;
2012-11-02 12:09:44 +04:00
__le32 * addr ;
2013-07-15 13:57:38 +04:00
raw_node = F2FS_NODE ( dn - > node_page ) ;
2012-11-02 12:09:44 +04:00
addr = blkaddr_in_node ( raw_node ) + ofs ;
2014-01-18 00:44:39 +04:00
for ( ; count > 0 ; count - - , addr + + , dn - > ofs_in_node + + ) {
2012-11-02 12:09:44 +04:00
block_t blkaddr = le32_to_cpu ( * addr ) ;
if ( blkaddr = = NULL_ADDR )
continue ;
2014-12-31 09:57:55 +03:00
dn - > data_blkaddr = NULL_ADDR ;
2015-03-19 14:23:32 +03:00
set_data_blkaddr ( dn ) ;
2012-11-02 12:09:44 +04:00
invalidate_blocks ( sbi , blkaddr ) ;
2015-03-18 03:16:35 +03:00
if ( dn - > ofs_in_node = = 0 & & IS_INODE ( dn - > node_page ) )
2016-05-20 20:13:22 +03:00
clear_inode_flag ( dn - > inode , FI_FIRST_BLOCK_WRITTEN ) ;
2012-11-02 12:09:44 +04:00
nr_free + + ;
}
f2fs: update extent tree in batches
This patch introduce a new helper f2fs_update_extent_tree_range which can
do extent mapping update at a specified range.
The main idea is:
1) punch all mapping info in extent node(s) which are at a specified range;
2) try to merge new extent mapping with adjacent node, or failing that,
insert the mapping into extent tree as a new node.
In order to see the benefit, I add a function for stating time stamping
count as below:
uint64_t rdtsc(void)
{
uint32_t lo, hi;
__asm__ __volatile__ ("rdtsc" : "=a" (lo), "=d" (hi));
return (uint64_t)hi << 32 | lo;
}
My test environment is: ubuntu, intel i7-3770, 16G memory, 256g micron ssd.
truncation path: update extent cache from truncate_data_blocks_range
non-truncataion path: update extent cache from other paths
total: all update paths
a) Removing 128MB file which has one extent node mapping whole range of
file:
1. dd if=/dev/zero of=/mnt/f2fs/128M bs=1M count=128
2. sync
3. rm /mnt/f2fs/128M
Before:
total count average
truncation: 7651022 32768 233.49
Patched:
total count average
truncation: 3321 33 100.64
b) fsstress:
fsstress -d /mnt/f2fs -l 5 -n 100 -p 20
Test times: 5 times.
Before:
total count average
truncation: 5812480.6 20911.6 277.95
non-truncation: 7783845.6 13440.8 579.12
total: 13596326.2 34352.4 395.79
Patched:
total count average
truncation: 1281283.0 3041.6 421.25
non-truncation: 7355844.4 13662.8 538.38
total: 8637127.4 16704.4 517.06
1) For the updates in truncation path:
- we can see updating in batches leads total tsc and update count reducing
explicitly;
- besides, for a single batched updating, punching multiple extent nodes
in a loop, result in executing more operations, so our average tsc
increase intensively.
2) For the updates in non-truncation path:
- there is a little improvement, that is because for the scenario that we
just need to update in the head or tail of extent node, new interface
optimize to update info in extent node directly, rather than removing
original extent node for updating and then inserting that updated one
into cache as new node.
Signed-off-by: Chao Yu <chao2.yu@samsung.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2015-08-26 15:34:48 +03:00
2012-11-02 12:09:44 +04:00
if ( nr_free ) {
f2fs: update extent tree in batches
This patch introduce a new helper f2fs_update_extent_tree_range which can
do extent mapping update at a specified range.
The main idea is:
1) punch all mapping info in extent node(s) which are at a specified range;
2) try to merge new extent mapping with adjacent node, or failing that,
insert the mapping into extent tree as a new node.
In order to see the benefit, I add a function for stating time stamping
count as below:
uint64_t rdtsc(void)
{
uint32_t lo, hi;
__asm__ __volatile__ ("rdtsc" : "=a" (lo), "=d" (hi));
return (uint64_t)hi << 32 | lo;
}
My test environment is: ubuntu, intel i7-3770, 16G memory, 256g micron ssd.
truncation path: update extent cache from truncate_data_blocks_range
non-truncataion path: update extent cache from other paths
total: all update paths
a) Removing 128MB file which has one extent node mapping whole range of
file:
1. dd if=/dev/zero of=/mnt/f2fs/128M bs=1M count=128
2. sync
3. rm /mnt/f2fs/128M
Before:
total count average
truncation: 7651022 32768 233.49
Patched:
total count average
truncation: 3321 33 100.64
b) fsstress:
fsstress -d /mnt/f2fs -l 5 -n 100 -p 20
Test times: 5 times.
Before:
total count average
truncation: 5812480.6 20911.6 277.95
non-truncation: 7783845.6 13440.8 579.12
total: 13596326.2 34352.4 395.79
Patched:
total count average
truncation: 1281283.0 3041.6 421.25
non-truncation: 7355844.4 13662.8 538.38
total: 8637127.4 16704.4 517.06
1) For the updates in truncation path:
- we can see updating in batches leads total tsc and update count reducing
explicitly;
- besides, for a single batched updating, punching multiple extent nodes
in a loop, result in executing more operations, so our average tsc
increase intensively.
2) For the updates in non-truncation path:
- there is a little improvement, that is because for the scenario that we
just need to update in the head or tail of extent node, new interface
optimize to update info in extent node directly, rather than removing
original extent node for updating and then inserting that updated one
into cache as new node.
Signed-off-by: Chao Yu <chao2.yu@samsung.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2015-08-26 15:34:48 +03:00
pgoff_t fofs ;
/*
* once we invalidate valid blkaddr in range [ ofs , ofs + count ] ,
* we will invalidate all blkaddr in the whole range .
*/
fofs = start_bidx_of_node ( ofs_of_node ( dn - > node_page ) ,
2016-01-26 10:39:35 +03:00
dn - > inode ) + ofs ;
f2fs: update extent tree in batches
This patch introduce a new helper f2fs_update_extent_tree_range which can
do extent mapping update at a specified range.
The main idea is:
1) punch all mapping info in extent node(s) which are at a specified range;
2) try to merge new extent mapping with adjacent node, or failing that,
insert the mapping into extent tree as a new node.
In order to see the benefit, I add a function for stating time stamping
count as below:
uint64_t rdtsc(void)
{
uint32_t lo, hi;
__asm__ __volatile__ ("rdtsc" : "=a" (lo), "=d" (hi));
return (uint64_t)hi << 32 | lo;
}
My test environment is: ubuntu, intel i7-3770, 16G memory, 256g micron ssd.
truncation path: update extent cache from truncate_data_blocks_range
non-truncataion path: update extent cache from other paths
total: all update paths
a) Removing 128MB file which has one extent node mapping whole range of
file:
1. dd if=/dev/zero of=/mnt/f2fs/128M bs=1M count=128
2. sync
3. rm /mnt/f2fs/128M
Before:
total count average
truncation: 7651022 32768 233.49
Patched:
total count average
truncation: 3321 33 100.64
b) fsstress:
fsstress -d /mnt/f2fs -l 5 -n 100 -p 20
Test times: 5 times.
Before:
total count average
truncation: 5812480.6 20911.6 277.95
non-truncation: 7783845.6 13440.8 579.12
total: 13596326.2 34352.4 395.79
Patched:
total count average
truncation: 1281283.0 3041.6 421.25
non-truncation: 7355844.4 13662.8 538.38
total: 8637127.4 16704.4 517.06
1) For the updates in truncation path:
- we can see updating in batches leads total tsc and update count reducing
explicitly;
- besides, for a single batched updating, punching multiple extent nodes
in a loop, result in executing more operations, so our average tsc
increase intensively.
2) For the updates in non-truncation path:
- there is a little improvement, that is because for the scenario that we
just need to update in the head or tail of extent node, new interface
optimize to update info in extent node directly, rather than removing
original extent node for updating and then inserting that updated one
into cache as new node.
Signed-off-by: Chao Yu <chao2.yu@samsung.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2015-08-26 15:34:48 +03:00
f2fs_update_extent_cache_range ( dn , fofs , 0 , len ) ;
2013-06-08 16:25:40 +04:00
dec_valid_block_count ( sbi , dn - > inode , nr_free ) ;
2012-11-02 12:09:44 +04:00
}
dn - > ofs_in_node = ofs ;
2013-04-19 20:28:52 +04:00
2016-01-09 03:57:48 +03:00
f2fs_update_time ( sbi , REQ_TIME ) ;
2013-04-19 20:28:52 +04:00
trace_f2fs_truncate_data_blocks_range ( dn - > inode , dn - > nid ,
dn - > ofs_in_node , nr_free ) ;
2012-11-02 12:09:44 +04:00
return nr_free ;
}
/*
 * Convenience wrapper: run truncate_data_blocks_range() on @dn with a
 * count of ADDRS_PER_BLOCK and discard the number of freed blocks.
 */
void truncate_data_blocks(struct dnode_of_data *dn)
{
	truncate_data_blocks_range(dn, ADDRS_PER_BLOCK);
}
2015-03-10 08:16:25 +03:00
static int truncate_partial_data_page ( struct inode * inode , u64 from ,
2015-05-01 03:00:33 +03:00
bool cache_only )
2012-11-02 12:09:44 +04:00
{
mm, fs: get rid of PAGE_CACHE_* and page_cache_{get,release} macros
PAGE_CACHE_{SIZE,SHIFT,MASK,ALIGN} macros were introduced *long* time
ago with promise that one day it will be possible to implement page
cache with bigger chunks than PAGE_SIZE.
This promise never materialized. And unlikely will.
We have many places where PAGE_CACHE_SIZE assumed to be equal to
PAGE_SIZE. And it's constant source of confusion on whether
PAGE_CACHE_* or PAGE_* constant should be used in a particular case,
especially on the border between fs and mm.
Global switching to PAGE_CACHE_SIZE != PAGE_SIZE would cause to much
breakage to be doable.
Let's stop pretending that pages in page cache are special. They are
not.
The changes are pretty straight-forward:
- <foo> << (PAGE_CACHE_SHIFT - PAGE_SHIFT) -> <foo>;
- <foo> >> (PAGE_CACHE_SHIFT - PAGE_SHIFT) -> <foo>;
- PAGE_CACHE_{SIZE,SHIFT,MASK,ALIGN} -> PAGE_{SIZE,SHIFT,MASK,ALIGN};
- page_cache_get() -> get_page();
- page_cache_release() -> put_page();
This patch contains automated changes generated with coccinelle using
script below. For some reason, coccinelle doesn't patch header files.
I've called spatch for them manually.
The only adjustment after coccinelle is revert of changes to
PAGE_CAHCE_ALIGN definition: we are going to drop it later.
There are few places in the code where coccinelle didn't reach. I'll
fix them manually in a separate patch. Comments and documentation also
will be addressed with the separate patch.
virtual patch
@@
expression E;
@@
- E << (PAGE_CACHE_SHIFT - PAGE_SHIFT)
+ E
@@
expression E;
@@
- E >> (PAGE_CACHE_SHIFT - PAGE_SHIFT)
+ E
@@
@@
- PAGE_CACHE_SHIFT
+ PAGE_SHIFT
@@
@@
- PAGE_CACHE_SIZE
+ PAGE_SIZE
@@
@@
- PAGE_CACHE_MASK
+ PAGE_MASK
@@
expression E;
@@
- PAGE_CACHE_ALIGN(E)
+ PAGE_ALIGN(E)
@@
expression E;
@@
- page_cache_get(E)
+ get_page(E)
@@
expression E;
@@
- page_cache_release(E)
+ put_page(E)
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2016-04-01 15:29:47 +03:00
unsigned offset = from & ( PAGE_SIZE - 1 ) ;
pgoff_t index = from > > PAGE_SHIFT ;
2015-05-01 03:00:33 +03:00
struct address_space * mapping = inode - > i_mapping ;
2012-11-02 12:09:44 +04:00
struct page * page ;
2015-05-01 03:00:33 +03:00
if ( ! offset & & ! cache_only )
2014-10-24 06:48:09 +04:00
return 0 ;
2012-11-02 12:09:44 +04:00
2015-05-01 03:00:33 +03:00
if ( cache_only ) {
2016-09-07 01:55:54 +03:00
page = find_lock_page ( mapping , index ) ;
2015-05-01 03:00:33 +03:00
if ( page & & PageUptodate ( page ) )
goto truncate_out ;
f2fs_put_page ( page , 1 ) ;
2014-10-24 06:48:09 +04:00
return 0 ;
2015-05-01 03:00:33 +03:00
}
2012-11-02 12:09:44 +04:00
2015-10-10 01:11:38 +03:00
page = get_lock_data_page ( inode , index , true ) ;
2015-05-01 03:00:33 +03:00
if ( IS_ERR ( page ) )
return 0 ;
truncate_out :
2016-01-20 18:43:51 +03:00
f2fs_wait_on_page_writeback ( page , DATA , true ) ;
mm, fs: get rid of PAGE_CACHE_* and page_cache_{get,release} macros
PAGE_CACHE_{SIZE,SHIFT,MASK,ALIGN} macros were introduced *long* time
ago with promise that one day it will be possible to implement page
cache with bigger chunks than PAGE_SIZE.
This promise never materialized. And unlikely will.
We have many places where PAGE_CACHE_SIZE assumed to be equal to
PAGE_SIZE. And it's constant source of confusion on whether
PAGE_CACHE_* or PAGE_* constant should be used in a particular case,
especially on the border between fs and mm.
Global switching to PAGE_CACHE_SIZE != PAGE_SIZE would cause to much
breakage to be doable.
Let's stop pretending that pages in page cache are special. They are
not.
The changes are pretty straight-forward:
- <foo> << (PAGE_CACHE_SHIFT - PAGE_SHIFT) -> <foo>;
- <foo> >> (PAGE_CACHE_SHIFT - PAGE_SHIFT) -> <foo>;
- PAGE_CACHE_{SIZE,SHIFT,MASK,ALIGN} -> PAGE_{SIZE,SHIFT,MASK,ALIGN};
- page_cache_get() -> get_page();
- page_cache_release() -> put_page();
This patch contains automated changes generated with coccinelle using
script below. For some reason, coccinelle doesn't patch header files.
I've called spatch for them manually.
The only adjustment after coccinelle is revert of changes to
PAGE_CAHCE_ALIGN definition: we are going to drop it later.
There are few places in the code where coccinelle didn't reach. I'll
fix them manually in a separate patch. Comments and documentation also
will be addressed with the separate patch.
virtual patch
@@
expression E;
@@
- E << (PAGE_CACHE_SHIFT - PAGE_SHIFT)
+ E
@@
expression E;
@@
- E >> (PAGE_CACHE_SHIFT - PAGE_SHIFT)
+ E
@@
@@
- PAGE_CACHE_SHIFT
+ PAGE_SHIFT
@@
@@
- PAGE_CACHE_SIZE
+ PAGE_SIZE
@@
@@
- PAGE_CACHE_MASK
+ PAGE_MASK
@@
expression E;
@@
- PAGE_CACHE_ALIGN(E)
+ PAGE_ALIGN(E)
@@
expression E;
@@
- page_cache_get(E)
+ get_page(E)
@@
expression E;
@@
- page_cache_release(E)
+ put_page(E)
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2016-04-01 15:29:47 +03:00
zero_user ( page , offset , PAGE_SIZE - offset ) ;
2015-05-16 02:26:10 +03:00
if ( ! cache_only | | ! f2fs_encrypted_inode ( inode ) | |
! S_ISREG ( inode - > i_mode ) )
2015-03-10 08:16:25 +03:00
set_page_dirty ( page ) ;
2012-11-02 12:09:44 +04:00
f2fs_put_page ( page , 1 ) ;
2014-10-24 06:48:09 +04:00
return 0 ;
2012-11-02 12:09:44 +04:00
}
2014-08-15 03:32:54 +04:00
/*
 * Free the blocks of @inode from byte offset @from onwards.
 *
 * @from: byte offset; the first block to free is @from rounded up to the
 *        next block boundary.
 * @lock: when true the node/block work is wrapped in
 *        f2fs_lock_op()/f2fs_unlock_op().
 *
 * Inline-data inodes are truncated in place inside the inode page.  For
 * other inodes the remainder of the dnode holding the first block is
 * released with truncate_data_blocks_range(), then
 * truncate_inode_blocks() handles everything after it.  If no error
 * occurred, the partial page at @from is zeroed last.
 *
 * Returns 0 on success or a negative error code.
 */
int truncate_blocks(struct inode *inode, u64 from, bool lock)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	unsigned int blocksize = inode->i_sb->s_blocksize;
	struct dnode_of_data dn;
	struct page *inode_page;
	pgoff_t free_from;
	bool cache_only = false;
	int nr_slots = 0;
	int err = 0;

	trace_f2fs_truncate_blocks_enter(inode, from);

	free_from = (pgoff_t)F2FS_BYTES_TO_BLK(from + blocksize - 1);

	/* nothing block-wise to free; only the partial page remains */
	if (free_from >= sbi->max_file_blocks)
		goto free_partial;

	if (lock)
		f2fs_lock_op(sbi);

	inode_page = get_node_page(sbi, inode->i_ino);
	if (IS_ERR(inode_page)) {
		err = PTR_ERR(inode_page);
		goto out;
	}

	if (f2fs_has_inline_data(inode)) {
		if (truncate_inline_inode(inode_page, from))
			set_page_dirty(inode_page);
		f2fs_put_page(inode_page, 1);
		/* inline case: only touch a page that is already cached */
		cache_only = true;
		goto out;
	}

	set_new_dnode(&dn, inode, inode_page, NULL, 0);
	err = get_dnode_of_data(&dn, free_from, LOOKUP_NODE_RA);
	if (err == -ENOENT)
		goto free_next;
	if (err)
		goto out;

	nr_slots = ADDRS_PER_PAGE(dn.node_page, inode);
	nr_slots -= dn.ofs_in_node;
	f2fs_bug_on(sbi, nr_slots < 0);

	if (dn.ofs_in_node || IS_INODE(dn.node_page)) {
		truncate_data_blocks_range(&dn, nr_slots);
		free_from += nr_slots;
	}

	f2fs_put_dnode(&dn);
free_next:
	err = truncate_inode_blocks(inode, free_from);
out:
	if (lock)
		f2fs_unlock_op(sbi);
free_partial:
	/* lastly zero out the first data page */
	if (!err)
		err = truncate_partial_data_page(inode, from, cache_only);

	trace_f2fs_truncate_blocks_exit(inode, err);
	return err;
}
2016-06-02 23:49:38 +03:00
int f2fs_truncate ( struct inode * inode )
2012-11-02 12:09:44 +04:00
{
2015-08-24 12:39:42 +03:00
int err ;
2012-11-02 12:09:44 +04:00
if ( ! ( S_ISREG ( inode - > i_mode ) | | S_ISDIR ( inode - > i_mode ) | |
S_ISLNK ( inode - > i_mode ) ) )
2015-08-24 12:39:42 +03:00
return 0 ;
2012-11-02 12:09:44 +04:00
2013-04-19 20:28:52 +04:00
trace_f2fs_truncate ( inode ) ;
2014-11-12 01:10:01 +03:00
/* we should check inline_data size */
2015-12-22 22:09:35 +03:00
if ( ! f2fs_may_inline_data ( inode ) ) {
2015-08-24 12:39:42 +03:00
err = f2fs_convert_inline_inode ( inode ) ;
if ( err )
return err ;
2014-11-12 01:10:01 +03:00
}
2016-06-02 23:49:38 +03:00
err = truncate_blocks ( inode , i_size_read ( inode ) , true ) ;
2015-08-24 12:39:42 +03:00
if ( err )
return err ;
2016-09-14 17:48:04 +03:00
inode - > i_mtime = inode - > i_ctime = current_time ( inode ) ;
2016-07-01 05:09:37 +03:00
f2fs_mark_inode_dirty_sync ( inode ) ;
2015-08-24 12:39:42 +03:00
return 0 ;
2012-11-02 12:09:44 +04:00
}
2013-06-07 11:33:07 +04:00
int f2fs_getattr ( struct vfsmount * mnt ,
2012-11-02 12:09:44 +04:00
struct dentry * dentry , struct kstat * stat )
{
2015-03-18 01:25:59 +03:00
struct inode * inode = d_inode ( dentry ) ;
2012-11-02 12:09:44 +04:00
generic_fillattr ( inode , stat ) ;
stat - > blocks < < = 3 ;
return 0 ;
}
#ifdef CONFIG_F2FS_FS_POSIX_ACL
/*
 * Copy the attributes selected by attr->ia_valid into @inode.
 *
 * Timestamps are truncated to the superblock's time granularity.  A new
 * mode is not written to i_mode directly; it is handed to
 * set_acl_inode(), after dropping S_ISGID when the caller is neither in
 * the inode's group nor holds CAP_FSETID.
 *
 * Without CONFIG_F2FS_FS_POSIX_ACL this name is an alias for
 * setattr_copy().
 */
static void __setattr_copy(struct inode *inode, const struct iattr *attr)
{
	unsigned int valid = attr->ia_valid;
	umode_t new_mode;

	if (valid & ATTR_UID)
		inode->i_uid = attr->ia_uid;
	if (valid & ATTR_GID)
		inode->i_gid = attr->ia_gid;
	if (valid & ATTR_ATIME)
		inode->i_atime = timespec_trunc(attr->ia_atime,
						inode->i_sb->s_time_gran);
	if (valid & ATTR_MTIME)
		inode->i_mtime = timespec_trunc(attr->ia_mtime,
						inode->i_sb->s_time_gran);
	if (valid & ATTR_CTIME)
		inode->i_ctime = timespec_trunc(attr->ia_ctime,
						inode->i_sb->s_time_gran);

	if (!(valid & ATTR_MODE))
		return;

	new_mode = attr->ia_mode;
	if (!(in_group_p(inode->i_gid) || capable(CAP_FSETID)))
		new_mode &= ~S_ISGID;
	set_acl_inode(inode, new_mode);
}
#else
#define __setattr_copy setattr_copy
#endif
int f2fs_setattr ( struct dentry * dentry , struct iattr * attr )
{
2015-03-18 01:25:59 +03:00
struct inode * inode = d_inode ( dentry ) ;
2012-11-02 12:09:44 +04:00
int err ;
2016-05-26 17:55:18 +03:00
err = setattr_prepare ( dentry , attr ) ;
2012-11-02 12:09:44 +04:00
if ( err )
return err ;
2014-09-15 14:02:09 +04:00
if ( attr - > ia_valid & ATTR_SIZE ) {
2015-04-22 06:39:58 +03:00
if ( f2fs_encrypted_inode ( inode ) & &
2015-05-16 02:26:10 +03:00
fscrypt_get_encryption_info ( inode ) )
2015-04-22 06:39:58 +03:00
return - EACCES ;
2015-06-05 13:34:02 +03:00
if ( attr - > ia_size < = i_size_read ( inode ) ) {
2014-09-15 14:02:09 +04:00
truncate_setsize ( inode , attr - > ia_size ) ;
2016-06-02 23:49:38 +03:00
err = f2fs_truncate ( inode ) ;
2015-08-24 12:39:42 +03:00
if ( err )
return err ;
2016-01-08 01:15:04 +03:00
f2fs_balance_fs ( F2FS_I_SB ( inode ) , true ) ;
2014-09-15 14:02:09 +04:00
} else {
/*
2015-06-05 13:34:02 +03:00
* do not trim all blocks after i_size if target size is
* larger than i_size .
2014-09-15 14:02:09 +04:00
*/
2015-06-05 13:34:02 +03:00
truncate_setsize ( inode , attr - > ia_size ) ;
2015-12-01 06:36:16 +03:00
/* should convert inline inode here */
2015-12-22 22:09:35 +03:00
if ( ! f2fs_may_inline_data ( inode ) ) {
2015-12-01 06:36:16 +03:00
err = f2fs_convert_inline_inode ( inode ) ;
if ( err )
return err ;
}
2016-09-14 17:48:04 +03:00
inode - > i_mtime = inode - > i_ctime = current_time ( inode ) ;
2014-09-15 14:02:09 +04:00
}
2012-11-02 12:09:44 +04:00
}
__setattr_copy ( inode , attr ) ;
if ( attr - > ia_valid & ATTR_MODE ) {
2013-12-20 17:16:45 +04:00
err = posix_acl_chmod ( inode , get_inode_mode ( inode ) ) ;
2016-05-20 20:13:22 +03:00
if ( err | | is_inode_flag_set ( inode , FI_ACL_MODE ) ) {
inode - > i_mode = F2FS_I ( inode ) - > i_acl_mode ;
clear_inode_flag ( inode , FI_ACL_MODE ) ;
2012-11-02 12:09:44 +04:00
}
}
2016-07-01 05:09:37 +03:00
f2fs_mark_inode_dirty_sync ( inode ) ;
2012-11-02 12:09:44 +04:00
return err ;
}
const struct inode_operations f2fs_file_inode_operations = {
. getattr = f2fs_getattr ,
. setattr = f2fs_setattr ,
. get_acl = f2fs_get_acl ,
2013-12-20 17:16:45 +04:00
. set_acl = f2fs_set_acl ,
2012-11-02 12:09:44 +04:00
# ifdef CONFIG_F2FS_FS_XATTR
. listxattr = f2fs_listxattr ,
# endif
2014-06-07 23:30:14 +04:00
. fiemap = f2fs_fiemap ,
2012-11-02 12:09:44 +04:00
} ;
2015-08-07 13:36:06 +03:00
/*
 * Zero @len bytes starting at byte @start within the data page at page
 * index @index of @inode.
 *
 * The page is obtained with get_new_data_page() under f2fs_lock_op(),
 * waited on for writeback, zeroed, marked dirty and released.
 *
 * Returns 0 on success (or when @len is 0), otherwise the error encoded
 * in the page pointer.
 */
static int fill_zero(struct inode *inode, pgoff_t index,
					loff_t start, loff_t len)
{
	struct f2fs_sb_info *sbi;
	struct page *target;

	if (len == 0)
		return 0;

	sbi = F2FS_I_SB(inode);
	f2fs_balance_fs(sbi, true);

	/* the lock only covers obtaining the page, not the zeroing */
	f2fs_lock_op(sbi);
	target = get_new_data_page(inode, NULL, index, false);
	f2fs_unlock_op(sbi);

	if (IS_ERR(target))
		return PTR_ERR(target);

	f2fs_wait_on_page_writeback(target, DATA, true);
	zero_user(target, start, len);
	set_page_dirty(target);
	f2fs_put_page(target, 1);

	return 0;
}
/*
 * Truncate the data blocks of @inode that back the page indices in
 * [pg_start, pg_end).
 *
 * The range is processed one dnode at a time. A page index whose dnode
 * lookup fails with -ENOENT is skipped; any other lookup error is returned
 * to the caller right away. Returns 0 once the whole range was walked.
 */
int truncate_hole(struct inode *inode, pgoff_t pg_start, pgoff_t pg_end)
{
	while (pg_start < pg_end) {
		struct dnode_of_data dn;
		pgoff_t addrs_in_node, nr;
		int err;

		set_new_dnode(&dn, inode, NULL, NULL, 0);
		err = get_dnode_of_data(&dn, pg_start, LOOKUP_NODE);
		if (err == -ENOENT) {
			/* no dnode for this index: move on to the next one */
			pg_start++;
			continue;
		}
		if (err)
			return err;

		/*
		 * Truncate up to the end of this dnode or up to pg_end,
		 * whichever comes first.
		 */
		addrs_in_node = ADDRS_PER_PAGE(dn.node_page, inode);
		nr = min(addrs_in_node - dn.ofs_in_node, pg_end - pg_start);

		f2fs_bug_on(F2FS_I_SB(inode), nr == 0 || nr > addrs_in_node);

		truncate_data_blocks_range(&dn, nr);
		f2fs_put_dnode(&dn);

		pg_start += nr;
	}
	return 0;
}
2013-11-22 12:52:50 +04:00
/*
 * Punch a hole of @len bytes at byte @offset in @inode.
 *
 * The inode is first converted out of inline form. Partial pages at either
 * end of the range are zeroed through fill_zero(); any whole pages left in
 * between are dropped from the page cache and their blocks are truncated
 * with truncate_hole() while holding f2fs_lock_op().
 *
 * Returns 0 on success or the first error reported by a helper.
 */
static int punch_hole(struct inode *inode, loff_t offset, loff_t len)
{
	struct address_space *mapping;
	struct f2fs_sb_info *sbi;
	pgoff_t pg_start, pg_end;
	loff_t off_start, off_end;
	loff_t blk_start, blk_end;
	int ret;

	ret = f2fs_convert_inline_inode(inode);
	if (ret)
		return ret;

	pg_start = ((unsigned long long) offset) >> PAGE_SHIFT;
	pg_end = ((unsigned long long) offset + len) >> PAGE_SHIFT;

	off_start = offset & (PAGE_SIZE - 1);
	off_end = (offset + len) & (PAGE_SIZE - 1);

	/* the whole range lives inside one page: just zero that slice */
	if (pg_start == pg_end)
		return fill_zero(inode, pg_start, off_start,
						off_end - off_start);

	/* zero the tail of the first page when the range starts mid-page */
	if (off_start) {
		ret = fill_zero(inode, pg_start, off_start,
						PAGE_SIZE - off_start);
		if (ret)
			return ret;
		pg_start++;
	}

	/* zero the head of the last page when the range ends mid-page */
	if (off_end) {
		ret = fill_zero(inode, pg_end, 0, off_end);
		if (ret)
			return ret;
	}

	/* no whole page left between the two edges */
	if (pg_start >= pg_end)
		return ret;

	mapping = inode->i_mapping;
	sbi = F2FS_I_SB(inode);

	f2fs_balance_fs(sbi, true);

	blk_start = (loff_t)pg_start << PAGE_SHIFT;
	blk_end = (loff_t)pg_end << PAGE_SHIFT;
	truncate_inode_pages_range(mapping, blk_start, blk_end - 1);

	f2fs_lock_op(sbi);
	ret = truncate_hole(inode, pg_start, pg_end);
	f2fs_unlock_op(sbi);

	return ret;
}
2016-07-09 03:42:21 +03:00
/*
 * Read the block addresses of @len pages of @inode, starting at page
 * index @off, into @blkaddr.
 *
 * For every address for which is_checkpointed_data() is false, the entry
 * in the dnode is reset to NULL_ADDR (without invalidating the block) and
 * the matching @do_replace slot is set to 1. With the LFS mount option
 * such a block makes the function fail with -ENOTSUPP.
 *
 * When a dnode lookup returns -ENOENT and dn.max_level is non-zero, the
 * corresponding @blkaddr / @do_replace slots are stepped over and left
 * untouched; with dn.max_level == 0 the function returns -ENOENT.
 *
 * Returns 0 on success or a negative errno.
 */
static int __read_out_blkaddrs(struct inode *inode, block_t *blkaddr,
				int *do_replace, pgoff_t off, pgoff_t len)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct dnode_of_data dn;
	int ret, done, i;

	/* one pass per dnode; the first pass runs regardless of @len */
	do {
		set_new_dnode(&dn, inode, NULL, NULL, 0);
		ret = get_dnode_of_data(&dn, off, LOOKUP_NODE_RA);

		if (ret == -ENOENT) {
			if (dn.max_level == 0)
				return -ENOENT;

			/* missing dnode: skip the slots it would have covered */
			done = min((pgoff_t)ADDRS_PER_BLOCK - dn.ofs_in_node,
									len);
			blkaddr += done;
			do_replace += done;
		} else if (ret) {
			return ret;
		} else {
			done = min((pgoff_t)ADDRS_PER_PAGE(dn.node_page, inode) -
							dn.ofs_in_node, len);

			for (i = 0; i < done; i++) {
				*blkaddr = datablock_addr(dn.node_page,
							dn.ofs_in_node);

				if (!is_checkpointed_data(sbi, *blkaddr)) {
					if (test_opt(sbi, LFS)) {
						f2fs_put_dnode(&dn);
						return -ENOTSUPP;
					}

					/* do not invalidate this block address */
					f2fs_update_data_blkaddr(&dn, NULL_ADDR);
					*do_replace = 1;
				}

				blkaddr++;
				do_replace++;
				dn.ofs_in_node++;
			}
			f2fs_put_dnode(&dn);
		}

		len -= done;
		off += done;
	} while (len);

	return 0;
}
2015-05-06 08:09:46 +03:00
2016-07-09 03:42:21 +03:00
/*
 * Undo the effect of __read_out_blkaddrs() on @inode for @len pages
 * starting at page index @off.
 *
 * Only slots whose @do_replace entry is non-zero are touched. If the dnode
 * for the slot can be looked up, the saved address from @blkaddr is written
 * back into it; otherwise the block is given up: the valid block count is
 * decremented by one and the saved address is invalidated.
 *
 * Always returns 0.
 */
static int __roll_back_blkaddrs(struct inode *inode, block_t *blkaddr,
				int *do_replace, pgoff_t off, int len)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	int idx;

	for (idx = 0; idx < len; idx++) {
		struct dnode_of_data dn;

		if (!do_replace[idx])
			continue;

		set_new_dnode(&dn, inode, NULL, NULL, 0);
		if (get_dnode_of_data(&dn, off + idx, LOOKUP_NODE_RA)) {
			dec_valid_block_count(sbi, inode, 1);
			invalidate_blocks(sbi, blkaddr[idx]);
		} else {
			f2fs_update_data_blkaddr(&dn, blkaddr[idx]);
		}
		/* called on both paths, exactly as the lookup left @dn */
		f2fs_put_dnode(&dn);
	}
	return 0;
}
/*
 * Transfer @len pages from @src_inode (page index @src) to @dst_inode
 * (page index @dst), using the addresses gathered in @blkaddr and the
 * per-page flags in @do_replace.
 *
 * For each slot i:
 *  - blkaddr[i] == NULL_ADDR and !@full: the slot is skipped.
 *  - do_replace[i] set, or blkaddr[i] == NULL_ADDR (with @full): the
 *    destination slot is truncated; if do_replace[i] is set the source
 *    block is additionally re-attached to the destination through
 *    f2fs_replace_block(), the i_blocks of both inodes are adjusted, and
 *    do_replace[i] is cleared. Consecutive such slots that live in the
 *    same destination dnode are handled in one dnode lookup.
 *  - otherwise: the page is copied from source to destination through the
 *    page cache and the source page is then punched with truncate_hole().
 *
 * The destination i_size is grown as slots are filled in.
 *
 * Returns 0 on success or a negative errno from the first failing helper.
 */
static int __clone_blkaddrs(struct inode *src_inode, struct inode *dst_inode,
			block_t *blkaddr, int *do_replace,
			pgoff_t src, pgoff_t dst, pgoff_t len, bool full)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(src_inode);
	pgoff_t i = 0;
	int ret;

	while (i < len) {
		if (blkaddr[i] == NULL_ADDR && !full) {
			i++;
			continue;
		}

		if (do_replace[i] || blkaddr[i] == NULL_ADDR) {
			struct dnode_of_data dn;
			struct node_info ni;
			loff_t new_size;
			pgoff_t ilen;

			set_new_dnode(&dn, dst_inode, NULL, NULL, 0);
			ret = get_dnode_of_data(&dn, dst + i, ALLOC_NODE);
			if (ret)
				return ret;

			get_node_info(sbi, dn.nid, &ni);

			/* slots left in this dnode, capped by what remains */
			ilen = min((pgoff_t)
				ADDRS_PER_PAGE(dn.node_page, dst_inode) -
						dn.ofs_in_node, len - i);
			do {
				dn.data_blkaddr = datablock_addr(dn.node_page,
								dn.ofs_in_node);
				truncate_data_blocks_range(&dn, 1);

				if (do_replace[i]) {
					f2fs_i_blocks_write(src_inode,
								1, false);
					f2fs_i_blocks_write(dst_inode,
								1, true);
					f2fs_replace_block(sbi, &dn,
						dn.data_blkaddr, blkaddr[i],
						ni.version, true, false);

					do_replace[i] = 0;
				}
				dn.ofs_in_node++;
				i++;

				/*
				 * Widen before shifting so the byte size does
				 * not wrap where pgoff_t is 32 bits wide.
				 */
				new_size = (loff_t)(dst + i) << PAGE_SHIFT;
				if (dst_inode->i_size < new_size)
					f2fs_i_size_write(dst_inode, new_size);
			/*
			 * Test --ilen first: once it reaches zero, i may be
			 * equal to len and the arrays must not be indexed.
			 */
			} while (--ilen &&
				(do_replace[i] || blkaddr[i] == NULL_ADDR));

			f2fs_put_dnode(&dn);
		} else {
			struct page *psrc, *pdst;

			psrc = get_lock_data_page(src_inode, src + i, true);
			if (IS_ERR(psrc))
				return PTR_ERR(psrc);

			pdst = get_new_data_page(dst_inode, NULL, dst + i,
								true);
			if (IS_ERR(pdst)) {
				f2fs_put_page(psrc, 1);
				return PTR_ERR(pdst);
			}

			f2fs_copy_page(psrc, pdst);
			set_page_dirty(pdst);
			f2fs_put_page(pdst, 1);
			f2fs_put_page(psrc, 1);

			/* the data now lives in dst: release the source page */
			ret = truncate_hole(src_inode, src + i, src + i + 1);
			if (ret)
				return ret;
			i++;
		}
	}
	return 0;
}
2015-05-06 08:09:46 +03:00
2016-07-09 03:42:21 +03:00
/*
 * Move @len pages from @src_inode (page index @src) to @dst_inode (page
 * index @dst), working in chunks of at most 4 * ADDRS_PER_BLOCK pages.
 *
 * For each chunk two zero-initialised scratch arrays are allocated, the
 * source addresses are collected with __read_out_blkaddrs() and then
 * applied with __clone_blkaddrs(). @full is forwarded to
 * __clone_blkaddrs().
 *
 * If either step fails, the current chunk is rolled back with
 * __roll_back_blkaddrs() and the error is returned; chunks that completed
 * earlier are not undone here.
 *
 * Returns 0 on success, -ENOMEM if a scratch array cannot be allocated, or
 * the error from the failing helper.
 */
static int __exchange_data_block(struct inode *src_inode,
			struct inode *dst_inode, pgoff_t src, pgoff_t dst,
			pgoff_t len, bool full)
{
	block_t *src_blkaddr;
	int *do_replace;
	pgoff_t olen;
	int ret;

	while (len) {
		olen = min((pgoff_t)4 * ADDRS_PER_BLOCK, len);

		src_blkaddr = f2fs_kvzalloc(sizeof(block_t) * olen, GFP_KERNEL);
		if (!src_blkaddr)
			return -ENOMEM;

		do_replace = f2fs_kvzalloc(sizeof(int) * olen, GFP_KERNEL);
		if (!do_replace) {
			kvfree(src_blkaddr);
			return -ENOMEM;
		}

		ret = __read_out_blkaddrs(src_inode, src_blkaddr,
					do_replace, src, olen);
		if (ret)
			goto roll_back;

		ret = __clone_blkaddrs(src_inode, dst_inode, src_blkaddr,
					do_replace, src, dst, olen, full);
		if (ret)
			goto roll_back;

		src += olen;
		dst += olen;
		len -= olen;

		kvfree(src_blkaddr);
		kvfree(do_replace);
	}
	return 0;

roll_back:
	/*
	 * Roll back only the current chunk: both scratch arrays hold olen
	 * entries, so the remaining total (len) must not be used as the bound.
	 */
	__roll_back_blkaddrs(src_inode, src_blkaddr, do_replace, src, olen);
	kvfree(src_blkaddr);
	kvfree(do_replace);
	return ret;
}
2015-05-06 08:09:46 +03:00
2015-10-07 22:28:41 +03:00
/*
 * Shift the pages of @inode that start at page index @end down to page
 * index @start, covering everything up to the current end of file.
 *
 * Runs under f2fs_lock_op() after balancing the filesystem and dropping
 * the inode's extent tree. Returns the result of __exchange_data_block().
 */
static int f2fs_do_collapse(struct inode *inode, pgoff_t start, pgoff_t end)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	/* file size in pages, rounded up */
	pgoff_t total_pages = (i_size_read(inode) + PAGE_SIZE - 1) / PAGE_SIZE;
	pgoff_t tail_pages = total_pages - end;
	int err;

	f2fs_balance_fs(sbi, true);

	f2fs_lock_op(sbi);
	f2fs_drop_extent_tree(inode);
	err = __exchange_data_block(inode, inode, end, start,
						tail_pages, true);
	f2fs_unlock_op(sbi);

	return err;
}
/*
 * Remove @len bytes at byte @offset from @inode and close the gap, so the
 * file shrinks by @len.
 *
 * Fails with -EINVAL when the range reaches or passes the end of file, or
 * when @offset or @len is not a multiple of F2FS_BLKSIZE. Otherwise the
 * inode is converted out of inline form, dirty pages from @offset onward
 * are written back and dropped, the blocks are shifted down with
 * f2fs_do_collapse(), and the file is truncated to its new size.
 *
 * Returns 0 on success or a negative errno.
 */
static int f2fs_collapse_range(struct inode *inode, loff_t offset, loff_t len)
{
	struct address_space *mapping = inode->i_mapping;
	pgoff_t first_pg, last_pg;
	loff_t new_size;
	int ret;

	if (offset + len >= i_size_read(inode))
		return -EINVAL;

	/* collapse range should be aligned to block size of f2fs. */
	if ((offset | len) & (F2FS_BLKSIZE - 1))
		return -EINVAL;

	ret = f2fs_convert_inline_inode(inode);
	if (ret)
		return ret;

	first_pg = offset >> PAGE_SHIFT;
	last_pg = (offset + len) >> PAGE_SHIFT;

	/* write out all dirty pages from offset */
	ret = filemap_write_and_wait_range(mapping, offset, LLONG_MAX);
	if (ret)
		return ret;

	truncate_pagecache(inode, offset);

	ret = f2fs_do_collapse(inode, first_pg, last_pg);
	if (ret)
		return ret;

	/* write out all moved pages, if possible */
	filemap_write_and_wait_range(mapping, offset, LLONG_MAX);
	truncate_pagecache(inode, offset);

	new_size = i_size_read(inode) - len;
	truncate_pagecache(inode, new_size);

	ret = truncate_blocks(inode, new_size, true);
	if (ret)
		return ret;

	f2fs_i_size_write(inode, new_size);
	return 0;
}
2016-05-09 14:56:31 +03:00
/*
 * Turn the block addresses of page indices [start, end) in the dnode
 * described by @dn into NEW_ADDR.
 *
 * The slots that are still NULL_ADDR are counted first and that many
 * blocks are requested through reserve_new_blocks(). Each slot is then
 * visited again: an address other than NEW_ADDR is invalidated and
 * replaced by NEW_ADDR. A slot that is still NULL_ADDR after the
 * reservation stops the walk with -ENOSPC. The extent cache is updated for
 * the pages processed before returning.
 *
 * Returns 0 on success, the error from reserve_new_blocks(), or -ENOSPC.
 */
static int f2fs_do_zero_range(struct dnode_of_data *dn, pgoff_t start,
								pgoff_t end)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
	unsigned int first_ofs = dn->ofs_in_node;
	unsigned int ofs;
	blkcnt_t nr_holes = 0;
	pgoff_t index;
	int ret;

	/* count the slots that have no block yet */
	ofs = first_ofs;
	for (index = start; index < end; index++) {
		if (datablock_addr(dn->node_page, ofs) == NULL_ADDR)
			nr_holes++;
		ofs++;
	}

	ret = reserve_new_blocks(dn, nr_holes);
	if (ret)
		return ret;

	/* rewind to the first slot before the second walk */
	dn->ofs_in_node = first_ofs;
	index = start;
	while (index < end) {
		dn->data_blkaddr =
			datablock_addr(dn->node_page, dn->ofs_in_node);
		/*
		 * reserve_new_blocks will not guarantee entire block
		 * allocation.
		 */
		if (dn->data_blkaddr == NULL_ADDR) {
			ret = -ENOSPC;
			break;
		}
		if (dn->data_blkaddr != NEW_ADDR) {
			invalidate_blocks(sbi, dn->data_blkaddr);
			dn->data_blkaddr = NEW_ADDR;
			set_data_blkaddr(dn);
		}
		index++;
		dn->ofs_in_node++;
	}

	f2fs_update_extent_cache_range(dn, start, 0, index - start);

	return ret;
}
2015-05-06 08:11:13 +03:00
static int f2fs_zero_range ( struct inode * inode , loff_t offset , loff_t len ,
int mode )
{
struct f2fs_sb_info * sbi = F2FS_I_SB ( inode ) ;
struct address_space * mapping = inode - > i_mapping ;
pgoff_t index , pg_start , pg_end ;
loff_t new_size = i_size_read ( inode ) ;
loff_t off_start , off_end ;
int ret = 0 ;
ret = inode_newsize_ok ( inode , ( len + offset ) ) ;
if ( ret )
return ret ;
2015-12-22 22:09:35 +03:00
ret = f2fs_convert_inline_inode ( inode ) ;
if ( ret )
return ret ;
2015-05-06 08:11:13 +03:00
ret = filemap_write_and_wait_range ( mapping , offset , offset + len - 1 ) ;
if ( ret )
return ret ;
truncate_pagecache_range ( inode , offset , offset + len - 1 ) ;
mm, fs: get rid of PAGE_CACHE_* and page_cache_{get,release} macros
PAGE_CACHE_{SIZE,SHIFT,MASK,ALIGN} macros were introduced *long* time
ago with promise that one day it will be possible to implement page
cache with bigger chunks than PAGE_SIZE.
This promise never materialized. And unlikely will.
We have many places where PAGE_CACHE_SIZE assumed to be equal to
PAGE_SIZE. And it's constant source of confusion on whether
PAGE_CACHE_* or PAGE_* constant should be used in a particular case,
especially on the border between fs and mm.
Global switching to PAGE_CACHE_SIZE != PAGE_SIZE would cause to much
breakage to be doable.
Let's stop pretending that pages in page cache are special. They are
not.
The changes are pretty straight-forward:
- <foo> << (PAGE_CACHE_SHIFT - PAGE_SHIFT) -> <foo>;
- <foo> >> (PAGE_CACHE_SHIFT - PAGE_SHIFT) -> <foo>;
- PAGE_CACHE_{SIZE,SHIFT,MASK,ALIGN} -> PAGE_{SIZE,SHIFT,MASK,ALIGN};
- page_cache_get() -> get_page();
- page_cache_release() -> put_page();
This patch contains automated changes generated with coccinelle using
script below. For some reason, coccinelle doesn't patch header files.
I've called spatch for them manually.
The only adjustment after coccinelle is revert of changes to
PAGE_CAHCE_ALIGN definition: we are going to drop it later.
There are few places in the code where coccinelle didn't reach. I'll
fix them manually in a separate patch. Comments and documentation also
will be addressed with the separate patch.
virtual patch
@@
expression E;
@@
- E << (PAGE_CACHE_SHIFT - PAGE_SHIFT)
+ E
@@
expression E;
@@
- E >> (PAGE_CACHE_SHIFT - PAGE_SHIFT)
+ E
@@
@@
- PAGE_CACHE_SHIFT
+ PAGE_SHIFT
@@
@@
- PAGE_CACHE_SIZE
+ PAGE_SIZE
@@
@@
- PAGE_CACHE_MASK
+ PAGE_MASK
@@
expression E;
@@
- PAGE_CACHE_ALIGN(E)
+ PAGE_ALIGN(E)
@@
expression E;
@@
- page_cache_get(E)
+ get_page(E)
@@
expression E;
@@
- page_cache_release(E)
+ put_page(E)
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2016-04-01 15:29:47 +03:00
pg_start = ( ( unsigned long long ) offset ) > > PAGE_SHIFT ;
pg_end = ( ( unsigned long long ) offset + len ) > > PAGE_SHIFT ;
2015-05-06 08:11:13 +03:00
mm, fs: get rid of PAGE_CACHE_* and page_cache_{get,release} macros
PAGE_CACHE_{SIZE,SHIFT,MASK,ALIGN} macros were introduced *long* time
ago with promise that one day it will be possible to implement page
cache with bigger chunks than PAGE_SIZE.
This promise never materialized. And unlikely will.
We have many places where PAGE_CACHE_SIZE assumed to be equal to
PAGE_SIZE. And it's constant source of confusion on whether
PAGE_CACHE_* or PAGE_* constant should be used in a particular case,
especially on the border between fs and mm.
Global switching to PAGE_CACHE_SIZE != PAGE_SIZE would cause too much
breakage to be doable.
Let's stop pretending that pages in page cache are special. They are
not.
The changes are pretty straight-forward:
- <foo> << (PAGE_CACHE_SHIFT - PAGE_SHIFT) -> <foo>;
- <foo> >> (PAGE_CACHE_SHIFT - PAGE_SHIFT) -> <foo>;
- PAGE_CACHE_{SIZE,SHIFT,MASK,ALIGN} -> PAGE_{SIZE,SHIFT,MASK,ALIGN};
- page_cache_get() -> get_page();
- page_cache_release() -> put_page();
This patch contains automated changes generated with coccinelle using
script below. For some reason, coccinelle doesn't patch header files.
I've called spatch for them manually.
The only adjustment after coccinelle is revert of changes to
PAGE_CACHE_ALIGN definition: we are going to drop it later.
There are few places in the code where coccinelle didn't reach. I'll
fix them manually in a separate patch. Comments and documentation also
will be addressed with the separate patch.
virtual patch
@@
expression E;
@@
- E << (PAGE_CACHE_SHIFT - PAGE_SHIFT)
+ E
@@
expression E;
@@
- E >> (PAGE_CACHE_SHIFT - PAGE_SHIFT)
+ E
@@
@@
- PAGE_CACHE_SHIFT
+ PAGE_SHIFT
@@
@@
- PAGE_CACHE_SIZE
+ PAGE_SIZE
@@
@@
- PAGE_CACHE_MASK
+ PAGE_MASK
@@
expression E;
@@
- PAGE_CACHE_ALIGN(E)
+ PAGE_ALIGN(E)
@@
expression E;
@@
- page_cache_get(E)
+ get_page(E)
@@
expression E;
@@
- page_cache_release(E)
+ put_page(E)
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2016-04-01 15:29:47 +03:00
off_start = offset & ( PAGE_SIZE - 1 ) ;
off_end = ( offset + len ) & ( PAGE_SIZE - 1 ) ;
2015-05-06 08:11:13 +03:00
if ( pg_start = = pg_end ) {
2015-08-07 13:36:06 +03:00
ret = fill_zero ( inode , pg_start , off_start ,
off_end - off_start ) ;
if ( ret )
return ret ;
2015-05-06 08:11:13 +03:00
if ( offset + len > new_size )
new_size = offset + len ;
new_size = max_t ( loff_t , new_size , offset + len ) ;
} else {
if ( off_start ) {
2015-08-07 13:36:06 +03:00
ret = fill_zero ( inode , pg_start + + , off_start ,
mm, fs: get rid of PAGE_CACHE_* and page_cache_{get,release} macros
PAGE_CACHE_{SIZE,SHIFT,MASK,ALIGN} macros were introduced *long* time
ago with promise that one day it will be possible to implement page
cache with bigger chunks than PAGE_SIZE.
This promise never materialized. And unlikely will.
We have many places where PAGE_CACHE_SIZE assumed to be equal to
PAGE_SIZE. And it's constant source of confusion on whether
PAGE_CACHE_* or PAGE_* constant should be used in a particular case,
especially on the border between fs and mm.
Global switching to PAGE_CACHE_SIZE != PAGE_SIZE would cause too much
breakage to be doable.
Let's stop pretending that pages in page cache are special. They are
not.
The changes are pretty straight-forward:
- <foo> << (PAGE_CACHE_SHIFT - PAGE_SHIFT) -> <foo>;
- <foo> >> (PAGE_CACHE_SHIFT - PAGE_SHIFT) -> <foo>;
- PAGE_CACHE_{SIZE,SHIFT,MASK,ALIGN} -> PAGE_{SIZE,SHIFT,MASK,ALIGN};
- page_cache_get() -> get_page();
- page_cache_release() -> put_page();
This patch contains automated changes generated with coccinelle using
script below. For some reason, coccinelle doesn't patch header files.
I've called spatch for them manually.
The only adjustment after coccinelle is revert of changes to
PAGE_CACHE_ALIGN definition: we are going to drop it later.
There are few places in the code where coccinelle didn't reach. I'll
fix them manually in a separate patch. Comments and documentation also
will be addressed with the separate patch.
virtual patch
@@
expression E;
@@
- E << (PAGE_CACHE_SHIFT - PAGE_SHIFT)
+ E
@@
expression E;
@@
- E >> (PAGE_CACHE_SHIFT - PAGE_SHIFT)
+ E
@@
@@
- PAGE_CACHE_SHIFT
+ PAGE_SHIFT
@@
@@
- PAGE_CACHE_SIZE
+ PAGE_SIZE
@@
@@
- PAGE_CACHE_MASK
+ PAGE_MASK
@@
expression E;
@@
- PAGE_CACHE_ALIGN(E)
+ PAGE_ALIGN(E)
@@
expression E;
@@
- page_cache_get(E)
+ get_page(E)
@@
expression E;
@@
- page_cache_release(E)
+ put_page(E)
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2016-04-01 15:29:47 +03:00
PAGE_SIZE - off_start ) ;
2015-08-07 13:36:06 +03:00
if ( ret )
return ret ;
2015-05-06 08:11:13 +03:00
new_size = max_t ( loff_t , new_size ,
mm, fs: get rid of PAGE_CACHE_* and page_cache_{get,release} macros
PAGE_CACHE_{SIZE,SHIFT,MASK,ALIGN} macros were introduced *long* time
ago with promise that one day it will be possible to implement page
cache with bigger chunks than PAGE_SIZE.
This promise never materialized. And unlikely will.
We have many places where PAGE_CACHE_SIZE assumed to be equal to
PAGE_SIZE. And it's constant source of confusion on whether
PAGE_CACHE_* or PAGE_* constant should be used in a particular case,
especially on the border between fs and mm.
Global switching to PAGE_CACHE_SIZE != PAGE_SIZE would cause too much
breakage to be doable.
Let's stop pretending that pages in page cache are special. They are
not.
The changes are pretty straight-forward:
- <foo> << (PAGE_CACHE_SHIFT - PAGE_SHIFT) -> <foo>;
- <foo> >> (PAGE_CACHE_SHIFT - PAGE_SHIFT) -> <foo>;
- PAGE_CACHE_{SIZE,SHIFT,MASK,ALIGN} -> PAGE_{SIZE,SHIFT,MASK,ALIGN};
- page_cache_get() -> get_page();
- page_cache_release() -> put_page();
This patch contains automated changes generated with coccinelle using
script below. For some reason, coccinelle doesn't patch header files.
I've called spatch for them manually.
The only adjustment after coccinelle is revert of changes to
PAGE_CACHE_ALIGN definition: we are going to drop it later.
There are few places in the code where coccinelle didn't reach. I'll
fix them manually in a separate patch. Comments and documentation also
will be addressed with the separate patch.
virtual patch
@@
expression E;
@@
- E << (PAGE_CACHE_SHIFT - PAGE_SHIFT)
+ E
@@
expression E;
@@
- E >> (PAGE_CACHE_SHIFT - PAGE_SHIFT)
+ E
@@
@@
- PAGE_CACHE_SHIFT
+ PAGE_SHIFT
@@
@@
- PAGE_CACHE_SIZE
+ PAGE_SIZE
@@
@@
- PAGE_CACHE_MASK
+ PAGE_MASK
@@
expression E;
@@
- PAGE_CACHE_ALIGN(E)
+ PAGE_ALIGN(E)
@@
expression E;
@@
- page_cache_get(E)
+ get_page(E)
@@
expression E;
@@
- page_cache_release(E)
+ put_page(E)
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2016-04-01 15:29:47 +03:00
( loff_t ) pg_start < < PAGE_SHIFT ) ;
2015-05-06 08:11:13 +03:00
}
2016-05-09 14:56:31 +03:00
for ( index = pg_start ; index < pg_end ; ) {
2015-05-06 08:11:13 +03:00
struct dnode_of_data dn ;
2016-05-09 14:56:31 +03:00
unsigned int end_offset ;
pgoff_t end ;
2015-05-06 08:11:13 +03:00
f2fs_lock_op ( sbi ) ;
2016-05-09 14:56:31 +03:00
set_new_dnode ( & dn , inode , NULL , NULL , 0 ) ;
ret = get_dnode_of_data ( & dn , index , ALLOC_NODE ) ;
2015-05-06 08:11:13 +03:00
if ( ret ) {
f2fs_unlock_op ( sbi ) ;
goto out ;
}
2016-05-09 14:56:31 +03:00
end_offset = ADDRS_PER_PAGE ( dn . node_page , inode ) ;
end = min ( pg_end , end_offset - dn . ofs_in_node + index ) ;
ret = f2fs_do_zero_range ( & dn , index , end ) ;
2015-05-06 08:11:13 +03:00
f2fs_put_dnode ( & dn ) ;
f2fs_unlock_op ( sbi ) ;
2016-05-09 14:56:31 +03:00
if ( ret )
goto out ;
2015-05-06 08:11:13 +03:00
2016-05-09 14:56:31 +03:00
index = end ;
2015-05-06 08:11:13 +03:00
new_size = max_t ( loff_t , new_size ,
2016-05-09 14:56:31 +03:00
( loff_t ) index < < PAGE_SHIFT ) ;
2015-05-06 08:11:13 +03:00
}
if ( off_end ) {
2015-08-07 13:36:06 +03:00
ret = fill_zero ( inode , pg_end , 0 , off_end ) ;
if ( ret )
goto out ;
2015-05-06 08:11:13 +03:00
new_size = max_t ( loff_t , new_size , offset + len ) ;
}
}
out :
2016-05-21 02:32:49 +03:00
if ( ! ( mode & FALLOC_FL_KEEP_SIZE ) & & i_size_read ( inode ) < new_size )
2016-05-20 19:22:03 +03:00
f2fs_i_size_write ( inode , new_size ) ;
2015-05-06 08:11:13 +03:00
return ret ;
}
2015-05-28 14:16:57 +03:00
/*
 * Handle FALLOC_FL_INSERT_RANGE: shift the data blocks at and after @offset
 * towards the end of the file by @len bytes and grow i_size by @len.
 *
 * @inode:  file being modified
 * @offset: byte position of the insertion; must be F2FS_BLKSIZE aligned and
 *          strictly below the current i_size
 * @len:    number of bytes to insert; must be F2FS_BLKSIZE aligned
 *
 * Returns 0 on success, -EFBIG if the resulting size would exceed
 * s_maxbytes, -EINVAL on a bad offset/alignment, or the error reported by
 * one of the helpers called below.
 */
static int f2fs_insert_range(struct inode *inode, loff_t offset, loff_t len)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	pgoff_t nr, pg_start, pg_end, delta, idx;
	loff_t new_size;
	int ret = 0;

	new_size = i_size_read(inode) + len;
	if (new_size > inode->i_sb->s_maxbytes)
		return -EFBIG;

	if (offset >= i_size_read(inode))
		return -EINVAL;

	/* insert range should be aligned to block size of f2fs. */
	if (offset & (F2FS_BLKSIZE - 1) || len & (F2FS_BLKSIZE - 1))
		return -EINVAL;

	ret = f2fs_convert_inline_inode(inode);
	if (ret)
		return ret;

	f2fs_balance_fs(sbi, true);

	ret = truncate_blocks(inode, i_size_read(inode), true);
	if (ret)
		return ret;

	/* write out all dirty pages from offset */
	ret = filemap_write_and_wait_range(inode->i_mapping, offset, LLONG_MAX);
	if (ret)
		return ret;

	truncate_pagecache(inode, offset);

	pg_start = offset >> PAGE_SHIFT;
	pg_end = (offset + len) >> PAGE_SHIFT;
	delta = pg_end - pg_start;
	/* first page index past the current end of file (i_size rounded up) */
	idx = (i_size_read(inode) + PAGE_SIZE - 1) / PAGE_SIZE;

	/*
	 * Walk backwards from the end of the file towards pg_start, moving at
	 * most @delta pages per iteration from [idx, idx + nr) to
	 * [idx + delta, idx + delta + nr).
	 */
	while (!ret && idx > pg_start) {
		nr = idx - pg_start;
		if (nr > delta)
			nr = delta;
		idx -= nr;

		f2fs_lock_op(sbi);
		f2fs_drop_extent_tree(inode);

		ret = __exchange_data_block(inode, inode, idx,
					idx + delta, nr, false);
		f2fs_unlock_op(sbi);
	}

	/* write out all moved pages, if possible (result intentionally unused) */
	filemap_write_and_wait_range(inode->i_mapping, offset, LLONG_MAX);
	truncate_pagecache(inode, offset);

	if (!ret)
		f2fs_i_size_write(inode, new_size);
	return ret;
}
2012-11-02 12:09:44 +04:00
/*
 * Default fallocate() path: map blocks for the byte range
 * [@offset, @offset + @len) via f2fs_map_blocks(F2FS_GET_BLOCK_PRE_AIO) and,
 * unless FALLOC_FL_KEEP_SIZE is set in @mode, grow i_size to cover what was
 * mapped.
 *
 * Returns 0 on success or the error from inode_newsize_ok(),
 * f2fs_convert_inline_inode() or f2fs_map_blocks().  When f2fs_map_blocks()
 * fails with a non-zero map.m_len, i_size may still be advanced up to the
 * position derived from map.m_len before the error is returned.
 */
static int expand_inode_data(struct inode *inode, loff_t offset,
					loff_t len, int mode)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct f2fs_map_blocks map = { .m_next_pgofs = NULL };
	pgoff_t pg_end;
	loff_t new_size = i_size_read(inode);
	loff_t off_end;
	int ret;

	ret = inode_newsize_ok(inode, (len + offset));
	if (ret)
		return ret;

	ret = f2fs_convert_inline_inode(inode);
	if (ret)
		return ret;

	f2fs_balance_fs(sbi, true);

	/* page index and in-page byte offset of the end of the range */
	pg_end = ((unsigned long long)offset + len) >> PAGE_SHIFT;
	off_end = (offset + len) & (PAGE_SIZE - 1);

	map.m_lblk = ((unsigned long long)offset) >> PAGE_SHIFT;
	map.m_len = pg_end - map.m_lblk;
	/* a partially covered last page still needs a block */
	if (off_end)
		map.m_len++;

	ret = f2fs_map_blocks(inode, &map, 1, F2FS_GET_BLOCK_PRE_AIO);
	if (ret) {
		pgoff_t last_off;

		if (!map.m_len)
			return ret;

		last_off = map.m_lblk + map.m_len - 1;

		/* update new size to the failed position */
		new_size = (last_off == pg_end) ? offset + len :
					(loff_t)(last_off + 1) << PAGE_SHIFT;
	} else {
		new_size = ((loff_t)pg_end << PAGE_SHIFT) + off_end;
	}

	if (!(mode & FALLOC_FL_KEEP_SIZE) && i_size_read(inode) < new_size)
		f2fs_i_size_write(inode, new_size);

	return ret;
}
/*
 * fallocate() entry point: validate @mode, then dispatch to the punch-hole,
 * collapse-range, zero-range, insert-range or plain preallocation helper
 * while holding the inode lock.
 *
 * Returns 0 on success, -EINVAL for non-regular files, -EOPNOTSUPP for
 * unsupported mode bits (or collapse/insert on an encrypted inode), or the
 * error returned by the selected helper.
 */
static long f2fs_fallocate(struct file *file, int mode,
				loff_t offset, loff_t len)
{
	struct inode *inode = file_inode(file);
	long ret = 0;

	/* f2fs only support ->fallocate for regular file */
	if (!S_ISREG(inode->i_mode))
		return -EINVAL;

	if (f2fs_encrypted_inode(inode) &&
		(mode & (FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_INSERT_RANGE)))
		return -EOPNOTSUPP;

	if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE |
			FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE |
			FALLOC_FL_INSERT_RANGE))
		return -EOPNOTSUPP;

	inode_lock(inode);

	if (mode & FALLOC_FL_PUNCH_HOLE) {
		/* nothing to punch at or past EOF; ret stays 0, times untouched */
		if (offset >= inode->i_size)
			goto out;

		ret = punch_hole(inode, offset, len);
	} else if (mode & FALLOC_FL_COLLAPSE_RANGE) {
		ret = f2fs_collapse_range(inode, offset, len);
	} else if (mode & FALLOC_FL_ZERO_RANGE) {
		ret = f2fs_zero_range(inode, offset, len, mode);
	} else if (mode & FALLOC_FL_INSERT_RANGE) {
		ret = f2fs_insert_range(inode, offset, len);
	} else {
		ret = expand_inode_data(inode, offset, len, mode);
	}

	if (!ret) {
		inode->i_mtime = inode->i_ctime = current_time(inode);
		f2fs_mark_inode_dirty_sync(inode);
		f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
	}

out:
	inode_unlock(inode);

	trace_f2fs_fallocate(inode, mode, offset, len, ret);
	return ret;
}
2014-12-09 17:08:59 +03:00
static int f2fs_release_file ( struct inode * inode , struct file * filp )
{
2016-04-11 21:51:51 +03:00
/*
* f2fs_relase_file is called at every close calls . So we should
* not drop any inmemory pages by close called by other process .
*/
if ( ! ( filp - > f_mode & FMODE_WRITE ) | |
atomic_read ( & inode - > i_writecount ) ! = 1 )
return 0 ;
2014-12-09 17:08:59 +03:00
/* some remained atomic pages should discarded */
if ( f2fs_is_atomic_file ( inode ) )
2016-02-06 09:38:29 +03:00
drop_inmem_pages ( inode ) ;
2014-12-09 17:08:59 +03:00
if ( f2fs_is_volatile_file ( inode ) ) {
2016-05-20 20:13:22 +03:00
clear_inode_flag ( inode , FI_VOLATILE_FILE ) ;
set_inode_flag ( inode , FI_DROP_CACHE ) ;
2014-12-09 17:08:59 +03:00
filemap_fdatawrite ( inode - > i_mapping ) ;
2016-05-20 20:13:22 +03:00
clear_inode_flag ( inode , FI_DROP_CACHE ) ;
2014-12-09 17:08:59 +03:00
}
return 0 ;
}
2012-11-02 12:09:44 +04:00
/* Flags kept on regular files: everything except the directory-only ones. */
#define F2FS_REG_FLMASK		(~(FS_DIRSYNC_FL | FS_TOPDIR_FL))
/* Flags kept on inodes that are neither directories nor regular files. */
#define F2FS_OTHER_FLMASK	(FS_NODUMP_FL | FS_NOATIME_FL)

/*
 * Filter @flags down to the set permitted for the file type encoded in
 * @mode: directories keep every flag, regular files are masked with
 * F2FS_REG_FLMASK, and all other types with F2FS_OTHER_FLMASK.
 */
static inline __u32 f2fs_mask_flags(umode_t mode, __u32 flags)
{
	if (S_ISDIR(mode))
		return flags;
	else if (S_ISREG(mode))
		return flags & F2FS_REG_FLMASK;
	else
		return flags & F2FS_OTHER_FLMASK;
}
2014-09-25 02:37:02 +04:00
static int f2fs_ioc_getflags ( struct file * filp , unsigned long arg )
2012-11-02 12:09:44 +04:00
{
2013-02-28 01:59:05 +04:00
struct inode * inode = file_inode ( filp ) ;
2012-11-02 12:09:44 +04:00
struct f2fs_inode_info * fi = F2FS_I ( inode ) ;
2014-09-25 02:37:02 +04:00
unsigned int flags = fi - > i_flags & FS_FL_USER_VISIBLE ;
return put_user ( flags , ( int __user * ) arg ) ;
}
2012-11-02 12:09:44 +04:00
2014-09-25 02:37:02 +04:00
static int f2fs_ioc_setflags ( struct file * filp , unsigned long arg )
{
struct inode * inode = file_inode ( filp ) ;
struct f2fs_inode_info * fi = F2FS_I ( inode ) ;
2016-08-23 15:10:47 +03:00
unsigned int flags ;
2014-09-25 02:37:02 +04:00
unsigned int oldflags ;
int ret ;
2012-11-02 12:09:44 +04:00
2016-05-09 14:56:32 +03:00
if ( ! inode_owner_or_capable ( inode ) )
return - EACCES ;
if ( get_user ( flags , ( int __user * ) arg ) )
return - EFAULT ;
2014-09-25 02:37:02 +04:00
ret = mnt_want_write_file ( filp ) ;
if ( ret )
return ret ;
2012-11-02 12:09:44 +04:00
2014-09-25 02:37:02 +04:00
flags = f2fs_mask_flags ( inode - > i_mode , flags ) ;
2012-11-02 12:09:44 +04:00
2016-01-22 23:40:57 +03:00
inode_lock ( inode ) ;
2012-11-02 12:09:44 +04:00
2014-09-25 02:37:02 +04:00
oldflags = fi - > i_flags ;
2012-11-02 12:09:44 +04:00
2014-09-25 02:37:02 +04:00
if ( ( flags ^ oldflags ) & ( FS_APPEND_FL | FS_IMMUTABLE_FL ) ) {
if ( ! capable ( CAP_LINUX_IMMUTABLE ) ) {
2016-01-22 23:40:57 +03:00
inode_unlock ( inode ) ;
2014-09-25 02:37:02 +04:00
ret = - EPERM ;
goto out ;
2012-11-02 12:09:44 +04:00
}
2014-09-25 02:37:02 +04:00
}
2012-11-02 12:09:44 +04:00
2014-09-25 02:37:02 +04:00
flags = flags & FS_FL_USER_MODIFIABLE ;
flags | = oldflags & ~ FS_FL_USER_MODIFIABLE ;
fi - > i_flags = flags ;
2016-01-22 23:40:57 +03:00
inode_unlock ( inode ) ;
2012-11-02 12:09:44 +04:00
2016-09-14 17:48:04 +03:00
inode - > i_ctime = current_time ( inode ) ;
2016-05-20 19:52:20 +03:00
f2fs_set_inode_flags ( inode ) ;
2012-11-02 12:09:44 +04:00
out :
2014-09-25 02:37:02 +04:00
mnt_drop_write_file ( filp ) ;
return ret ;
}
2014-09-21 09:06:39 +04:00
2015-01-23 15:36:04 +03:00
static int f2fs_ioc_getversion ( struct file * filp , unsigned long arg )
{
struct inode * inode = file_inode ( filp ) ;
return put_user ( inode - > i_generation , ( int __user * ) arg ) ;
}
2014-10-07 04:39:50 +04:00
static int f2fs_ioc_start_atomic_write ( struct file * filp )
{
struct inode * inode = file_inode ( filp ) ;
2015-07-17 13:06:35 +03:00
int ret ;
2014-10-07 04:39:50 +04:00
if ( ! inode_owner_or_capable ( inode ) )
return - EACCES ;
2016-05-09 14:56:32 +03:00
ret = mnt_want_write_file ( filp ) ;
if ( ret )
return ret ;
2016-05-09 14:56:33 +03:00
inode_lock ( inode ) ;
2014-12-09 17:08:59 +03:00
if ( f2fs_is_atomic_file ( inode ) )
2016-05-09 14:56:32 +03:00
goto out ;
2014-10-07 04:39:50 +04:00
2015-07-17 13:06:35 +03:00
ret = f2fs_convert_inline_inode ( inode ) ;
if ( ret )
2016-05-09 14:56:32 +03:00
goto out ;
2014-10-07 04:39:50 +04:00
2016-05-20 20:13:22 +03:00
set_inode_flag ( inode , FI_ATOMIC_FILE ) ;
2016-01-09 03:57:48 +03:00
f2fs_update_time ( F2FS_I_SB ( inode ) , REQ_TIME ) ;
2016-04-13 00:36:11 +03:00
if ( ! get_dirty_pages ( inode ) )
2016-05-09 14:56:32 +03:00
goto out ;
2016-04-13 00:36:11 +03:00
f2fs_msg ( F2FS_I_SB ( inode ) - > sb , KERN_WARNING ,
2016-05-13 22:47:11 +03:00
" Unexpected flush for atomic writes: ino=%lu, npages=%lld " ,
2016-04-13 00:36:11 +03:00
inode - > i_ino , get_dirty_pages ( inode ) ) ;
ret = filemap_write_and_wait_range ( inode - > i_mapping , 0 , LLONG_MAX ) ;
if ( ret )
2016-05-20 20:13:22 +03:00
clear_inode_flag ( inode , FI_ATOMIC_FILE ) ;
2016-05-09 14:56:32 +03:00
out :
2016-05-09 14:56:33 +03:00
inode_unlock ( inode ) ;
2016-05-09 14:56:32 +03:00
mnt_drop_write_file ( filp ) ;
2016-04-13 00:36:11 +03:00
return ret ;
2014-10-07 04:39:50 +04:00
}
static int f2fs_ioc_commit_atomic_write ( struct file * filp )
{
struct inode * inode = file_inode ( filp ) ;
int ret ;
if ( ! inode_owner_or_capable ( inode ) )
return - EACCES ;
ret = mnt_want_write_file ( filp ) ;
if ( ret )
return ret ;
2016-05-09 14:56:33 +03:00
inode_lock ( inode ) ;
2016-05-09 14:56:32 +03:00
if ( f2fs_is_volatile_file ( inode ) )
goto err_out ;
2015-07-25 10:29:17 +03:00
if ( f2fs_is_atomic_file ( inode ) ) {
2016-05-20 20:13:22 +03:00
clear_inode_flag ( inode , FI_ATOMIC_FILE ) ;
2016-02-06 09:38:29 +03:00
ret = commit_inmem_pages ( inode ) ;
2016-01-10 04:08:38 +03:00
if ( ret ) {
2016-05-20 20:13:22 +03:00
set_inode_flag ( inode , FI_ATOMIC_FILE ) ;
2015-07-25 10:52:52 +03:00
goto err_out ;
2016-01-10 04:08:38 +03:00
}
2015-07-25 10:29:17 +03:00
}
2014-10-07 04:39:50 +04:00
2016-04-15 19:43:17 +03:00
ret = f2fs_do_sync_file ( filp , 0 , LLONG_MAX , 0 , true ) ;
2015-07-25 10:52:52 +03:00
err_out :
2016-05-09 14:56:33 +03:00
inode_unlock ( inode ) ;
2014-10-07 04:39:50 +04:00
mnt_drop_write_file ( filp ) ;
return ret ;
}
2014-10-07 03:11:16 +04:00
static int f2fs_ioc_start_volatile_write ( struct file * filp )
{
struct inode * inode = file_inode ( filp ) ;
2015-07-17 13:06:35 +03:00
int ret ;
2014-10-07 03:11:16 +04:00
if ( ! inode_owner_or_capable ( inode ) )
return - EACCES ;
2016-05-09 14:56:32 +03:00
ret = mnt_want_write_file ( filp ) ;
if ( ret )
return ret ;
2016-05-09 14:56:33 +03:00
inode_lock ( inode ) ;
2014-12-09 17:08:59 +03:00
if ( f2fs_is_volatile_file ( inode ) )
2016-05-09 14:56:32 +03:00
goto out ;
2014-12-09 17:08:59 +03:00
2015-07-17 13:06:35 +03:00
ret = f2fs_convert_inline_inode ( inode ) ;
if ( ret )
2016-05-09 14:56:32 +03:00
goto out ;
2014-10-24 06:48:09 +04:00
2016-05-20 20:13:22 +03:00
set_inode_flag ( inode , FI_VOLATILE_FILE ) ;
2016-01-09 03:57:48 +03:00
f2fs_update_time ( F2FS_I_SB ( inode ) , REQ_TIME ) ;
2016-05-09 14:56:32 +03:00
out :
2016-05-09 14:56:33 +03:00
inode_unlock ( inode ) ;
2016-05-09 14:56:32 +03:00
mnt_drop_write_file ( filp ) ;
return ret ;
2014-10-07 03:11:16 +04:00
}
2014-12-09 17:08:59 +03:00
static int f2fs_ioc_release_volatile_write ( struct file * filp )
{
struct inode * inode = file_inode ( filp ) ;
2016-05-09 14:56:32 +03:00
int ret ;
2014-12-09 17:08:59 +03:00
if ( ! inode_owner_or_capable ( inode ) )
return - EACCES ;
2016-05-09 14:56:32 +03:00
ret = mnt_want_write_file ( filp ) ;
if ( ret )
return ret ;
2016-05-09 14:56:33 +03:00
inode_lock ( inode ) ;
2014-12-09 17:08:59 +03:00
if ( ! f2fs_is_volatile_file ( inode ) )
2016-05-09 14:56:32 +03:00
goto out ;
2014-12-09 17:08:59 +03:00
2016-05-09 14:56:32 +03:00
if ( ! f2fs_is_first_block_written ( inode ) ) {
ret = truncate_partial_data_page ( inode , 0 , true ) ;
goto out ;
}
2015-03-18 03:16:35 +03:00
2016-05-09 14:56:32 +03:00
ret = punch_hole ( inode , 0 , F2FS_BLKSIZE ) ;
out :
2016-05-09 14:56:33 +03:00
inode_unlock ( inode ) ;
2016-05-09 14:56:32 +03:00
mnt_drop_write_file ( filp ) ;
return ret ;
2014-12-09 17:08:59 +03:00
}
static int f2fs_ioc_abort_volatile_write ( struct file * filp )
{
struct inode * inode = file_inode ( filp ) ;
int ret ;
if ( ! inode_owner_or_capable ( inode ) )
return - EACCES ;
ret = mnt_want_write_file ( filp ) ;
if ( ret )
return ret ;
2016-05-09 14:56:33 +03:00
inode_lock ( inode ) ;
2016-04-11 23:15:10 +03:00
if ( f2fs_is_atomic_file ( inode ) )
2016-02-06 09:38:29 +03:00
drop_inmem_pages ( inode ) ;
2015-12-30 02:46:33 +03:00
if ( f2fs_is_volatile_file ( inode ) ) {
2016-05-20 20:13:22 +03:00
clear_inode_flag ( inode , FI_VOLATILE_FILE ) ;
2016-04-15 19:43:17 +03:00
ret = f2fs_do_sync_file ( filp , 0 , LLONG_MAX , 0 , true ) ;
2015-12-30 02:46:33 +03:00
}
2015-06-09 03:51:10 +03:00
2016-05-09 14:56:33 +03:00
inode_unlock ( inode ) ;
2014-12-09 17:08:59 +03:00
mnt_drop_write_file ( filp ) ;
2016-01-09 03:57:48 +03:00
f2fs_update_time ( F2FS_I_SB ( inode ) , REQ_TIME ) ;
2014-12-09 17:08:59 +03:00
return ret ;
}
2015-01-09 06:15:53 +03:00
static int f2fs_ioc_shutdown ( struct file * filp , unsigned long arg )
{
struct inode * inode = file_inode ( filp ) ;
struct f2fs_sb_info * sbi = F2FS_I_SB ( inode ) ;
struct super_block * sb = sbi - > sb ;
__u32 in ;
2016-05-09 14:56:32 +03:00
int ret ;
2015-01-09 06:15:53 +03:00
if ( ! capable ( CAP_SYS_ADMIN ) )
return - EPERM ;
if ( get_user ( in , ( __u32 __user * ) arg ) )
return - EFAULT ;
2016-05-09 14:56:32 +03:00
ret = mnt_want_write_file ( filp ) ;
if ( ret )
return ret ;
2015-01-09 06:15:53 +03:00
switch ( in ) {
case F2FS_GOING_DOWN_FULLSYNC :
sb = freeze_bdev ( sb - > s_bdev ) ;
if ( sb & & ! IS_ERR ( sb ) ) {
2016-05-19 00:07:56 +03:00
f2fs_stop_checkpoint ( sbi , false ) ;
2015-01-09 06:15:53 +03:00
thaw_bdev ( sb - > s_bdev , sb ) ;
}
break ;
case F2FS_GOING_DOWN_METASYNC :
/* do checkpoint only */
f2fs_sync_fs ( sb , 1 ) ;
2016-05-19 00:07:56 +03:00
f2fs_stop_checkpoint ( sbi , false ) ;
2015-01-09 06:15:53 +03:00
break ;
case F2FS_GOING_DOWN_NOSYNC :
2016-05-19 00:07:56 +03:00
f2fs_stop_checkpoint ( sbi , false ) ;
2015-01-09 06:15:53 +03:00
break ;
2015-10-07 19:46:37 +03:00
case F2FS_GOING_DOWN_METAFLUSH :
sync_meta_pages ( sbi , META , LONG_MAX ) ;
2016-05-19 00:07:56 +03:00
f2fs_stop_checkpoint ( sbi , false ) ;
2015-10-07 19:46:37 +03:00
break ;
2015-01-09 06:15:53 +03:00
default :
2016-05-09 14:56:32 +03:00
ret = - EINVAL ;
goto out ;
2015-01-09 06:15:53 +03:00
}
2016-01-09 03:57:48 +03:00
f2fs_update_time ( sbi , REQ_TIME ) ;
2016-05-09 14:56:32 +03:00
out :
mnt_drop_write_file ( filp ) ;
return ret ;
2015-01-09 06:15:53 +03:00
}
2014-09-25 02:37:02 +04:00
static int f2fs_ioc_fitrim ( struct file * filp , unsigned long arg )
{
struct inode * inode = file_inode ( filp ) ;
struct super_block * sb = inode - > i_sb ;
struct request_queue * q = bdev_get_queue ( sb - > s_bdev ) ;
struct fstrim_range range ;
int ret ;
2014-09-21 09:06:39 +04:00
2014-09-25 02:37:02 +04:00
if ( ! capable ( CAP_SYS_ADMIN ) )
return - EPERM ;
2014-09-21 09:06:39 +04:00
2014-09-25 02:37:02 +04:00
if ( ! blk_queue_discard ( q ) )
return - EOPNOTSUPP ;
2014-09-21 09:06:39 +04:00
2014-09-25 02:37:02 +04:00
if ( copy_from_user ( & range , ( struct fstrim_range __user * ) arg ,
sizeof ( range ) ) )
return - EFAULT ;
2014-09-21 09:06:39 +04:00
2016-05-09 14:56:32 +03:00
ret = mnt_want_write_file ( filp ) ;
if ( ret )
return ret ;
2014-09-25 02:37:02 +04:00
range . minlen = max ( ( unsigned int ) range . minlen ,
q - > limits . discard_granularity ) ;
ret = f2fs_trim_fs ( F2FS_SB ( sb ) , & range ) ;
2016-05-09 14:56:32 +03:00
mnt_drop_write_file ( filp ) ;
2014-09-25 02:37:02 +04:00
if ( ret < 0 )
return ret ;
2014-09-21 09:06:39 +04:00
2014-09-25 02:37:02 +04:00
if ( copy_to_user ( ( struct fstrim_range __user * ) arg , & range ,
sizeof ( range ) ) )
return - EFAULT ;
2016-01-09 03:57:48 +03:00
f2fs_update_time ( F2FS_I_SB ( inode ) , REQ_TIME ) ;
2014-09-25 02:37:02 +04:00
return 0 ;
}
2015-04-21 01:19:06 +03:00
/*
 * Return true if any of the 16 bytes of @u is non-zero, false if all of
 * them are zero.
 */
static bool uuid_is_nonzero(__u8 u[16])
{
	int i;

	for (i = 0; i < 16; i++)
		if (u[i])
			return true;
	return false;
}
static int f2fs_ioc_set_encryption_policy ( struct file * filp , unsigned long arg )
{
struct inode * inode = file_inode ( filp ) ;
2016-01-09 03:57:48 +03:00
f2fs_update_time ( F2FS_I_SB ( inode ) , REQ_TIME ) ;
2016-05-09 14:56:32 +03:00
2016-11-27 03:07:49 +03:00
return fscrypt_ioctl_set_policy ( filp , ( const void __user * ) arg ) ;
2015-04-21 01:19:06 +03:00
}
static int f2fs_ioc_get_encryption_policy ( struct file * filp , unsigned long arg )
{
2016-11-27 03:07:49 +03:00
return fscrypt_ioctl_get_policy ( filp , ( void __user * ) arg ) ;
2015-04-21 01:19:06 +03:00
}
static int f2fs_ioc_get_encryption_pwsalt ( struct file * filp , unsigned long arg )
{
struct inode * inode = file_inode ( filp ) ;
struct f2fs_sb_info * sbi = F2FS_I_SB ( inode ) ;
int err ;
if ( ! f2fs_sb_has_crypto ( inode - > i_sb ) )
return - EOPNOTSUPP ;
if ( uuid_is_nonzero ( sbi - > raw_super - > encrypt_pw_salt ) )
goto got_it ;
err = mnt_want_write_file ( filp ) ;
if ( err )
return err ;
/* update superblock with uuid */
generate_random_uuid ( sbi - > raw_super - > encrypt_pw_salt ) ;
2015-06-08 08:28:03 +03:00
err = f2fs_commit_super ( sbi , false ) ;
2015-04-21 01:19:06 +03:00
if ( err ) {
/* undo new data */
memset ( sbi - > raw_super - > encrypt_pw_salt , 0 , 16 ) ;
2015-12-15 12:19:26 +03:00
mnt_drop_write_file ( filp ) ;
2015-04-21 01:19:06 +03:00
return err ;
}
2015-12-15 12:19:26 +03:00
mnt_drop_write_file ( filp ) ;
2015-04-21 01:19:06 +03:00
got_it :
if ( copy_to_user ( ( __u8 __user * ) arg , sbi - > raw_super - > encrypt_pw_salt ,
16 ) )
return - EFAULT ;
return 0 ;
}
2015-07-10 13:08:10 +03:00
static int f2fs_ioc_gc ( struct file * filp , unsigned long arg )
{
struct inode * inode = file_inode ( filp ) ;
struct f2fs_sb_info * sbi = F2FS_I_SB ( inode ) ;
2015-10-05 17:22:44 +03:00
__u32 sync ;
2016-05-09 14:56:32 +03:00
int ret ;
2015-07-10 13:08:10 +03:00
if ( ! capable ( CAP_SYS_ADMIN ) )
return - EPERM ;
2015-10-05 17:22:44 +03:00
if ( get_user ( sync , ( __u32 __user * ) arg ) )
2015-07-10 13:08:10 +03:00
return - EFAULT ;
2015-10-05 17:22:44 +03:00
if ( f2fs_readonly ( sbi - > sb ) )
return - EROFS ;
2015-07-10 13:08:10 +03:00
2016-05-09 14:56:32 +03:00
ret = mnt_want_write_file ( filp ) ;
if ( ret )
return ret ;
2015-10-05 17:22:44 +03:00
if ( ! sync ) {
2016-05-09 14:56:32 +03:00
if ( ! mutex_trylock ( & sbi - > gc_mutex ) ) {
ret = - EBUSY ;
goto out ;
}
2015-10-05 17:22:44 +03:00
} else {
mutex_lock ( & sbi - > gc_mutex ) ;
2015-07-10 13:08:10 +03:00
}
2016-05-09 14:56:32 +03:00
ret = f2fs_gc ( sbi , sync ) ;
out :
mnt_drop_write_file ( filp ) ;
return ret ;
2015-07-10 13:08:10 +03:00
}
2015-10-05 17:24:19 +03:00
static int f2fs_ioc_write_checkpoint ( struct file * filp , unsigned long arg )
{
struct inode * inode = file_inode ( filp ) ;
struct f2fs_sb_info * sbi = F2FS_I_SB ( inode ) ;
2016-05-09 14:56:32 +03:00
int ret ;
2015-10-05 17:24:19 +03:00
if ( ! capable ( CAP_SYS_ADMIN ) )
return - EPERM ;
if ( f2fs_readonly ( sbi - > sb ) )
return - EROFS ;
2016-05-09 14:56:32 +03:00
ret = mnt_want_write_file ( filp ) ;
if ( ret )
return ret ;
ret = f2fs_sync_fs ( sbi - > sb , 1 ) ;
mnt_drop_write_file ( filp ) ;
return ret ;
2015-10-05 17:24:19 +03:00
}
2015-10-27 04:53:45 +03:00
static int f2fs_defragment_range ( struct f2fs_sb_info * sbi ,
struct file * filp ,
struct f2fs_defragment * range )
{
struct inode * inode = file_inode ( filp ) ;
2016-01-26 10:42:58 +03:00
struct f2fs_map_blocks map = { . m_next_pgofs = NULL } ;
2015-10-27 04:53:45 +03:00
struct extent_info ei ;
pgoff_t pg_start , pg_end ;
2015-12-01 06:56:52 +03:00
unsigned int blk_per_seg = sbi - > blocks_per_seg ;
2015-10-27 04:53:45 +03:00
unsigned int total = 0 , sec_num ;
2015-12-01 06:56:52 +03:00
unsigned int pages_per_sec = sbi - > segs_per_sec * blk_per_seg ;
2015-10-27 04:53:45 +03:00
block_t blk_end = 0 ;
bool fragmented = false ;
int err ;
/* if in-place-update policy is enabled, don't waste time here */
if ( need_inplace_update ( inode ) )
return - EINVAL ;
mm, fs: get rid of PAGE_CACHE_* and page_cache_{get,release} macros
PAGE_CACHE_{SIZE,SHIFT,MASK,ALIGN} macros were introduced *long* time
ago with promise that one day it will be possible to implement page
cache with bigger chunks than PAGE_SIZE.
This promise never materialized. And unlikely will.
We have many places where PAGE_CACHE_SIZE assumed to be equal to
PAGE_SIZE. And it's constant source of confusion on whether
PAGE_CACHE_* or PAGE_* constant should be used in a particular case,
especially on the border between fs and mm.
Global switching to PAGE_CACHE_SIZE != PAGE_SIZE would cause to much
breakage to be doable.
Let's stop pretending that pages in page cache are special. They are
not.
The changes are pretty straight-forward:
- <foo> << (PAGE_CACHE_SHIFT - PAGE_SHIFT) -> <foo>;
- <foo> >> (PAGE_CACHE_SHIFT - PAGE_SHIFT) -> <foo>;
- PAGE_CACHE_{SIZE,SHIFT,MASK,ALIGN} -> PAGE_{SIZE,SHIFT,MASK,ALIGN};
- page_cache_get() -> get_page();
- page_cache_release() -> put_page();
This patch contains automated changes generated with coccinelle using
script below. For some reason, coccinelle doesn't patch header files.
I've called spatch for them manually.
The only adjustment after coccinelle is revert of changes to
PAGE_CAHCE_ALIGN definition: we are going to drop it later.
There are few places in the code where coccinelle didn't reach. I'll
fix them manually in a separate patch. Comments and documentation also
will be addressed with the separate patch.
virtual patch
@@
expression E;
@@
- E << (PAGE_CACHE_SHIFT - PAGE_SHIFT)
+ E
@@
expression E;
@@
- E >> (PAGE_CACHE_SHIFT - PAGE_SHIFT)
+ E
@@
@@
- PAGE_CACHE_SHIFT
+ PAGE_SHIFT
@@
@@
- PAGE_CACHE_SIZE
+ PAGE_SIZE
@@
@@
- PAGE_CACHE_MASK
+ PAGE_MASK
@@
expression E;
@@
- PAGE_CACHE_ALIGN(E)
+ PAGE_ALIGN(E)
@@
expression E;
@@
- page_cache_get(E)
+ get_page(E)
@@
expression E;
@@
- page_cache_release(E)
+ put_page(E)
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2016-04-01 15:29:47 +03:00
pg_start = range - > start > > PAGE_SHIFT ;
pg_end = ( range - > start + range - > len ) > > PAGE_SHIFT ;
2015-10-27 04:53:45 +03:00
2016-01-08 01:15:04 +03:00
f2fs_balance_fs ( sbi , true ) ;
2015-10-27 04:53:45 +03:00
2016-01-22 23:40:57 +03:00
inode_lock ( inode ) ;
2015-10-27 04:53:45 +03:00
/* writeback all dirty pages in the range */
err = filemap_write_and_wait_range ( inode - > i_mapping , range - > start ,
2015-12-14 08:34:00 +03:00
range - > start + range - > len - 1 ) ;
2015-10-27 04:53:45 +03:00
if ( err )
goto out ;
/*
* lookup mapping info in extent cache , skip defragmenting if physical
* block addresses are continuous .
*/
if ( f2fs_lookup_extent_cache ( inode , pg_start , & ei ) ) {
if ( ei . fofs + ei . len > = pg_end )
goto out ;
}
map . m_lblk = pg_start ;
/*
* lookup mapping info in dnode page cache , skip defragmenting if all
* physical block addresses are continuous even if there are hole ( s )
* in logical blocks .
*/
while ( map . m_lblk < pg_end ) {
2015-12-15 12:02:41 +03:00
map . m_len = pg_end - map . m_lblk ;
2015-10-27 04:53:45 +03:00
err = f2fs_map_blocks ( inode , & map , 0 , F2FS_GET_BLOCK_READ ) ;
if ( err )
goto out ;
if ( ! ( map . m_flags & F2FS_MAP_FLAGS ) ) {
map . m_lblk + + ;
continue ;
}
if ( blk_end & & blk_end ! = map . m_pblk ) {
fragmented = true ;
break ;
}
blk_end = map . m_pblk + map . m_len ;
map . m_lblk + = map . m_len ;
}
if ( ! fragmented )
goto out ;
map . m_lblk = pg_start ;
map . m_len = pg_end - pg_start ;
sec_num = ( map . m_len + pages_per_sec - 1 ) / pages_per_sec ;
/*
* make sure there are enough free section for LFS allocation , this can
* avoid defragment running in SSR mode when free section are allocated
* intensively
*/
2016-09-01 22:02:51 +03:00
if ( has_not_enough_free_secs ( sbi , 0 , sec_num ) ) {
2015-10-27 04:53:45 +03:00
err = - EAGAIN ;
goto out ;
}
while ( map . m_lblk < pg_end ) {
pgoff_t idx ;
int cnt = 0 ;
do_map :
2015-12-15 12:02:41 +03:00
map . m_len = pg_end - map . m_lblk ;
2015-10-27 04:53:45 +03:00
err = f2fs_map_blocks ( inode , & map , 0 , F2FS_GET_BLOCK_READ ) ;
if ( err )
goto clear_out ;
if ( ! ( map . m_flags & F2FS_MAP_FLAGS ) ) {
map . m_lblk + + ;
continue ;
}
2016-05-20 20:13:22 +03:00
set_inode_flag ( inode , FI_DO_DEFRAG ) ;
2015-10-27 04:53:45 +03:00
idx = map . m_lblk ;
while ( idx < map . m_lblk + map . m_len & & cnt < blk_per_seg ) {
struct page * page ;
page = get_lock_data_page ( inode , idx , true ) ;
if ( IS_ERR ( page ) ) {
err = PTR_ERR ( page ) ;
goto clear_out ;
}
set_page_dirty ( page ) ;
f2fs_put_page ( page , 1 ) ;
idx + + ;
cnt + + ;
total + + ;
}
map . m_lblk = idx ;
if ( idx < pg_end & & cnt < blk_per_seg )
goto do_map ;
2016-05-20 20:13:22 +03:00
clear_inode_flag ( inode , FI_DO_DEFRAG ) ;
2015-10-27 04:53:45 +03:00
err = filemap_fdatawrite ( inode - > i_mapping ) ;
if ( err )
goto out ;
}
clear_out :
2016-05-20 20:13:22 +03:00
clear_inode_flag ( inode , FI_DO_DEFRAG ) ;
2015-10-27 04:53:45 +03:00
out :
2016-01-22 23:40:57 +03:00
inode_unlock ( inode ) ;
2015-10-27 04:53:45 +03:00
if ( ! err )
mm, fs: get rid of PAGE_CACHE_* and page_cache_{get,release} macros
PAGE_CACHE_{SIZE,SHIFT,MASK,ALIGN} macros were introduced *long* time
ago with promise that one day it will be possible to implement page
cache with bigger chunks than PAGE_SIZE.
This promise never materialized. And unlikely will.
We have many places where PAGE_CACHE_SIZE assumed to be equal to
PAGE_SIZE. And it's constant source of confusion on whether
PAGE_CACHE_* or PAGE_* constant should be used in a particular case,
especially on the border between fs and mm.
Global switching to PAGE_CACHE_SIZE != PAGE_SIZE would cause to much
breakage to be doable.
Let's stop pretending that pages in page cache are special. They are
not.
The changes are pretty straight-forward:
- <foo> << (PAGE_CACHE_SHIFT - PAGE_SHIFT) -> <foo>;
- <foo> >> (PAGE_CACHE_SHIFT - PAGE_SHIFT) -> <foo>;
- PAGE_CACHE_{SIZE,SHIFT,MASK,ALIGN} -> PAGE_{SIZE,SHIFT,MASK,ALIGN};
- page_cache_get() -> get_page();
- page_cache_release() -> put_page();
This patch contains automated changes generated with coccinelle using
script below. For some reason, coccinelle doesn't patch header files.
I've called spatch for them manually.
The only adjustment after coccinelle is revert of changes to
PAGE_CAHCE_ALIGN definition: we are going to drop it later.
There are few places in the code where coccinelle didn't reach. I'll
fix them manually in a separate patch. Comments and documentation also
will be addressed with the separate patch.
virtual patch
@@
expression E;
@@
- E << (PAGE_CACHE_SHIFT - PAGE_SHIFT)
+ E
@@
expression E;
@@
- E >> (PAGE_CACHE_SHIFT - PAGE_SHIFT)
+ E
@@
@@
- PAGE_CACHE_SHIFT
+ PAGE_SHIFT
@@
@@
- PAGE_CACHE_SIZE
+ PAGE_SIZE
@@
@@
- PAGE_CACHE_MASK
+ PAGE_MASK
@@
expression E;
@@
- PAGE_CACHE_ALIGN(E)
+ PAGE_ALIGN(E)
@@
expression E;
@@
- page_cache_get(E)
+ get_page(E)
@@
expression E;
@@
- page_cache_release(E)
+ put_page(E)
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2016-04-01 15:29:47 +03:00
range - > len = ( u64 ) total < < PAGE_SHIFT ;
2015-10-27 04:53:45 +03:00
return err ;
}
static int f2fs_ioc_defragment ( struct file * filp , unsigned long arg )
{
struct inode * inode = file_inode ( filp ) ;
struct f2fs_sb_info * sbi = F2FS_I_SB ( inode ) ;
struct f2fs_defragment range ;
int err ;
if ( ! capable ( CAP_SYS_ADMIN ) )
return - EPERM ;
if ( ! S_ISREG ( inode - > i_mode ) )
return - EINVAL ;
err = mnt_want_write_file ( filp ) ;
if ( err )
return err ;
if ( f2fs_readonly ( sbi - > sb ) ) {
err = - EROFS ;
goto out ;
}
if ( copy_from_user ( & range , ( struct f2fs_defragment __user * ) arg ,
sizeof ( range ) ) ) {
err = - EFAULT ;
goto out ;
}
/* verify alignment of offset & size */
if ( range . start & ( F2FS_BLKSIZE - 1 ) | |
range . len & ( F2FS_BLKSIZE - 1 ) ) {
err = - EINVAL ;
goto out ;
}
err = f2fs_defragment_range ( sbi , filp , & range ) ;
2016-01-09 03:57:48 +03:00
f2fs_update_time ( sbi , REQ_TIME ) ;
2015-10-27 04:53:45 +03:00
if ( err < 0 )
goto out ;
if ( copy_to_user ( ( struct f2fs_defragment __user * ) arg , & range ,
sizeof ( range ) ) )
err = - EFAULT ;
out :
mnt_drop_write_file ( filp ) ;
return err ;
}
2016-07-09 01:16:47 +03:00
/*
 * Move @len bytes starting at @pos_in of @file_in to @pos_out of
 * @file_out by exchanging data blocks via __exchange_data_block().
 *
 * Both files must be regular, unencrypted files on the same mount and
 * superblock.  @pos_in, @pos_out and the end of the source range must be
 * F2FS_BLKSIZE aligned; a source range that ends exactly at i_size is
 * rounded up to the next block boundary.  A @len of zero means "up to the
 * end of the source file".  Within a single file the destination may not
 * start inside the source range.
 *
 * The source inode is locked; a different destination inode is only
 * try-locked, giving -EBUSY on contention.
 *
 * After a successful exchange the destination i_size is set to
 * pos_out + original length if that extends the file, otherwise it is
 * restored to its previous value if the exchange changed it.
 *
 * Returns 0 on success, -EXDEV, -EROFS, -EINVAL, -EOPNOTSUPP, -EBUSY, or
 * an error from the inline-conversion/writeback/exchange helpers.
 */
static int f2fs_move_file_range(struct file *file_in, loff_t pos_in,
			struct file *file_out, loff_t pos_out, size_t len)
{
	struct inode *src = file_inode(file_in);
	struct inode *dst = file_inode(file_out);
	struct f2fs_sb_info *sbi = F2FS_I_SB(src);
	size_t olen = len;
	/*
	 * File sizes are kept in loff_t: size_t would truncate sizes above
	 * 4GiB on 32-bit builds before they reach f2fs_i_size_write().
	 */
	loff_t dst_max_i_size = 0;
	loff_t dst_osize;
	int ret;

	if (file_in->f_path.mnt != file_out->f_path.mnt ||
				src->i_sb != dst->i_sb)
		return -EXDEV;

	if (unlikely(f2fs_readonly(src->i_sb)))
		return -EROFS;

	if (!S_ISREG(src->i_mode) || !S_ISREG(dst->i_mode))
		return -EINVAL;

	if (f2fs_encrypted_inode(src) || f2fs_encrypted_inode(dst))
		return -EOPNOTSUPP;

	if (src == dst) {
		/* moving a range onto itself is a no-op */
		if (pos_in == pos_out)
			return 0;
		/* destination must not start inside the source range */
		if (pos_out > pos_in && pos_out < pos_in + len)
			return -EINVAL;
	}

	inode_lock(src);
	if (src != dst) {
		if (!inode_trylock(dst)) {
			ret = -EBUSY;
			goto out;
		}
	}

	ret = -EINVAL;
	if (pos_in + len > src->i_size || pos_in + len < pos_in)
		goto out_unlock;
	if (len == 0)
		olen = len = src->i_size - pos_in;
	/* a range ending at EOF is extended to the end of its last block */
	if (pos_in + len == src->i_size)
		len = ALIGN(src->i_size, F2FS_BLKSIZE) - pos_in;
	if (len == 0) {
		ret = 0;
		goto out_unlock;
	}

	dst_osize = dst->i_size;
	if (pos_out + olen > dst->i_size)
		dst_max_i_size = pos_out + olen;

	/* verify the end result is block aligned */
	if (!IS_ALIGNED(pos_in, F2FS_BLKSIZE) ||
			!IS_ALIGNED(pos_in + len, F2FS_BLKSIZE) ||
			!IS_ALIGNED(pos_out, F2FS_BLKSIZE))
		goto out_unlock;

	ret = f2fs_convert_inline_inode(src);
	if (ret)
		goto out_unlock;

	ret = f2fs_convert_inline_inode(dst);
	if (ret)
		goto out_unlock;

	/* write out all dirty pages from offset */
	ret = filemap_write_and_wait_range(src->i_mapping,
					pos_in, pos_in + len);
	if (ret)
		goto out_unlock;

	ret = filemap_write_and_wait_range(dst->i_mapping,
					pos_out, pos_out + len);
	if (ret)
		goto out_unlock;

	f2fs_balance_fs(sbi, true);
	f2fs_lock_op(sbi);
	ret = __exchange_data_block(src, dst, pos_in >> F2FS_BLKSIZE_BITS,
				pos_out >> F2FS_BLKSIZE_BITS,
				len >> F2FS_BLKSIZE_BITS, false);

	if (!ret) {
		if (dst_max_i_size)
			f2fs_i_size_write(dst, dst_max_i_size);
		else if (dst_osize != dst->i_size)
			f2fs_i_size_write(dst, dst_osize);
	}
	f2fs_unlock_op(sbi);
out_unlock:
	if (src != dst)
		inode_unlock(dst);
out:
	inode_unlock(src);
	return ret;
}
static int f2fs_ioc_move_range ( struct file * filp , unsigned long arg )
{
struct f2fs_move_range range ;
struct fd dst ;
int err ;
if ( ! ( filp - > f_mode & FMODE_READ ) | |
! ( filp - > f_mode & FMODE_WRITE ) )
return - EBADF ;
if ( copy_from_user ( & range , ( struct f2fs_move_range __user * ) arg ,
sizeof ( range ) ) )
return - EFAULT ;
dst = fdget ( range . dst_fd ) ;
if ( ! dst . file )
return - EBADF ;
if ( ! ( dst . file - > f_mode & FMODE_WRITE ) ) {
err = - EBADF ;
goto err_out ;
}
err = mnt_want_write_file ( filp ) ;
if ( err )
goto err_out ;
err = f2fs_move_file_range ( filp , range . pos_in , dst . file ,
range . pos_out , range . len ) ;
mnt_drop_write_file ( filp ) ;
if ( copy_to_user ( ( struct f2fs_move_range __user * ) arg ,
& range , sizeof ( range ) ) )
err = - EFAULT ;
err_out :
fdput ( dst ) ;
return err ;
}
2014-09-25 02:37:02 +04:00
long f2fs_ioctl ( struct file * filp , unsigned int cmd , unsigned long arg )
{
switch ( cmd ) {
case F2FS_IOC_GETFLAGS :
return f2fs_ioc_getflags ( filp , arg ) ;
case F2FS_IOC_SETFLAGS :
return f2fs_ioc_setflags ( filp , arg ) ;
2015-01-23 15:36:04 +03:00
case F2FS_IOC_GETVERSION :
return f2fs_ioc_getversion ( filp , arg ) ;
2014-10-07 04:39:50 +04:00
case F2FS_IOC_START_ATOMIC_WRITE :
return f2fs_ioc_start_atomic_write ( filp ) ;
case F2FS_IOC_COMMIT_ATOMIC_WRITE :
return f2fs_ioc_commit_atomic_write ( filp ) ;
2014-10-07 03:11:16 +04:00
case F2FS_IOC_START_VOLATILE_WRITE :
return f2fs_ioc_start_volatile_write ( filp ) ;
2014-12-09 17:08:59 +03:00
case F2FS_IOC_RELEASE_VOLATILE_WRITE :
return f2fs_ioc_release_volatile_write ( filp ) ;
case F2FS_IOC_ABORT_VOLATILE_WRITE :
return f2fs_ioc_abort_volatile_write ( filp ) ;
2015-01-09 06:15:53 +03:00
case F2FS_IOC_SHUTDOWN :
return f2fs_ioc_shutdown ( filp , arg ) ;
2014-09-25 02:37:02 +04:00
case FITRIM :
return f2fs_ioc_fitrim ( filp , arg ) ;
2015-04-21 01:19:06 +03:00
case F2FS_IOC_SET_ENCRYPTION_POLICY :
return f2fs_ioc_set_encryption_policy ( filp , arg ) ;
case F2FS_IOC_GET_ENCRYPTION_POLICY :
return f2fs_ioc_get_encryption_policy ( filp , arg ) ;
case F2FS_IOC_GET_ENCRYPTION_PWSALT :
return f2fs_ioc_get_encryption_pwsalt ( filp , arg ) ;
2015-07-10 13:08:10 +03:00
case F2FS_IOC_GARBAGE_COLLECT :
return f2fs_ioc_gc ( filp , arg ) ;
2015-10-05 17:24:19 +03:00
case F2FS_IOC_WRITE_CHECKPOINT :
return f2fs_ioc_write_checkpoint ( filp , arg ) ;
2015-10-27 04:53:45 +03:00
case F2FS_IOC_DEFRAGMENT :
return f2fs_ioc_defragment ( filp , arg ) ;
2016-07-09 01:16:47 +03:00
case F2FS_IOC_MOVE_RANGE :
return f2fs_ioc_move_range ( filp , arg ) ;
2012-11-02 12:09:44 +04:00
default :
return - ENOTTY ;
}
}
2015-04-22 06:39:58 +03:00
static ssize_t f2fs_file_write_iter ( struct kiocb * iocb , struct iov_iter * from )
{
2016-02-04 00:09:09 +03:00
struct file * file = iocb - > ki_filp ;
struct inode * inode = file_inode ( file ) ;
2016-07-14 05:33:19 +03:00
struct blk_plug plug ;
2016-02-04 00:09:09 +03:00
ssize_t ret ;
2015-04-22 06:39:58 +03:00
if ( f2fs_encrypted_inode ( inode ) & &
2015-05-16 02:26:10 +03:00
! fscrypt_has_encryption_key ( inode ) & &
fscrypt_get_encryption_info ( inode ) )
2015-04-22 06:39:58 +03:00
return - EACCES ;
2016-02-04 00:09:09 +03:00
inode_lock ( inode ) ;
ret = generic_write_checks ( iocb , from ) ;
if ( ret > 0 ) {
ret = f2fs_preallocate_blocks ( iocb , from ) ;
2016-07-14 05:33:19 +03:00
if ( ! ret ) {
blk_start_plug ( & plug ) ;
2016-02-04 00:09:09 +03:00
ret = __generic_file_write_iter ( iocb , from ) ;
2016-07-14 05:33:19 +03:00
blk_finish_plug ( & plug ) ;
}
2016-02-04 00:09:09 +03:00
}
inode_unlock ( inode ) ;
2016-04-07 18:52:01 +03:00
if ( ret > 0 )
ret = generic_write_sync ( iocb , ret ) ;
2016-02-04 00:09:09 +03:00
return ret ;
2015-04-22 06:39:58 +03:00
}
2013-02-04 18:41:41 +04:00
#ifdef CONFIG_COMPAT
/*
 * compat ioctl entry point.
 *
 * The three F2FS_IOC32_* commands are translated to their native
 * counterparts; the other listed commands are passed through unchanged.
 * Anything else is rejected with -ENOIOCTLCMD.  The accepted command is
 * then forwarded to f2fs_ioctl() with @arg converted by compat_ptr().
 */
long f2fs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
	switch (cmd) {
	/* commands with a distinct compat number */
	case F2FS_IOC32_GETFLAGS:
		cmd = F2FS_IOC_GETFLAGS;
		break;
	case F2FS_IOC32_SETFLAGS:
		cmd = F2FS_IOC_SETFLAGS;
		break;
	case F2FS_IOC32_GETVERSION:
		cmd = F2FS_IOC_GETVERSION;
		break;

	/* commands forwarded as they are */
	case F2FS_IOC_START_ATOMIC_WRITE:
	case F2FS_IOC_COMMIT_ATOMIC_WRITE:
	case F2FS_IOC_START_VOLATILE_WRITE:
	case F2FS_IOC_RELEASE_VOLATILE_WRITE:
	case F2FS_IOC_ABORT_VOLATILE_WRITE:
	case F2FS_IOC_SHUTDOWN:
	case F2FS_IOC_SET_ENCRYPTION_POLICY:
	case F2FS_IOC_GET_ENCRYPTION_PWSALT:
	case F2FS_IOC_GET_ENCRYPTION_POLICY:
	case F2FS_IOC_GARBAGE_COLLECT:
	case F2FS_IOC_WRITE_CHECKPOINT:
	case F2FS_IOC_DEFRAGMENT:
	case F2FS_IOC_MOVE_RANGE:
		break;

	default:
		return -ENOIOCTLCMD;
	}

	return f2fs_ioctl(file, cmd, (unsigned long)compat_ptr(arg));
}
#endif
2012-11-02 12:09:44 +04:00
const struct file_operations f2fs_file_operations = {
2014-04-23 10:10:24 +04:00
. llseek = f2fs_llseek ,
2014-04-02 22:33:16 +04:00
. read_iter = generic_file_read_iter ,
2015-04-22 06:39:58 +03:00
. write_iter = f2fs_file_write_iter ,
. open = f2fs_file_open ,
2014-12-06 01:37:37 +03:00
. release = f2fs_release_file ,
2012-11-02 12:09:44 +04:00
. mmap = f2fs_file_mmap ,
. fsync = f2fs_sync_file ,
. fallocate = f2fs_fallocate ,
. unlocked_ioctl = f2fs_ioctl ,
2013-02-04 18:41:41 +04:00
# ifdef CONFIG_COMPAT
. compat_ioctl = f2fs_compat_ioctl ,
# endif
2012-11-02 12:09:44 +04:00
. splice_read = generic_file_splice_read ,
2014-04-05 12:27:08 +04:00
. splice_write = iter_file_splice_write ,
2012-11-02 12:09:44 +04:00
} ;