2012-11-29 08:28:09 +04:00
/*
2012-11-02 12:11:10 +04:00
* fs / f2fs / namei . c
*
* Copyright ( c ) 2012 Samsung Electronics Co . , Ltd .
* http : //www.samsung.com/
*
* This program is free software ; you can redistribute it and / or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation .
*/
# include <linux/fs.h>
# include <linux/f2fs_fs.h>
# include <linux/pagemap.h>
# include <linux/sched.h>
# include <linux/ctype.h>
2014-06-19 12:23:19 +04:00
# include <linux/dcache.h>
2015-04-15 23:49:55 +03:00
# include <linux/namei.h>
2017-07-08 19:13:07 +03:00
# include <linux/quotaops.h>
2012-11-02 12:11:10 +04:00
# include "f2fs.h"
2013-03-21 10:21:57 +04:00
# include "node.h"
2012-11-02 12:11:10 +04:00
# include "xattr.h"
# include "acl.h"
2013-04-19 20:28:40 +04:00
# include <trace/events/f2fs.h>
2012-11-02 12:11:10 +04:00
/*
 * Allocate and initialise an in-memory inode for a new child of @dir.
 *
 * @dir:  parent directory; supplies the super block, the inherited flags
 *        and (when project inheritance is on) the project id.
 * @mode: file type and permission bits for the new inode.
 *
 * Returns the new inode on success.  On failure returns
 * ERR_PTR(-ENOMEM) when no VFS inode could be allocated,
 * ERR_PTR(-ENOSPC) when no node id could be reserved,
 * ERR_PTR(-EINVAL) when insert_inode_locked() fails, or the error reported
 * by dquot_initialize()/dquot_alloc_inode().
 */
static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(dir);
	struct inode *inode;
	nid_t ino;
	bool nid_free;
	int xattr_size;
	int err;

	inode = new_inode(dir->i_sb);
	if (!inode)
		return ERR_PTR(-ENOMEM);

	/* Reserve the node id while holding the op lock. */
	f2fs_lock_op(sbi);
	nid_free = alloc_nid(sbi, &ino);
	f2fs_unlock_op(sbi);
	if (!nid_free) {
		err = -ENOSPC;
		goto fail;
	}

	inode_init_owner(inode, dir, mode);

	inode->i_ino = ino;
	inode->i_blocks = 0;
	/* All three timestamps start out identical. */
	inode->i_ctime = current_time(inode);
	inode->i_atime = inode->i_ctime;
	inode->i_mtime = inode->i_atime;
	inode->i_generation = sbi->s_next_generation++;

	/* Any insertion failure is reported to the caller as -EINVAL. */
	if (insert_inode_locked(inode)) {
		err = -EINVAL;
		goto fail;
	}

	/* Inherit the project id only when the parent asks for it. */
	if (f2fs_sb_has_project_quota(sbi->sb) &&
			(F2FS_I(dir)->i_flags & FS_PROJINHERIT_FL))
		F2FS_I(inode)->i_projid = F2FS_I(dir)->i_projid;
	else
		F2FS_I(inode)->i_projid = make_kprojid(&init_user_ns,
							F2FS_DEF_PROJID);

	err = dquot_initialize(inode);
	if (!err)
		err = dquot_alloc_inode(inode);
	if (err)
		goto fail_drop;

	/* If the directory encrypted, then we should encrypt the inode. */
	if (f2fs_encrypted_inode(dir) && f2fs_may_encrypt(inode))
		f2fs_set_encrypted_inode(inode);

	set_inode_flag(inode, FI_NEW_INODE);

	if (f2fs_sb_has_extra_attr(sbi->sb)) {
		set_inode_flag(inode, FI_EXTRA_ATTR);
		F2FS_I(inode)->i_extra_isize = F2FS_TOTAL_EXTRA_ATTR_SIZE;
	}

	if (test_opt(sbi, INLINE_XATTR))
		set_inode_flag(inode, FI_INLINE_XATTR);
	if (test_opt(sbi, INLINE_DATA) && f2fs_may_inline_data(inode))
		set_inode_flag(inode, FI_INLINE_DATA);
	if (f2fs_may_inline_dentry(inode))
		set_inode_flag(inode, FI_INLINE_DENTRY);

	/*
	 * Pick the inline xattr size: the per-mount value when the flexible
	 * inline xattr feature is on and the inode has inline xattrs, the
	 * default when the feature is off and the inode has inline xattrs
	 * or an inline dentry, and zero otherwise.
	 */
	xattr_size = 0;
	if (f2fs_sb_has_flexible_inline_xattr(sbi->sb)) {
		f2fs_bug_on(sbi, !f2fs_has_extra_attr(inode));
		if (f2fs_has_inline_xattr(inode))
			xattr_size = sbi->inline_xattr_size;
	} else if (f2fs_has_inline_xattr(inode) ||
				f2fs_has_inline_dentry(inode)) {
		xattr_size = DEFAULT_INLINE_XATTR_ADDRS;
	}
	F2FS_I(inode)->i_inline_xattr_size = xattr_size;

	f2fs_init_extent_tree(inode, NULL);

	stat_inc_inline_xattr(inode);
	stat_inc_inline_inode(inode);
	stat_inc_inline_dir(inode);

	/* Start from the parent's inheritable flags, masked for @mode. */
	F2FS_I(inode)->i_flags =
		f2fs_mask_flags(mode, F2FS_I(dir)->i_flags & F2FS_FL_INHERITED);

	if (S_ISDIR(inode->i_mode))
		F2FS_I(inode)->i_flags |= FS_INDEX_FL;

	if (F2FS_I(inode)->i_flags & FS_PROJINHERIT_FL)
		set_inode_flag(inode, FI_PROJ_INHERIT);

	trace_f2fs_new_inode(inode, 0);
	return inode;

fail:
	/* Reached before the inode was successfully inserted. */
	trace_f2fs_new_inode(inode, err);
	make_bad_inode(inode);
	if (nid_free)
		set_inode_flag(inode, FI_FREE_NID);
	iput(inode);
	return ERR_PTR(err);

fail_drop:
	/* Reached after insertion: drop quota state and unlock the inode. */
	trace_f2fs_new_inode(inode, err);
	dquot_drop(inode);
	inode->i_flags |= S_NOQUOTA;
	if (nid_free)
		set_inode_flag(inode, FI_FREE_NID);
	clear_nlink(inode);
	unlock_new_inode(inode);
	iput(inode);
	return ERR_PTR(err);
}
/*
 * Check whether file name @s carries the extension @sub.
 *
 * The filename format of a multimedia file is defined as:
 *   "filename + '.' + extension + (optional: '.' + temp extension)".
 * The extension is compared case-insensitively and must be a complete
 * name component: it has to be followed either by the end of the name or
 * by the '.' that introduces a temp extension.  A mere prefix match (for
 * example "notes.rmd" against "rm") is rejected.
 *
 * @s:   NUL-terminated file name.
 * @sub: NUL-terminated extension, without the leading '.'.
 *
 * Returns 1 when the name matches, 0 otherwise.
 */
static int is_multimedia_file(const unsigned char *s, const char *sub)
{
	const char *name = (const char *)s;
	size_t slen = strlen(name);
	size_t sublen = strlen(sub);
	size_t i;

	/* Need at least one name character and the '.' before the extension. */
	if (slen < sublen + 2)
		return 0;

	for (i = 1; i < slen - sublen; i++) {
		char next;

		if (name[i] != '.')
			continue;
		if (strncasecmp(name + i + 1, sub, sublen))
			continue;

		/*
		 * i + 1 + sublen <= slen here, so this reads at most the
		 * terminating NUL.
		 */
		next = name[i + 1 + sublen];
		if (next == '\0' || next == '.')
			return 1;
	}

	return 0;
}
2012-11-29 08:28:09 +04:00
/*
2012-11-02 12:11:10 +04:00
* Set multimedia files as cold files for hot / cold data separation
*/
2013-03-21 10:21:57 +04:00
static inline void set_cold_files ( struct f2fs_sb_info * sbi , struct inode * inode ,
2012-11-02 12:11:10 +04:00
const unsigned char * name )
{
int i ;
__u8 ( * extlist ) [ 8 ] = sbi - > raw_super - > extension_list ;
int count = le32_to_cpu ( sbi - > raw_super - > extension_count ) ;
for ( i = 0 ; i < count ; i + + ) {
2013-04-19 20:27:21 +04:00
if ( is_multimedia_file ( name , extlist [ i ] ) ) {
2013-06-14 03:52:35 +04:00
file_set_cold ( inode ) ;
2012-11-02 12:11:10 +04:00
break ;
}
}
}
/*
 * Create a regular file named by @dentry inside directory @dir.
 *
 * @dir:    parent directory.
 * @dentry: dentry that receives the new inode.
 * @mode:   file type and permission bits.
 * @excl:   not used by this function.
 *
 * Returns 0 on success, -EIO when f2fs_cp_error() reports a checkpoint
 * error, or the error from dquot_initialize(), f2fs_new_inode() or
 * f2fs_add_link().
 */
static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
						bool excl)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(dir);
	struct inode *inode;
	int err;

	if (unlikely(f2fs_cp_error(sbi)))
		return -EIO;

	err = dquot_initialize(dir);
	if (err)
		return err;

	inode = f2fs_new_inode(dir, mode);
	if (IS_ERR(inode))
		return PTR_ERR(inode);

	/* Extension-based cold marking can be switched off by mount option. */
	if (!test_opt(sbi, DISABLE_EXT_IDENTIFY))
		set_cold_files(sbi, inode, dentry->d_name.name);

	inode->i_op = &f2fs_file_inode_operations;
	inode->i_fop = &f2fs_file_operations;
	inode->i_mapping->a_ops = &f2fs_dblock_aops;

	f2fs_lock_op(sbi);
	err = f2fs_add_link(dentry, inode);
	if (err) {
		/*
		 * NOTE(review): the op lock is not released on this path;
		 * presumably handle_failed_inode() does it - confirm.
		 */
		handle_failed_inode(inode);
		return err;
	}
	f2fs_unlock_op(sbi);

	alloc_nid_done(sbi, inode->i_ino);

	d_instantiate(dentry, inode);
	unlock_new_inode(inode);

	if (IS_DIRSYNC(dir))
		f2fs_sync_fs(sbi->sb, 1);

	f2fs_balance_fs(sbi, true);
	return 0;
}
static int f2fs_link ( struct dentry * old_dentry , struct inode * dir ,
struct dentry * dentry )
{
2015-03-18 01:25:59 +03:00
struct inode * inode = d_inode ( old_dentry ) ;
2014-09-03 02:31:18 +04:00
struct f2fs_sb_info * sbi = F2FS_I_SB ( dir ) ;
f2fs: use rw_sem instead of fs_lock(locks mutex)
The fs_locks is used to block other ops(ex, recovery) when doing checkpoint.
And each other operate routine(besides checkpoint) needs to acquire a fs_lock,
there is a terrible problem here, if these are too many concurrency threads acquiring
fs_lock, so that they will block each other and may lead to some performance problem,
but this is not the phenomenon we want to see.
Though there are some optimization patches introduced to enhance the usage of fs_lock,
but the thorough solution is using a *rw_sem* to replace the fs_lock.
Checkpoint routine takes write_sem, and other ops take read_sem, so that we can block
other ops(ex, recovery) when doing checkpoint, and other ops will not disturb each other,
this can avoid the problem described above completely.
Because of the weakness of rw_sem, the above change may introduce a potential problem
that the checkpoint thread might get starved if other threads are intensively locking
the read semaphore for I/O.(Pointed out by Xu Jin)
In order to avoid this, a wait_list is introduced, the appending read semaphore ops
will be dropped into the wait_list if checkpoint thread is waiting for write semaphore,
and will be waked up when checkpoint thread gives up write semaphore.
Thanks to Kim's previous review and test, and will be very glad to see other guys'
performance tests about this patch.
V2:
-fix the potential starvation problem.
-use more suitable func name suggested by Xu Jin.
Signed-off-by: Gu Zheng <guz.fnst@cn.fujitsu.com>
[Jaegeuk Kim: adjust minor coding standard]
Signed-off-by: Jaegeuk Kim <jaegeuk.kim@samsung.com>
2013-09-27 14:08:30 +04:00
int err ;
2012-11-02 12:11:10 +04:00
2017-10-24 00:48:49 +03:00
if ( unlikely ( f2fs_cp_error ( sbi ) ) )
return - EIO ;
2015-04-22 06:39:58 +03:00
if ( f2fs_encrypted_inode ( dir ) & &
2015-05-16 02:26:10 +03:00
! fscrypt_has_permitted_context ( dir , inode ) )
2015-04-22 06:39:58 +03:00
return - EPERM ;
2017-07-25 19:01:41 +03:00
if ( is_inode_flag_set ( dir , FI_PROJ_INHERIT ) & &
( ! projid_eq ( F2FS_I ( dir ) - > i_projid ,
F2FS_I ( old_dentry - > d_inode ) - > i_projid ) ) )
return - EXDEV ;
2017-07-08 19:13:07 +03:00
err = dquot_initialize ( dir ) ;
if ( err )
return err ;
2016-01-08 01:15:04 +03:00
f2fs_balance_fs ( sbi , true ) ;
2012-12-19 11:25:21 +04:00
2016-09-14 17:48:04 +03:00
inode - > i_ctime = current_time ( inode ) ;
2013-05-22 07:06:26 +04:00
ihold ( inode ) ;
2012-11-02 12:11:10 +04:00
2016-05-20 20:13:22 +03:00
set_inode_flag ( inode , FI_INC_LINK ) ;
f2fs: use rw_sem instead of fs_lock(locks mutex)
The fs_locks is used to block other ops(ex, recovery) when doing checkpoint.
And each other operate routine(besides checkpoint) needs to acquire a fs_lock,
there is a terrible problem here, if these are too many concurrency threads acquiring
fs_lock, so that they will block each other and may lead to some performance problem,
but this is not the phenomenon we want to see.
Though there are some optimization patches introduced to enhance the usage of fs_lock,
but the thorough solution is using a *rw_sem* to replace the fs_lock.
Checkpoint routine takes write_sem, and other ops take read_sem, so that we can block
other ops(ex, recovery) when doing checkpoint, and other ops will not disturb each other,
this can avoid the problem described above completely.
Because of the weakness of rw_sem, the above change may introduce a potential problem
that the checkpoint thread might get starved if other threads are intensively locking
the read semaphore for I/O.(Pointed out by Xu Jin)
In order to avoid this, a wait_list is introduced, the appending read semaphore ops
will be dropped into the wait_list if checkpoint thread is waiting for write semaphore,
and will be waked up when checkpoint thread gives up write semaphore.
Thanks to Kim's previous review and test, and will be very glad to see other guys'
performance tests about this patch.
V2:
-fix the potential starvation problem.
-use more suitable func name suggested by Xu Jin.
Signed-off-by: Gu Zheng <guz.fnst@cn.fujitsu.com>
[Jaegeuk Kim: adjust minor coding standard]
Signed-off-by: Jaegeuk Kim <jaegeuk.kim@samsung.com>
2013-09-27 14:08:30 +04:00
f2fs_lock_op ( sbi ) ;
2012-11-02 12:11:10 +04:00
err = f2fs_add_link ( dentry , inode ) ;
if ( err )
goto out ;
2014-09-25 22:55:53 +04:00
f2fs_unlock_op ( sbi ) ;
2012-11-02 12:11:10 +04:00
d_instantiate ( dentry , inode ) ;
2014-11-10 09:15:31 +03:00
if ( IS_DIRSYNC ( dir ) )
f2fs_sync_fs ( sbi - > sb , 1 ) ;
2012-11-02 12:11:10 +04:00
return 0 ;
out :
2016-05-20 20:13:22 +03:00
clear_inode_flag ( inode , FI_INC_LINK ) ;
2012-11-02 12:11:10 +04:00
iput ( inode ) ;
2014-09-25 22:55:53 +04:00
f2fs_unlock_op ( sbi ) ;
2012-11-02 12:11:10 +04:00
return err ;
}
/*
 * Return a dentry for the parent directory of child.
 *
 * The parent inode number is obtained by looking up the dotdot name in
 * the inode of child with f2fs_inode_by_name(). When that yields no inode
 * number, the error carried in page is returned if page is an error
 * pointer, otherwise ERR_PTR(-ENOENT). On success the parent inode is
 * loaded with f2fs_iget() and handed to d_obtain_alias().
 */
struct dentry * f2fs_get_parent ( struct dentry * child )
{
struct qstr dotdot = QSTR_INIT ( " .. " , 2 ) ;
2016-07-19 03:27:47 +03:00
struct page * page ;
unsigned long ino = f2fs_inode_by_name ( d_inode ( child ) , & dotdot , & page ) ;
/* A zero inode number means the lookup did not find the entry. */
if ( ! ino ) {
/* page doubles as the error carrier of the lookup. */
if ( IS_ERR ( page ) )
return ERR_CAST ( page ) ;
2012-11-02 12:11:10 +04:00
return ERR_PTR ( - ENOENT ) ;
2016-07-19 03:27:47 +03:00
}
2016-04-10 08:33:30 +03:00
return d_obtain_alias ( f2fs_iget ( child - > d_sb , ino ) ) ;
2012-11-02 12:11:10 +04:00
}
2015-03-31 01:07:16 +03:00
/*
 * Make sure directory dir contains both the dot and the dotdot entry.
 *
 * dir:  directory to repair.
 * pino: inode number stored in a newly added dotdot entry.
 *
 * Each entry is looked up with f2fs_find_entry(); a missing one is added
 * with __f2fs_add_link() (the dot entry points at dir itself). When every
 * step succeeds, FI_INLINE_DOTS is cleared on dir.
 *
 * Returns 0 on success or when the filesystem is read-only (nothing is
 * changed in that case), otherwise the error from dquot_initialize(), from
 * a failed lookup, or from __f2fs_add_link().
 */
static int __recover_dot_dentries ( struct inode * dir , nid_t pino )
{
struct f2fs_sb_info * sbi = F2FS_I_SB ( dir ) ;
struct qstr dot = QSTR_INIT ( " . " , 1 ) ;
struct qstr dotdot = QSTR_INIT ( " .. " , 2 ) ;
struct f2fs_dir_entry * de ;
struct page * page ;
int err = 0 ;
2015-12-30 12:40:31 +03:00
/* Read-only: log and skip, reporting success to the caller. */
if ( f2fs_readonly ( sbi - > sb ) ) {
f2fs_msg ( sbi - > sb , KERN_INFO ,
" skip recovering inline_dots inode (ino:%lu, pino:%u) "
" in readonly mountpoint " , dir - > i_ino , pino ) ;
return 0 ;
}
2017-07-24 12:12:06 +03:00
err = dquot_initialize ( dir ) ;
if ( err )
return err ;
2016-01-08 01:15:04 +03:00
/* Called before the op lock is taken. */
f2fs_balance_fs ( sbi , true ) ;
2015-12-24 13:03:29 +03:00
2015-03-31 01:07:16 +03:00
/* Lookups and additions below all run between lock_op and unlock_op. */
f2fs_lock_op ( sbi ) ;
de = f2fs_find_entry ( dir , & dot , & page ) ;
if ( de ) {
/* Entry exists: just release the page returned by the lookup. */
f2fs_dentry_kunmap ( dir , page ) ;
f2fs_put_page ( page , 0 ) ;
2016-05-26 00:29:11 +03:00
} else if ( IS_ERR ( page ) ) {
/* The lookup itself failed; page carries the error. */
err = PTR_ERR ( page ) ;
goto out ;
2015-03-31 01:07:16 +03:00
} else {
/* Entry is missing: add it, pointing at dir itself. */
err = __f2fs_add_link ( dir , & dot , NULL , dir - > i_ino , S_IFDIR ) ;
if ( err )
goto out ;
}
de = f2fs_find_entry ( dir , & dotdot , & page ) ;
if ( de ) {
f2fs_dentry_kunmap ( dir , page ) ;
f2fs_put_page ( page , 0 ) ;
2016-05-26 00:29:11 +03:00
} else if ( IS_ERR ( page ) ) {
/* No goto needed: the out label follows immediately. */
err = PTR_ERR ( page ) ;
2015-03-31 01:07:16 +03:00
} else {
/* Entry is missing: add it, pointing at pino. */
err = __f2fs_add_link ( dir , & dotdot , NULL , pino , S_IFDIR ) ;
}
out :
2016-05-20 19:52:20 +03:00
/* Clear the flag only when both entries are now in place. */
if ( ! err )
2016-05-20 20:13:22 +03:00
clear_inode_flag ( dir , FI_INLINE_DOTS ) ;
2015-03-31 01:07:16 +03:00
f2fs_unlock_op ( sbi ) ;
return err ;
}
2012-11-02 12:11:10 +04:00
static struct dentry * f2fs_lookup ( struct inode * dir , struct dentry * dentry ,
unsigned int flags )
{
struct inode * inode = NULL ;
struct f2fs_dir_entry * de ;
struct page * page ;
2017-10-17 12:33:41 +03:00
struct dentry * new ;
nid_t ino = - 1 ;
2015-04-22 06:39:58 +03:00
int err = 0 ;
2016-02-26 09:39:23 +03:00
unsigned int root_ino = F2FS_ROOT_INO ( F2FS_I_SB ( dir ) ) ;
2012-11-02 12:11:10 +04:00
2017-10-17 12:33:41 +03:00
trace_f2fs_lookup_start ( dir , dentry , flags ) ;
2016-02-23 20:21:37 +03:00
if ( f2fs_encrypted_inode ( dir ) ) {
2017-10-17 12:33:41 +03:00
err = fscrypt_get_encryption_info ( dir ) ;
2016-02-23 20:21:37 +03:00
/*
* DCACHE_ENCRYPTED_WITH_KEY is set if the dentry is
* created while the directory was encrypted and we
* don ' t have access to the key .
*/
if ( fscrypt_has_encryption_key ( dir ) )
fscrypt_set_encrypted_dentry ( dentry ) ;
fscrypt_set_d_op ( dentry ) ;
2017-10-17 12:33:41 +03:00
if ( err & & err ! = - ENOKEY )
goto out ;
2016-02-23 20:21:37 +03:00
}
2017-10-17 12:33:41 +03:00
if ( dentry - > d_name . len > F2FS_NAME_LEN ) {
err = - ENAMETOOLONG ;
goto out ;
}
2012-11-02 12:11:10 +04:00
de = f2fs_find_entry ( dir , & dentry - > d_name , & page ) ;
2016-05-27 20:10:41 +03:00
if ( ! de ) {
2017-10-17 12:33:41 +03:00
if ( IS_ERR ( page ) ) {
err = PTR_ERR ( page ) ;
goto out ;
}
goto out_splice ;
2016-05-27 20:10:41 +03:00
}
2012-11-02 12:11:10 +04:00
2015-04-22 21:40:27 +03:00
ino = le32_to_cpu ( de - > ino ) ;
f2fs_dentry_kunmap ( dir , page ) ;
f2fs_put_page ( page , 0 ) ;
2015-03-31 01:07:16 +03:00
2015-04-22 21:40:27 +03:00
inode = f2fs_iget ( dir - > i_sb , ino ) ;
2017-10-17 12:33:41 +03:00
if ( IS_ERR ( inode ) ) {
err = PTR_ERR ( inode ) ;
goto out ;
}
2015-03-31 01:07:16 +03:00
2016-02-26 09:39:23 +03:00
if ( ( dir - > i_ino = = root_ino ) & & f2fs_has_inline_dots ( dir ) ) {
err = __recover_dot_dentries ( dir , root_ino ) ;
if ( err )
2017-10-17 12:33:41 +03:00
goto out_iput ;
2016-02-26 09:39:23 +03:00
}
2015-04-22 06:39:58 +03:00
if ( f2fs_has_inline_dots ( inode ) ) {
2015-04-22 21:40:27 +03:00
err = __recover_dot_dentries ( inode , dir - > i_ino ) ;
2015-04-22 06:39:58 +03:00
if ( err )
2017-10-17 12:33:41 +03:00
goto out_iput ;
2012-11-02 12:11:10 +04:00
}
2016-12-16 11:18:15 +03:00
if ( f2fs_encrypted_inode ( dir ) & &
( S_ISDIR ( inode - > i_mode ) | | S_ISLNK ( inode - > i_mode ) ) & &
! fscrypt_has_permitted_context ( dir , inode ) ) {
2017-04-07 20:58:39 +03:00
f2fs_msg ( inode - > i_sb , KERN_WARNING ,
" Inconsistent encryption contexts: %lu/%lu " ,
dir - > i_ino , inode - > i_ino ) ;
err = - EPERM ;
2017-10-17 12:33:41 +03:00
goto out_iput ;
2016-02-23 20:21:37 +03:00
}
2017-10-17 12:33:41 +03:00
out_splice :
new = d_splice_alias ( inode , dentry ) ;
if ( IS_ERR ( new ) )
err = PTR_ERR ( new ) ;
trace_f2fs_lookup_end ( dir , dentry , ino , err ) ;
return new ;
out_iput :
2016-03-10 17:24:23 +03:00
iput ( inode ) ;
2017-10-17 12:33:41 +03:00
out :
trace_f2fs_lookup_end ( dir , dentry , ino , err ) ;
2015-04-22 06:39:58 +03:00
return ERR_PTR ( err ) ;
2012-11-02 12:11:10 +04:00
}
static int f2fs_unlink ( struct inode * dir , struct dentry * dentry )
{
2014-09-03 02:31:18 +04:00
struct f2fs_sb_info * sbi = F2FS_I_SB ( dir ) ;
2015-03-18 01:25:59 +03:00
struct inode * inode = d_inode ( dentry ) ;
2012-11-02 12:11:10 +04:00
struct f2fs_dir_entry * de ;
struct page * page ;
int err = - ENOENT ;
2013-04-19 20:28:40 +04:00
trace_f2fs_unlink_enter ( dir , dentry ) ;
2012-12-19 11:25:21 +04:00
2017-10-24 00:48:49 +03:00
if ( unlikely ( f2fs_cp_error ( sbi ) ) )
return - EIO ;
2017-07-08 19:13:07 +03:00
err = dquot_initialize ( dir ) ;
2017-10-24 00:50:15 +03:00
if ( err )
return err ;
err = dquot_initialize ( inode ) ;
2017-07-08 19:13:07 +03:00
if ( err )
return err ;
2012-11-02 12:11:10 +04:00
de = f2fs_find_entry ( dir , & dentry - > d_name , & page ) ;
2016-07-19 03:27:47 +03:00
if ( ! de ) {
if ( IS_ERR ( page ) )
err = PTR_ERR ( page ) ;
2012-11-02 12:11:10 +04:00
goto fail ;
2016-07-19 03:27:47 +03:00
}
2012-11-02 12:11:10 +04:00
2016-01-08 01:15:04 +03:00
f2fs_balance_fs ( sbi , true ) ;
2015-12-22 22:56:08 +03:00
2013-10-08 05:19:28 +04:00
f2fs_lock_op ( sbi ) ;
2013-07-30 06:36:53 +04:00
err = acquire_orphan_inode ( sbi ) ;
2012-11-02 12:11:10 +04:00
if ( err ) {
2013-10-08 05:19:28 +04:00
f2fs_unlock_op ( sbi ) ;
2014-11-22 03:36:28 +03:00
f2fs_dentry_kunmap ( dir , page ) ;
2012-11-02 12:11:10 +04:00
f2fs_put_page ( page , 0 ) ;
goto fail ;
}
2014-09-24 14:17:04 +04:00
f2fs_delete_entry ( de , page , dir , inode ) ;
f2fs: use rw_sem instead of fs_lock(locks mutex)
The fs_locks is used to block other ops(ex, recovery) when doing checkpoint.
And each other operate routine(besides checkpoint) needs to acquire a fs_lock,
there is a terrible problem here, if these are too many concurrency threads acquiring
fs_lock, so that they will block each other and may lead to some performance problem,
but this is not the phenomenon we want to see.
Though there are some optimization patches introduced to enhance the usage of fs_lock,
but the thorough solution is using a *rw_sem* to replace the fs_lock.
Checkpoint routine takes write_sem, and other ops take read_sem, so that we can block
other ops(ex, recovery) when doing checkpoint, and other ops will not disturb each other,
this can avoid the problem described above completely.
Because of the weakness of rw_sem, the above change may introduce a potential problem
that the checkpoint thread might get starved if other threads are intensively locking
the read semaphore for I/O.(Pointed out by Xu Jin)
In order to avoid this, a wait_list is introduced, the appending read semaphore ops
will be dropped into the wait_list if checkpoint thread is waiting for write semaphore,
and will be waked up when checkpoint thread gives up write semaphore.
Thanks to Kim's previous review and test, and will be very glad to see other guys'
performance tests about this patch.
V2:
-fix the potential starvation problem.
-use more suitable func name suggested by Xu Jin.
Signed-off-by: Gu Zheng <guz.fnst@cn.fujitsu.com>
[Jaegeuk Kim: adjust minor coding standard]
Signed-off-by: Jaegeuk Kim <jaegeuk.kim@samsung.com>
2013-09-27 14:08:30 +04:00
f2fs_unlock_op ( sbi ) ;
2012-11-02 12:11:10 +04:00
2014-11-10 09:15:31 +03:00
if ( IS_DIRSYNC ( dir ) )
f2fs_sync_fs ( sbi - > sb , 1 ) ;
2012-11-02 12:11:10 +04:00
fail :
2013-04-19 20:28:40 +04:00
trace_f2fs_unlink_exit ( inode , err ) ;
2012-11-02 12:11:10 +04:00
return err ;
}
2015-11-17 18:20:54 +03:00
static const char * f2fs_get_link ( struct dentry * dentry ,
2015-12-29 23:58:39 +03:00
struct inode * inode ,
struct delayed_call * done )
2015-04-15 23:49:55 +03:00
{
2015-12-29 23:58:39 +03:00
const char * link = page_get_link ( dentry , inode , done ) ;
2015-05-02 20:32:22 +03:00
if ( ! IS_ERR ( link ) & & ! * link ) {
/* this is broken symlink case */
2015-12-29 23:58:39 +03:00
do_delayed_call ( done ) ;
clear_delayed_call ( done ) ;
2015-05-02 20:32:22 +03:00
link = ERR_PTR ( - ENOENT ) ;
2015-04-15 23:49:55 +03:00
}
2015-05-02 20:32:22 +03:00
return link ;
2015-04-15 23:49:55 +03:00
}
2012-11-02 12:11:10 +04:00
static int f2fs_symlink ( struct inode * dir , struct dentry * dentry ,
const char * symname )
{
2014-09-03 02:31:18 +04:00
struct f2fs_sb_info * sbi = F2FS_I_SB ( dir ) ;
2012-11-02 12:11:10 +04:00
struct inode * inode ;
2015-04-30 01:10:53 +03:00
size_t len = strlen ( symname ) ;
2015-05-16 02:26:10 +03:00
struct fscrypt_str disk_link = FSTR_INIT ( ( char * ) symname , len + 1 ) ;
struct fscrypt_symlink_data * sd = NULL ;
f2fs: use rw_sem instead of fs_lock(locks mutex)
The fs_locks is used to block other ops(ex, recovery) when doing checkpoint.
And each other operate routine(besides checkpoint) needs to acquire a fs_lock,
there is a terrible problem here, if these are too many concurrency threads acquiring
fs_lock, so that they will block each other and may lead to some performance problem,
but this is not the phenomenon we want to see.
Though there are some optimization patches introduced to enhance the usage of fs_lock,
but the thorough solution is using a *rw_sem* to replace the fs_lock.
Checkpoint routine takes write_sem, and other ops take read_sem, so that we can block
other ops(ex, recovery) when doing checkpoint, and other ops will not disturb each other,
this can avoid the problem described above completely.
Because of the weakness of rw_sem, the above change may introduce a potential problem
that the checkpoint thread might get starved if other threads are intensively locking
the read semaphore for I/O.(Pointed out by Xu Jin)
In order to avoid this, a wait_list is introduced, the appending read semaphore ops
will be dropped into the wait_list if checkpoint thread is waiting for write semaphore,
and will be waked up when checkpoint thread gives up write semaphore.
Thanks to Kim's previous review and test, and will be very glad to see other guys'
performance tests about this patch.
V2:
-fix the potential starvation problem.
-use more suitable func name suggested by Xu Jin.
Signed-off-by: Gu Zheng <guz.fnst@cn.fujitsu.com>
[Jaegeuk Kim: adjust minor coding standard]
Signed-off-by: Jaegeuk Kim <jaegeuk.kim@samsung.com>
2013-09-27 14:08:30 +04:00
int err ;
2012-11-02 12:11:10 +04:00
2017-10-24 00:48:49 +03:00
if ( unlikely ( f2fs_cp_error ( sbi ) ) )
return - EIO ;
2016-02-15 12:54:26 +03:00
if ( f2fs_encrypted_inode ( dir ) ) {
2015-05-16 02:26:10 +03:00
err = fscrypt_get_encryption_info ( dir ) ;
2016-02-15 12:54:26 +03:00
if ( err )
return err ;
2015-05-16 02:26:10 +03:00
if ( ! fscrypt_has_encryption_key ( dir ) )
2016-12-05 22:12:44 +03:00
return - ENOKEY ;
2016-02-15 12:54:26 +03:00
2015-05-16 02:26:10 +03:00
disk_link . len = ( fscrypt_fname_encrypted_size ( dir , len ) +
sizeof ( struct fscrypt_symlink_data ) ) ;
2016-02-15 12:54:26 +03:00
}
if ( disk_link . len > dir - > i_sb - > s_blocksize )
2015-04-30 01:10:53 +03:00
return - ENAMETOOLONG ;
2017-07-08 19:13:07 +03:00
err = dquot_initialize ( dir ) ;
if ( err )
return err ;
2012-11-02 12:11:10 +04:00
inode = f2fs_new_inode ( dir , S_IFLNK | S_IRWXUGO ) ;
if ( IS_ERR ( inode ) )
return PTR_ERR ( inode ) ;
2015-04-30 01:10:53 +03:00
if ( f2fs_encrypted_inode ( inode ) )
inode - > i_op = & f2fs_encrypted_symlink_inode_operations ;
else
inode - > i_op = & f2fs_symlink_inode_operations ;
2015-11-17 09:07:57 +03:00
inode_nohighmem ( inode ) ;
2012-11-02 12:11:10 +04:00
inode - > i_mapping - > a_ops = & f2fs_dblock_aops ;
f2fs: use rw_sem instead of fs_lock(locks mutex)
The fs_locks is used to block other ops(ex, recovery) when doing checkpoint.
And each other operate routine(besides checkpoint) needs to acquire a fs_lock,
there is a terrible problem here, if these are too many concurrency threads acquiring
fs_lock, so that they will block each other and may lead to some performance problem,
but this is not the phenomenon we want to see.
Though there are some optimization patches introduced to enhance the usage of fs_lock,
but the thorough solution is using a *rw_sem* to replace the fs_lock.
Checkpoint routine takes write_sem, and other ops take read_sem, so that we can block
other ops(ex, recovery) when doing checkpoint, and other ops will not disturb each other,
this can avoid the problem described above completely.
Because of the weakness of rw_sem, the above change may introduce a potential problem
that the checkpoint thread might get starved if other threads are intensively locking
the read semaphore for I/O.(Pointed out by Xu Jin)
In order to avoid this, a wait_list is introduced, the appending read semaphore ops
will be dropped into the wait_list if checkpoint thread is waiting for write semaphore,
and will be waked up when checkpoint thread gives up write semaphore.
Thanks to Kim's previous review and test, and will be very glad to see other guys'
performance tests about this patch.
V2:
-fix the potential starvation problem.
-use more suitable func name suggested by Xu Jin.
Signed-off-by: Gu Zheng <guz.fnst@cn.fujitsu.com>
[Jaegeuk Kim: adjust minor coding standard]
Signed-off-by: Jaegeuk Kim <jaegeuk.kim@samsung.com>
2013-09-27 14:08:30 +04:00
f2fs_lock_op ( sbi ) ;
2012-11-02 12:11:10 +04:00
err = f2fs_add_link ( dentry , inode ) ;
if ( err )
goto out ;
2014-09-25 22:55:53 +04:00
f2fs_unlock_op ( sbi ) ;
2012-11-02 12:11:10 +04:00
alloc_nid_done ( sbi , inode - > i_ino ) ;
2016-02-15 12:54:26 +03:00
if ( f2fs_encrypted_inode ( inode ) ) {
2015-04-30 01:10:53 +03:00
struct qstr istr = QSTR_INIT ( symname , len ) ;
2015-05-16 02:26:10 +03:00
struct fscrypt_str ostr ;
2015-04-30 01:10:53 +03:00
2016-02-15 12:54:26 +03:00
sd = kzalloc ( disk_link . len , GFP_NOFS ) ;
if ( ! sd ) {
err = - ENOMEM ;
2015-04-30 01:10:53 +03:00
goto err_out ;
2016-02-15 12:54:26 +03:00
}
2015-04-30 01:10:53 +03:00
2015-05-16 02:26:10 +03:00
err = fscrypt_get_encryption_info ( inode ) ;
2015-04-30 01:10:53 +03:00
if ( err )
goto err_out ;
2015-05-16 02:26:10 +03:00
if ( ! fscrypt_has_encryption_key ( inode ) ) {
2016-12-05 22:12:44 +03:00
err = - ENOKEY ;
2015-04-30 01:10:53 +03:00
goto err_out ;
}
2016-02-15 12:54:26 +03:00
ostr . name = sd - > encrypted_path ;
ostr . len = disk_link . len ;
2015-05-16 02:26:10 +03:00
err = fscrypt_fname_usr_to_disk ( inode , & istr , & ostr ) ;
2016-09-16 00:25:55 +03:00
if ( err )
2015-04-30 01:10:53 +03:00
goto err_out ;
2016-02-15 12:54:26 +03:00
sd - > len = cpu_to_le16 ( ostr . len ) ;
disk_link . name = ( char * ) sd ;
2015-04-30 01:10:53 +03:00
}
2016-02-15 12:54:26 +03:00
err = page_symlink ( inode , disk_link . name , disk_link . len ) ;
2015-04-30 01:10:53 +03:00
err_out :
2012-11-02 12:11:10 +04:00
d_instantiate ( dentry , inode ) ;
unlock_new_inode ( inode ) ;
2014-11-10 09:15:31 +03:00
2015-04-15 23:37:53 +03:00
/*
* Let ' s flush symlink data in order to avoid broken symlink as much as
* possible . Nevertheless , fsyncing is the best way , but there is no
* way to get a file descriptor in order to flush that .
*
* Note that , it needs to do dir - > fsync to make this recoverable .
* If the symlink path is stored into inline_data , there is no
* performance regression .
*/
2015-10-22 13:23:08 +03:00
if ( ! err ) {
2016-02-15 12:54:26 +03:00
filemap_write_and_wait_range ( inode - > i_mapping , 0 ,
disk_link . len - 1 ) ;
2015-04-15 23:37:53 +03:00
2015-10-22 13:23:08 +03:00
if ( IS_DIRSYNC ( dir ) )
f2fs_sync_fs ( sbi - > sb , 1 ) ;
} else {
f2fs_unlink ( dir , dentry ) ;
}
2015-04-30 01:10:53 +03:00
kfree ( sd ) ;
2017-04-12 05:01:26 +03:00
f2fs_balance_fs ( sbi , true ) ;
2012-11-02 12:11:10 +04:00
return err ;
out :
2014-09-25 22:55:53 +04:00
handle_failed_inode ( inode ) ;
2012-11-02 12:11:10 +04:00
return err ;
}
/*
 * Create the directory @dentry in @dir with permission bits @mode.
 *
 * Returns 0 on success, -EIO when f2fs_cp_error() reports a checkpoint
 * error, or the errno from quota setup, inode allocation or linking.
 */
static int f2fs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(dir);
	struct inode *inode;
	int err;

	if (unlikely(f2fs_cp_error(sbi)))
		return -EIO;

	err = dquot_initialize(dir);
	if (err)
		return err;

	inode = f2fs_new_inode(dir, S_IFDIR | mode);
	if (IS_ERR(inode))
		return PTR_ERR(inode);

	inode->i_op = &f2fs_dir_inode_operations;
	inode->i_fop = &f2fs_dir_operations;
	inode->i_mapping->a_ops = &f2fs_dblock_aops;
	mapping_set_gfp_mask(inode->i_mapping, GFP_F2FS_HIGH_ZERO);

	/* FI_INC_LINK stays set only while the entry is being linked. */
	set_inode_flag(inode, FI_INC_LINK);
	f2fs_lock_op(sbi);
	err = f2fs_add_link(dentry, inode);
	if (err) {
		clear_inode_flag(inode, FI_INC_LINK);
		/*
		 * NOTE(review): no explicit f2fs_unlock_op() on this path;
		 * presumably handle_failed_inode() drops the op lock.
		 */
		handle_failed_inode(inode);
		return err;
	}
	f2fs_unlock_op(sbi);

	alloc_nid_done(sbi, inode->i_ino);

	d_instantiate(dentry, inode);
	unlock_new_inode(inode);

	if (IS_DIRSYNC(dir))
		f2fs_sync_fs(sbi->sb, 1);

	f2fs_balance_fs(sbi, true);
	return 0;
}
/*
 * Remove the directory @dentry from @dir.
 *
 * Fails with -ENOTEMPTY unless f2fs_empty_dir() accepts the directory;
 * otherwise the removal is delegated to f2fs_unlink().
 */
static int f2fs_rmdir(struct inode *dir, struct dentry *dentry)
{
	struct inode *victim = d_inode(dentry);

	if (!f2fs_empty_dir(victim))
		return -ENOTEMPTY;

	return f2fs_unlink(dir, dentry);
}
/*
 * Create the special file @dentry in @dir.
 *
 * @mode: file type and permission bits handed to f2fs_new_inode()
 * @rdev: device number passed on to init_special_inode()
 *
 * Returns 0 on success, -EIO when f2fs_cp_error() reports a checkpoint
 * error, or the errno from quota setup, inode allocation or linking.
 */
static int f2fs_mknod(struct inode *dir, struct dentry *dentry,
				umode_t mode, dev_t rdev)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(dir);
	struct inode *inode;
	int err;

	if (unlikely(f2fs_cp_error(sbi)))
		return -EIO;

	err = dquot_initialize(dir);
	if (err)
		return err;

	inode = f2fs_new_inode(dir, mode);
	if (IS_ERR(inode))
		return PTR_ERR(inode);

	init_special_inode(inode, inode->i_mode, rdev);
	inode->i_op = &f2fs_special_inode_operations;

	f2fs_lock_op(sbi);
	err = f2fs_add_link(dentry, inode);
	if (err) {
		/*
		 * NOTE(review): no explicit f2fs_unlock_op() on this path;
		 * presumably handle_failed_inode() drops the op lock.
		 */
		handle_failed_inode(inode);
		return err;
	}
	f2fs_unlock_op(sbi);

	alloc_nid_done(sbi, inode->i_ino);

	d_instantiate(dentry, inode);
	unlock_new_inode(inode);

	if (IS_DIRSYNC(dir))
		f2fs_sync_fs(sbi->sb, 1);

	f2fs_balance_fs(sbi, true);
	return 0;
}
f2fs: support RENAME_WHITEOUT
As the description of rename in manual, RENAME_WHITEOUT is a special operation
that only makes sense for overlay/union type filesystem.
When performing rename with RENAME_WHITEOUT, dst will be replaced with src, and
meanwhile a 'whiteout' will be created with the name of src.
A "whiteout" is designed to be a char device with 0,0 device number; it has
a special meaning for stackable filesystems. In these filesystems, multiple
layers exist, and only the top one can be modified. So a whiteout
in the top layer is used to hide a corresponding file in a lower layer, and
removal of the whiteout will make the file appear again.
Now in overlayfs, when we rename a file which exists in a lower layer, it
will be copied up to the upper layer if it is not there yet, and then renamed
on the upper layer; the source file will be whited out to hide the corresponding
file in the lower layer at the same time.
So in the upper layer filesystem, the implementation of RENAME_WHITEOUT provides
an atomic operation for stackable filesystems to support the rename operation.
There are multiple ways to implement RENAME_WHITEOUT in log of this commit:
7dcf5c3e4527 ("xfs: add RENAME_WHITEOUT support") which pointed out by
Dave Chinner.
For now, we just try to follow the way that xfs/ext4 use.
Signed-off-by: Chao Yu <chao2.yu@samsung.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2015-05-19 12:37:26 +03:00
/*
 * Create an unlinked inode in @dir, either as a tmpfile or as a whiteout.
 *
 * @dentry:   dentry handed to d_tmpfile(); only used when @whiteout is NULL
 * @mode:     mode for the new inode
 * @whiteout: when non-NULL, the inode is set up as a WHITEOUT_DEV special
 *            file and returned through this pointer instead of being
 *            attached to @dentry
 *
 * Returns 0 on success or a negative errno.
 */
static int __f2fs_tmpfile(struct inode *dir, struct dentry *dentry,
					umode_t mode, struct inode **whiteout)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(dir);
	struct inode *inode;
	int err;

	err = dquot_initialize(dir);
	if (err)
		return err;

	inode = f2fs_new_inode(dir, mode);
	if (IS_ERR(inode))
		return PTR_ERR(inode);

	if (!whiteout) {
		inode->i_op = &f2fs_file_inode_operations;
		inode->i_fop = &f2fs_file_operations;
		inode->i_mapping->a_ops = &f2fs_dblock_aops;
	} else {
		init_special_inode(inode, inode->i_mode, WHITEOUT_DEV);
		inode->i_op = &f2fs_special_inode_operations;
	}

	f2fs_lock_op(sbi);
	err = acquire_orphan_inode(sbi);
	if (!err) {
		err = f2fs_do_tmpfile(inode, dir);
		/* Give the orphan slot back if the tmpfile setup failed. */
		if (err)
			release_orphan_inode(sbi);
	}
	if (err) {
		/*
		 * NOTE(review): no explicit f2fs_unlock_op() on this path;
		 * presumably handle_failed_inode() drops the op lock.
		 */
		handle_failed_inode(inode);
		return err;
	}

	/*
	 * add this non-linked tmpfile to orphan list, in this way we could
	 * remove all unused data of tmpfile after abnormal power-off.
	 */
	add_orphan_inode(inode);
	alloc_nid_done(sbi, inode->i_ino);

	if (!whiteout) {
		d_tmpfile(dentry, inode);
	} else {
		f2fs_i_links_write(inode, false);
		*whiteout = inode;
	}
	/* link_count was changed by d_tmpfile as well. */
	f2fs_unlock_op(sbi);
	unlock_new_inode(inode);

	f2fs_balance_fs(sbi, true);
	return 0;
}
/*
 * f2fs_tmpfile - validate the parent directory , then create a temporary inode .
 * @dir : directory inode the request is issued against
 * @dentry : dentry passed through unchanged to __f2fs_tmpfile ( )
 * @mode : mode passed through unchanged to __f2fs_tmpfile ( )
 *
 * Returns - EIO when f2fs_cp_error ( ) is set for the filesystem of @dir , the
 * error from fscrypt_get_encryption_info ( ) when @dir is encrypted and that
 * call fails , and otherwise whatever __f2fs_tmpfile ( ) returns .
 */
static int f2fs_tmpfile ( struct inode * dir , struct dentry * dentry , umode_t mode )
{
2017-10-24 00:48:49 +03:00
/* Refuse the operation once a checkpoint error has been recorded . */
if ( unlikely ( f2fs_cp_error ( F2FS_I_SB ( dir ) ) ) )
return - EIO ;
2015-05-20 02:11:40 +03:00
/* For an encrypted directory , get its encryption info before going on . */
if ( f2fs_encrypted_inode ( dir ) ) {
2015-05-16 02:26:10 +03:00
int err = fscrypt_get_encryption_info ( dir ) ;
2015-05-20 02:11:40 +03:00
if ( err )
return err ;
}
f2fs: support RENAME_WHITEOUT
As the description of rename in manual, RENAME_WHITEOUT is a special operation
that only makes sense for overlay/union type filesystem.
When performing rename with RENAME_WHITEOUT, dst will be replace with src, and
meanwhile, a 'whiteout' will be create with name of src.
A "whiteout" is designed to be a char device with 0,0 device number, it has
specially meaning for stackable filesystem. In these filesystems, there are
multiple layers exist, and only top of these can be modified. So a whiteout
in top layer is used to hide a corresponding file in lower layer, as well
removal of whiteout will make the file appear.
Now in overlayfs, when we rename a file which is exist in lower layer, it
will be copied up to upper if it is not on upper layer yet, and then rename
it on upper layer, source file will be whiteouted to hide corresponding file
in lower layer at the same time.
So in upper layer filesystem, implementation of RENAME_WHITEOUT provide a
atomic operation for stackable filesystem to support rename operation.
There are multiple ways to implement RENAME_WHITEOUT in log of this commit:
7dcf5c3e4527 ("xfs: add RENAME_WHITEOUT support") which pointed out by
Dave Chinner.
For now, we just try to follow the way that xfs/ext4 use.
Signed-off-by: Chao Yu <chao2.yu@samsung.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2015-05-19 12:37:26 +03:00
/* A NULL last argument means no whiteout inode is requested back . */
return __f2fs_tmpfile ( dir , dentry , mode , NULL ) ;
}
/*
 * f2fs_create_whiteout - create a whiteout inode on behalf of rename .
 * @dir : directory inode the whiteout is created against
 * @whiteout : output pointer handed to __f2fs_tmpfile ( ) to receive the inode
 *
 * Returns - EIO when f2fs_cp_error ( ) is set for the filesystem of @dir ;
 * otherwise returns the result of __f2fs_tmpfile ( ) , which is called with a
 * NULL dentry and the mode S_IFCHR | WHITEOUT_MODE .
 */
static int f2fs_create_whiteout ( struct inode * dir , struct inode * * whiteout )
{
2017-10-24 00:48:49 +03:00
/* Refuse the operation once a checkpoint error has been recorded . */
if ( unlikely ( f2fs_cp_error ( F2FS_I_SB ( dir ) ) ) )
return - EIO ;
f2fs: support RENAME_WHITEOUT
As the description of rename in manual, RENAME_WHITEOUT is a special operation
that only makes sense for overlay/union type filesystem.
When performing rename with RENAME_WHITEOUT, dst will be replace with src, and
meanwhile, a 'whiteout' will be create with name of src.
A "whiteout" is designed to be a char device with 0,0 device number, it has
specially meaning for stackable filesystem. In these filesystems, there are
multiple layers exist, and only top of these can be modified. So a whiteout
in top layer is used to hide a corresponding file in lower layer, as well
removal of whiteout will make the file appear.
Now in overlayfs, when we rename a file which is exist in lower layer, it
will be copied up to upper if it is not on upper layer yet, and then rename
it on upper layer, source file will be whiteouted to hide corresponding file
in lower layer at the same time.
So in upper layer filesystem, implementation of RENAME_WHITEOUT provide a
atomic operation for stackable filesystem to support rename operation.
There are multiple ways to implement RENAME_WHITEOUT in log of this commit:
7dcf5c3e4527 ("xfs: add RENAME_WHITEOUT support") which pointed out by
Dave Chinner.
For now, we just try to follow the way that xfs/ext4 use.
Signed-off-by: Chao Yu <chao2.yu@samsung.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2015-05-19 12:37:26 +03:00
/* No dentry is supplied ; the new inode is handed back through @whiteout . */
return __f2fs_tmpfile ( dir , NULL , S_IFCHR | WHITEOUT_MODE , whiteout ) ;
}
2012-11-02 12:11:10 +04:00
static int f2fs_rename ( struct inode * old_dir , struct dentry * old_dentry ,
f2fs: support RENAME_WHITEOUT
As described in the rename manual page, RENAME_WHITEOUT is a special operation
that only makes sense for overlay/union type filesystems.
When performing a rename with RENAME_WHITEOUT, dst will be replaced with src,
and meanwhile a 'whiteout' will be created with the name of src.
A "whiteout" is designed to be a char device with 0,0 device number, it has
specially meaning for stackable filesystem. In these filesystems, there are
multiple layers exist, and only top of these can be modified. So a whiteout
in top layer is used to hide a corresponding file in lower layer, as well
removal of whiteout will make the file appear.
Now in overlayfs, when we rename a file which is exist in lower layer, it
will be copied up to upper if it is not on upper layer yet, and then rename
it on upper layer, source file will be whiteouted to hide corresponding file
in lower layer at the same time.
So in upper layer filesystem, implementation of RENAME_WHITEOUT provide a
atomic operation for stackable filesystem to support rename operation.
There are multiple ways to implement RENAME_WHITEOUT in log of this commit:
7dcf5c3e4527 ("xfs: add RENAME_WHITEOUT support") which pointed out by
Dave Chinner.
For now, we just try to follow the way that xfs/ext4 use.
Signed-off-by: Chao Yu <chao2.yu@samsung.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2015-05-19 12:37:26 +03:00
struct inode * new_dir , struct dentry * new_dentry ,
unsigned int flags )
2012-11-02 12:11:10 +04:00
{
2014-09-03 02:31:18 +04:00
struct f2fs_sb_info * sbi = F2FS_I_SB ( old_dir ) ;
2015-03-18 01:25:59 +03:00
struct inode * old_inode = d_inode ( old_dentry ) ;
struct inode * new_inode = d_inode ( new_dentry ) ;
f2fs: support RENAME_WHITEOUT
As described in the rename manual page, RENAME_WHITEOUT is a special operation
that only makes sense for overlay/union type filesystems.
When performing a rename with RENAME_WHITEOUT, dst will be replaced with src,
and meanwhile a 'whiteout' will be created with the name of src.
A "whiteout" is designed to be a char device with 0,0 device number, it has
specially meaning for stackable filesystem. In these filesystems, there are
multiple layers exist, and only top of these can be modified. So a whiteout
in top layer is used to hide a corresponding file in lower layer, as well
removal of whiteout will make the file appear.
Now in overlayfs, when we rename a file which is exist in lower layer, it
will be copied up to upper if it is not on upper layer yet, and then rename
it on upper layer, source file will be whiteouted to hide corresponding file
in lower layer at the same time.
So in upper layer filesystem, implementation of RENAME_WHITEOUT provide a
atomic operation for stackable filesystem to support rename operation.
There are multiple ways to implement RENAME_WHITEOUT in log of this commit:
7dcf5c3e4527 ("xfs: add RENAME_WHITEOUT support") which pointed out by
Dave Chinner.
For now, we just try to follow the way that xfs/ext4 use.
Signed-off-by: Chao Yu <chao2.yu@samsung.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2015-05-19 12:37:26 +03:00
struct inode * whiteout = NULL ;
2012-11-02 12:11:10 +04:00
struct page * old_dir_page ;
f2fs: support RENAME_WHITEOUT
As described in the rename manual page, RENAME_WHITEOUT is a special operation
that only makes sense for overlay/union type filesystems.
When performing a rename with RENAME_WHITEOUT, dst will be replaced with src,
and meanwhile a 'whiteout' will be created with the name of src.
A "whiteout" is designed to be a char device with 0,0 device number, it has
specially meaning for stackable filesystem. In these filesystems, there are
multiple layers exist, and only top of these can be modified. So a whiteout
in top layer is used to hide a corresponding file in lower layer, as well
removal of whiteout will make the file appear.
Now in overlayfs, when we rename a file which is exist in lower layer, it
will be copied up to upper if it is not on upper layer yet, and then rename
it on upper layer, source file will be whiteouted to hide corresponding file
in lower layer at the same time.
So in upper layer filesystem, implementation of RENAME_WHITEOUT provide a
atomic operation for stackable filesystem to support rename operation.
There are multiple ways to implement RENAME_WHITEOUT in log of this commit:
7dcf5c3e4527 ("xfs: add RENAME_WHITEOUT support") which pointed out by
Dave Chinner.
For now, we just try to follow the way that xfs/ext4 use.
Signed-off-by: Chao Yu <chao2.yu@samsung.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2015-05-19 12:37:26 +03:00
struct page * old_page , * new_page = NULL ;
2012-11-02 12:11:10 +04:00
struct f2fs_dir_entry * old_dir_entry = NULL ;
struct f2fs_dir_entry * old_entry ;
struct f2fs_dir_entry * new_entry ;
f2fs: fix to delete old dirent in converted inline directory in ->rename
When doing test with fstests/generic/068 in inline_dentry enabled f2fs,
following oops dmesg will be reported:
------------[ cut here ]------------
WARNING: CPU: 5 PID: 11841 at fs/inode.c:273 drop_nlink+0x49/0x50()
Modules linked in: f2fs(O) ip6table_filter ip6_tables ebtable_nat ebtables nf_conntrack_ipv4 nf_defrag_ipv4 xt_state
CPU: 5 PID: 11841 Comm: fsstress Tainted: G O 4.5.0-rc1 #45
Hardware name: Hewlett-Packard HP Z220 CMT Workstation/1790, BIOS K51 v01.61 05/16/2013
0000000000000111 ffff88009cdf7ae8 ffffffff813e5944 0000000000002e41
0000000000000000 0000000000000111 0000000000000000 ffff88009cdf7b28
ffffffff8106a587 ffff88009cdf7b58 ffff8804078fe180 ffff880374a64e00
Call Trace:
[<ffffffff813e5944>] dump_stack+0x48/0x64
[<ffffffff8106a587>] warn_slowpath_common+0x97/0xe0
[<ffffffff8106a5ea>] warn_slowpath_null+0x1a/0x20
[<ffffffff81231039>] drop_nlink+0x49/0x50
[<ffffffffa07b95b4>] f2fs_rename2+0xe04/0x10c0 [f2fs]
[<ffffffff81231ff1>] ? lock_two_nondirectories+0x81/0x90
[<ffffffff813f454d>] ? lockref_get+0x1d/0x30
[<ffffffff81220f70>] vfs_rename+0x2e0/0x640
[<ffffffff8121f9db>] ? lookup_dcache+0x3b/0xd0
[<ffffffff810b8e41>] ? update_fast_ctr+0x21/0x40
[<ffffffff8134ff12>] ? security_path_rename+0xa2/0xd0
[<ffffffff81224af6>] SYSC_renameat2+0x4b6/0x540
[<ffffffff810ba8ed>] ? trace_hardirqs_off+0xd/0x10
[<ffffffff810022ba>] ? exit_to_usermode_loop+0x7a/0xd0
[<ffffffff817e0ade>] ? int_ret_from_sys_call+0x52/0x9f
[<ffffffff810bdc90>] ? trace_hardirqs_on_caller+0x100/0x1c0
[<ffffffff81224b8e>] SyS_renameat2+0xe/0x10
[<ffffffff8121f08e>] SyS_rename+0x1e/0x20
[<ffffffff817e0957>] entry_SYSCALL_64_fastpath+0x12/0x6f
---[ end trace 2b31e17995404e42 ]---
This is because: in the same inline directory, when we rename a file from
a source name to a target name which does not exist, and the inline dentry
runs out of space, inline conversion is triggered; after that, all data in
the inline dentry is moved to a normal dentry page.
After attaching the new entry in the converted dentry page, we still try to
remove the old entry from the original inline dentry; since the old entry
has been moved, this obviously has no effect, leaving the old entry behind
in the converted dentry page.
Now we have two valid dentries pointing to the same inode, which has an
nlink value of 1; when both are deleted, the above warning appears.
This issue can be reproduced easily as below steps:
1. mount f2fs with inline_dentry option
2. mkdir dir
3. touch 180 files named [001-180] in dir
4. rename dir/180 dir/181
5. rm dir/180 dir/181
Signed-off-by: Chao Yu <chao2.yu@samsung.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2016-02-17 11:47:05 +03:00
bool is_old_inline = f2fs_has_inline_dentry ( old_dir ) ;
f2fs: use rw_sem instead of fs_lock(locks mutex)
The fs_locks are used to block other ops (e.g. recovery) while doing a checkpoint.
Every other operation routine (besides checkpoint) needs to acquire an fs_lock.
There is a serious problem here: if there are too many concurrent threads acquiring
fs_lock, they will block each other and may cause performance problems,
which is not the behavior we want to see.
Though there are some optimization patches introduced to enhance the usage of fs_lock,
but the thorough solution is using a *rw_sem* to replace the fs_lock.
Checkpoint routine takes write_sem, and other ops take read_sem, so that we can block
other ops(ex, recovery) when doing checkpoint, and other ops will not disturb each other,
this can avoid the problem described above completely.
Because of the weakness of rw_sem, the above change may introduce a potential problem
that the checkpoint thread might get starved if other threads are intensively locking
the read semaphore for I/O.(Pointed out by Xu Jin)
In order to avoid this, a wait_list is introduced, the appending read semaphore ops
will be dropped into the wait_list if checkpoint thread is waiting for write semaphore,
and will be waked up when checkpoint thread gives up write semaphore.
Thanks to Kim's previous review and test, and will be very glad to see other guys'
performance tests about this patch.
V2:
-fix the potential starvation problem.
-use more suitable func name suggested by Xu Jin.
Signed-off-by: Gu Zheng <guz.fnst@cn.fujitsu.com>
[Jaegeuk Kim: adjust minor coding standard]
Signed-off-by: Jaegeuk Kim <jaegeuk.kim@samsung.com>
2013-09-27 14:08:30 +04:00
int err = - ENOENT ;
2012-11-02 12:11:10 +04:00
2017-10-24 00:48:49 +03:00
if ( unlikely ( f2fs_cp_error ( sbi ) ) )
return - EIO ;
2016-12-29 04:31:15 +03:00
if ( ( f2fs_encrypted_inode ( old_dir ) & &
! fscrypt_has_encryption_key ( old_dir ) ) | |
( f2fs_encrypted_inode ( new_dir ) & &
! fscrypt_has_encryption_key ( new_dir ) ) )
return - ENOKEY ;
2015-04-22 06:39:58 +03:00
if ( ( old_dir ! = new_dir ) & & f2fs_encrypted_inode ( new_dir ) & &
2015-05-16 02:26:10 +03:00
! fscrypt_has_permitted_context ( new_dir , old_inode ) ) {
2015-04-22 06:39:58 +03:00
err = - EPERM ;
goto out ;
}
2017-07-25 19:01:41 +03:00
if ( is_inode_flag_set ( new_dir , FI_PROJ_INHERIT ) & &
( ! projid_eq ( F2FS_I ( new_dir ) - > i_projid ,
F2FS_I ( old_dentry - > d_inode ) - > i_projid ) ) )
return - EXDEV ;
2017-07-08 19:13:07 +03:00
err = dquot_initialize ( old_dir ) ;
if ( err )
goto out ;
err = dquot_initialize ( new_dir ) ;
if ( err )
goto out ;
2017-10-24 00:50:15 +03:00
if ( new_inode ) {
err = dquot_initialize ( new_inode ) ;
if ( err )
goto out ;
}
2012-11-02 12:11:10 +04:00
old_entry = f2fs_find_entry ( old_dir , & old_dentry - > d_name , & old_page ) ;
2016-07-19 03:27:47 +03:00
if ( ! old_entry ) {
if ( IS_ERR ( old_page ) )
err = PTR_ERR ( old_page ) ;
2012-11-02 12:11:10 +04:00
goto out ;
2016-07-19 03:27:47 +03:00
}
2012-11-02 12:11:10 +04:00
if ( S_ISDIR ( old_inode - > i_mode ) ) {
old_dir_entry = f2fs_parent_dir ( old_inode , & old_dir_page ) ;
2016-06-10 00:57:19 +03:00
if ( ! old_dir_entry ) {
2016-07-19 03:27:47 +03:00
if ( IS_ERR ( old_dir_page ) )
err = PTR_ERR ( old_dir_page ) ;
2012-11-02 12:11:10 +04:00
goto out_old ;
2016-06-10 00:57:19 +03:00
}
2012-11-02 12:11:10 +04:00
}
f2fs: support RENAME_WHITEOUT
As described in the rename manual page, RENAME_WHITEOUT is a special operation
that only makes sense for overlay/union type filesystems.
When performing a rename with RENAME_WHITEOUT, dst will be replaced with src,
and meanwhile a 'whiteout' will be created with the name of src.
A "whiteout" is designed to be a char device with 0,0 device number, it has
specially meaning for stackable filesystem. In these filesystems, there are
multiple layers exist, and only top of these can be modified. So a whiteout
in top layer is used to hide a corresponding file in lower layer, as well
removal of whiteout will make the file appear.
Now in overlayfs, when we rename a file which is exist in lower layer, it
will be copied up to upper if it is not on upper layer yet, and then rename
it on upper layer, source file will be whiteouted to hide corresponding file
in lower layer at the same time.
So in upper layer filesystem, implementation of RENAME_WHITEOUT provide a
atomic operation for stackable filesystem to support rename operation.
There are multiple ways to implement RENAME_WHITEOUT in log of this commit:
7dcf5c3e4527 ("xfs: add RENAME_WHITEOUT support") which pointed out by
Dave Chinner.
For now, we just try to follow the way that xfs/ext4 use.
Signed-off-by: Chao Yu <chao2.yu@samsung.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2015-05-19 12:37:26 +03:00
if ( flags & RENAME_WHITEOUT ) {
err = f2fs_create_whiteout ( old_dir , & whiteout ) ;
if ( err )
goto out_dir ;
}
2012-11-02 12:11:10 +04:00
if ( new_inode ) {
err = - ENOTEMPTY ;
if ( old_dir_entry & & ! f2fs_empty_dir ( new_inode ) )
f2fs: support RENAME_WHITEOUT
As described in the rename manual page, RENAME_WHITEOUT is a special operation
that only makes sense for overlay/union type filesystems.
When performing a rename with RENAME_WHITEOUT, dst will be replaced with src,
and meanwhile a 'whiteout' will be created with the name of src.
A "whiteout" is designed to be a char device with 0,0 device number, it has
specially meaning for stackable filesystem. In these filesystems, there are
multiple layers exist, and only top of these can be modified. So a whiteout
in top layer is used to hide a corresponding file in lower layer, as well
removal of whiteout will make the file appear.
Now in overlayfs, when we rename a file which is exist in lower layer, it
will be copied up to upper if it is not on upper layer yet, and then rename
it on upper layer, source file will be whiteouted to hide corresponding file
in lower layer at the same time.
So in upper layer filesystem, implementation of RENAME_WHITEOUT provide a
atomic operation for stackable filesystem to support rename operation.
There are multiple ways to implement RENAME_WHITEOUT in log of this commit:
7dcf5c3e4527 ("xfs: add RENAME_WHITEOUT support") which pointed out by
Dave Chinner.
For now, we just try to follow the way that xfs/ext4 use.
Signed-off-by: Chao Yu <chao2.yu@samsung.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2015-05-19 12:37:26 +03:00
goto out_whiteout ;
2012-11-02 12:11:10 +04:00
err = - ENOENT ;
new_entry = f2fs_find_entry ( new_dir , & new_dentry - > d_name ,
& new_page ) ;
2016-07-19 03:27:47 +03:00
if ( ! new_entry ) {
if ( IS_ERR ( new_page ) )
err = PTR_ERR ( new_page ) ;
f2fs: support RENAME_WHITEOUT
As described in the rename manual page, RENAME_WHITEOUT is a special operation
that only makes sense for overlay/union type filesystems.
When performing a rename with RENAME_WHITEOUT, dst will be replaced with src,
and meanwhile a 'whiteout' will be created with the name of src.
A "whiteout" is designed to be a char device with 0,0 device number, it has
specially meaning for stackable filesystem. In these filesystems, there are
multiple layers exist, and only top of these can be modified. So a whiteout
in top layer is used to hide a corresponding file in lower layer, as well
removal of whiteout will make the file appear.
Now in overlayfs, when we rename a file which is exist in lower layer, it
will be copied up to upper if it is not on upper layer yet, and then rename
it on upper layer, source file will be whiteouted to hide corresponding file
in lower layer at the same time.
So in upper layer filesystem, implementation of RENAME_WHITEOUT provide a
atomic operation for stackable filesystem to support rename operation.
There are multiple ways to implement RENAME_WHITEOUT in log of this commit:
7dcf5c3e4527 ("xfs: add RENAME_WHITEOUT support") which pointed out by
Dave Chinner.
For now, we just try to follow the way that xfs/ext4 use.
Signed-off-by: Chao Yu <chao2.yu@samsung.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2015-05-19 12:37:26 +03:00
goto out_whiteout ;
2016-07-19 03:27:47 +03:00
}
2012-11-02 12:11:10 +04:00
2016-01-08 01:15:04 +03:00
f2fs_balance_fs ( sbi , true ) ;
2015-12-22 22:56:08 +03:00
2014-06-24 10:16:24 +04:00
f2fs_lock_op ( sbi ) ;
2013-07-30 06:36:53 +04:00
err = acquire_orphan_inode ( sbi ) ;
if ( err )
goto put_out_dir ;
2012-11-02 12:11:10 +04:00
f2fs_set_link ( new_dir , new_entry , new_page , old_inode ) ;
2016-09-14 17:48:04 +03:00
new_inode - > i_ctime = current_time ( new_inode ) ;
2014-03-20 14:10:08 +04:00
down_write ( & F2FS_I ( new_inode ) - > i_sem ) ;
2012-11-02 12:11:10 +04:00
if ( old_dir_entry )
2016-05-20 19:43:20 +03:00
f2fs_i_links_write ( new_inode , false ) ;
f2fs_i_links_write ( new_inode , false ) ;
2014-03-20 14:10:08 +04:00
up_write ( & F2FS_I ( new_inode ) - > i_sem ) ;
2012-11-02 12:11:10 +04:00
if ( ! new_inode - > i_nlink )
2016-06-14 04:27:02 +03:00
add_orphan_inode ( new_inode ) ;
2013-07-30 06:36:53 +04:00
else
release_orphan_inode ( sbi ) ;
2012-11-02 12:11:10 +04:00
} else {
2016-01-08 01:15:04 +03:00
f2fs_balance_fs ( sbi , true ) ;
2015-12-22 22:56:08 +03:00
2014-06-24 10:16:24 +04:00
f2fs_lock_op ( sbi ) ;
2012-11-02 12:11:10 +04:00
err = f2fs_add_link ( new_dentry , old_inode ) ;
2014-06-24 10:16:24 +04:00
if ( err ) {
f2fs_unlock_op ( sbi ) ;
f2fs: support RENAME_WHITEOUT
As described in the rename manual page, RENAME_WHITEOUT is a special operation
that only makes sense for overlay/union type filesystems.
When performing a rename with RENAME_WHITEOUT, dst will be replaced with src,
and meanwhile a 'whiteout' will be created with the name of src.
A "whiteout" is designed to be a char device with 0,0 device number, it has
specially meaning for stackable filesystem. In these filesystems, there are
multiple layers exist, and only top of these can be modified. So a whiteout
in top layer is used to hide a corresponding file in lower layer, as well
removal of whiteout will make the file appear.
Now in overlayfs, when we rename a file which is exist in lower layer, it
will be copied up to upper if it is not on upper layer yet, and then rename
it on upper layer, source file will be whiteouted to hide corresponding file
in lower layer at the same time.
So in upper layer filesystem, implementation of RENAME_WHITEOUT provide a
atomic operation for stackable filesystem to support rename operation.
There are multiple ways to implement RENAME_WHITEOUT in log of this commit:
7dcf5c3e4527 ("xfs: add RENAME_WHITEOUT support") which pointed out by
Dave Chinner.
For now, we just try to follow the way that xfs/ext4 use.
Signed-off-by: Chao Yu <chao2.yu@samsung.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2015-05-19 12:37:26 +03:00
goto out_whiteout ;
2014-06-24 10:16:24 +04:00
}
2012-11-02 12:11:10 +04:00
2016-05-21 02:32:49 +03:00
if ( old_dir_entry )
2016-05-20 19:43:20 +03:00
f2fs_i_links_write ( new_dir , true ) ;
f2fs: fix to delete old dirent in converted inline directory in ->rename
When doing test with fstests/generic/068 in inline_dentry enabled f2fs,
following oops dmesg will be reported:
------------[ cut here ]------------
WARNING: CPU: 5 PID: 11841 at fs/inode.c:273 drop_nlink+0x49/0x50()
Modules linked in: f2fs(O) ip6table_filter ip6_tables ebtable_nat ebtables nf_conntrack_ipv4 nf_defrag_ipv4 xt_state
CPU: 5 PID: 11841 Comm: fsstress Tainted: G O 4.5.0-rc1 #45
Hardware name: Hewlett-Packard HP Z220 CMT Workstation/1790, BIOS K51 v01.61 05/16/2013
0000000000000111 ffff88009cdf7ae8 ffffffff813e5944 0000000000002e41
0000000000000000 0000000000000111 0000000000000000 ffff88009cdf7b28
ffffffff8106a587 ffff88009cdf7b58 ffff8804078fe180 ffff880374a64e00
Call Trace:
[<ffffffff813e5944>] dump_stack+0x48/0x64
[<ffffffff8106a587>] warn_slowpath_common+0x97/0xe0
[<ffffffff8106a5ea>] warn_slowpath_null+0x1a/0x20
[<ffffffff81231039>] drop_nlink+0x49/0x50
[<ffffffffa07b95b4>] f2fs_rename2+0xe04/0x10c0 [f2fs]
[<ffffffff81231ff1>] ? lock_two_nondirectories+0x81/0x90
[<ffffffff813f454d>] ? lockref_get+0x1d/0x30
[<ffffffff81220f70>] vfs_rename+0x2e0/0x640
[<ffffffff8121f9db>] ? lookup_dcache+0x3b/0xd0
[<ffffffff810b8e41>] ? update_fast_ctr+0x21/0x40
[<ffffffff8134ff12>] ? security_path_rename+0xa2/0xd0
[<ffffffff81224af6>] SYSC_renameat2+0x4b6/0x540
[<ffffffff810ba8ed>] ? trace_hardirqs_off+0xd/0x10
[<ffffffff810022ba>] ? exit_to_usermode_loop+0x7a/0xd0
[<ffffffff817e0ade>] ? int_ret_from_sys_call+0x52/0x9f
[<ffffffff810bdc90>] ? trace_hardirqs_on_caller+0x100/0x1c0
[<ffffffff81224b8e>] SyS_renameat2+0xe/0x10
[<ffffffff8121f08e>] SyS_rename+0x1e/0x20
[<ffffffff817e0957>] entry_SYSCALL_64_fastpath+0x12/0x6f
---[ end trace 2b31e17995404e42 ]---
This is because: in the same inline directory, when we rename a file from
a source name to a target name which does not exist, and the inline dentry
runs out of space, inline conversion is triggered; after that, all data in
the inline dentry is moved to a normal dentry page.
After attaching the new entry in the converted dentry page, we still try to
remove the old entry from the original inline dentry; since the old entry
has been moved, this obviously has no effect, leaving the old entry behind
in the converted dentry page.
Now we have two valid dentries pointing to the same inode, which has an
nlink value of 1; when both are deleted, the above warning appears.
This issue can be reproduced easily as below steps:
1. mount f2fs with inline_dentry option
2. mkdir dir
3. touch 180 files named [001-180] in dir
4. rename dir/180 dir/181
5. rm dir/180 dir/181
Signed-off-by: Chao Yu <chao2.yu@samsung.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2016-02-17 11:47:05 +03:00
/*
* old entry and new entry can locate in the same inline
* dentry in inode , when attaching new entry in inline dentry ,
* it could force inline dentry conversion , after that ,
* old_entry and old_page will point to wrong address , in
* order to avoid this , let ' s do the check and update here .
*/
if ( is_old_inline & & ! f2fs_has_inline_dentry ( old_dir ) ) {
f2fs_put_page ( old_page , 0 ) ;
old_page = NULL ;
old_entry = f2fs_find_entry ( old_dir ,
& old_dentry - > d_name , & old_page ) ;
if ( ! old_entry ) {
2016-07-19 03:27:47 +03:00
err = - ENOENT ;
if ( IS_ERR ( old_page ) )
err = PTR_ERR ( old_page ) ;
f2fs: fix to delete old dirent in converted inline directory in ->rename
When doing test with fstests/generic/068 in inline_dentry enabled f2fs,
following oops dmesg will be reported:
------------[ cut here ]------------
WARNING: CPU: 5 PID: 11841 at fs/inode.c:273 drop_nlink+0x49/0x50()
Modules linked in: f2fs(O) ip6table_filter ip6_tables ebtable_nat ebtables nf_conntrack_ipv4 nf_defrag_ipv4 xt_state
CPU: 5 PID: 11841 Comm: fsstress Tainted: G O 4.5.0-rc1 #45
Hardware name: Hewlett-Packard HP Z220 CMT Workstation/1790, BIOS K51 v01.61 05/16/2013
0000000000000111 ffff88009cdf7ae8 ffffffff813e5944 0000000000002e41
0000000000000000 0000000000000111 0000000000000000 ffff88009cdf7b28
ffffffff8106a587 ffff88009cdf7b58 ffff8804078fe180 ffff880374a64e00
Call Trace:
[<ffffffff813e5944>] dump_stack+0x48/0x64
[<ffffffff8106a587>] warn_slowpath_common+0x97/0xe0
[<ffffffff8106a5ea>] warn_slowpath_null+0x1a/0x20
[<ffffffff81231039>] drop_nlink+0x49/0x50
[<ffffffffa07b95b4>] f2fs_rename2+0xe04/0x10c0 [f2fs]
[<ffffffff81231ff1>] ? lock_two_nondirectories+0x81/0x90
[<ffffffff813f454d>] ? lockref_get+0x1d/0x30
[<ffffffff81220f70>] vfs_rename+0x2e0/0x640
[<ffffffff8121f9db>] ? lookup_dcache+0x3b/0xd0
[<ffffffff810b8e41>] ? update_fast_ctr+0x21/0x40
[<ffffffff8134ff12>] ? security_path_rename+0xa2/0xd0
[<ffffffff81224af6>] SYSC_renameat2+0x4b6/0x540
[<ffffffff810ba8ed>] ? trace_hardirqs_off+0xd/0x10
[<ffffffff810022ba>] ? exit_to_usermode_loop+0x7a/0xd0
[<ffffffff817e0ade>] ? int_ret_from_sys_call+0x52/0x9f
[<ffffffff810bdc90>] ? trace_hardirqs_on_caller+0x100/0x1c0
[<ffffffff81224b8e>] SyS_renameat2+0xe/0x10
[<ffffffff8121f08e>] SyS_rename+0x1e/0x20
[<ffffffff817e0957>] entry_SYSCALL_64_fastpath+0x12/0x6f
---[ end trace 2b31e17995404e42 ]---
This is because: in the same inline directory, when we renaming one file
from source name to target name which is not existed, once space of inline
dentry is not enough, inline conversion will be triggered, after that all
data in inline dentry will be moved to normal dentry page.
After attaching the new entry in converted dentry page, still we try to
remove old entry in original inline dentry, since old entry has been
moved, so it obviously doesn't make any effect, result in remaining old
entry in converted dentry page.
Now, we have two valid dentries pointed to the same inode which has nlink
value of 1, deleting them both, above warning appears.
This issue can be reproduced easily as below steps:
1. mount f2fs with inline_dentry option
2. mkdir dir
3. touch 180 files named [001-180] in dir
4. rename dir/180 dir/181
5. rm dir/180 dir/181
Signed-off-by: Chao Yu <chao2.yu@samsung.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2016-02-17 11:47:05 +03:00
f2fs_unlock_op ( sbi ) ;
goto out_whiteout ;
}
}
2012-11-02 12:11:10 +04:00
}
f2fs: do checkpoint for the renamed inode
If an inode is renamed, it should be registered as file_lost_pino to conduct
checkpoint at f2fs_sync_file.
Otherwise, the inode cannot be recovered due to no dent_mark in the following
scenario.
Note that, this scenario is from xfstests/322.
1. create "a"
2. fsync "a"
3. rename "a" to "b"
4. fsync "b"
5. Sudden power-cut
After recovery is done, "b" should be seen.
However, the result shows "a", since the recovery procedure does not enter
recover_dentry due to no dent_mark.
The reason is like below.
- The nid of "a" is checkpointed during #2, f2fs_sync_file.
- The inode page for "b" produced by #3 is written without dent_mark by
sync_node_pages.
So, this patch fixes this bug by assigning file_lost_pino to the "a"'s inode.
If the pino is lost, f2fs_sync_file conducts checkpoint, and then recovers
the latest pino and its dentry information for further recovery.
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2014-06-30 13:09:55 +04:00
down_write ( & F2FS_I ( old_inode ) - > i_sem ) ;
2017-06-26 05:41:36 +03:00
if ( ! old_dir_entry | | whiteout )
file_lost_pino ( old_inode ) ;
else
F2FS_I ( old_inode ) - > i_pino = new_dir - > i_ino ;
f2fs: do checkpoint for the renamed inode
If an inode is renamed, it should be registered as file_lost_pino to conduct
checkpoint at f2fs_sync_file.
Otherwise, the inode cannot be recovered due to no dent_mark in the following
scenario.
Note that, this scenario is from xfstests/322.
1. create "a"
2. fsync "a"
3. rename "a" to "b"
4. fsync "b"
5. Sudden power-cut
After recovery is done, "b" should be seen.
However, the result shows "a", since the recovery procedure does not enter
recover_dentry due to no dent_mark.
The reason is like below.
- The nid of "a" is checkpointed during #2, f2fs_sync_file.
- The inode page for "b" produced by #3 is written without dent_mark by
sync_node_pages.
So, this patch fixes this bug by assigning file_lost_pino to the "a"'s inode.
If the pino is lost, f2fs_sync_file conducts checkpoint, and then recovers
the latest pino and its dentry information for further recovery.
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2014-06-30 13:09:55 +04:00
up_write ( & F2FS_I ( old_inode ) - > i_sem ) ;
2016-09-14 17:48:04 +03:00
old_inode - > i_ctime = current_time ( old_inode ) ;
2016-10-14 21:51:23 +03:00
f2fs_mark_inode_dirty_sync ( old_inode , false ) ;
2012-11-02 12:11:10 +04:00
2014-09-24 14:17:04 +04:00
f2fs_delete_entry ( old_entry , old_page , old_dir , NULL ) ;
2012-11-02 12:11:10 +04:00
f2fs: support RENAME_WHITEOUT
As the description of rename in manual, RENAME_WHITEOUT is a special operation
that only makes sense for overlay/union type filesystem.
When performing rename with RENAME_WHITEOUT, dst will be replaced with src, and
meanwhile, a 'whiteout' will be created with the name of src.
A "whiteout" is designed to be a char device with 0,0 device number, it has
specially meaning for stackable filesystem. In these filesystems, there are
multiple layers exist, and only top of these can be modified. So a whiteout
in top layer is used to hide a corresponding file in lower layer, as well
removal of whiteout will make the file appear.
Now in overlayfs, when we rename a file which is exist in lower layer, it
will be copied up to upper if it is not on upper layer yet, and then rename
it on upper layer, source file will be whiteouted to hide corresponding file
in lower layer at the same time.
So in upper layer filesystem, implementation of RENAME_WHITEOUT provide a
atomic operation for stackable filesystem to support rename operation.
There are multiple ways to implement RENAME_WHITEOUT in log of this commit:
7dcf5c3e4527 ("xfs: add RENAME_WHITEOUT support") which pointed out by
Dave Chinner.
For now, we just try to follow the way that xfs/ext4 use.
Signed-off-by: Chao Yu <chao2.yu@samsung.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2015-05-19 12:37:26 +03:00
if ( whiteout ) {
whiteout - > i_state | = I_LINKABLE ;
2016-05-20 20:13:22 +03:00
set_inode_flag ( whiteout , FI_INC_LINK ) ;
f2fs: support RENAME_WHITEOUT
As the description of rename in manual, RENAME_WHITEOUT is a special operation
that only makes sense for overlay/union type filesystem.
When performing rename with RENAME_WHITEOUT, dst will be replaced with src, and
meanwhile, a 'whiteout' will be created with the name of src.
A "whiteout" is designed to be a char device with 0,0 device number, it has
specially meaning for stackable filesystem. In these filesystems, there are
multiple layers exist, and only top of these can be modified. So a whiteout
in top layer is used to hide a corresponding file in lower layer, as well
removal of whiteout will make the file appear.
Now in overlayfs, when we rename a file which is exist in lower layer, it
will be copied up to upper if it is not on upper layer yet, and then rename
it on upper layer, source file will be whiteouted to hide corresponding file
in lower layer at the same time.
So in upper layer filesystem, implementation of RENAME_WHITEOUT provide a
atomic operation for stackable filesystem to support rename operation.
There are multiple ways to implement RENAME_WHITEOUT in log of this commit:
7dcf5c3e4527 ("xfs: add RENAME_WHITEOUT support") which pointed out by
Dave Chinner.
For now, we just try to follow the way that xfs/ext4 use.
Signed-off-by: Chao Yu <chao2.yu@samsung.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2015-05-19 12:37:26 +03:00
err = f2fs_add_link ( old_dentry , whiteout ) ;
if ( err )
goto put_out_dir ;
whiteout - > i_state & = ~ I_LINKABLE ;
iput ( whiteout ) ;
}
2012-11-02 12:11:10 +04:00
if ( old_dir_entry ) {
f2fs: support RENAME_WHITEOUT
As the description of rename in manual, RENAME_WHITEOUT is a special operation
that only makes sense for overlay/union type filesystem.
When performing rename with RENAME_WHITEOUT, dst will be replaced with src, and
meanwhile, a 'whiteout' will be created with the name of src.
A "whiteout" is designed to be a char device with 0,0 device number, it has
specially meaning for stackable filesystem. In these filesystems, there are
multiple layers exist, and only top of these can be modified. So a whiteout
in top layer is used to hide a corresponding file in lower layer, as well
removal of whiteout will make the file appear.
Now in overlayfs, when we rename a file which is exist in lower layer, it
will be copied up to upper if it is not on upper layer yet, and then rename
it on upper layer, source file will be whiteouted to hide corresponding file
in lower layer at the same time.
So in upper layer filesystem, implementation of RENAME_WHITEOUT provide a
atomic operation for stackable filesystem to support rename operation.
There are multiple ways to implement RENAME_WHITEOUT in log of this commit:
7dcf5c3e4527 ("xfs: add RENAME_WHITEOUT support") which pointed out by
Dave Chinner.
For now, we just try to follow the way that xfs/ext4 use.
Signed-off-by: Chao Yu <chao2.yu@samsung.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2015-05-19 12:37:26 +03:00
if ( old_dir ! = new_dir & & ! whiteout ) {
2012-11-02 12:11:10 +04:00
f2fs_set_link ( old_inode , old_dir_entry ,
old_dir_page , new_dir ) ;
} else {
2014-11-22 03:36:28 +03:00
f2fs_dentry_kunmap ( old_inode , old_dir_page ) ;
2012-11-02 12:11:10 +04:00
f2fs_put_page ( old_dir_page , 0 ) ;
}
2016-05-20 19:43:20 +03:00
f2fs_i_links_write ( old_dir , false ) ;
2012-11-02 12:11:10 +04:00
}
f2fs: use rw_sem instead of fs_lock(locks mutex)
The fs_locks is used to block other ops(ex, recovery) when doing checkpoint.
And each other operate routine(besides checkpoint) needs to acquire a fs_lock,
there is a terrible problem here, if these are too many concurrency threads acquiring
fs_lock, so that they will block each other and may lead to some performance problem,
but this is not the phenomenon we want to see.
Though there are some optimization patches introduced to enhance the usage of fs_lock,
but the thorough solution is using a *rw_sem* to replace the fs_lock.
Checkpoint routine takes write_sem, and other ops take read_sem, so that we can block
other ops(ex, recovery) when doing checkpoint, and other ops will not disturb each other,
this can avoid the problem described above completely.
Because of the weakness of rw_sem, the above change may introduce a potential problem
that the checkpoint thread might get starved if other threads are intensively locking
the read semaphore for I/O.(Pointed out by Xu Jin)
In order to avoid this, a wait_list is introduced, the appending read semaphore ops
will be dropped into the wait_list if checkpoint thread is waiting for write semaphore,
and will be waked up when checkpoint thread gives up write semaphore.
Thanks to Kim's previous review and test, and will be very glad to see other guys'
performance tests about this patch.
V2:
-fix the potential starvation problem.
-use more suitable func name suggested by Xu Jin.
Signed-off-by: Gu Zheng <guz.fnst@cn.fujitsu.com>
[Jaegeuk Kim: adjust minor coding standard]
Signed-off-by: Jaegeuk Kim <jaegeuk.kim@samsung.com>
2013-09-27 14:08:30 +04:00
f2fs_unlock_op ( sbi ) ;
2014-11-10 09:15:31 +03:00
if ( IS_DIRSYNC ( old_dir ) | | IS_DIRSYNC ( new_dir ) )
f2fs_sync_fs ( sbi - > sb , 1 ) ;
2012-11-02 12:11:10 +04:00
return 0 ;
2013-07-30 06:36:53 +04:00
put_out_dir :
2014-06-24 10:16:24 +04:00
f2fs_unlock_op ( sbi ) ;
f2fs: support RENAME_WHITEOUT
As the description of rename in manual, RENAME_WHITEOUT is a special operation
that only makes sense for overlay/union type filesystem.
When performing rename with RENAME_WHITEOUT, dst will be replaced with src, and
meanwhile, a 'whiteout' will be created with the name of src.
A "whiteout" is designed to be a char device with 0,0 device number, it has
specially meaning for stackable filesystem. In these filesystems, there are
multiple layers exist, and only top of these can be modified. So a whiteout
in top layer is used to hide a corresponding file in lower layer, as well
removal of whiteout will make the file appear.
Now in overlayfs, when we rename a file which is exist in lower layer, it
will be copied up to upper if it is not on upper layer yet, and then rename
it on upper layer, source file will be whiteouted to hide corresponding file
in lower layer at the same time.
So in upper layer filesystem, implementation of RENAME_WHITEOUT provide a
atomic operation for stackable filesystem to support rename operation.
There are multiple ways to implement RENAME_WHITEOUT in log of this commit:
7dcf5c3e4527 ("xfs: add RENAME_WHITEOUT support") which pointed out by
Dave Chinner.
For now, we just try to follow the way that xfs/ext4 use.
Signed-off-by: Chao Yu <chao2.yu@samsung.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2015-05-19 12:37:26 +03:00
if ( new_page ) {
f2fs_dentry_kunmap ( new_dir , new_page ) ;
f2fs_put_page ( new_page , 0 ) ;
}
out_whiteout :
if ( whiteout )
iput ( whiteout ) ;
2012-11-02 12:11:10 +04:00
out_dir :
if ( old_dir_entry ) {
2014-11-22 03:36:28 +03:00
f2fs_dentry_kunmap ( old_inode , old_dir_page ) ;
2012-11-02 12:11:10 +04:00
f2fs_put_page ( old_dir_page , 0 ) ;
}
out_old :
2014-11-22 03:36:28 +03:00
f2fs_dentry_kunmap ( old_dir , old_page ) ;
2012-11-02 12:11:10 +04:00
f2fs_put_page ( old_page , 0 ) ;
out :
return err ;
}
2014-07-12 15:13:54 +04:00
static int f2fs_cross_rename ( struct inode * old_dir , struct dentry * old_dentry ,
struct inode * new_dir , struct dentry * new_dentry )
{
2014-09-03 02:31:18 +04:00
struct f2fs_sb_info * sbi = F2FS_I_SB ( old_dir ) ;
2015-03-18 01:25:59 +03:00
struct inode * old_inode = d_inode ( old_dentry ) ;
struct inode * new_inode = d_inode ( new_dentry ) ;
2014-07-12 15:13:54 +04:00
struct page * old_dir_page , * new_dir_page ;
struct page * old_page , * new_page ;
struct f2fs_dir_entry * old_dir_entry = NULL , * new_dir_entry = NULL ;
struct f2fs_dir_entry * old_entry , * new_entry ;
int old_nlink = 0 , new_nlink = 0 ;
int err = - ENOENT ;
2016-12-29 04:31:15 +03:00
2017-10-24 00:48:49 +03:00
if ( unlikely ( f2fs_cp_error ( sbi ) ) )
return - EIO ;
2016-12-29 04:31:15 +03:00
if ( ( f2fs_encrypted_inode ( old_dir ) & &
! fscrypt_has_encryption_key ( old_dir ) ) | |
( f2fs_encrypted_inode ( new_dir ) & &
! fscrypt_has_encryption_key ( new_dir ) ) )
return - ENOKEY ;
2014-07-12 15:13:54 +04:00
2015-05-25 13:07:02 +03:00
if ( ( f2fs_encrypted_inode ( old_dir ) | | f2fs_encrypted_inode ( new_dir ) ) & &
2015-05-16 02:26:10 +03:00
( old_dir ! = new_dir ) & &
( ! fscrypt_has_permitted_context ( new_dir , old_inode ) | |
! fscrypt_has_permitted_context ( old_dir , new_inode ) ) )
2015-05-25 13:07:02 +03:00
return - EPERM ;
2017-07-25 19:01:41 +03:00
if ( ( is_inode_flag_set ( new_dir , FI_PROJ_INHERIT ) & &
! projid_eq ( F2FS_I ( new_dir ) - > i_projid ,
F2FS_I ( old_dentry - > d_inode ) - > i_projid ) ) | |
( is_inode_flag_set ( new_dir , FI_PROJ_INHERIT ) & &
! projid_eq ( F2FS_I ( old_dir ) - > i_projid ,
F2FS_I ( new_dentry - > d_inode ) - > i_projid ) ) )
return - EXDEV ;
2017-07-08 19:13:07 +03:00
err = dquot_initialize ( old_dir ) ;
if ( err )
goto out ;
err = dquot_initialize ( new_dir ) ;
if ( err )
goto out ;
2014-07-12 15:13:54 +04:00
old_entry = f2fs_find_entry ( old_dir , & old_dentry - > d_name , & old_page ) ;
2016-07-19 03:27:47 +03:00
if ( ! old_entry ) {
if ( IS_ERR ( old_page ) )
err = PTR_ERR ( old_page ) ;
2014-07-12 15:13:54 +04:00
goto out ;
2016-07-19 03:27:47 +03:00
}
2014-07-12 15:13:54 +04:00
new_entry = f2fs_find_entry ( new_dir , & new_dentry - > d_name , & new_page ) ;
2016-07-19 03:27:47 +03:00
if ( ! new_entry ) {
if ( IS_ERR ( new_page ) )
err = PTR_ERR ( new_page ) ;
2014-07-12 15:13:54 +04:00
goto out_old ;
2016-07-19 03:27:47 +03:00
}
2014-07-12 15:13:54 +04:00
/* prepare for updating ".." directory entry info later */
if ( old_dir ! = new_dir ) {
if ( S_ISDIR ( old_inode - > i_mode ) ) {
old_dir_entry = f2fs_parent_dir ( old_inode ,
& old_dir_page ) ;
2016-06-10 00:57:19 +03:00
if ( ! old_dir_entry ) {
2016-07-19 03:27:47 +03:00
if ( IS_ERR ( old_dir_page ) )
err = PTR_ERR ( old_dir_page ) ;
2014-07-12 15:13:54 +04:00
goto out_new ;
2016-06-10 00:57:19 +03:00
}
2014-07-12 15:13:54 +04:00
}
if ( S_ISDIR ( new_inode - > i_mode ) ) {
new_dir_entry = f2fs_parent_dir ( new_inode ,
& new_dir_page ) ;
2016-06-10 00:57:19 +03:00
if ( ! new_dir_entry ) {
2016-07-19 03:27:47 +03:00
if ( IS_ERR ( new_dir_page ) )
err = PTR_ERR ( new_dir_page ) ;
2014-07-12 15:13:54 +04:00
goto out_old_dir ;
2016-06-10 00:57:19 +03:00
}
2014-07-12 15:13:54 +04:00
}
}
/*
* If cross rename between file and directory those are not
* in the same directory , we will inc nlink of file ' s parent
* later , so we should check upper boundary of its nlink .
*/
if ( ( ! old_dir_entry | | ! new_dir_entry ) & &
old_dir_entry ! = new_dir_entry ) {
old_nlink = old_dir_entry ? - 1 : 1 ;
new_nlink = - old_nlink ;
err = - EMLINK ;
2017-03-04 16:48:28 +03:00
if ( ( old_nlink > 0 & & old_dir - > i_nlink > = F2FS_LINK_MAX ) | |
( new_nlink > 0 & & new_dir - > i_nlink > = F2FS_LINK_MAX ) )
2014-07-12 15:13:54 +04:00
goto out_new_dir ;
}
2016-01-08 01:15:04 +03:00
f2fs_balance_fs ( sbi , true ) ;
2015-12-22 22:56:08 +03:00
2014-07-12 15:13:54 +04:00
f2fs_lock_op ( sbi ) ;
/* update ".." directory entry info of old dentry */
if ( old_dir_entry )
f2fs_set_link ( old_inode , old_dir_entry , old_dir_page , new_dir ) ;
/* update ".." directory entry info of new dentry */
if ( new_dir_entry )
f2fs_set_link ( new_inode , new_dir_entry , new_dir_page , old_dir ) ;
/* update directory entry info of old dir inode */
f2fs_set_link ( old_dir , old_entry , old_page , new_inode ) ;
down_write ( & F2FS_I ( old_inode ) - > i_sem ) ;
file_lost_pino ( old_inode ) ;
up_write ( & F2FS_I ( old_inode ) - > i_sem ) ;
2016-09-14 17:48:04 +03:00
old_dir - > i_ctime = current_time ( old_dir ) ;
2014-07-12 15:13:54 +04:00
if ( old_nlink ) {
down_write ( & F2FS_I ( old_dir ) - > i_sem ) ;
2016-05-20 19:43:20 +03:00
f2fs_i_links_write ( old_dir , old_nlink > 0 ) ;
2014-07-12 15:13:54 +04:00
up_write ( & F2FS_I ( old_dir ) - > i_sem ) ;
}
2016-10-14 21:51:23 +03:00
f2fs_mark_inode_dirty_sync ( old_dir , false ) ;
2014-07-12 15:13:54 +04:00
/* update directory entry info of new dir inode */
f2fs_set_link ( new_dir , new_entry , new_page , old_inode ) ;
down_write ( & F2FS_I ( new_inode ) - > i_sem ) ;
file_lost_pino ( new_inode ) ;
up_write ( & F2FS_I ( new_inode ) - > i_sem ) ;
2016-09-14 17:48:04 +03:00
new_dir - > i_ctime = current_time ( new_dir ) ;
2014-07-12 15:13:54 +04:00
if ( new_nlink ) {
down_write ( & F2FS_I ( new_dir ) - > i_sem ) ;
2016-05-20 19:43:20 +03:00
f2fs_i_links_write ( new_dir , new_nlink > 0 ) ;
2014-07-12 15:13:54 +04:00
up_write ( & F2FS_I ( new_dir ) - > i_sem ) ;
}
2016-10-14 21:51:23 +03:00
f2fs_mark_inode_dirty_sync ( new_dir , false ) ;
2014-07-12 15:13:54 +04:00
f2fs_unlock_op ( sbi ) ;
2014-11-10 09:15:31 +03:00
if ( IS_DIRSYNC ( old_dir ) | | IS_DIRSYNC ( new_dir ) )
f2fs_sync_fs ( sbi - > sb , 1 ) ;
2014-07-12 15:13:54 +04:00
return 0 ;
out_new_dir :
if ( new_dir_entry ) {
2014-11-22 03:36:28 +03:00
f2fs_dentry_kunmap ( new_inode , new_dir_page ) ;
2014-07-12 15:13:54 +04:00
f2fs_put_page ( new_dir_page , 0 ) ;
}
out_old_dir :
if ( old_dir_entry ) {
2014-11-22 03:36:28 +03:00
f2fs_dentry_kunmap ( old_inode , old_dir_page ) ;
2014-07-12 15:13:54 +04:00
f2fs_put_page ( old_dir_page , 0 ) ;
}
out_new :
2014-11-22 03:36:28 +03:00
f2fs_dentry_kunmap ( new_dir , new_page ) ;
2014-07-12 15:13:54 +04:00
f2fs_put_page ( new_page , 0 ) ;
out_old :
2014-11-22 03:36:28 +03:00
f2fs_dentry_kunmap ( old_dir , old_page ) ;
2014-07-12 15:13:54 +04:00
f2fs_put_page ( old_page , 0 ) ;
out :
return err ;
}
static int f2fs_rename2 ( struct inode * old_dir , struct dentry * old_dentry ,
struct inode * new_dir , struct dentry * new_dentry ,
unsigned int flags )
{
f2fs: support RENAME_WHITEOUT
As the description of rename in manual, RENAME_WHITEOUT is a special operation
that only makes sense for overlay/union type filesystem.
When performing rename with RENAME_WHITEOUT, dst will be replace with src, and
meanwhile, a 'whiteout' will be create with name of src.
A "whiteout" is designed to be a char device with 0,0 device number, it has
specially meaning for stackable filesystem. In these filesystems, there are
multiple layers exist, and only top of these can be modified. So a whiteout
in top layer is used to hide a corresponding file in lower layer, as well
removal of whiteout will make the file appear.
Now in overlayfs, when we rename a file which is exist in lower layer, it
will be copied up to upper if it is not on upper layer yet, and then rename
it on upper layer, source file will be whiteouted to hide corresponding file
in lower layer at the same time.
So in upper layer filesystem, implementation of RENAME_WHITEOUT provide a
atomic operation for stackable filesystem to support rename operation.
There are multiple ways to implement RENAME_WHITEOUT in log of this commit:
7dcf5c3e4527 ("xfs: add RENAME_WHITEOUT support") which pointed out by
Dave Chinner.
For now, we just try to follow the way that xfs/ext4 use.
Signed-off-by: Chao Yu <chao2.yu@samsung.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2015-05-19 12:37:26 +03:00
if ( flags & ~ ( RENAME_NOREPLACE | RENAME_EXCHANGE | RENAME_WHITEOUT ) )
2014-07-12 15:13:54 +04:00
return - EINVAL ;
if ( flags & RENAME_EXCHANGE ) {
return f2fs_cross_rename ( old_dir , old_dentry ,
new_dir , new_dentry ) ;
}
/*
* VFS has already handled the new dentry existence case ,
* here , we just deal with " RENAME_NOREPLACE " as regular rename .
*/
f2fs: support RENAME_WHITEOUT
As the description of rename in manual, RENAME_WHITEOUT is a special operation
that only makes sense for overlay/union type filesystem.
When performing rename with RENAME_WHITEOUT, dst will be replace with src, and
meanwhile, a 'whiteout' will be create with name of src.
A "whiteout" is designed to be a char device with 0,0 device number, it has
specially meaning for stackable filesystem. In these filesystems, there are
multiple layers exist, and only top of these can be modified. So a whiteout
in top layer is used to hide a corresponding file in lower layer, as well
removal of whiteout will make the file appear.
Now in overlayfs, when we rename a file which is exist in lower layer, it
will be copied up to upper if it is not on upper layer yet, and then rename
it on upper layer, source file will be whiteouted to hide corresponding file
in lower layer at the same time.
So in upper layer filesystem, implementation of RENAME_WHITEOUT provide a
atomic operation for stackable filesystem to support rename operation.
There are multiple ways to implement RENAME_WHITEOUT in log of this commit:
7dcf5c3e4527 ("xfs: add RENAME_WHITEOUT support") which pointed out by
Dave Chinner.
For now, we just try to follow the way that xfs/ext4 use.
Signed-off-by: Chao Yu <chao2.yu@samsung.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2015-05-19 12:37:26 +03:00
return f2fs_rename ( old_dir , old_dentry , new_dir , new_dentry , flags ) ;
2014-07-12 15:13:54 +04:00
}
2015-11-17 18:20:54 +03:00
static const char * f2fs_encrypted_get_link ( struct dentry * dentry ,
2015-12-29 23:58:39 +03:00
struct inode * inode ,
struct delayed_call * done )
2014-06-19 12:23:19 +04:00
{
2015-04-30 01:10:53 +03:00
struct page * cpage = NULL ;
char * caddr , * paddr = NULL ;
2015-05-16 02:26:10 +03:00
struct fscrypt_str cstr = FSTR_INIT ( NULL , 0 ) ;
struct fscrypt_str pstr = FSTR_INIT ( NULL , 0 ) ;
struct fscrypt_symlink_data * sd ;
2015-04-30 01:10:53 +03:00
u32 max_size = inode - > i_sb - > s_blocksize ;
int res ;
2015-11-17 18:20:54 +03:00
if ( ! dentry )
return ERR_PTR ( - ECHILD ) ;
2015-05-16 02:26:10 +03:00
res = fscrypt_get_encryption_info ( inode ) ;
2015-04-30 01:10:53 +03:00
if ( res )
return ERR_PTR ( res ) ;
cpage = read_mapping_page ( inode - > i_mapping , 0 , NULL ) ;
if ( IS_ERR ( cpage ) )
2015-06-25 06:38:29 +03:00
return ERR_CAST ( cpage ) ;
2015-11-17 09:07:57 +03:00
caddr = page_address ( cpage ) ;
2015-04-30 01:10:53 +03:00
/* Symlink is encrypted */
2015-05-16 02:26:10 +03:00
sd = ( struct fscrypt_symlink_data * ) caddr ;
2016-02-15 12:54:26 +03:00
cstr . name = sd - > encrypted_path ;
2015-04-30 01:10:53 +03:00
cstr . len = le16_to_cpu ( sd - > len ) ;
2015-12-25 03:13:09 +03:00
/* this is broken symlink case */
if ( unlikely ( cstr . len = = 0 ) ) {
res = - ENOENT ;
goto errout ;
}
2015-04-30 01:10:53 +03:00
2015-05-16 02:26:10 +03:00
if ( ( cstr . len + sizeof ( struct fscrypt_symlink_data ) - 1 ) > max_size ) {
2015-04-30 01:10:53 +03:00
/* Symlink data on the disk is corrupted */
res = - EIO ;
goto errout ;
}
2015-05-16 02:26:10 +03:00
res = fscrypt_fname_alloc_buffer ( inode , cstr . len , & pstr ) ;
2015-04-30 01:10:53 +03:00
if ( res )
goto errout ;
2015-05-16 02:26:10 +03:00
res = fscrypt_fname_disk_to_usr ( inode , 0 , 0 , & cstr , & pstr ) ;
2016-09-16 00:25:55 +03:00
if ( res )
2015-04-30 01:10:53 +03:00
goto errout ;
2016-03-30 23:13:16 +03:00
/* this is broken symlink case */
if ( unlikely ( pstr . name [ 0 ] = = 0 ) ) {
res = - ENOENT ;
goto errout ;
}
2015-04-30 01:10:53 +03:00
paddr = pstr . name ;
/* Null-terminate the name */
2016-09-16 00:25:55 +03:00
paddr [ pstr . len ] = ' \0 ' ;
2015-04-30 01:10:53 +03:00
mm, fs: get rid of PAGE_CACHE_* and page_cache_{get,release} macros
PAGE_CACHE_{SIZE,SHIFT,MASK,ALIGN} macros were introduced *long* time
ago with promise that one day it will be possible to implement page
cache with bigger chunks than PAGE_SIZE.
This promise never materialized. And unlikely will.
We have many places where PAGE_CACHE_SIZE assumed to be equal to
PAGE_SIZE. And it's constant source of confusion on whether
PAGE_CACHE_* or PAGE_* constant should be used in a particular case,
especially on the border between fs and mm.
Global switching to PAGE_CACHE_SIZE != PAGE_SIZE would cause to much
breakage to be doable.
Let's stop pretending that pages in page cache are special. They are
not.
The changes are pretty straight-forward:
- <foo> << (PAGE_CACHE_SHIFT - PAGE_SHIFT) -> <foo>;
- <foo> >> (PAGE_CACHE_SHIFT - PAGE_SHIFT) -> <foo>;
- PAGE_CACHE_{SIZE,SHIFT,MASK,ALIGN} -> PAGE_{SIZE,SHIFT,MASK,ALIGN};
- page_cache_get() -> get_page();
- page_cache_release() -> put_page();
This patch contains automated changes generated with coccinelle using
script below. For some reason, coccinelle doesn't patch header files.
I've called spatch for them manually.
The only adjustment after coccinelle is revert of changes to
PAGE_CAHCE_ALIGN definition: we are going to drop it later.
There are few places in the code where coccinelle didn't reach. I'll
fix them manually in a separate patch. Comments and documentation also
will be addressed with the separate patch.
virtual patch
@@
expression E;
@@
- E << (PAGE_CACHE_SHIFT - PAGE_SHIFT)
+ E
@@
expression E;
@@
- E >> (PAGE_CACHE_SHIFT - PAGE_SHIFT)
+ E
@@
@@
- PAGE_CACHE_SHIFT
+ PAGE_SHIFT
@@
@@
- PAGE_CACHE_SIZE
+ PAGE_SIZE
@@
@@
- PAGE_CACHE_MASK
+ PAGE_MASK
@@
expression E;
@@
- PAGE_CACHE_ALIGN(E)
+ PAGE_ALIGN(E)
@@
expression E;
@@
- page_cache_get(E)
+ get_page(E)
@@
expression E;
@@
- page_cache_release(E)
+ put_page(E)
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2016-04-01 15:29:47 +03:00
put_page ( cpage ) ;
2015-12-29 23:58:39 +03:00
set_delayed_call ( done , kfree_link , paddr ) ;
return paddr ;
2015-04-30 01:10:53 +03:00
errout :
2015-05-16 02:26:10 +03:00
fscrypt_fname_free_buffer ( & pstr ) ;
mm, fs: get rid of PAGE_CACHE_* and page_cache_{get,release} macros
PAGE_CACHE_{SIZE,SHIFT,MASK,ALIGN} macros were introduced *long* time
ago with promise that one day it will be possible to implement page
cache with bigger chunks than PAGE_SIZE.
This promise never materialized. And unlikely will.
We have many places where PAGE_CACHE_SIZE assumed to be equal to
PAGE_SIZE. And it's constant source of confusion on whether
PAGE_CACHE_* or PAGE_* constant should be used in a particular case,
especially on the border between fs and mm.
Global switching to PAGE_CACHE_SIZE != PAGE_SIZE would cause to much
breakage to be doable.
Let's stop pretending that pages in page cache are special. They are
not.
The changes are pretty straight-forward:
- <foo> << (PAGE_CACHE_SHIFT - PAGE_SHIFT) -> <foo>;
- <foo> >> (PAGE_CACHE_SHIFT - PAGE_SHIFT) -> <foo>;
- PAGE_CACHE_{SIZE,SHIFT,MASK,ALIGN} -> PAGE_{SIZE,SHIFT,MASK,ALIGN};
- page_cache_get() -> get_page();
- page_cache_release() -> put_page();
This patch contains automated changes generated with coccinelle using
script below. For some reason, coccinelle doesn't patch header files.
I've called spatch for them manually.
The only adjustment after coccinelle is revert of changes to
PAGE_CAHCE_ALIGN definition: we are going to drop it later.
There are few places in the code where coccinelle didn't reach. I'll
fix them manually in a separate patch. Comments and documentation also
will be addressed with the separate patch.
virtual patch
@@
expression E;
@@
- E << (PAGE_CACHE_SHIFT - PAGE_SHIFT)
+ E
@@
expression E;
@@
- E >> (PAGE_CACHE_SHIFT - PAGE_SHIFT)
+ E
@@
@@
- PAGE_CACHE_SHIFT
+ PAGE_SHIFT
@@
@@
- PAGE_CACHE_SIZE
+ PAGE_SIZE
@@
@@
- PAGE_CACHE_MASK
+ PAGE_MASK
@@
expression E;
@@
- PAGE_CACHE_ALIGN(E)
+ PAGE_ALIGN(E)
@@
expression E;
@@
- page_cache_get(E)
+ get_page(E)
@@
expression E;
@@
- page_cache_release(E)
+ put_page(E)
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2016-04-01 15:29:47 +03:00
put_page ( cpage ) ;
2015-04-30 01:10:53 +03:00
return ERR_PTR ( res ) ;
2014-06-19 12:23:19 +04:00
}
2015-04-30 01:10:53 +03:00
const struct inode_operations f2fs_encrypted_symlink_inode_operations = {
2015-11-17 18:20:54 +03:00
. get_link = f2fs_encrypted_get_link ,
2015-04-30 01:10:53 +03:00
. getattr = f2fs_getattr ,
. setattr = f2fs_setattr ,
2015-12-31 13:20:10 +03:00
# ifdef CONFIG_F2FS_FS_XATTR
2015-04-30 01:10:53 +03:00
. listxattr = f2fs_listxattr ,
2015-12-31 13:20:10 +03:00
# endif
2015-04-30 01:10:53 +03:00
} ;
2012-11-02 12:11:10 +04:00
const struct inode_operations f2fs_dir_inode_operations = {
. create = f2fs_create ,
. lookup = f2fs_lookup ,
. link = f2fs_link ,
. unlink = f2fs_unlink ,
. symlink = f2fs_symlink ,
. mkdir = f2fs_mkdir ,
. rmdir = f2fs_rmdir ,
. mknod = f2fs_mknod ,
2016-09-27 12:03:58 +03:00
. rename = f2fs_rename2 ,
2014-06-19 12:23:19 +04:00
. tmpfile = f2fs_tmpfile ,
2013-06-07 11:33:07 +04:00
. getattr = f2fs_getattr ,
2012-11-02 12:11:10 +04:00
. setattr = f2fs_setattr ,
. get_acl = f2fs_get_acl ,
2013-12-20 17:16:45 +04:00
. set_acl = f2fs_set_acl ,
2012-11-02 12:11:10 +04:00
# ifdef CONFIG_F2FS_FS_XATTR
. listxattr = f2fs_listxattr ,
# endif
} ;
const struct inode_operations f2fs_symlink_inode_operations = {
2015-11-17 18:20:54 +03:00
. get_link = f2fs_get_link ,
2013-06-07 11:33:07 +04:00
. getattr = f2fs_getattr ,
2012-11-02 12:11:10 +04:00
. setattr = f2fs_setattr ,
# ifdef CONFIG_F2FS_FS_XATTR
. listxattr = f2fs_listxattr ,
# endif
} ;
const struct inode_operations f2fs_special_inode_operations = {
2013-06-07 11:33:07 +04:00
. getattr = f2fs_getattr ,
2012-11-02 12:11:10 +04:00
. setattr = f2fs_setattr ,
. get_acl = f2fs_get_acl ,
2013-12-20 17:16:45 +04:00
. set_acl = f2fs_set_acl ,
2012-11-02 12:11:10 +04:00
# ifdef CONFIG_F2FS_FS_XATTR
. listxattr = f2fs_listxattr ,
# endif
} ;