2018-09-12 04:16:07 +03:00
// SPDX-License-Identifier: GPL-2.0
2012-11-29 08:28:09 +04:00
/*
2012-11-02 12:11:10 +04:00
* fs / f2fs / namei . c
*
* Copyright ( c ) 2012 Samsung Electronics Co . , Ltd .
* http : //www.samsung.com/
*/
# include <linux/fs.h>
# include <linux/f2fs_fs.h>
# include <linux/pagemap.h>
# include <linux/sched.h>
# include <linux/ctype.h>
2019-02-25 20:46:45 +03:00
# include <linux/random.h>
2014-06-19 12:23:19 +04:00
# include <linux/dcache.h>
2015-04-15 23:49:55 +03:00
# include <linux/namei.h>
2017-07-08 19:13:07 +03:00
# include <linux/quotaops.h>
2012-11-02 12:11:10 +04:00
# include "f2fs.h"
2013-03-21 10:21:57 +04:00
# include "node.h"
2018-08-21 05:21:43 +03:00
# include "segment.h"
2012-11-02 12:11:10 +04:00
# include "xattr.h"
# include "acl.h"
2013-04-19 20:28:40 +04:00
# include <trace/events/f2fs.h>
2012-11-02 12:11:10 +04:00
2023-03-19 10:58:22 +03:00
/*
 * is_extension_exist - test whether file name @s carries extension @sub.
 * @s:       NUL-terminated file name.
 * @sub:     NUL-terminated extension without its leading '.'.  The single
 *           character "*" is a wildcard that matches every name.
 * @tmp_ext: false: @sub must be the last extension of @s ("name.sub").
 *           true:  every ".sub" found after the first character of @s is
 *           considered, so "name.sub.tmp" can match as well.
 * @tmp_dot: only consulted when @tmp_ext is true.  When set, the matched
 *           text has to be followed by '.' or by the end of @s; when clear,
 *           a prefix match such as "name.subXYZ" is accepted too.
 *
 * All comparisons are case-insensitive.
 *
 * Return: true if @s matches @sub under the rules above, false otherwise.
 */
static inline bool is_extension_exist(const unsigned char *s, const char *sub,
						bool tmp_ext, bool tmp_dot)
{
	const char *name = (const char *)s;
	size_t slen = strlen(name);
	size_t sublen = strlen(sub);
	size_t i;

	if (sublen == 1 && *sub == '*')
		return true;

	/*
	 * filename format of multimedia file should be defined as:
	 * "filename + '.' + extension + (optional: '.' + temp extension)".
	 *
	 * The check also guarantees slen - sublen >= 2, so none of the
	 * unsigned subtractions below can wrap.
	 */
	if (slen < sublen + 2)
		return false;

	if (!tmp_ext) {
		/* file has no temp extension */
		if (name[slen - sublen - 1] != '.')
			return false;
		return !strncasecmp(name + slen - sublen, sub, sublen);
	}

	/* Start at 1: at least one character must precede the '.'. */
	for (i = 1; i < slen - sublen; i++) {
		if (name[i] != '.')
			continue;
		if (strncasecmp(name + i + 1, sub, sublen))
			continue;
		if (!tmp_dot)
			return true;
		/* Accept only a whole component: end of name or another '.'. */
		if (i == slen - sublen - 1 || name[i + 1 + sublen] == '.')
			return true;
	}

	return false;
}
2023-06-06 20:17:47 +03:00
/*
 * Extension match with temporary suffixes tolerated (tmp_ext = true) and
 * without requiring the matched text to end at a '.' or at the end of the
 * name (tmp_dot = false).
 */
static inline bool is_temperature_extension(const unsigned char *s,
						const char *sub)
{
	const bool tmp_ext = true;
	const bool tmp_dot = false;

	return is_extension_exist(s, sub, tmp_ext, tmp_dot);
}
/*
 * Extension match with temporary suffixes tolerated (tmp_ext = true) where
 * the matched text must be a whole name component, i.e. be followed by '.'
 * or by the end of the name (tmp_dot = true).
 */
static inline bool is_compress_extension(const unsigned char *s,
						const char *sub)
{
	const bool tmp_ext = true;
	const bool tmp_dot = true;

	return is_extension_exist(s, sub, tmp_ext, tmp_dot);
}
f2fs: clean up symbol namespace
As Ted reported:
"Hi, I was looking at f2fs's sources recently, and I noticed that there
is a very large number of non-static symbols which don't have a f2fs
prefix. There's well over a hundred (see attached below).
As one example, in fs/f2fs/dir.c there is:
unsigned char get_de_type(struct f2fs_dir_entry *de)
This function is clearly only useful for f2fs, but it has a generic
name. This means that if any other file system tries to have the same
symbol name, there will be a symbol conflict and the kernel would not
successfully build. It also means that when someone is looking at f2fs
sources, it's not at all obvious whether a function such as
read_data_page(), invalidate_blocks(), is a generic kernel function
found in the fs, mm, or block layers, or a f2fs specific function.
You might want to fix this at some point. Hopefully Kent's bcachefs
isn't similarly using generically named functions, since that might
cause conflicts with f2fs's functions --- but just as this would be a
problem that we would rightly insist that Kent fix, this is something
that we should have rightly insisted that f2fs should have fixed
before it was integrated into the mainline kernel.
acquire_orphan_inode
add_ino_entry
add_orphan_inode
allocate_data_block
allocate_new_segments
alloc_nid
alloc_nid_done
alloc_nid_failed
available_free_memory
...."
This patch adds "f2fs_" prefix for all non-static symbols in order to:
a) avoid conflict with other kernel generic symbols;
b) to indicate the function is f2fs specific one instead of generic
one;
Reported-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-05-29 19:20:41 +03:00
int f2fs_update_extension_list ( struct f2fs_sb_info * sbi , const char * name ,
2018-02-28 12:07:27 +03:00
bool hot , bool set )
2018-02-26 17:04:13 +03:00
{
__u8 ( * extlist ) [ F2FS_EXTENSION_LEN ] = sbi - > raw_super - > extension_list ;
2018-02-28 12:07:27 +03:00
int cold_count = le32_to_cpu ( sbi - > raw_super - > extension_count ) ;
int hot_count = sbi - > raw_super - > hot_ext_count ;
int total_count = cold_count + hot_count ;
int start , count ;
2018-02-26 17:04:13 +03:00
int i ;
2018-02-28 12:07:27 +03:00
if ( set ) {
if ( total_count = = F2FS_MAX_EXTENSION )
return - EINVAL ;
} else {
if ( ! hot & & ! cold_count )
return - EINVAL ;
if ( hot & & ! hot_count )
return - EINVAL ;
}
if ( hot ) {
start = cold_count ;
count = total_count ;
} else {
start = 0 ;
count = cold_count ;
}
for ( i = start ; i < count ; i + + ) {
2018-02-26 17:04:13 +03:00
if ( strcmp ( name , extlist [ i ] ) )
continue ;
if ( set )
return - EINVAL ;
memcpy ( extlist [ i ] , extlist [ i + 1 ] ,
2018-02-28 12:07:27 +03:00
F2FS_EXTENSION_LEN * ( total_count - i - 1 ) ) ;
memset ( extlist [ total_count - 1 ] , 0 , F2FS_EXTENSION_LEN ) ;
if ( hot )
sbi - > raw_super - > hot_ext_count = hot_count - 1 ;
else
sbi - > raw_super - > extension_count =
cpu_to_le32 ( cold_count - 1 ) ;
2018-02-26 17:04:13 +03:00
return 0 ;
}
if ( ! set )
return - EINVAL ;
2018-02-28 12:07:27 +03:00
if ( hot ) {
2018-07-01 23:57:06 +03:00
memcpy ( extlist [ count ] , name , strlen ( name ) ) ;
2018-02-28 12:07:27 +03:00
sbi - > raw_super - > hot_ext_count = hot_count + 1 ;
} else {
char buf [ F2FS_MAX_EXTENSION ] [ F2FS_EXTENSION_LEN ] ;
memcpy ( buf , & extlist [ cold_count ] ,
F2FS_EXTENSION_LEN * hot_count ) ;
memset ( extlist [ cold_count ] , 0 , F2FS_EXTENSION_LEN ) ;
2018-07-01 23:57:06 +03:00
memcpy ( extlist [ cold_count ] , name , strlen ( name ) ) ;
2018-02-28 12:07:27 +03:00
memcpy ( & extlist [ cold_count + 1 ] , buf ,
F2FS_EXTENSION_LEN * hot_count ) ;
sbi - > raw_super - > extension_count = cpu_to_le32 ( cold_count + 1 ) ;
}
2018-02-26 17:04:13 +03:00
return 0 ;
2012-11-02 12:11:10 +04:00
}
2022-11-11 13:08:29 +03:00
static void set_compress_new_inode ( struct f2fs_sb_info * sbi , struct inode * dir ,
struct inode * inode , const unsigned char * name )
f2fs: support data compression
This patch tries to support compression in f2fs.
- New term named cluster is defined as basic unit of compression, file can
be divided into multiple clusters logically. One cluster includes 4 << n
(n >= 0) logical pages, compression size is also cluster size, each of
cluster can be compressed or not.
- In cluster metadata layout, one special flag is used to indicate cluster
is compressed one or normal one, for compressed cluster, following metadata
maps cluster to [1, 4 << n - 1] physical blocks, in where f2fs stores
data including compress header and compressed data.
- In order to eliminate write amplification during overwrite, F2FS only
support compression on write-once file, data can be compressed only when
all logical blocks in file are valid and cluster compress ratio is lower
than specified threshold.
- To enable compression on regular inode, there are three ways:
* chattr +c file
* chattr +c dir; touch dir/file
* mount w/ -o compress_extension=ext; touch file.ext
Compress metadata layout:
[Dnode Structure]
+-----------------------------------------------+
| cluster 1 | cluster 2 | ......... | cluster N |
+-----------------------------------------------+
. . . .
. . . .
. Compressed Cluster . . Normal Cluster .
+----------+---------+---------+---------+ +---------+---------+---------+---------+
|compr flag| block 1 | block 2 | block 3 | | block 1 | block 2 | block 3 | block 4 |
+----------+---------+---------+---------+ +---------+---------+---------+---------+
. .
. .
. .
+-------------+-------------+----------+----------------------------+
| data length | data chksum | reserved | compressed data |
+-------------+-------------+----------+----------------------------+
Changelog:
20190326:
- fix error handling of read_end_io().
- remove unneeded comments in f2fs_encrypt_one_page().
20190327:
- fix wrong use of f2fs_cluster_is_full() in f2fs_mpage_readpages().
- don't jump into loop directly to avoid uninitialized variables.
- add TODO tag in error path of f2fs_write_cache_pages().
20190328:
- fix wrong merge condition in f2fs_read_multi_pages().
- check compressed file in f2fs_post_read_required().
20190401
- allow overwrite on non-compressed cluster.
- check cluster meta before writing compressed data.
20190402
- don't preallocate blocks for compressed file.
- add lz4 compress algorithm
- process multiple post read works in one workqueue
Now f2fs supports processing post read work in multiple workqueue,
it shows low performance due to schedule overhead of multiple
workqueue executing orderly.
20190921
- compress: support buffered overwrite
C: compress cluster flag
V: valid block address
N: NEW_ADDR
One cluster contain 4 blocks
before overwrite after overwrite
- VVVV -> CVNN
- CVNN -> VVVV
- CVNN -> CVNN
- CVNN -> CVVV
- CVVV -> CVNN
- CVVV -> CVVV
20191029
- add kconfig F2FS_FS_COMPRESSION to isolate compression related
codes, add kconfig F2FS_FS_{LZO,LZ4} to cover backend algorithm.
note that: will remove lzo backend if Jaegeuk agreed that too.
- update codes according to Eric's comments.
20191101
- apply fixes from Jaegeuk
20191113
- apply fixes from Jaegeuk
- split workqueue for fsverity
20191216
- apply fixes from Jaegeuk
20200117
- fix to avoid NULL pointer dereference
[Jaegeuk Kim]
- add tracepoint for f2fs_{,de}compress_pages()
- fix many bugs and add some compression stats
- fix overwrite/mmap bugs
- address 32bit build error, reported by Geert.
- bug fixes when handling errors and i_compressed_blocks
Reported-by: <noreply@ellerman.id.au>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2019-11-01 13:07:14 +03:00
{
__u8 ( * extlist ) [ F2FS_EXTENSION_LEN ] = sbi - > raw_super - > extension_list ;
2022-11-11 13:08:29 +03:00
unsigned char ( * noext ) [ F2FS_EXTENSION_LEN ] =
F2FS_OPTION ( sbi ) . noextensions ;
2021-06-08 14:15:08 +03:00
unsigned char ( * ext ) [ F2FS_EXTENSION_LEN ] = F2FS_OPTION ( sbi ) . extensions ;
unsigned char ext_cnt = F2FS_OPTION ( sbi ) . compress_ext_cnt ;
unsigned char noext_cnt = F2FS_OPTION ( sbi ) . nocompress_ext_cnt ;
f2fs: support data compression
This patch tries to support compression in f2fs.
- New term named cluster is defined as basic unit of compression, file can
be divided into multiple clusters logically. One cluster includes 4 << n
(n >= 0) logical pages, compression size is also cluster size, each of
cluster can be compressed or not.
- In cluster metadata layout, one special flag is used to indicate cluster
is compressed one or normal one, for compressed cluster, following metadata
maps cluster to [1, 4 << n - 1] physical blocks, in where f2fs stores
data including compress header and compressed data.
- In order to eliminate write amplification during overwrite, F2FS only
support compression on write-once file, data can be compressed only when
all logical blocks in file are valid and cluster compress ratio is lower
than specified threshold.
- To enable compression on regular inode, there are three ways:
* chattr +c file
* chattr +c dir; touch dir/file
* mount w/ -o compress_extension=ext; touch file.ext
Compress metadata layout:
[Dnode Structure]
+-----------------------------------------------+
| cluster 1 | cluster 2 | ......... | cluster N |
+-----------------------------------------------+
. . . .
. . . .
. Compressed Cluster . . Normal Cluster .
+----------+---------+---------+---------+ +---------+---------+---------+---------+
|compr flag| block 1 | block 2 | block 3 | | block 1 | block 2 | block 3 | block 4 |
+----------+---------+---------+---------+ +---------+---------+---------+---------+
. .
. .
. .
+-------------+-------------+----------+----------------------------+
| data length | data chksum | reserved | compressed data |
+-------------+-------------+----------+----------------------------+
Changelog:
20190326:
- fix error handling of read_end_io().
- remove unneeded comments in f2fs_encrypt_one_page().
20190327:
- fix wrong use of f2fs_cluster_is_full() in f2fs_mpage_readpages().
- don't jump into loop directly to avoid uninitialized variables.
- add TODO tag in error path of f2fs_write_cache_pages().
20190328:
- fix wrong merge condition in f2fs_read_multi_pages().
- check compressed file in f2fs_post_read_required().
20190401
- allow overwrite on non-compressed cluster.
- check cluster meta before writing compressed data.
20190402
- don't preallocate blocks for compressed file.
- add lz4 compress algorithm
- process multiple post read works in one workqueue
Now f2fs supports processing post read work in multiple workqueue,
it shows low performance due to schedule overhead of multiple
workqueue executing orderly.
20190921
- compress: support buffered overwrite
C: compress cluster flag
V: valid block address
N: NEW_ADDR
One cluster contain 4 blocks
before overwrite after overwrite
- VVVV -> CVNN
- CVNN -> VVVV
- CVNN -> CVNN
- CVNN -> CVVV
- CVVV -> CVNN
- CVVV -> CVVV
20191029
- add kconfig F2FS_FS_COMPRESSION to isolate compression related
codes, add kconfig F2FS_FS_{LZO,LZ4} to cover backend algorithm.
note that: will remove lzo backend if Jaegeuk agreed that too.
- update codes according to Eric's comments.
20191101
- apply fixes from Jaegeuk
20191113
- apply fixes from Jaegeuk
- split workqueue for fsverity
20191216
- apply fixes from Jaegeuk
20200117
- fix to avoid NULL pointer dereference
[Jaegeuk Kim]
- add tracepoint for f2fs_{,de}compress_pages()
- fix many bugs and add some compression stats
- fix overwrite/mmap bugs
- address 32bit build error, reported by Geert.
- bug fixes when handling errors and i_compressed_blocks
Reported-by: <noreply@ellerman.id.au>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2019-11-01 13:07:14 +03:00
int i , cold_count , hot_count ;
2022-11-11 13:08:29 +03:00
if ( ! f2fs_sb_has_compression ( sbi ) )
f2fs: support data compression
This patch tries to support compression in f2fs.
- New term named cluster is defined as basic unit of compression, file can
be divided into multiple clusters logically. One cluster includes 4 << n
(n >= 0) logical pages, compression size is also cluster size, each of
cluster can be compressed or not.
- In cluster metadata layout, one special flag is used to indicate cluster
is compressed one or normal one, for compressed cluster, following metadata
maps cluster to [1, 4 << n - 1] physical blocks, in where f2fs stores
data including compress header and compressed data.
- In order to eliminate write amplification during overwrite, F2FS only
support compression on write-once file, data can be compressed only when
all logical blocks in file are valid and cluster compress ratio is lower
than specified threshold.
- To enable compression on regular inode, there are three ways:
* chattr +c file
* chattr +c dir; touch dir/file
* mount w/ -o compress_extension=ext; touch file.ext
Compress metadata layout:
[Dnode Structure]
+-----------------------------------------------+
| cluster 1 | cluster 2 | ......... | cluster N |
+-----------------------------------------------+
. . . .
. . . .
. Compressed Cluster . . Normal Cluster .
+----------+---------+---------+---------+ +---------+---------+---------+---------+
|compr flag| block 1 | block 2 | block 3 | | block 1 | block 2 | block 3 | block 4 |
+----------+---------+---------+---------+ +---------+---------+---------+---------+
. .
. .
. .
+-------------+-------------+----------+----------------------------+
| data length | data chksum | reserved | compressed data |
+-------------+-------------+----------+----------------------------+
Changelog:
20190326:
- fix error handling of read_end_io().
- remove unneeded comments in f2fs_encrypt_one_page().
20190327:
- fix wrong use of f2fs_cluster_is_full() in f2fs_mpage_readpages().
- don't jump into loop directly to avoid uninitialized variables.
- add TODO tag in error path of f2fs_write_cache_pages().
20190328:
- fix wrong merge condition in f2fs_read_multi_pages().
- check compressed file in f2fs_post_read_required().
20190401
- allow overwrite on non-compressed cluster.
- check cluster meta before writing compressed data.
20190402
- don't preallocate blocks for compressed file.
- add lz4 compress algorithm
- process multiple post read works in one workqueue
Now f2fs supports processing post read work in multiple workqueue,
it shows low performance due to schedule overhead of multiple
workqueue executing orderly.
20190921
- compress: support buffered overwrite
C: compress cluster flag
V: valid block address
N: NEW_ADDR
One cluster contain 4 blocks
before overwrite after overwrite
- VVVV -> CVNN
- CVNN -> VVVV
- CVNN -> CVNN
- CVNN -> CVVV
- CVVV -> CVNN
- CVVV -> CVVV
20191029
- add kconfig F2FS_FS_COMPRESSION to isolate compression related
codes, add kconfig F2FS_FS_{LZO,LZ4} to cover backend algorithm.
note that: will remove lzo backend if Jaegeuk agreed that too.
- update codes according to Eric's comments.
20191101
- apply fixes from Jaegeuk
20191113
- apply fixes from Jaegeuk
- split workqueue for fsverity
20191216
- apply fixes from Jaegeuk
20200117
- fix to avoid NULL pointer dereference
[Jaegeuk Kim]
- add tracepoint for f2fs_{,de}compress_pages()
- fix many bugs and add some compression stats
- fix overwrite/mmap bugs
- address 32bit build error, reported by Geert.
- bug fixes when handling errors and i_compressed_blocks
Reported-by: <noreply@ellerman.id.au>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2019-11-01 13:07:14 +03:00
return ;
2022-11-11 13:08:29 +03:00
if ( S_ISDIR ( inode - > i_mode ) )
goto inherit_comp ;
f2fs: support data compression
This patch tries to support compression in f2fs.
- New term named cluster is defined as basic unit of compression, file can
be divided into multiple clusters logically. One cluster includes 4 << n
(n >= 0) logical pages, compression size is also cluster size, each of
cluster can be compressed or not.
- In cluster metadata layout, one special flag is used to indicate cluster
is compressed one or normal one, for compressed cluster, following metadata
maps cluster to [1, 4 << n - 1] physical blocks, in where f2fs stores
data including compress header and compressed data.
- In order to eliminate write amplification during overwrite, F2FS only
support compression on write-once file, data can be compressed only when
all logical blocks in file are valid and cluster compress ratio is lower
than specified threshold.
- To enable compression on regular inode, there are three ways:
* chattr +c file
* chattr +c dir; touch dir/file
* mount w/ -o compress_extension=ext; touch file.ext
Compress metadata layout:
[Dnode Structure]
+-----------------------------------------------+
| cluster 1 | cluster 2 | ......... | cluster N |
+-----------------------------------------------+
. . . .
. . . .
. Compressed Cluster . . Normal Cluster .
+----------+---------+---------+---------+ +---------+---------+---------+---------+
|compr flag| block 1 | block 2 | block 3 | | block 1 | block 2 | block 3 | block 4 |
+----------+---------+---------+---------+ +---------+---------+---------+---------+
. .
. .
. .
+-------------+-------------+----------+----------------------------+
| data length | data chksum | reserved | compressed data |
+-------------+-------------+----------+----------------------------+
Changelog:
20190326:
- fix error handling of read_end_io().
- remove unneeded comments in f2fs_encrypt_one_page().
20190327:
- fix wrong use of f2fs_cluster_is_full() in f2fs_mpage_readpages().
- don't jump into loop directly to avoid uninitialized variables.
- add TODO tag in error path of f2fs_write_cache_pages().
20190328:
- fix wrong merge condition in f2fs_read_multi_pages().
- check compressed file in f2fs_post_read_required().
20190401
- allow overwrite on non-compressed cluster.
- check cluster meta before writing compressed data.
20190402
- don't preallocate blocks for compressed file.
- add lz4 compress algorithm
- process multiple post read works in one workqueue
Now f2fs supports processing post read work in multiple workqueue,
it shows low performance due to schedule overhead of multiple
workqueue executing orderly.
20190921
- compress: support buffered overwrite
C: compress cluster flag
V: valid block address
N: NEW_ADDR
One cluster contain 4 blocks
before overwrite after overwrite
- VVVV -> CVNN
- CVNN -> VVVV
- CVNN -> CVNN
- CVNN -> CVVV
- CVVV -> CVNN
- CVVV -> CVVV
20191029
- add kconfig F2FS_FS_COMPRESSION to isolate compression related
codes, add kconfig F2FS_FS_{LZO,LZ4} to cover backend algorithm.
note that: will remove lzo backend if Jaegeuk agreed that too.
- update codes according to Eric's comments.
20191101
- apply fixes from Jaegeuk
20191113
- apply fixes from Jaegeuk
- split workqueue for fsverity
20191216
- apply fixes from Jaegeuk
20200117
- fix to avoid NULL pointer dereference
[Jaegeuk Kim]
- add tracepoint for f2fs_{,de}compress_pages()
- fix many bugs and add some compression stats
- fix overwrite/mmap bugs
- address 32bit build error, reported by Geert.
- bug fixes when handling errors and i_compressed_blocks
Reported-by: <noreply@ellerman.id.au>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2019-11-01 13:07:14 +03:00
2022-11-11 13:08:29 +03:00
/* This name comes only from normal files. */
if ( ! name )
return ;
/* Don't compress hot files. */
f2fs_down_read ( & sbi - > sb_lock ) ;
f2fs: support data compression
This patch tries to support compression in f2fs.
- New term named cluster is defined as basic unit of compression, file can
be divided into multiple clusters logically. One cluster includes 4 << n
(n >= 0) logical pages, compression size is also cluster size, each of
cluster can be compressed or not.
- In cluster metadata layout, one special flag is used to indicate cluster
is compressed one or normal one, for compressed cluster, following metadata
maps cluster to [1, 4 << n - 1] physical blocks, in where f2fs stores
data including compress header and compressed data.
- In order to eliminate write amplification during overwrite, F2FS only
support compression on write-once file, data can be compressed only when
all logical blocks in file are valid and cluster compress ratio is lower
than specified threshold.
- To enable compression on regular inode, there are three ways:
* chattr +c file
* chattr +c dir; touch dir/file
* mount w/ -o compress_extension=ext; touch file.ext
Compress metadata layout:
[Dnode Structure]
+-----------------------------------------------+
| cluster 1 | cluster 2 | ......... | cluster N |
+-----------------------------------------------+
. . . .
. . . .
. Compressed Cluster . . Normal Cluster .
+----------+---------+---------+---------+ +---------+---------+---------+---------+
|compr flag| block 1 | block 2 | block 3 | | block 1 | block 2 | block 3 | block 4 |
+----------+---------+---------+---------+ +---------+---------+---------+---------+
. .
. .
. .
+-------------+-------------+----------+----------------------------+
| data length | data chksum | reserved | compressed data |
+-------------+-------------+----------+----------------------------+
Changelog:
20190326:
- fix error handling of read_end_io().
- remove unneeded comments in f2fs_encrypt_one_page().
20190327:
- fix wrong use of f2fs_cluster_is_full() in f2fs_mpage_readpages().
- don't jump into loop directly to avoid uninitialized variables.
- add TODO tag in error path of f2fs_write_cache_pages().
20190328:
- fix wrong merge condition in f2fs_read_multi_pages().
- check compressed file in f2fs_post_read_required().
20190401
- allow overwrite on non-compressed cluster.
- check cluster meta before writing compressed data.
20190402
- don't preallocate blocks for compressed file.
- add lz4 compress algorithm
- process multiple post read works in one workqueue
Now f2fs supports processing post read work in multiple workqueue,
it shows low performance due to schedule overhead of multiple
workqueue executing orderly.
20190921
- compress: support buffered overwrite
C: compress cluster flag
V: valid block address
N: NEW_ADDR
One cluster contain 4 blocks
before overwrite after overwrite
- VVVV -> CVNN
- CVNN -> VVVV
- CVNN -> CVNN
- CVNN -> CVVV
- CVVV -> CVNN
- CVVV -> CVVV
20191029
- add kconfig F2FS_FS_COMPRESSION to isolate compression related
codes, add kconfig F2FS_FS_{LZO,LZ4} to cover backend algorithm.
note that: will remove lzo backend if Jaegeuk agreed that too.
- update codes according to Eric's comments.
20191101
- apply fixes from Jaegeuk
20191113
- apply fixes from Jaegeuk
- split workqueue for fsverity
20191216
- apply fixes from Jaegeuk
20200117
- fix to avoid NULL pointer dereference
[Jaegeuk Kim]
- add tracepoint for f2fs_{,de}compress_pages()
- fix many bugs and add some compression stats
- fix overwrite/mmap bugs
- address 32bit build error, reported by Geert.
- bug fixes when handling errors and i_compressed_blocks
Reported-by: <noreply@ellerman.id.au>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2019-11-01 13:07:14 +03:00
cold_count = le32_to_cpu ( sbi - > raw_super - > extension_count ) ;
hot_count = sbi - > raw_super - > hot_ext_count ;
2022-11-11 13:08:29 +03:00
for ( i = cold_count ; i < cold_count + hot_count ; i + + )
2023-06-06 20:17:47 +03:00
if ( is_temperature_extension ( name , extlist [ i ] ) )
2022-11-11 13:08:29 +03:00
break ;
f2fs_up_read ( & sbi - > sb_lock ) ;
if ( i < ( cold_count + hot_count ) )
return ;
/* Don't compress unallowed extension. */
for ( i = 0 ; i < noext_cnt ; i + + )
2023-06-06 20:17:47 +03:00
if ( is_compress_extension ( name , noext [ i ] ) )
2022-11-11 13:08:29 +03:00
return ;
f2fs: support data compression
This patch tries to support compression in f2fs.
- New term named cluster is defined as basic unit of compression, file can
be divided into multiple clusters logically. One cluster includes 4 << n
(n >= 0) logical pages, compression size is also cluster size, each of
cluster can be compressed or not.
- In cluster metadata layout, one special flag is used to indicate cluster
is compressed one or normal one, for compressed cluster, following metadata
maps cluster to [1, 4 << n - 1] physical blocks, in where f2fs stores
data including compress header and compressed data.
- In order to eliminate write amplification during overwrite, F2FS only
support compression on write-once file, data can be compressed only when
all logical blocks in file are valid and cluster compress ratio is lower
than specified threshold.
- To enable compression on regular inode, there are three ways:
* chattr +c file
* chattr +c dir; touch dir/file
* mount w/ -o compress_extension=ext; touch file.ext
Compress metadata layout:
[Dnode Structure]
+-----------------------------------------------+
| cluster 1 | cluster 2 | ......... | cluster N |
+-----------------------------------------------+
. . . .
. . . .
. Compressed Cluster . . Normal Cluster .
+----------+---------+---------+---------+ +---------+---------+---------+---------+
|compr flag| block 1 | block 2 | block 3 | | block 1 | block 2 | block 3 | block 4 |
+----------+---------+---------+---------+ +---------+---------+---------+---------+
. .
. .
. .
+-------------+-------------+----------+----------------------------+
| data length | data chksum | reserved | compressed data |
+-------------+-------------+----------+----------------------------+
Changelog:
20190326:
- fix error handling of read_end_io().
- remove unneeded comments in f2fs_encrypt_one_page().
20190327:
- fix wrong use of f2fs_cluster_is_full() in f2fs_mpage_readpages().
- don't jump into loop directly to avoid uninitialized variables.
- add TODO tag in error path of f2fs_write_cache_pages().
20190328:
- fix wrong merge condition in f2fs_read_multi_pages().
- check compressed file in f2fs_post_read_required().
20190401
- allow overwrite on non-compressed cluster.
- check cluster meta before writing compressed data.
20190402
- don't preallocate blocks for compressed file.
- add lz4 compress algorithm
- process multiple post read works in one workqueue
Now f2fs supports processing post read work in multiple workqueue,
it shows low performance due to schedule overhead of multiple
workqueue executing orderly.
20190921
- compress: support buffered overwrite
C: compress cluster flag
V: valid block address
N: NEW_ADDR
One cluster contain 4 blocks
before overwrite after overwrite
- VVVV -> CVNN
- CVNN -> VVVV
- CVNN -> CVNN
- CVNN -> CVVV
- CVVV -> CVNN
- CVVV -> CVVV
20191029
- add kconfig F2FS_FS_COMPRESSION to isolate compression related
codes, add kconfig F2FS_FS_{LZO,LZ4} to cover backend algorithm.
note that: will remove lzo backend if Jaegeuk agreed that too.
- update codes according to Eric's comments.
20191101
- apply fixes from Jaegeuk
20191113
- apply fixes from Jaegeuk
- split workqueue for fsverity
20191216
- apply fixes from Jaegeuk
20200117
- fix to avoid NULL pointer dereference
[Jaegeuk Kim]
- add tracepoint for f2fs_{,de}compress_pages()
- fix many bugs and add some compression stats
- fix overwrite/mmap bugs
- address 32bit build error, reported by Geert.
- bug fixes when handling errors and i_compressed_blocks
Reported-by: <noreply@ellerman.id.au>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2019-11-01 13:07:14 +03:00
2022-11-11 13:08:29 +03:00
/* Compress wanting extension. */
for ( i = 0 ; i < ext_cnt ; i + + ) {
2023-06-06 20:17:47 +03:00
if ( is_compress_extension ( name , ext [ i ] ) ) {
2022-11-11 13:08:29 +03:00
set_compress_context ( inode ) ;
f2fs: support data compression
This patch tries to support compression in f2fs.
- New term named cluster is defined as basic unit of compression, file can
be divided into multiple clusters logically. One cluster includes 4 << n
(n >= 0) logical pages, compression size is also cluster size, each of
cluster can be compressed or not.
- In cluster metadata layout, one special flag is used to indicate cluster
is compressed one or normal one, for compressed cluster, following metadata
maps cluster to [1, 4 << n - 1] physical blocks, in where f2fs stores
data including compress header and compressed data.
- In order to eliminate write amplification during overwrite, F2FS only
support compression on write-once file, data can be compressed only when
all logical blocks in file are valid and cluster compress ratio is lower
than specified threshold.
- To enable compression on regular inode, there are three ways:
* chattr +c file
* chattr +c dir; touch dir/file
* mount w/ -o compress_extension=ext; touch file.ext
Compress metadata layout:
[Dnode Structure]
+-----------------------------------------------+
| cluster 1 | cluster 2 | ......... | cluster N |
+-----------------------------------------------+
. . . .
. . . .
. Compressed Cluster . . Normal Cluster .
+----------+---------+---------+---------+ +---------+---------+---------+---------+
|compr flag| block 1 | block 2 | block 3 | | block 1 | block 2 | block 3 | block 4 |
+----------+---------+---------+---------+ +---------+---------+---------+---------+
. .
. .
. .
+-------------+-------------+----------+----------------------------+
| data length | data chksum | reserved | compressed data |
+-------------+-------------+----------+----------------------------+
Changelog:
20190326:
- fix error handling of read_end_io().
- remove unneeded comments in f2fs_encrypt_one_page().
20190327:
- fix wrong use of f2fs_cluster_is_full() in f2fs_mpage_readpages().
- don't jump into loop directly to avoid uninitialized variables.
- add TODO tag in error path of f2fs_write_cache_pages().
20190328:
- fix wrong merge condition in f2fs_read_multi_pages().
- check compressed file in f2fs_post_read_required().
20190401
- allow overwrite on non-compressed cluster.
- check cluster meta before writing compressed data.
20190402
- don't preallocate blocks for compressed file.
- add lz4 compress algorithm
- process multiple post read works in one workqueue
Now f2fs supports processing post read work in multiple workqueue,
it shows low performance due to schedule overhead of multiple
workqueue executing orderly.
20190921
- compress: support buffered overwrite
C: compress cluster flag
V: valid block address
N: NEW_ADDR
One cluster contain 4 blocks
before overwrite after overwrite
- VVVV -> CVNN
- CVNN -> VVVV
- CVNN -> CVNN
- CVNN -> CVVV
- CVVV -> CVNN
- CVVV -> CVVV
20191029
- add kconfig F2FS_FS_COMPRESSION to isolate compression related
codes, add kconfig F2FS_FS_{LZO,LZ4} to cover backend algorithm.
note that: will remove lzo backend if Jaegeuk agreed that too.
- update codes according to Eric's comments.
20191101
- apply fixes from Jaegeuk
20191113
- apply fixes from Jaegeuk
- split workqueue for fsverity
20191216
- apply fixes from Jaegeuk
20200117
- fix to avoid NULL pointer dereference
[Jaegeuk Kim]
- add tracepoint for f2fs_{,de}compress_pages()
- fix many bugs and add some compression stats
- fix overwrite/mmap bugs
- address 32bit build error, reported by Geert.
- bug fixes when handling errors and i_compressed_blocks
Reported-by: <noreply@ellerman.id.au>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2019-11-01 13:07:14 +03:00
return ;
}
}
2022-11-11 13:08:29 +03:00
inherit_comp :
/* Inherit the {no-}compression flag in directory */
if ( F2FS_I ( dir ) - > i_flags & F2FS_NOCOMP_FL ) {
F2FS_I ( inode ) - > i_flags | = F2FS_NOCOMP_FL ;
f2fs_mark_inode_dirty_sync ( inode , true ) ;
} else if ( F2FS_I ( dir ) - > i_flags & F2FS_COMPR_FL ) {
set_compress_context ( inode ) ;
}
}
2022-11-11 13:08:30 +03:00
/*
 * Set file's temperature for hot/cold data separation.
 *
 * While holding sbi->sb_lock for read, walk the first
 * (extension_count + hot_ext_count) entries of the superblock's
 * extension_list and stop at the first entry for which
 * is_temperature_extension(name, entry) returns true.  The resulting
 * index is evaluated after the lock has been released:
 *   - no entry matched: return, leaving the inode untouched;
 *   - index below extension_count: file_set_cold(inode);
 *   - any later index: file_set_hot(inode).
 * In other words, the list is treated as cold entries first, followed by
 * hot entries.
 *
 * @sbi:   superblock info supplying raw_super and sb_lock
 * @inode: inode whose temperature hint is set
 * @name:  file name handed to is_temperature_extension()
 *         NOTE(review): the exact matching rule lives in that helper,
 *         which is not visible here -- confirm before relying on it.
 */
static void set_file_temperature ( struct f2fs_sb_info * sbi , struct inode * inode ,
const unsigned char * name )
{
__u8 ( * extlist ) [ F2FS_EXTENSION_LEN ] = sbi - > raw_super - > extension_list ;
int i , cold_count , hot_count ;
f2fs: support data compression
This patch tries to support compression in f2fs.
- New term named cluster is defined as basic unit of compression, file can
be divided into multiple clusters logically. One cluster includes 4 << n
(n >= 0) logical pages, compression size is also cluster size, each of
cluster can be compressed or not.
- In cluster metadata layout, one special flag is used to indicate cluster
is compressed one or normal one, for compressed cluster, following metadata
maps cluster to [1, 4 << n - 1] physical blocks, in where f2fs stores
data including compress header and compressed data.
- In order to eliminate write amplification during overwrite, F2FS only
support compression on write-once file, data can be compressed only when
all logical blocks in file are valid and cluster compress ratio is lower
than specified threshold.
- To enable compression on regular inode, there are three ways:
* chattr +c file
* chattr +c dir; touch dir/file
* mount w/ -o compress_extension=ext; touch file.ext
Compress metadata layout:
[Dnode Structure]
+-----------------------------------------------+
| cluster 1 | cluster 2 | ......... | cluster N |
+-----------------------------------------------+
. . . .
. . . .
. Compressed Cluster . . Normal Cluster .
+----------+---------+---------+---------+ +---------+---------+---------+---------+
|compr flag| block 1 | block 2 | block 3 | | block 1 | block 2 | block 3 | block 4 |
+----------+---------+---------+---------+ +---------+---------+---------+---------+
. .
. .
. .
+-------------+-------------+----------+----------------------------+
| data length | data chksum | reserved | compressed data |
+-------------+-------------+----------+----------------------------+
Changelog:
20190326:
- fix error handling of read_end_io().
- remove unneeded comments in f2fs_encrypt_one_page().
20190327:
- fix wrong use of f2fs_cluster_is_full() in f2fs_mpage_readpages().
- don't jump into loop directly to avoid uninitialized variables.
- add TODO tag in error path of f2fs_write_cache_pages().
20190328:
- fix wrong merge condition in f2fs_read_multi_pages().
- check compressed file in f2fs_post_read_required().
20190401
- allow overwrite on non-compressed cluster.
- check cluster meta before writing compressed data.
20190402
- don't preallocate blocks for compressed file.
- add lz4 compress algorithm
- process multiple post read works in one workqueue
Now f2fs supports processing post read work in multiple workqueue,
it shows low performance due to schedule overhead of multiple
workqueue executing orderly.
20190921
- compress: support buffered overwrite
C: compress cluster flag
V: valid block address
N: NEW_ADDR
One cluster contain 4 blocks
before overwrite after overwrite
- VVVV -> CVNN
- CVNN -> VVVV
- CVNN -> CVNN
- CVNN -> CVVV
- CVVV -> CVNN
- CVVV -> CVVV
20191029
- add kconfig F2FS_FS_COMPRESSION to isolate compression related
codes, add kconfig F2FS_FS_{LZO,LZ4} to cover backend algorithm.
note that: will remove lzo backend if Jaegeuk agreed that too.
- update codes according to Eric's comments.
20191101
- apply fixes from Jaegeuk
20191113
- apply fixes from Jaegeuk
- split workqueue for fsverity
20191216
- apply fixes from Jaegeuk
20200117
- fix to avoid NULL pointer dereference
[Jaegeuk Kim]
- add tracepoint for f2fs_{,de}compress_pages()
- fix many bugs and add some compression stats
- fix overwrite/mmap bugs
- address 32bit build error, reported by Geert.
- bug fixes when handling errors and i_compressed_blocks
Reported-by: <noreply@ellerman.id.au>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2019-11-01 13:07:14 +03:00
2022-11-11 13:08:30 +03:00
f2fs_down_read ( & sbi - > sb_lock ) ;
cold_count = le32_to_cpu ( sbi - > raw_super - > extension_count ) ;
hot_count = sbi - > raw_super - > hot_ext_count ;
for ( i = 0 ; i < cold_count + hot_count ; i + + )
2023-06-06 20:17:47 +03:00
if ( is_temperature_extension ( name , extlist [ i ] ) )
2022-11-11 13:08:30 +03:00
break ;
2022-01-07 23:48:44 +03:00
f2fs_up_read ( & sbi - > sb_lock ) ;
f2fs: support data compression
This patch tries to support compression in f2fs.
- New term named cluster is defined as basic unit of compression, file can
be divided into multiple clusters logically. One cluster includes 4 << n
(n >= 0) logical pages, compression size is also cluster size, each of
cluster can be compressed or not.
- In cluster metadata layout, one special flag is used to indicate cluster
is compressed one or normal one, for compressed cluster, following metadata
maps cluster to [1, 4 << n - 1] physical blocks, in where f2fs stores
data including compress header and compressed data.
- In order to eliminate write amplification during overwrite, F2FS only
support compression on write-once file, data can be compressed only when
all logical blocks in file are valid and cluster compress ratio is lower
than specified threshold.
- To enable compression on regular inode, there are three ways:
* chattr +c file
* chattr +c dir; touch dir/file
* mount w/ -o compress_extension=ext; touch file.ext
Compress metadata layout:
[Dnode Structure]
+-----------------------------------------------+
| cluster 1 | cluster 2 | ......... | cluster N |
+-----------------------------------------------+
. . . .
. . . .
. Compressed Cluster . . Normal Cluster .
+----------+---------+---------+---------+ +---------+---------+---------+---------+
|compr flag| block 1 | block 2 | block 3 | | block 1 | block 2 | block 3 | block 4 |
+----------+---------+---------+---------+ +---------+---------+---------+---------+
. .
. .
. .
+-------------+-------------+----------+----------------------------+
| data length | data chksum | reserved | compressed data |
+-------------+-------------+----------+----------------------------+
Changelog:
20190326:
- fix error handling of read_end_io().
- remove unneeded comments in f2fs_encrypt_one_page().
20190327:
- fix wrong use of f2fs_cluster_is_full() in f2fs_mpage_readpages().
- don't jump into loop directly to avoid uninitialized variables.
- add TODO tag in error path of f2fs_write_cache_pages().
20190328:
- fix wrong merge condition in f2fs_read_multi_pages().
- check compressed file in f2fs_post_read_required().
20190401
- allow overwrite on non-compressed cluster.
- check cluster meta before writing compressed data.
20190402
- don't preallocate blocks for compressed file.
- add lz4 compress algorithm
- process multiple post read works in one workqueue
Now f2fs supports processing post read work in multiple workqueue,
it shows low performance due to schedule overhead of multiple
workqueue executing orderly.
20190921
- compress: support buffered overwrite
C: compress cluster flag
V: valid block address
N: NEW_ADDR
One cluster contain 4 blocks
before overwrite after overwrite
- VVVV -> CVNN
- CVNN -> VVVV
- CVNN -> CVNN
- CVNN -> CVVV
- CVVV -> CVNN
- CVVV -> CVVV
20191029
- add kconfig F2FS_FS_COMPRESSION to isolate compression related
codes, add kconfig F2FS_FS_{LZO,LZ4} to cover backend algorithm.
note that: will remove lzo backend if Jaegeuk agreed that too.
- update codes according to Eric's comments.
20191101
- apply fixes from Jaegeuk
20191113
- apply fixes from Jaegeuk
- split workqueue for fsverity
20191216
- apply fixes from Jaegeuk
20200117
- fix to avoid NULL pointer dereference
[Jaegeuk Kim]
- add tracepoint for f2fs_{,de}compress_pages()
- fix many bugs and add some compression stats
- fix overwrite/mmap bugs
- address 32bit build error, reported by Geert.
- bug fixes when handling errors and i_compressed_blocks
Reported-by: <noreply@ellerman.id.au>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2019-11-01 13:07:14 +03:00
2022-11-11 13:08:30 +03:00
if ( i = = cold_count + hot_count )
return ;
if ( i < cold_count )
file_set_cold ( inode ) ;
else
file_set_hot ( inode ) ;
}
2023-01-13 14:49:25 +03:00
static struct inode * f2fs_new_inode ( struct mnt_idmap * idmap ,
2022-11-11 13:08:29 +03:00
struct inode * dir , umode_t mode ,
const char * name )
2012-11-02 12:11:10 +04:00
{
2014-09-03 02:31:18 +04:00
struct f2fs_sb_info * sbi = F2FS_I_SB ( dir ) ;
2012-11-02 12:11:10 +04:00
nid_t ino ;
struct inode * inode ;
bool nid_free = false ;
2020-09-17 07:11:27 +03:00
bool encrypt = false ;
f2fs: support flexible inline xattr size
Now, in product, more and more features based on file encryption were
introduced, their demand of xattr space is increasing, however, inline
xattr has fixed-size of 200 bytes, once inline xattr space is full, new
increased xattr data would occupy additional xattr block which may bring
us more space usage and performance regression during persisting.
In order to resolve above issue, it's better to expand inline xattr size
flexibly according to user's requirement.
So this patch introduces new filesystem feature 'flexible inline xattr',
and new mount option 'inline_xattr_size=%u', once mkfs enables the
feature, we can use the option to make f2fs supporting flexible inline
xattr size.
To support this feature, we add extra attribute i_inline_xattr_size in
inode layout, indicating that how many space inline xattr borrows from
block address mapping space in inode layout, by this, we can easily
locate and store flexible-sized inline xattr data in inode.
Inode disk layout:
+----------------------+
| .i_mode |
| ... |
| .i_ext |
+----------------------+
| .i_extra_isize |
| .i_inline_xattr_size |-----------+
| ... | |
+----------------------+ |
| .i_addr | |
| - block address or | |
| - inline data | |
+----------------------+<---+ v
| inline xattr | +---inline xattr range
+----------------------+<---+
| .i_nid |
+----------------------+
| node_footer |
| (nid, ino, offset) |
+----------------------+
Note that, we have to consider backward compatibility which reserved
inline_data space, 200 bytes, all the time, reported by Sheng Yong.
Previous inline data or directory always reserved 200 bytes in inode layout,
even if inline_xattr is disabled. In order to keep inline_dentry's structure
for backward compatibility, we get the space back only from inline_data.
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Reported-by: Sheng Yong <shengyong1@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2017-09-06 16:59:50 +03:00
int xattr_size = 0 ;
f2fs: use rw_sem instead of fs_lock(locks mutex)
The fs_locks is used to block other ops(ex, recovery) when doing checkpoint.
And each other operate routine(besides checkpoint) needs to acquire a fs_lock,
there is a terrible problem here, if there are too many concurrent threads acquiring
fs_lock, so that they will block each other and may lead to some performance problem,
but this is not the phenomenon we want to see.
Though there are some optimization patches introduced to enhance the usage of fs_lock,
but the thorough solution is using a *rw_sem* to replace the fs_lock.
Checkpoint routine takes write_sem, and other ops take read_sem, so that we can block
other ops(ex, recovery) when doing checkpoint, and other ops will not disturb each other,
this can avoid the problem described above completely.
Because of the weakness of rw_sem, the above change may introduce a potential problem
that the checkpoint thread might get starved if other threads are intensively locking
the read semaphore for I/O.(Pointed out by Xu Jin)
In order to avoid this, a wait_list is introduced, the appending read semaphore ops
will be dropped into the wait_list if checkpoint thread is waiting for write semaphore,
and will be waked up when checkpoint thread gives up write semaphore.
Thanks to Kim's previous review and test, and will be very glad to see other guys'
performance tests about this patch.
V2:
-fix the potential starvation problem.
-use more suitable func name suggested by Xu Jin.
Signed-off-by: Gu Zheng <guz.fnst@cn.fujitsu.com>
[Jaegeuk Kim: adjust minor coding standard]
Signed-off-by: Jaegeuk Kim <jaegeuk.kim@samsung.com>
2013-09-27 14:08:30 +04:00
int err ;
2012-11-02 12:11:10 +04:00
2014-06-21 08:44:02 +04:00
inode = new_inode ( dir - > i_sb ) ;
2012-11-02 12:11:10 +04:00
if ( ! inode )
return ERR_PTR ( - ENOMEM ) ;
f2fs: clean up symbol namespace
As Ted reported:
"Hi, I was looking at f2fs's sources recently, and I noticed that there
is a very large number of non-static symbols which don't have a f2fs
prefix. There's well over a hundred (see attached below).
As one example, in fs/f2fs/dir.c there is:
unsigned char get_de_type(struct f2fs_dir_entry *de)
This function is clearly only useful for f2fs, but it has a generic
name. This means that if any other file system tries to have the same
symbol name, there will be a symbol conflict and the kernel would not
successfully build. It also means that when someone is looking f2fs
sources, it's not at all obvious whether a function such as
read_data_page(), invalidate_blocks(), is a generic kernel function
found in the fs, mm, or block layers, or a f2fs specific function.
You might want to fix this at some point. Hopefully Kent's bcachefs
isn't similarly using genericly named functions, since that might
cause conflicts with f2fs's functions --- but just as this would be a
problem that we would rightly insist that Kent fix, this is something
that we should have rightly insisted that f2fs should have fixed
before it was integrated into the mainline kernel.
acquire_orphan_inode
add_ino_entry
add_orphan_inode
allocate_data_block
allocate_new_segments
alloc_nid
alloc_nid_done
alloc_nid_failed
available_free_memory
...."
This patch adds "f2fs_" prefix for all non-static symbols in order to:
a) avoid conflict with other kernel generic symbols;
b) to indicate the function is f2fs specific one instead of generic
one;
Reported-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-05-29 19:20:41 +03:00
if ( ! f2fs_alloc_nid ( sbi , & ino ) ) {
2012-11-02 12:11:10 +04:00
err = - ENOSPC ;
goto fail ;
2021-06-08 14:15:08 +03:00
}
2017-07-08 19:13:07 +03:00
nid_free = true ;
f2fs: support data compression
This patch tries to support compression in f2fs.
- New term named cluster is defined as basic unit of compression, file can
be divided into multiple clusters logically. One cluster includes 4 << n
(n >= 0) logical pages, compression size is also cluster size, each of
cluster can be compressed or not.
- In cluster metadata layout, one special flag is used to indicate cluster
is compressed one or normal one, for compressed cluster, following metadata
maps cluster to [1, 4 << n - 1] physical blocks, in where f2fs stores
data including compress header and compressed data.
- In order to eliminate write amplification during overwrite, F2FS only
support compression on write-once file, data can be compressed only when
all logical blocks in file are valid and cluster compress ratio is lower
than specified threshold.
- To enable compression on regular inode, there are three ways:
* chattr +c file
* chattr +c dir; touch dir/file
* mount w/ -o compress_extension=ext; touch file.ext
Compress metadata layout:
[Dnode Structure]
+-----------------------------------------------+
| cluster 1 | cluster 2 | ......... | cluster N |
+-----------------------------------------------+
. . . .
. . . .
. Compressed Cluster . . Normal Cluster .
+----------+---------+---------+---------+ +---------+---------+---------+---------+
|compr flag| block 1 | block 2 | block 3 | | block 1 | block 2 | block 3 | block 4 |
+----------+---------+---------+---------+ +---------+---------+---------+---------+
. .
. .
. .
+-------------+-------------+----------+----------------------------+
| data length | data chksum | reserved | compressed data |
+-------------+-------------+----------+----------------------------+
Changelog:
20190326:
- fix error handling of read_end_io().
- remove unneeded comments in f2fs_encrypt_one_page().
20190327:
- fix wrong use of f2fs_cluster_is_full() in f2fs_mpage_readpages().
- don't jump into loop directly to avoid uninitialized variables.
- add TODO tag in error path of f2fs_write_cache_pages().
20190328:
- fix wrong merge condition in f2fs_read_multi_pages().
- check compressed file in f2fs_post_read_required().
20190401
- allow overwrite on non-compressed cluster.
- check cluster meta before writing compressed data.
20190402
- don't preallocate blocks for compressed file.
- add lz4 compress algorithm
- process multiple post read works in one workqueue
Now f2fs supports processing post read work in multiple workqueue,
it shows low performance due to schedule overhead of multiple
workqueue executing orderly.
20190921
- compress: support buffered overwrite
C: compress cluster flag
V: valid block address
N: NEW_ADDR
One cluster contain 4 blocks
before overwrite after overwrite
- VVVV -> CVNN
- CVNN -> VVVV
- CVNN -> CVNN
- CVNN -> CVVV
- CVVV -> CVNN
- CVVV -> CVVV
20191029
- add kconfig F2FS_FS_COMPRESSION to isolate compression related
codes, add kconfig F2FS_FS_{LZO,LZ4} to cover backend algorithm.
note that: will remove lzo backend if Jaegeuk agreed that too.
- update codes according to Eric's comments.
20191101
- apply fixes from Jaegeuk
20191113
- apply fixes from Jaegeuk
- split workqueue for fsverity
20191216
- apply fixes from Jaegeuk
20200117
- fix to avoid NULL pointer dereference
[Jaegeuk Kim]
- add tracepoint for f2fs_{,de}compress_pages()
- fix many bugs and add some compression stats
- fix overwrite/mmap bugs
- address 32bit build error, reported by Geert.
- bug fixes when handling errors and i_compressed_blocks
Reported-by: <noreply@ellerman.id.au>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2019-11-01 13:07:14 +03:00
2023-01-13 14:49:25 +03:00
inode_init_owner ( idmap , inode , dir , mode ) ;
f2fs: support data compression
This patch tries to support compression in f2fs.
- New term named cluster is defined as basic unit of compression, file can
be divided into multiple clusters logically. One cluster includes 4 << n
(n >= 0) logical pages, compression size is also cluster size, each of
cluster can be compressed or not.
- In cluster metadata layout, one special flag is used to indicate cluster
is compressed one or normal one, for compressed cluster, following metadata
maps cluster to [1, 4 << n - 1] physical blocks, in where f2fs stores
data including compress header and compressed data.
- In order to eliminate write amplification during overwrite, F2FS only
support compression on write-once file, data can be compressed only when
all logical blocks in file are valid and cluster compress ratio is lower
than specified threshold.
- To enable compression on regular inode, there are three ways:
* chattr +c file
* chattr +c dir; touch dir/file
* mount w/ -o compress_extension=ext; touch file.ext
Compress metadata layout:
[Dnode Structure]
+-----------------------------------------------+
| cluster 1 | cluster 2 | ......... | cluster N |
+-----------------------------------------------+
. . . .
. . . .
. Compressed Cluster . . Normal Cluster .
+----------+---------+---------+---------+ +---------+---------+---------+---------+
|compr flag| block 1 | block 2 | block 3 | | block 1 | block 2 | block 3 | block 4 |
+----------+---------+---------+---------+ +---------+---------+---------+---------+
. .
. .
. .
+-------------+-------------+----------+----------------------------+
| data length | data chksum | reserved | compressed data |
+-------------+-------------+----------+----------------------------+
Changelog:
20190326:
- fix error handling of read_end_io().
- remove unneeded comments in f2fs_encrypt_one_page().
20190327:
- fix wrong use of f2fs_cluster_is_full() in f2fs_mpage_readpages().
- don't jump into loop directly to avoid uninitialized variables.
- add TODO tag in error path of f2fs_write_cache_pages().
20190328:
- fix wrong merge condition in f2fs_read_multi_pages().
- check compressed file in f2fs_post_read_required().
20190401
- allow overwrite on non-compressed cluster.
- check cluster meta before writing compressed data.
20190402
- don't preallocate blocks for compressed file.
- add lz4 compress algorithm
- process multiple post read works in one workqueue
Now f2fs supports processing post read work in multiple workqueue,
it shows low performance due to schedule overhead of multiple
workqueue executing orderly.
20190921
- compress: support buffered overwrite
C: compress cluster flag
V: valid block address
N: NEW_ADDR
One cluster contain 4 blocks
before overwrite after overwrite
- VVVV -> CVNN
- CVNN -> VVVV
- CVNN -> CVNN
- CVNN -> CVVV
- CVVV -> CVNN
- CVVV -> CVVV
20191029
- add kconfig F2FS_FS_COMPRESSION to isolate compression related
codes, add kconfig F2FS_FS_{LZO,LZ4} to cover backend algorithm.
note that: will remove lzo backend if Jaegeuk agreed that too.
- update codes according to Eric's comments.
20191101
- apply fixes from Jaegeuk
20191113
- apply fixes from Jaegeuk
- split workqueue for fsverity
20191216
- apply fixes from Jaegeuk
20200117
- fix to avoid NULL pointer dereference
[Jaegeuk Kim]
- add tracepoint for f2fs_{,de}compress_pages()
- fix many bugs and add some compression stats
- fix overwrite/mmap bugs
- address 32bit build error, reported by Geert.
- bug fixes when handling errors and i_compressed_blocks
Reported-by: <noreply@ellerman.id.au>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2019-11-01 13:07:14 +03:00
2012-11-02 12:11:10 +04:00
inode - > i_ino = ino ;
inode - > i_blocks = 0 ;
2023-10-04 21:52:21 +03:00
simple_inode_init_ts ( inode ) ;
F2FS_I ( inode ) - > i_crtime = inode_get_mtime ( inode ) ;
2022-10-05 18:43:22 +03:00
inode - > i_generation = get_random_u32 ( ) ;
2012-11-02 12:11:10 +04:00
2018-05-07 15:28:52 +03:00
if ( S_ISDIR ( inode - > i_mode ) )
F2FS_I ( inode ) - > i_current_depth = 1 ;
2012-11-02 12:11:10 +04:00
err = insert_inode_locked ( inode ) ;
if ( err ) {
err = - EINVAL ;
2015-08-16 22:38:15 +03:00
goto fail ;
2012-11-02 12:11:10 +04:00
}
2014-09-24 14:19:10 +04:00
2018-10-24 13:34:26 +03:00
if ( f2fs_sb_has_project_quota ( sbi ) & &
2018-04-03 10:08:17 +03:00
( F2FS_I ( dir ) - > i_flags & F2FS_PROJINHERIT_FL ) )
2017-07-25 19:01:41 +03:00
F2FS_I ( inode ) - > i_projid = F2FS_I ( dir ) - > i_projid ;
else
2023-01-13 14:49:09 +03:00
F2FS_I ( inode ) - > i_projid = make_kprojid ( & init_user_ns ,
2017-07-25 19:01:41 +03:00
F2FS_DEF_PROJID ) ;
2020-09-17 07:11:27 +03:00
err = fscrypt_prepare_new_inode ( dir , inode , & encrypt ) ;
if ( err )
goto fail_drop ;
2021-10-28 16:03:05 +03:00
err = f2fs_dquot_initialize ( inode ) ;
2017-07-08 19:13:07 +03:00
if ( err )
goto fail_drop ;
2018-01-11 05:26:19 +03:00
set_inode_flag ( inode , FI_NEW_INODE ) ;
2020-09-17 07:11:27 +03:00
if ( encrypt )
2015-04-22 06:39:58 +03:00
f2fs_set_encrypted_inode ( inode ) ;
2018-10-24 13:34:26 +03:00
if ( f2fs_sb_has_extra_attr ( sbi ) ) {
2017-07-18 19:19:06 +03:00
set_inode_flag ( inode , FI_EXTRA_ATTR ) ;
F2FS_I ( inode ) - > i_extra_isize = F2FS_TOTAL_EXTRA_ATTR_SIZE ;
}
2016-05-20 20:13:22 +03:00
if ( test_opt ( sbi , INLINE_XATTR ) )
set_inode_flag ( inode , FI_INLINE_XATTR ) ;
f2fs: support flexible inline xattr size
Now, in product, more and more features based on file encryption were
introduced, their demand of xattr space is increasing, however, inline
xattr has fixed-size of 200 bytes, once inline xattr space is full, new
increased xattr data would occupy additional xattr block which may bring
us more space usage and performance regression during persisting.
In order to resolve above issue, it's better to expand inline xattr size
flexibly according to user's requirement.
So this patch introduces new filesystem feature 'flexible inline xattr',
and new mount option 'inline_xattr_size=%u', once mkfs enables the
feature, we can use the option to make f2fs supporting flexible inline
xattr size.
To support this feature, we add extra attribute i_inline_xattr_size in
inode layout, indicating that how many space inline xattr borrows from
block address mapping space in inode layout, by this, we can easily
locate and store flexible-sized inline xattr data in inode.
Inode disk layout:
+----------------------+
| .i_mode |
| ... |
| .i_ext |
+----------------------+
| .i_extra_isize |
| .i_inline_xattr_size |-----------+
| ... | |
+----------------------+ |
| .i_addr | |
| - block address or | |
| - inline data | |
+----------------------+<---+ v
| inline xattr | +---inline xattr range
+----------------------+<---+
| .i_nid |
+----------------------+
| node_footer |
| (nid, ino, offset) |
+----------------------+
Note that, we have to consider backward compatibility which reserved
inline_data space, 200 bytes, all the time, reported by Sheng Yong.
Previous inline data or directory always reserved 200 bytes in inode layout,
even if inline_xattr is disabled. In order to keep inline_dentry's structure
for backward compatibility, we get the space back only from inline_data.
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Reported-by: Sheng Yong <shengyong1@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2017-09-06 16:59:50 +03:00
2015-04-23 20:27:21 +03:00
if ( f2fs_may_inline_dentry ( inode ) )
2016-05-20 20:13:22 +03:00
set_inode_flag ( inode , FI_INLINE_DENTRY ) ;
2014-09-24 14:19:10 +04:00
2018-10-24 13:34:26 +03:00
if ( f2fs_sb_has_flexible_inline_xattr ( sbi ) ) {
f2fs: support flexible inline xattr size
Now, in product, more and more features based on file encryption were
introduced, their demand of xattr space is increasing, however, inline
xattr has fixed-size of 200 bytes, once inline xattr space is full, new
increased xattr data would occupy additional xattr block which may bring
us more space usage and performance regression during persisting.
In order to resolve above issue, it's better to expand inline xattr size
flexibly according to user's requirement.
So this patch introduces new filesystem feature 'flexible inline xattr',
and new mount option 'inline_xattr_size=%u', once mkfs enables the
feature, we can use the option to make f2fs supporting flexible inline
xattr size.
To support this feature, we add extra attribute i_inline_xattr_size in
inode layout, indicating that how many space inline xattr borrows from
block address mapping space in inode layout, by this, we can easily
locate and store flexible-sized inline xattr data in inode.
Inode disk layout:
+----------------------+
| .i_mode |
| ... |
| .i_ext |
+----------------------+
| .i_extra_isize |
| .i_inline_xattr_size |-----------+
| ... | |
+----------------------+ |
| .i_addr | |
| - block address or | |
| - inline data | |
+----------------------+<---+ v
| inline xattr | +---inline xattr range
+----------------------+<---+
| .i_nid |
+----------------------+
| node_footer |
| (nid, ino, offset) |
+----------------------+
Note that, we have to consider backward compatibility which reserved
inline_data space, 200 bytes, all the time, reported by Sheng Yong.
Previous inline data or directory always reserved 200 bytes in inode layout,
even if inline_xattr is disabled. In order to keep inline_dentry's structure
for backward compatibility, we get the space back only from inline_data.
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Reported-by: Sheng Yong <shengyong1@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2017-09-06 16:59:50 +03:00
f2fs_bug_on ( sbi , ! f2fs_has_extra_attr ( inode ) ) ;
if ( f2fs_has_inline_xattr ( inode ) )
2018-03-08 09:22:56 +03:00
xattr_size = F2FS_OPTION ( sbi ) . inline_xattr_size ;
f2fs: support flexible inline xattr size
Now, in product, more and more features based on file encryption were
introduced, their demand of xattr space is increasing, however, inline
xattr has fixed-size of 200 bytes, once inline xattr space is full, new
increased xattr data would occupy additional xattr block which may bring
us more space usage and performance regression during persisting.
In order to resolve above issue, it's better to expand inline xattr size
flexibly according to user's requirement.
So this patch introduces new filesystem feature 'flexible inline xattr',
and new mount option 'inline_xattr_size=%u', once mkfs enables the
feature, we can use the option to make f2fs supporting flexible inline
xattr size.
To support this feature, we add extra attribute i_inline_xattr_size in
inode layout, indicating that how many space inline xattr borrows from
block address mapping space in inode layout, by this, we can easily
locate and store flexible-sized inline xattr data in inode.
Inode disk layout:
+----------------------+
| .i_mode |
| ... |
| .i_ext |
+----------------------+
| .i_extra_isize |
| .i_inline_xattr_size |-----------+
| ... | |
+----------------------+ |
| .i_addr | |
| - block address or | |
| - inline data | |
+----------------------+<---+ v
| inline xattr | +---inline xattr range
+----------------------+<---+
| .i_nid |
+----------------------+
| node_footer |
| (nid, ino, offset) |
+----------------------+
Note that, we have to consider backward compatibility which reserved
inline_data space, 200 bytes, all the time, reported by Sheng Yong.
Previous inline data or directory always reserved 200 bytes in inode layout,
even if inline_xattr is disabled. In order to keep inline_dentry's structure
for backward compatibility, we get the space back only from inline_data.
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Reported-by: Sheng Yong <shengyong1@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2017-09-06 16:59:50 +03:00
/* Otherwise, will be 0 */
} else if ( f2fs_has_inline_xattr ( inode ) | |
f2fs_has_inline_dentry ( inode ) ) {
xattr_size = DEFAULT_INLINE_XATTR_ADDRS ;
f2fs: support data compression
This patch tries to support compression in f2fs.
- New term named cluster is defined as basic unit of compression, file can
be divided into multiple clusters logically. One cluster includes 4 << n
(n >= 0) logical pages, compression size is also cluster size, each of
cluster can be compressed or not.
- In cluster metadata layout, one special flag is used to indicate cluster
is compressed one or normal one, for compressed cluster, following metadata
maps cluster to [1, 4 << n - 1] physical blocks, in where f2fs stores
data including compress header and compressed data.
- In order to eliminate write amplification during overwrite, F2FS only
support compression on write-once file, data can be compressed only when
all logical blocks in file are valid and cluster compress ratio is lower
than specified threshold.
- To enable compression on regular inode, there are three ways:
* chattr +c file
* chattr +c dir; touch dir/file
* mount w/ -o compress_extension=ext; touch file.ext
Compress metadata layout:
[Dnode Structure]
+-----------------------------------------------+
| cluster 1 | cluster 2 | ......... | cluster N |
+-----------------------------------------------+
. . . .
. . . .
. Compressed Cluster . . Normal Cluster .
+----------+---------+---------+---------+ +---------+---------+---------+---------+
|compr flag| block 1 | block 2 | block 3 | | block 1 | block 2 | block 3 | block 4 |
+----------+---------+---------+---------+ +---------+---------+---------+---------+
. .
. .
. .
+-------------+-------------+----------+----------------------------+
| data length | data chksum | reserved | compressed data |
+-------------+-------------+----------+----------------------------+
Changelog:
20190326:
- fix error handling of read_end_io().
- remove unneeded comments in f2fs_encrypt_one_page().
20190327:
- fix wrong use of f2fs_cluster_is_full() in f2fs_mpage_readpages().
- don't jump into loop directly to avoid uninitialized variables.
- add TODO tag in error path of f2fs_write_cache_pages().
20190328:
- fix wrong merge condition in f2fs_read_multi_pages().
- check compressed file in f2fs_post_read_required().
20190401
- allow overwrite on non-compressed cluster.
- check cluster meta before writing compressed data.
20190402
- don't preallocate blocks for compressed file.
- add lz4 compress algorithm
- process multiple post read works in one workqueue
Now f2fs supports processing post read work in multiple workqueue,
it shows low performance due to schedule overhead of multiple
workqueue executing orderly.
20190921
- compress: support buffered overwrite
C: compress cluster flag
V: valid block address
N: NEW_ADDR
One cluster contain 4 blocks
before overwrite after overwrite
- VVVV -> CVNN
- CVNN -> VVVV
- CVNN -> CVNN
- CVNN -> CVVV
- CVVV -> CVNN
- CVVV -> CVVV
20191029
- add kconfig F2FS_FS_COMPRESSION to isolate compression related
codes, add kconfig F2FS_FS_{LZO,LZ4} to cover backend algorithm.
note that: will remove lzo backend if Jaegeuk agreed that too.
- update codes according to Eric's comments.
20191101
- apply fixes from Jaegeuk
20191113
- apply fixes from Jaegeuk
- split workqueue for fsverity
20191216
- apply fixes from Jaegeuk
20200117
- fix to avoid NULL pointer dereference
[Jaegeuk Kim]
- add tracepoint for f2fs_{,de}compress_pages()
- fix many bugs and add some compression stats
- fix overwrite/mmap bugs
- address 32bit build error, reported by Geert.
- bug fixes when handling errors and i_compressed_blocks
Reported-by: <noreply@ellerman.id.au>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2019-11-01 13:07:14 +03:00
}
f2fs: support flexible inline xattr size
Now, in product, more and more features based on file encryption were
introduced, their demand of xattr space is increasing, however, inline
xattr has fixed-size of 200 bytes, once inline xattr space is full, new
increased xattr data would occupy additional xattr block which may bring
us more space usage and performance regression during persisting.
In order to resolve above issue, it's better to expand inline xattr size
flexibly according to user's requirement.
So this patch introduces new filesystem feature 'flexible inline xattr',
and new mount option 'inline_xattr_size=%u', once mkfs enables the
feature, we can use the option to make f2fs supporting flexible inline
xattr size.
To support this feature, we add extra attribute i_inline_xattr_size in
inode layout, indicating that how many space inline xattr borrows from
block address mapping space in inode layout, by this, we can easily
locate and store flexible-sized inline xattr data in inode.
Inode disk layout:
+----------------------+
| .i_mode |
| ... |
| .i_ext |
+----------------------+
| .i_extra_isize |
| .i_inline_xattr_size |-----------+
| ... | |
+----------------------+ |
| .i_addr | |
| - block address or | |
| - inline data | |
+----------------------+<---+ v
| inline xattr | +---inline xattr range
+----------------------+<---+
| .i_nid |
+----------------------+
| node_footer |
| (nid, ino, offset) |
+----------------------+
Note that, we have to consider backward compatibility which reserved
inline_data space, 200 bytes, all the time, reported by Sheng Yong.
Previous inline data or directory always reserved 200 bytes in inode layout,
even if inline_xattr is disabled. In order to keep inline_dentry's structure
for backward compatibility, we get the space back only from inline_data.
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Reported-by: Sheng Yong <shengyong1@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2017-09-06 16:59:50 +03:00
F2FS_I ( inode ) - > i_inline_xattr_size = xattr_size ;
2017-07-25 19:01:41 +03:00
F2FS_I ( inode ) - > i_flags =
f2fs_mask_flags ( mode , F2FS_I ( dir ) - > i_flags & F2FS_FL_INHERITED ) ;
2017-08-30 13:04:47 +03:00
if ( S_ISDIR ( inode - > i_mode ) )
2018-04-03 10:08:17 +03:00
F2FS_I ( inode ) - > i_flags | = F2FS_INDEX_FL ;
2017-08-30 13:04:47 +03:00
2018-04-03 10:08:17 +03:00
if ( F2FS_I ( inode ) - > i_flags & F2FS_PROJINHERIT_FL )
2017-07-25 19:01:41 +03:00
set_inode_flag ( inode , FI_PROJ_INHERIT ) ;
2022-11-11 13:08:29 +03:00
/* Check compression first. */
set_compress_new_inode ( sbi , dir , inode , name ) ;
f2fs: support data compression
This patch tries to support compression in f2fs.
- New term named cluster is defined as basic unit of compression, file can
be divided into multiple clusters logically. One cluster includes 4 << n
(n >= 0) logical pages, compression size is also cluster size, each of
cluster can be compressed or not.
- In cluster metadata layout, one special flag is used to indicate cluster
is compressed one or normal one, for compressed cluster, following metadata
maps cluster to [1, 4 << n - 1] physical blocks, in where f2fs stores
data including compress header and compressed data.
- In order to eliminate write amplification during overwrite, F2FS only
support compression on write-once file, data can be compressed only when
all logical blocks in file are valid and cluster compress ratio is lower
than specified threshold.
- To enable compression on regular inode, there are three ways:
* chattr +c file
* chattr +c dir; touch dir/file
* mount w/ -o compress_extension=ext; touch file.ext
Compress metadata layout:
[Dnode Structure]
+-----------------------------------------------+
| cluster 1 | cluster 2 | ......... | cluster N |
+-----------------------------------------------+
. . . .
. . . .
. Compressed Cluster . . Normal Cluster .
+----------+---------+---------+---------+ +---------+---------+---------+---------+
|compr flag| block 1 | block 2 | block 3 | | block 1 | block 2 | block 3 | block 4 |
+----------+---------+---------+---------+ +---------+---------+---------+---------+
. .
. .
. .
+-------------+-------------+----------+----------------------------+
| data length | data chksum | reserved | compressed data |
+-------------+-------------+----------+----------------------------+
Changelog:
20190326:
- fix error handling of read_end_io().
- remove unneeded comments in f2fs_encrypt_one_page().
20190327:
- fix wrong use of f2fs_cluster_is_full() in f2fs_mpage_readpages().
- don't jump into loop directly to avoid uninitialized variables.
- add TODO tag in error path of f2fs_write_cache_pages().
20190328:
- fix wrong merge condition in f2fs_read_multi_pages().
- check compressed file in f2fs_post_read_required().
20190401
- allow overwrite on non-compressed cluster.
- check cluster meta before writing compressed data.
20190402
- don't preallocate blocks for compressed file.
- add lz4 compress algorithm
- process multiple post read works in one workqueue
Now f2fs supports processing post read work in multiple workqueue,
it shows low performance due to schedule overhead of multiple
workqueue executing orderly.
20190921
- compress: support buffered overwrite
C: compress cluster flag
V: valid block address
N: NEW_ADDR
One cluster contain 4 blocks
before overwrite after overwrite
- VVVV -> CVNN
- CVNN -> VVVV
- CVNN -> CVNN
- CVNN -> CVVV
- CVVV -> CVNN
- CVVV -> CVVV
20191029
- add kconfig F2FS_FS_COMPRESSION to isolate compression related
codes, add kconfig F2FS_FS_{LZO,LZ4} to cover backend algorithm.
note that: will remove lzo backend if Jaegeuk agreed that too.
- update codes according to Eric's comments.
20191101
- apply fixes from Jaegeuk
20191113
- apply fixes from Jaegeuk
- split workqueue for fsverity
20191216
- apply fixes from Jaegeuk
20200117
- fix to avoid NULL pointer dereference
[Jaegeuk Kim]
- add tracepoint for f2fs_{,de}compress_pages()
- fix many bugs and add some compression stats
- fix overwrite/mmap bugs
- address 32bit build error, reported by Geert.
- bug fixes when handling errors and i_compressed_blocks
Reported-by: <noreply@ellerman.id.au>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2019-11-01 13:07:14 +03:00
2022-06-01 04:27:09 +03:00
/* Should enable inline_data after compression set */
if ( test_opt ( sbi , INLINE_DATA ) & & f2fs_may_inline_data ( inode ) )
set_inode_flag ( inode , FI_INLINE_DATA ) ;
2022-11-11 13:08:30 +03:00
if ( name & & ! test_opt ( sbi , DISABLE_EXT_IDENTIFY ) )
set_file_temperature ( sbi , inode , name ) ;
2022-06-01 04:27:09 +03:00
stat_inc_inline_xattr ( inode ) ;
stat_inc_inline_inode ( inode ) ;
stat_inc_inline_dir ( inode ) ;
2018-10-07 14:06:15 +03:00
f2fs_set_inode_flags ( inode ) ;
2022-12-03 00:51:09 +03:00
f2fs_init_extent_tree ( inode ) ;
2013-04-25 08:24:33 +04:00
trace_f2fs_new_inode ( inode , 0 ) ;
2012-11-02 12:11:10 +04:00
return inode ;
fail :
2013-04-25 08:24:33 +04:00
trace_f2fs_new_inode ( inode , err ) ;
2013-04-30 06:33:27 +04:00
make_bad_inode ( inode ) ;
2012-11-02 12:11:10 +04:00
if ( nid_free )
2016-05-20 20:13:22 +03:00
set_inode_flag ( inode , FI_FREE_NID ) ;
2015-06-23 20:36:08 +03:00
iput ( inode ) ;
2012-11-02 12:11:10 +04:00
return ERR_PTR ( err ) ;
2017-07-08 19:13:07 +03:00
fail_drop :
trace_f2fs_new_inode ( inode , err ) ;
dquot_drop ( inode ) ;
inode - > i_flags | = S_NOQUOTA ;
if ( nid_free )
set_inode_flag ( inode , FI_FREE_NID ) ;
clear_nlink ( inode ) ;
unlock_new_inode ( inode ) ;
iput ( inode ) ;
return ERR_PTR ( err ) ;
f2fs: support data compression
This patch tries to support compression in f2fs.
- New term named cluster is defined as basic unit of compression, file can
be divided into multiple clusters logically. One cluster includes 4 << n
(n >= 0) logical pages, compression size is also cluster size, each of
cluster can be compressed or not.
- In cluster metadata layout, one special flag is used to indicate cluster
is compressed one or normal one, for compressed cluster, following metadata
maps cluster to [1, 4 << n - 1] physical blocks, in where f2fs stores
data including compress header and compressed data.
- In order to eliminate write amplification during overwrite, F2FS only
support compression on write-once file, data can be compressed only when
all logical blocks in file are valid and cluster compress ratio is lower
than specified threshold.
- To enable compression on regular inode, there are three ways:
* chattr +c file
* chattr +c dir; touch dir/file
* mount w/ -o compress_extension=ext; touch file.ext
Compress metadata layout:
[Dnode Structure]
+-----------------------------------------------+
| cluster 1 | cluster 2 | ......... | cluster N |
+-----------------------------------------------+
. . . .
. . . .
. Compressed Cluster . . Normal Cluster .
+----------+---------+---------+---------+ +---------+---------+---------+---------+
|compr flag| block 1 | block 2 | block 3 | | block 1 | block 2 | block 3 | block 4 |
+----------+---------+---------+---------+ +---------+---------+---------+---------+
. .
. .
. .
+-------------+-------------+----------+----------------------------+
| data length | data chksum | reserved | compressed data |
+-------------+-------------+----------+----------------------------+
Changelog:
20190326:
- fix error handling of read_end_io().
- remove unneeded comments in f2fs_encrypt_one_page().
20190327:
- fix wrong use of f2fs_cluster_is_full() in f2fs_mpage_readpages().
- don't jump into loop directly to avoid uninitialized variables.
- add TODO tag in error path of f2fs_write_cache_pages().
20190328:
- fix wrong merge condition in f2fs_read_multi_pages().
- check compressed file in f2fs_post_read_required().
20190401
- allow overwrite on non-compressed cluster.
- check cluster meta before writing compressed data.
20190402
- don't preallocate blocks for compressed file.
- add lz4 compress algorithm
- process multiple post read works in one workqueue
Now f2fs supports processing post read work in multiple workqueue,
it shows low performance due to schedule overhead of multiple
workqueue executing orderly.
20190921
- compress: support buffered overwrite
C: compress cluster flag
V: valid block address
N: NEW_ADDR
One cluster contain 4 blocks
before overwrite after overwrite
- VVVV -> CVNN
- CVNN -> VVVV
- CVNN -> CVNN
- CVNN -> CVVV
- CVVV -> CVNN
- CVVV -> CVVV
20191029
- add kconfig F2FS_FS_COMPRESSION to isolate compression related
codes, add kconfig F2FS_FS_{LZO,LZ4} to cover backend algorithm.
note that: will remove lzo backend if Jaegeuk agreed that too.
- update codes according to Eric's comments.
20191101
- apply fixes from Jaegeuk
20191113
- apply fixes from Jaegeuk
- split workqueue for fsverity
20191216
- apply fixes from Jaegeuk
20200117
- fix to avoid NULL pointer dereference
[Jaegeuk Kim]
- add tracepoint for f2fs_{,de}compress_pages()
- fix many bugs and add some compression stats
- fix overwrite/mmap bugs
- address 32bit build error, reported by Geert.
- bug fixes when handling errors and i_compressed_blocks
Reported-by: <noreply@ellerman.id.au>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2019-11-01 13:07:14 +03:00
}
2023-01-13 14:49:13 +03:00
/*
 * f2fs_create - create a new inode under @dir and link it at @dentry.
 *
 * @idmap and @mode are forwarded to f2fs_new_inode() together with the
 * dentry name; @excl is not used by this function.
 *
 * On success the nid allocation is completed with f2fs_alloc_nid_done(),
 * the dentry is instantiated with d_instantiate_new(), f2fs_sync_fs() is
 * called when IS_DIRSYNC(dir) is true, f2fs_balance_fs() is called, and 0
 * is returned.
 *
 * Returns -EIO when f2fs_cp_error() is true, -ENOSPC when
 * f2fs_is_checkpoint_ready() is false, or the error reported by
 * f2fs_dquot_initialize(), f2fs_new_inode() or f2fs_add_link(). When
 * f2fs_add_link() fails the inode is handed to f2fs_handle_failed_inode().
 */
static int f2fs_create ( struct mnt_idmap * idmap , struct inode * dir ,
2021-01-21 16:19:43 +03:00
struct dentry * dentry , umode_t mode , bool excl )
2012-11-02 12:11:10 +04:00
{
2014-09-03 02:31:18 +04:00
struct f2fs_sb_info * sbi = F2FS_I_SB ( dir ) ;
2012-11-02 12:11:10 +04:00
struct inode * inode ;
nid_t ino = 0 ;
f2fs: use rw_sem instead of fs_lock(locks mutex)
The fs_locks is used to block other ops(ex, recovery) when doing checkpoint.
And each other operate routine(besides checkpoint) needs to acquire a fs_lock,
there is a terrible problem here, if these are too many concurrency threads acquiring
fs_lock, so that they will block each other and may lead to some performance problem,
but this is not the phenomenon we want to see.
Though there are some optimization patches introduced to enhance the usage of fs_lock,
but the thorough solution is using a *rw_sem* to replace the fs_lock.
Checkpoint routine takes write_sem, and other ops take read_sem, so that we can block
other ops(ex, recovery) when doing checkpoint, and other ops will not disturb each other,
this can avoid the problem described above completely.
Because of the weakness of rw_sem, the above change may introduce a potential problem
that the checkpoint thread might get starved if other threads are intensively locking
the read semaphore for I/O.(Pointed out by Xu Jin)
In order to avoid this, a wait_list is introduced, the appending read semaphore ops
will be dropped into the wait_list if checkpoint thread is waiting for write semaphore,
and will be waked up when checkpoint thread gives up write semaphore.
Thanks to Kim's previous review and test, and will be very glad to see other guys'
performance tests about this patch.
V2:
-fix the potential starvation problem.
-use more suitable func name suggested by Xu Jin.
Signed-off-by: Gu Zheng <guz.fnst@cn.fujitsu.com>
[Jaegeuk Kim: adjust minor coding standard]
Signed-off-by: Jaegeuk Kim <jaegeuk.kim@samsung.com>
2013-09-27 14:08:30 +04:00
int err ;
2012-11-02 12:11:10 +04:00
2017-10-24 00:48:49 +03:00
/* Refuse to create anything once f2fs_cp_error() reports an error. */
if ( unlikely ( f2fs_cp_error ( sbi ) ) )
return - EIO ;
2019-08-23 12:58:36 +03:00
if ( ! f2fs_is_checkpoint_ready ( sbi ) )
return - ENOSPC ;
2017-10-24 00:48:49 +03:00
2021-10-28 16:03:05 +03:00
/* Quota for the parent directory is initialized before the new inode is allocated. */
err = f2fs_dquot_initialize ( dir ) ;
2017-07-08 19:13:07 +03:00
if ( err )
return err ;
2023-01-13 14:49:25 +03:00
inode = f2fs_new_inode ( idmap , dir , mode , dentry - > d_name . name ) ;
2012-11-02 12:11:10 +04:00
if ( IS_ERR ( inode ) )
return PTR_ERR ( inode ) ;
/* Install the f2fs file inode, file and data-block address-space operations. */
inode - > i_op = & f2fs_file_inode_operations ;
inode - > i_fop = & f2fs_file_operations ;
inode - > i_mapping - > a_ops = & f2fs_dblock_aops ;
/* Saved here and passed to f2fs_alloc_nid_done() after the link succeeds. */
ino = inode - > i_ino ;
f2fs: use rw_sem instead of fs_lock(locks mutex)
The fs_locks is used to block other ops(ex, recovery) when doing checkpoint.
And each other operate routine(besides checkpoint) needs to acquire a fs_lock,
there is a terrible problem here, if these are too many concurrency threads acquiring
fs_lock, so that they will block each other and may lead to some performance problem,
but this is not the phenomenon we want to see.
Though there are some optimization patches introduced to enhance the usage of fs_lock,
but the thorough solution is using a *rw_sem* to replace the fs_lock.
Checkpoint routine takes write_sem, and other ops take read_sem, so that we can block
other ops(ex, recovery) when doing checkpoint, and other ops will not disturb each other,
this can avoid the problem described above completely.
Because of the weakness of rw_sem, the above change may introduce a potential problem
that the checkpoint thread might get starved if other threads are intensively locking
the read semaphore for I/O.(Pointed out by Xu Jin)
In order to avoid this, a wait_list is introduced, the appending read semaphore ops
will be dropped into the wait_list if checkpoint thread is waiting for write semaphore,
and will be waked up when checkpoint thread gives up write semaphore.
Thanks to Kim's previous review and test, and will be very glad to see other guys'
performance tests about this patch.
V2:
-fix the potential starvation problem.
-use more suitable func name suggested by Xu Jin.
Signed-off-by: Gu Zheng <guz.fnst@cn.fujitsu.com>
[Jaegeuk Kim: adjust minor coding standard]
Signed-off-by: Jaegeuk Kim <jaegeuk.kim@samsung.com>
2013-09-27 14:08:30 +04:00
/* The directory entry is added between f2fs_lock_op() and f2fs_unlock_op(). */
f2fs_lock_op ( sbi ) ;
2012-11-02 12:11:10 +04:00
err = f2fs_add_link ( dentry , inode ) ;
/*
 * NOTE(review): this error path jumps to "out" without calling
 * f2fs_unlock_op(); presumably f2fs_handle_failed_inode() releases
 * the lock - confirm against its definition.
 */
if ( err )
goto out ;
2014-09-25 22:55:53 +04:00
f2fs_unlock_op ( sbi ) ;
2012-11-02 12:11:10 +04:00
f2fs: clean up symbol namespace
As Ted reported:
"Hi, I was looking at f2fs's sources recently, and I noticed that there
is a very large number of non-static symbols which don't have a f2fs
prefix. There's well over a hundred (see attached below).
As one example, in fs/f2fs/dir.c there is:
unsigned char get_de_type(struct f2fs_dir_entry *de)
This function is clearly only useful for f2fs, but it has a generic
name. This means that if any other file system tries to have the same
symbol name, there will be a symbol conflict and the kernel would not
successfully build. It also means that when someone is looking f2fs
sources, it's not at all obvious whether a function such as
read_data_page(), invalidate_blocks(), is a generic kernel function
found in the fs, mm, or block layers, or a f2fs specific function.
You might want to fix this at some point. Hopefully Kent's bcachefs
isn't similarly using genericly named functions, since that might
cause conflicts with f2fs's functions --- but just as this would be a
problem that we would rightly insist that Kent fix, this is something
that we should have rightly insisted that f2fs should have fixed
before it was integrated into the mainline kernel.
acquire_orphan_inode
add_ino_entry
add_orphan_inode
allocate_data_block
allocate_new_segments
alloc_nid
alloc_nid_done
alloc_nid_failed
available_free_memory
...."
This patch adds "f2fs_" prefix for all non-static symbols in order to:
a) avoid conflict with other kernel generic symbols;
b) to indicate the function is f2fs specific one instead of generic
one;
Reported-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-05-29 19:20:41 +03:00
f2fs_alloc_nid_done ( sbi , ino ) ;
2012-11-02 12:11:10 +04:00
2018-05-04 15:23:01 +03:00
d_instantiate_new ( dentry , inode ) ;
2014-11-10 09:15:31 +03:00
if ( IS_DIRSYNC ( dir ) )
f2fs_sync_fs ( sbi - > sb , 1 ) ;
2017-04-12 05:01:26 +03:00
f2fs_balance_fs ( sbi , true ) ;
2012-11-02 12:11:10 +04:00
return 0 ;
out :
f2fs: clean up symbol namespace
As Ted reported:
"Hi, I was looking at f2fs's sources recently, and I noticed that there
is a very large number of non-static symbols which don't have a f2fs
prefix. There's well over a hundred (see attached below).
As one example, in fs/f2fs/dir.c there is:
unsigned char get_de_type(struct f2fs_dir_entry *de)
This function is clearly only useful for f2fs, but it has a generic
name. This means that if any other file system tries to have the same
symbol name, there will be a symbol conflict and the kernel would not
successfully build. It also means that when someone is looking f2fs
sources, it's not at all obvious whether a function such as
read_data_page(), invalidate_blocks(), is a generic kernel function
found in the fs, mm, or block layers, or a f2fs specific function.
You might want to fix this at some point. Hopefully Kent's bcachefs
isn't similarly using genericly named functions, since that might
cause conflicts with f2fs's functions --- but just as this would be a
problem that we would rightly insist that Kent fix, this is something
that we should have rightly insisted that f2fs should have fixed
before it was integrated into the mainline kernel.
acquire_orphan_inode
add_ino_entry
add_orphan_inode
allocate_data_block
allocate_new_segments
alloc_nid
alloc_nid_done
alloc_nid_failed
available_free_memory
...."
This patch adds "f2fs_" prefix for all non-static symbols in order to:
a) avoid conflict with other kernel generic symbols;
b) to indicate the function is f2fs specific one instead of generic
one;
Reported-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-05-29 19:20:41 +03:00
f2fs_handle_failed_inode ( inode ) ;
2012-11-02 12:11:10 +04:00
return err ;
}
static int f2fs_link ( struct dentry * old_dentry , struct inode * dir ,
struct dentry * dentry )
{
2015-03-18 01:25:59 +03:00
struct inode * inode = d_inode ( old_dentry ) ;
2014-09-03 02:31:18 +04:00
struct f2fs_sb_info * sbi = F2FS_I_SB ( dir ) ;
f2fs: use rw_sem instead of fs_lock(locks mutex)
The fs_locks is used to block other ops(ex, recovery) when doing checkpoint.
And each other operate routine(besides checkpoint) needs to acquire a fs_lock,
there is a terrible problem here, if these are too many concurrency threads acquiring
fs_lock, so that they will block each other and may lead to some performance problem,
but this is not the phenomenon we want to see.
Though there are some optimization patches introduced to enhance the usage of fs_lock,
but the thorough solution is using a *rw_sem* to replace the fs_lock.
Checkpoint routine takes write_sem, and other ops take read_sem, so that we can block
other ops(ex, recovery) when doing checkpoint, and other ops will not disturb each other,
this can avoid the problem described above completely.
Because of the weakness of rw_sem, the above change may introduce a potential problem
that the checkpoint thread might get starved if other threads are intensively locking
the read semaphore for I/O.(Pointed out by Xu Jin)
In order to avoid this, a wait_list is introduced, the appending read semaphore ops
will be dropped into the wait_list if checkpoint thread is waiting for write semaphore,
and will be waked up when checkpoint thread gives up write semaphore.
Thanks to Kim's previous review and test, and will be very glad to see other guys'
performance tests about this patch.
V2:
-fix the potential starvation problem.
-use more suitable func name suggested by Xu Jin.
Signed-off-by: Gu Zheng <guz.fnst@cn.fujitsu.com>
[Jaegeuk Kim: adjust minor coding standard]
Signed-off-by: Jaegeuk Kim <jaegeuk.kim@samsung.com>
2013-09-27 14:08:30 +04:00
int err ;
2012-11-02 12:11:10 +04:00
2017-10-24 00:48:49 +03:00
if ( unlikely ( f2fs_cp_error ( sbi ) ) )
return - EIO ;
2019-08-23 12:58:36 +03:00
if ( ! f2fs_is_checkpoint_ready ( sbi ) )
return - ENOSPC ;
2017-10-24 00:48:49 +03:00
2017-11-29 23:35:29 +03:00
err = fscrypt_prepare_link ( old_dentry , dir , dentry ) ;
if ( err )
return err ;
2015-04-22 06:39:58 +03:00
2017-07-25 19:01:41 +03:00
if ( is_inode_flag_set ( dir , FI_PROJ_INHERIT ) & &
( ! projid_eq ( F2FS_I ( dir ) - > i_projid ,
F2FS_I ( old_dentry - > d_inode ) - > i_projid ) ) )
return - EXDEV ;
2021-10-28 16:03:05 +03:00
err = f2fs_dquot_initialize ( dir ) ;
2017-07-08 19:13:07 +03:00
if ( err )
return err ;
2016-01-08 01:15:04 +03:00
f2fs_balance_fs ( sbi , true ) ;
2012-12-19 11:25:21 +04:00
2023-07-05 22:01:08 +03:00
inode_set_ctime_current ( inode ) ;
2013-05-22 07:06:26 +04:00
ihold ( inode ) ;
2012-11-02 12:11:10 +04:00
2016-05-20 20:13:22 +03:00
set_inode_flag ( inode , FI_INC_LINK ) ;
f2fs: use rw_sem instead of fs_lock(locks mutex)
The fs_locks is used to block other ops(ex, recovery) when doing checkpoint.
And each other operate routine(besides checkpoint) needs to acquire a fs_lock,
there is a terrible problem here, if these are too many concurrency threads acquiring
fs_lock, so that they will block each other and may lead to some performance problem,
but this is not the phenomenon we want to see.
Though there are some optimization patches introduced to enhance the usage of fs_lock,
but the thorough solution is using a *rw_sem* to replace the fs_lock.
Checkpoint routine takes write_sem, and other ops take read_sem, so that we can block
other ops(ex, recovery) when doing checkpoint, and other ops will not disturb each other,
this can avoid the problem described above completely.
Because of the weakness of rw_sem, the above change may introduce a potential problem
that the checkpoint thread might get starved if other threads are intensively locking
the read semaphore for I/O.(Pointed out by Xu Jin)
In order to avoid this, a wait_list is introduced, the appending read semaphore ops
will be dropped into the wait_list if checkpoint thread is waiting for write semaphore,
and will be waked up when checkpoint thread gives up write semaphore.
Thanks to Kim's previous review and test, and will be very glad to see other guys'
performance tests about this patch.
V2:
-fix the potential starvation problem.
-use more suitable func name suggested by Xu Jin.
Signed-off-by: Gu Zheng <guz.fnst@cn.fujitsu.com>
[Jaegeuk Kim: adjust minor coding standard]
Signed-off-by: Jaegeuk Kim <jaegeuk.kim@samsung.com>
2013-09-27 14:08:30 +04:00
f2fs_lock_op ( sbi ) ;
2012-11-02 12:11:10 +04:00
err = f2fs_add_link ( dentry , inode ) ;
if ( err )
goto out ;
2014-09-25 22:55:53 +04:00
f2fs_unlock_op ( sbi ) ;
2012-11-02 12:11:10 +04:00
d_instantiate ( dentry , inode ) ;
2014-11-10 09:15:31 +03:00
if ( IS_DIRSYNC ( dir ) )
f2fs_sync_fs ( sbi - > sb , 1 ) ;
2012-11-02 12:11:10 +04:00
return 0 ;
out :
2016-05-20 20:13:22 +03:00
clear_inode_flag ( inode , FI_INC_LINK ) ;
2012-11-02 12:11:10 +04:00
iput ( inode ) ;
2014-09-25 22:55:53 +04:00
f2fs_unlock_op ( sbi ) ;
2012-11-02 12:11:10 +04:00
return err ;
}
struct dentry * f2fs_get_parent ( struct dentry * child )
{
2016-07-19 03:27:47 +03:00
struct page * page ;
2021-04-16 02:46:50 +03:00
unsigned long ino = f2fs_inode_by_name ( d_inode ( child ) , & dotdot_name , & page ) ;
2021-04-06 04:47:35 +03:00
2016-07-19 03:27:47 +03:00
if ( ! ino ) {
if ( IS_ERR ( page ) )
return ERR_CAST ( page ) ;
2012-11-02 12:11:10 +04:00
return ERR_PTR ( - ENOENT ) ;
2016-07-19 03:27:47 +03:00
}
2016-04-10 08:33:30 +03:00
return d_obtain_alias ( f2fs_iget ( child - > d_sb , ino ) ) ;
2012-11-02 12:11:10 +04:00
}
2015-03-31 01:07:16 +03:00
/*
 * Ensure that directory @dir has both of its dot entries, adding whichever
 * one is missing: the self entry ("." -> @dir's own inode number) and the
 * parent entry (dotdot_name -> @pino).
 *
 * Returns 0 on success, and also when the filesystem is read-only, in which
 * case nothing is changed.  Returns -ENOTDIR (after flagging the filesystem
 * with SBI_NEED_FSCK) when @dir is not a directory, or the error reported by
 * quota initialisation, entry lookup or entry insertion.  FI_INLINE_DOTS is
 * cleared on @dir only when every step succeeded.
 */
static int __recover_dot_dentries(struct inode *dir, nid_t pino)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(dir);
	/*
	 * The literal has to be exactly "." so that it matches the length of
	 * 1; any padding would make the one-byte name something other than
	 * the self entry.
	 */
	struct qstr dot = QSTR_INIT(".", 1);
	struct f2fs_dir_entry *de;
	struct page *page;
	int err = 0;

	if (f2fs_readonly(sbi->sb)) {
		f2fs_info(sbi, " skip recovering inline_dots inode (ino:%lu, pino:%u) in readonly mountpoint ",
			  dir->i_ino, pino);
		return 0;
	}

	if (!S_ISDIR(dir->i_mode)) {
		f2fs_err(sbi, " inconsistent inode status, skip recovering inline_dots inode (ino:%lu, i_mode:%u, pino:%u) ",
			 dir->i_ino, dir->i_mode, pino);
		set_sbi_flag(sbi, SBI_NEED_FSCK);
		return -ENOTDIR;
	}

	err = f2fs_dquot_initialize(dir);
	if (err)
		return err;

	f2fs_balance_fs(sbi, true);

	f2fs_lock_op(sbi);

	/* Self entry: keep it if present, otherwise add it. */
	de = f2fs_find_entry(dir, &dot, &page);
	if (de) {
		f2fs_put_page(page, 0);
	} else if (IS_ERR(page)) {
		err = PTR_ERR(page);
		goto out;
	} else {
		err = f2fs_do_add_link(dir, &dot, NULL, dir->i_ino, S_IFDIR);
		if (err)
			goto out;
	}

	/* Parent entry: same treatment, pointing at @pino. */
	de = f2fs_find_entry(dir, &dotdot_name, &page);
	if (de)
		f2fs_put_page(page, 0);
	else if (IS_ERR(page))
		err = PTR_ERR(page);
	else
		err = f2fs_do_add_link(dir, &dotdot_name, NULL, pino, S_IFDIR);
out:
	if (!err)
		clear_inode_flag(dir, FI_INLINE_DOTS);

	f2fs_unlock_op(sbi);
	return err;
}
2012-11-02 12:11:10 +04:00
static struct dentry * f2fs_lookup ( struct inode * dir , struct dentry * dentry ,
unsigned int flags )
{
struct inode * inode = NULL ;
struct f2fs_dir_entry * de ;
struct page * page ;
2017-10-17 12:33:41 +03:00
struct dentry * new ;
nid_t ino = - 1 ;
2015-04-22 06:39:58 +03:00
int err = 0 ;
2016-02-26 09:39:23 +03:00
unsigned int root_ino = F2FS_ROOT_INO ( F2FS_I_SB ( dir ) ) ;
f2fs: rework filename handling
Rework f2fs's handling of filenames to use a new 'struct f2fs_filename'.
Similar to 'struct ext4_filename', this stores the usr_fname, disk_name,
dirhash, crypto_buf, and casefolded name. Some of these names can be
NULL in some cases. 'struct f2fs_filename' differs from
'struct fscrypt_name' mainly in that the casefolded name is included.
For user-initiated directory operations like lookup() and create(),
initialize the f2fs_filename by translating the corresponding
fscrypt_name, then computing the dirhash and casefolded name if needed.
This makes the dirhash and casefolded name be cached for each syscall,
so we don't have to recompute them repeatedly. (Previously, f2fs
computed the dirhash once per directory level, and the casefolded name
once per directory block.) This improves performance.
This rework also makes it much easier to correctly handle all
combinations of normal, encrypted, casefolded, and encrypted+casefolded
directories. (The fourth isn't supported yet but is being worked on.)
The only other cases where an f2fs_filename gets initialized are for two
filesystem-internal operations: (1) when converting an inline directory
to a regular one, we grab the needed disk_name and hash from an existing
f2fs_dir_entry; and (2) when roll-forward recovering a new dentry, we
grab the needed disk_name from f2fs_inode::i_name and compute the hash.
Signed-off-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2020-05-07 10:59:04 +03:00
struct f2fs_filename fname ;
2012-11-02 12:11:10 +04:00
2017-10-17 12:33:41 +03:00
trace_f2fs_lookup_start ( dir , dentry , flags ) ;
if ( dentry - > d_name . len > F2FS_NAME_LEN ) {
err = - ENAMETOOLONG ;
goto out ;
}
2012-11-02 12:11:10 +04:00
f2fs: rework filename handling
Rework f2fs's handling of filenames to use a new 'struct f2fs_filename'.
Similar to 'struct ext4_filename', this stores the usr_fname, disk_name,
dirhash, crypto_buf, and casefolded name. Some of these names can be
NULL in some cases. 'struct f2fs_filename' differs from
'struct fscrypt_name' mainly in that the casefolded name is included.
For user-initiated directory operations like lookup() and create(),
initialize the f2fs_filename by translating the corresponding
fscrypt_name, then computing the dirhash and casefolded name if needed.
This makes the dirhash and casefolded name be cached for each syscall,
so we don't have to recompute them repeatedly. (Previously, f2fs
computed the dirhash once per directory level, and the casefolded name
once per directory block.) This improves performance.
This rework also makes it much easier to correctly handle all
combinations of normal, encrypted, casefolded, and encrypted+casefolded
directories. (The fourth isn't supported yet but is being worked on.)
The only other cases where an f2fs_filename gets initialized are for two
filesystem-internal operations: (1) when converting an inline directory
to a regular one, we grab the needed disk_name and hash from an existing
f2fs_dir_entry; and (2) when roll-forward recovering a new dentry, we
grab the needed disk_name from f2fs_inode::i_name and compute the hash.
Signed-off-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2020-05-07 10:59:04 +03:00
err = f2fs_prepare_lookup ( dir , dentry , & fname ) ;
2020-11-19 09:09:03 +03:00
generic_set_encrypted_ci_d_ops ( dentry ) ;
fscrypt: fix race where ->lookup() marks plaintext dentry as ciphertext
->lookup() in an encrypted directory begins as follows:
1. fscrypt_prepare_lookup():
a. Try to load the directory's encryption key.
b. If the key is unavailable, mark the dentry as a ciphertext name
via d_flags.
2. fscrypt_setup_filename():
a. Try to load the directory's encryption key.
b. If the key is available, encrypt the name (treated as a plaintext
name) to get the on-disk name. Otherwise decode the name
(treated as a ciphertext name) to get the on-disk name.
But if the key is concurrently added, it may be found at (2a) but not at
(1a). In this case, the dentry will be wrongly marked as a ciphertext
name even though it was actually treated as plaintext.
This will cause the dentry to be wrongly invalidated on the next lookup,
potentially causing problems. For example, if the racy ->lookup() was
part of sys_mount(), then the new mount will be detached when anything
tries to access it. This is despite the mountpoint having a plaintext
path, which should remain valid now that the key was added.
Of course, this is only possible if there's a userspace race. Still,
the additional kernel-side race is confusing and unexpected.
Close the kernel-side race by changing fscrypt_prepare_lookup() to also
set the on-disk filename (step 2b), consistent with the d_flags update.
Fixes: 28b4c263961c ("ext4 crypto: revalidate dentry after adding or removing the key")
Signed-off-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
2019-03-20 21:39:13 +03:00
if ( err = = - ENOENT )
goto out_splice ;
if ( err )
goto out ;
de = __f2fs_find_entry ( dir , & fname , & page ) ;
f2fs: rework filename handling
Rework f2fs's handling of filenames to use a new 'struct f2fs_filename'.
Similar to 'struct ext4_filename', this stores the usr_fname, disk_name,
dirhash, crypto_buf, and casefolded name. Some of these names can be
NULL in some cases. 'struct f2fs_filename' differs from
'struct fscrypt_name' mainly in that the casefolded name is included.
For user-initiated directory operations like lookup() and create(),
initialize the f2fs_filename by translating the corresponding
fscrypt_name, then computing the dirhash and casefolded name if needed.
This makes the dirhash and casefolded name be cached for each syscall,
so we don't have to recompute them repeatedly. (Previously, f2fs
computed the dirhash once per directory level, and the casefolded name
once per directory block.) This improves performance.
This rework also makes it much easier to correctly handle all
combinations of normal, encrypted, casefolded, and encrypted+casefolded
directories. (The fourth isn't supported yet but is being worked on.)
The only other cases where an f2fs_filename gets initialized are for two
filesystem-internal operations: (1) when converting an inline directory
to a regular one, we grab the needed disk_name and hash from an existing
f2fs_dir_entry; and (2) when roll-forward recovering a new dentry, we
grab the needed disk_name from f2fs_inode::i_name and compute the hash.
Signed-off-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2020-05-07 10:59:04 +03:00
f2fs_free_filename ( & fname ) ;
fscrypt: fix race where ->lookup() marks plaintext dentry as ciphertext
->lookup() in an encrypted directory begins as follows:
1. fscrypt_prepare_lookup():
a. Try to load the directory's encryption key.
b. If the key is unavailable, mark the dentry as a ciphertext name
via d_flags.
2. fscrypt_setup_filename():
a. Try to load the directory's encryption key.
b. If the key is available, encrypt the name (treated as a plaintext
name) to get the on-disk name. Otherwise decode the name
(treated as a ciphertext name) to get the on-disk name.
But if the key is concurrently added, it may be found at (2a) but not at
(1a). In this case, the dentry will be wrongly marked as a ciphertext
name even though it was actually treated as plaintext.
This will cause the dentry to be wrongly invalidated on the next lookup,
potentially causing problems. For example, if the racy ->lookup() was
part of sys_mount(), then the new mount will be detached when anything
tries to access it. This is despite the mountpoint having a plaintext
path, which should remain valid now that the key was added.
Of course, this is only possible if there's a userspace race. Still,
the additional kernel-side race is confusing and unexpected.
Close the kernel-side race by changing fscrypt_prepare_lookup() to also
set the on-disk filename (step 2b), consistent with the d_flags update.
Fixes: 28b4c263961c ("ext4 crypto: revalidate dentry after adding or removing the key")
Signed-off-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
2019-03-20 21:39:13 +03:00
2016-05-27 20:10:41 +03:00
if ( ! de ) {
2017-10-17 12:33:41 +03:00
if ( IS_ERR ( page ) ) {
err = PTR_ERR ( page ) ;
goto out ;
}
2020-05-27 13:27:51 +03:00
err = - ENOENT ;
2017-10-17 12:33:41 +03:00
goto out_splice ;
2016-05-27 20:10:41 +03:00
}
2012-11-02 12:11:10 +04:00
2015-04-22 21:40:27 +03:00
ino = le32_to_cpu ( de - > ino ) ;
f2fs_put_page ( page , 0 ) ;
2015-03-31 01:07:16 +03:00
2015-04-22 21:40:27 +03:00
inode = f2fs_iget ( dir - > i_sb , ino ) ;
2017-10-17 12:33:41 +03:00
if ( IS_ERR ( inode ) ) {
err = PTR_ERR ( inode ) ;
goto out ;
}
2015-03-31 01:07:16 +03:00
2016-02-26 09:39:23 +03:00
if ( ( dir - > i_ino = = root_ino ) & & f2fs_has_inline_dots ( dir ) ) {
err = __recover_dot_dentries ( dir , root_ino ) ;
if ( err )
2017-10-17 12:33:41 +03:00
goto out_iput ;
2016-02-26 09:39:23 +03:00
}
2015-04-22 06:39:58 +03:00
if ( f2fs_has_inline_dots ( inode ) ) {
2015-04-22 21:40:27 +03:00
err = __recover_dot_dentries ( inode , dir - > i_ino ) ;
2015-04-22 06:39:58 +03:00
if ( err )
2017-10-17 12:33:41 +03:00
goto out_iput ;
2012-11-02 12:11:10 +04:00
}
2018-12-12 12:50:11 +03:00
if ( IS_ENCRYPTED ( dir ) & &
2016-12-16 11:18:15 +03:00
( S_ISDIR ( inode - > i_mode ) | | S_ISLNK ( inode - > i_mode ) ) & &
! fscrypt_has_permitted_context ( dir , inode ) ) {
2019-06-18 12:48:42 +03:00
f2fs_warn ( F2FS_I_SB ( inode ) , " Inconsistent encryption contexts: %lu/%lu " ,
dir - > i_ino , inode - > i_ino ) ;
2017-04-07 20:58:39 +03:00
err = - EPERM ;
2017-10-17 12:33:41 +03:00
goto out_iput ;
2016-02-23 20:21:37 +03:00
}
2017-10-17 12:33:41 +03:00
out_splice :
2022-01-18 09:56:14 +03:00
# if IS_ENABLED(CONFIG_UNICODE)
f2fs: Support case-insensitive file name lookups
Modeled after commit b886ee3e778e ("ext4: Support case-insensitive file
name lookups")
"""
This patch implements the actual support for case-insensitive file name
lookups in f2fs, based on the feature bit and the encoding stored in the
superblock.
A filesystem that has the casefold feature set is able to configure
directories with the +F (F2FS_CASEFOLD_FL) attribute, enabling lookups
to succeed in that directory in a case-insensitive fashion, i.e: match
a directory entry even if the name used by userspace is not a byte per
byte match with the disk name, but is an equivalent case-insensitive
version of the Unicode string. This operation is called a
case-insensitive file name lookup.
The feature is configured as an inode attribute applied to directories
and inherited by its children. This attribute can only be enabled on
empty directories for filesystems that support the encoding feature,
thus preventing collision of file names that only differ by case.
* dcache handling:
For a +F directory, F2Fs only stores the first equivalent name dentry
used in the dcache. This is done to prevent unintentional duplication of
dentries in the dcache, while also allowing the VFS code to quickly find
the right entry in the cache despite which equivalent string was used in
a previous lookup, without having to resort to ->lookup().
d_hash() of casefolded directories is implemented as the hash of the
casefolded string, such that we always have a well-known bucket for all
the equivalencies of the same string. d_compare() uses the
utf8_strncasecmp() infrastructure, which handles the comparison of
equivalent, same case, names as well.
For now, negative lookups are not inserted in the dcache, since they
would need to be invalidated anyway, because we can't trust missing file
dentries. This is bad for performance but requires some leveraging of
the vfs layer to fix. We can live without that for now, and so does
everyone else.
* on-disk data:
Despite using a specific version of the name as the internal
representation within the dcache, the name stored and fetched from the
disk is a byte-per-byte match with what the user requested, making this
implementation 'name-preserving'. i.e. no actual information is lost
when writing to storage.
DX is supported by modifying the hashes used in +F directories to make
them case/encoding-aware. The new disk hashes are calculated as the
hash of the full casefolded string, instead of the string directly.
This allows us to efficiently search for file names in the htree without
requiring the user to provide an exact name.
* Dealing with invalid sequences:
By default, when a invalid UTF-8 sequence is identified, ext4 will treat
it as an opaque byte sequence, ignoring the encoding and reverting to
the old behavior for that unique file. This means that case-insensitive
file name lookup will not work only for that file. An optional bit can
be set in the superblock telling the filesystem code and userspace tools
to enforce the encoding. When that optional bit is set, any attempt to
create a file name using an invalid UTF-8 sequence will fail and return
an error to userspace.
* Normalization algorithm:
The UTF-8 algorithms used to compare strings in f2fs is implemented
in fs/unicode, and is based on a previous version developed by
SGI. It implements the Canonical decomposition (NFD) algorithm
described by the Unicode specification 12.1, or higher, combined with
the elimination of ignorable code points (NFDi) and full
case-folding (CF) as documented in fs/unicode/utf8_norm.c.
NFD seems to be the best normalization method for F2FS because:
- It has a lower cost than NFC/NFKC (which requires
decomposing to NFD as an intermediary step)
- It doesn't eliminate important semantic meaning like
compatibility decompositions.
Although:
- This implementation is not completely linguistic accurate, because
different languages have conflicting rules, which would require the
specialization of the filesystem to a given locale, which brings all
sorts of problems for removable media and for users who use more than
one language.
"""
Signed-off-by: Daniel Rosenberg <drosen@google.com>
Reviewed-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2019-07-24 02:05:29 +03:00
if ( ! inode & & IS_CASEFOLDED ( dir ) ) {
/* Eventually we want to call d_add_ci(dentry, NULL)
* for negative dentries in the encoding case as
* well . For now , prevent the negative dentry
* from being cached .
*/
trace_f2fs_lookup_end ( dir , dentry , ino , err ) ;
return NULL ;
}
# endif
2017-10-17 12:33:41 +03:00
new = d_splice_alias ( inode , dentry ) ;
2023-06-01 04:37:59 +03:00
trace_f2fs_lookup_end ( dir , ! IS_ERR_OR_NULL ( new ) ? new : dentry ,
ino , IS_ERR ( new ) ? PTR_ERR ( new ) : err ) ;
2017-10-17 12:33:41 +03:00
return new ;
out_iput :
2016-03-10 17:24:23 +03:00
iput ( inode ) ;
2017-10-17 12:33:41 +03:00
out :
trace_f2fs_lookup_end ( dir , dentry , ino , err ) ;
2015-04-22 06:39:58 +03:00
return ERR_PTR ( err ) ;
2012-11-02 12:11:10 +04:00
}
static int f2fs_unlink ( struct inode * dir , struct dentry * dentry )
{
2014-09-03 02:31:18 +04:00
struct f2fs_sb_info * sbi = F2FS_I_SB ( dir ) ;
2015-03-18 01:25:59 +03:00
struct inode * inode = d_inode ( dentry ) ;
2012-11-02 12:11:10 +04:00
struct f2fs_dir_entry * de ;
struct page * page ;
2020-04-21 01:00:57 +03:00
int err ;
2012-11-02 12:11:10 +04:00
2013-04-19 20:28:40 +04:00
trace_f2fs_unlink_enter ( dir , dentry ) ;
2012-12-19 11:25:21 +04:00
2020-06-20 05:12:17 +03:00
if ( unlikely ( f2fs_cp_error ( sbi ) ) ) {
err = - EIO ;
goto fail ;
}
2017-10-24 00:48:49 +03:00
2021-10-28 16:03:05 +03:00
err = f2fs_dquot_initialize ( dir ) ;
2017-10-24 00:50:15 +03:00
if ( err )
2020-06-20 05:12:17 +03:00
goto fail ;
2021-10-28 16:03:05 +03:00
err = f2fs_dquot_initialize ( inode ) ;
2017-07-08 19:13:07 +03:00
if ( err )
2020-06-20 05:12:17 +03:00
goto fail ;
2017-07-08 19:13:07 +03:00
2012-11-02 12:11:10 +04:00
de = f2fs_find_entry ( dir , & dentry - > d_name , & page ) ;
2016-07-19 03:27:47 +03:00
if ( ! de ) {
if ( IS_ERR ( page ) )
err = PTR_ERR ( page ) ;
2012-11-02 12:11:10 +04:00
goto fail ;
2016-07-19 03:27:47 +03:00
}
2012-11-02 12:11:10 +04:00
2016-01-08 01:15:04 +03:00
f2fs_balance_fs ( sbi , true ) ;
2015-12-22 22:56:08 +03:00
2013-10-08 05:19:28 +04:00
f2fs_lock_op ( sbi ) ;
f2fs: clean up symbol namespace
As Ted reported:
"Hi, I was looking at f2fs's sources recently, and I noticed that there
is a very large number of non-static symbols which don't have a f2fs
prefix. There's well over a hundred (see attached below).
As one example, in fs/f2fs/dir.c there is:
unsigned char get_de_type(struct f2fs_dir_entry *de)
This function is clearly only useful for f2fs, but it has a generic
name. This means that if any other file system tries to have the same
symbol name, there will be a symbol conflict and the kernel would not
successfully build. It also means that when someone is looking f2fs
sources, it's not at all obvious whether a function such as
read_data_page(), invalidate_blocks(), is a generic kernel function
found in the fs, mm, or block layers, or a f2fs specific function.
You might want to fix this at some point. Hopefully Kent's bcachefs
isn't similarly using genericly named functions, since that might
cause conflicts with f2fs's functions --- but just as this would be a
problem that we would rightly insist that Kent fix, this is something
that we should have rightly insisted that f2fs should have fixed
before it was integrated into the mainline kernel.
acquire_orphan_inode
add_ino_entry
add_orphan_inode
allocate_data_block
allocate_new_segments
alloc_nid
alloc_nid_done
alloc_nid_failed
available_free_memory
...."
This patch adds "f2fs_" prefix for all non-static symbols in order to:
a) avoid conflict with other kernel generic symbols;
b) to indicate the function is f2fs specific one instead of generic
one;
Reported-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-05-29 19:20:41 +03:00
err = f2fs_acquire_orphan_inode ( sbi ) ;
2012-11-02 12:11:10 +04:00
if ( err ) {
2013-10-08 05:19:28 +04:00
f2fs_unlock_op ( sbi ) ;
2012-11-02 12:11:10 +04:00
f2fs_put_page ( page , 0 ) ;
goto fail ;
}
2014-09-24 14:17:04 +04:00
f2fs_delete_entry ( de , page , dir , inode ) ;
2022-10-18 03:52:05 +03:00
f2fs_unlock_op ( sbi ) ;
2022-01-18 09:56:14 +03:00
# if IS_ENABLED(CONFIG_UNICODE)
f2fs: Support case-insensitive file name lookups
Modeled after commit b886ee3e778e ("ext4: Support case-insensitive file
name lookups")
"""
This patch implements the actual support for case-insensitive file name
lookups in f2fs, based on the feature bit and the encoding stored in the
superblock.
A filesystem that has the casefold feature set is able to configure
directories with the +F (F2FS_CASEFOLD_FL) attribute, enabling lookups
to succeed in that directory in a case-insensitive fashion, i.e: match
a directory entry even if the name used by userspace is not a byte per
byte match with the disk name, but is an equivalent case-insensitive
version of the Unicode string. This operation is called a
case-insensitive file name lookup.
The feature is configured as an inode attribute applied to directories
and inherited by its children. This attribute can only be enabled on
empty directories for filesystems that support the encoding feature,
thus preventing collision of file names that only differ by case.
* dcache handling:
For a +F directory, F2FS only stores the first equivalent name dentry
used in the dcache. This is done to prevent unintentional duplication of
dentries in the dcache, while also allowing the VFS code to quickly find
the right entry in the cache despite which equivalent string was used in
a previous lookup, without having to resort to ->lookup().
d_hash() of casefolded directories is implemented as the hash of the
casefolded string, such that we always have a well-known bucket for all
the equivalencies of the same string. d_compare() uses the
utf8_strncasecmp() infrastructure, which handles the comparison of
equivalent, same case, names as well.
For now, negative lookups are not inserted in the dcache, since they
would need to be invalidated anyway, because we can't trust missing file
dentries. This is bad for performance but requires some leveraging of
the vfs layer to fix. We can live without that for now, and so does
everyone else.
* on-disk data:
Despite using a specific version of the name as the internal
representation within the dcache, the name stored and fetched from the
disk is a byte-per-byte match with what the user requested, making this
implementation 'name-preserving'. i.e. no actual information is lost
when writing to storage.
DX is supported by modifying the hashes used in +F directories to make
them case/encoding-aware. The new disk hashes are calculated as the
hash of the full casefolded string, instead of the string directly.
This allows us to efficiently search for file names in the htree without
requiring the user to provide an exact name.
* Dealing with invalid sequences:
By default, when an invalid UTF-8 sequence is identified, ext4 will treat
it as an opaque byte sequence, ignoring the encoding and reverting to
the old behavior for that unique file. This means that case-insensitive
file name lookup will not work only for that file. An optional bit can
be set in the superblock telling the filesystem code and userspace tools
to enforce the encoding. When that optional bit is set, any attempt to
create a file name using an invalid UTF-8 sequence will fail and return
an error to userspace.
* Normalization algorithm:
The UTF-8 algorithms used to compare strings in f2fs are implemented
in fs/unicode, and is based on a previous version developed by
SGI. It implements the Canonical decomposition (NFD) algorithm
described by the Unicode specification 12.1, or higher, combined with
the elimination of ignorable code points (NFDi) and full
case-folding (CF) as documented in fs/unicode/utf8_norm.c.
NFD seems to be the best normalization method for F2FS because:
- It has a lower cost than NFC/NFKC (which requires
decomposing to NFD as an intermediary step)
- It doesn't eliminate important semantic meaning like
compatibility decompositions.
Although:
- This implementation is not completely linguistically accurate, because
different languages have conflicting rules, which would require the
specialization of the filesystem to a given locale, which brings all
sorts of problems for removable media and for users who use more than
one language.
"""
Signed-off-by: Daniel Rosenberg <drosen@google.com>
Reviewed-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2019-07-24 02:05:29 +03:00
/* VFS negative dentries are incompatible with Encoding and
* Case - insensitiveness . Eventually we ' ll want to avoid
* invalidating the dentries here , alongside with returning the
2020-07-24 11:55:28 +03:00
* negative dentries at f2fs_lookup ( ) , when it is better
f2fs: Support case-insensitive file name lookups
Modeled after commit b886ee3e778e ("ext4: Support case-insensitive file
name lookups")
"""
This patch implements the actual support for case-insensitive file name
lookups in f2fs, based on the feature bit and the encoding stored in the
superblock.
A filesystem that has the casefold feature set is able to configure
directories with the +F (F2FS_CASEFOLD_FL) attribute, enabling lookups
to succeed in that directory in a case-insensitive fashion, i.e: match
a directory entry even if the name used by userspace is not a byte per
byte match with the disk name, but is an equivalent case-insensitive
version of the Unicode string. This operation is called a
case-insensitive file name lookup.
The feature is configured as an inode attribute applied to directories
and inherited by its children. This attribute can only be enabled on
empty directories for filesystems that support the encoding feature,
thus preventing collision of file names that only differ by case.
* dcache handling:
For a +F directory, F2FS only stores the first equivalent name dentry
used in the dcache. This is done to prevent unintentional duplication of
dentries in the dcache, while also allowing the VFS code to quickly find
the right entry in the cache despite which equivalent string was used in
a previous lookup, without having to resort to ->lookup().
d_hash() of casefolded directories is implemented as the hash of the
casefolded string, such that we always have a well-known bucket for all
the equivalencies of the same string. d_compare() uses the
utf8_strncasecmp() infrastructure, which handles the comparison of
equivalent, same case, names as well.
For now, negative lookups are not inserted in the dcache, since they
would need to be invalidated anyway, because we can't trust missing file
dentries. This is bad for performance but requires some leveraging of
the vfs layer to fix. We can live without that for now, and so does
everyone else.
* on-disk data:
Despite using a specific version of the name as the internal
representation within the dcache, the name stored and fetched from the
disk is a byte-per-byte match with what the user requested, making this
implementation 'name-preserving'. i.e. no actual information is lost
when writing to storage.
DX is supported by modifying the hashes used in +F directories to make
them case/encoding-aware. The new disk hashes are calculated as the
hash of the full casefolded string, instead of the string directly.
This allows us to efficiently search for file names in the htree without
requiring the user to provide an exact name.
* Dealing with invalid sequences:
By default, when an invalid UTF-8 sequence is identified, ext4 will treat
it as an opaque byte sequence, ignoring the encoding and reverting to
the old behavior for that unique file. This means that case-insensitive
file name lookup will not work only for that file. An optional bit can
be set in the superblock telling the filesystem code and userspace tools
to enforce the encoding. When that optional bit is set, any attempt to
create a file name using an invalid UTF-8 sequence will fail and return
an error to userspace.
* Normalization algorithm:
The UTF-8 algorithms used to compare strings in f2fs are implemented
in fs/unicode, and is based on a previous version developed by
SGI. It implements the Canonical decomposition (NFD) algorithm
described by the Unicode specification 12.1, or higher, combined with
the elimination of ignorable code points (NFDi) and full
case-folding (CF) as documented in fs/unicode/utf8_norm.c.
NFD seems to be the best normalization method for F2FS because:
- It has a lower cost than NFC/NFKC (which requires
decomposing to NFD as an intermediary step)
- It doesn't eliminate important semantic meaning like
compatibility decompositions.
Although:
- This implementation is not completely linguistically accurate, because
different languages have conflicting rules, which would require the
specialization of the filesystem to a given locale, which brings all
sorts of problems for removable media and for users who use more than
one language.
"""
Signed-off-by: Daniel Rosenberg <drosen@google.com>
Reviewed-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2019-07-24 02:05:29 +03:00
* supported by the VFS for the CI case .
*/
if ( IS_CASEFOLDED ( dir ) )
d_invalidate ( dentry ) ;
# endif
2014-11-10 09:15:31 +03:00
if ( IS_DIRSYNC ( dir ) )
f2fs_sync_fs ( sbi - > sb , 1 ) ;
2012-11-02 12:11:10 +04:00
fail :
2013-04-19 20:28:40 +04:00
trace_f2fs_unlink_exit ( inode , err ) ;
2012-11-02 12:11:10 +04:00
return err ;
}
2015-11-17 18:20:54 +03:00
/*
 * Return the symlink target obtained from page_get_link ( ) .
 *
 * When the lookup itself succeeds but the returned string is empty , the
 * delayed call is run and cleared right away and ERR_PTR ( - ENOENT ) is
 * returned instead . Any error pointer from page_get_link ( ) is passed
 * through unchanged .
 */
static const char * f2fs_get_link ( struct dentry * dentry ,
2015-12-29 23:58:39 +03:00
struct inode * inode ,
struct delayed_call * done )
2015-04-15 23:49:55 +03:00
{
2015-12-29 23:58:39 +03:00
const char * link = page_get_link ( dentry , inode , done ) ;
2021-04-06 04:47:35 +03:00
2015-05-02 20:32:22 +03:00
if ( ! IS_ERR ( link ) & & ! * link ) {
/* Broken symlink : the target string is empty , so report - ENOENT . */
2015-12-29 23:58:39 +03:00
do_delayed_call ( done ) ;
clear_delayed_call ( done ) ;
2015-05-02 20:32:22 +03:00
link = ERR_PTR ( - ENOENT ) ;
2015-04-15 23:49:55 +03:00
}
2015-05-02 20:32:22 +03:00
return link ;
2015-04-15 23:49:55 +03:00
}
2023-01-13 14:49:14 +03:00
/*
 * Create a symlink for dentry in dir whose target is symname .
 *
 * Returns - EIO when f2fs_cp_error ( ) reports an error , - ENOSPC when
 * f2fs_is_checkpoint_ready ( ) is false , otherwise the first error from
 * fscrypt_prepare_symlink ( ) , f2fs_dquot_initialize ( ) , f2fs_new_inode ( ) ,
 * f2fs_add_link ( ) , fscrypt_encrypt_symlink ( ) or page_symlink ( ) , and the
 * value of err ( 0 ) when all of them succeed .
 *
 * Failure handling :
 * - f2fs_add_link ( ) fails : the inode goes to f2fs_handle_failed_inode ( ) .
 *   NOTE ( review ) : this path jumps out while f2fs_lock_op ( ) is still held ;
 *   presumably f2fs_handle_failed_inode ( ) drops it - confirm .
 * - encrypting or writing the link body fails : the dentry is still
 *   instantiated and then removed again through f2fs_unlink ( ) .
 *
 * On the paths that reach out_free_encrypted_link , disk_link . name is
 * kfree ( ) d when it does not point at symname .
 */
static int f2fs_symlink ( struct mnt_idmap * idmap , struct inode * dir ,
2021-01-21 16:19:43 +03:00
struct dentry * dentry , const char * symname )
2012-11-02 12:11:10 +04:00
{
2014-09-03 02:31:18 +04:00
struct f2fs_sb_info * sbi = F2FS_I_SB ( dir ) ;
2012-11-02 12:11:10 +04:00
struct inode * inode ;
2015-04-30 01:10:53 +03:00
size_t len = strlen ( symname ) ;
2018-01-12 07:26:49 +03:00
struct fscrypt_str disk_link ;
f2fs: use rw_sem instead of fs_lock(locks mutex)
The fs_locks is used to block other ops(ex, recovery) when doing checkpoint.
And each other operate routine(besides checkpoint) needs to acquire a fs_lock,
there is a terrible problem here, if these are too many concurrency threads acquiring
fs_lock, so that they will block each other and may lead to some performance problem,
but this is not the phenomenon we want to see.
Though there are some optimization patches introduced to enhance the usage of fs_lock,
but the thorough solution is using a *rw_sem* to replace the fs_lock.
Checkpoint routine takes write_sem, and other ops take read_sem, so that we can block
other ops(ex, recovery) when doing checkpoint, and other ops will not disturb each other,
this can avoid the problem described above completely.
Because of the weakness of rw_sem, the above change may introduce a potential problem
that the checkpoint thread might get starved if other threads are intensively locking
the read semaphore for I/O.(Pointed out by Xu Jin)
In order to avoid this, a wait_list is introduced, the appending read semaphore ops
will be dropped into the wait_list if checkpoint thread is waiting for write semaphore,
and will be waked up when checkpoint thread gives up write semaphore.
Thanks to Kim's previous review and test, and will be very glad to see other guys'
performance tests about this patch.
V2:
-fix the potential starvation problem.
-use more suitable func name suggested by Xu Jin.
Signed-off-by: Gu Zheng <guz.fnst@cn.fujitsu.com>
[Jaegeuk Kim: adjust minor coding standard]
Signed-off-by: Jaegeuk Kim <jaegeuk.kim@samsung.com>
2013-09-27 14:08:30 +04:00
int err ;
2012-11-02 12:11:10 +04:00
2017-10-24 00:48:49 +03:00
if ( unlikely ( f2fs_cp_error ( sbi ) ) )
return - EIO ;
2019-08-23 12:58:36 +03:00
if ( ! f2fs_is_checkpoint_ready ( sbi ) )
return - ENOSPC ;
2017-10-24 00:48:49 +03:00
2018-01-12 07:26:49 +03:00
err = fscrypt_prepare_symlink ( dir , symname , len , dir - > i_sb - > s_blocksize ,
& disk_link ) ;
if ( err )
return err ;
2015-04-30 01:10:53 +03:00
2021-10-28 16:03:05 +03:00
err = f2fs_dquot_initialize ( dir ) ;
2017-07-08 19:13:07 +03:00
if ( err )
return err ;
2023-01-13 14:49:25 +03:00
inode = f2fs_new_inode ( idmap , dir , S_IFLNK | S_IRWXUGO , NULL ) ;
2012-11-02 12:11:10 +04:00
if ( IS_ERR ( inode ) )
return PTR_ERR ( inode ) ;
2018-01-12 07:26:49 +03:00
if ( IS_ENCRYPTED ( inode ) )
2015-04-30 01:10:53 +03:00
inode - > i_op = & f2fs_encrypted_symlink_inode_operations ;
else
inode - > i_op = & f2fs_symlink_inode_operations ;
2015-11-17 09:07:57 +03:00
inode_nohighmem ( inode ) ;
2012-11-02 12:11:10 +04:00
inode - > i_mapping - > a_ops = & f2fs_dblock_aops ;
f2fs: use rw_sem instead of fs_lock(locks mutex)
The fs_locks is used to block other ops(ex, recovery) when doing checkpoint.
And each other operate routine(besides checkpoint) needs to acquire a fs_lock,
there is a terrible problem here, if these are too many concurrency threads acquiring
fs_lock, so that they will block each other and may lead to some performance problem,
but this is not the phenomenon we want to see.
Though there are some optimization patches introduced to enhance the usage of fs_lock,
but the thorough solution is using a *rw_sem* to replace the fs_lock.
Checkpoint routine takes write_sem, and other ops take read_sem, so that we can block
other ops(ex, recovery) when doing checkpoint, and other ops will not disturb each other,
this can avoid the problem described above completely.
Because of the weakness of rw_sem, the above change may introduce a potential problem
that the checkpoint thread might get starved if other threads are intensively locking
the read semaphore for I/O.(Pointed out by Xu Jin)
In order to avoid this, a wait_list is introduced, the appending read semaphore ops
will be dropped into the wait_list if checkpoint thread is waiting for write semaphore,
and will be waked up when checkpoint thread gives up write semaphore.
Thanks to Kim's previous review and test, and will be very glad to see other guys'
performance tests about this patch.
V2:
-fix the potential starvation problem.
-use more suitable func name suggested by Xu Jin.
Signed-off-by: Gu Zheng <guz.fnst@cn.fujitsu.com>
[Jaegeuk Kim: adjust minor coding standard]
Signed-off-by: Jaegeuk Kim <jaegeuk.kim@samsung.com>
2013-09-27 14:08:30 +04:00
f2fs_lock_op ( sbi ) ;
2012-11-02 12:11:10 +04:00
err = f2fs_add_link ( dentry , inode ) ;
if ( err )
f2fs: clean up symbol namespace
As Ted reported:
"Hi, I was looking at f2fs's sources recently, and I noticed that there
is a very large number of non-static symbols which don't have a f2fs
prefix. There's well over a hundred (see attached below).
As one example, in fs/f2fs/dir.c there is:
unsigned char get_de_type(struct f2fs_dir_entry *de)
This function is clearly only useful for f2fs, but it has a generic
name. This means that if any other file system tries to have the same
symbol name, there will be a symbol conflict and the kernel would not
successfully build. It also means that when someone is looking f2fs
sources, it's not at all obvious whether a function such as
read_data_page(), invalidate_blocks(), is a generic kernel function
found in the fs, mm, or block layers, or a f2fs specific function.
You might want to fix this at some point. Hopefully Kent's bcachefs
isn't similarly using genericly named functions, since that might
cause conflicts with f2fs's functions --- but just as this would be a
problem that we would rightly insist that Kent fix, this is something
that we should have rightly insisted that f2fs should have fixed
before it was integrated into the mainline kernel.
acquire_orphan_inode
add_ino_entry
add_orphan_inode
allocate_data_block
allocate_new_segments
alloc_nid
alloc_nid_done
alloc_nid_failed
available_free_memory
...."
This patch adds "f2fs_" prefix for all non-static symbols in order to:
a) avoid conflict with other kernel generic symbols;
b) to indicate the function is f2fs specific one instead of generic
one;
Reported-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-05-29 19:20:41 +03:00
goto out_f2fs_handle_failed_inode ;
2014-09-25 22:55:53 +04:00
f2fs_unlock_op ( sbi ) ;
f2fs: clean up symbol namespace
As Ted reported:
"Hi, I was looking at f2fs's sources recently, and I noticed that there
is a very large number of non-static symbols which don't have a f2fs
prefix. There's well over a hundred (see attached below).
As one example, in fs/f2fs/dir.c there is:
unsigned char get_de_type(struct f2fs_dir_entry *de)
This function is clearly only useful for f2fs, but it has a generic
name. This means that if any other file system tries to have the same
symbol name, there will be a symbol conflict and the kernel would not
successfully build. It also means that when someone is looking f2fs
sources, it's not at all obvious whether a function such as
read_data_page(), invalidate_blocks(), is a generic kernel function
found in the fs, mm, or block layers, or a f2fs specific function.
You might want to fix this at some point. Hopefully Kent's bcachefs
isn't similarly using genericly named functions, since that might
cause conflicts with f2fs's functions --- but just as this would be a
problem that we would rightly insist that Kent fix, this is something
that we should have rightly insisted that f2fs should have fixed
before it was integrated into the mainline kernel.
acquire_orphan_inode
add_ino_entry
add_orphan_inode
allocate_data_block
allocate_new_segments
alloc_nid
alloc_nid_done
alloc_nid_failed
available_free_memory
...."
This patch adds "f2fs_" prefix for all non-static symbols in order to:
a) avoid conflict with other kernel generic symbols;
b) to indicate the function is f2fs specific one instead of generic
one;
Reported-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-05-29 19:20:41 +03:00
f2fs_alloc_nid_done ( sbi , inode - > i_ino ) ;
2012-11-02 12:11:10 +04:00
2018-01-12 07:26:49 +03:00
err = fscrypt_encrypt_symlink ( inode , symname , len , & disk_link ) ;
if ( err )
goto err_out ;
2015-04-30 01:10:53 +03:00
2016-02-15 12:54:26 +03:00
err = page_symlink ( inode , disk_link . name , disk_link . len ) ;
2015-04-30 01:10:53 +03:00
err_out :
2018-05-04 15:23:01 +03:00
d_instantiate_new ( dentry , inode ) ;
2014-11-10 09:15:31 +03:00
2015-04-15 23:37:53 +03:00
/*
* Let ' s flush symlink data in order to avoid broken symlink as much as
* possible . Nevertheless , fsyncing is the best way , but there is no
* way to get a file descriptor in order to flush that .
*
* Note that , it needs to do dir - > fsync to make this recoverable .
* If the symlink path is stored into inline_data , there is no
* performance regression .
*/
2015-10-22 13:23:08 +03:00
if ( ! err ) {
2016-02-15 12:54:26 +03:00
filemap_write_and_wait_range ( inode - > i_mapping , 0 ,
disk_link . len - 1 ) ;
2015-04-15 23:37:53 +03:00
2015-10-22 13:23:08 +03:00
if ( IS_DIRSYNC ( dir ) )
f2fs_sync_fs ( sbi - > sb , 1 ) ;
} else {
f2fs_unlink ( dir , dentry ) ;
}
2015-04-30 01:10:53 +03:00
2017-04-12 05:01:26 +03:00
f2fs_balance_fs ( sbi , true ) ;
2018-01-12 07:26:49 +03:00
goto out_free_encrypted_link ;
f2fs: clean up symbol namespace
As Ted reported:
"Hi, I was looking at f2fs's sources recently, and I noticed that there
is a very large number of non-static symbols which don't have a f2fs
prefix. There's well over a hundred (see attached below).
As one example, in fs/f2fs/dir.c there is:
unsigned char get_de_type(struct f2fs_dir_entry *de)
This function is clearly only useful for f2fs, but it has a generic
name. This means that if any other file system tries to have the same
symbol name, there will be a symbol conflict and the kernel would not
successfully build. It also means that when someone is looking f2fs
sources, it's not at all obvious whether a function such as
read_data_page(), invalidate_blocks(), is a generic kernel function
found in the fs, mm, or block layers, or a f2fs specific function.
You might want to fix this at some point. Hopefully Kent's bcachefs
isn't similarly using genericly named functions, since that might
cause conflicts with f2fs's functions --- but just as this would be a
problem that we would rightly insist that Kent fix, this is something
that we should have rightly insisted that f2fs should have fixed
before it was integrated into the mainline kernel.
acquire_orphan_inode
add_ino_entry
add_orphan_inode
allocate_data_block
allocate_new_segments
alloc_nid
alloc_nid_done
alloc_nid_failed
available_free_memory
...."
This patch adds "f2fs_" prefix for all non-static symbols in order to:
a) avoid conflict with other kernel generic symbols;
b) to indicate the function is f2fs specific one instead of generic
one;
Reported-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-05-29 19:20:41 +03:00
out_f2fs_handle_failed_inode :
f2fs_handle_failed_inode ( inode ) ;
2018-01-12 07:26:49 +03:00
out_free_encrypted_link :
if ( disk_link . name ! = ( unsigned char * ) symname )
2020-09-14 11:47:00 +03:00
kfree ( disk_link . name ) ;
2012-11-02 12:11:10 +04:00
return err ;
}
2023-01-13 14:49:15 +03:00
/*
 * Create a directory for dentry in dir with the given mode .
 *
 * Returns - EIO when f2fs_cp_error ( ) reports an error , the error from
 * f2fs_dquot_initialize ( ) , f2fs_new_inode ( ) or f2fs_add_link ( ) , or 0 on
 * success .
 *
 * FI_INC_LINK is set on the new inode before f2fs_add_link ( ) is called and
 * cleared again on the failure path , just before the inode is handed to
 * f2fs_handle_failed_inode ( ) .
 *
 * NOTE ( review ) : unlike f2fs_symlink ( ) and f2fs_mknod ( ) in this file , there
 * is no f2fs_is_checkpoint_ready ( ) check here - confirm that is intended .
 * NOTE ( review ) : the failure path is entered with f2fs_lock_op ( ) still
 * held ; presumably f2fs_handle_failed_inode ( ) drops it - confirm .
 */
static int f2fs_mkdir ( struct mnt_idmap * idmap , struct inode * dir ,
2021-01-21 16:19:43 +03:00
struct dentry * dentry , umode_t mode )
2012-11-02 12:11:10 +04:00
{
2014-09-03 02:31:18 +04:00
struct f2fs_sb_info * sbi = F2FS_I_SB ( dir ) ;
2012-11-02 12:11:10 +04:00
struct inode * inode ;
f2fs: use rw_sem instead of fs_lock(locks mutex)
The fs_locks is used to block other ops(ex, recovery) when doing checkpoint.
And each other operate routine(besides checkpoint) needs to acquire a fs_lock,
there is a terrible problem here, if these are too many concurrency threads acquiring
fs_lock, so that they will block each other and may lead to some performance problem,
but this is not the phenomenon we want to see.
Though there are some optimization patches introduced to enhance the usage of fs_lock,
but the thorough solution is using a *rw_sem* to replace the fs_lock.
Checkpoint routine takes write_sem, and other ops take read_sem, so that we can block
other ops(ex, recovery) when doing checkpoint, and other ops will not disturb each other,
this can avoid the problem described above completely.
Because of the weakness of rw_sem, the above change may introduce a potential problem
that the checkpoint thread might get starved if other threads are intensively locking
the read semaphore for I/O.(Pointed out by Xu Jin)
In order to avoid this, a wait_list is introduced, the appending read semaphore ops
will be dropped into the wait_list if checkpoint thread is waiting for write semaphore,
and will be waked up when checkpoint thread gives up write semaphore.
Thanks to Kim's previous review and test, and will be very glad to see other guys'
performance tests about this patch.
V2:
-fix the potential starvation problem.
-use more suitable func name suggested by Xu Jin.
Signed-off-by: Gu Zheng <guz.fnst@cn.fujitsu.com>
[Jaegeuk Kim: adjust minor coding standard]
Signed-off-by: Jaegeuk Kim <jaegeuk.kim@samsung.com>
2013-09-27 14:08:30 +04:00
int err ;
2012-11-02 12:11:10 +04:00
2017-10-24 00:48:49 +03:00
if ( unlikely ( f2fs_cp_error ( sbi ) ) )
return - EIO ;
2021-10-28 16:03:05 +03:00
err = f2fs_dquot_initialize ( dir ) ;
2017-07-08 19:13:07 +03:00
if ( err )
return err ;
2023-01-13 14:49:25 +03:00
inode = f2fs_new_inode ( idmap , dir , S_IFDIR | mode , NULL ) ;
2012-11-02 12:11:10 +04:00
if ( IS_ERR ( inode ) )
2012-12-01 05:56:25 +04:00
return PTR_ERR ( inode ) ;
2012-11-02 12:11:10 +04:00
inode - > i_op = & f2fs_dir_inode_operations ;
inode - > i_fop = & f2fs_dir_operations ;
inode - > i_mapping - > a_ops = & f2fs_dblock_aops ;
2021-09-07 20:24:21 +03:00
mapping_set_gfp_mask ( inode - > i_mapping , GFP_NOFS ) ;
2012-11-02 12:11:10 +04:00
2016-05-20 20:13:22 +03:00
set_inode_flag ( inode , FI_INC_LINK ) ;
f2fs: use rw_sem instead of fs_lock(locks mutex)
The fs_locks is used to block other ops(ex, recovery) when doing checkpoint.
And each other operate routine(besides checkpoint) needs to acquire a fs_lock,
there is a terrible problem here, if these are too many concurrency threads acquiring
fs_lock, so that they will block each other and may lead to some performance problem,
but this is not the phenomenon we want to see.
Though there are some optimization patches introduced to enhance the usage of fs_lock,
but the thorough solution is using a *rw_sem* to replace the fs_lock.
Checkpoint routine takes write_sem, and other ops take read_sem, so that we can block
other ops(ex, recovery) when doing checkpoint, and other ops will not disturb each other,
this can avoid the problem described above completely.
Because of the weakness of rw_sem, the above change may introduce a potential problem
that the checkpoint thread might get starved if other threads are intensively locking
the read semaphore for I/O.(Pointed out by Xu Jin)
In order to avoid this, a wait_list is introduced, the appending read semaphore ops
will be dropped into the wait_list if checkpoint thread is waiting for write semaphore,
and will be waked up when checkpoint thread gives up write semaphore.
Thanks to Kim's previous review and test, and will be very glad to see other guys'
performance tests about this patch.
V2:
-fix the potential starvation problem.
-use more suitable func name suggested by Xu Jin.
Signed-off-by: Gu Zheng <guz.fnst@cn.fujitsu.com>
[Jaegeuk Kim: adjust minor coding standard]
Signed-off-by: Jaegeuk Kim <jaegeuk.kim@samsung.com>
2013-09-27 14:08:30 +04:00
f2fs_lock_op ( sbi ) ;
2012-11-02 12:11:10 +04:00
err = f2fs_add_link ( dentry , inode ) ;
if ( err )
goto out_fail ;
2014-09-25 22:55:53 +04:00
f2fs_unlock_op ( sbi ) ;
2012-11-02 12:11:10 +04:00
f2fs: clean up symbol namespace
As Ted reported:
"Hi, I was looking at f2fs's sources recently, and I noticed that there
is a very large number of non-static symbols which don't have a f2fs
prefix. There's well over a hundred (see attached below).
As one example, in fs/f2fs/dir.c there is:
unsigned char get_de_type(struct f2fs_dir_entry *de)
This function is clearly only useful for f2fs, but it has a generic
name. This means that if any other file system tries to have the same
symbol name, there will be a symbol conflict and the kernel would not
successfully build. It also means that when someone is looking f2fs
sources, it's not at all obvious whether a function such as
read_data_page(), invalidate_blocks(), is a generic kernel function
found in the fs, mm, or block layers, or a f2fs specific function.
You might want to fix this at some point. Hopefully Kent's bcachefs
isn't similarly using genericly named functions, since that might
cause conflicts with f2fs's functions --- but just as this would be a
problem that we would rightly insist that Kent fix, this is something
that we should have rightly insisted that f2fs should have fixed
before it was integrated into the mainline kernel.
acquire_orphan_inode
add_ino_entry
add_orphan_inode
allocate_data_block
allocate_new_segments
alloc_nid
alloc_nid_done
alloc_nid_failed
available_free_memory
...."
This patch adds "f2fs_" prefix for all non-static symbols in order to:
a) avoid conflict with other kernel generic symbols;
b) to indicate the function is f2fs specific one instead of generic
one;
Reported-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-05-29 19:20:41 +03:00
f2fs_alloc_nid_done ( sbi , inode - > i_ino ) ;
2012-11-02 12:11:10 +04:00
2018-05-04 15:23:01 +03:00
d_instantiate_new ( dentry , inode ) ;
2012-11-02 12:11:10 +04:00
2014-11-10 09:15:31 +03:00
if ( IS_DIRSYNC ( dir ) )
f2fs_sync_fs ( sbi - > sb , 1 ) ;
2017-04-12 05:01:26 +03:00
f2fs_balance_fs ( sbi , true ) ;
2012-11-02 12:11:10 +04:00
return 0 ;
out_fail :
2016-05-20 20:13:22 +03:00
clear_inode_flag ( inode , FI_INC_LINK ) ;
f2fs: clean up symbol namespace
As Ted reported:
"Hi, I was looking at f2fs's sources recently, and I noticed that there
is a very large number of non-static symbols which don't have a f2fs
prefix. There's well over a hundred (see attached below).
As one example, in fs/f2fs/dir.c there is:
unsigned char get_de_type(struct f2fs_dir_entry *de)
This function is clearly only useful for f2fs, but it has a generic
name. This means that if any other file system tries to have the same
symbol name, there will be a symbol conflict and the kernel would not
successfully build. It also means that when someone is looking f2fs
sources, it's not at all obvious whether a function such as
read_data_page(), invalidate_blocks(), is a generic kernel function
found in the fs, mm, or block layers, or a f2fs specific function.
You might want to fix this at some point. Hopefully Kent's bcachefs
isn't similarly using genericly named functions, since that might
cause conflicts with f2fs's functions --- but just as this would be a
problem that we would rightly insist that Kent fix, this is something
that we should have rightly insisted that f2fs should have fixed
before it was integrated into the mainline kernel.
acquire_orphan_inode
add_ino_entry
add_orphan_inode
allocate_data_block
allocate_new_segments
alloc_nid
alloc_nid_done
alloc_nid_failed
available_free_memory
...."
This patch adds "f2fs_" prefix for all non-static symbols in order to:
a) avoid conflict with other kernel generic symbols;
b) to indicate the function is f2fs specific one instead of generic
one;
Reported-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-05-29 19:20:41 +03:00
f2fs_handle_failed_inode ( inode ) ;
2012-11-02 12:11:10 +04:00
return err ;
}
/*
 * Remove the directory referenced by dentry from dir .
 *
 * Delegates to f2fs_unlink ( ) and returns its result when f2fs_empty_dir ( )
 * reports the target inode as empty ; otherwise returns - ENOTEMPTY .
 */
static int f2fs_rmdir ( struct inode * dir , struct dentry * dentry )
{
2015-03-18 01:25:59 +03:00
struct inode * inode = d_inode ( dentry ) ;
2021-04-06 04:47:35 +03:00
2012-11-02 12:11:10 +04:00
if ( f2fs_empty_dir ( inode ) )
return f2fs_unlink ( dir , dentry ) ;
return - ENOTEMPTY ;
}
2023-01-13 14:49:16 +03:00
static int f2fs_mknod ( struct mnt_idmap * idmap , struct inode * dir ,
2021-01-21 16:19:43 +03:00
struct dentry * dentry , umode_t mode , dev_t rdev )
2012-11-02 12:11:10 +04:00
{
2014-09-03 02:31:18 +04:00
struct f2fs_sb_info * sbi = F2FS_I_SB ( dir ) ;
2012-11-02 12:11:10 +04:00
struct inode * inode ;
int err = 0 ;
2017-10-24 00:48:49 +03:00
if ( unlikely ( f2fs_cp_error ( sbi ) ) )
return - EIO ;
2019-08-23 12:58:36 +03:00
if ( ! f2fs_is_checkpoint_ready ( sbi ) )
return - ENOSPC ;
2017-10-24 00:48:49 +03:00
2021-10-28 16:03:05 +03:00
err = f2fs_dquot_initialize ( dir ) ;
2017-07-08 19:13:07 +03:00
if ( err )
return err ;
2023-01-13 14:49:25 +03:00
inode = f2fs_new_inode ( idmap , dir , mode , NULL ) ;
2012-11-02 12:11:10 +04:00
if ( IS_ERR ( inode ) )
return PTR_ERR ( inode ) ;
init_special_inode ( inode , inode - > i_mode , rdev ) ;
inode - > i_op = & f2fs_special_inode_operations ;
f2fs: use rw_sem instead of fs_lock(locks mutex)
The fs_locks is used to block other ops(ex, recovery) when doing checkpoint.
And each other operate routine(besides checkpoint) needs to acquire a fs_lock,
there is a terrible problem here, if these are too many concurrency threads acquiring
fs_lock, so that they will block each other and may lead to some performance problem,
but this is not the phenomenon we want to see.
Though there are some optimization patches introduced to enhance the usage of fs_lock,
but the thorough solution is using a *rw_sem* to replace the fs_lock.
Checkpoint routine takes write_sem, and other ops take read_sem, so that we can block
other ops(ex, recovery) when doing checkpoint, and other ops will not disturb each other,
this can avoid the problem described above completely.
Because of the weakness of rw_sem, the above change may introduce a potential problem
that the checkpoint thread might get starved if other threads are intensively locking
the read semaphore for I/O.(Pointed out by Xu Jin)
In order to avoid this, a wait_list is introduced, the appending read semaphore ops
will be dropped into the wait_list if checkpoint thread is waiting for write semaphore,
and will be waked up when checkpoint thread gives up write semaphore.
Thanks to Kim's previous review and test, and will be very glad to see other guys'
performance tests about this patch.
V2:
-fix the potential starvation problem.
-use more suitable func name suggested by Xu Jin.
Signed-off-by: Gu Zheng <guz.fnst@cn.fujitsu.com>
[Jaegeuk Kim: adjust minor coding standard]
Signed-off-by: Jaegeuk Kim <jaegeuk.kim@samsung.com>
2013-09-27 14:08:30 +04:00
f2fs_lock_op ( sbi ) ;
2012-11-02 12:11:10 +04:00
err = f2fs_add_link ( dentry , inode ) ;
if ( err )
goto out ;
2014-09-25 22:55:53 +04:00
f2fs_unlock_op ( sbi ) ;
2012-11-02 12:11:10 +04:00
f2fs: clean up symbol namespace
As Ted reported:
"Hi, I was looking at f2fs's sources recently, and I noticed that there
is a very large number of non-static symbols which don't have a f2fs
prefix. There's well over a hundred (see attached below).
As one example, in fs/f2fs/dir.c there is:
unsigned char get_de_type(struct f2fs_dir_entry *de)
This function is clearly only useful for f2fs, but it has a generic
name. This means that if any other file system tries to have the same
symbol name, there will be a symbol conflict and the kernel would not
successfully build. It also means that when someone is looking f2fs
sources, it's not at all obvious whether a function such as
read_data_page(), invalidate_blocks(), is a generic kernel function
found in the fs, mm, or block layers, or a f2fs specific function.
You might want to fix this at some point. Hopefully Kent's bcachefs
isn't similarly using genericly named functions, since that might
cause conflicts with f2fs's functions --- but just as this would be a
problem that we would rightly insist that Kent fix, this is something
that we should have rightly insisted that f2fs should have fixed
before it was integrated into the mainline kernel.
acquire_orphan_inode
add_ino_entry
add_orphan_inode
allocate_data_block
allocate_new_segments
alloc_nid
alloc_nid_done
alloc_nid_failed
available_free_memory
...."
This patch adds "f2fs_" prefix for all non-static symbols in order to:
a) avoid conflict with other kernel generic symbols;
b) to indicate the function is f2fs specific one instead of generic
one;
Reported-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-05-29 19:20:41 +03:00
f2fs_alloc_nid_done ( sbi , inode - > i_ino ) ;
2014-11-10 09:15:31 +03:00
2018-05-04 15:23:01 +03:00
d_instantiate_new ( dentry , inode ) ;
2014-11-10 09:15:31 +03:00
if ( IS_DIRSYNC ( dir ) )
f2fs_sync_fs ( sbi - > sb , 1 ) ;
2017-04-12 05:01:26 +03:00
f2fs_balance_fs ( sbi , true ) ;
2012-11-02 12:11:10 +04:00
return 0 ;
out :
f2fs: clean up symbol namespace
As Ted reported:
"Hi, I was looking at f2fs's sources recently, and I noticed that there
is a very large number of non-static symbols which don't have a f2fs
prefix. There's well over a hundred (see attached below).
As one example, in fs/f2fs/dir.c there is:
unsigned char get_de_type(struct f2fs_dir_entry *de)
This function is clearly only useful for f2fs, but it has a generic
name. This means that if any other file system tries to have the same
symbol name, there will be a symbol conflict and the kernel would not
successfully build. It also means that when someone is looking at f2fs
sources, it's not at all obvious whether a function such as
read_data_page(), invalidate_blocks(), is a generic kernel function
found in the fs, mm, or block layers, or a f2fs specific function.
You might want to fix this at some point. Hopefully Kent's bcachefs
isn't similarly using generically named functions, since that might
cause conflicts with f2fs's functions --- but just as this would be a
problem that we would rightly insist that Kent fix, this is something
that we should have rightly insisted that f2fs should have fixed
before it was integrated into the mainline kernel.
acquire_orphan_inode
add_ino_entry
add_orphan_inode
allocate_data_block
allocate_new_segments
alloc_nid
alloc_nid_done
alloc_nid_failed
available_free_memory
...."
This patch adds "f2fs_" prefix for all non-static symbols in order to:
a) avoid conflict with other kernel generic symbols;
b) to indicate the function is f2fs specific one instead of generic
one;
Reported-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-05-29 19:20:41 +03:00
f2fs_handle_failed_inode ( inode ) ;
2012-11-02 12:11:10 +04:00
return err ;
}
2023-01-13 14:49:25 +03:00
/*
 * __f2fs_tmpfile - shared helper that creates a new inode under @dir and
 * registers it as an orphan instead of linking it into a directory.
 *
 * @idmap:       passed through unchanged to f2fs_new_inode().
 * @dir:         parent directory; also used to look up the f2fs_sb_info.
 * @file:        only consulted on the non-whiteout path: when non-NULL the
 *               inode is attached with d_tmpfile(), otherwise
 *               f2fs_i_links_write(inode, false) is called instead.
 * @mode:        mode handed to f2fs_new_inode().
 * @is_whiteout: true  -> init_special_inode() with WHITEOUT_DEV, special
 *                        inode operations, and I_LINKABLE is set in i_state
 *                        while holding inode->i_lock;
 *               false -> regular file inode/file/address-space operations.
 * @new_inode:   optional out parameter; if non-NULL it receives the inode
 *               on success.
 *
 * Sequence: quota is initialized for @dir and the inode is allocated before
 * f2fs_lock_op(). Under the op lock an orphan slot is acquired,
 * f2fs_do_tmpfile() runs, the inode is added to the orphan list and its nid
 * allocation is completed. After f2fs_unlock_op() the inode is unlocked with
 * unlock_new_inode() and f2fs_balance_fs(sbi, true) is called.
 *
 * Returns 0 on success. Errors from f2fs_dquot_initialize() and
 * f2fs_new_inode() are returned directly. An error from
 * f2fs_acquire_orphan_inode() jumps to out; an error from f2fs_do_tmpfile()
 * jumps to release_out, which first gives the orphan slot back. Both paths
 * end in f2fs_handle_failed_inode(inode).
 *
 * NOTE(review): the error paths do not call f2fs_unlock_op() in this
 * function; presumably f2fs_handle_failed_inode() drops the op lock.
 * Confirm against its definition before reordering anything here.
 */
static int __f2fs_tmpfile ( struct mnt_idmap * idmap , struct inode * dir ,
2022-09-24 08:00:00 +03:00
struct file * file , umode_t mode , bool is_whiteout ,
2022-04-28 21:18:09 +03:00
struct inode * * new_inode )
f2fs: support RENAME_WHITEOUT
As the description of rename in manual, RENAME_WHITEOUT is a special operation
that only makes sense for overlay/union type filesystem.
When performing rename with RENAME_WHITEOUT, dst will be replace with src, and
meanwhile, a 'whiteout' will be create with name of src.
A "whiteout" is designed to be a char device with 0,0 device number, it has
specially meaning for stackable filesystem. In these filesystems, there are
multiple layers exist, and only top of these can be modified. So a whiteout
in top layer is used to hide a corresponding file in lower layer, as well
removal of whiteout will make the file appear.
Now in overlayfs, when we rename a file which is exist in lower layer, it
will be copied up to upper if it is not on upper layer yet, and then rename
it on upper layer, source file will be whiteouted to hide corresponding file
in lower layer at the same time.
So in upper layer filesystem, implementation of RENAME_WHITEOUT provide a
atomic operation for stackable filesystem to support rename operation.
There are multiple ways to implement RENAME_WHITEOUT in log of this commit:
7dcf5c3e4527 ("xfs: add RENAME_WHITEOUT support") which pointed out by
Dave Chinner.
For now, we just try to follow the way that xfs/ext4 use.
Signed-off-by: Chao Yu <chao2.yu@samsung.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2015-05-19 12:37:26 +03:00
{
struct f2fs_sb_info * sbi = F2FS_I_SB ( dir ) ;
struct inode * inode ;
int err ;
2021-10-28 16:03:05 +03:00
err = f2fs_dquot_initialize ( dir ) ;
2017-07-08 19:13:07 +03:00
if ( err )
return err ;
2023-01-13 14:49:25 +03:00
inode = f2fs_new_inode ( idmap , dir , mode , NULL ) ;
f2fs: support RENAME_WHITEOUT
As the description of rename in manual, RENAME_WHITEOUT is a special operation
that only makes sense for overlay/union type filesystem.
When performing rename with RENAME_WHITEOUT, dst will be replace with src, and
meanwhile, a 'whiteout' will be create with name of src.
A "whiteout" is designed to be a char device with 0,0 device number, it has
specially meaning for stackable filesystem. In these filesystems, there are
multiple layers exist, and only top of these can be modified. So a whiteout
in top layer is used to hide a corresponding file in lower layer, as well
removal of whiteout will make the file appear.
Now in overlayfs, when we rename a file which is exist in lower layer, it
will be copied up to upper if it is not on upper layer yet, and then rename
it on upper layer, source file will be whiteouted to hide corresponding file
in lower layer at the same time.
So in upper layer filesystem, implementation of RENAME_WHITEOUT provide a
atomic operation for stackable filesystem to support rename operation.
There are multiple ways to implement RENAME_WHITEOUT in log of this commit:
7dcf5c3e4527 ("xfs: add RENAME_WHITEOUT support") which pointed out by
Dave Chinner.
For now, we just try to follow the way that xfs/ext4 use.
Signed-off-by: Chao Yu <chao2.yu@samsung.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2015-05-19 12:37:26 +03:00
if ( IS_ERR ( inode ) )
return PTR_ERR ( inode ) ;
2022-04-28 21:18:09 +03:00
if ( is_whiteout ) {
f2fs: support RENAME_WHITEOUT
As the description of rename in manual, RENAME_WHITEOUT is a special operation
that only makes sense for overlay/union type filesystem.
When performing rename with RENAME_WHITEOUT, dst will be replace with src, and
meanwhile, a 'whiteout' will be create with name of src.
A "whiteout" is designed to be a char device with 0,0 device number, it has
specially meaning for stackable filesystem. In these filesystems, there are
multiple layers exist, and only top of these can be modified. So a whiteout
in top layer is used to hide a corresponding file in lower layer, as well
removal of whiteout will make the file appear.
Now in overlayfs, when we rename a file which is exist in lower layer, it
will be copied up to upper if it is not on upper layer yet, and then rename
it on upper layer, source file will be whiteouted to hide corresponding file
in lower layer at the same time.
So in upper layer filesystem, implementation of RENAME_WHITEOUT provide a
atomic operation for stackable filesystem to support rename operation.
There are multiple ways to implement RENAME_WHITEOUT in log of this commit:
7dcf5c3e4527 ("xfs: add RENAME_WHITEOUT support") which pointed out by
Dave Chinner.
For now, we just try to follow the way that xfs/ext4 use.
Signed-off-by: Chao Yu <chao2.yu@samsung.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2015-05-19 12:37:26 +03:00
init_special_inode ( inode , inode - > i_mode , WHITEOUT_DEV ) ;
inode - > i_op = & f2fs_special_inode_operations ;
} else {
inode - > i_op = & f2fs_file_inode_operations ;
inode - > i_fop = & f2fs_file_operations ;
inode - > i_mapping - > a_ops = & f2fs_dblock_aops ;
}
f2fs_lock_op ( sbi ) ;
f2fs: clean up symbol namespace
As Ted reported:
"Hi, I was looking at f2fs's sources recently, and I noticed that there
is a very large number of non-static symbols which don't have a f2fs
prefix. There's well over a hundred (see attached below).
As one example, in fs/f2fs/dir.c there is:
unsigned char get_de_type(struct f2fs_dir_entry *de)
This function is clearly only useful for f2fs, but it has a generic
name. This means that if any other file system tries to have the same
symbol name, there will be a symbol conflict and the kernel would not
successfully build. It also means that when someone is looking f2fs
sources, it's not at all obvious whether a function such as
read_data_page(), invalidate_blocks(), is a generic kernel function
found in the fs, mm, or block layers, or a f2fs specific function.
You might want to fix this at some point. Hopefully Kent's bcachefs
isn't similarly using genericly named functions, since that might
cause conflicts with f2fs's functions --- but just as this would be a
problem that we would rightly insist that Kent fix, this is something
that we should have rightly insisted that f2fs should have fixed
before it was integrated into the mainline kernel.
acquire_orphan_inode
add_ino_entry
add_orphan_inode
allocate_data_block
allocate_new_segments
alloc_nid
alloc_nid_done
alloc_nid_failed
available_free_memory
...."
This patch adds "f2fs_" prefix for all non-static symbols in order to:
a) avoid conflict with other kernel generic symbols;
b) to indicate the function is f2fs specific one instead of generic
one;
Reported-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-05-29 19:20:41 +03:00
err = f2fs_acquire_orphan_inode ( sbi ) ;
f2fs: support RENAME_WHITEOUT
As the description of rename in manual, RENAME_WHITEOUT is a special operation
that only makes sense for overlay/union type filesystem.
When performing rename with RENAME_WHITEOUT, dst will be replace with src, and
meanwhile, a 'whiteout' will be create with name of src.
A "whiteout" is designed to be a char device with 0,0 device number, it has
specially meaning for stackable filesystem. In these filesystems, there are
multiple layers exist, and only top of these can be modified. So a whiteout
in top layer is used to hide a corresponding file in lower layer, as well
removal of whiteout will make the file appear.
Now in overlayfs, when we rename a file which is exist in lower layer, it
will be copied up to upper if it is not on upper layer yet, and then rename
it on upper layer, source file will be whiteouted to hide corresponding file
in lower layer at the same time.
So in upper layer filesystem, implementation of RENAME_WHITEOUT provide a
atomic operation for stackable filesystem to support rename operation.
There are multiple ways to implement RENAME_WHITEOUT in log of this commit:
7dcf5c3e4527 ("xfs: add RENAME_WHITEOUT support") which pointed out by
Dave Chinner.
For now, we just try to follow the way that xfs/ext4 use.
Signed-off-by: Chao Yu <chao2.yu@samsung.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2015-05-19 12:37:26 +03:00
if ( err )
goto out ;
err = f2fs_do_tmpfile ( inode , dir ) ;
if ( err )
goto release_out ;
/*
* add this non - linked tmpfile to orphan list , in this way we could
* remove all unused data of tmpfile after abnormal power - off .
*/
f2fs: clean up symbol namespace
As Ted reported:
"Hi, I was looking at f2fs's sources recently, and I noticed that there
is a very large number of non-static symbols which don't have a f2fs
prefix. There's well over a hundred (see attached below).
As one example, in fs/f2fs/dir.c there is:
unsigned char get_de_type(struct f2fs_dir_entry *de)
This function is clearly only useful for f2fs, but it has a generic
name. This means that if any other file system tries to have the same
symbol name, there will be a symbol conflict and the kernel would not
successfully build. It also means that when someone is looking f2fs
sources, it's not at all obvious whether a function such as
read_data_page(), invalidate_blocks(), is a generic kernel function
found in the fs, mm, or block layers, or a f2fs specific function.
You might want to fix this at some point. Hopefully Kent's bcachefs
isn't similarly using genericly named functions, since that might
cause conflicts with f2fs's functions --- but just as this would be a
problem that we would rightly insist that Kent fix, this is something
that we should have rightly insisted that f2fs should have fixed
before it was integrated into the mainline kernel.
acquire_orphan_inode
add_ino_entry
add_orphan_inode
allocate_data_block
allocate_new_segments
alloc_nid
alloc_nid_done
alloc_nid_failed
available_free_memory
...."
This patch adds "f2fs_" prefix for all non-static symbols in order to:
a) avoid conflict with other kernel generic symbols;
b) to indicate the function is f2fs specific one instead of generic
one;
Reported-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-05-29 19:20:41 +03:00
f2fs_add_orphan_inode ( inode ) ;
f2fs_alloc_nid_done ( sbi , inode - > i_ino ) ;
f2fs: support RENAME_WHITEOUT
As the description of rename in manual, RENAME_WHITEOUT is a special operation
that only makes sense for overlay/union type filesystem.
When performing rename with RENAME_WHITEOUT, dst will be replace with src, and
meanwhile, a 'whiteout' will be create with name of src.
A "whiteout" is designed to be a char device with 0,0 device number, it has
specially meaning for stackable filesystem. In these filesystems, there are
multiple layers exist, and only top of these can be modified. So a whiteout
in top layer is used to hide a corresponding file in lower layer, as well
removal of whiteout will make the file appear.
Now in overlayfs, when we rename a file which is exist in lower layer, it
will be copied up to upper if it is not on upper layer yet, and then rename
it on upper layer, source file will be whiteouted to hide corresponding file
in lower layer at the same time.
So in upper layer filesystem, implementation of RENAME_WHITEOUT provide a
atomic operation for stackable filesystem to support rename operation.
There are multiple ways to implement RENAME_WHITEOUT in log of this commit:
7dcf5c3e4527 ("xfs: add RENAME_WHITEOUT support") which pointed out by
Dave Chinner.
For now, we just try to follow the way that xfs/ext4 use.
Signed-off-by: Chao Yu <chao2.yu@samsung.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2015-05-19 12:37:26 +03:00
2022-04-28 21:18:09 +03:00
if ( is_whiteout ) {
2016-05-20 19:43:20 +03:00
f2fs_i_links_write ( inode , false ) ;
2021-01-12 04:55:09 +03:00
spin_lock ( & inode - > i_lock ) ;
2019-12-07 03:59:58 +03:00
inode - > i_state | = I_LINKABLE ;
2021-01-12 04:55:09 +03:00
spin_unlock ( & inode - > i_lock ) ;
f2fs: support RENAME_WHITEOUT
As the description of rename in manual, RENAME_WHITEOUT is a special operation
that only makes sense for overlay/union type filesystem.
When performing rename with RENAME_WHITEOUT, dst will be replace with src, and
meanwhile, a 'whiteout' will be create with name of src.
A "whiteout" is designed to be a char device with 0,0 device number, it has
specially meaning for stackable filesystem. In these filesystems, there are
multiple layers exist, and only top of these can be modified. So a whiteout
in top layer is used to hide a corresponding file in lower layer, as well
removal of whiteout will make the file appear.
Now in overlayfs, when we rename a file which is exist in lower layer, it
will be copied up to upper if it is not on upper layer yet, and then rename
it on upper layer, source file will be whiteouted to hide corresponding file
in lower layer at the same time.
So in upper layer filesystem, implementation of RENAME_WHITEOUT provide a
atomic operation for stackable filesystem to support rename operation.
There are multiple ways to implement RENAME_WHITEOUT in log of this commit:
7dcf5c3e4527 ("xfs: add RENAME_WHITEOUT support") which pointed out by
Dave Chinner.
For now, we just try to follow the way that xfs/ext4 use.
Signed-off-by: Chao Yu <chao2.yu@samsung.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2015-05-19 12:37:26 +03:00
} else {
2022-09-24 08:00:00 +03:00
if ( file )
d_tmpfile ( file , inode ) ;
2022-04-28 21:18:09 +03:00
else
f2fs_i_links_write ( inode , false ) ;
f2fs: support RENAME_WHITEOUT
As the description of rename in manual, RENAME_WHITEOUT is a special operation
that only makes sense for overlay/union type filesystem.
When performing rename with RENAME_WHITEOUT, dst will be replace with src, and
meanwhile, a 'whiteout' will be create with name of src.
A "whiteout" is designed to be a char device with 0,0 device number, it has
specially meaning for stackable filesystem. In these filesystems, there are
multiple layers exist, and only top of these can be modified. So a whiteout
in top layer is used to hide a corresponding file in lower layer, as well
removal of whiteout will make the file appear.
Now in overlayfs, when we rename a file which is exist in lower layer, it
will be copied up to upper if it is not on upper layer yet, and then rename
it on upper layer, source file will be whiteouted to hide corresponding file
in lower layer at the same time.
So in upper layer filesystem, implementation of RENAME_WHITEOUT provide a
atomic operation for stackable filesystem to support rename operation.
There are multiple ways to implement RENAME_WHITEOUT in log of this commit:
7dcf5c3e4527 ("xfs: add RENAME_WHITEOUT support") which pointed out by
Dave Chinner.
For now, we just try to follow the way that xfs/ext4 use.
Signed-off-by: Chao Yu <chao2.yu@samsung.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2015-05-19 12:37:26 +03:00
}
2016-05-20 19:43:20 +03:00
/* link_count was changed by d_tmpfile as well. */
f2fs_unlock_op ( sbi ) ;
f2fs: support RENAME_WHITEOUT
As the description of rename in manual, RENAME_WHITEOUT is a special operation
that only makes sense for overlay/union type filesystem.
When performing rename with RENAME_WHITEOUT, dst will be replace with src, and
meanwhile, a 'whiteout' will be create with name of src.
A "whiteout" is designed to be a char device with 0,0 device number, it has
specially meaning for stackable filesystem. In these filesystems, there are
multiple layers exist, and only top of these can be modified. So a whiteout
in top layer is used to hide a corresponding file in lower layer, as well
removal of whiteout will make the file appear.
Now in overlayfs, when we rename a file which is exist in lower layer, it
will be copied up to upper if it is not on upper layer yet, and then rename
it on upper layer, source file will be whiteouted to hide corresponding file
in lower layer at the same time.
So in upper layer filesystem, implementation of RENAME_WHITEOUT provide a
atomic operation for stackable filesystem to support rename operation.
There are multiple ways to implement RENAME_WHITEOUT in log of this commit:
7dcf5c3e4527 ("xfs: add RENAME_WHITEOUT support") which pointed out by
Dave Chinner.
For now, we just try to follow the way that xfs/ext4 use.
Signed-off-by: Chao Yu <chao2.yu@samsung.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2015-05-19 12:37:26 +03:00
unlock_new_inode ( inode ) ;
2017-04-12 05:01:26 +03:00
2022-04-28 21:18:09 +03:00
if ( new_inode )
* new_inode = inode ;
2017-04-12 05:01:26 +03:00
f2fs_balance_fs ( sbi , true ) ;
f2fs: support RENAME_WHITEOUT
As the description of rename in manual, RENAME_WHITEOUT is a special operation
that only makes sense for overlay/union type filesystem.
When performing rename with RENAME_WHITEOUT, dst will be replace with src, and
meanwhile, a 'whiteout' will be create with name of src.
A "whiteout" is designed to be a char device with 0,0 device number, it has
specially meaning for stackable filesystem. In these filesystems, there are
multiple layers exist, and only top of these can be modified. So a whiteout
in top layer is used to hide a corresponding file in lower layer, as well
removal of whiteout will make the file appear.
Now in overlayfs, when we rename a file which is exist in lower layer, it
will be copied up to upper if it is not on upper layer yet, and then rename
it on upper layer, source file will be whiteouted to hide corresponding file
in lower layer at the same time.
So in upper layer filesystem, implementation of RENAME_WHITEOUT provide a
atomic operation for stackable filesystem to support rename operation.
There are multiple ways to implement RENAME_WHITEOUT in log of this commit:
7dcf5c3e4527 ("xfs: add RENAME_WHITEOUT support") which pointed out by
Dave Chinner.
For now, we just try to follow the way that xfs/ext4 use.
Signed-off-by: Chao Yu <chao2.yu@samsung.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2015-05-19 12:37:26 +03:00
return 0 ;
release_out :
f2fs: clean up symbol namespace
As Ted reported:
"Hi, I was looking at f2fs's sources recently, and I noticed that there
is a very large number of non-static symbols which don't have a f2fs
prefix. There's well over a hundred (see attached below).
As one example, in fs/f2fs/dir.c there is:
unsigned char get_de_type(struct f2fs_dir_entry *de)
This function is clearly only useful for f2fs, but it has a generic
name. This means that if any other file system tries to have the same
symbol name, there will be a symbol conflict and the kernel would not
successfully build. It also means that when someone is looking f2fs
sources, it's not at all obvious whether a function such as
read_data_page(), invalidate_blocks(), is a generic kernel function
found in the fs, mm, or block layers, or a f2fs specific function.
You might want to fix this at some point. Hopefully Kent's bcachefs
isn't similarly using genericly named functions, since that might
cause conflicts with f2fs's functions --- but just as this would be a
problem that we would rightly insist that Kent fix, this is something
that we should have rightly insisted that f2fs should have fixed
before it was integrated into the mainline kernel.
acquire_orphan_inode
add_ino_entry
add_orphan_inode
allocate_data_block
allocate_new_segments
alloc_nid
alloc_nid_done
alloc_nid_failed
available_free_memory
...."
This patch adds "f2fs_" prefix for all non-static symbols in order to:
a) avoid conflict with other kernel generic symbols;
b) to indicate the function is f2fs specific one instead of generic
one;
Reported-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-05-29 19:20:41 +03:00
f2fs_release_orphan_inode ( sbi ) ;
f2fs: support RENAME_WHITEOUT
As the description of rename in manual, RENAME_WHITEOUT is a special operation
that only makes sense for overlay/union type filesystem.
When performing rename with RENAME_WHITEOUT, dst will be replace with src, and
meanwhile, a 'whiteout' will be create with name of src.
A "whiteout" is designed to be a char device with 0,0 device number, it has
specially meaning for stackable filesystem. In these filesystems, there are
multiple layers exist, and only top of these can be modified. So a whiteout
in top layer is used to hide a corresponding file in lower layer, as well
removal of whiteout will make the file appear.
Now in overlayfs, when we rename a file which is exist in lower layer, it
will be copied up to upper if it is not on upper layer yet, and then rename
it on upper layer, source file will be whiteouted to hide corresponding file
in lower layer at the same time.
So in upper layer filesystem, implementation of RENAME_WHITEOUT provide a
atomic operation for stackable filesystem to support rename operation.
There are multiple ways to implement RENAME_WHITEOUT in log of this commit:
7dcf5c3e4527 ("xfs: add RENAME_WHITEOUT support") which pointed out by
Dave Chinner.
For now, we just try to follow the way that xfs/ext4 use.
Signed-off-by: Chao Yu <chao2.yu@samsung.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2015-05-19 12:37:26 +03:00
out :
f2fs: clean up symbol namespace
As Ted reported:
"Hi, I was looking at f2fs's sources recently, and I noticed that there
is a very large number of non-static symbols which don't have a f2fs
prefix. There's well over a hundred (see attached below).
As one example, in fs/f2fs/dir.c there is:
unsigned char get_de_type(struct f2fs_dir_entry *de)
This function is clearly only useful for f2fs, but it has a generic
name. This means that if any other file system tries to have the same
symbol name, there will be a symbol conflict and the kernel would not
successfully build. It also means that when someone is looking f2fs
sources, it's not at all obvious whether a function such as
read_data_page(), invalidate_blocks(), is a generic kernel function
found in the fs, mm, or block layers, or a f2fs specific function.
You might want to fix this at some point. Hopefully Kent's bcachefs
isn't similarly using genericly named functions, since that might
cause conflicts with f2fs's functions --- but just as this would be a
problem that we would rightly insist that Kent fix, this is something
that we should have rightly insisted that f2fs should have fixed
before it was integrated into the mainline kernel.
acquire_orphan_inode
add_ino_entry
add_orphan_inode
allocate_data_block
allocate_new_segments
alloc_nid
alloc_nid_done
alloc_nid_failed
available_free_memory
...."
This patch adds "f2fs_" prefix for all non-static symbols in order to:
a) avoid conflict with other kernel generic symbols;
b) to indicate the function is f2fs specific one instead of generic
one;
Reported-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-05-29 19:20:41 +03:00
f2fs_handle_failed_inode ( inode ) ;
f2fs: support RENAME_WHITEOUT
As the description of rename in manual, RENAME_WHITEOUT is a special operation
that only makes sense for overlay/union type filesystem.
When performing rename with RENAME_WHITEOUT, dst will be replace with src, and
meanwhile, a 'whiteout' will be create with name of src.
A "whiteout" is designed to be a char device with 0,0 device number, it has
specially meaning for stackable filesystem. In these filesystems, there are
multiple layers exist, and only top of these can be modified. So a whiteout
in top layer is used to hide a corresponding file in lower layer, as well
removal of whiteout will make the file appear.
Now in overlayfs, when we rename a file which is exist in lower layer, it
will be copied up to upper if it is not on upper layer yet, and then rename
it on upper layer, source file will be whiteouted to hide corresponding file
in lower layer at the same time.
So in upper layer filesystem, implementation of RENAME_WHITEOUT provide a
atomic operation for stackable filesystem to support rename operation.
There are multiple ways to implement RENAME_WHITEOUT in log of this commit:
7dcf5c3e4527 ("xfs: add RENAME_WHITEOUT support") which pointed out by
Dave Chinner.
For now, we just try to follow the way that xfs/ext4 use.
Signed-off-by: Chao Yu <chao2.yu@samsung.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2015-05-19 12:37:26 +03:00
return err ;
}
2023-01-13 14:49:18 +03:00
/*
 * f2fs_tmpfile - create a non-whiteout temporary inode in @dir for @file.
 *
 * Fails early with -EIO when f2fs_cp_error() reports a checkpoint error and
 * with -ENOSPC when f2fs_is_checkpoint_ready() returns false. Otherwise the
 * work is delegated to __f2fs_tmpfile() with is_whiteout == false and no
 * out-inode pointer, and its result (success or error) is passed to
 * finish_open_simple() together with @file; that return value is returned.
 */
static int f2fs_tmpfile ( struct mnt_idmap * idmap , struct inode * dir ,
2022-09-24 08:00:00 +03:00
struct file * file , umode_t mode )
f2fs: support RENAME_WHITEOUT
As the description of rename in manual, RENAME_WHITEOUT is a special operation
that only makes sense for overlay/union type filesystem.
When performing rename with RENAME_WHITEOUT, dst will be replace with src, and
meanwhile, a 'whiteout' will be create with name of src.
A "whiteout" is designed to be a char device with 0,0 device number, it has
specially meaning for stackable filesystem. In these filesystems, there are
multiple layers exist, and only top of these can be modified. So a whiteout
in top layer is used to hide a corresponding file in lower layer, as well
removal of whiteout will make the file appear.
Now in overlayfs, when we rename a file which is exist in lower layer, it
will be copied up to upper if it is not on upper layer yet, and then rename
it on upper layer, source file will be whiteouted to hide corresponding file
in lower layer at the same time.
So in upper layer filesystem, implementation of RENAME_WHITEOUT provide a
atomic operation for stackable filesystem to support rename operation.
There are multiple ways to implement RENAME_WHITEOUT in log of this commit:
7dcf5c3e4527 ("xfs: add RENAME_WHITEOUT support") which pointed out by
Dave Chinner.
For now, we just try to follow the way that xfs/ext4 use.
Signed-off-by: Chao Yu <chao2.yu@samsung.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2015-05-19 12:37:26 +03:00
{
2018-03-15 13:51:42 +03:00
struct f2fs_sb_info * sbi = F2FS_I_SB ( dir ) ;
2022-09-24 08:00:00 +03:00
int err ;
2018-03-15 13:51:42 +03:00
if ( unlikely ( f2fs_cp_error ( sbi ) ) )
2017-10-24 00:48:49 +03:00
return - EIO ;
2019-08-23 12:58:36 +03:00
if ( ! f2fs_is_checkpoint_ready ( sbi ) )
return - ENOSPC ;
2017-10-24 00:48:49 +03:00
2023-01-13 14:49:25 +03:00
err = __f2fs_tmpfile ( idmap , dir , file , mode , false , NULL ) ;
2022-09-24 08:00:00 +03:00
return finish_open_simple ( file , err ) ;
f2fs: support RENAME_WHITEOUT
As the description of rename in manual, RENAME_WHITEOUT is a special operation
that only makes sense for overlay/union type filesystem.
When performing rename with RENAME_WHITEOUT, dst will be replace with src, and
meanwhile, a 'whiteout' will be create with name of src.
A "whiteout" is designed to be a char device with 0,0 device number, it has
specially meaning for stackable filesystem. In these filesystems, there are
multiple layers exist, and only top of these can be modified. So a whiteout
in top layer is used to hide a corresponding file in lower layer, as well
removal of whiteout will make the file appear.
Now in overlayfs, when we rename a file which is exist in lower layer, it
will be copied up to upper if it is not on upper layer yet, and then rename
it on upper layer, source file will be whiteouted to hide corresponding file
in lower layer at the same time.
So in upper layer filesystem, implementation of RENAME_WHITEOUT provide a
atomic operation for stackable filesystem to support rename operation.
There are multiple ways to implement RENAME_WHITEOUT in log of this commit:
7dcf5c3e4527 ("xfs: add RENAME_WHITEOUT support") which pointed out by
Dave Chinner.
For now, we just try to follow the way that xfs/ext4 use.
Signed-off-by: Chao Yu <chao2.yu@samsung.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2015-05-19 12:37:26 +03:00
}
2023-01-13 14:49:25 +03:00
/*
 * f2fs_create_whiteout - create a whiteout inode under @dir.
 *
 * Thin wrapper around __f2fs_tmpfile(): no struct file is supplied, the mode
 * is S_IFCHR | WHITEOUT_MODE and is_whiteout is true. The new inode is
 * handed back through @whiteout. Returns whatever __f2fs_tmpfile() returns.
 */
static int f2fs_create_whiteout ( struct mnt_idmap * idmap ,
2022-02-04 08:24:56 +03:00
struct inode * dir , struct inode * * whiteout )
f2fs: support RENAME_WHITEOUT
As the description of rename in manual, RENAME_WHITEOUT is a special operation
that only makes sense for overlay/union type filesystem.
When performing rename with RENAME_WHITEOUT, dst will be replace with src, and
meanwhile, a 'whiteout' will be create with name of src.
A "whiteout" is designed to be a char device with 0,0 device number, it has
specially meaning for stackable filesystem. In these filesystems, there are
multiple layers exist, and only top of these can be modified. So a whiteout
in top layer is used to hide a corresponding file in lower layer, as well
removal of whiteout will make the file appear.
Now in overlayfs, when we rename a file which is exist in lower layer, it
will be copied up to upper if it is not on upper layer yet, and then rename
it on upper layer, source file will be whiteouted to hide corresponding file
in lower layer at the same time.
So in upper layer filesystem, implementation of RENAME_WHITEOUT provide a
atomic operation for stackable filesystem to support rename operation.
There are multiple ways to implement RENAME_WHITEOUT in log of this commit:
7dcf5c3e4527 ("xfs: add RENAME_WHITEOUT support") which pointed out by
Dave Chinner.
For now, we just try to follow the way that xfs/ext4 use.
Signed-off-by: Chao Yu <chao2.yu@samsung.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2015-05-19 12:37:26 +03:00
{
2023-01-13 14:49:25 +03:00
return __f2fs_tmpfile ( idmap , dir , NULL ,
2022-04-28 21:18:09 +03:00
S_IFCHR | WHITEOUT_MODE , true , whiteout ) ;
}
2023-01-13 14:49:25 +03:00
/*
 * f2fs_get_tmpfile - create an S_IFREG temporary inode under @dir without a
 * struct file.
 *
 * Non-static wrapper around __f2fs_tmpfile() with file == NULL and
 * is_whiteout == false; the new inode is returned through @new_inode.
 * Returns whatever __f2fs_tmpfile() returns.
 */
int f2fs_get_tmpfile ( struct mnt_idmap * idmap , struct inode * dir ,
2022-04-28 21:18:09 +03:00
struct inode * * new_inode )
{
2023-01-13 14:49:25 +03:00
return __f2fs_tmpfile ( idmap , dir , NULL , S_IFREG , false , new_inode ) ;
f2fs: support RENAME_WHITEOUT
As the description of rename in manual, RENAME_WHITEOUT is a special operation
that only makes sense for overlay/union type filesystem.
When performing rename with RENAME_WHITEOUT, dst will be replace with src, and
meanwhile, a 'whiteout' will be create with name of src.
A "whiteout" is designed to be a char device with 0,0 device number, it has
specially meaning for stackable filesystem. In these filesystems, there are
multiple layers exist, and only top of these can be modified. So a whiteout
in top layer is used to hide a corresponding file in lower layer, as well
removal of whiteout will make the file appear.
Now in overlayfs, when we rename a file which is exist in lower layer, it
will be copied up to upper if it is not on upper layer yet, and then rename
it on upper layer, source file will be whiteouted to hide corresponding file
in lower layer at the same time.
So in upper layer filesystem, implementation of RENAME_WHITEOUT provide a
atomic operation for stackable filesystem to support rename operation.
There are multiple ways to implement RENAME_WHITEOUT in log of this commit:
7dcf5c3e4527 ("xfs: add RENAME_WHITEOUT support") which pointed out by
Dave Chinner.
For now, we just try to follow the way that xfs/ext4 use.
Signed-off-by: Chao Yu <chao2.yu@samsung.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2015-05-19 12:37:26 +03:00
}
2023-01-13 14:49:25 +03:00
static int f2fs_rename ( struct mnt_idmap * idmap , struct inode * old_dir ,
2022-02-04 08:24:56 +03:00
struct dentry * old_dentry , struct inode * new_dir ,
struct dentry * new_dentry , unsigned int flags )
2012-11-02 12:11:10 +04:00
{
2014-09-03 02:31:18 +04:00
struct f2fs_sb_info * sbi = F2FS_I_SB ( old_dir ) ;
2015-03-18 01:25:59 +03:00
struct inode * old_inode = d_inode ( old_dentry ) ;
struct inode * new_inode = d_inode ( new_dentry ) ;
f2fs: support RENAME_WHITEOUT
As the description of rename in manual, RENAME_WHITEOUT is a special operation
that only makes sense for overlay/union type filesystem.
When performing rename with RENAME_WHITEOUT, dst will be replace with src, and
meanwhile, a 'whiteout' will be create with name of src.
A "whiteout" is designed to be a char device with 0,0 device number, it has
specially meaning for stackable filesystem. In these filesystems, there are
multiple layers exist, and only top of these can be modified. So a whiteout
in top layer is used to hide a corresponding file in lower layer, as well
removal of whiteout will make the file appear.
Now in overlayfs, when we rename a file which is exist in lower layer, it
will be copied up to upper if it is not on upper layer yet, and then rename
it on upper layer, source file will be whiteouted to hide corresponding file
in lower layer at the same time.
So in upper layer filesystem, implementation of RENAME_WHITEOUT provide a
atomic operation for stackable filesystem to support rename operation.
There are multiple ways to implement RENAME_WHITEOUT in log of this commit:
7dcf5c3e4527 ("xfs: add RENAME_WHITEOUT support") which pointed out by
Dave Chinner.
For now, we just try to follow the way that xfs/ext4 use.
Signed-off-by: Chao Yu <chao2.yu@samsung.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2015-05-19 12:37:26 +03:00
struct inode * whiteout = NULL ;
2019-12-12 02:10:47 +03:00
struct page * old_dir_page = NULL ;
f2fs: support RENAME_WHITEOUT
As the description of rename in manual, RENAME_WHITEOUT is a special operation
that only makes sense for overlay/union type filesystem.
When performing rename with RENAME_WHITEOUT, dst will be replace with src, and
meanwhile, a 'whiteout' will be create with name of src.
A "whiteout" is designed to be a char device with 0,0 device number, it has
specially meaning for stackable filesystem. In these filesystems, there are
multiple layers exist, and only top of these can be modified. So a whiteout
in top layer is used to hide a corresponding file in lower layer, as well
removal of whiteout will make the file appear.
Now in overlayfs, when we rename a file which is exist in lower layer, it
will be copied up to upper if it is not on upper layer yet, and then rename
it on upper layer, source file will be whiteouted to hide corresponding file
in lower layer at the same time.
So in upper layer filesystem, implementation of RENAME_WHITEOUT provide a
atomic operation for stackable filesystem to support rename operation.
There are multiple ways to implement RENAME_WHITEOUT in log of this commit:
7dcf5c3e4527 ("xfs: add RENAME_WHITEOUT support") which pointed out by
Dave Chinner.
For now, we just try to follow the way that xfs/ext4 use.
Signed-off-by: Chao Yu <chao2.yu@samsung.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2015-05-19 12:37:26 +03:00
struct page * old_page , * new_page = NULL ;
2012-11-02 12:11:10 +04:00
struct f2fs_dir_entry * old_dir_entry = NULL ;
struct f2fs_dir_entry * old_entry ;
struct f2fs_dir_entry * new_entry ;
2023-10-13 00:14:23 +03:00
bool old_is_dir = S_ISDIR ( old_inode - > i_mode ) ;
2018-09-17 23:25:04 +03:00
int err ;
2012-11-02 12:11:10 +04:00
2017-10-24 00:48:49 +03:00
if ( unlikely ( f2fs_cp_error ( sbi ) ) )
return - EIO ;
2019-08-23 12:58:36 +03:00
if ( ! f2fs_is_checkpoint_ready ( sbi ) )
return - ENOSPC ;
2017-10-24 00:48:49 +03:00
2017-07-25 19:01:41 +03:00
if ( is_inode_flag_set ( new_dir , FI_PROJ_INHERIT ) & &
( ! projid_eq ( F2FS_I ( new_dir ) - > i_projid ,
F2FS_I ( old_dentry - > d_inode ) - > i_projid ) ) )
return - EXDEV ;
2019-12-10 06:03:05 +03:00
/*
* If new_inode is null , the below renaming flow will
2023-02-06 14:56:00 +03:00
* add a link in old_dir which can convert inline_dir .
2019-12-10 06:03:05 +03:00
* After then , if we failed to get the entry due to other
* reasons like ENOMEM , we had to remove the new entry .
* Instead of adding such the error handling routine , let ' s
* simply convert first here .
*/
if ( old_dir = = new_dir & & ! new_inode ) {
err = f2fs_try_convert_inline_dir ( old_dir , new_dentry ) ;
if ( err )
return err ;
}
2019-12-07 03:59:58 +03:00
if ( flags & RENAME_WHITEOUT ) {
2023-01-13 14:49:25 +03:00
err = f2fs_create_whiteout ( idmap , old_dir , & whiteout ) ;
2019-12-07 03:59:58 +03:00
if ( err )
return err ;
}
2021-10-28 16:03:05 +03:00
err = f2fs_dquot_initialize ( old_dir ) ;
2017-07-08 19:13:07 +03:00
if ( err )
goto out ;
2021-10-28 16:03:05 +03:00
err = f2fs_dquot_initialize ( new_dir ) ;
2017-07-08 19:13:07 +03:00
if ( err )
goto out ;
2017-10-24 00:50:15 +03:00
if ( new_inode ) {
2021-10-28 16:03:05 +03:00
err = f2fs_dquot_initialize ( new_inode ) ;
2017-10-24 00:50:15 +03:00
if ( err )
goto out ;
}
2018-09-17 23:25:04 +03:00
err = - ENOENT ;
2012-11-02 12:11:10 +04:00
old_entry = f2fs_find_entry ( old_dir , & old_dentry - > d_name , & old_page ) ;
2016-07-19 03:27:47 +03:00
if ( ! old_entry ) {
if ( IS_ERR ( old_page ) )
err = PTR_ERR ( old_page ) ;
2023-06-01 13:58:23 +03:00
goto out ;
2016-07-19 03:27:47 +03:00
}
2012-11-02 12:11:10 +04:00
2023-10-13 00:14:23 +03:00
if ( old_is_dir & & old_dir ! = new_dir ) {
2012-11-02 12:11:10 +04:00
old_dir_entry = f2fs_parent_dir ( old_inode , & old_dir_page ) ;
2016-06-10 00:57:19 +03:00
if ( ! old_dir_entry ) {
2016-07-19 03:27:47 +03:00
if ( IS_ERR ( old_dir_page ) )
err = PTR_ERR ( old_dir_page ) ;
2012-11-02 12:11:10 +04:00
goto out_old ;
2016-06-10 00:57:19 +03:00
}
2012-11-02 12:11:10 +04:00
}
if ( new_inode ) {
err = - ENOTEMPTY ;
2023-10-13 00:14:23 +03:00
if ( old_is_dir & & ! f2fs_empty_dir ( new_inode ) )
2019-12-07 03:59:58 +03:00
goto out_dir ;
2012-11-02 12:11:10 +04:00
err = - ENOENT ;
new_entry = f2fs_find_entry ( new_dir , & new_dentry - > d_name ,
& new_page ) ;
2016-07-19 03:27:47 +03:00
if ( ! new_entry ) {
if ( IS_ERR ( new_page ) )
err = PTR_ERR ( new_page ) ;
2019-12-07 03:59:58 +03:00
goto out_dir ;
2016-07-19 03:27:47 +03:00
}
2012-11-02 12:11:10 +04:00
2016-01-08 01:15:04 +03:00
f2fs_balance_fs ( sbi , true ) ;
2015-12-22 22:56:08 +03:00
2014-06-24 10:16:24 +04:00
f2fs_lock_op ( sbi ) ;
f2fs: clean up symbol namespace
As Ted reported:
"Hi, I was looking at f2fs's sources recently, and I noticed that there
is a very large number of non-static symbols which don't have a f2fs
prefix. There's well over a hundred (see attached below).
As one example, in fs/f2fs/dir.c there is:
unsigned char get_de_type(struct f2fs_dir_entry *de)
This function is clearly only useful for f2fs, but it has a generic
name. This means that if any other file system tries to have the same
symbol name, there will be a symbol conflict and the kernel would not
successfully build. It also means that when someone is looking f2fs
sources, it's not at all obvious whether a function such as
read_data_page(), invalidate_blocks(), is a generic kernel function
found in the fs, mm, or block layers, or a f2fs specific function.
You might want to fix this at some point. Hopefully Kent's bcachefs
isn't similarly using genericly named functions, since that might
cause conflicts with f2fs's functions --- but just as this would be a
problem that we would rightly insist that Kent fix, this is something
that we should have rightly insisted that f2fs should have fixed
before it was integrated into the mainline kernel.
acquire_orphan_inode
add_ino_entry
add_orphan_inode
allocate_data_block
allocate_new_segments
alloc_nid
alloc_nid_done
alloc_nid_failed
available_free_memory
...."
This patch adds "f2fs_" prefix for all non-static symbols in order to:
a) avoid conflict with other kernel generic symbols;
b) to indicate the function is f2fs specific one instead of generic
one;
Reported-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-05-29 19:20:41 +03:00
err = f2fs_acquire_orphan_inode ( sbi ) ;
2013-07-30 06:36:53 +04:00
if ( err )
goto put_out_dir ;
2012-11-02 12:11:10 +04:00
f2fs_set_link ( new_dir , new_entry , new_page , old_inode ) ;
2019-12-12 02:10:47 +03:00
new_page = NULL ;
2012-11-02 12:11:10 +04:00
2023-07-05 22:01:08 +03:00
inode_set_ctime_current ( new_inode ) ;
2022-01-07 23:48:44 +03:00
f2fs_down_write ( & F2FS_I ( new_inode ) - > i_sem ) ;
2023-10-13 00:14:23 +03:00
if ( old_is_dir )
2016-05-20 19:43:20 +03:00
f2fs_i_links_write ( new_inode , false ) ;
f2fs_i_links_write ( new_inode , false ) ;
2022-01-07 23:48:44 +03:00
f2fs_up_write ( & F2FS_I ( new_inode ) - > i_sem ) ;
2014-03-20 14:10:08 +04:00
2012-11-02 12:11:10 +04:00
if ( ! new_inode - > i_nlink )
f2fs: clean up symbol namespace
As Ted reported:
"Hi, I was looking at f2fs's sources recently, and I noticed that there
is a very large number of non-static symbols which don't have a f2fs
prefix. There's well over a hundred (see attached below).
As one example, in fs/f2fs/dir.c there is:
unsigned char get_de_type(struct f2fs_dir_entry *de)
This function is clearly only useful for f2fs, but it has a generic
name. This means that if any other file system tries to have the same
symbol name, there will be a symbol conflict and the kernel would not
successfully build. It also means that when someone is looking f2fs
sources, it's not at all obvious whether a function such as
read_data_page(), invalidate_blocks(), is a generic kernel function
found in the fs, mm, or block layers, or a f2fs specific function.
You might want to fix this at some point. Hopefully Kent's bcachefs
isn't similarly using genericly named functions, since that might
cause conflicts with f2fs's functions --- but just as this would be a
problem that we would rightly insist that Kent fix, this is something
that we should have rightly insisted that f2fs should have fixed
before it was integrated into the mainline kernel.
acquire_orphan_inode
add_ino_entry
add_orphan_inode
allocate_data_block
allocate_new_segments
alloc_nid
alloc_nid_done
alloc_nid_failed
available_free_memory
...."
This patch adds "f2fs_" prefix for all non-static symbols in order to:
a) avoid conflict with other kernel generic symbols;
b) to indicate the function is f2fs specific one instead of generic
one;
Reported-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-05-29 19:20:41 +03:00
f2fs_add_orphan_inode ( new_inode ) ;
2013-07-30 06:36:53 +04:00
else
f2fs: clean up symbol namespace
As Ted reported:
"Hi, I was looking at f2fs's sources recently, and I noticed that there
is a very large number of non-static symbols which don't have a f2fs
prefix. There's well over a hundred (see attached below).
As one example, in fs/f2fs/dir.c there is:
unsigned char get_de_type(struct f2fs_dir_entry *de)
This function is clearly only useful for f2fs, but it has a generic
name. This means that if any other file system tries to have the same
symbol name, there will be a symbol conflict and the kernel would not
successfully build. It also means that when someone is looking f2fs
sources, it's not at all obvious whether a function such as
read_data_page(), invalidate_blocks(), is a generic kernel function
found in the fs, mm, or block layers, or a f2fs specific function.
You might want to fix this at some point. Hopefully Kent's bcachefs
isn't similarly using genericly named functions, since that might
cause conflicts with f2fs's functions --- but just as this would be a
problem that we would rightly insist that Kent fix, this is something
that we should have rightly insisted that f2fs should have fixed
before it was integrated into the mainline kernel.
acquire_orphan_inode
add_ino_entry
add_orphan_inode
allocate_data_block
allocate_new_segments
alloc_nid
alloc_nid_done
alloc_nid_failed
available_free_memory
...."
This patch adds "f2fs_" prefix for all non-static symbols in order to:
a) avoid conflict with other kernel generic symbols;
b) to indicate the function is f2fs specific one instead of generic
one;
Reported-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-05-29 19:20:41 +03:00
f2fs_release_orphan_inode ( sbi ) ;
2012-11-02 12:11:10 +04:00
} else {
2016-01-08 01:15:04 +03:00
f2fs_balance_fs ( sbi , true ) ;
2015-12-22 22:56:08 +03:00
2014-06-24 10:16:24 +04:00
f2fs_lock_op ( sbi ) ;
2012-11-02 12:11:10 +04:00
err = f2fs_add_link ( new_dentry , old_inode ) ;
2014-06-24 10:16:24 +04:00
if ( err ) {
f2fs_unlock_op ( sbi ) ;
2019-12-07 03:59:58 +03:00
goto out_dir ;
2014-06-24 10:16:24 +04:00
}
2012-11-02 12:11:10 +04:00
2023-10-13 00:14:23 +03:00
if ( old_is_dir )
2016-05-20 19:43:20 +03:00
f2fs_i_links_write ( new_dir , true ) ;
2012-11-02 12:11:10 +04:00
}
2022-01-07 23:48:44 +03:00
f2fs_down_write ( & F2FS_I ( old_inode ) - > i_sem ) ;
2023-10-13 00:14:23 +03:00
if ( ! old_is_dir | | whiteout )
2017-06-26 05:41:36 +03:00
file_lost_pino ( old_inode ) ;
else
2019-11-07 09:12:05 +03:00
/* adjust dir's i_pino to pass fsck check */
f2fs_i_pino_write ( old_inode , new_dir - > i_ino ) ;
2022-01-07 23:48:44 +03:00
f2fs_up_write ( & F2FS_I ( old_inode ) - > i_sem ) ;
f2fs: do checkpoint for the renamed inode
If an inode is renamed, it should be registered as file_lost_pino to conduct
checkpoint at f2fs_sync_file.
Otherwise, the inode cannot be recovered due to no dent_mark in the following
scenario.
Note that, this scenario is from xfstests/322.
1. create "a"
2. fsync "a"
3. rename "a" to "b"
4. fsync "b"
5. Sudden power-cut
After recovery is done, "b" should be seen.
However, the result shows "a", since the recovery procedure does not enter
recover_dentry due to no dent_mark.
The reason is like below.
- The nid of "a" is checkpointed during #2, f2fs_sync_file.
- The inode page for "b" produced by #3 is written without dent_mark by
sync_node_pages.
So, this patch fixes this bug by assinging file_lost_pino to the "a"'s inode.
If the pino is lost, f2fs_sync_file conducts checkpoint, and then recovers
the latest pino and its dentry information for further recovery.
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2014-06-30 13:09:55 +04:00
2023-07-05 22:01:08 +03:00
inode_set_ctime_current ( old_inode ) ;
2016-10-14 21:51:23 +03:00
f2fs_mark_inode_dirty_sync ( old_inode , false ) ;
2012-11-02 12:11:10 +04:00
2014-09-24 14:17:04 +04:00
f2fs_delete_entry ( old_entry , old_page , old_dir , NULL ) ;
2019-12-12 02:10:47 +03:00
old_page = NULL ;
2012-11-02 12:11:10 +04:00
f2fs: support RENAME_WHITEOUT
As the description of rename in manual, RENAME_WHITEOUT is a special operation
that only makes sense for overlay/union type filesystem.
When performing rename with RENAME_WHITEOUT, dst will be replace with src, and
meanwhile, a 'whiteout' will be create with name of src.
A "whiteout" is designed to be a char device with 0,0 device number, it has
specially meaning for stackable filesystem. In these filesystems, there are
multiple layers exist, and only top of these can be modified. So a whiteout
in top layer is used to hide a corresponding file in lower layer, as well
removal of whiteout will make the file appear.
Now in overlayfs, when we rename a file which is exist in lower layer, it
will be copied up to upper if it is not on upper layer yet, and then rename
it on upper layer, source file will be whiteouted to hide corresponding file
in lower layer at the same time.
So in upper layer filesystem, implementation of RENAME_WHITEOUT provide a
atomic operation for stackable filesystem to support rename operation.
There are multiple ways to implement RENAME_WHITEOUT in log of this commit:
7dcf5c3e4527 ("xfs: add RENAME_WHITEOUT support") which pointed out by
Dave Chinner.
For now, we just try to follow the way that xfs/ext4 use.
Signed-off-by: Chao Yu <chao2.yu@samsung.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2015-05-19 12:37:26 +03:00
if ( whiteout ) {
2016-05-20 20:13:22 +03:00
set_inode_flag ( whiteout , FI_INC_LINK ) ;
f2fs: support RENAME_WHITEOUT
As the description of rename in manual, RENAME_WHITEOUT is a special operation
that only makes sense for overlay/union type filesystem.
When performing rename with RENAME_WHITEOUT, dst will be replace with src, and
meanwhile, a 'whiteout' will be create with name of src.
A "whiteout" is designed to be a char device with 0,0 device number, it has
specially meaning for stackable filesystem. In these filesystems, there are
multiple layers exist, and only top of these can be modified. So a whiteout
in top layer is used to hide a corresponding file in lower layer, as well
removal of whiteout will make the file appear.
Now in overlayfs, when we rename a file which is exist in lower layer, it
will be copied up to upper if it is not on upper layer yet, and then rename
it on upper layer, source file will be whiteouted to hide corresponding file
in lower layer at the same time.
So in upper layer filesystem, implementation of RENAME_WHITEOUT provide a
atomic operation for stackable filesystem to support rename operation.
There are multiple ways to implement RENAME_WHITEOUT in log of this commit:
7dcf5c3e4527 ("xfs: add RENAME_WHITEOUT support") which pointed out by
Dave Chinner.
For now, we just try to follow the way that xfs/ext4 use.
Signed-off-by: Chao Yu <chao2.yu@samsung.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2015-05-19 12:37:26 +03:00
err = f2fs_add_link ( old_dentry , whiteout ) ;
if ( err )
goto put_out_dir ;
2021-01-12 04:55:09 +03:00
spin_lock ( & whiteout - > i_lock ) ;
f2fs: support RENAME_WHITEOUT
As the description of rename in manual, RENAME_WHITEOUT is a special operation
that only makes sense for overlay/union type filesystem.
When performing rename with RENAME_WHITEOUT, dst will be replace with src, and
meanwhile, a 'whiteout' will be create with name of src.
A "whiteout" is designed to be a char device with 0,0 device number, it has
specially meaning for stackable filesystem. In these filesystems, there are
multiple layers exist, and only top of these can be modified. So a whiteout
in top layer is used to hide a corresponding file in lower layer, as well
removal of whiteout will make the file appear.
Now in overlayfs, when we rename a file which is exist in lower layer, it
will be copied up to upper if it is not on upper layer yet, and then rename
it on upper layer, source file will be whiteouted to hide corresponding file
in lower layer at the same time.
So in upper layer filesystem, implementation of RENAME_WHITEOUT provide a
atomic operation for stackable filesystem to support rename operation.
There are multiple ways to implement RENAME_WHITEOUT in log of this commit:
7dcf5c3e4527 ("xfs: add RENAME_WHITEOUT support") which pointed out by
Dave Chinner.
For now, we just try to follow the way that xfs/ext4 use.
Signed-off-by: Chao Yu <chao2.yu@samsung.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2015-05-19 12:37:26 +03:00
whiteout - > i_state & = ~ I_LINKABLE ;
2021-01-12 04:55:09 +03:00
spin_unlock ( & whiteout - > i_lock ) ;
f2fs: support RENAME_WHITEOUT
As the description of rename in manual, RENAME_WHITEOUT is a special operation
that only makes sense for overlay/union type filesystem.
When performing rename with RENAME_WHITEOUT, dst will be replace with src, and
meanwhile, a 'whiteout' will be create with name of src.
A "whiteout" is designed to be a char device with 0,0 device number, it has
specially meaning for stackable filesystem. In these filesystems, there are
multiple layers exist, and only top of these can be modified. So a whiteout
in top layer is used to hide a corresponding file in lower layer, as well
removal of whiteout will make the file appear.
Now in overlayfs, when we rename a file which is exist in lower layer, it
will be copied up to upper if it is not on upper layer yet, and then rename
it on upper layer, source file will be whiteouted to hide corresponding file
in lower layer at the same time.
So in upper layer filesystem, implementation of RENAME_WHITEOUT provide a
atomic operation for stackable filesystem to support rename operation.
There are multiple ways to implement RENAME_WHITEOUT in log of this commit:
7dcf5c3e4527 ("xfs: add RENAME_WHITEOUT support") which pointed out by
Dave Chinner.
For now, we just try to follow the way that xfs/ext4 use.
Signed-off-by: Chao Yu <chao2.yu@samsung.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2015-05-19 12:37:26 +03:00
iput ( whiteout ) ;
}
2023-10-13 00:14:23 +03:00
if ( old_is_dir ) {
f2fs update for 6.8-rc1
In this series, we've some progress to support Zoned block device regarding to
the power-cut recovery flow and enabling checkpoint=disable feature which is
essential for Android OTA. Other than that, some patches touched sysfs entries
and tracepoints which are minor, while several bug fixes on error handlers and
compression flows are good to improve the overall stability.
Enhancement:
- enable checkpoint=disable for zoned block device
- sysfs entries such as discard status, discard_io_aware, dir_level
- tracepoints such as f2fs_vm_page_mkwrite(), f2fs_rename(), f2fs_new_inode()
- use shared inode lock during f2fs_fiemap() and f2fs_seek_block()
Bug fix:
- address some power-cut recovery issues on zoned block device
- handle errors and logics on do_garbage_collect(), f2fs_reserve_new_block(),
f2fs_move_file_range(), f2fs_recover_xattr_data()
- don't set FI_PREALLOCATED_ALL for partial write
- fix to update iostat correctly in f2fs_filemap_fault()
- fix to wait on block writeback for post_read case
- fix to tag gcing flag on page during block migration
- restrict max filesize for 16K f2fs
- fix to avoid dirent corruption
- explicitly null-terminate the xattr list
There are also several clean-up patches to remove dead codes and better
readability.
-----BEGIN PGP SIGNATURE-----
iQIzBAABCgAdFiEE00UqedjCtOrGVvQiQBSofoJIUNIFAmWgMYcACgkQQBSofoJI
UNJShxAAiYOXP7LPOAbPS1251BBgl8AIfs6u96hGTZkxOYsLHrBBbPbkWf3+nVbC
JsBsVOe9K50rssK9kPg6XHPbmFGC8ERlyYcZTpONLfjtHOaQicbRnc//2qOvnCx8
JOKcMVkZyLU/HbOCoUW6mzNCQlOl0aAV8tRcb7jwAxT0HgpjHTHxej/62gRcPKzC
1E5w4iNTY//R97YGB36jPeGlKhbBZ7Ox1NM6AWadgE7B0j9rcYiBnPQllyeyaVVo
XMCWRdl42tNMks2zgvU+vC41OrZ55bwLTQmVj3P1wnyKXig5/ZLQsrEcIGE+b2tP
Mx+imCIRNYZqLwv5KYl6FU+KuLQGuZT1AjpP70Cb95WLyiYvVE6+xeiZg0fVTCEF
3Hg7lEqMtAEAh1NEmJyYmbiAm9KQ3vHyse9ix++tfm+Xvgqj8b2flmzAtIFKpCBV
J+yFI+A55IYuYZt7gzPoZLkQL0tULPf80TKQrzwlnHNtZ6T6FK2Nunu+Urwf1/Th
s5IulqHJZxHU/Bgd6yQZUVfDILcXTkqNCpO3+qLZMPZizlH1hXiJFTeVzS6mnGvZ
sK2LL4rEJ8EhDHU1F0SJzCWJcuR8cQ/t2zKYUygo9LvHbtEM1bZwC1Bqfolt7NrU
+pgiM2wnE9yjkPdfZN1JgYZDq0/lGvxPQ5NAc/5ERX71QonRyn8=
=MQl3
-----END PGP SIGNATURE-----
Merge tag 'f2fs-for-6.8-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs
Pull f2fs update from Jaegeuk Kim:
"In this series, we've some progress to support Zoned block device
regarding to the power-cut recovery flow and enabling
checkpoint=disable feature which is essential for Android OTA.
Other than that, some patches touched sysfs entries and tracepoints
which are minor, while several bug fixes on error handlers and
compression flows are good to improve the overall stability.
Enhancements:
- enable checkpoint=disable for zoned block device
- sysfs entries such as discard status, discard_io_aware, dir_level
- tracepoints such as f2fs_vm_page_mkwrite(), f2fs_rename(),
f2fs_new_inode()
- use shared inode lock during f2fs_fiemap() and f2fs_seek_block()
Bug fixes:
- address some power-cut recovery issues on zoned block device
- handle errors and logics on do_garbage_collect(),
f2fs_reserve_new_block(), f2fs_move_file_range(),
f2fs_recover_xattr_data()
- don't set FI_PREALLOCATED_ALL for partial write
- fix to update iostat correctly in f2fs_filemap_fault()
- fix to wait on block writeback for post_read case
- fix to tag gcing flag on page during block migration
- restrict max filesize for 16K f2fs
- fix to avoid dirent corruption
- explicitly null-terminate the xattr list
There are also several clean-up patches to remove dead codes and
better readability"
* tag 'f2fs-for-6.8-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs: (33 commits)
f2fs: show more discard status by sysfs
f2fs: Add error handling for negative returns from do_garbage_collect
f2fs: Constrain the modification range of dir_level in the sysfs
f2fs: Use wait_event_freezable_timeout() for freezable kthread
f2fs: fix to check return value of f2fs_recover_xattr_data
f2fs: don't set FI_PREALLOCATED_ALL for partial write
f2fs: fix to update iostat correctly in f2fs_filemap_fault()
f2fs: fix to check compress file in f2fs_move_file_range()
f2fs: fix to wait on block writeback for post_read case
f2fs: fix to tag gcing flag on page during block migration
f2fs: add tracepoint for f2fs_vm_page_mkwrite()
f2fs: introduce f2fs_invalidate_internal_cache() for cleanup
f2fs: update blkaddr in __set_data_blkaddr() for cleanup
f2fs: introduce get_dnode_addr() to clean up codes
f2fs: delete obsolete FI_DROP_CACHE
f2fs: delete obsolete FI_FIRST_BLOCK_WRITTEN
f2fs: Restrict max filesize for 16K f2fs
f2fs: let's finish or reset zones all the time
f2fs: check write pointers when checkpoint=disable
f2fs: fix write pointers on zoned device after roll forward
...
2024-01-12 07:39:15 +03:00
if ( old_dir_entry )
2012-11-02 12:11:10 +04:00
f2fs_set_link ( old_inode , old_dir_entry ,
old_dir_page , new_dir ) ;
2018-02-28 15:31:52 +03:00
else
2012-11-02 12:11:10 +04:00
f2fs_put_page ( old_dir_page , 0 ) ;
2016-05-20 19:43:20 +03:00
f2fs_i_links_write ( old_dir , false ) ;
2012-11-02 12:11:10 +04:00
}
2018-04-25 07:43:01 +03:00
if ( F2FS_OPTION ( sbi ) . fsync_mode = = FSYNC_MODE_STRICT ) {
f2fs: clean up symbol namespace
As Ted reported:
"Hi, I was looking at f2fs's sources recently, and I noticed that there
is a very large number of non-static symbols which don't have a f2fs
prefix. There's well over a hundred (see attached below).
As one example, in fs/f2fs/dir.c there is:
unsigned char get_de_type(struct f2fs_dir_entry *de)
This function is clearly only useful for f2fs, but it has a generic
name. This means that if any other file system tries to have the same
symbol name, there will be a symbol conflict and the kernel would not
successfully build. It also means that when someone is looking f2fs
sources, it's not at all obvious whether a function such as
read_data_page(), invalidate_blocks(), is a generic kernel function
found in the fs, mm, or block layers, or a f2fs specific function.
You might want to fix this at some point. Hopefully Kent's bcachefs
isn't similarly using genericly named functions, since that might
cause conflicts with f2fs's functions --- but just as this would be a
problem that we would rightly insist that Kent fix, this is something
that we should have rightly insisted that f2fs should have fixed
before it was integrated into the mainline kernel.
acquire_orphan_inode
add_ino_entry
add_orphan_inode
allocate_data_block
allocate_new_segments
alloc_nid
alloc_nid_done
alloc_nid_failed
available_free_memory
...."
This patch adds "f2fs_" prefix for all non-static symbols in order to:
a) avoid conflict with other kernel generic symbols;
b) to indicate the function is f2fs specific one instead of generic
one;
Reported-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-05-29 19:20:41 +03:00
f2fs_add_ino_entry ( sbi , new_dir - > i_ino , TRANS_DIR_INO ) ;
2018-04-25 07:43:01 +03:00
if ( S_ISDIR ( old_inode - > i_mode ) )
f2fs: clean up symbol namespace
As Ted reported:
"Hi, I was looking at f2fs's sources recently, and I noticed that there
is a very large number of non-static symbols which don't have a f2fs
prefix. There's well over a hundred (see attached below).
As one example, in fs/f2fs/dir.c there is:
unsigned char get_de_type(struct f2fs_dir_entry *de)
This function is clearly only useful for f2fs, but it has a generic
name. This means that if any other file system tries to have the same
symbol name, there will be a symbol conflict and the kernel would not
successfully build. It also means that when someone is looking f2fs
sources, it's not at all obvious whether a function such as
read_data_page(), invalidate_blocks(), is a generic kernel function
found in the fs, mm, or block layers, or a f2fs specific function.
You might want to fix this at some point. Hopefully Kent's bcachefs
isn't similarly using genericly named functions, since that might
cause conflicts with f2fs's functions --- but just as this would be a
problem that we would rightly insist that Kent fix, this is something
that we should have rightly insisted that f2fs should have fixed
before it was integrated into the mainline kernel.
acquire_orphan_inode
add_ino_entry
add_orphan_inode
allocate_data_block
allocate_new_segments
alloc_nid
alloc_nid_done
alloc_nid_failed
available_free_memory
...."
This patch adds "f2fs_" prefix for all non-static symbols in order to:
a) avoid conflict with other kernel generic symbols;
b) to indicate the function is f2fs specific one instead of generic
one;
Reported-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-05-29 19:20:41 +03:00
f2fs_add_ino_entry ( sbi , old_inode - > i_ino ,
TRANS_DIR_INO ) ;
2018-04-25 07:43:01 +03:00
}
2012-11-02 12:11:10 +04:00
f2fs: use rw_sem instead of fs_lock(locks mutex)
The fs_locks is used to block other ops(ex, recovery) when doing checkpoint.
And each other operate routine(besides checkpoint) needs to acquire a fs_lock,
there is a terrible problem here, if these are too many concurrency threads acquiring
fs_lock, so that they will block each other and may lead to some performance problem,
but this is not the phenomenon we want to see.
Though there are some optimization patches introduced to enhance the usage of fs_lock,
but the thorough solution is using a *rw_sem* to replace the fs_lock.
Checkpoint routine takes write_sem, and other ops take read_sem, so that we can block
other ops(ex, recovery) when doing checkpoint, and other ops will not disturb each other,
this can avoid the problem described above completely.
Because of the weakness of rw_sem, the above change may introduce a potential problem
that the checkpoint thread might get starved if other threads are intensively locking
the read semaphore for I/O.(Pointed out by Xu Jin)
In order to avoid this, a wait_list is introduced, the appending read semaphore ops
will be dropped into the wait_list if checkpoint thread is waiting for write semaphore,
and will be waked up when checkpoint thread gives up write semaphore.
Thanks to Kim's previous review and test, and will be very glad to see other guys'
performance tests about this patch.
V2:
-fix the potential starvation problem.
-use more suitable func name suggested by Xu Jin.
Signed-off-by: Gu Zheng <guz.fnst@cn.fujitsu.com>
[Jaegeuk Kim: adjust minor coding standard]
Signed-off-by: Jaegeuk Kim <jaegeuk.kim@samsung.com>
2013-09-27 14:08:30 +04:00
f2fs_unlock_op ( sbi ) ;
2014-11-10 09:15:31 +03:00
if ( IS_DIRSYNC ( old_dir ) | | IS_DIRSYNC ( new_dir ) )
f2fs_sync_fs ( sbi - > sb , 1 ) ;
2018-10-05 08:17:40 +03:00
f2fs_update_time ( sbi , REQ_TIME ) ;
2012-11-02 12:11:10 +04:00
return 0 ;
2013-07-30 06:36:53 +04:00
put_out_dir :
2014-06-24 10:16:24 +04:00
f2fs_unlock_op ( sbi ) ;
2019-12-12 02:10:47 +03:00
f2fs_put_page ( new_page , 0 ) ;
2012-11-02 12:11:10 +04:00
out_dir :
2018-02-28 15:31:52 +03:00
if ( old_dir_entry )
2012-11-02 12:11:10 +04:00
f2fs_put_page ( old_dir_page , 0 ) ;
out_old :
f2fs_put_page ( old_page , 0 ) ;
out :
2022-03-14 10:15:15 +03:00
iput ( whiteout ) ;
2012-11-02 12:11:10 +04:00
return err ;
}
2014-07-12 15:13:54 +04:00
static int f2fs_cross_rename ( struct inode * old_dir , struct dentry * old_dentry ,
struct inode * new_dir , struct dentry * new_dentry )
{
2014-09-03 02:31:18 +04:00
struct f2fs_sb_info * sbi = F2FS_I_SB ( old_dir ) ;
2015-03-18 01:25:59 +03:00
struct inode * old_inode = d_inode ( old_dentry ) ;
struct inode * new_inode = d_inode ( new_dentry ) ;
2014-07-12 15:13:54 +04:00
struct page * old_dir_page , * new_dir_page ;
struct page * old_page , * new_page ;
struct f2fs_dir_entry * old_dir_entry = NULL , * new_dir_entry = NULL ;
struct f2fs_dir_entry * old_entry , * new_entry ;
int old_nlink = 0 , new_nlink = 0 ;
2018-09-17 23:25:04 +03:00
int err ;
2016-12-29 04:31:15 +03:00
2017-10-24 00:48:49 +03:00
if ( unlikely ( f2fs_cp_error ( sbi ) ) )
return - EIO ;
2019-08-23 12:58:36 +03:00
if ( ! f2fs_is_checkpoint_ready ( sbi ) )
return - ENOSPC ;
2017-10-24 00:48:49 +03:00
2017-07-25 19:01:41 +03:00
if ( ( is_inode_flag_set ( new_dir , FI_PROJ_INHERIT ) & &
! projid_eq ( F2FS_I ( new_dir ) - > i_projid ,
F2FS_I ( old_dentry - > d_inode ) - > i_projid ) ) | |
( is_inode_flag_set ( new_dir , FI_PROJ_INHERIT ) & &
! projid_eq ( F2FS_I ( old_dir ) - > i_projid ,
F2FS_I ( new_dentry - > d_inode ) - > i_projid ) ) )
return - EXDEV ;
2021-10-28 16:03:05 +03:00
err = f2fs_dquot_initialize ( old_dir ) ;
2017-07-08 19:13:07 +03:00
if ( err )
goto out ;
2021-10-28 16:03:05 +03:00
err = f2fs_dquot_initialize ( new_dir ) ;
2017-07-08 19:13:07 +03:00
if ( err )
goto out ;
2018-09-17 23:25:04 +03:00
err = - ENOENT ;
2014-07-12 15:13:54 +04:00
old_entry = f2fs_find_entry ( old_dir , & old_dentry - > d_name , & old_page ) ;
2016-07-19 03:27:47 +03:00
if ( ! old_entry ) {
if ( IS_ERR ( old_page ) )
err = PTR_ERR ( old_page ) ;
2014-07-12 15:13:54 +04:00
goto out ;
2016-07-19 03:27:47 +03:00
}
2014-07-12 15:13:54 +04:00
new_entry = f2fs_find_entry ( new_dir , & new_dentry - > d_name , & new_page ) ;
2016-07-19 03:27:47 +03:00
if ( ! new_entry ) {
if ( IS_ERR ( new_page ) )
err = PTR_ERR ( new_page ) ;
2014-07-12 15:13:54 +04:00
goto out_old ;
2016-07-19 03:27:47 +03:00
}
2014-07-12 15:13:54 +04:00
/* prepare for updating ".." directory entry info later */
if ( old_dir ! = new_dir ) {
if ( S_ISDIR ( old_inode - > i_mode ) ) {
old_dir_entry = f2fs_parent_dir ( old_inode ,
& old_dir_page ) ;
2016-06-10 00:57:19 +03:00
if ( ! old_dir_entry ) {
2016-07-19 03:27:47 +03:00
if ( IS_ERR ( old_dir_page ) )
err = PTR_ERR ( old_dir_page ) ;
2014-07-12 15:13:54 +04:00
goto out_new ;
2016-06-10 00:57:19 +03:00
}
2014-07-12 15:13:54 +04:00
}
if ( S_ISDIR ( new_inode - > i_mode ) ) {
new_dir_entry = f2fs_parent_dir ( new_inode ,
& new_dir_page ) ;
2016-06-10 00:57:19 +03:00
if ( ! new_dir_entry ) {
2016-07-19 03:27:47 +03:00
if ( IS_ERR ( new_dir_page ) )
err = PTR_ERR ( new_dir_page ) ;
2014-07-12 15:13:54 +04:00
goto out_old_dir ;
2016-06-10 00:57:19 +03:00
}
2014-07-12 15:13:54 +04:00
}
}
/*
* If cross rename between file and directory those are not
* in the same directory , we will inc nlink of file ' s parent
* later , so we should check upper boundary of its nlink .
*/
if ( ( ! old_dir_entry | | ! new_dir_entry ) & &
old_dir_entry ! = new_dir_entry ) {
old_nlink = old_dir_entry ? - 1 : 1 ;
new_nlink = - old_nlink ;
err = - EMLINK ;
2017-03-04 16:48:28 +03:00
if ( ( old_nlink > 0 & & old_dir - > i_nlink > = F2FS_LINK_MAX ) | |
( new_nlink > 0 & & new_dir - > i_nlink > = F2FS_LINK_MAX ) )
2014-07-12 15:13:54 +04:00
goto out_new_dir ;
}
2016-01-08 01:15:04 +03:00
f2fs_balance_fs ( sbi , true ) ;
2015-12-22 22:56:08 +03:00
2014-07-12 15:13:54 +04:00
f2fs_lock_op ( sbi ) ;
/* update ".." directory entry info of old dentry */
if ( old_dir_entry )
f2fs_set_link ( old_inode , old_dir_entry , old_dir_page , new_dir ) ;
/* update ".." directory entry info of new dentry */
if ( new_dir_entry )
f2fs_set_link ( new_inode , new_dir_entry , new_dir_page , old_dir ) ;
/* update directory entry info of old dir inode */
f2fs_set_link ( old_dir , old_entry , old_page , new_inode ) ;
2022-01-07 23:48:44 +03:00
f2fs_down_write ( & F2FS_I ( old_inode ) - > i_sem ) ;
2019-11-07 09:12:05 +03:00
if ( ! old_dir_entry )
file_lost_pino ( old_inode ) ;
else
/* adjust dir's i_pino to pass fsck check */
f2fs_i_pino_write ( old_inode , new_dir - > i_ino ) ;
2022-01-07 23:48:44 +03:00
f2fs_up_write ( & F2FS_I ( old_inode ) - > i_sem ) ;
2014-07-12 15:13:54 +04:00
2023-07-05 22:01:08 +03:00
inode_set_ctime_current ( old_dir ) ;
2014-07-12 15:13:54 +04:00
if ( old_nlink ) {
2022-01-07 23:48:44 +03:00
f2fs_down_write ( & F2FS_I ( old_dir ) - > i_sem ) ;
2016-05-20 19:43:20 +03:00
f2fs_i_links_write ( old_dir , old_nlink > 0 ) ;
2022-01-07 23:48:44 +03:00
f2fs_up_write ( & F2FS_I ( old_dir ) - > i_sem ) ;
2014-07-12 15:13:54 +04:00
}
2016-10-14 21:51:23 +03:00
f2fs_mark_inode_dirty_sync ( old_dir , false ) ;
2014-07-12 15:13:54 +04:00
/* update directory entry info of new dir inode */
f2fs_set_link ( new_dir , new_entry , new_page , old_inode ) ;
2022-01-07 23:48:44 +03:00
f2fs_down_write ( & F2FS_I ( new_inode ) - > i_sem ) ;
2019-11-07 09:12:05 +03:00
if ( ! new_dir_entry )
file_lost_pino ( new_inode ) ;
else
/* adjust dir's i_pino to pass fsck check */
f2fs_i_pino_write ( new_inode , old_dir - > i_ino ) ;
2022-01-07 23:48:44 +03:00
f2fs_up_write ( & F2FS_I ( new_inode ) - > i_sem ) ;
2014-07-12 15:13:54 +04:00
2023-07-05 22:01:08 +03:00
inode_set_ctime_current ( new_dir ) ;
2014-07-12 15:13:54 +04:00
if ( new_nlink ) {
2022-01-07 23:48:44 +03:00
f2fs_down_write ( & F2FS_I ( new_dir ) - > i_sem ) ;
2016-05-20 19:43:20 +03:00
f2fs_i_links_write ( new_dir , new_nlink > 0 ) ;
2022-01-07 23:48:44 +03:00
f2fs_up_write ( & F2FS_I ( new_dir ) - > i_sem ) ;
2014-07-12 15:13:54 +04:00
}
2016-10-14 21:51:23 +03:00
f2fs_mark_inode_dirty_sync ( new_dir , false ) ;
2014-07-12 15:13:54 +04:00
2018-03-08 09:22:56 +03:00
if ( F2FS_OPTION ( sbi ) . fsync_mode = = FSYNC_MODE_STRICT ) {
f2fs: clean up symbol namespace
As Ted reported:
"Hi, I was looking at f2fs's sources recently, and I noticed that there
is a very large number of non-static symbols which don't have a f2fs
prefix. There's well over a hundred (see attached below).
As one example, in fs/f2fs/dir.c there is:
unsigned char get_de_type(struct f2fs_dir_entry *de)
This function is clearly only useful for f2fs, but it has a generic
name. This means that if any other file system tries to have the same
symbol name, there will be a symbol conflict and the kernel would not
successfully build. It also means that when someone is looking f2fs
sources, it's not at all obvious whether a function such as
read_data_page(), invalidate_blocks(), is a generic kernel function
found in the fs, mm, or block layers, or a f2fs specific function.
You might want to fix this at some point. Hopefully Kent's bcachefs
isn't similarly using genericly named functions, since that might
cause conflicts with f2fs's functions --- but just as this would be a
problem that we would rightly insist that Kent fix, this is something
that we should have rightly insisted that f2fs should have fixed
before it was integrated into the mainline kernel.
acquire_orphan_inode
add_ino_entry
add_orphan_inode
allocate_data_block
allocate_new_segments
alloc_nid
alloc_nid_done
alloc_nid_failed
available_free_memory
...."
This patch adds "f2fs_" prefix for all non-static symbols in order to:
a) avoid conflict with other kernel generic symbols;
b) to indicate the function is f2fs specific one instead of generic
one;
Reported-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-05-29 19:20:41 +03:00
f2fs_add_ino_entry ( sbi , old_dir - > i_ino , TRANS_DIR_INO ) ;
f2fs_add_ino_entry ( sbi , new_dir - > i_ino , TRANS_DIR_INO ) ;
2018-03-07 07:07:49 +03:00
}
2017-12-28 19:09:44 +03:00
2014-07-12 15:13:54 +04:00
f2fs_unlock_op ( sbi ) ;
2014-11-10 09:15:31 +03:00
if ( IS_DIRSYNC ( old_dir ) | | IS_DIRSYNC ( new_dir ) )
f2fs_sync_fs ( sbi - > sb , 1 ) ;
2018-10-05 08:17:40 +03:00
f2fs_update_time ( sbi , REQ_TIME ) ;
2014-07-12 15:13:54 +04:00
return 0 ;
out_new_dir :
if ( new_dir_entry ) {
f2fs_put_page ( new_dir_page , 0 ) ;
}
out_old_dir :
if ( old_dir_entry ) {
f2fs_put_page ( old_dir_page , 0 ) ;
}
out_new :
f2fs_put_page ( new_page , 0 ) ;
out_old :
f2fs_put_page ( old_page , 0 ) ;
out :
return err ;
}
2023-01-13 14:49:17 +03:00
static int f2fs_rename2 ( struct mnt_idmap * idmap ,
2021-01-21 16:19:43 +03:00
struct inode * old_dir , struct dentry * old_dentry ,
2014-07-12 15:13:54 +04:00
struct inode * new_dir , struct dentry * new_dentry ,
unsigned int flags )
{
2017-11-29 23:35:30 +03:00
int err ;
f2fs: support RENAME_WHITEOUT
As the description of rename in manual, RENAME_WHITEOUT is a special operation
that only makes sense for overlay/union type filesystem.
When performing rename with RENAME_WHITEOUT, dst will be replace with src, and
meanwhile, a 'whiteout' will be create with name of src.
A "whiteout" is designed to be a char device with 0,0 device number, it has
specially meaning for stackable filesystem. In these filesystems, there are
multiple layers exist, and only top of these can be modified. So a whiteout
in top layer is used to hide a corresponding file in lower layer, as well
removal of whiteout will make the file appear.
Now in overlayfs, when we rename a file which is exist in lower layer, it
will be copied up to upper if it is not on upper layer yet, and then rename
it on upper layer, source file will be whiteouted to hide corresponding file
in lower layer at the same time.
So in upper layer filesystem, implementation of RENAME_WHITEOUT provide a
atomic operation for stackable filesystem to support rename operation.
There are multiple ways to implement RENAME_WHITEOUT in log of this commit:
7dcf5c3e4527 ("xfs: add RENAME_WHITEOUT support") which pointed out by
Dave Chinner.
For now, we just try to follow the way that xfs/ext4 use.
Signed-off-by: Chao Yu <chao2.yu@samsung.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2015-05-19 12:37:26 +03:00
if ( flags & ~ ( RENAME_NOREPLACE | RENAME_EXCHANGE | RENAME_WHITEOUT ) )
2014-07-12 15:13:54 +04:00
return - EINVAL ;
2023-11-28 12:31:29 +03:00
trace_f2fs_rename_start ( old_dir , old_dentry , new_dir , new_dentry ,
flags ) ;
2017-11-29 23:35:30 +03:00
err = fscrypt_prepare_rename ( old_dir , old_dentry , new_dir , new_dentry ,
flags ) ;
if ( err )
return err ;
2023-11-28 12:31:29 +03:00
if ( flags & RENAME_EXCHANGE )
err = f2fs_cross_rename ( old_dir , old_dentry ,
new_dir , new_dentry ) ;
else
2014-07-12 15:13:54 +04:00
/*
* VFS has already handled the new dentry existence case ,
* here , we just deal with " RENAME_NOREPLACE " as regular rename .
*/
2023-11-28 12:31:29 +03:00
err = f2fs_rename ( idmap , old_dir , old_dentry ,
2022-02-04 08:24:56 +03:00
new_dir , new_dentry , flags ) ;
2023-11-28 12:31:29 +03:00
trace_f2fs_rename_end ( old_dentry , new_dentry , flags , err ) ;
return err ;
2014-07-12 15:13:54 +04:00
}
2015-11-17 18:20:54 +03:00
static const char * f2fs_encrypted_get_link ( struct dentry * dentry ,
2015-12-29 23:58:39 +03:00
struct inode * inode ,
struct delayed_call * done )
2014-06-19 12:23:19 +04:00
{
2018-01-12 07:26:49 +03:00
struct page * page ;
const char * target ;
2015-04-30 01:10:53 +03:00
2015-11-17 18:20:54 +03:00
if ( ! dentry )
return ERR_PTR ( - ECHILD ) ;
2018-01-12 07:26:49 +03:00
page = read_mapping_page ( inode - > i_mapping , 0 , NULL ) ;
if ( IS_ERR ( page ) )
return ERR_CAST ( page ) ;
2015-04-30 01:10:53 +03:00
2018-01-12 07:26:49 +03:00
target = fscrypt_get_symlink ( inode , page_address ( page ) ,
inode - > i_sb - > s_blocksize , done ) ;
put_page ( page ) ;
return target ;
2014-06-19 12:23:19 +04:00
}
2023-01-13 14:49:12 +03:00
/*
 * f2fs_encrypted_symlink_getattr - ->getattr() for encrypted symlinks
 *
 * Fills @stat through the regular f2fs_getattr() and then lets
 * fscrypt_symlink_getattr() post-process it.
 *
 * Return: the error from f2fs_getattr() if it fails (in which case @stat is
 * not handed to fscrypt), otherwise the result of fscrypt_symlink_getattr().
 */
static int f2fs_encrypted_symlink_getattr(struct mnt_idmap *idmap,
					  const struct path *path,
					  struct kstat *stat, u32 request_mask,
					  unsigned int query_flags)
{
	int err;

	/* do not let fscrypt adjust a kstat that was never filled in */
	err = f2fs_getattr(idmap, path, stat, request_mask, query_flags);
	if (err)
		return err;

	return fscrypt_symlink_getattr(path, stat);
}
2015-04-30 01:10:53 +03:00
const struct inode_operations f2fs_encrypted_symlink_inode_operations = {
2020-07-24 11:55:28 +03:00
. get_link = f2fs_encrypted_get_link ,
2021-07-02 09:53:48 +03:00
. getattr = f2fs_encrypted_symlink_getattr ,
2015-04-30 01:10:53 +03:00
. setattr = f2fs_setattr ,
. listxattr = f2fs_listxattr ,
} ;
2012-11-02 12:11:10 +04:00
const struct inode_operations f2fs_dir_inode_operations = {
. create = f2fs_create ,
. lookup = f2fs_lookup ,
. link = f2fs_link ,
. unlink = f2fs_unlink ,
. symlink = f2fs_symlink ,
. mkdir = f2fs_mkdir ,
. rmdir = f2fs_rmdir ,
. mknod = f2fs_mknod ,
2016-09-27 12:03:58 +03:00
. rename = f2fs_rename2 ,
2014-06-19 12:23:19 +04:00
. tmpfile = f2fs_tmpfile ,
2013-06-07 11:33:07 +04:00
. getattr = f2fs_getattr ,
2012-11-02 12:11:10 +04:00
. setattr = f2fs_setattr ,
2022-09-22 18:17:00 +03:00
. get_inode_acl = f2fs_get_acl ,
2013-12-20 17:16:45 +04:00
. set_acl = f2fs_set_acl ,
2012-11-02 12:11:10 +04:00
. listxattr = f2fs_listxattr ,
2019-07-22 13:03:50 +03:00
. fiemap = f2fs_fiemap ,
2021-04-07 15:36:43 +03:00
. fileattr_get = f2fs_fileattr_get ,
. fileattr_set = f2fs_fileattr_set ,
2012-11-02 12:11:10 +04:00
} ;
const struct inode_operations f2fs_symlink_inode_operations = {
2020-07-24 11:55:28 +03:00
. get_link = f2fs_get_link ,
2013-06-07 11:33:07 +04:00
. getattr = f2fs_getattr ,
2012-11-02 12:11:10 +04:00
. setattr = f2fs_setattr ,
. listxattr = f2fs_listxattr ,
} ;
const struct inode_operations f2fs_special_inode_operations = {
2013-06-07 11:33:07 +04:00
. getattr = f2fs_getattr ,
2020-07-24 11:55:28 +03:00
. setattr = f2fs_setattr ,
2022-09-22 18:17:00 +03:00
. get_inode_acl = f2fs_get_acl ,
2013-12-20 17:16:45 +04:00
. set_acl = f2fs_set_acl ,
2012-11-02 12:11:10 +04:00
. listxattr = f2fs_listxattr ,
} ;