2018-09-12 04:16:07 +03:00
// SPDX-License-Identifier: GPL-2.0
2017-06-14 12:39:47 +03:00
/*
 * f2fs sysfs interface
 *
 * Copyright (c) 2012 Samsung Electronics Co., Ltd.
 *             http://www.samsung.com/
 * Copyright (c) 2017 Chao Yu <chao@kernel.org>
 */
2018-07-07 06:50:57 +03:00
# include <linux/compiler.h>
2017-06-14 12:39:47 +03:00
# include <linux/proc_fs.h>
# include <linux/f2fs_fs.h>
2017-07-14 03:45:21 +03:00
# include <linux/seq_file.h>
2019-07-24 02:05:28 +03:00
# include <linux/unicode.h>
2021-01-21 16:45:29 +03:00
# include <linux/ioprio.h>
2021-03-15 11:12:33 +03:00
# include <linux/sysfs.h>
2017-06-14 12:39:47 +03:00
# include "f2fs.h"
# include "segment.h"
# include "gc.h"
2021-08-20 06:52:28 +03:00
# include "iostat.h"
2020-03-30 06:30:59 +03:00
# include <trace/events/f2fs.h>
2017-06-14 12:39:47 +03:00
/*
 * NOTE(review): presumably the procfs directory entry under which f2fs
 * creates its per-filesystem entries; it is only declared in this part of
 * the file -- confirm where it is assigned and released.
 */
static struct proc_dir_entry * f2fs_proc_root ;
/* Sysfs support for f2fs */
/*
 * Selects the structure that a sysfs attribute's byte offset is relative to.
 * __struct_ptr() translates each value into the matching base pointer.
 *
 * STAT_INFO and FAULT_INFO_* only exist when the corresponding config option
 * is enabled, so the numeric values of the enumerators that follow them
 * depend on the kernel configuration.
 */
enum {
	GC_THREAD,	/* struct f2fs_gc_thread */
	SM_INFO,	/* struct f2fs_sm_info */
	DCC_INFO,	/* struct discard_cmd_control */
	NM_INFO,	/* struct f2fs_nm_info */
	F2FS_SBI,	/* struct f2fs_sb_info */
#ifdef CONFIG_F2FS_STAT_FS
	STAT_INFO,	/* struct f2fs_stat_info */
#endif
#ifdef CONFIG_F2FS_FAULT_INJECTION
	FAULT_INFO_RATE,	/* struct f2fs_fault_info */
	FAULT_INFO_TYPE,	/* struct f2fs_fault_info */
#endif
	RESERVED_BLOCKS,	/* struct f2fs_sb_info */
	CPRC_INFO,	/* struct ckpt_req_control */
	ATGC_INFO,	/* struct atgc_management */
};
2022-03-18 22:13:23 +03:00
/*
 * Printable GC mode names; gc_mode_show() indexes this table with
 * sbi->gc_mode, so the order must follow the numeric gc_mode values.
 */
static const char *gc_mode_names[MAX_GC_MODE] = {
	"GC_NORMAL",
	"GC_IDLE_CB",
	"GC_IDLE_GREEDY",
	"GC_IDLE_AT",
	"GC_URGENT_HIGH",
	"GC_URGENT_LOW",
	"GC_URGENT_MID"
};
2017-06-14 12:39:47 +03:00
/*
 * One f2fs sysfs attribute.
 *
 * @attr:        generic sysfs attribute; attr.name is compared against
 *               literal names in f2fs_sbi_show() to pick special formatting
 * @show:        read callback; fills @buf and returns the byte count or a
 *               negative errno
 * @store:       write callback; receives the user buffer and its length
 * @struct_type: enum value handed to __struct_ptr() to find the structure
 *               this attribute lives in
 * @offset:      byte offset of the value inside that structure
 * @id:          NOTE(review): not used in the visible part of the file;
 *               presumably an attribute/feature identifier -- confirm
 */
struct f2fs_attr {
	struct attribute attr;
	ssize_t (*show)(struct f2fs_attr *a, struct f2fs_sb_info *sbi, char *buf);
	ssize_t (*store)(struct f2fs_attr *a, struct f2fs_sb_info *sbi,
			 const char *buf, size_t len);
	int struct_type;
	int offset;
	int id;
};
2020-01-22 21:51:16 +03:00
/* Forward declaration; f2fs_sbi_show() is defined further down in this file. */
static ssize_t f2fs_sbi_show ( struct f2fs_attr * a ,
struct f2fs_sb_info * sbi , char * buf ) ;
2017-06-14 12:39:47 +03:00
static unsigned char * __struct_ptr ( struct f2fs_sb_info * sbi , int struct_type )
{
if ( struct_type = = GC_THREAD )
return ( unsigned char * ) sbi - > gc_thread ;
else if ( struct_type = = SM_INFO )
return ( unsigned char * ) SM_I ( sbi ) ;
else if ( struct_type = = DCC_INFO )
return ( unsigned char * ) SM_I ( sbi ) - > dcc_info ;
else if ( struct_type = = NM_INFO )
return ( unsigned char * ) NM_I ( sbi ) ;
2017-06-26 11:24:41 +03:00
else if ( struct_type = = F2FS_SBI | | struct_type = = RESERVED_BLOCKS )
2017-06-14 12:39:47 +03:00
return ( unsigned char * ) sbi ;
# ifdef CONFIG_F2FS_FAULT_INJECTION
else if ( struct_type = = FAULT_INFO_RATE | |
struct_type = = FAULT_INFO_TYPE )
2018-03-08 09:22:56 +03:00
return ( unsigned char * ) & F2FS_OPTION ( sbi ) . fault_info ;
2020-01-22 21:51:16 +03:00
# endif
# ifdef CONFIG_F2FS_STAT_FS
else if ( struct_type = = STAT_INFO )
return ( unsigned char * ) F2FS_STAT ( sbi ) ;
2017-06-14 12:39:47 +03:00
# endif
2021-01-21 16:45:29 +03:00
else if ( struct_type = = CPRC_INFO )
return ( unsigned char * ) & sbi - > cprc_info ;
2021-05-12 05:07:19 +03:00
else if ( struct_type = = ATGC_INFO )
return ( unsigned char * ) & sbi - > am ;
2017-06-14 12:39:47 +03:00
return NULL ;
}
2017-10-24 10:46:54 +03:00
/*
 * sysfs show handler: print dirty_segments(sbi) as an unsigned decimal
 * followed by a newline. @a is unused. Returns sysfs_emit()'s result.
 */
static ssize_t dirty_segments_show(struct f2fs_attr *a,
		struct f2fs_sb_info *sbi, char *buf)
{
	return sysfs_emit(buf, "%llu\n",
			(unsigned long long)(dirty_segments(sbi)));
}
2020-01-22 21:51:16 +03:00
/*
 * sysfs show handler: print free_segments(sbi) as an unsigned decimal
 * followed by a newline. @a is unused. Returns sysfs_emit()'s result.
 */
static ssize_t free_segments_show(struct f2fs_attr *a,
		struct f2fs_sb_info *sbi, char *buf)
{
	return sysfs_emit(buf, "%llu\n",
			(unsigned long long)(free_segments(sbi)));
}
2019-05-30 03:49:06 +03:00
2021-03-02 04:28:16 +03:00
/*
 * sysfs show handler: print overprovision_segments(sbi) as an unsigned
 * decimal followed by a newline. @a is unused. Returns sysfs_emit()'s result.
 */
static ssize_t ovp_segments_show(struct f2fs_attr *a,
		struct f2fs_sb_info *sbi, char *buf)
{
	return sysfs_emit(buf, "%llu\n",
			(unsigned long long)(overprovision_segments(sbi)));
}
2017-06-14 12:39:47 +03:00
static ssize_t lifetime_write_kbytes_show ( struct f2fs_attr * a ,
struct f2fs_sb_info * sbi , char * buf )
{
2022-10-28 19:49:53 +03:00
return sysfs_emit ( buf , " %llu \n " ,
2020-01-22 21:51:16 +03:00
( unsigned long long ) ( sbi - > kbytes_written +
2020-11-27 16:20:06 +03:00
( ( f2fs_get_sectors_written ( sbi ) -
sbi - > sectors_written_start ) > > 1 ) ) ) ;
2017-06-14 12:39:47 +03:00
}
f2fs: introduce sb_status sysfs node
Introduce /sys/fs/f2fs/<devname>/stat/sb_status to show superblock
status in real time as a hexadecimal value.
value sb status macro description
0x1 SBI_IS_DIRTY, /* dirty flag for checkpoint */
0x2 SBI_IS_CLOSE, /* specify unmounting */
0x4 SBI_NEED_FSCK, /* need fsck.f2fs to fix */
0x8 SBI_POR_DOING, /* recovery is doing or not */
0x10 SBI_NEED_SB_WRITE, /* need to recover superblock */
0x20 SBI_NEED_CP, /* need to checkpoint */
0x40 SBI_IS_SHUTDOWN, /* shutdown by ioctl */
0x80 SBI_IS_RECOVERED, /* recovered orphan/data */
0x100 SBI_CP_DISABLED, /* CP was disabled last mount */
0x200 SBI_CP_DISABLED_QUICK, /* CP was disabled quickly */
0x400 SBI_QUOTA_NEED_FLUSH, /* need to flush quota info in CP */
0x800 SBI_QUOTA_SKIP_FLUSH, /* skip flushing quota in current CP */
0x1000 SBI_QUOTA_NEED_REPAIR, /* quota file may be corrupted */
0x2000 SBI_IS_RESIZEFS, /* resizefs is in process */
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2021-01-14 04:41:27 +03:00
static ssize_t sb_status_show ( struct f2fs_attr * a ,
struct f2fs_sb_info * sbi , char * buf )
{
2022-10-28 19:49:53 +03:00
return sysfs_emit ( buf , " %lx \n " , sbi - > s_flag ) ;
f2fs: introduce sb_status sysfs node
Introduce /sys/fs/f2fs/<devname>/stat/sb_status to show superblock
status in real time as a hexadecimal value.
value sb status macro description
0x1 SBI_IS_DIRTY, /* dirty flag for checkpoint */
0x2 SBI_IS_CLOSE, /* specify unmounting */
0x4 SBI_NEED_FSCK, /* need fsck.f2fs to fix */
0x8 SBI_POR_DOING, /* recovery is doing or not */
0x10 SBI_NEED_SB_WRITE, /* need to recover superblock */
0x20 SBI_NEED_CP, /* need to checkpoint */
0x40 SBI_IS_SHUTDOWN, /* shutdown by ioctl */
0x80 SBI_IS_RECOVERED, /* recovered orphan/data */
0x100 SBI_CP_DISABLED, /* CP was disabled last mount */
0x200 SBI_CP_DISABLED_QUICK, /* CP was disabled quickly */
0x400 SBI_QUOTA_NEED_FLUSH, /* need to flush quota info in CP */
0x800 SBI_QUOTA_SKIP_FLUSH, /* skip flushing quota in current CP */
0x1000 SBI_QUOTA_NEED_REPAIR, /* quota file may be corrupted */
0x2000 SBI_IS_RESIZEFS, /* resizefs is in process */
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2021-01-14 04:41:27 +03:00
}
2022-09-27 05:44:47 +03:00
static ssize_t cp_status_show ( struct f2fs_attr * a ,
struct f2fs_sb_info * sbi , char * buf )
{
2022-10-28 19:49:53 +03:00
return sysfs_emit ( buf , " %x \n " , le32_to_cpu ( F2FS_CKPT ( sbi ) - > ckpt_flags ) ) ;
2022-09-27 05:44:47 +03:00
}
2021-11-29 21:36:12 +03:00
static ssize_t pending_discard_show ( struct f2fs_attr * a ,
struct f2fs_sb_info * sbi , char * buf )
{
if ( ! SM_I ( sbi ) - > dcc_info )
return - EINVAL ;
2022-10-28 19:49:53 +03:00
return sysfs_emit ( buf , " %llu \n " , ( unsigned long long ) atomic_read (
2021-11-29 21:36:12 +03:00
& SM_I ( sbi ) - > dcc_info - > discard_cmd_cnt ) ) ;
}
2023-12-20 04:59:58 +03:00
static ssize_t issued_discard_show ( struct f2fs_attr * a ,
struct f2fs_sb_info * sbi , char * buf )
{
if ( ! SM_I ( sbi ) - > dcc_info )
return - EINVAL ;
return sysfs_emit ( buf , " %llu \n " , ( unsigned long long ) atomic_read (
& SM_I ( sbi ) - > dcc_info - > issued_discard ) ) ;
}
static ssize_t queued_discard_show ( struct f2fs_attr * a ,
struct f2fs_sb_info * sbi , char * buf )
{
if ( ! SM_I ( sbi ) - > dcc_info )
return - EINVAL ;
return sysfs_emit ( buf , " %llu \n " , ( unsigned long long ) atomic_read (
& SM_I ( sbi ) - > dcc_info - > queued_discard ) ) ;
}
static ssize_t undiscard_blks_show ( struct f2fs_attr * a ,
struct f2fs_sb_info * sbi , char * buf )
{
if ( ! SM_I ( sbi ) - > dcc_info )
return - EINVAL ;
return sysfs_emit ( buf , " %u \n " ,
SM_I ( sbi ) - > dcc_info - > undiscard_blks ) ;
}
2022-10-25 06:32:16 +03:00
static ssize_t gc_mode_show ( struct f2fs_attr * a ,
struct f2fs_sb_info * sbi , char * buf )
{
return sysfs_emit ( buf , " %s \n " , gc_mode_names [ sbi - > gc_mode ] ) ;
}
2017-07-22 03:14:09 +03:00
/*
 * sysfs show handler: print the list of on-disk features enabled in the
 * superblock, followed by "pin_file" (always listed) and a newline.
 *
 * Each f2fs_sb_has_*() test that succeeds appends its feature name; every
 * name after the first is preceded by a separator. Output is built with
 * sysfs_emit_at(), which bounds every append to the sysfs page, matching
 * the sysfs_emit() helpers used by the other show handlers in this file.
 *
 * @a is unused. Returns the number of bytes placed in @buf.
 */
static ssize_t features_show(struct f2fs_attr *a,
		struct f2fs_sb_info *sbi, char *buf)
{
	int len = 0;

	if (f2fs_sb_has_encrypt(sbi))
		len += sysfs_emit_at(buf, len, "%s",
						"encryption");
	if (f2fs_sb_has_blkzoned(sbi))
		len += sysfs_emit_at(buf, len, "%s%s",
				len ? ", " : "", "blkzoned");
	if (f2fs_sb_has_extra_attr(sbi))
		len += sysfs_emit_at(buf, len, "%s%s",
				len ? ", " : "", "extra_attr");
	if (f2fs_sb_has_project_quota(sbi))
		len += sysfs_emit_at(buf, len, "%s%s",
				len ? ", " : "", "projquota");
	if (f2fs_sb_has_inode_chksum(sbi))
		len += sysfs_emit_at(buf, len, "%s%s",
				len ? ", " : "", "inode_checksum");
	if (f2fs_sb_has_flexible_inline_xattr(sbi))
		len += sysfs_emit_at(buf, len, "%s%s",
				len ? ", " : "", "flexible_inline_xattr");
	if (f2fs_sb_has_quota_ino(sbi))
		len += sysfs_emit_at(buf, len, "%s%s",
				len ? ", " : "", "quota_ino");
	if (f2fs_sb_has_inode_crtime(sbi))
		len += sysfs_emit_at(buf, len, "%s%s",
				len ? ", " : "", "inode_crtime");
	if (f2fs_sb_has_lost_found(sbi))
		len += sysfs_emit_at(buf, len, "%s%s",
				len ? ", " : "", "lost_found");
	if (f2fs_sb_has_verity(sbi))
		len += sysfs_emit_at(buf, len, "%s%s",
				len ? ", " : "", "verity");
	if (f2fs_sb_has_sb_chksum(sbi))
		len += sysfs_emit_at(buf, len, "%s%s",
				len ? ", " : "", "sb_checksum");
	if (f2fs_sb_has_casefold(sbi))
		len += sysfs_emit_at(buf, len, "%s%s",
				len ? ", " : "", "casefold");
	if (f2fs_sb_has_readonly(sbi))
		len += sysfs_emit_at(buf, len, "%s%s",
				len ? ", " : "", "readonly");
	if (f2fs_sb_has_compression(sbi))
		len += sysfs_emit_at(buf, len, "%s%s",
				len ? ", " : "", "compression");

	/* pin_file is reported unconditionally; it has no superblock flag test. */
	len += sysfs_emit_at(buf, len, "%s%s",
				len ? ", " : "", "pin_file");
	len += sysfs_emit_at(buf, len, "\n");
	return len;
}
2017-10-27 15:45:05 +03:00
static ssize_t current_reserved_blocks_show ( struct f2fs_attr * a ,
struct f2fs_sb_info * sbi , char * buf )
{
2022-10-28 19:49:53 +03:00
return sysfs_emit ( buf , " %u \n " , sbi - > current_reserved_blocks ) ;
2020-01-22 21:51:16 +03:00
}
static ssize_t unusable_show ( struct f2fs_attr * a ,
struct f2fs_sb_info * sbi , char * buf )
{
block_t unusable ;
if ( test_opt ( sbi , DISABLE_CHECKPOINT ) )
unusable = sbi - > unusable_block_count ;
else
unusable = f2fs_get_unusable_blocks ( sbi ) ;
2022-10-28 19:49:53 +03:00
return sysfs_emit ( buf , " %llu \n " , ( unsigned long long ) unusable ) ;
2017-10-27 15:45:05 +03:00
}
2020-01-22 21:51:16 +03:00
/*
 * sysfs show handler: print the filename encoding in use.
 *
 * With CONFIG_UNICODE enabled and the casefold feature present, prints
 * "UTF-8 (major.minor.rev)" where the three numbers are bits 23..16, 15..8
 * and 7..0 of sb->s_encoding->version. In every other case prints "(none)".
 * The output always ends with a newline. @a is unused.
 */
static ssize_t encoding_show(struct f2fs_attr *a,
		struct f2fs_sb_info *sbi, char *buf)
{
#if IS_ENABLED(CONFIG_UNICODE)
	struct super_block *sb = sbi->sb;

	if (f2fs_sb_has_casefold(sbi))
		return sysfs_emit(buf, "UTF-8 (%d.%d.%d)\n",
			(sb->s_encoding->version >> 16) & 0xff,
			(sb->s_encoding->version >> 8) & 0xff,
			sb->s_encoding->version & 0xff);
#endif
	return sysfs_emit(buf, "(none)\n");
}
2020-02-26 06:08:16 +03:00
static ssize_t mounted_time_sec_show ( struct f2fs_attr * a ,
struct f2fs_sb_info * sbi , char * buf )
{
2022-11-29 07:15:23 +03:00
return sysfs_emit ( buf , " %llu \n " , SIT_I ( sbi ) - > mounted_time ) ;
2020-02-26 06:08:16 +03:00
}
2020-01-22 21:51:16 +03:00
# ifdef CONFIG_F2FS_STAT_FS
static ssize_t moved_blocks_foreground_show ( struct f2fs_attr * a ,
struct f2fs_sb_info * sbi , char * buf )
{
struct f2fs_stat_info * si = F2FS_STAT ( sbi ) ;
2022-10-28 19:49:53 +03:00
return sysfs_emit ( buf , " %llu \n " ,
2020-01-22 21:51:16 +03:00
( unsigned long long ) ( si - > tot_blks -
( si - > bg_data_blks + si - > bg_node_blks ) ) ) ;
}
static ssize_t moved_blocks_background_show ( struct f2fs_attr * a ,
struct f2fs_sb_info * sbi , char * buf )
{
struct f2fs_stat_info * si = F2FS_STAT ( sbi ) ;
2022-10-28 19:49:53 +03:00
return sysfs_emit ( buf , " %llu \n " ,
2020-01-22 21:51:16 +03:00
( unsigned long long ) ( si - > bg_data_blks + si - > bg_node_blks ) ) ;
}
static ssize_t avg_vblocks_show ( struct f2fs_attr * a ,
struct f2fs_sb_info * sbi , char * buf )
{
struct f2fs_stat_info * si = F2FS_STAT ( sbi ) ;
si - > dirty_count = dirty_segments ( sbi ) ;
f2fs_update_sit_info ( sbi ) ;
2022-10-28 19:49:53 +03:00
return sysfs_emit ( buf , " %llu \n " , ( unsigned long long ) ( si - > avg_vblocks ) ) ;
2020-01-22 21:51:16 +03:00
}
# endif
2020-07-03 12:51:29 +03:00
/*
 * sysfs show handler: print MAIN_BLKADDR(sbi) as an unsigned decimal
 * followed by a newline. @a is unused.
 */
static ssize_t main_blkaddr_show(struct f2fs_attr *a,
				struct f2fs_sb_info *sbi, char *buf)
{
	return sysfs_emit(buf, "%llu\n",
			(unsigned long long)MAIN_BLKADDR(sbi));
}
2017-06-14 12:39:47 +03:00
static ssize_t f2fs_sbi_show ( struct f2fs_attr * a ,
struct f2fs_sb_info * sbi , char * buf )
{
unsigned char * ptr = NULL ;
unsigned int * ui ;
ptr = __struct_ptr ( sbi , a - > struct_type ) ;
if ( ! ptr )
return - EINVAL ;
2018-02-26 17:04:13 +03:00
if ( ! strcmp ( a - > attr . name , " extension_list " ) ) {
__u8 ( * extlist ) [ F2FS_EXTENSION_LEN ] =
sbi - > raw_super - > extension_list ;
2018-02-28 12:07:27 +03:00
int cold_count = le32_to_cpu ( sbi - > raw_super - > extension_count ) ;
int hot_count = sbi - > raw_super - > hot_ext_count ;
2018-02-26 17:04:13 +03:00
int len = 0 , i ;
2020-03-11 12:33:53 +03:00
len + = scnprintf ( buf + len , PAGE_SIZE - len ,
2018-04-30 18:27:44 +03:00
" cold file extension: \n " ) ;
2018-02-28 12:07:27 +03:00
for ( i = 0 ; i < cold_count ; i + + )
2020-03-11 12:33:53 +03:00
len + = scnprintf ( buf + len , PAGE_SIZE - len , " %s \n " ,
2018-02-28 12:07:27 +03:00
extlist [ i ] ) ;
2020-03-11 12:33:53 +03:00
len + = scnprintf ( buf + len , PAGE_SIZE - len ,
2018-04-30 18:27:44 +03:00
" hot file extension: \n " ) ;
2018-02-28 12:07:27 +03:00
for ( i = cold_count ; i < cold_count + hot_count ; i + + )
2020-03-11 12:33:53 +03:00
len + = scnprintf ( buf + len , PAGE_SIZE - len , " %s \n " ,
2018-02-26 17:04:13 +03:00
extlist [ i ] ) ;
return len ;
}
2021-01-21 16:45:29 +03:00
if ( ! strcmp ( a - > attr . name , " ckpt_thread_ioprio " ) ) {
struct ckpt_req_control * cprc = & sbi - > cprc_info ;
int class = IOPRIO_PRIO_CLASS ( cprc - > ckpt_thread_ioprio ) ;
int data = IOPRIO_PRIO_DATA ( cprc - > ckpt_thread_ioprio ) ;
2023-03-31 14:33:05 +03:00
if ( class ! = IOPRIO_CLASS_RT & & class ! = IOPRIO_CLASS_BE )
2021-01-21 16:45:29 +03:00
return - EINVAL ;
2023-03-31 14:33:05 +03:00
return sysfs_emit ( buf , " %s,%d \n " ,
class = = IOPRIO_CLASS_RT ? " rt " : " be " , data ) ;
2021-01-21 16:45:29 +03:00
}
2021-03-15 11:12:33 +03:00
# ifdef CONFIG_F2FS_FS_COMPRESSION
if ( ! strcmp ( a - > attr . name , " compr_written_block " ) )
return sysfs_emit ( buf , " %llu \n " , sbi - > compr_written_block ) ;
if ( ! strcmp ( a - > attr . name , " compr_saved_block " ) )
return sysfs_emit ( buf , " %llu \n " , sbi - > compr_saved_block ) ;
if ( ! strcmp ( a - > attr . name , " compr_new_inode " ) )
return sysfs_emit ( buf , " %u \n " , sbi - > compr_new_inode ) ;
# endif
2021-07-10 08:53:57 +03:00
if ( ! strcmp ( a - > attr . name , " gc_segment_mode " ) )
2022-10-25 06:32:16 +03:00
return sysfs_emit ( buf , " %u \n " , sbi - > gc_segment_mode ) ;
2021-07-10 08:53:57 +03:00
if ( ! strcmp ( a - > attr . name , " gc_reclaimed_segments " ) ) {
return sysfs_emit ( buf , " %u \n " ,
sbi - > gc_reclaimed_segs [ sbi - > gc_segment_mode ] ) ;
}
2022-07-19 02:02:48 +03:00
if ( ! strcmp ( a - > attr . name , " current_atomic_write " ) ) {
s64 current_write = atomic64_read ( & sbi - > current_atomic_write ) ;
return sysfs_emit ( buf , " %lld \n " , current_write ) ;
}
if ( ! strcmp ( a - > attr . name , " peak_atomic_write " ) )
return sysfs_emit ( buf , " %lld \n " , sbi - > peak_atomic_write ) ;
if ( ! strcmp ( a - > attr . name , " committed_atomic_block " ) )
return sysfs_emit ( buf , " %llu \n " , sbi - > committed_atomic_block ) ;
if ( ! strcmp ( a - > attr . name , " revoked_atomic_block " ) )
return sysfs_emit ( buf , " %llu \n " , sbi - > revoked_atomic_block ) ;
2023-08-08 03:59:49 +03:00
# ifdef CONFIG_F2FS_STAT_FS
if ( ! strcmp ( a - > attr . name , " cp_foreground_calls " ) )
return sysfs_emit ( buf , " %d \n " ,
atomic_read ( & sbi - > cp_call_count [ TOTAL_CALL ] ) -
atomic_read ( & sbi - > cp_call_count [ BACKGROUND ] ) ) ;
if ( ! strcmp ( a - > attr . name , " cp_background_calls " ) )
return sysfs_emit ( buf , " %d \n " ,
atomic_read ( & sbi - > cp_call_count [ BACKGROUND ] ) ) ;
# endif
2017-06-14 12:39:47 +03:00
ui = ( unsigned int * ) ( ptr + a - > offset ) ;
2022-10-28 19:49:53 +03:00
return sysfs_emit ( buf , " %u \n " , * ui ) ;
2017-06-14 12:39:47 +03:00
}
f2fs: clean up symbol namespace
As Ted reported:
"Hi, I was looking at f2fs's sources recently, and I noticed that there
is a very large number of non-static symbols which don't have a f2fs
prefix. There's well over a hundred (see attached below).
As one example, in fs/f2fs/dir.c there is:
unsigned char get_de_type(struct f2fs_dir_entry *de)
This function is clearly only useful for f2fs, but it has a generic
name. This means that if any other file system tries to have the same
symbol name, there will be a symbol conflict and the kernel would not
successfully build. It also means that when someone is looking f2fs
sources, it's not at all obvious whether a function such as
read_data_page(), invalidate_blocks(), is a generic kernel function
found in the fs, mm, or block layers, or a f2fs specific function.
You might want to fix this at some point. Hopefully Kent's bcachefs
isn't similarly using genericly named functions, since that might
cause conflicts with f2fs's functions --- but just as this would be a
problem that we would rightly insist that Kent fix, this is something
that we should have rightly insisted that f2fs should have fixed
before it was integrated into the mainline kernel.
acquire_orphan_inode
add_ino_entry
add_orphan_inode
allocate_data_block
allocate_new_segments
alloc_nid
alloc_nid_done
alloc_nid_failed
available_free_memory
...."
This patch adds an "f2fs_" prefix to all non-static symbols in order to:
a) avoid conflicts with other generic kernel symbols;
b) indicate that a function is f2fs-specific rather than generic.
Reported-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-05-29 19:20:41 +03:00
static ssize_t __sbi_store ( struct f2fs_attr * a ,
2017-06-14 12:39:47 +03:00
struct f2fs_sb_info * sbi ,
const char * buf , size_t count )
{
unsigned char * ptr ;
unsigned long t ;
unsigned int * ui ;
ssize_t ret ;
ptr = __struct_ptr ( sbi , a - > struct_type ) ;
if ( ! ptr )
return - EINVAL ;
2018-02-26 17:04:13 +03:00
if ( ! strcmp ( a - > attr . name , " extension_list " ) ) {
const char * name = strim ( ( char * ) buf ) ;
2018-02-28 12:07:27 +03:00
bool set = true , hot ;
if ( ! strncmp ( name , " [h] " , 3 ) )
hot = true ;
else if ( ! strncmp ( name , " [c] " , 3 ) )
hot = false ;
else
return - EINVAL ;
name + = 3 ;
2018-02-26 17:04:13 +03:00
2018-02-28 12:07:27 +03:00
if ( * name = = ' ! ' ) {
2018-02-26 17:04:13 +03:00
name + + ;
set = false ;
}
2021-07-09 11:34:53 +03:00
if ( ! strlen ( name ) | | strlen ( name ) > = F2FS_EXTENSION_LEN )
2018-02-26 17:04:13 +03:00
return - EINVAL ;
2022-01-07 23:48:44 +03:00
f2fs_down_write ( & sbi - > sb_lock ) ;
2018-02-26 17:04:13 +03:00
f2fs: clean up symbol namespace
As Ted reported:
"Hi, I was looking at f2fs's sources recently, and I noticed that there
is a very large number of non-static symbols which don't have a f2fs
prefix. There's well over a hundred (see attached below).
As one example, in fs/f2fs/dir.c there is:
unsigned char get_de_type(struct f2fs_dir_entry *de)
This function is clearly only useful for f2fs, but it has a generic
name. This means that if any other file system tries to have the same
symbol name, there will be a symbol conflict and the kernel would not
successfully build. It also means that when someone is looking f2fs
sources, it's not at all obvious whether a function such as
read_data_page(), invalidate_blocks(), is a generic kernel function
found in the fs, mm, or block layers, or a f2fs specific function.
You might want to fix this at some point. Hopefully Kent's bcachefs
isn't similarly using genericly named functions, since that might
cause conflicts with f2fs's functions --- but just as this would be a
problem that we would rightly insist that Kent fix, this is something
that we should have rightly insisted that f2fs should have fixed
before it was integrated into the mainline kernel.
acquire_orphan_inode
add_ino_entry
add_orphan_inode
allocate_data_block
allocate_new_segments
alloc_nid
alloc_nid_done
alloc_nid_failed
available_free_memory
...."
This patch adds an "f2fs_" prefix to all non-static symbols in order to:
a) avoid conflicts with other generic kernel symbols;
b) indicate that a function is f2fs-specific rather than generic.
Reported-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-05-29 19:20:41 +03:00
ret = f2fs_update_extension_list ( sbi , name , hot , set ) ;
2018-02-26 17:04:13 +03:00
if ( ret )
goto out ;
ret = f2fs_commit_super ( sbi , false ) ;
if ( ret )
f2fs: clean up symbol namespace
As Ted reported:
"Hi, I was looking at f2fs's sources recently, and I noticed that there
is a very large number of non-static symbols which don't have a f2fs
prefix. There's well over a hundred (see attached below).
As one example, in fs/f2fs/dir.c there is:
unsigned char get_de_type(struct f2fs_dir_entry *de)
This function is clearly only useful for f2fs, but it has a generic
name. This means that if any other file system tries to have the same
symbol name, there will be a symbol conflict and the kernel would not
successfully build. It also means that when someone is looking f2fs
sources, it's not at all obvious whether a function such as
read_data_page(), invalidate_blocks(), is a generic kernel function
found in the fs, mm, or block layers, or a f2fs specific function.
You might want to fix this at some point. Hopefully Kent's bcachefs
isn't similarly using genericly named functions, since that might
cause conflicts with f2fs's functions --- but just as this would be a
problem that we would rightly insist that Kent fix, this is something
that we should have rightly insisted that f2fs should have fixed
before it was integrated into the mainline kernel.
acquire_orphan_inode
add_ino_entry
add_orphan_inode
allocate_data_block
allocate_new_segments
alloc_nid
alloc_nid_done
alloc_nid_failed
available_free_memory
...."
This patch adds an "f2fs_" prefix to all non-static symbols in order to:
a) avoid conflicts with other generic kernel symbols;
b) indicate that a function is f2fs-specific rather than generic.
Reported-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-05-29 19:20:41 +03:00
f2fs_update_extension_list ( sbi , name , hot , ! set ) ;
2018-02-26 17:04:13 +03:00
out :
2022-01-07 23:48:44 +03:00
f2fs_up_write ( & sbi - > sb_lock ) ;
2018-02-26 17:04:13 +03:00
return ret ? ret : count ;
}
2021-01-21 16:45:29 +03:00
if ( ! strcmp ( a - > attr . name , " ckpt_thread_ioprio " ) ) {
const char * name = strim ( ( char * ) buf ) ;
struct ckpt_req_control * cprc = & sbi - > cprc_info ;
int class ;
long data ;
int ret ;
if ( ! strncmp ( name , " rt, " , 3 ) )
class = IOPRIO_CLASS_RT ;
else if ( ! strncmp ( name , " be, " , 3 ) )
class = IOPRIO_CLASS_BE ;
else
return - EINVAL ;
name + = 3 ;
ret = kstrtol ( name , 10 , & data ) ;
if ( ret )
return ret ;
2021-08-11 06:37:01 +03:00
if ( data > = IOPRIO_NR_LEVELS | | data < 0 )
2021-01-21 16:45:29 +03:00
return - EINVAL ;
cprc - > ckpt_thread_ioprio = IOPRIO_PRIO_VALUE ( class , data ) ;
if ( test_opt ( sbi , MERGE_CHECKPOINT ) ) {
ret = set_task_ioprio ( cprc - > f2fs_issue_ckpt ,
cprc - > ckpt_thread_ioprio ) ;
if ( ret )
return ret ;
}
return count ;
}
2017-06-14 12:39:47 +03:00
ui = ( unsigned int * ) ( ptr + a - > offset ) ;
ret = kstrtoul ( skip_spaces ( buf ) , 0 , & t ) ;
if ( ret < 0 )
return ret ;
# ifdef CONFIG_F2FS_FAULT_INJECTION
2024-05-07 06:38:47 +03:00
if ( a - > struct_type = = FAULT_INFO_TYPE ) {
if ( f2fs_build_fault_attr ( sbi , 0 , t ) )
return - EINVAL ;
return count ;
}
if ( a - > struct_type = = FAULT_INFO_RATE ) {
if ( f2fs_build_fault_attr ( sbi , t , 0 ) )
return - EINVAL ;
return count ;
}
2017-06-14 12:39:47 +03:00
# endif
2017-06-26 11:24:41 +03:00
if ( a - > struct_type = = RESERVED_BLOCKS ) {
spin_lock ( & sbi - > stat_lock ) ;
2017-12-28 02:05:52 +03:00
if ( t > ( unsigned long ) ( sbi - > user_block_count -
f2fs: fix to reserve space for IO align feature
https://bugzilla.kernel.org/show_bug.cgi?id=204137
With below script, we will hit panic during new segment allocation:
DISK=bingo.img
MOUNT_DIR=/mnt/f2fs
dd if=/dev/zero of=$DISK bs=1M count=105
mkfs.f2fs -a 1 -o 19 -t 1 -z 1 -f -q $DISK
mount -t f2fs $DISK $MOUNT_DIR -o "noinline_dentry,flush_merge,noextent_cache,mode=lfs,io_bits=7,fsync_mode=strict"
for (( i = 0; i < 4096; i++ )); do
name=`head /dev/urandom | tr -dc A-Za-z0-9 | head -c 10`
mkdir $MOUNT_DIR/$name
done
umount $MOUNT_DIR
rm $DISK
--- Core dump ---
Call Trace:
allocate_segment_by_default+0x9d/0x100 [f2fs]
f2fs_allocate_data_block+0x3c0/0x5c0 [f2fs]
do_write_page+0x62/0x110 [f2fs]
f2fs_outplace_write_data+0x43/0xc0 [f2fs]
f2fs_do_write_data_page+0x386/0x560 [f2fs]
__write_data_page+0x706/0x850 [f2fs]
f2fs_write_cache_pages+0x267/0x6a0 [f2fs]
f2fs_write_data_pages+0x19c/0x2e0 [f2fs]
do_writepages+0x1c/0x70
__filemap_fdatawrite_range+0xaa/0xe0
filemap_fdatawrite+0x1f/0x30
f2fs_sync_dirty_inodes+0x74/0x1f0 [f2fs]
block_operations+0xdc/0x350 [f2fs]
f2fs_write_checkpoint+0x104/0x1150 [f2fs]
f2fs_sync_fs+0xa2/0x120 [f2fs]
f2fs_balance_fs_bg+0x33c/0x390 [f2fs]
f2fs_write_node_pages+0x4c/0x1f0 [f2fs]
do_writepages+0x1c/0x70
__writeback_single_inode+0x45/0x320
writeback_sb_inodes+0x273/0x5c0
wb_writeback+0xff/0x2e0
wb_workfn+0xa1/0x370
process_one_work+0x138/0x350
worker_thread+0x4d/0x3d0
kthread+0x109/0x140
ret_from_fork+0x25/0x30
The root cause is that, with the IO alignment feature enabled, in the
worst case we need F2FS_IO_SIZE() blocks of free space for a single 4k
write, because the IO alignment feature fills dummy pages to keep the IO
aligned.
So we will easily run out of free segments during non-inline directory's
data writeback, even in process of foreground GC.
In order to fix this issue, I just propose to reserve additional free
space for IO alignment feature to handle worst case of free space usage
ratio during FGGC.
Fixes: 0a595ebaaa6b ("f2fs: support IO alignment for DATA and NODE writes")
Signed-off-by: Chao Yu <chao@kernel.org>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2021-12-11 16:27:36 +03:00
F2FS_OPTION ( sbi ) . root_reserved_blocks -
2024-03-04 06:28:55 +03:00
SEGS_TO_BLKS ( sbi ,
SM_I ( sbi ) - > additional_reserved_segments ) ) ) {
2017-06-26 11:24:41 +03:00
spin_unlock ( & sbi - > stat_lock ) ;
return - EINVAL ;
}
* ui = t ;
2017-10-27 15:45:05 +03:00
sbi - > current_reserved_blocks = min ( sbi - > reserved_blocks ,
sbi - > user_block_count - valid_user_blocks ( sbi ) ) ;
2017-06-26 11:24:41 +03:00
spin_unlock ( & sbi - > stat_lock ) ;
return count ;
}
f2fs: introduce discard_granularity sysfs entry
Commit d618ebaf0aa8 ("f2fs: enable small discard by default") enables
f2fs to issue 4K size discard in real-time discard mode. However, issuing
smaller discard may cost more lifetime but releasing less free space in
flash device. Since f2fs has ability of separating hot/cold data and
garbage collection, we can expect that small-sized invalid region would
expand soon with OPU, deletion or garbage collection on valid datas, so
it's better to delay or skip issuing smaller size discards, it could help
to reduce overmuch consumption of IO bandwidth and lifetime of flash
storage.
This patch makes f2fs select 64K size as its default minimal
granularity, and issue discard with the size which is not smaller than
minimal granularity. Also it exposes discard granularity as sysfs entry
for configuration in different scenario.
Jaegeuk Kim:
We must issue all the accumulated discard commands when fstrim is called.
So, I've added pend_list_tag[] to indicate whether we should issue the
commands or not. If tag sets P_ACTIVE or P_TRIM, we have to issue them.
P_TRIM is set once at a time, given fstrim trigger.
In addition, issue_discard_thread is calling too much due to the number of
discard commands remaining in the pending list. I added a timer to control
it likewise gc_thread.
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2017-08-07 18:09:56 +03:00
2023-01-04 14:40:29 +03:00
if ( ! strcmp ( a - > attr . name , " discard_io_aware_gran " ) ) {
if ( t > MAX_PLIST_NUM )
return - EINVAL ;
if ( ! f2fs_block_unit_discard ( sbi ) )
return - EINVAL ;
if ( t = = * ui )
return count ;
* ui = t ;
return count ;
}
f2fs: introduce discard_granularity sysfs entry
Commit d618ebaf0aa8 ("f2fs: enable small discard by default") enables
f2fs to issue 4K size discard in real-time discard mode. However, issuing
smaller discard may cost more lifetime but releasing less free space in
flash device. Since f2fs has ability of separating hot/cold data and
garbage collection, we can expect that small-sized invalid region would
expand soon with OPU, deletion or garbage collection on valid datas, so
it's better to delay or skip issuing smaller size discards, it could help
to reduce overmuch consumption of IO bandwidth and lifetime of flash
storage.
This patch makes f2fs select 64K size as its default minimal
granularity, and issue discard with the size which is not smaller than
minimal granularity. Also it exposes discard granularity as sysfs entry
for configuration in different scenario.
Jaegeuk Kim:
We must issue all the accumulated discard commands when fstrim is called.
So, I've added pend_list_tag[] to indicate whether we should issue the
commands or not. If tag sets P_ACTIVE or P_TRIM, we have to issue them.
P_TRIM is set once at a time, given fstrim trigger.
In addition, issue_discard_thread is calling too much due to the number of
discard commands remaining in the pending list. I added a timer to control
it likewise gc_thread.
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2017-08-07 18:09:56 +03:00
if ( ! strcmp ( a - > attr . name , " discard_granularity " ) ) {
if ( t = = 0 | | t > MAX_PLIST_NUM )
return - EINVAL ;
f2fs: introduce discard_unit mount option
As James Z reported in bugzilla:
https://bugzilla.kernel.org/show_bug.cgi?id=213877
[1.] One-line summary of the problem:
Mount multiple SMR block devices exceed certain number cause system non-response
[2.] Full description of the problem/report:
Created some F2FS on SMR devices (mkfs.f2fs -m), then mounted in sequence. Each device is the same Model: HGST HSH721414AL (Size 14TB).
Empirically, found that when the amount of SMR device * 1.5Gb > System RAM, the system ran out of memory and hung. No dmesg output. For example, 24 SMR Disk need 24*1.5GB = 36GB. A system with 32G RAM can only mount 21 devices, the 22nd device will be a reproducible cause of system hang.
The number of SMR devices with other FS mounted on this system does not interfere with the result above.
[3.] Keywords (i.e., modules, networking, kernel):
F2FS, SMR, Memory
[4.] Kernel information
[4.1.] Kernel version (uname -a):
Linux 5.13.4-200.fc34.x86_64 #1 SMP Tue Jul 20 20:27:29 UTC 2021 x86_64 x86_64 x86_64 GNU/Linux
[4.2.] Kernel .config file:
Default Fedora 34 with f2fs-tools-1.14.0-2.fc34.x86_64
[5.] Most recent kernel version which did not have the bug:
None
[6.] Output of Oops.. message (if applicable) with symbolic information
resolved (see Documentation/admin-guide/oops-tracing.rst)
None
[7.] A small shell script or example program which triggers the
problem (if possible)
mount /dev/sdX /mnt/0X
[8.] Memory consumption
With 24 * 14T SMR Block device with F2FS
free -g
total used free shared buff/cache available
Mem: 46 36 0 0 10 10
Swap: 0 0 0
With 3 * 14T SMR Block device with F2FS
free -g
total used free shared buff/cache available
Mem: 7 5 0 0 1 1
Swap: 7 0 7
The root cause is, there are three bitmaps:
- cur_valid_map
- ckpt_valid_map
- discard_map
and each of them will cost ~500MB memory, {cur, ckpt}_valid_map are
necessary, but discard_map is optional, since this bitmap will only be
useful in mountpoint that small discard is enabled.
For a blkzoned device such as SMR or ZNS devices, f2fs will only issue
discard for a section(zone) when all blocks of that section are invalid,
so, for such device, we don't need small discard functionality at all.
This patch introduces a new mount option "discard_unit=block|segment|
section" to support issuing discard with different basic unit which is
aligned to block, segment or section, so that user can specify
"discard_unit=segment" or "discard_unit=section" to disable small
discard functionality.
Note that this mount option cannot be changed by remount() because the
related metadata needs to be initialized during mount().
In order to save memory, let's use "discard_unit=section" for blkzoned
device by default.
Signed-off-by: Chao Yu <chao@kernel.org>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2021-08-03 03:15:43 +03:00
if ( ! f2fs_block_unit_discard ( sbi ) )
return - EINVAL ;
f2fs: introduce discard_granularity sysfs entry
Commit d618ebaf0aa8 ("f2fs: enable small discard by default") enables
f2fs to issue 4K size discard in real-time discard mode. However, issuing
smaller discard may cost more lifetime but releasing less free space in
flash device. Since f2fs has ability of separating hot/cold data and
garbage collection, we can expect that small-sized invalid region would
expand soon with OPU, deletion or garbage collection on valid datas, so
it's better to delay or skip issuing smaller size discards, it could help
to reduce overmuch consumption of IO bandwidth and lifetime of flash
storage.
This patch makes f2fs select 64K size as its default minimal
granularity, and issue discard with the size which is not smaller than
minimal granularity. Also it exposes discard granularity as sysfs entry
for configuration in different scenario.
Jaegeuk Kim:
We must issue all the accumulated discard commands when fstrim is called.
So, I've added pend_list_tag[] to indicate whether we should issue the
commands or not. If tag sets P_ACTIVE or P_TRIM, we have to issue them.
P_TRIM is set once at a time, given fstrim trigger.
In addition, issue_discard_thread is calling too much due to the number of
discard commands remaining in the pending list. I added a timer to control
it likewise gc_thread.
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2017-08-07 18:09:56 +03:00
if ( t = = * ui )
return count ;
2017-09-12 09:25:35 +03:00
* ui = t ;
f2fs: introduce discard_granularity sysfs entry
Commit d618ebaf0aa8 ("f2fs: enable small discard by default") enables
f2fs to issue 4K size discard in real-time discard mode. However, issuing
smaller discard may cost more lifetime but releasing less free space in
flash device. Since f2fs has ability of separating hot/cold data and
garbage collection, we can expect that small-sized invalid region would
expand soon with OPU, deletion or garbage collection on valid datas, so
it's better to delay or skip issuing smaller size discards, it could help
to reduce overmuch consumption of IO bandwidth and lifetime of flash
storage.
This patch makes f2fs select 64K size as its default minimal
granularity, and issue discard with the size which is not smaller than
minimal granularity. Also it exposes discard granularity as sysfs entry
for configuration in different scenario.
Jaegeuk Kim:
We must issue all the accumulated discard commands when fstrim is called.
So, I've added pend_list_tag[] to indicate whether we should issue the
commands or not. If tag sets P_ACTIVE or P_TRIM, we have to issue them.
P_TRIM is set once at a time, given fstrim trigger.
In addition, issue_discard_thread is calling too much due to the number of
discard commands remaining in the pending list. I added a timer to control
it likewise gc_thread.
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2017-08-07 18:09:56 +03:00
return count ;
}
2022-10-25 11:32:26 +03:00
if ( ! strcmp ( a - > attr . name , " max_ordered_discard " ) ) {
if ( t = = 0 | | t > MAX_PLIST_NUM )
return - EINVAL ;
if ( ! f2fs_block_unit_discard ( sbi ) )
return - EINVAL ;
* ui = t ;
return count ;
}
2022-11-23 19:44:02 +03:00
if ( ! strcmp ( a - > attr . name , " discard_urgent_util " ) ) {
if ( t > 100 )
return - EINVAL ;
* ui = t ;
return count ;
}
2023-11-22 17:47:15 +03:00
if ( ! strcmp ( a - > attr . name , " discard_io_aware " ) ) {
if ( t > = DPOLICY_IO_AWARE_MAX )
return - EINVAL ;
* ui = t ;
return count ;
}
2018-10-25 11:19:28 +03:00
if ( ! strcmp ( a - > attr . name , " migration_granularity " ) ) {
2024-02-07 00:56:27 +03:00
if ( t = = 0 | | t > SEGS_PER_SEC ( sbi ) )
2018-10-25 11:19:28 +03:00
return - EINVAL ;
}
2018-05-08 00:22:40 +03:00
if ( ! strcmp ( a - > attr . name , " gc_urgent " ) ) {
2020-07-02 07:14:14 +03:00
if ( t = = 0 ) {
sbi - > gc_mode = GC_NORMAL ;
} else if ( t = = 1 ) {
sbi - > gc_mode = GC_URGENT_HIGH ;
2018-05-08 00:22:40 +03:00
if ( sbi - > gc_thread ) {
2022-12-12 16:36:44 +03:00
sbi - > gc_thread - > gc_wake = true ;
2018-05-08 00:22:40 +03:00
wake_up_interruptible_all (
& sbi - > gc_thread - > gc_wait_queue_head ) ;
wake_up_discard_thread ( sbi , true ) ;
}
2020-07-02 07:14:14 +03:00
} else if ( t = = 2 ) {
sbi - > gc_mode = GC_URGENT_LOW ;
2022-03-16 07:14:14 +03:00
} else if ( t = = 3 ) {
sbi - > gc_mode = GC_URGENT_MID ;
if ( sbi - > gc_thread ) {
2022-12-12 16:36:44 +03:00
sbi - > gc_thread - > gc_wake = true ;
2022-03-16 07:14:14 +03:00
wake_up_interruptible_all (
& sbi - > gc_thread - > gc_wait_queue_head ) ;
}
2018-05-08 00:22:40 +03:00
} else {
2020-07-02 07:14:14 +03:00
return - EINVAL ;
2018-05-08 00:22:40 +03:00
}
return count ;
}
if ( ! strcmp ( a - > attr . name , " gc_idle " ) ) {
f2fs: support age threshold based garbage collection
There are several issues in current background GC algorithm:
- valid blocks is one of key factors during cost overhead calculation,
so if segment has less valid block, however even its age is young or
it locates hot segment, CB algorithm will still choose the segment as
victim, it's not appropriate.
- GCed data/node will go to existing logs, no matter in-there datas'
update frequency is the same or not, it may mix hot and cold data
again.
- GC allocator mainly uses LFS type segments, so it consumes free segments
more quickly.
This patch introduces a new algorithm named age threshold based
garbage collection to solve above issues, there are three steps
mainly:
1. select a source victim:
- set an age threshold, and select candidates based on the threshold:
e.g.
0 means youngest, 100 means oldest; if we set the age threshold to 80,
then dirty segments whose age is in the range [80, 100] are selected as
candidates;
- set candidate_ratio threshold, and select candidates based the
ratio, so that we can shrink candidates to those oldest segments;
- select target segment with fewest valid blocks in order to
migrate blocks with minimum cost;
2. select a target victim:
- select candidates based on the age threshold;
- set candidate_radius threshold, search candidates whose age is
around source victims, searching radius should less than the
radius threshold.
- select target segment with most valid blocks in order to avoid
migrating current target segment.
3. merge valid blocks from source victim into target victim with
SSR allocator.
Test steps:
- create 160 dirty segments:
* half of them have 128 valid blocks per segment
* left of them have 384 valid blocks per segment
- run background GC
Benefit: GC count and block movement count both decrease obviously:
- Before:
- Valid: 86
- Dirty: 1
- Prefree: 11
- Free: 6001 (6001)
GC calls: 162 (BG: 220)
- data segments : 160 (160)
- node segments : 2 (2)
Try to move 41454 blocks (BG: 41454)
- data blocks : 40960 (40960)
- node blocks : 494 (494)
IPU: 0 blocks
SSR: 0 blocks in 0 segments
LFS: 41364 blocks in 81 segments
- After:
- Valid: 87
- Dirty: 0
- Prefree: 4
- Free: 6008 (6008)
GC calls: 75 (BG: 76)
- data segments : 74 (74)
- node segments : 1 (1)
Try to move 12813 blocks (BG: 12813)
- data blocks : 12544 (12544)
- node blocks : 269 (269)
IPU: 0 blocks
SSR: 12032 blocks in 77 segments
LFS: 855 blocks in 2 segments
Signed-off-by: Chao Yu <yuchao0@huawei.com>
[Jaegeuk Kim: fix a bug along with pinfile in-mem segment & clean up]
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2020-08-04 16:14:49 +03:00
if ( t = = GC_IDLE_CB ) {
2018-05-08 00:22:40 +03:00
sbi - > gc_mode = GC_IDLE_CB ;
f2fs: support age threshold based garbage collection
There are several issues in current background GC algorithm:
- valid blocks is one of key factors during cost overhead calculation,
so if segment has less valid block, however even its age is young or
it locates hot segment, CB algorithm will still choose the segment as
victim, it's not appropriate.
- GCed data/node will go to existing logs, no matter in-there datas'
update frequency is the same or not, it may mix hot and cold data
again.
- GC allocator mainly uses LFS type segments, so it consumes free segments
more quickly.
This patch introduces a new algorithm named age threshold based
garbage collection to solve above issues, there are three steps
mainly:
1. select a source victim:
- set an age threshold, and select candidates based on the threshold:
e.g.
0 means youngest, 100 means oldest; if we set the age threshold to 80,
then dirty segments whose age is in the range [80, 100] are selected as
candidates;
- set candidate_ratio threshold, and select candidates based the
ratio, so that we can shrink candidates to those oldest segments;
- select target segment with fewest valid blocks in order to
migrate blocks with minimum cost;
2. select a target victim:
- select candidates based on the age threshold;
- set candidate_radius threshold, search candidates whose age is
around source victims, searching radius should less than the
radius threshold.
- select target segment with most valid blocks in order to avoid
migrating current target segment.
3. merge valid blocks from source victim into target victim with
SSR allocator.
Test steps:
- create 160 dirty segments:
* half of them have 128 valid blocks per segment
* left of them have 384 valid blocks per segment
- run background GC
Benefit: GC count and block movement count both decrease obviously:
- Before:
- Valid: 86
- Dirty: 1
- Prefree: 11
- Free: 6001 (6001)
GC calls: 162 (BG: 220)
- data segments : 160 (160)
- node segments : 2 (2)
Try to move 41454 blocks (BG: 41454)
- data blocks : 40960 (40960)
- node blocks : 494 (494)
IPU: 0 blocks
SSR: 0 blocks in 0 segments
LFS: 41364 blocks in 81 segments
- After:
- Valid: 87
- Dirty: 0
- Prefree: 4
- Free: 6008 (6008)
GC calls: 75 (BG: 76)
- data segments : 74 (74)
- node segments : 1 (1)
Try to move 12813 blocks (BG: 12813)
- data blocks : 12544 (12544)
- node blocks : 269 (269)
IPU: 0 blocks
SSR: 12032 blocks in 77 segments
LFS: 855 blocks in 2 segments
Signed-off-by: Chao Yu <yuchao0@huawei.com>
[Jaegeuk Kim: fix a bug along with pinfile in-mem segment & clean up]
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2020-08-04 16:14:49 +03:00
} else if ( t = = GC_IDLE_GREEDY ) {
2018-05-08 00:22:40 +03:00
sbi - > gc_mode = GC_IDLE_GREEDY ;
f2fs: support age threshold based garbage collection
There are several issues in current background GC algorithm:
- valid blocks is one of key factors during cost overhead calculation,
so if segment has less valid block, however even its age is young or
it locates hot segment, CB algorithm will still choose the segment as
victim, it's not appropriate.
- GCed data/node will go to existing logs, no matter in-there datas'
update frequency is the same or not, it may mix hot and cold data
again.
- GC allocator mainly uses LFS type segments, so it consumes free segments
more quickly.
This patch introduces a new algorithm named age threshold based
garbage collection to solve above issues, there are three steps
mainly:
1. select a source victim:
- set an age threshold, and select candidates based on the threshold:
e.g.
0 means youngest, 100 means oldest; if we set the age threshold to 80,
then dirty segments whose age is in the range [80, 100] are selected as
candidates;
- set candidate_ratio threshold, and select candidates based the
ratio, so that we can shrink candidates to those oldest segments;
- select target segment with fewest valid blocks in order to
migrate blocks with minimum cost;
2. select a target victim:
- select candidates based on the age threshold;
- set candidate_radius threshold, search candidates whose age is
around source victims, searching radius should less than the
radius threshold.
- select target segment with most valid blocks in order to avoid
migrating current target segment.
3. merge valid blocks from source victim into target victim with
SSR allocator.
Test steps:
- create 160 dirty segments:
* half of them have 128 valid blocks per segment
* left of them have 384 valid blocks per segment
- run background GC
Benefit: GC count and block movement count both decrease obviously:
- Before:
- Valid: 86
- Dirty: 1
- Prefree: 11
- Free: 6001 (6001)
GC calls: 162 (BG: 220)
- data segments : 160 (160)
- node segments : 2 (2)
Try to move 41454 blocks (BG: 41454)
- data blocks : 40960 (40960)
- node blocks : 494 (494)
IPU: 0 blocks
SSR: 0 blocks in 0 segments
LFS: 41364 blocks in 81 segments
- After:
- Valid: 87
- Dirty: 0
- Prefree: 4
- Free: 6008 (6008)
GC calls: 75 (BG: 76)
- data segments : 74 (74)
- node segments : 1 (1)
Try to move 12813 blocks (BG: 12813)
- data blocks : 12544 (12544)
- node blocks : 269 (269)
IPU: 0 blocks
SSR: 12032 blocks in 77 segments
LFS: 855 blocks in 2 segments
Signed-off-by: Chao Yu <yuchao0@huawei.com>
[Jaegeuk Kim: fix a bug along with pinfile in-mem segment & clean up]
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2020-08-04 16:14:49 +03:00
} else if ( t = = GC_IDLE_AT ) {
if ( ! sbi - > am . atgc_enabled )
return - EINVAL ;
2022-01-18 06:48:02 +03:00
sbi - > gc_mode = GC_IDLE_AT ;
f2fs: support age threshold based garbage collection
There are several issues in current background GC algorithm:
- valid blocks is one of key factors during cost overhead calculation,
so if segment has less valid block, however even its age is young or
it locates hot segment, CB algorithm will still choose the segment as
victim, it's not appropriate.
- GCed data/node will go to existing logs, no matter in-there datas'
update frequency is the same or not, it may mix hot and cold data
again.
- GC allocator mainly uses LFS type segments, so it consumes free segments
more quickly.
This patch introduces a new algorithm named age threshold based
garbage collection to solve above issues, there are three steps
mainly:
1. select a source victim:
- set an age threshold, and select candidates based on the threshold:
e.g.
0 means youngest, 100 means oldest; if we set the age threshold to 80,
then dirty segments whose age is in the range [80, 100] are selected as
candidates;
- set candidate_ratio threshold, and select candidates based the
ratio, so that we can shrink candidates to those oldest segments;
- select target segment with fewest valid blocks in order to
migrate blocks with minimum cost;
2. select a target victim:
- select candidates based on the age threshold;
- set candidate_radius threshold, search candidates whose age is
around source victims, searching radius should less than the
radius threshold.
- select target segment with most valid blocks in order to avoid
migrating current target segment.
3. merge valid blocks from source victim into target victim with
SSR allocator.
Test steps:
- create 160 dirty segments:
* half of them have 128 valid blocks per segment
* left of them have 384 valid blocks per segment
- run background GC
Benefit: GC count and block movement count both decrease obviously:
- Before:
- Valid: 86
- Dirty: 1
- Prefree: 11
- Free: 6001 (6001)
GC calls: 162 (BG: 220)
- data segments : 160 (160)
- node segments : 2 (2)
Try to move 41454 blocks (BG: 41454)
- data blocks : 40960 (40960)
- node blocks : 494 (494)
IPU: 0 blocks
SSR: 0 blocks in 0 segments
LFS: 41364 blocks in 81 segments
- After:
- Valid: 87
- Dirty: 0
- Prefree: 4
- Free: 6008 (6008)
GC calls: 75 (BG: 76)
- data segments : 74 (74)
- node segments : 1 (1)
Try to move 12813 blocks (BG: 12813)
- data blocks : 12544 (12544)
- node blocks : 269 (269)
IPU: 0 blocks
SSR: 12032 blocks in 77 segments
LFS: 855 blocks in 2 segments
Signed-off-by: Chao Yu <yuchao0@huawei.com>
[Jaegeuk Kim: fix a bug along with pinfile in-mem segment & clean up]
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2020-08-04 16:14:49 +03:00
} else {
2018-05-08 00:22:40 +03:00
sbi - > gc_mode = GC_NORMAL ;
f2fs: support age threshold based garbage collection
There are several issues in current background GC algorithm:
- valid blocks is one of key factors during cost overhead calculation,
so if segment has less valid block, however even its age is young or
it locates hot segment, CB algorithm will still choose the segment as
victim, it's not appropriate.
- GCed data/node will go to existing logs, no matter in-there datas'
update frequency is the same or not, it may mix hot and cold data
again.
- GC allocator mainly uses LFS type segments, so it consumes free segments
more quickly.
This patch introduces a new algorithm named age threshold based
garbage collection to solve above issues, there are three steps
mainly:
1. select a source victim:
- set an age threshold, and select candidates based on the threshold:
e.g.
0 means youngest, 100 means oldest, if we set age threshold to 80
then select dirty segments which have age in range of [80, 100] as
candidates;
- set candidate_ratio threshold, and select candidates based the
ratio, so that we can shrink candidates to those oldest segments;
- select target segment with fewest valid blocks in order to
migrate blocks with minimum cost;
2. select a target victim:
- select candidates based on the age threshold;
- set candidate_radius threshold, search candidates whose age is
around source victims, searching radius should less than the
radius threshold.
- select target segment with most valid blocks in order to avoid
migrating current target segment.
3. merge valid blocks from source victim into target victim with
SSR allocator.
Test steps:
- create 160 dirty segments:
* half of them have 128 valid blocks per segment
* left of them have 384 valid blocks per segment
- run background GC
Benefit: GC count and block movement count both decrease obviously:
- Before:
- Valid: 86
- Dirty: 1
- Prefree: 11
- Free: 6001 (6001)
GC calls: 162 (BG: 220)
- data segments : 160 (160)
- node segments : 2 (2)
Try to move 41454 blocks (BG: 41454)
- data blocks : 40960 (40960)
- node blocks : 494 (494)
IPU: 0 blocks
SSR: 0 blocks in 0 segments
LFS: 41364 blocks in 81 segments
- After:
- Valid: 87
- Dirty: 0
- Prefree: 4
- Free: 6008 (6008)
GC calls: 75 (BG: 76)
- data segments : 74 (74)
- node segments : 1 (1)
Try to move 12813 blocks (BG: 12813)
- data blocks : 12544 (12544)
- node blocks : 269 (269)
IPU: 0 blocks
SSR: 12032 blocks in 77 segments
LFS: 855 blocks in 2 segments
Signed-off-by: Chao Yu <yuchao0@huawei.com>
[Jaegeuk Kim: fix a bug along with pinfile in-mem segment & clean up]
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2020-08-04 16:14:49 +03:00
}
2018-05-08 00:22:40 +03:00
return count ;
}
2022-10-25 09:50:25 +03:00
if ( ! strcmp ( a - > attr . name , " gc_remaining_trials " ) ) {
spin_lock ( & sbi - > gc_remaining_trials_lock ) ;
sbi - > gc_remaining_trials = t ;
spin_unlock ( & sbi - > gc_remaining_trials_lock ) ;
2021-12-09 03:41:51 +03:00
return count ;
}
2021-08-20 06:52:28 +03:00
# ifdef CONFIG_F2FS_IOSTAT
2019-01-15 23:02:15 +03:00
if ( ! strcmp ( a - > attr . name , " iostat_enable " ) ) {
sbi - > iostat_enable = ! ! t ;
if ( ! sbi - > iostat_enable )
f2fs_reset_iostat ( sbi ) ;
return count ;
}
2020-03-30 06:30:59 +03:00
if ( ! strcmp ( a - > attr . name , " iostat_period_ms " ) ) {
if ( t < MIN_IOSTAT_PERIOD_MS | | t > MAX_IOSTAT_PERIOD_MS )
return - EINVAL ;
2023-03-31 12:26:56 +03:00
spin_lock_irq ( & sbi - > iostat_lock ) ;
2020-03-30 06:30:59 +03:00
sbi - > iostat_period_ms = ( unsigned int ) t ;
2023-03-31 12:26:56 +03:00
spin_unlock_irq ( & sbi - > iostat_lock ) ;
2020-03-30 06:30:59 +03:00
return count ;
}
2021-08-20 06:52:28 +03:00
# endif
2020-03-30 06:30:59 +03:00
2021-03-15 11:12:33 +03:00
# ifdef CONFIG_F2FS_FS_COMPRESSION
if ( ! strcmp ( a - > attr . name , " compr_written_block " ) | |
! strcmp ( a - > attr . name , " compr_saved_block " ) ) {
if ( t ! = 0 )
return - EINVAL ;
sbi - > compr_written_block = 0 ;
sbi - > compr_saved_block = 0 ;
return count ;
}
if ( ! strcmp ( a - > attr . name , " compr_new_inode " ) ) {
if ( t ! = 0 )
return - EINVAL ;
sbi - > compr_new_inode = 0 ;
return count ;
}
2023-02-16 17:09:35 +03:00
if ( ! strcmp ( a - > attr . name , " compress_percent " ) ) {
if ( t = = 0 | | t > 100 )
return - EINVAL ;
* ui = t ;
return count ;
}
if ( ! strcmp ( a - > attr . name , " compress_watermark " ) ) {
if ( t = = 0 | | t > 100 )
return - EINVAL ;
* ui = t ;
return count ;
}
2021-03-15 11:12:33 +03:00
# endif
2021-05-12 05:07:19 +03:00
if ( ! strcmp ( a - > attr . name , " atgc_candidate_ratio " ) ) {
if ( t > 100 )
return - EINVAL ;
sbi - > am . candidate_ratio = t ;
return count ;
}
if ( ! strcmp ( a - > attr . name , " atgc_age_weight " ) ) {
if ( t > 100 )
return - EINVAL ;
sbi - > am . age_weight = t ;
return count ;
}
2021-07-10 08:53:57 +03:00
if ( ! strcmp ( a - > attr . name , " gc_segment_mode " ) ) {
if ( t < MAX_GC_MODE )
sbi - > gc_segment_mode = t ;
else
return - EINVAL ;
return count ;
}
2024-05-06 13:45:38 +03:00
if ( ! strcmp ( a - > attr . name , " gc_pin_file_threshold " ) ) {
if ( t > MAX_GC_FAILED_PINNED_FILES )
return - EINVAL ;
sbi - > gc_pin_file_threshold = t ;
return count ;
}
2021-07-10 08:53:57 +03:00
if ( ! strcmp ( a - > attr . name , " gc_reclaimed_segments " ) ) {
if ( t ! = 0 )
return - EINVAL ;
sbi - > gc_reclaimed_segs [ sbi - > gc_segment_mode ] = 0 ;
return count ;
}
2021-08-03 07:22:45 +03:00
if ( ! strcmp ( a - > attr . name , " seq_file_ra_mul " ) ) {
if ( t > = MIN_RA_MUL & & t < = MAX_RA_MUL )
sbi - > seq_file_ra_mul = t ;
else
return - EINVAL ;
return count ;
}
2021-09-29 21:12:03 +03:00
if ( ! strcmp ( a - > attr . name , " max_fragment_chunk " ) ) {
if ( t > = MIN_FRAGMENT_SIZE & & t < = MAX_FRAGMENT_SIZE )
sbi - > max_fragment_chunk = t ;
else
return - EINVAL ;
return count ;
}
if ( ! strcmp ( a - > attr . name , " max_fragment_hole " ) ) {
if ( t > = MIN_FRAGMENT_SIZE & & t < = MAX_FRAGMENT_SIZE )
sbi - > max_fragment_hole = t ;
else
return - EINVAL ;
return count ;
}
2022-07-19 02:02:48 +03:00
if ( ! strcmp ( a - > attr . name , " peak_atomic_write " ) ) {
if ( t ! = 0 )
return - EINVAL ;
sbi - > peak_atomic_write = 0 ;
return count ;
}
if ( ! strcmp ( a - > attr . name , " committed_atomic_block " ) ) {
if ( t ! = 0 )
return - EINVAL ;
sbi - > committed_atomic_block = 0 ;
return count ;
}
if ( ! strcmp ( a - > attr . name , " revoked_atomic_block " ) ) {
if ( t ! = 0 )
return - EINVAL ;
sbi - > revoked_atomic_block = 0 ;
return count ;
}
2022-11-15 09:35:37 +03:00
if ( ! strcmp ( a - > attr . name , " readdir_ra " ) ) {
sbi - > readdir_ra = ! ! t ;
return count ;
}
2022-12-02 04:37:15 +03:00
if ( ! strcmp ( a - > attr . name , " hot_data_age_threshold " ) ) {
if ( t = = 0 | | t > = sbi - > warm_data_age_threshold )
return - EINVAL ;
if ( t = = * ui )
return count ;
* ui = ( unsigned int ) t ;
return count ;
}
if ( ! strcmp ( a - > attr . name , " warm_data_age_threshold " ) ) {
2023-01-17 16:24:42 +03:00
if ( t < = sbi - > hot_data_age_threshold )
2022-12-02 04:37:15 +03:00
return - EINVAL ;
if ( t = = * ui )
return count ;
* ui = ( unsigned int ) t ;
return count ;
}
2023-02-04 12:43:45 +03:00
if ( ! strcmp ( a - > attr . name , " last_age_weight " ) ) {
if ( t > 100 )
return - EINVAL ;
if ( t = = * ui )
return count ;
* ui = ( unsigned int ) t ;
2023-02-06 17:43:08 +03:00
return count ;
}
if ( ! strcmp ( a - > attr . name , " ipu_policy " ) ) {
if ( t > = BIT ( F2FS_IPU_MAX ) )
return - EINVAL ;
if ( t & & f2fs_lfs_mode ( sbi ) )
return - EINVAL ;
SM_I ( sbi ) - > ipu_policy = ( unsigned int ) t ;
2023-02-04 12:43:45 +03:00
return count ;
}
2023-12-22 06:29:00 +03:00
if ( ! strcmp ( a - > attr . name , " dir_level " ) ) {
if ( t > MAX_DIR_HASH_DEPTH )
return - EINVAL ;
sbi - > dir_level = t ;
return count ;
}
2019-01-15 23:02:15 +03:00
* ui = ( unsigned int ) t ;
2017-06-14 12:39:47 +03:00
return count ;
}
2018-05-28 11:57:32 +03:00
/*
 * Store handler used by the read-write attributes built with F2FS_RW_ATTR():
 * forwards the write to __sbi_store() and returns its result.
 *
 * For the "gc_urgent" attribute and for every GC_THREAD attribute, the
 * superblock's s_umount semaphore is read-held around the call. It is only
 * tried, never waited for: if it cannot be taken right away the write fails
 * with -EAGAIN.
 */
static ssize_t f2fs_sbi_store(struct f2fs_attr *a,
					struct f2fs_sb_info *sbi,
					const char *buf, size_t count)
{
	bool need_umount_lock;
	ssize_t stored;

	need_umount_lock = !strcmp(a->attr.name, "gc_urgent") ||
				a->struct_type == GC_THREAD;

	/* Common case: nothing to pin, just store. */
	if (!need_umount_lock)
		return __sbi_store(a, sbi, buf, count);

	if (!down_read_trylock(&sbi->sb->s_umount))
		return -EAGAIN;

	stored = __sbi_store(a, sbi, buf, count);
	up_read(&sbi->sb->s_umount);

	return stored;
}
2017-06-14 12:39:47 +03:00
static ssize_t f2fs_attr_show ( struct kobject * kobj ,
struct attribute * attr , char * buf )
{
struct f2fs_sb_info * sbi = container_of ( kobj , struct f2fs_sb_info ,
s_kobj ) ;
struct f2fs_attr * a = container_of ( attr , struct f2fs_attr , attr ) ;
return a - > show ? a - > show ( a , sbi , buf ) : 0 ;
}
/*
 * Dispatches a store request to the f2fs_attr that embeds @attr, passing the
 * f2fs_sb_info that embeds @kobj (as s_kobj). Returns the handler's result,
 * or 0 when the attribute has no store handler.
 */
static ssize_t f2fs_attr_store(struct kobject *kobj, struct attribute *attr,
						const char *buf, size_t len)
{
	struct f2fs_attr *fattr = container_of(attr, struct f2fs_attr, attr);
	struct f2fs_sb_info *sbi;

	if (!fattr->store)
		return 0;

	sbi = container_of(kobj, struct f2fs_sb_info, s_kobj);
	return fattr->store(fattr, sbi, buf, len);
}
static void f2fs_sb_release ( struct kobject * kobj )
{
struct f2fs_sb_info * sbi = container_of ( kobj , struct f2fs_sb_info ,
s_kobj ) ;
complete ( & sbi - > s_kobj_unregister ) ;
}
2021-06-03 22:31:08 +03:00
/*
 * Note that there are three feature list entries:
 * 1) /sys/fs/f2fs/features
 *   : shows runtime features supported by in-kernel f2fs along with Kconfig.
 *     - ref. F2FS_FEATURE_RO_ATTR()
 *
 * 2) /sys/fs/f2fs/$s_id/features <deprecated>
 *   : shows on-disk features enabled by mkfs.f2fs, used for old kernels. This
 *     won't add new features anymore, and thus, users should check entries in
 *     3) instead of this 2).
 *
 * 3) /sys/fs/f2fs/$s_id/feature_list
 *   : shows on-disk features enabled by mkfs.f2fs per instance, which follows
 *     the sysfs entry rule where each entry should expose a single value.
 *     This list covers the old feature list provided by 2) and beyond.
 *     Therefore, please add new on-disk features in this list only.
 *     - ref. F2FS_SB_FEATURE_RO_ATTR()
 */
2017-07-22 03:14:09 +03:00
/*
 * Show handler installed by F2FS_FEATURE_RO_ATTR(): unconditionally emits
 * "supported". Neither @a nor @sbi is consulted.
 */
static ssize_t f2fs_feature_show(struct f2fs_attr *a,
		struct f2fs_sb_info *sbi, char *buf)
{
	return sysfs_emit(buf, "supported\n");
}
/*
 * Defines a read-only (0444) attribute f2fs_attr_<_name>, named "<_name>",
 * whose show handler is f2fs_feature_show().
 */
#define F2FS_FEATURE_RO_ATTR(_name)				\
static struct f2fs_attr f2fs_attr_##_name = {			\
	.attr = {.name = __stringify(_name), .mode = 0444 },	\
	.show	= f2fs_feature_show,				\
}
static ssize_t f2fs_sb_feature_show ( struct f2fs_attr * a ,
struct f2fs_sb_info * sbi , char * buf )
{
if ( F2FS_HAS_FEATURE ( sbi , a - > id ) )
2022-10-28 19:49:53 +03:00
return sysfs_emit ( buf , " supported \n " ) ;
return sysfs_emit ( buf , " unsupported \n " ) ;
2021-06-03 22:31:08 +03:00
}
/*
 * Defines a read-only (0444) attribute f2fs_attr_sb_<_name>, named "<_name>",
 * shown by f2fs_sb_feature_show() and tagged with F2FS_FEATURE_<_feat>.
 */
#define F2FS_SB_FEATURE_RO_ATTR(_name, _feat)			\
static struct f2fs_attr f2fs_attr_sb_##_name = {		\
	.attr = {.name = __stringify(_name), .mode = 0444 },	\
	.show	= f2fs_sb_feature_show,				\
	.id	= F2FS_FEATURE_##_feat,				\
}
2017-06-14 12:39:47 +03:00
/*
 * Defines f2fs_attr_<_name>, named "<_name>", with the given file mode and
 * show/store handlers. _struct_type and _offset are recorded in the attribute
 * so the handlers can tell which structure and which byte offset it covers.
 */
#define F2FS_ATTR_OFFSET(_struct_type, _name, _mode, _show, _store, _offset) \
static struct f2fs_attr f2fs_attr_##_name = {			\
	.attr = {.name = __stringify(_name), .mode = _mode },	\
	.show	= _show,					\
	.store	= _store,					\
	.struct_type = _struct_type,				\
	.offset = _offset					\
}
2022-06-29 01:49:47 +03:00
/*
 * Read-only (0444) attribute "name" covering member elname of
 * struct struct_name; shown by f2fs_sbi_show(), no store handler.
 */
#define F2FS_RO_ATTR(struct_type, struct_name, name, elname)	\
	F2FS_ATTR_OFFSET(struct_type, name, 0444,		\
		f2fs_sbi_show, NULL,				\
		offsetof(struct struct_name, elname))

/*
 * Read-write (0644) attribute "name" covering member elname of
 * struct struct_name; shown by f2fs_sbi_show(), stored by f2fs_sbi_store().
 */
#define F2FS_RW_ATTR(struct_type, struct_name, name, elname)	\
	F2FS_ATTR_OFFSET(struct_type, name, 0644,		\
		f2fs_sbi_show, f2fs_sbi_store,			\
		offsetof(struct struct_name, elname))

/* Read-only (0444) attribute "name" whose show handler is <name>_show(). */
#define F2FS_GENERAL_RO_ATTR(name) \
static struct f2fs_attr f2fs_attr_##name = __ATTR(name, 0444, name##_show, NULL)
2023-05-06 18:16:03 +03:00
#ifdef CONFIG_F2FS_STAT_FS
/* Read-only attribute "name" covering member elname of struct f2fs_stat_info. */
#define STAT_INFO_RO_ATTR(name, elname)				\
	F2FS_RO_ATTR(STAT_INFO, f2fs_stat_info, name, elname)
#endif
2020-01-22 21:51:16 +03:00
2023-05-06 18:16:03 +03:00
/*
 * Per-structure shorthands over F2FS_RW_ATTR()/F2FS_RO_ATTR(). Each one fixes
 * the struct_type tag and the structure name. The *_GENERAL_* variants take
 * a single argument and use the member name as the attribute name too.
 */
#define GC_THREAD_RW_ATTR(name, elname)				\
	F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, name, elname)

#define SM_INFO_RW_ATTR(name, elname)				\
	F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, name, elname)

#define SM_INFO_GENERAL_RW_ATTR(elname)				\
	SM_INFO_RW_ATTR(elname, elname)

#define DCC_INFO_RW_ATTR(name, elname)				\
	F2FS_RW_ATTR(DCC_INFO, discard_cmd_control, name, elname)

#define DCC_INFO_GENERAL_RW_ATTR(elname)			\
	DCC_INFO_RW_ATTR(elname, elname)

#define NM_INFO_RW_ATTR(name, elname)				\
	F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, name, elname)

#define NM_INFO_GENERAL_RW_ATTR(elname)				\
	NM_INFO_RW_ATTR(elname, elname)

#define F2FS_SBI_RW_ATTR(name, elname)				\
	F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, name, elname)

#define F2FS_SBI_GENERAL_RW_ATTR(elname)			\
	F2FS_SBI_RW_ATTR(elname, elname)

#define F2FS_SBI_GENERAL_RO_ATTR(elname)			\
	F2FS_RO_ATTR(F2FS_SBI, f2fs_sb_info, elname, elname)

#ifdef CONFIG_F2FS_FAULT_INJECTION
/* The struct_type tag is passed in explicitly (FAULT_INFO_RATE/_TYPE). */
#define FAULT_INFO_GENERAL_RW_ATTR(type, elname)		\
	F2FS_RW_ATTR(type, f2fs_fault_info, elname, elname)
#endif

#define RESERVED_BLOCKS_GENERAL_RW_ATTR(elname)			\
	F2FS_RW_ATTR(RESERVED_BLOCKS, f2fs_sb_info, elname, elname)

#define CPRC_INFO_GENERAL_RW_ATTR(elname)			\
	F2FS_RW_ATTR(CPRC_INFO, ckpt_req_control, elname, elname)

#define ATGC_INFO_RW_ATTR(name, elname)				\
	F2FS_RW_ATTR(ATGC_INFO, atgc_management, name, elname)
/* GC_THREAD ATTR: members of struct f2fs_gc_kthread */
GC_THREAD_RW_ATTR(gc_urgent_sleep_time, urgent_sleep_time);
GC_THREAD_RW_ATTR(gc_min_sleep_time, min_sleep_time);
GC_THREAD_RW_ATTR(gc_max_sleep_time, max_sleep_time);
GC_THREAD_RW_ATTR(gc_no_gc_sleep_time, no_gc_sleep_time);

/* SM_INFO ATTR: members of struct f2fs_sm_info */
SM_INFO_RW_ATTR(reclaim_segments, rec_prefree_segments);
SM_INFO_GENERAL_RW_ATTR(ipu_policy);
SM_INFO_GENERAL_RW_ATTR(min_ipu_util);
SM_INFO_GENERAL_RW_ATTR(min_fsync_blocks);
SM_INFO_GENERAL_RW_ATTR(min_seq_blocks);
SM_INFO_GENERAL_RW_ATTR(min_hot_blocks);
SM_INFO_GENERAL_RW_ATTR(min_ssr_sections);

/* DCC_INFO ATTR: members of struct discard_cmd_control */
DCC_INFO_RW_ATTR(max_small_discards, max_discards);
DCC_INFO_GENERAL_RW_ATTR(max_discard_request);
DCC_INFO_GENERAL_RW_ATTR(min_discard_issue_time);
DCC_INFO_GENERAL_RW_ATTR(mid_discard_issue_time);
DCC_INFO_GENERAL_RW_ATTR(max_discard_issue_time);
DCC_INFO_GENERAL_RW_ATTR(discard_io_aware_gran);
DCC_INFO_GENERAL_RW_ATTR(discard_urgent_util);
DCC_INFO_GENERAL_RW_ATTR(discard_granularity);
DCC_INFO_GENERAL_RW_ATTR(max_ordered_discard);
DCC_INFO_GENERAL_RW_ATTR(discard_io_aware);

/* NM_INFO ATTR: members of struct f2fs_nm_info */
NM_INFO_RW_ATTR(max_roll_forward_node_blocks, max_rf_node_blocks);
NM_INFO_GENERAL_RW_ATTR(ram_thresh);
NM_INFO_GENERAL_RW_ATTR(ra_nid_pages);
NM_INFO_GENERAL_RW_ATTR(dirty_nats_ratio);
/* F2FS_SBI ATTR: members of struct f2fs_sb_info unless noted */

/* Tagged F2FS_SBI, but the offset is taken in struct f2fs_super_block. */
F2FS_RW_ATTR(F2FS_SBI, f2fs_super_block, extension_list, extension_list);

/* gc_idle and gc_urgent both cover the same member, gc_mode. */
F2FS_SBI_RW_ATTR(gc_idle, gc_mode);
F2FS_SBI_RW_ATTR(gc_urgent, gc_mode);

F2FS_SBI_RW_ATTR(cp_interval, interval_time[CP_TIME]);
F2FS_SBI_RW_ATTR(idle_interval, interval_time[REQ_TIME]);
F2FS_SBI_RW_ATTR(discard_idle_interval, interval_time[DISCARD_TIME]);
F2FS_SBI_RW_ATTR(gc_idle_interval, interval_time[GC_TIME]);
F2FS_SBI_RW_ATTR(umount_discard_timeout, interval_time[UMOUNT_DISCARD_TIMEOUT]);

/*
 * NOTE(review): the attribute name here is "gc_pin_file_thresh", not the
 * member name "gc_pin_file_threshold". Any name-based handling of this
 * attribute has to compare against "gc_pin_file_thresh" to take effect.
 */
F2FS_SBI_RW_ATTR(gc_pin_file_thresh, gc_pin_file_threshold);
F2FS_SBI_RW_ATTR(gc_reclaimed_segments, gc_reclaimed_segs);

F2FS_SBI_GENERAL_RW_ATTR(max_victim_search);
F2FS_SBI_GENERAL_RW_ATTR(migration_granularity);
F2FS_SBI_GENERAL_RW_ATTR(dir_level);
#ifdef CONFIG_F2FS_IOSTAT
F2FS_SBI_GENERAL_RW_ATTR(iostat_enable);
F2FS_SBI_GENERAL_RW_ATTR(iostat_period_ms);
#endif
F2FS_SBI_GENERAL_RW_ATTR(readdir_ra);
F2FS_SBI_GENERAL_RW_ATTR(max_io_bytes);
F2FS_SBI_GENERAL_RW_ATTR(data_io_flag);
F2FS_SBI_GENERAL_RW_ATTR(node_io_flag);
F2FS_SBI_GENERAL_RW_ATTR(gc_remaining_trials);
F2FS_SBI_GENERAL_RW_ATTR(seq_file_ra_mul);
F2FS_SBI_GENERAL_RW_ATTR(gc_segment_mode);
F2FS_SBI_GENERAL_RW_ATTR(max_fragment_chunk);
F2FS_SBI_GENERAL_RW_ATTR(max_fragment_hole);
#ifdef CONFIG_F2FS_FS_COMPRESSION
F2FS_SBI_GENERAL_RW_ATTR(compr_written_block);
F2FS_SBI_GENERAL_RW_ATTR(compr_saved_block);
F2FS_SBI_GENERAL_RW_ATTR(compr_new_inode);
F2FS_SBI_GENERAL_RW_ATTR(compress_percent);
F2FS_SBI_GENERAL_RW_ATTR(compress_watermark);
#endif

/* atomic write */
F2FS_SBI_GENERAL_RO_ATTR(current_atomic_write);
F2FS_SBI_GENERAL_RW_ATTR(peak_atomic_write);
F2FS_SBI_GENERAL_RW_ATTR(committed_atomic_block);
F2FS_SBI_GENERAL_RW_ATTR(revoked_atomic_block);

/* block age extent cache */
F2FS_SBI_GENERAL_RW_ATTR(hot_data_age_threshold);
F2FS_SBI_GENERAL_RW_ATTR(warm_data_age_threshold);
F2FS_SBI_GENERAL_RW_ATTR(last_age_weight);

#ifdef CONFIG_BLK_DEV_ZONED
F2FS_SBI_GENERAL_RO_ATTR(unusable_blocks_per_sec);
#endif
/* STAT_INFO ATTR: members of struct f2fs_stat_info */
#ifdef CONFIG_F2FS_STAT_FS
STAT_INFO_RO_ATTR(cp_foreground_calls, cp_call_count[FOREGROUND]);
STAT_INFO_RO_ATTR(cp_background_calls, cp_call_count[BACKGROUND]);
STAT_INFO_RO_ATTR(gc_foreground_calls, gc_call_count[FOREGROUND]);
STAT_INFO_RO_ATTR(gc_background_calls, gc_call_count[BACKGROUND]);
#endif

/* FAULT_INFO ATTR: members of struct f2fs_fault_info */
#ifdef CONFIG_F2FS_FAULT_INJECTION
FAULT_INFO_GENERAL_RW_ATTR(FAULT_INFO_RATE, inject_rate);
FAULT_INFO_GENERAL_RW_ATTR(FAULT_INFO_TYPE, inject_type);
#endif

/* RESERVED_BLOCKS ATTR */
RESERVED_BLOCKS_GENERAL_RW_ATTR(reserved_blocks);

/* CPRC_INFO ATTR: members of struct ckpt_req_control */
CPRC_INFO_GENERAL_RW_ATTR(ckpt_thread_ioprio);

/* ATGC_INFO ATTR: members of struct atgc_management */
ATGC_INFO_RW_ATTR(atgc_candidate_ratio, candidate_ratio);
ATGC_INFO_RW_ATTR(atgc_candidate_count, max_candidate_count);
ATGC_INFO_RW_ATTR(atgc_age_weight, age_weight);
ATGC_INFO_RW_ATTR(atgc_age_threshold, age_threshold);

/* Read-only attributes, each with its own <name>_show() handler */
F2FS_GENERAL_RO_ATTR(dirty_segments);
F2FS_GENERAL_RO_ATTR(free_segments);
F2FS_GENERAL_RO_ATTR(ovp_segments);
F2FS_GENERAL_RO_ATTR(lifetime_write_kbytes);
F2FS_GENERAL_RO_ATTR(features);
F2FS_GENERAL_RO_ATTR(current_reserved_blocks);
F2FS_GENERAL_RO_ATTR(unusable);
F2FS_GENERAL_RO_ATTR(encoding);
F2FS_GENERAL_RO_ATTR(mounted_time_sec);
F2FS_GENERAL_RO_ATTR(main_blkaddr);
F2FS_GENERAL_RO_ATTR(pending_discard);
F2FS_GENERAL_RO_ATTR(gc_mode);
#ifdef CONFIG_F2FS_STAT_FS
F2FS_GENERAL_RO_ATTR(moved_blocks_background);
F2FS_GENERAL_RO_ATTR(moved_blocks_foreground);
F2FS_GENERAL_RO_ATTR(avg_vblocks);
#endif
2017-07-22 03:14:09 +03:00
2018-12-12 12:50:12 +03:00
/* Feature attributes; each one is shown by f2fs_feature_show(). */
#ifdef CONFIG_FS_ENCRYPTION
F2FS_FEATURE_RO_ATTR(encryption);
F2FS_FEATURE_RO_ATTR(test_dummy_encryption_v2);
#if IS_ENABLED(CONFIG_UNICODE)
F2FS_FEATURE_RO_ATTR(encrypted_casefold);
#endif
#endif /* CONFIG_FS_ENCRYPTION */

#ifdef CONFIG_BLK_DEV_ZONED
F2FS_FEATURE_RO_ATTR(block_zoned);
#endif

F2FS_FEATURE_RO_ATTR(atomic_write);
F2FS_FEATURE_RO_ATTR(extra_attr);
F2FS_FEATURE_RO_ATTR(project_quota);
F2FS_FEATURE_RO_ATTR(inode_checksum);
F2FS_FEATURE_RO_ATTR(flexible_inline_xattr);
F2FS_FEATURE_RO_ATTR(quota_ino);
F2FS_FEATURE_RO_ATTR(inode_crtime);
F2FS_FEATURE_RO_ATTR(lost_found);
f2fs: add fs-verity support
Add fs-verity support to f2fs. fs-verity is a filesystem feature that
enables transparent integrity protection and authentication of read-only
files. It uses a dm-verity like mechanism at the file level: a Merkle
tree is used to verify any block in the file in log(filesize) time. It
is implemented mainly by helper functions in fs/verity/. See
Documentation/filesystems/fsverity.rst for the full documentation.
The f2fs support for fs-verity consists of:
- Adding a filesystem feature flag and an inode flag for fs-verity.
- Implementing the fsverity_operations to support enabling verity on an
inode and reading/writing the verity metadata.
- Updating ->readpages() to verify data as it's read from verity files
and to support reading verity metadata pages.
- Updating ->write_begin(), ->write_end(), and ->writepages() to support
writing verity metadata pages.
- Calling the fs-verity hooks for ->open(), ->setattr(), and ->ioctl().
Like ext4, f2fs stores the verity metadata (Merkle tree and
fsverity_descriptor) past the end of the file, starting at the first 64K
boundary beyond i_size. This approach works because (a) verity files
are readonly, and (b) pages fully beyond i_size aren't visible to
userspace but can be read/written internally by f2fs with only some
relatively small changes to f2fs. Extended attributes cannot be used
because (a) f2fs limits the total size of an inode's xattr entries to
4096 bytes, which wouldn't be enough for even a single Merkle tree
block, and (b) f2fs encryption doesn't encrypt xattrs, yet the verity
metadata *must* be encrypted when the file is because it contains hashes
of the plaintext data.
Acked-by: Jaegeuk Kim <jaegeuk@kernel.org>
Acked-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Eric Biggers <ebiggers@google.com>
2019-07-22 19:26:24 +03:00
/* Defined only when CONFIG_FS_VERITY is set. */
#ifdef CONFIG_FS_VERITY
F2FS_FEATURE_RO_ATTR(verity);
#endif
2021-06-03 22:31:08 +03:00
F2FS_FEATURE_RO_ATTR(sb_checksum);

#if IS_ENABLED(CONFIG_UNICODE)
F2FS_FEATURE_RO_ATTR(casefold);
#endif

F2FS_FEATURE_RO_ATTR(readonly);

#ifdef CONFIG_F2FS_FS_COMPRESSION
F2FS_FEATURE_RO_ATTR(compression);
#endif

F2FS_FEATURE_RO_ATTR(pin_file);
2021-05-21 11:32:53 +03:00
2017-06-14 12:39:47 +03:00
# define ATTR_LIST(name) (&f2fs_attr_##name.attr)
static struct attribute * f2fs_attrs [ ] = {
2017-08-07 08:09:00 +03:00
ATTR_LIST ( gc_urgent_sleep_time ) ,
2017-06-14 12:39:47 +03:00
ATTR_LIST ( gc_min_sleep_time ) ,
ATTR_LIST ( gc_max_sleep_time ) ,
ATTR_LIST ( gc_no_gc_sleep_time ) ,
ATTR_LIST ( gc_idle ) ,
2017-08-07 08:09:00 +03:00
ATTR_LIST ( gc_urgent ) ,
2017-06-14 12:39:47 +03:00
ATTR_LIST ( reclaim_segments ) ,
2019-11-22 22:53:10 +03:00
ATTR_LIST ( main_blkaddr ) ,
2017-06-14 12:39:47 +03:00
ATTR_LIST ( max_small_discards ) ,
2021-12-14 04:12:43 +03:00
ATTR_LIST ( max_discard_request ) ,
ATTR_LIST ( min_discard_issue_time ) ,
ATTR_LIST ( mid_discard_issue_time ) ,
ATTR_LIST ( max_discard_issue_time ) ,
2023-01-04 14:40:29 +03:00
ATTR_LIST ( discard_io_aware_gran ) ,
2022-11-23 19:44:02 +03:00
ATTR_LIST ( discard_urgent_util ) ,
f2fs: introduce discard_granularity sysfs entry
Commit d618ebaf0aa8 ("f2fs: enable small discard by default") enables
f2fs to issue 4K size discard in real-time discard mode. However, issuing
smaller discard may cost more lifetime but releasing less free space in
flash device. Since f2fs has ability of separating hot/cold data and
garbage collection, we can expect that small-sized invalid region would
expand soon with OPU, deletion or garbage collection on valid datas, so
it's better to delay or skip issuing smaller size discards, it could help
to reduce overmuch consumption of IO bandwidth and lifetime of flash
storage.
This patch makes f2fs selectng 64K size as its default minimal
granularity, and issue discard with the size which is not smaller than
minimal granularity. Also it exposes discard granularity as sysfs entry
for configuration in different scenario.
Jaegeuk Kim:
We must issue all the accumulated discard commands when fstrim is called.
So, I've added pend_list_tag[] to indicate whether we should issue the
commands or not. If tag sets P_ACTIVE or P_TRIM, we have to issue them.
P_TRIM is set once at a time, given fstrim trigger.
In addition, issue_discard_thread is calling too much due to the number of
discard commands remaining in the pending list. I added a timer to control
it likewise gc_thread.
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2017-08-07 18:09:56 +03:00
ATTR_LIST ( discard_granularity ) ,
2022-10-25 11:32:26 +03:00
ATTR_LIST ( max_ordered_discard ) ,
2023-11-22 17:47:15 +03:00
ATTR_LIST ( discard_io_aware ) ,
2021-11-29 21:36:12 +03:00
ATTR_LIST ( pending_discard ) ,
2022-10-25 06:32:16 +03:00
ATTR_LIST ( gc_mode ) ,
2017-06-14 12:39:47 +03:00
ATTR_LIST ( ipu_policy ) ,
ATTR_LIST ( min_ipu_util ) ,
ATTR_LIST ( min_fsync_blocks ) ,
2018-08-10 03:53:34 +03:00
ATTR_LIST ( min_seq_blocks ) ,
2017-06-14 12:39:47 +03:00
ATTR_LIST ( min_hot_blocks ) ,
2017-10-28 11:52:33 +03:00
ATTR_LIST ( min_ssr_sections ) ,
2017-06-14 12:39:47 +03:00
ATTR_LIST ( max_victim_search ) ,
2018-10-25 11:19:28 +03:00
ATTR_LIST ( migration_granularity ) ,
2017-06-14 12:39:47 +03:00
ATTR_LIST ( dir_level ) ,
ATTR_LIST ( ram_thresh ) ,
ATTR_LIST ( ra_nid_pages ) ,
ATTR_LIST ( dirty_nats_ratio ) ,
2022-01-28 00:31:43 +03:00
ATTR_LIST ( max_roll_forward_node_blocks ) ,
2017-06-14 12:39:47 +03:00
ATTR_LIST ( cp_interval ) ,
ATTR_LIST ( idle_interval ) ,
2018-09-19 11:48:47 +03:00
ATTR_LIST ( discard_idle_interval ) ,
ATTR_LIST ( gc_idle_interval ) ,
2019-01-14 21:42:11 +03:00
ATTR_LIST ( umount_discard_timeout ) ,
2021-08-20 06:52:28 +03:00
# ifdef CONFIG_F2FS_IOSTAT
2017-08-02 18:21:48 +03:00
ATTR_LIST ( iostat_enable ) ,
2020-03-30 06:30:59 +03:00
ATTR_LIST ( iostat_period_ms ) ,
2021-08-20 06:52:28 +03:00
# endif
2017-11-22 13:23:38 +03:00
ATTR_LIST ( readdir_ra ) ,
2020-12-03 20:52:45 +03:00
ATTR_LIST ( max_io_bytes ) ,
2017-12-08 03:25:39 +03:00
ATTR_LIST ( gc_pin_file_thresh ) ,
2018-02-26 17:04:13 +03:00
ATTR_LIST ( extension_list ) ,
2017-06-14 12:39:47 +03:00
# ifdef CONFIG_F2FS_FAULT_INJECTION
ATTR_LIST ( inject_rate ) ,
ATTR_LIST ( inject_type ) ,
# endif
2020-04-02 19:32:35 +03:00
ATTR_LIST ( data_io_flag ) ,
2020-06-04 21:49:43 +03:00
ATTR_LIST ( node_io_flag ) ,
2022-10-25 09:50:25 +03:00
ATTR_LIST ( gc_remaining_trials ) ,
2021-01-21 16:45:29 +03:00
ATTR_LIST ( ckpt_thread_ioprio ) ,
2017-10-24 10:46:54 +03:00
ATTR_LIST ( dirty_segments ) ,
2020-01-22 21:51:16 +03:00
ATTR_LIST ( free_segments ) ,
2021-03-02 04:28:16 +03:00
ATTR_LIST ( ovp_segments ) ,
2019-05-30 03:49:06 +03:00
ATTR_LIST ( unusable ) ,
2017-06-14 12:39:47 +03:00
ATTR_LIST ( lifetime_write_kbytes ) ,
2017-07-22 03:14:09 +03:00
ATTR_LIST ( features ) ,
2017-06-26 11:24:41 +03:00
ATTR_LIST ( reserved_blocks ) ,
2017-10-27 15:45:05 +03:00
ATTR_LIST ( current_reserved_blocks ) ,
2019-07-24 02:05:28 +03:00
ATTR_LIST ( encoding ) ,
2020-02-26 06:08:16 +03:00
ATTR_LIST ( mounted_time_sec ) ,
2020-01-22 21:51:16 +03:00
# ifdef CONFIG_F2FS_STAT_FS
ATTR_LIST ( cp_foreground_calls ) ,
ATTR_LIST ( cp_background_calls ) ,
ATTR_LIST ( gc_foreground_calls ) ,
ATTR_LIST ( gc_background_calls ) ,
ATTR_LIST ( moved_blocks_foreground ) ,
ATTR_LIST ( moved_blocks_background ) ,
ATTR_LIST ( avg_vblocks ) ,
2021-03-15 11:12:33 +03:00
# endif
2022-06-29 01:49:47 +03:00
# ifdef CONFIG_BLK_DEV_ZONED
ATTR_LIST ( unusable_blocks_per_sec ) ,
# endif
2021-03-15 11:12:33 +03:00
# ifdef CONFIG_F2FS_FS_COMPRESSION
ATTR_LIST ( compr_written_block ) ,
ATTR_LIST ( compr_saved_block ) ,
ATTR_LIST ( compr_new_inode ) ,
2023-02-16 17:09:35 +03:00
ATTR_LIST ( compress_percent ) ,
ATTR_LIST ( compress_watermark ) ,
2020-01-22 21:51:16 +03:00
# endif
2021-05-12 05:07:19 +03:00
/* For ATGC */
ATTR_LIST ( atgc_candidate_ratio ) ,
ATTR_LIST ( atgc_candidate_count ) ,
ATTR_LIST ( atgc_age_weight ) ,
ATTR_LIST ( atgc_age_threshold ) ,
2021-08-03 07:22:45 +03:00
ATTR_LIST ( seq_file_ra_mul ) ,
2021-07-10 08:53:57 +03:00
ATTR_LIST ( gc_segment_mode ) ,
ATTR_LIST ( gc_reclaimed_segments ) ,
2021-09-29 21:12:03 +03:00
ATTR_LIST ( max_fragment_chunk ) ,
ATTR_LIST ( max_fragment_hole ) ,
2022-07-19 02:02:48 +03:00
ATTR_LIST ( current_atomic_write ) ,
ATTR_LIST ( peak_atomic_write ) ,
ATTR_LIST ( committed_atomic_block ) ,
ATTR_LIST ( revoked_atomic_block ) ,
2022-12-02 04:37:15 +03:00
ATTR_LIST ( hot_data_age_threshold ) ,
ATTR_LIST ( warm_data_age_threshold ) ,
2023-02-04 12:43:45 +03:00
ATTR_LIST ( last_age_weight ) ,
2017-06-14 12:39:47 +03:00
NULL ,
} ;
2019-06-07 20:40:41 +03:00
ATTRIBUTE_GROUPS ( f2fs ) ;
2017-06-14 12:39:47 +03:00
2017-07-22 03:14:09 +03:00
static struct attribute * f2fs_feat_attrs [ ] = {
2018-12-12 12:50:12 +03:00
# ifdef CONFIG_FS_ENCRYPTION
2017-07-22 03:14:09 +03:00
ATTR_LIST ( encryption ) ,
fscrypt: support test_dummy_encryption=v2
v1 encryption policies are deprecated in favor of v2, and some new
features (e.g. encryption+casefolding) are only being added for v2.
Therefore, the "test_dummy_encryption" mount option (which is used for
encryption I/O testing with xfstests) needs to support v2 policies.
To do this, extend its syntax to be "test_dummy_encryption=v1" or
"test_dummy_encryption=v2". The existing "test_dummy_encryption" (no
argument) also continues to be accepted, to specify the default setting
-- currently v1, but the next patch changes it to v2.
To cleanly support both v1 and v2 while also making it easy to support
specifying other encryption settings in the future (say, accepting
"$contents_mode:$filenames_mode:v2"), make ext4 and f2fs maintain a
pointer to the dummy fscrypt_context rather than using mount flags.
To avoid concurrency issues, don't allow test_dummy_encryption to be set
or changed during a remount. (The former restriction is new, but
xfstests doesn't run into it, so no one should notice.)
Tested with 'gce-xfstests -c {ext4,f2fs}/encrypt -g auto'. On ext4,
there are two regressions, both of which are test bugs: ext4/023 and
ext4/028 fail because they set an xattr and expect it to be stored
inline, but the increase in size of the fscrypt_context from
24 to 40 bytes causes this xattr to be spilled into an external block.
Link: https://lore.kernel.org/r/20200512233251.118314-4-ebiggers@kernel.org
Acked-by: Jaegeuk Kim <jaegeuk@kernel.org>
Reviewed-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Eric Biggers <ebiggers@google.com>
2020-05-13 02:32:50 +03:00
ATTR_LIST ( test_dummy_encryption_v2 ) ,
2022-01-18 09:56:14 +03:00
# if IS_ENABLED(CONFIG_UNICODE)
2021-06-03 12:50:38 +03:00
ATTR_LIST ( encrypted_casefold ) ,
2017-07-22 03:14:09 +03:00
# endif
2021-06-03 12:50:38 +03:00
# endif /* CONFIG_FS_ENCRYPTION */
2017-07-22 03:14:09 +03:00
# ifdef CONFIG_BLK_DEV_ZONED
ATTR_LIST ( block_zoned ) ,
# endif
ATTR_LIST ( atomic_write ) ,
ATTR_LIST ( extra_attr ) ,
ATTR_LIST ( project_quota ) ,
ATTR_LIST ( inode_checksum ) ,
f2fs: support flexible inline xattr size
Now, in product, more and more features based on file encryption were
introduced, their demand of xattr space is increasing, however, inline
xattr has fixed-size of 200 bytes, once inline xattr space is full, new
increased xattr data would occupy additional xattr block which may bring
us more space usage and performance regression during persisting.
In order to resolve above issue, it's better to expand inline xattr size
flexibly according to user's requirement.
So this patch introduces new filesystem feature 'flexible inline xattr',
and new mount option 'inline_xattr_size=%u', once mkfs enables the
feature, we can use the option to make f2fs supporting flexible inline
xattr size.
To support this feature, we add an extra attribute, i_inline_xattr_size, to
the inode layout. It indicates how much space the inline xattr borrows from
the block address mapping space in the inode layout; with it, we can easily
locate and store flexible-sized inline xattr data in the inode.
Inode disk layout:
+----------------------+
| .i_mode |
| ... |
| .i_ext |
+----------------------+
| .i_extra_isize |
| .i_inline_xattr_size |-----------+
| ... | |
+----------------------+ |
| .i_addr | |
| - block address or | |
| - inline data | |
+----------------------+<---+ v
| inline xattr | +---inline xattr range
+----------------------+<---+
| .i_nid |
+----------------------+
| node_footer |
| (nid, ino, offset) |
+----------------------+
Note that we have to consider backward compatibility, which reserved
inline_data space, 200 bytes, all the time, reported by Sheng Yong.
Previous inline data or directory always reserved 200 bytes in inode layout,
even if inline_xattr is disabled. In order to keep inline_dentry's structure
for backward compatibility, we get the space back only from inline_data.
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Reported-by: Sheng Yong <shengyong1@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2017-09-06 16:59:50 +03:00
ATTR_LIST ( flexible_inline_xattr ) ,
2017-10-06 07:03:06 +03:00
ATTR_LIST ( quota_ino ) ,
2018-01-25 09:54:42 +03:00
ATTR_LIST ( inode_crtime ) ,
2018-03-15 13:51:41 +03:00
ATTR_LIST ( lost_found ) ,
f2fs: add fs-verity support
Add fs-verity support to f2fs. fs-verity is a filesystem feature that
enables transparent integrity protection and authentication of read-only
files. It uses a dm-verity like mechanism at the file level: a Merkle
tree is used to verify any block in the file in log(filesize) time. It
is implemented mainly by helper functions in fs/verity/. See
Documentation/filesystems/fsverity.rst for the full documentation.
The f2fs support for fs-verity consists of:
- Adding a filesystem feature flag and an inode flag for fs-verity.
- Implementing the fsverity_operations to support enabling verity on an
inode and reading/writing the verity metadata.
- Updating ->readpages() to verify data as it's read from verity files
and to support reading verity metadata pages.
- Updating ->write_begin(), ->write_end(), and ->writepages() to support
writing verity metadata pages.
- Calling the fs-verity hooks for ->open(), ->setattr(), and ->ioctl().
Like ext4, f2fs stores the verity metadata (Merkle tree and
fsverity_descriptor) past the end of the file, starting at the first 64K
boundary beyond i_size. This approach works because (a) verity files
are readonly, and (b) pages fully beyond i_size aren't visible to
userspace but can be read/written internally by f2fs with only some
relatively small changes to f2fs. Extended attributes cannot be used
because (a) f2fs limits the total size of an inode's xattr entries to
4096 bytes, which wouldn't be enough for even a single Merkle tree
block, and (b) f2fs encryption doesn't encrypt xattrs, yet the verity
metadata *must* be encrypted when the file is because it contains hashes
of the plaintext data.
Acked-by: Jaegeuk Kim <jaegeuk@kernel.org>
Acked-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Eric Biggers <ebiggers@google.com>
2019-07-22 19:26:24 +03:00
# ifdef CONFIG_FS_VERITY
ATTR_LIST ( verity ) ,
# endif
2018-09-28 15:25:56 +03:00
ATTR_LIST ( sb_checksum ) ,
2022-01-18 09:56:14 +03:00
# if IS_ENABLED(CONFIG_UNICODE)
2019-07-24 02:05:28 +03:00
ATTR_LIST ( casefold ) ,
2021-06-03 12:50:37 +03:00
# endif
2021-05-21 11:32:53 +03:00
ATTR_LIST ( readonly ) ,
2020-03-27 13:29:00 +03:00
# ifdef CONFIG_F2FS_FS_COMPRESSION
f2fs: support data compression
This patch tries to support compression in f2fs.
- New term named cluster is defined as basic unit of compression, file can
be divided into multiple clusters logically. One cluster includes 4 << n
(n >= 0) logical pages, compression size is also cluster size, each of
cluster can be compressed or not.
- In cluster metadata layout, one special flag is used to indicate cluster
is compressed one or normal one, for compressed cluster, following metadata
maps cluster to [1, 4 << n - 1] physical blocks, in where f2fs stores
data including compress header and compressed data.
- In order to eliminate write amplification during overwrite, F2FS only
support compression on write-once file, data can be compressed only when
all logical blocks in file are valid and cluster compress ratio is lower
than specified threshold.
- To enable compression on regular inode, there are three ways:
* chattr +c file
* chattr +c dir; touch dir/file
* mount w/ -o compress_extension=ext; touch file.ext
Compress metadata layout:
[Dnode Structure]
+-----------------------------------------------+
| cluster 1 | cluster 2 | ......... | cluster N |
+-----------------------------------------------+
. . . .
. . . .
. Compressed Cluster . . Normal Cluster .
+----------+---------+---------+---------+ +---------+---------+---------+---------+
|compr flag| block 1 | block 2 | block 3 | | block 1 | block 2 | block 3 | block 4 |
+----------+---------+---------+---------+ +---------+---------+---------+---------+
. .
. .
. .
+-------------+-------------+----------+----------------------------+
| data length | data chksum | reserved | compressed data |
+-------------+-------------+----------+----------------------------+
Changelog:
20190326:
- fix error handling of read_end_io().
- remove unneeded comments in f2fs_encrypt_one_page().
20190327:
- fix wrong use of f2fs_cluster_is_full() in f2fs_mpage_readpages().
- don't jump into loop directly to avoid uninitialized variables.
- add TODO tag in error path of f2fs_write_cache_pages().
20190328:
- fix wrong merge condition in f2fs_read_multi_pages().
- check compressed file in f2fs_post_read_required().
20190401
- allow overwrite on non-compressed cluster.
- check cluster meta before writing compressed data.
20190402
- don't preallocate blocks for compressed file.
- add lz4 compress algorithm
- process multiple post read works in one workqueue
Now f2fs supports processing post read work in multiple workqueue,
it shows low performance due to schedule overhead of multiple
workqueue executing orderly.
20190921
- compress: support buffered overwrite
C: compress cluster flag
V: valid block address
N: NEW_ADDR
One cluster contain 4 blocks
before overwrite after overwrite
- VVVV -> CVNN
- CVNN -> VVVV
- CVNN -> CVNN
- CVNN -> CVVV
- CVVV -> CVNN
- CVVV -> CVVV
20191029
- add kconfig F2FS_FS_COMPRESSION to isolate compression related
codes, add kconfig F2FS_FS_{LZO,LZ4} to cover backend algorithm.
note that: will remove lzo backend if Jaegeuk agreed that too.
- update codes according to Eric's comments.
20191101
- apply fixes from Jaegeuk
20191113
- apply fixes from Jaegeuk
- split workqueue for fsverity
20191216
- apply fixes from Jaegeuk
20200117
- fix to avoid NULL pointer dereference
[Jaegeuk Kim]
- add tracepoint for f2fs_{,de}compress_pages()
- fix many bugs and add some compression stats
- fix overwrite/mmap bugs
- address 32bit build error, reported by Geert.
- bug fixes when handling errors and i_compressed_blocks
Reported-by: <noreply@ellerman.id.au>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2019-11-01 13:07:14 +03:00
ATTR_LIST ( compression ) ,
2020-03-27 13:29:00 +03:00
# endif
2021-06-04 08:30:09 +03:00
ATTR_LIST ( pin_file ) ,
2017-07-22 03:14:09 +03:00
NULL ,
} ;
2019-06-07 20:40:41 +03:00
ATTRIBUTE_GROUPS ( f2fs_feat ) ;
2017-07-22 03:14:09 +03:00
f2fs: introduce sb_status sysfs node
Introduce /sys/fs/f2fs/<devname>/stat/sb_status to show superblock
status in real time as a hexadecimal value.
value sb status macro description
0x1 SBI_IS_DIRTY, /* dirty flag for checkpoint */
0x2 SBI_IS_CLOSE, /* specify unmounting */
0x4 SBI_NEED_FSCK, /* need fsck.f2fs to fix */
0x8 SBI_POR_DOING, /* recovery is doing or not */
0x10 SBI_NEED_SB_WRITE, /* need to recover superblock */
0x20 SBI_NEED_CP, /* need to checkpoint */
0x40 SBI_IS_SHUTDOWN, /* shutdown by ioctl */
0x80 SBI_IS_RECOVERED, /* recovered orphan/data */
0x100 SBI_CP_DISABLED, /* CP was disabled last mount */
0x200 SBI_CP_DISABLED_QUICK, /* CP was disabled quickly */
0x400 SBI_QUOTA_NEED_FLUSH, /* need to flush quota info in CP */
0x800 SBI_QUOTA_SKIP_FLUSH, /* skip flushing quota in current CP */
0x1000 SBI_QUOTA_NEED_REPAIR, /* quota file may be corrupted */
0x2000 SBI_IS_RESIZEFS, /* resizefs is in process */
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2021-01-14 04:41:27 +03:00
F2FS_GENERAL_RO_ATTR ( sb_status ) ;
2022-09-27 05:44:47 +03:00
F2FS_GENERAL_RO_ATTR ( cp_status ) ;
2023-12-20 04:59:58 +03:00
F2FS_GENERAL_RO_ATTR ( issued_discard ) ;
F2FS_GENERAL_RO_ATTR ( queued_discard ) ;
F2FS_GENERAL_RO_ATTR ( undiscard_blks ) ;
2020-12-09 11:43:27 +03:00
static struct attribute * f2fs_stat_attrs [ ] = {
f2fs: introduce sb_status sysfs node
Introduce /sys/fs/f2fs/<devname>/stat/sb_status to show superblock
status in real time as a hexadecimal value.
value sb status macro description
0x1 SBI_IS_DIRTY, /* dirty flag for checkpoint */
0x2 SBI_IS_CLOSE, /* specify unmounting */
0x4 SBI_NEED_FSCK, /* need fsck.f2fs to fix */
0x8 SBI_POR_DOING, /* recovery is doing or not */
0x10 SBI_NEED_SB_WRITE, /* need to recover superblock */
0x20 SBI_NEED_CP, /* need to checkpoint */
0x40 SBI_IS_SHUTDOWN, /* shutdown by ioctl */
0x80 SBI_IS_RECOVERED, /* recovered orphan/data */
0x100 SBI_CP_DISABLED, /* CP was disabled last mount */
0x200 SBI_CP_DISABLED_QUICK, /* CP was disabled quickly */
0x400 SBI_QUOTA_NEED_FLUSH, /* need to flush quota info in CP */
0x800 SBI_QUOTA_SKIP_FLUSH, /* skip flushing quota in current CP */
0x1000 SBI_QUOTA_NEED_REPAIR, /* quota file may be corrupted */
0x2000 SBI_IS_RESIZEFS, /* resizefs is in process */
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2021-01-14 04:41:27 +03:00
ATTR_LIST ( sb_status ) ,
2022-09-27 05:44:47 +03:00
ATTR_LIST ( cp_status ) ,
2023-12-20 04:59:58 +03:00
ATTR_LIST ( issued_discard ) ,
ATTR_LIST ( queued_discard ) ,
ATTR_LIST ( undiscard_blks ) ,
2020-12-09 11:43:27 +03:00
NULL ,
} ;
ATTRIBUTE_GROUPS ( f2fs_stat ) ;
2021-06-03 22:31:08 +03:00
/*
 * Read-only per-superblock feature attributes.  Each invocation pairs an
 * attribute name with a feature identifier; the resulting attributes are
 * referenced in the table below with an "sb_" prefix.
 * NOTE(review): the macro body is defined outside this section -- confirm
 * there how the second argument maps to a superblock feature bit.
 */
F2FS_SB_FEATURE_RO_ATTR ( encryption , ENCRYPT ) ;
F2FS_SB_FEATURE_RO_ATTR ( block_zoned , BLKZONED ) ;
F2FS_SB_FEATURE_RO_ATTR ( extra_attr , EXTRA_ATTR ) ;
F2FS_SB_FEATURE_RO_ATTR ( project_quota , PRJQUOTA ) ;
F2FS_SB_FEATURE_RO_ATTR ( inode_checksum , INODE_CHKSUM ) ;
F2FS_SB_FEATURE_RO_ATTR ( flexible_inline_xattr , FLEXIBLE_INLINE_XATTR ) ;
F2FS_SB_FEATURE_RO_ATTR ( quota_ino , QUOTA_INO ) ;
F2FS_SB_FEATURE_RO_ATTR ( inode_crtime , INODE_CRTIME ) ;
F2FS_SB_FEATURE_RO_ATTR ( lost_found , LOST_FOUND ) ;
F2FS_SB_FEATURE_RO_ATTR ( verity , VERITY ) ;
F2FS_SB_FEATURE_RO_ATTR ( sb_checksum , SB_CHKSUM ) ;
F2FS_SB_FEATURE_RO_ATTR ( casefold , CASEFOLD ) ;
F2FS_SB_FEATURE_RO_ATTR ( compression , COMPRESSION ) ;
F2FS_SB_FEATURE_RO_ATTR ( readonly , RO ) ;
/*
 * NULL-terminated attribute table for the per-superblock "feature_list"
 * kobject (it is installed as default_groups of f2fs_feature_list_ktype via
 * the f2fs_sb_feat_groups symbol generated by ATTRIBUTE_GROUPS() below).
 * Unlike the global feature table, none of these entries is guarded by a
 * CONFIG_* conditional.
 */
static struct attribute * f2fs_sb_feat_attrs [ ] = {
ATTR_LIST ( sb_encryption ) ,
ATTR_LIST ( sb_block_zoned ) ,
ATTR_LIST ( sb_extra_attr ) ,
ATTR_LIST ( sb_project_quota ) ,
ATTR_LIST ( sb_inode_checksum ) ,
ATTR_LIST ( sb_flexible_inline_xattr ) ,
ATTR_LIST ( sb_quota_ino ) ,
ATTR_LIST ( sb_inode_crtime ) ,
ATTR_LIST ( sb_lost_found ) ,
ATTR_LIST ( sb_verity ) ,
/* "sb_" prefix applied to the attribute named sb_checksum */
ATTR_LIST ( sb_sb_checksum ) ,
ATTR_LIST ( sb_casefold ) ,
ATTR_LIST ( sb_compression ) ,
ATTR_LIST ( sb_readonly ) ,
NULL ,
} ;
ATTRIBUTE_GROUPS ( f2fs_sb_feat ) ;
2017-06-14 12:39:47 +03:00
/* show/store dispatch shared by the superblock, kset and "features" ktypes. */
static const struct sysfs_ops f2fs_attr_ops = {
. show = f2fs_attr_show ,
. store = f2fs_attr_store ,
} ;
2023-02-09 06:20:10 +03:00
/*
 * ktype of the per-superblock kobject (sbi->s_kobj), which
 * f2fs_register_sysfs() adds under f2fs_kset using sb->s_id as its name.
 */
static const struct kobj_type f2fs_sb_ktype = {
2019-06-07 20:40:41 +03:00
. default_groups = f2fs_groups ,
2017-06-14 12:39:47 +03:00
. sysfs_ops = & f2fs_attr_ops ,
. release = f2fs_sb_release ,
} ;
2023-02-09 06:20:10 +03:00
/* ktype of the kset's own kobject: no default attributes, no release hook. */
static const struct kobj_type f2fs_ktype = {
2017-07-22 03:14:09 +03:00
. sysfs_ops = & f2fs_attr_ops ,
} ;
/*
 * Top-level kset; f2fs_init_sysfs() names it "f2fs" and parents it to
 * fs_kobj before registering it.
 */
static struct kset f2fs_kset = {
2020-07-24 11:55:28 +03:00
. kobj = { . ktype = & f2fs_ktype } ,
2017-07-22 03:14:09 +03:00
} ;
2023-02-09 06:20:10 +03:00
/* ktype of the global "features" kobject; it defines no release hook. */
static const struct kobj_type f2fs_feat_ktype = {
2019-06-07 20:40:41 +03:00
. default_groups = f2fs_feat_groups ,
2017-07-22 03:14:09 +03:00
. sysfs_ops = & f2fs_attr_ops ,
} ;
/*
 * Statically allocated kobject that f2fs_init_sysfs() adds as "features"
 * inside f2fs_kset.
 */
static struct kobject f2fs_feat = {
. kset = & f2fs_kset ,
} ;
2020-12-09 11:43:27 +03:00
static ssize_t f2fs_stat_attr_show ( struct kobject * kobj ,
struct attribute * attr , char * buf )
{
struct f2fs_sb_info * sbi = container_of ( kobj , struct f2fs_sb_info ,
s_stat_kobj ) ;
struct f2fs_attr * a = container_of ( attr , struct f2fs_attr , attr ) ;
return a - > show ? a - > show ( a , sbi , buf ) : 0 ;
}
/*
 * sysfs ->store() entry point for attributes that live under a superblock's
 * "stat" kobject.
 *
 * @kobj: the s_stat_kobj embedded in a struct f2fs_sb_info
 * @attr: the struct attribute embedded in a struct f2fs_attr
 * @buf:  data written by the caller
 * @len:  number of bytes in @buf
 *
 * Returns whatever the attribute's store handler returns, or 0 when the
 * attribute has no store handler.
 */
static ssize_t f2fs_stat_attr_store(struct kobject *kobj, struct attribute *attr,
						const char *buf, size_t len)
{
	struct f2fs_attr *fattr = container_of(attr, struct f2fs_attr, attr);
	struct f2fs_sb_info *sbi;

	if (!fattr->store)
		return 0;

	sbi = container_of(kobj, struct f2fs_sb_info, s_stat_kobj);
	return fattr->store(fattr, sbi, buf, len);
}
/*
 * Release hook of the "stat" kobject: signals s_stat_kobj_unregister so that
 * code blocked in wait_for_completion() on it can proceed.
 */
static void f2fs_stat_kobj_release(struct kobject *kobj)
{
	complete(&container_of(kobj, struct f2fs_sb_info,
				s_stat_kobj)->s_stat_kobj_unregister);
}
/*
 * show/store dispatch for the per-superblock "stat" kobject; both handlers
 * resolve the owning f2fs_sb_info through its s_stat_kobj member.
 */
static const struct sysfs_ops f2fs_stat_attr_ops = {
. show = f2fs_stat_attr_show ,
. store = f2fs_stat_attr_store ,
} ;
2023-02-09 06:20:10 +03:00
/*
 * ktype of sbi->s_stat_kobj, which f2fs_register_sysfs() adds as "stat"
 * below the superblock kobject.  The release hook completes
 * s_stat_kobj_unregister.
 */
static const struct kobj_type f2fs_stat_ktype = {
2020-12-09 11:43:27 +03:00
. default_groups = f2fs_stat_groups ,
. sysfs_ops = & f2fs_stat_attr_ops ,
. release = f2fs_stat_kobj_release ,
} ;
2021-06-03 22:31:08 +03:00
static ssize_t f2fs_sb_feat_attr_show ( struct kobject * kobj ,
struct attribute * attr , char * buf )
{
struct f2fs_sb_info * sbi = container_of ( kobj , struct f2fs_sb_info ,
s_feature_list_kobj ) ;
struct f2fs_attr * a = container_of ( attr , struct f2fs_attr , attr ) ;
return a - > show ? a - > show ( a , sbi , buf ) : 0 ;
}
/*
 * Release hook of the "feature_list" kobject: signals
 * s_feature_list_kobj_unregister so that code blocked in
 * wait_for_completion() on it can proceed.
 */
static void f2fs_feature_list_kobj_release(struct kobject *kobj)
{
	complete(&container_of(kobj, struct f2fs_sb_info,
			s_feature_list_kobj)->s_feature_list_kobj_unregister);
}
/* Dispatch for the "feature_list" kobject: only .show is set, no .store. */
static const struct sysfs_ops f2fs_feature_list_attr_ops = {
. show = f2fs_sb_feat_attr_show ,
} ;
2023-02-09 06:20:10 +03:00
/*
 * ktype of sbi->s_feature_list_kobj, which f2fs_register_sysfs() adds as
 * "feature_list" below the superblock kobject.  The release hook completes
 * s_feature_list_kobj_unregister.
 */
static const struct kobj_type f2fs_feature_list_ktype = {
2021-06-03 22:31:08 +03:00
. default_groups = f2fs_sb_feat_groups ,
. sysfs_ops = & f2fs_feature_list_attr_ops ,
. release = f2fs_feature_list_kobj_release ,
} ;
2018-07-07 06:50:57 +03:00
/*
 * seq_file show callback behind the "segment_info" proc entry created in
 * f2fs_register_sysfs().
 *
 * Prints a two-line legend, then one "type|valid_blocks" cell for each of
 * the raw superblock's segment_count_main segments, ten cells per output
 * line, each line prefixed with the index of its first segment.
 *
 * @seq:    seq_file whose ->private is the struct super_block
 * @offset: not referenced
 *
 * Always returns 0.
 */
static int __maybe_unused segment_info_seq_show ( struct seq_file * seq ,
void * offset )
2017-06-14 12:39:47 +03:00
{
struct super_block * sb = seq - > private ;
struct f2fs_sb_info * sbi = F2FS_SB ( sb ) ;
unsigned int total_segs =
le32_to_cpu ( sbi - > raw_super - > segment_count_main ) ;
/* NOTE(review): signed index compared against an unsigned count below */
int i ;
seq_puts ( seq , " format: segment_type|valid_blocks \n "
" segment_type(0:HD, 1:WD, 2:CD, 3:HN, 4:WN, 5:CN) \n " ) ;
for ( i = 0 ; i < total_segs ; i + + ) {
struct seg_entry * se = get_seg_entry ( sbi , i ) ;
/* start of a row: emit the segment index as a row label */
if ( ( i % 10 ) = = 0 )
seq_printf ( seq , " %-10d " , i ) ;
2019-06-18 13:00:09 +03:00
seq_printf ( seq , " %d|%-3u " , se - > type , se - > valid_blocks ) ;
2017-06-14 12:39:47 +03:00
/* end the row after ten cells or after the very last segment */
if ( ( i % 10 ) = = 9 | | i = = ( total_segs - 1 ) )
seq_putc ( seq , ' \n ' ) ;
else
seq_putc ( seq , ' ' ) ;
}
return 0 ;
}
2018-07-07 06:50:57 +03:00
/*
 * seq_file show callback behind the "segment_bits" proc entry created in
 * f2fs_register_sysfs().
 *
 * Prints a two-line legend, then one output line per segment (there are
 * segment_count_main of them): the segment index, "type|valid_blocks|", and
 * the SIT_VBLOCK_MAP_SIZE entries of cur_valid_map, each printed as two hex
 * digits.
 *
 * @seq:    seq_file whose ->private is the struct super_block
 * @offset: not referenced
 *
 * Always returns 0.
 */
static int __maybe_unused segment_bits_seq_show ( struct seq_file * seq ,
void * offset )
2017-06-14 12:39:47 +03:00
{
struct super_block * sb = seq - > private ;
struct f2fs_sb_info * sbi = F2FS_SB ( sb ) ;
unsigned int total_segs =
le32_to_cpu ( sbi - > raw_super - > segment_count_main ) ;
/* NOTE(review): signed index compared against an unsigned count below */
int i , j ;
seq_puts ( seq , " format: segment_type|valid_blocks|bitmaps \n "
" segment_type(0:HD, 1:WD, 2:CD, 3:HN, 4:WN, 5:CN) \n " ) ;
for ( i = 0 ; i < total_segs ; i + + ) {
struct seg_entry * se = get_seg_entry ( sbi , i ) ;
seq_printf ( seq , " %-10d " , i ) ;
2019-06-18 13:00:09 +03:00
seq_printf ( seq , " %d|%-3u| " , se - > type , se - > valid_blocks ) ;
2017-06-14 12:39:47 +03:00
/* dump the segment's current validity bitmap entry by entry */
for ( j = 0 ; j < SIT_VBLOCK_MAP_SIZE ; j + + )
seq_printf ( seq , " %.2x " , se - > cur_valid_map [ j ] ) ;
seq_putc ( seq , ' \n ' ) ;
}
return 0 ;
}
2018-07-23 17:10:22 +03:00
/*
 * seq_file show callback behind the "victim_bits" proc entry created in
 * f2fs_register_sysfs().
 *
 * Prints a one-line legend, then for each of the MAIN_SECS(sbi) sections a
 * 1 or 0 reflecting its bit in dirty_i->victim_secmap, ten values per output
 * line, each line prefixed with the index of its first section.
 *
 * @seq:    seq_file whose ->private is the struct super_block
 * @offset: not referenced
 *
 * Always returns 0.
 */
static int __maybe_unused victim_bits_seq_show ( struct seq_file * seq ,
void * offset )
{
struct super_block * sb = seq - > private ;
struct f2fs_sb_info * sbi = F2FS_SB ( sb ) ;
struct dirty_seglist_info * dirty_i = DIRTY_I ( sbi ) ;
int i ;
seq_puts ( seq , " format: victim_secmap bitmaps \n " ) ;
for ( i = 0 ; i < MAIN_SECS ( sbi ) ; i + + ) {
/* start of a row: emit the section index as a row label */
if ( ( i % 10 ) = = 0 )
seq_printf ( seq , " %-10d " , i ) ;
seq_printf ( seq , " %d " , test_bit ( i , dirty_i - > victim_secmap ) ? 1 : 0 ) ;
/* end the row after ten values or after the very last section */
if ( ( i % 10 ) = = 9 | | i = = ( MAIN_SECS ( sbi ) - 1 ) )
seq_putc ( seq , ' \n ' ) ;
else
seq_putc ( seq , ' ' ) ;
}
return 0 ;
}
2022-10-25 16:26:38 +03:00
/*
 * seq_file show callback behind the "discard_plist_info" proc entry created
 * in f2fs_register_sysfs().
 *
 * Always prints the header line.  When f2fs_realtime_discard_enable() is
 * true and a discard_cmd_control exists, it then prints, for each of the
 * MAX_PLIST_NUM pending lists, the number of entries on that list (or "."
 * for an empty list), eight lists per output line.
 *
 * @seq:    seq_file whose ->private is the struct super_block
 * @offset: not referenced
 *
 * Always returns 0.
 */
static int __maybe_unused discard_plist_seq_show ( struct seq_file * seq ,
void * offset )
{
struct super_block * sb = seq - > private ;
struct f2fs_sb_info * sbi = F2FS_SB ( sb ) ;
struct discard_cmd_control * dcc = SM_I ( sbi ) - > dcc_info ;
int i , count ;
/* NOTE(review): "diacrd_cmd" looks like a typo for "discard_cmd" in this emitted text */
seq_puts ( seq , " Discard pend list(Show diacrd_cmd count on each entry, .:not exist): \n " ) ;
if ( ! f2fs_realtime_discard_enable ( sbi ) )
return 0 ;
if ( dcc ) {
/* the lists are walked with cmd_lock held */
mutex_lock ( & dcc - > cmd_lock ) ;
for ( i = 0 ; i < MAX_PLIST_NUM ; i + + ) {
struct list_head * pend_list ;
struct discard_cmd * dc , * tmp ;
/* start of a row: emit the list index as a row label */
if ( i % 8 = = 0 )
seq_printf ( seq , " %-3d " , i ) ;
count = 0 ;
pend_list = & dcc - > pend_list [ i ] ;
/* only counts entries; nothing is removed while iterating */
list_for_each_entry_safe ( dc , tmp , pend_list , list )
count + + ;
if ( count )
seq_printf ( seq , " %7d " , count ) ;
else
seq_puts ( seq , " . " ) ;
if ( i % 8 = = 7 )
seq_putc ( seq , ' \n ' ) ;
}
seq_putc ( seq , ' \n ' ) ;
mutex_unlock ( & dcc - > cmd_lock ) ;
}
return 0 ;
}
2024-03-05 02:16:56 +03:00
/*
 * seq_file show callback behind the "disk_map" proc entry created in
 * f2fs_register_sysfs().
 *
 * Prints the block size, the start block address of each metadata area
 * together with a segment count (checkpoint, SIT, NAT, SSA, main), and the
 * section geometry.  For a multi-device filesystem it additionally prints
 * one line per device with its zoned flag, block range and path.
 *
 * @seq:    seq_file whose ->private is the struct super_block
 * @offset: not referenced
 *
 * Always returns 0.
 */
static int __maybe_unused disk_map_seq_show ( struct seq_file * seq ,
void * offset )
{
struct super_block * sb = seq - > private ;
struct f2fs_sb_info * sbi = F2FS_SB ( sb ) ;
int i ;
seq_printf ( seq , " Address Layout : %5luB Block address (# of Segments) \n " ,
F2FS_BLKSIZE ) ;
seq_printf ( seq , " SB : %12s \n " , " 0/1024B " ) ;
seq_printf ( seq , " seg0_blkaddr : 0x%010x \n " , SEG0_BLKADDR ( sbi ) ) ;
/* the checkpoint segment count is printed as the constant 2 */
seq_printf ( seq , " Checkpoint : 0x%010x (%10d) \n " ,
le32_to_cpu ( F2FS_RAW_SUPER ( sbi ) - > cp_blkaddr ) , 2 ) ;
seq_printf ( seq , " SIT : 0x%010x (%10d) \n " ,
SIT_I ( sbi ) - > sit_base_addr ,
le32_to_cpu ( F2FS_RAW_SUPER ( sbi ) - > segment_count_sit ) ) ;
seq_printf ( seq , " NAT : 0x%010x (%10d) \n " ,
NM_I ( sbi ) - > nat_blkaddr ,
le32_to_cpu ( F2FS_RAW_SUPER ( sbi ) - > segment_count_nat ) ) ;
seq_printf ( seq , " SSA : 0x%010x (%10d) \n " ,
SM_I ( sbi ) - > ssa_blkaddr ,
le32_to_cpu ( F2FS_RAW_SUPER ( sbi ) - > segment_count_ssa ) ) ;
seq_printf ( seq , " Main : 0x%010x (%10d) \n " ,
SM_I ( sbi ) - > main_blkaddr ,
le32_to_cpu ( F2FS_RAW_SUPER ( sbi ) - > segment_count_main ) ) ;
seq_printf ( seq , " # of Sections : %12d \n " ,
le32_to_cpu ( F2FS_RAW_SUPER ( sbi ) - > section_count ) ) ;
seq_printf ( seq , " Segs/Sections : %12d \n " ,
SEGS_PER_SEC ( sbi ) ) ;
/*
 * NOTE(review): "<< 1" presumably assumes 2 MB per segment; the block size
 * printed above is not a fixed value here -- confirm this holds for every
 * supported F2FS_BLKSIZE.
 */
seq_printf ( seq , " Section size : %12d MB \n " ,
SEGS_PER_SEC ( sbi ) < < 1 ) ;
/* the per-device table is only printed for multi-device filesystems */
if ( ! f2fs_is_multi_device ( sbi ) )
return 0 ;
seq_puts ( seq , " \n Disk Map for multi devices: \n " ) ;
for ( i = 0 ; i < sbi - > s_ndevs ; i + + )
seq_printf ( seq , " Disk:%2d (zoned=%d): 0x%010x - 0x%010x on %s \n " ,
i , bdev_is_zoned ( FDEV ( i ) . bdev ) ,
FDEV ( i ) . start_blk , FDEV ( i ) . end_blk ,
FDEV ( i ) . path ) ;
return 0 ;
}
2017-07-26 21:24:13 +03:00
/*
 * Set up the filesystem-wide sysfs and procfs roots:
 *   - register f2fs_kset as "f2fs" below fs_kobj,
 *   - add the global "features" kobject to that kset,
 *   - create the "fs/f2fs" proc directory and remember it in f2fs_proc_root.
 *
 * Returns 0 on success.  If registering the kset fails, its error code is
 * returned directly.  If a later step fails, the "features" kobject is put,
 * the kset is unregistered, and the failing step's error code is returned
 * (-ENOMEM when the proc directory cannot be created).
 */
int __init f2fs_init_sysfs(void)
{
	int err;

	kobject_set_name(&f2fs_kset.kobj, "f2fs");
	f2fs_kset.kobj.parent = fs_kobj;
	err = kset_register(&f2fs_kset);
	if (err)
		return err;

	err = kobject_init_and_add(&f2fs_feat, &f2fs_feat_ktype,
				   NULL, "features");
	if (!err) {
		f2fs_proc_root = proc_mkdir("fs/f2fs", NULL);
		if (f2fs_proc_root)
			return 0;
		err = -ENOMEM;
	}

	/* Shared unwind for a failure after the kset was registered. */
	kobject_put(&f2fs_feat);
	kset_unregister(&f2fs_kset);
	return err;
}
2017-07-26 21:24:13 +03:00
void f2fs_exit_sysfs ( void )
2017-06-14 12:39:47 +03:00
{
2017-07-22 03:14:09 +03:00
kobject_put ( & f2fs_feat ) ;
kset_unregister ( & f2fs_kset ) ;
2017-06-14 12:39:47 +03:00
remove_proc_entry ( " fs/f2fs " , NULL ) ;
2017-07-22 03:14:09 +03:00
f2fs_proc_root = NULL ;
2017-06-14 12:39:47 +03:00
}
2017-07-26 21:24:13 +03:00
int f2fs_register_sysfs ( struct f2fs_sb_info * sbi )
2017-06-14 12:39:47 +03:00
{
struct super_block * sb = sbi - > sb ;
int err ;
2017-07-22 03:14:09 +03:00
sbi - > s_kobj . kset = & f2fs_kset ;
init_completion ( & sbi - > s_kobj_unregister ) ;
err = kobject_init_and_add ( & sbi - > s_kobj , & f2fs_sb_ktype , NULL ,
" %s " , sb - > s_id ) ;
2020-12-09 11:43:27 +03:00
if ( err )
goto put_sb_kobj ;
sbi - > s_stat_kobj . kset = & f2fs_kset ;
init_completion ( & sbi - > s_stat_kobj_unregister ) ;
err = kobject_init_and_add ( & sbi - > s_stat_kobj , & f2fs_stat_ktype ,
& sbi - > s_kobj , " stat " ) ;
if ( err )
goto put_stat_kobj ;
2017-07-22 03:14:09 +03:00
2021-06-03 22:31:08 +03:00
sbi - > s_feature_list_kobj . kset = & f2fs_kset ;
init_completion ( & sbi - > s_feature_list_kobj_unregister ) ;
err = kobject_init_and_add ( & sbi - > s_feature_list_kobj ,
& f2fs_feature_list_ktype ,
& sbi - > s_kobj , " feature_list " ) ;
if ( err )
goto put_feature_list_kobj ;
2023-04-25 19:06:11 +03:00
sbi - > s_proc = proc_mkdir ( sb - > s_id , f2fs_proc_root ) ;
if ( ! sbi - > s_proc ) {
err = - ENOMEM ;
goto put_feature_list_kobj ;
}
2017-06-14 12:39:47 +03:00
2023-04-25 19:06:11 +03:00
proc_create_single_data ( " segment_info " , 0444 , sbi - > s_proc ,
2018-05-15 16:57:23 +03:00
segment_info_seq_show , sb ) ;
2023-04-25 19:06:11 +03:00
proc_create_single_data ( " segment_bits " , 0444 , sbi - > s_proc ,
2018-05-15 16:57:23 +03:00
segment_bits_seq_show , sb ) ;
2021-08-20 06:52:28 +03:00
# ifdef CONFIG_F2FS_IOSTAT
2023-04-25 19:06:11 +03:00
proc_create_single_data ( " iostat_info " , 0444 , sbi - > s_proc ,
2018-05-15 16:57:23 +03:00
iostat_info_seq_show , sb ) ;
2021-08-20 06:52:28 +03:00
# endif
2023-04-25 19:06:11 +03:00
proc_create_single_data ( " victim_bits " , 0444 , sbi - > s_proc ,
2018-07-23 17:10:22 +03:00
victim_bits_seq_show , sb ) ;
2023-04-25 19:06:11 +03:00
proc_create_single_data ( " discard_plist_info " , 0444 , sbi - > s_proc ,
2022-10-25 16:26:38 +03:00
discard_plist_seq_show , sb ) ;
2024-03-05 02:16:56 +03:00
proc_create_single_data ( " disk_map " , 0444 , sbi - > s_proc ,
disk_map_seq_show , sb ) ;
2017-06-14 12:39:47 +03:00
return 0 ;
2021-06-03 22:31:08 +03:00
put_feature_list_kobj :
kobject_put ( & sbi - > s_feature_list_kobj ) ;
wait_for_completion ( & sbi - > s_feature_list_kobj_unregister ) ;
2020-12-09 11:43:27 +03:00
put_stat_kobj :
kobject_put ( & sbi - > s_stat_kobj ) ;
wait_for_completion ( & sbi - > s_stat_kobj_unregister ) ;
put_sb_kobj :
kobject_put ( & sbi - > s_kobj ) ;
wait_for_completion ( & sbi - > s_kobj_unregister ) ;
return err ;
2017-06-14 12:39:47 +03:00
}
2017-07-26 21:24:13 +03:00
void f2fs_unregister_sysfs ( struct f2fs_sb_info * sbi )
2017-06-14 12:39:47 +03:00
{
2023-04-25 19:06:11 +03:00
remove_proc_subtree ( sbi - > sb - > s_id , f2fs_proc_root ) ;
2020-12-09 11:43:27 +03:00
kobject_put ( & sbi - > s_stat_kobj ) ;
wait_for_completion ( & sbi - > s_stat_kobj_unregister ) ;
2021-06-03 22:31:08 +03:00
kobject_put ( & sbi - > s_feature_list_kobj ) ;
wait_for_completion ( & sbi - > s_feature_list_kobj_unregister ) ;
2020-12-09 11:43:27 +03:00
2019-12-14 05:32:16 +03:00
kobject_put ( & sbi - > s_kobj ) ;
2020-10-12 16:09:48 +03:00
wait_for_completion ( & sbi - > s_kobj_unregister ) ;
2017-06-14 12:39:47 +03:00
}