From 2a4bd0c37c87f9f82b6265ec7e716d2a2d4b0c71 Mon Sep 17 00:00:00 2001 From: jiahao Date: Fri, 19 Feb 2021 20:46:32 +0800 Subject: [PATCH 01/48] f2fs: fix a spacing coding style Add a space before the plus. Signed-off-by: jiahao Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 174a0819ad96..d0e5ef7e232e 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -1456,7 +1456,7 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) orphan_blocks); if (__remain_node_summaries(cpc->reason)) - ckpt->cp_pack_total_block_count = cpu_to_le32(F2FS_CP_PACKS+ + ckpt->cp_pack_total_block_count = cpu_to_le32(F2FS_CP_PACKS + cp_payload_blks + data_sum_blocks + orphan_blocks + NR_CURSEG_NODE_TYPE); else From c67c8c0f47eb0bf49d6cf165389554e379443968 Mon Sep 17 00:00:00 2001 From: xuyehan Date: Tue, 23 Feb 2021 09:31:43 +0800 Subject: [PATCH 02/48] f2fs: fix a spelling error Delete the letter 'e' before 'number' Signed-off-by: xuyehan Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- Documentation/ABI/testing/sysfs-fs-f2fs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs index cbeac1bebe2f..9fa5a528cc23 100644 --- a/Documentation/ABI/testing/sysfs-fs-f2fs +++ b/Documentation/ABI/testing/sysfs-fs-f2fs @@ -276,7 +276,7 @@ Date April 2019 Contact: "Daniel Rosenberg" Description: If checkpoint=disable, it displays the number of blocks that are unusable. - If checkpoint=enable it displays the enumber of blocks that + If checkpoint=enable it displays the number of blocks that would be unusable if checkpoint=disable were to be set. 
What: /sys/fs/f2fs//encoding From 7dede88659df38f96128ab3922c50dde2d29c574 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Sat, 20 Feb 2021 17:35:40 +0800 Subject: [PATCH 03/48] f2fs: fix to allow migrating fully valid segment F2FS_IOC_FLUSH_DEVICE/F2FS_IOC_RESIZE_FS needs to migrate all blocks of target segment to other place, no matter the segment has partially or fully valid blocks. However, after commit 803e74be04b3 ("f2fs: stop GC when the victim becomes fully valid"), we may skip migration due to target segment is fully valid, result in failing the ioctl interface, fix this. Fixes: 803e74be04b3 ("f2fs: stop GC when the victim becomes fully valid") Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 2 +- fs/f2fs/file.c | 9 +++++---- fs/f2fs/gc.c | 21 ++++++++++++--------- fs/f2fs/segment.c | 2 +- fs/f2fs/super.c | 2 +- 5 files changed, 20 insertions(+), 16 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index e2d302ae3a46..cccdfb1a40ab 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -3547,7 +3547,7 @@ void f2fs_destroy_post_read_wq(struct f2fs_sb_info *sbi); int f2fs_start_gc_thread(struct f2fs_sb_info *sbi); void f2fs_stop_gc_thread(struct f2fs_sb_info *sbi); block_t f2fs_start_bidx_of_node(unsigned int node_ofs, struct inode *inode); -int f2fs_gc(struct f2fs_sb_info *sbi, bool sync, bool background, +int f2fs_gc(struct f2fs_sb_info *sbi, bool sync, bool background, bool force, unsigned int segno); void f2fs_build_gc_manager(struct f2fs_sb_info *sbi); int f2fs_resize_fs(struct f2fs_sb_info *sbi, __u64 block_count); diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index d26ff2ae3f5e..1863944f4073 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -1658,7 +1658,7 @@ next_alloc: if (has_not_enough_free_secs(sbi, 0, GET_SEC_FROM_SEG(sbi, overprovision_segments(sbi)))) { down_write(&sbi->gc_lock); - err = f2fs_gc(sbi, true, false, NULL_SEGNO); + err = f2fs_gc(sbi, true, false, false, NULL_SEGNO); if (err && err != -ENODATA && err != 
-EAGAIN) goto out_err; } @@ -2489,7 +2489,7 @@ static int f2fs_ioc_gc(struct file *filp, unsigned long arg) down_write(&sbi->gc_lock); } - ret = f2fs_gc(sbi, sync, true, NULL_SEGNO); + ret = f2fs_gc(sbi, sync, true, false, NULL_SEGNO); out: mnt_drop_write_file(filp); return ret; @@ -2525,7 +2525,8 @@ do_more: down_write(&sbi->gc_lock); } - ret = f2fs_gc(sbi, range->sync, true, GET_SEGNO(sbi, range->start)); + ret = f2fs_gc(sbi, range->sync, true, false, + GET_SEGNO(sbi, range->start)); if (ret) { if (ret == -EBUSY) ret = -EAGAIN; @@ -2978,7 +2979,7 @@ static int f2fs_ioc_flush_device(struct file *filp, unsigned long arg) sm->last_victim[GC_CB] = end_segno + 1; sm->last_victim[GC_GREEDY] = end_segno + 1; sm->last_victim[ALLOC_NEXT] = end_segno + 1; - ret = f2fs_gc(sbi, true, true, start_segno); + ret = f2fs_gc(sbi, true, true, true, start_segno); if (ret == -EAGAIN) ret = 0; else if (ret < 0) diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 39330ad3c44e..b3af76340026 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -112,7 +112,7 @@ do_gc: sync_mode = F2FS_OPTION(sbi).bggc_mode == BGGC_MODE_SYNC; /* if return value is not zero, no victim was selected */ - if (f2fs_gc(sbi, sync_mode, true, NULL_SEGNO)) + if (f2fs_gc(sbi, sync_mode, true, false, NULL_SEGNO)) wait_ms = gc_th->no_gc_sleep_time; trace_f2fs_background_gc(sbi->sb, wait_ms, @@ -1354,7 +1354,8 @@ out: * the victim data block is ignored. */ static int gc_data_segment(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, - struct gc_inode_list *gc_list, unsigned int segno, int gc_type) + struct gc_inode_list *gc_list, unsigned int segno, int gc_type, + bool force_migrate) { struct super_block *sb = sbi->sb; struct f2fs_summary *entry; @@ -1383,8 +1384,8 @@ next_step: * race condition along with SSR block allocation. 
*/ if ((gc_type == BG_GC && has_not_enough_free_secs(sbi, 0, 0)) || - get_valid_blocks(sbi, segno, true) == - BLKS_PER_SEC(sbi)) + (!force_migrate && get_valid_blocks(sbi, segno, true) == + BLKS_PER_SEC(sbi))) return submitted; if (check_valid_map(sbi, segno, off) == 0) @@ -1519,7 +1520,8 @@ static int __get_victim(struct f2fs_sb_info *sbi, unsigned int *victim, static int do_garbage_collect(struct f2fs_sb_info *sbi, unsigned int start_segno, - struct gc_inode_list *gc_list, int gc_type) + struct gc_inode_list *gc_list, int gc_type, + bool force_migrate) { struct page *sum_page; struct f2fs_summary_block *sum; @@ -1606,7 +1608,8 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi, gc_type); else submitted += gc_data_segment(sbi, sum->entries, gc_list, - segno, gc_type); + segno, gc_type, + force_migrate); stat_inc_seg_count(sbi, type, gc_type); migrated++; @@ -1634,7 +1637,7 @@ skip: } int f2fs_gc(struct f2fs_sb_info *sbi, bool sync, - bool background, unsigned int segno) + bool background, bool force, unsigned int segno) { int gc_type = sync ? 
FG_GC : BG_GC; int sec_freed = 0, seg_freed = 0, total_freed = 0; @@ -1696,7 +1699,7 @@ gc_more: if (ret) goto stop; - seg_freed = do_garbage_collect(sbi, segno, &gc_list, gc_type); + seg_freed = do_garbage_collect(sbi, segno, &gc_list, gc_type, force); if (gc_type == FG_GC && seg_freed == f2fs_usable_segs_in_sec(sbi, segno)) sec_freed++; @@ -1835,7 +1838,7 @@ static int free_segment_range(struct f2fs_sb_info *sbi, .iroot = RADIX_TREE_INIT(gc_list.iroot, GFP_NOFS), }; - do_garbage_collect(sbi, segno, &gc_list, FG_GC); + do_garbage_collect(sbi, segno, &gc_list, FG_GC, true); put_gc_inode(&gc_list); if (!gc_only && get_valid_blocks(sbi, segno, true)) { diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 993004f06a77..b8c20d29431d 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -504,7 +504,7 @@ void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need) */ if (has_not_enough_free_secs(sbi, 0, 0)) { down_write(&sbi->gc_lock); - f2fs_gc(sbi, false, false, NULL_SEGNO); + f2fs_gc(sbi, false, false, false, NULL_SEGNO); } } diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 7069793752f1..a17a4dd8d449 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1865,7 +1865,7 @@ static int f2fs_disable_checkpoint(struct f2fs_sb_info *sbi) while (!f2fs_time_over(sbi, DISABLE_TIME)) { down_write(&sbi->gc_lock); - err = f2fs_gc(sbi, true, false, NULL_SEGNO); + err = f2fs_gc(sbi, true, false, false, NULL_SEGNO); if (err == -ENODATA) { err = 0; break; From 3ab0598e6d860ef49d029943ba80f627c15c15d6 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Sat, 20 Feb 2021 17:35:41 +0800 Subject: [PATCH 04/48] f2fs: fix panic during f2fs_resize_fs() f2fs_resize_fs() hangs in below callstack with testcase: - mkfs 16GB image & mount image - dd 8GB fileA - dd 8GB fileB - sync - rm fileA - sync - resize filesystem to 8GB kernel BUG at segment.c:2484! 
Call Trace: allocate_segment_by_default+0x92/0xf0 [f2fs] f2fs_allocate_data_block+0x44b/0x7e0 [f2fs] do_write_page+0x5a/0x110 [f2fs] f2fs_outplace_write_data+0x55/0x100 [f2fs] f2fs_do_write_data_page+0x392/0x850 [f2fs] move_data_page+0x233/0x320 [f2fs] do_garbage_collect+0x14d9/0x1660 [f2fs] free_segment_range+0x1f7/0x310 [f2fs] f2fs_resize_fs+0x118/0x330 [f2fs] __f2fs_ioctl+0x487/0x3680 [f2fs] __x64_sys_ioctl+0x8e/0xd0 do_syscall_64+0x33/0x80 entry_SYSCALL_64_after_hwframe+0x44/0xa9 The root cause is that we forgot to check whether the resized filesystem has enough space to store all valid blocks of the before-resizing filesystem, so the allocator runs out of space during block migration in free_segment_range(). Fixes: b4b10061ef98 ("f2fs: refactor resize_fs to avoid meta updates in progress") Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/gc.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index b3af76340026..86ba8ed0b8a7 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -1977,7 +1977,20 @@ int f2fs_resize_fs(struct f2fs_sb_info *sbi, __u64 block_count) /* stop CP to protect MAIN_SEC in free_segment_range */ f2fs_lock_op(sbi); + + spin_lock(&sbi->stat_lock); + if (shrunk_blocks + valid_user_blocks(sbi) + + sbi->current_reserved_blocks + sbi->unusable_block_count + + F2FS_OPTION(sbi).root_reserved_blocks > sbi->user_block_count) + err = -ENOSPC; + spin_unlock(&sbi->stat_lock); + + if (err) + goto out_unlock; + err = free_segment_range(sbi, secs, true); + +out_unlock: f2fs_unlock_op(sbi); up_write(&sbi->gc_lock); if (err) From cd6ee739b8ee49cf5f3d7c9a0f663f9f0c5afe1b Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Sat, 20 Feb 2021 17:38:41 +0800 Subject: [PATCH 05/48] f2fs: avoid unused f2fs_show_compress_options() LKP reports: fs/f2fs/super.c:1516:20: warning: unused function 'f2fs_show_compress_options' [-Wunused-function] static inline void f2fs_show_compress_options(struct seq_file *seq, Fix this issue by 
covering f2fs_show_compress_options() with CONFIG_F2FS_FS_COMPRESSION macro. Fixes: 4c8ff7095bef ("f2fs: support data compression") Reported-by: kernel test robot Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index a17a4dd8d449..e03b2f0f69d0 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1616,6 +1616,7 @@ static inline void f2fs_show_quota_options(struct seq_file *seq, #endif } +#ifdef CONFIG_F2FS_FS_COMPRESSION static inline void f2fs_show_compress_options(struct seq_file *seq, struct super_block *sb) { @@ -1661,6 +1662,7 @@ static inline void f2fs_show_compress_options(struct seq_file *seq, else if (F2FS_OPTION(sbi).compress_mode == COMPR_MODE_USER) seq_printf(seq, ",compress_mode=%s", "user"); } +#endif static int f2fs_show_options(struct seq_file *seq, struct dentry *root) { From 4831675c6be59dbe8e0b2a53dc237111f9307a4b Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Sat, 20 Feb 2021 17:38:42 +0800 Subject: [PATCH 06/48] f2fs: remove unused FORCE_FG_GC macro FORCE_FG_GC was introduced by commit 6aefd93b0137 ("f2fs: introduce background_gc=sync mount option"), but was never used; remove it. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index 229814b4f4a6..144980b62f9e 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -172,12 +172,10 @@ enum { /* * BG_GC means the background cleaning job. * FG_GC means the on-demand cleaning job. - * FORCE_FG_GC means on-demand cleaning job in background. 
*/ enum { BG_GC = 0, FG_GC, - FORCE_FG_GC, }; /* for a function parameter to select a victim segment */ From 3b42c741b1bf52ee9ed6fba5f9636d80ddacf73f Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Sat, 20 Feb 2021 17:38:43 +0800 Subject: [PATCH 07/48] f2fs: update comments for explicit memory barrier Add more detailed comments for explicit memory barrier used by f2fs, in order to enhance code readability. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 6 +++++- fs/f2fs/segment.c | 6 +++++- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index d0e5ef7e232e..f6169611270f 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -1818,7 +1818,11 @@ int f2fs_issue_checkpoint(struct f2fs_sb_info *sbi) llist_add(&req.llnode, &cprc->issue_list); atomic_inc(&cprc->queued_ckpt); - /* update issue_list before we wake up issue_checkpoint thread */ + /* + * update issue_list before we wake up issue_checkpoint thread, + * this smp_mb() pairs with another barrier in ___wait_event(), + * see more details in comments of waitqueue_active(). + */ smp_mb(); if (waitqueue_active(&cprc->ckpt_wait_queue)) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index b8c20d29431d..29403540ff6e 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -653,7 +653,11 @@ int f2fs_issue_flush(struct f2fs_sb_info *sbi, nid_t ino) llist_add(&cmd.llnode, &fcc->issue_list); - /* update issue_list before we wake up issue_flush thread */ + /* + * update issue_list before we wake up issue_flush thread, this + * smp_mb() pairs with another barrier in ___wait_event(), see + * more details in comments of waitqueue_active(). 
+ */ smp_mb(); if (waitqueue_active(&fcc->flush_wait_queue)) From 43f8c47ea7d59c7b2270835f1d7c4618a1ea27b6 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 22 Feb 2021 18:07:33 +0800 Subject: [PATCH 08/48] f2fs: check discard command number before traversing discard pending list In trim thread, let's add a condition to check discard command number before traversing discard pending list, it can avoid unneeded traversing if there is no discard command. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 29403540ff6e..b5a40a39a03f 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1759,6 +1759,8 @@ static int issue_discard_thread(void *data) wait_ms = dpolicy.max_interval; continue; } + if (!atomic_read(&dcc->discard_cmd_cnt)) + continue; if (sbi->gc_mode == GC_URGENT_HIGH) __init_discard_policy(sbi, &dpolicy, DPOLICY_FORCE, 1); From 72f85881249e3a7403434631b9a9f934cdd1a83d Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Fri, 26 Feb 2021 16:51:42 +0100 Subject: [PATCH 09/48] f2fs: compress: Allow modular (de)compression algorithms If F2FS_FS is modular, enabling the compression options F2FS_FS_{LZ4,LZ4HC,LZO,LZORLE,ZSTD} will make the (de)compression algorithms {LZ4,LZ4HC,LZO,ZSTD}_{,DE}COMPRESS builtin instead of modular, as the former depend on an intermediate boolean F2FS_FS_COMPRESSION, which in turn depends on tristate F2FS_FS. Indeed, if a boolean symbol A depends directly on a tristate symbol B and selects another tristate symbol C: tristate B tristate C bool A depends on B select C and B is modular, then C will also be modular. However, if there is an intermediate boolean D in the dependency chain between A and B: tristate B tristate C bool D depends on B bool A depends on D select C then the modular state won't propagate from B to C, and C will be builtin instead of modular. 
As modular dependency propagation through intermediate symbols is obscure, fix this in a robust way by moving the selection of tristate (de)compression algorithms from the boolean compression options to the tristate main F2FS_FS option. Signed-off-by: Geert Uytterhoeven Reviewed-by: Chao Yu Reviewed-by: Masahiro Yamada Signed-off-by: Jaegeuk Kim --- fs/f2fs/Kconfig | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/fs/f2fs/Kconfig b/fs/f2fs/Kconfig index 62e638a49bbf..7669de7b49ce 100644 --- a/fs/f2fs/Kconfig +++ b/fs/f2fs/Kconfig @@ -7,6 +7,13 @@ config F2FS_FS select CRYPTO_CRC32 select F2FS_FS_XATTR if FS_ENCRYPTION select FS_ENCRYPTION_ALGS if FS_ENCRYPTION + select LZ4_COMPRESS if F2FS_FS_LZ4 + select LZ4_DECOMPRESS if F2FS_FS_LZ4 + select LZ4HC_COMPRESS if F2FS_FS_LZ4HC + select LZO_COMPRESS if F2FS_FS_LZO + select LZO_DECOMPRESS if F2FS_FS_LZO + select ZSTD_COMPRESS if F2FS_FS_ZSTD + select ZSTD_DECOMPRESS if F2FS_FS_ZSTD help F2FS is based on Log-structured File System (LFS), which supports versatile "flash-friendly" features. The design has been focused on @@ -94,8 +101,6 @@ config F2FS_FS_COMPRESSION config F2FS_FS_LZO bool "LZO compression support" depends on F2FS_FS_COMPRESSION - select LZO_COMPRESS - select LZO_DECOMPRESS default y help Support LZO compress algorithm, if unsure, say Y. @@ -103,8 +108,6 @@ config F2FS_FS_LZO config F2FS_FS_LZ4 bool "LZ4 compression support" depends on F2FS_FS_COMPRESSION - select LZ4_COMPRESS - select LZ4_DECOMPRESS default y help Support LZ4 compress algorithm, if unsure, say Y. 
@@ -113,7 +116,6 @@ config F2FS_FS_LZ4HC bool "LZ4HC compression support" depends on F2FS_FS_COMPRESSION depends on F2FS_FS_LZ4 - select LZ4HC_COMPRESS default y help Support LZ4HC compress algorithm, LZ4HC has compatible on-disk @@ -122,8 +124,6 @@ config F2FS_FS_LZ4HC config F2FS_FS_ZSTD bool "ZSTD compression support" depends on F2FS_FS_COMPRESSION - select ZSTD_COMPRESS - select ZSTD_DECOMPRESS default y help Support ZSTD compress algorithm, if unsure, say Y. @@ -132,8 +132,6 @@ config F2FS_FS_LZORLE bool "LZO-RLE compression support" depends on F2FS_FS_COMPRESSION depends on F2FS_FS_LZO - select LZO_COMPRESS - select LZO_DECOMPRESS default y help Support LZO-RLE compress algorithm, if unsure, say Y. From 4260c4067fbba55a90037fe3ee32eff087749f83 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Wed, 24 Feb 2021 13:03:13 -0600 Subject: [PATCH 10/48] f2fs: Replace one-element array with flexible-array member MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There is a regular need in the kernel to provide a way to declare having a dynamically sized set of trailing elements in a structure. Kernel code should always use “flexible array members”[1] for these cases. The older style of one-element or zero-length arrays should no longer be used[2]. Refactor the code according to the use of a flexible-array member in struct f2fs_checkpoint, instead of a one-element arrays. 
Notice that a temporary pointer to void '*tmp_ptr' was used in order to fix the following errors when using a flexible array instead of a one element array in struct f2fs_checkpoint: CC [M] fs/f2fs/dir.o In file included from fs/f2fs/dir.c:13: fs/f2fs/f2fs.h: In function ‘__bitmap_ptr’: fs/f2fs/f2fs.h:2227:40: error: invalid use of flexible array member 2227 | return &ckpt->sit_nat_version_bitmap + offset + sizeof(__le32); | ^ fs/f2fs/f2fs.h:2227:49: error: invalid use of flexible array member 2227 | return &ckpt->sit_nat_version_bitmap + offset + sizeof(__le32); | ^ fs/f2fs/f2fs.h:2238:40: error: invalid use of flexible array member 2238 | return &ckpt->sit_nat_version_bitmap + offset; | ^ make[2]: *** [scripts/Makefile.build:287: fs/f2fs/dir.o] Error 1 make[1]: *** [scripts/Makefile.build:530: fs/f2fs] Error 2 make: *** [Makefile:1819: fs] Error 2 [1] https://en.wikipedia.org/wiki/Flexible_array_member [2] https://www.kernel.org/doc/html/v5.9/process/deprecated.html#zero-length-and-one-element-arrays Link: https://github.com/KSPP/linux/issues/79 Build-tested-by: kernel test robot Link: https://lore.kernel.org/lkml/603647e4.DeEFbl4eqljuwAUe%25lkp@intel.com/ Signed-off-by: Gustavo A. R. Silva Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 5 +++-- include/linux/f2fs_fs.h | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index cccdfb1a40ab..99e243fd26d5 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -2215,6 +2215,7 @@ static inline block_t __cp_payload(struct f2fs_sb_info *sbi) static inline void *__bitmap_ptr(struct f2fs_sb_info *sbi, int flag) { struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); + void *tmp_ptr = &ckpt->sit_nat_version_bitmap; int offset; if (is_set_ckpt_flags(sbi, CP_LARGE_NAT_BITMAP_FLAG)) { @@ -2224,7 +2225,7 @@ static inline void *__bitmap_ptr(struct f2fs_sb_info *sbi, int flag) * if large_nat_bitmap feature is enabled, leave checksum * protection for all nat/sit bitmaps. 
*/ - return &ckpt->sit_nat_version_bitmap + offset + sizeof(__le32); + return tmp_ptr + offset + sizeof(__le32); } if (__cp_payload(sbi) > 0) { @@ -2235,7 +2236,7 @@ static inline void *__bitmap_ptr(struct f2fs_sb_info *sbi, int flag) } else { offset = (flag == NAT_BITMAP) ? le32_to_cpu(ckpt->sit_ver_bitmap_bytesize) : 0; - return &ckpt->sit_nat_version_bitmap + offset; + return tmp_ptr + offset; } } diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index c6cc0a566ef5..5487a80617a3 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -168,7 +168,7 @@ struct f2fs_checkpoint { unsigned char alloc_type[MAX_ACTIVE_LOGS]; /* SIT and NAT version bitmap */ - unsigned char sit_nat_version_bitmap[1]; + unsigned char sit_nat_version_bitmap[]; } __packed; #define CP_CHKSUM_OFFSET 4092 /* default chksum offset in checkpoint */ From ebc29b62a166e9116cd8159e9798044d02130279 Mon Sep 17 00:00:00 2001 From: "huangjianan@oppo.com" Date: Sat, 27 Feb 2021 20:02:29 +0800 Subject: [PATCH 11/48] f2fs: remove unnecessary IS_SWAPFILE check Now swapfile in f2fs directly submit IO to blockdev according to swapfile extents reported by f2fs when swapon, therefore there is no need to check IS_SWAPFILE when exec filesystem operation. Signed-off-by: Huang Jianan Signed-off-by: Guo Weichao Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 2 +- fs/f2fs/f2fs.h | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 7c95818639a6..1531463768bf 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1722,7 +1722,7 @@ static int get_data_block_dio_write(struct inode *inode, sector_t iblock, return __get_data_block(inode, iblock, bh_result, create, F2FS_GET_BLOCK_DIO, NULL, f2fs_rw_hint_to_seg_type(inode->i_write_hint), - IS_SWAPFILE(inode) ? 
false : true); + true); } static int get_data_block_dio(struct inode *inode, sector_t iblock, diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 99e243fd26d5..cd004424debc 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -4177,8 +4177,7 @@ static inline bool f2fs_force_buffered_io(struct inode *inode, if (F2FS_IO_ALIGNED(sbi)) return true; } - if (is_sbi_flag_set(F2FS_I_SB(inode), SBI_CP_DISABLED) && - !IS_SWAPFILE(inode)) + if (is_sbi_flag_set(F2FS_I_SB(inode), SBI_CP_DISABLED)) return true; return false; From 1da66103837077df70ddf7a49c46dfd025001a60 Mon Sep 17 00:00:00 2001 From: "huangjianan@oppo.com" Date: Sat, 27 Feb 2021 20:02:30 +0800 Subject: [PATCH 12/48] f2fs: fix last_lblock check in check_swap_activate_fast Because page_no < sis->max guarantees that the while loop breaks out normally, the wrong check condition here doesn't cause a problem. Signed-off-by: Huang Jianan Signed-off-by: Guo Weichao Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 1531463768bf..398498fb99d5 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -3803,7 +3803,7 @@ static int check_swap_activate_fast(struct swap_info_struct *sis, last_lblock = bytes_to_blks(inode, i_size_read(inode)); len = i_size_read(inode); - while (cur_lblock <= last_lblock && cur_lblock < sis->max) { + while (cur_lblock < last_lblock && cur_lblock < sis->max) { struct f2fs_map_blocks map; pgoff_t next_pgofs; From 36e4d95891ed37eb98a660babec749be3fb35fd9 Mon Sep 17 00:00:00 2001 From: "huangjianan@oppo.com" Date: Mon, 1 Mar 2021 12:58:44 +0800 Subject: [PATCH 13/48] f2fs: check if swapfile is section-alligned If the swapfile isn't created by pin and fallocate, it can't be guaranteed section-aligned, so it may be selected by f2fs gc. 
When gc_pin_file_threshold is reached, the address of swapfile may change, but won't be synchronized to swap_extent, so swap will write to wrong address, which will cause data corruption. Signed-off-by: Huang Jianan Signed-off-by: Guo Weichao Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 109 +++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 88 insertions(+), 21 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 398498fb99d5..0e749cf60e11 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -3780,11 +3780,64 @@ int f2fs_migrate_page(struct address_space *mapping, #endif #ifdef CONFIG_SWAP +static int f2fs_is_file_aligned(struct inode *inode) +{ + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + block_t main_blkaddr = SM_I(sbi)->main_blkaddr; + block_t cur_lblock; + block_t last_lblock; + block_t pblock; + unsigned long nr_pblocks; + unsigned int blocks_per_sec = BLKS_PER_SEC(sbi); + int ret = 0; + + cur_lblock = 0; + last_lblock = bytes_to_blks(inode, i_size_read(inode)); + + while (cur_lblock < last_lblock) { + struct f2fs_map_blocks map; + + memset(&map, 0, sizeof(map)); + map.m_lblk = cur_lblock; + map.m_len = last_lblock - cur_lblock; + map.m_next_pgofs = NULL; + map.m_next_extent = NULL; + map.m_seg_type = NO_CHECK_TYPE; + map.m_may_create = false; + + ret = f2fs_map_blocks(inode, &map, 0, F2FS_GET_BLOCK_FIEMAP); + if (ret) + goto out; + + /* hole */ + if (!(map.m_flags & F2FS_MAP_FLAGS)) { + f2fs_err(sbi, "Swapfile has holes\n"); + ret = -ENOENT; + goto out; + } + + pblock = map.m_pblk; + nr_pblocks = map.m_len; + + if ((pblock - main_blkaddr) & (blocks_per_sec - 1) || + nr_pblocks & (blocks_per_sec - 1)) { + f2fs_err(sbi, "Swapfile does not align to section"); + ret = -EINVAL; + goto out; + } + + cur_lblock += nr_pblocks; + } +out: + return ret; +} + static int check_swap_activate_fast(struct swap_info_struct *sis, struct file *swap_file, sector_t *span) { struct address_space *mapping = swap_file->f_mapping; 
struct inode *inode = mapping->host; + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); sector_t cur_lblock; sector_t last_lblock; sector_t pblock; @@ -3792,8 +3845,8 @@ static int check_swap_activate_fast(struct swap_info_struct *sis, sector_t highest_pblock = 0; int nr_extents = 0; unsigned long nr_pblocks; - u64 len; - int ret; + unsigned int blocks_per_sec = BLKS_PER_SEC(sbi); + int ret = 0; /* * Map all the blocks into the extent list. This code doesn't try @@ -3801,31 +3854,41 @@ static int check_swap_activate_fast(struct swap_info_struct *sis, */ cur_lblock = 0; last_lblock = bytes_to_blks(inode, i_size_read(inode)); - len = i_size_read(inode); while (cur_lblock < last_lblock && cur_lblock < sis->max) { struct f2fs_map_blocks map; - pgoff_t next_pgofs; cond_resched(); memset(&map, 0, sizeof(map)); map.m_lblk = cur_lblock; - map.m_len = bytes_to_blks(inode, len) - cur_lblock; - map.m_next_pgofs = &next_pgofs; + map.m_len = last_lblock - cur_lblock; + map.m_next_pgofs = NULL; + map.m_next_extent = NULL; map.m_seg_type = NO_CHECK_TYPE; + map.m_may_create = false; ret = f2fs_map_blocks(inode, &map, 0, F2FS_GET_BLOCK_FIEMAP); if (ret) - goto err_out; + goto out; /* hole */ - if (!(map.m_flags & F2FS_MAP_FLAGS)) - goto err_out; + if (!(map.m_flags & F2FS_MAP_FLAGS)) { + f2fs_err(sbi, "Swapfile has holes\n"); + ret = -ENOENT; + goto out; + } pblock = map.m_pblk; nr_pblocks = map.m_len; + if ((pblock - SM_I(sbi)->main_blkaddr) & (blocks_per_sec - 1) || + nr_pblocks & (blocks_per_sec - 1)) { + f2fs_err(sbi, "Swapfile does not align to section"); + ret = -EINVAL; + goto out; + } + if (cur_lblock + nr_pblocks >= sis->max) nr_pblocks = sis->max - cur_lblock; @@ -3854,9 +3917,6 @@ static int check_swap_activate_fast(struct swap_info_struct *sis, sis->highest_bit = cur_lblock - 1; out: return ret; -err_out: - pr_err("swapon: swapfile has holes\n"); - return -EINVAL; } /* Copied from generic_swapfile_activate() to check any holes */ @@ -3865,6 +3925,7 @@ static int 
check_swap_activate(struct swap_info_struct *sis, { struct address_space *mapping = swap_file->f_mapping; struct inode *inode = mapping->host; + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); unsigned blocks_per_page; unsigned long page_no; sector_t probe_block; @@ -3872,11 +3933,15 @@ static int check_swap_activate(struct swap_info_struct *sis, sector_t lowest_block = -1; sector_t highest_block = 0; int nr_extents = 0; - int ret; + int ret = 0; if (PAGE_SIZE == F2FS_BLKSIZE) return check_swap_activate_fast(sis, swap_file, span); + ret = f2fs_is_file_aligned(inode); + if (ret) + goto out; + blocks_per_page = bytes_to_blks(inode, PAGE_SIZE); /* @@ -3891,13 +3956,14 @@ static int check_swap_activate(struct swap_info_struct *sis, unsigned block_in_page; sector_t first_block; sector_t block = 0; - int err = 0; cond_resched(); block = probe_block; - err = bmap(inode, &block); - if (err || !block) + ret = bmap(inode, &block); + if (ret) + goto out; + if (!block) goto bad_bmap; first_block = block; @@ -3913,9 +3979,10 @@ static int check_swap_activate(struct swap_info_struct *sis, block_in_page++) { block = probe_block + block_in_page; - err = bmap(inode, &block); - - if (err || !block) + ret = bmap(inode, &block); + if (ret) + goto out; + if (!block) goto bad_bmap; if (block != first_block + block_in_page) { @@ -3955,8 +4022,8 @@ reprobe: out: return ret; bad_bmap: - pr_err("swapon: swapfile has holes\n"); - return -EINVAL; + f2fs_err(sbi, "Swapfile has holes\n"); + return -ENOENT; } static int f2fs_swap_activate(struct swap_info_struct *sis, struct file *file, From 1153db095fd6c5cc59425171ddef4a4c83464643 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 2 Mar 2021 16:35:32 +0800 Subject: [PATCH 14/48] f2fs: remove unused file_clear_encrypt() - file_clear_encrypt() was never be used, remove it. - In addition, relocating macros for cleanup. 
Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index cd004424debc..fe9e0d4d3920 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -637,21 +637,26 @@ enum { #define FADVISE_MODIFIABLE_BITS (FADVISE_COLD_BIT | FADVISE_HOT_BIT) #define file_is_cold(inode) is_file(inode, FADVISE_COLD_BIT) -#define file_wrong_pino(inode) is_file(inode, FADVISE_LOST_PINO_BIT) #define file_set_cold(inode) set_file(inode, FADVISE_COLD_BIT) -#define file_lost_pino(inode) set_file(inode, FADVISE_LOST_PINO_BIT) #define file_clear_cold(inode) clear_file(inode, FADVISE_COLD_BIT) + +#define file_wrong_pino(inode) is_file(inode, FADVISE_LOST_PINO_BIT) +#define file_lost_pino(inode) set_file(inode, FADVISE_LOST_PINO_BIT) #define file_got_pino(inode) clear_file(inode, FADVISE_LOST_PINO_BIT) + #define file_is_encrypt(inode) is_file(inode, FADVISE_ENCRYPT_BIT) #define file_set_encrypt(inode) set_file(inode, FADVISE_ENCRYPT_BIT) -#define file_clear_encrypt(inode) clear_file(inode, FADVISE_ENCRYPT_BIT) + #define file_enc_name(inode) is_file(inode, FADVISE_ENC_NAME_BIT) #define file_set_enc_name(inode) set_file(inode, FADVISE_ENC_NAME_BIT) + #define file_keep_isize(inode) is_file(inode, FADVISE_KEEP_SIZE_BIT) #define file_set_keep_isize(inode) set_file(inode, FADVISE_KEEP_SIZE_BIT) + #define file_is_hot(inode) is_file(inode, FADVISE_HOT_BIT) #define file_set_hot(inode) set_file(inode, FADVISE_HOT_BIT) #define file_clear_hot(inode) clear_file(inode, FADVISE_HOT_BIT) + #define file_is_verity(inode) is_file(inode, FADVISE_VERITY_BIT) #define file_set_verity(inode) set_file(inode, FADVISE_VERITY_BIT) From 28e18ee636ba28532dbe425540af06245a0bbecb Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 4 Mar 2021 09:21:18 +0000 Subject: [PATCH 15/48] f2fs: fix a redundant call to f2fs_balance_fs if an error occurs The uninitialized variable dn.node_changed does not get set 
when a call to f2fs_get_node_page fails. This uninitialized value gets used in the call to f2fs_balance_fs() that may or may not balance dirty node and dentry pages depending on the uninitialized state of the variable. Fix this by only calling f2fs_balance_fs if err is not set. Thanks to Jaegeuk Kim for suggesting an appropriate fix. Addresses-Coverity: ("Uninitialized scalar variable") Fixes: 2a3407607028 ("f2fs: call f2fs_balance_fs only when node was changed") Signed-off-by: Colin Ian King Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/inline.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index 993caefcd2bb..92652ca7a7c8 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -219,7 +219,8 @@ out: f2fs_put_page(page, 1); - f2fs_balance_fs(sbi, dn.node_changed); + if (!err) + f2fs_balance_fs(sbi, dn.node_changed); return err; } From 3c0315424f5e3d2a4113c7272367bee1e8e6a174 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Thu, 4 Mar 2021 21:43:10 -0800 Subject: [PATCH 16/48] f2fs: fix error handling in f2fs_end_enable_verity() f2fs didn't properly clean up if verity failed to be enabled on a file: - It left verity metadata (pages past EOF) in the page cache, which would be exposed to userspace if the file was later extended. - It didn't truncate the verity metadata at all (either from cache or from disk) if an error occurred while setting the verity bit. Fix these bugs by adding a call to truncate_inode_pages() and ensuring that we truncate the verity metadata (both from cache and from disk) in all error paths. Also rework the code to cleanly separate the success path from the error paths, which makes it much easier to understand. Finally, log a message if f2fs_truncate() fails, since it might otherwise fail silently.
Reported-by: Yunlei He Fixes: 95ae251fe828 ("f2fs: add fs-verity support") Cc: # v5.4+ Signed-off-by: Eric Biggers Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/verity.c | 75 ++++++++++++++++++++++++++++++++++-------------- 1 file changed, 54 insertions(+), 21 deletions(-) diff --git a/fs/f2fs/verity.c b/fs/f2fs/verity.c index 054ec852b5ea..15ba36926fad 100644 --- a/fs/f2fs/verity.c +++ b/fs/f2fs/verity.c @@ -152,40 +152,73 @@ static int f2fs_end_enable_verity(struct file *filp, const void *desc, size_t desc_size, u64 merkle_tree_size) { struct inode *inode = file_inode(filp); + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); u64 desc_pos = f2fs_verity_metadata_pos(inode) + merkle_tree_size; struct fsverity_descriptor_location dloc = { .version = cpu_to_le32(F2FS_VERIFY_VER), .size = cpu_to_le32(desc_size), .pos = cpu_to_le64(desc_pos), }; - int err = 0; + int err = 0, err2 = 0; - if (desc != NULL) { - /* Succeeded; write the verity descriptor. */ - err = pagecache_write(inode, desc, desc_size, desc_pos); + /* + * If an error already occurred (which fs/verity/ signals by passing + * desc == NULL), then only clean-up is needed. + */ + if (desc == NULL) + goto cleanup; - /* Write all pages before clearing FI_VERITY_IN_PROGRESS. */ - if (!err) - err = filemap_write_and_wait(inode->i_mapping); - } + /* Append the verity descriptor. */ + err = pagecache_write(inode, desc, desc_size, desc_pos); + if (err) + goto cleanup; - /* If we failed, truncate anything we wrote past i_size. */ - if (desc == NULL || err) - f2fs_truncate(inode); + /* + * Write all pages (both data and verity metadata). Note that this must + * happen before clearing FI_VERITY_IN_PROGRESS; otherwise pages beyond + * i_size won't be written properly. For crash consistency, this also + * must happen before the verity inode flag gets persisted. + */ + err = filemap_write_and_wait(inode->i_mapping); + if (err) + goto cleanup; + + /* Set the verity xattr. 
*/ + err = f2fs_setxattr(inode, F2FS_XATTR_INDEX_VERITY, + F2FS_XATTR_NAME_VERITY, &dloc, sizeof(dloc), + NULL, XATTR_CREATE); + if (err) + goto cleanup; + + /* Finally, set the verity inode flag. */ + file_set_verity(inode); + f2fs_set_inode_flags(inode); + f2fs_mark_inode_dirty_sync(inode, true); clear_inode_flag(inode, FI_VERITY_IN_PROGRESS); + return 0; - if (desc != NULL && !err) { - err = f2fs_setxattr(inode, F2FS_XATTR_INDEX_VERITY, - F2FS_XATTR_NAME_VERITY, &dloc, sizeof(dloc), - NULL, XATTR_CREATE); - if (!err) { - file_set_verity(inode); - f2fs_set_inode_flags(inode); - f2fs_mark_inode_dirty_sync(inode, true); - } +cleanup: + /* + * Verity failed to be enabled, so clean up by truncating any verity + * metadata that was written beyond i_size (both from cache and from + * disk) and clearing FI_VERITY_IN_PROGRESS. + * + * Taking i_gc_rwsem[WRITE] is needed to stop f2fs garbage collection + * from re-instantiating cached pages we are truncating (since unlike + * normal file accesses, garbage collection isn't limited by i_size). + */ + down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); + truncate_inode_pages(inode->i_mapping, inode->i_size); + err2 = f2fs_truncate(inode); + if (err2) { + f2fs_err(sbi, "Truncating verity metadata failed (errno=%d)", + err2); + set_sbi_flag(sbi, SBI_NEED_FSCK); } - return err; + up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); + clear_inode_flag(inode, FI_VERITY_IN_PROGRESS); + return err ?: err2; } static int f2fs_get_verity_descriptor(struct inode *inode, void *buf, From 0823427989c11240ad0f23561e66ff31a927018f Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 1 Mar 2021 17:28:16 -0800 Subject: [PATCH 17/48] f2fs: expose # of overprivision segments This is useful when checking conditions during checkpoint=disable in Android. 
Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- Documentation/ABI/testing/sysfs-fs-f2fs | 5 +++++ fs/f2fs/sysfs.c | 9 +++++++++ 2 files changed, 14 insertions(+) diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs index 9fa5a528cc23..4aa8f38b52d7 100644 --- a/Documentation/ABI/testing/sysfs-fs-f2fs +++ b/Documentation/ABI/testing/sysfs-fs-f2fs @@ -409,3 +409,8 @@ Description: Give a way to change checkpoint merge daemon's io priority. I/O priority "3". We can select the class between "rt" and "be", and set the I/O priority within valid range of it. "," delimiter is necessary in between I/O class and priority number. + +What: /sys/fs/f2fs//ovp_segments +Date: March 2021 +Contact: "Jaegeuk Kim" +Description: Shows the number of overprovision segments. diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index e38a7f6921dd..0c391ab2d8b7 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -91,6 +91,13 @@ static ssize_t free_segments_show(struct f2fs_attr *a, (unsigned long long)(free_segments(sbi))); } +static ssize_t ovp_segments_show(struct f2fs_attr *a, + struct f2fs_sb_info *sbi, char *buf) +{ + return sprintf(buf, "%llu\n", + (unsigned long long)(overprovision_segments(sbi))); +} + static ssize_t lifetime_write_kbytes_show(struct f2fs_attr *a, struct f2fs_sb_info *sbi, char *buf) { @@ -629,6 +636,7 @@ F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, node_io_flag, node_io_flag); F2FS_RW_ATTR(CPRC_INFO, ckpt_req_control, ckpt_thread_ioprio, ckpt_thread_ioprio); F2FS_GENERAL_RO_ATTR(dirty_segments); F2FS_GENERAL_RO_ATTR(free_segments); +F2FS_GENERAL_RO_ATTR(ovp_segments); F2FS_GENERAL_RO_ATTR(lifetime_write_kbytes); F2FS_GENERAL_RO_ATTR(features); F2FS_GENERAL_RO_ATTR(current_reserved_blocks); @@ -715,6 +723,7 @@ static struct attribute *f2fs_attrs[] = { ATTR_LIST(ckpt_thread_ioprio), ATTR_LIST(dirty_segments), ATTR_LIST(free_segments), + ATTR_LIST(ovp_segments), ATTR_LIST(unusable), ATTR_LIST(lifetime_write_kbytes), 
ATTR_LIST(features), From e1175f02291141bbd924fc578299305fcde35855 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Fri, 5 Mar 2021 17:56:01 +0800 Subject: [PATCH 18/48] f2fs: fix to align to section for fallocate() on pinned file Now, fallocate() on a pinned file only allocates blocks which aligns to segment rather than section, so GC may try to migrate pinned file's block, and after several times of failure, pinned file's block could be migrated to other place, however user won't be aware of such condition, and then old obsolete block address may be readed/written incorrectly. To avoid such condition, let's try to allocate pinned file's blocks with section alignment. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 2 +- fs/f2fs/file.c | 19 +++++++++---------- fs/f2fs/segment.c | 34 ++++++++++++++++++++++++++-------- 3 files changed, 36 insertions(+), 19 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index fe9e0d4d3920..ac57072d73cf 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -3389,7 +3389,7 @@ void f2fs_get_new_segment(struct f2fs_sb_info *sbi, unsigned int *newseg, bool new_sec, int dir); void f2fs_allocate_segment_for_resize(struct f2fs_sb_info *sbi, int type, unsigned int start, unsigned int end); -void f2fs_allocate_new_segment(struct f2fs_sb_info *sbi, int type); +void f2fs_allocate_new_section(struct f2fs_sb_info *sbi, int type); void f2fs_allocate_new_segments(struct f2fs_sb_info *sbi); int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range); bool f2fs_exist_trim_candidates(struct f2fs_sb_info *sbi, diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 1863944f4073..bd5a77091d23 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -1646,27 +1646,26 @@ static int expand_inode_data(struct inode *inode, loff_t offset, return 0; if (f2fs_is_pinned_file(inode)) { - block_t len = (map.m_len >> sbi->log_blocks_per_seg) << - sbi->log_blocks_per_seg; + block_t sec_blks = BLKS_PER_SEC(sbi); + block_t sec_len = 
roundup(map.m_len, sec_blks); block_t done = 0; - if (map.m_len % sbi->blocks_per_seg) - len += sbi->blocks_per_seg; - - map.m_len = sbi->blocks_per_seg; + map.m_len = sec_blks; next_alloc: if (has_not_enough_free_secs(sbi, 0, GET_SEC_FROM_SEG(sbi, overprovision_segments(sbi)))) { down_write(&sbi->gc_lock); err = f2fs_gc(sbi, true, false, false, NULL_SEGNO); - if (err && err != -ENODATA && err != -EAGAIN) + if (err && err != -ENODATA && err != -EAGAIN) { + map.m_len = done; goto out_err; + } } down_write(&sbi->pin_sem); f2fs_lock_op(sbi); - f2fs_allocate_new_segment(sbi, CURSEG_COLD_DATA_PINNED); + f2fs_allocate_new_section(sbi, CURSEG_COLD_DATA_PINNED); f2fs_unlock_op(sbi); map.m_seg_type = CURSEG_COLD_DATA_PINNED; @@ -1675,9 +1674,9 @@ next_alloc: up_write(&sbi->pin_sem); done += map.m_len; - len -= map.m_len; + sec_len -= map.m_len; map.m_lblk += map.m_len; - if (!err && len) + if (!err && sec_len) goto next_alloc; map.m_len = done; diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index b5a40a39a03f..f2b22da8c134 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -2899,7 +2899,8 @@ unlock: up_read(&SM_I(sbi)->curseg_lock); } -static void __allocate_new_segment(struct f2fs_sb_info *sbi, int type) +static void __allocate_new_segment(struct f2fs_sb_info *sbi, int type, + bool new_sec) { struct curseg_info *curseg = CURSEG_I(sbi, type); unsigned int old_segno; @@ -2907,10 +2908,22 @@ static void __allocate_new_segment(struct f2fs_sb_info *sbi, int type) if (!curseg->inited) goto alloc; - if (!curseg->next_blkoff && - !get_valid_blocks(sbi, curseg->segno, false) && - !get_ckpt_valid_blocks(sbi, curseg->segno)) - return; + if (curseg->next_blkoff || + get_valid_blocks(sbi, curseg->segno, new_sec)) + goto alloc; + + if (new_sec) { + unsigned int segno = START_SEGNO(curseg->segno); + int i; + + for (i = 0; i < sbi->segs_per_sec; i++, segno++) { + if (get_ckpt_valid_blocks(sbi, segno)) + goto alloc; + } + } else { + if (!get_ckpt_valid_blocks(sbi, 
curseg->segno)) + return; + } alloc: old_segno = curseg->segno; @@ -2918,10 +2931,15 @@ alloc: locate_dirty_segment(sbi, old_segno); } -void f2fs_allocate_new_segment(struct f2fs_sb_info *sbi, int type) +static void __allocate_new_section(struct f2fs_sb_info *sbi, int type) +{ + __allocate_new_segment(sbi, type, true); +} + +void f2fs_allocate_new_section(struct f2fs_sb_info *sbi, int type) { down_write(&SIT_I(sbi)->sentry_lock); - __allocate_new_segment(sbi, type); + __allocate_new_section(sbi, type); up_write(&SIT_I(sbi)->sentry_lock); } @@ -2931,7 +2949,7 @@ void f2fs_allocate_new_segments(struct f2fs_sb_info *sbi) down_write(&SIT_I(sbi)->sentry_lock); for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) - __allocate_new_segment(sbi, i); + __allocate_new_segment(sbi, i, false); up_write(&SIT_I(sbi)->sentry_lock); } From 0bb2045ce5ce67b0428301c117ec960b3f705a44 Mon Sep 17 00:00:00 2001 From: Chengguang Xu Date: Tue, 9 Mar 2021 13:21:18 +0800 Subject: [PATCH 19/48] f2fs: fix to use per-inode maxbytes in f2fs_fiemap F2FS inode may have different max size, so change to use per-inode maxbytes. 
Signed-off-by: Chengguang Xu Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 0e749cf60e11..4bf7e79c8342 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1837,6 +1837,7 @@ int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, int ret = 0; bool compr_cluster = false; unsigned int cluster_size = F2FS_I(inode)->i_cluster_size; + loff_t maxbytes; if (fieinfo->fi_flags & FIEMAP_FLAG_CACHE) { ret = f2fs_precache_extents(inode); @@ -1850,6 +1851,15 @@ int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, inode_lock(inode); + maxbytes = max_file_blocks(inode) << F2FS_BLKSIZE_BITS; + if (start > maxbytes) { + ret = -EFBIG; + goto out; + } + + if (len > maxbytes || (maxbytes - len) < start) + len = maxbytes - start; + if (fieinfo->fi_flags & FIEMAP_FLAG_XATTR) { ret = f2fs_xattr_fiemap(inode, fieinfo); goto out; From 5ac443e26a096429065349c640538101012ce40d Mon Sep 17 00:00:00 2001 From: Daeho Jeong Date: Mon, 15 Mar 2021 17:12:33 +0900 Subject: [PATCH 20/48] f2fs: add sysfs nodes to get runtime compression stat I've added new sysfs nodes to show runtime compression stat since mount. 
compr_written_block - show the block count written after compression compr_saved_block - show the saved block count with compression compr_new_inode - show the count of inode newly enabled for compression Signed-off-by: Daeho Jeong Signed-off-by: Jaegeuk Kim --- Documentation/ABI/testing/sysfs-fs-f2fs | 24 ++++++++++++++++ fs/f2fs/compress.c | 1 + fs/f2fs/f2fs.h | 19 +++++++++++++ fs/f2fs/sysfs.c | 38 +++++++++++++++++++++++++ 4 files changed, 82 insertions(+) diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs index 4aa8f38b52d7..4849b8e84e42 100644 --- a/Documentation/ABI/testing/sysfs-fs-f2fs +++ b/Documentation/ABI/testing/sysfs-fs-f2fs @@ -414,3 +414,27 @@ What: /sys/fs/f2fs//ovp_segments Date: March 2021 Contact: "Jaegeuk Kim" Description: Shows the number of overprovision segments. + +What: /sys/fs/f2fs//compr_written_block +Date: March 2021 +Contact: "Daeho Jeong" +Description: Show the block count written after compression since mount. Note + that when the compressed blocks are deleted, this count doesn't + decrease. If you write "0" here, you can initialize + compr_written_block and compr_saved_block to "0". + +What: /sys/fs/f2fs//compr_saved_block +Date: March 2021 +Contact: "Daeho Jeong" +Description: Show the saved block count with compression since mount. Note + that when the compressed blocks are deleted, this count doesn't + decrease. If you write "0" here, you can initialize + compr_written_block and compr_saved_block to "0". + +What: /sys/fs/f2fs//compr_new_inode +Date: March 2021 +Contact: "Daeho Jeong" +Description: Show the count of inode newly enabled for compression since mount. + Note that when the compression is disabled for the files, this count + doesn't decrease. If you write "0" here, you can initialize + compr_new_inode to "0". 
diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c index 77fa342de38f..3c9d797dbdd6 100644 --- a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -1353,6 +1353,7 @@ unlock_continue: if (fio.compr_blocks) f2fs_i_compr_blocks_update(inode, fio.compr_blocks - 1, false); f2fs_i_compr_blocks_update(inode, cc->nr_cpages, true); + add_compr_block_stat(inode, cc->nr_cpages); set_inode_flag(cc->inode, FI_APPEND_WRITE); if (cc->cluster_idx == 0) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index ac57072d73cf..165bfe2a5a0e 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1628,6 +1628,11 @@ struct f2fs_sb_info { #ifdef CONFIG_F2FS_FS_COMPRESSION struct kmem_cache *page_array_slab; /* page array entry */ unsigned int page_array_slab_size; /* default page array slab size */ + + /* For runtime compression statistics */ + u64 compr_written_block; + u64 compr_saved_block; + u32 compr_new_inode; #endif }; @@ -3961,6 +3966,18 @@ int f2fs_init_page_array_cache(struct f2fs_sb_info *sbi); void f2fs_destroy_page_array_cache(struct f2fs_sb_info *sbi); int __init f2fs_init_compress_cache(void); void f2fs_destroy_compress_cache(void); +#define inc_compr_inode_stat(inode) \ + do { \ + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); \ + sbi->compr_new_inode++; \ + } while (0) +#define add_compr_block_stat(inode, blocks) \ + do { \ + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); \ + int diff = F2FS_I(inode)->i_cluster_size - blocks; \ + sbi->compr_written_block += blocks; \ + sbi->compr_saved_block += diff; \ + } while (0) #else static inline bool f2fs_is_compressed_page(struct page *page) { return false; } static inline bool f2fs_is_compress_backend_ready(struct inode *inode) @@ -3989,6 +4006,7 @@ static inline int f2fs_init_page_array_cache(struct f2fs_sb_info *sbi) { return static inline void f2fs_destroy_page_array_cache(struct f2fs_sb_info *sbi) { } static inline int __init f2fs_init_compress_cache(void) { return 0; } static inline void f2fs_destroy_compress_cache(void) { } +#define 
inc_compr_inode_stat(inode) do { } while (0) #endif static inline void set_compress_context(struct inode *inode) @@ -4012,6 +4030,7 @@ static inline void set_compress_context(struct inode *inode) F2FS_I(inode)->i_flags |= F2FS_COMPR_FL; set_inode_flag(inode, FI_COMPRESSED_FILE); stat_inc_compr_inode(inode); + inc_compr_inode_stat(inode); f2fs_mark_inode_dirty_sync(inode, true); } diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index 0c391ab2d8b7..39b522ec73e7 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -12,6 +12,7 @@ #include #include #include +#include #include "f2fs.h" #include "segment.h" @@ -289,6 +290,17 @@ static ssize_t f2fs_sbi_show(struct f2fs_attr *a, return len; } +#ifdef CONFIG_F2FS_FS_COMPRESSION + if (!strcmp(a->attr.name, "compr_written_block")) + return sysfs_emit(buf, "%llu\n", sbi->compr_written_block); + + if (!strcmp(a->attr.name, "compr_saved_block")) + return sysfs_emit(buf, "%llu\n", sbi->compr_saved_block); + + if (!strcmp(a->attr.name, "compr_new_inode")) + return sysfs_emit(buf, "%u\n", sbi->compr_new_inode); +#endif + ui = (unsigned int *)(ptr + a->offset); return sprintf(buf, "%u\n", *ui); @@ -465,6 +477,24 @@ out: return count; } +#ifdef CONFIG_F2FS_FS_COMPRESSION + if (!strcmp(a->attr.name, "compr_written_block") || + !strcmp(a->attr.name, "compr_saved_block")) { + if (t != 0) + return -EINVAL; + sbi->compr_written_block = 0; + sbi->compr_saved_block = 0; + return count; + } + + if (!strcmp(a->attr.name, "compr_new_inode")) { + if (t != 0) + return -EINVAL; + sbi->compr_new_inode = 0; + return count; + } +#endif + *ui = (unsigned int)t; return count; @@ -676,6 +706,9 @@ F2FS_FEATURE_RO_ATTR(sb_checksum, FEAT_SB_CHECKSUM); F2FS_FEATURE_RO_ATTR(casefold, FEAT_CASEFOLD); #ifdef CONFIG_F2FS_FS_COMPRESSION F2FS_FEATURE_RO_ATTR(compression, FEAT_COMPRESSION); +F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, compr_written_block, compr_written_block); +F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, compr_saved_block, compr_saved_block); 
+F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, compr_new_inode, compr_new_inode); #endif #define ATTR_LIST(name) (&f2fs_attr_##name.attr) @@ -739,6 +772,11 @@ static struct attribute *f2fs_attrs[] = { ATTR_LIST(moved_blocks_foreground), ATTR_LIST(moved_blocks_background), ATTR_LIST(avg_vblocks), +#endif +#ifdef CONFIG_F2FS_FS_COMPRESSION + ATTR_LIST(compr_written_block), + ATTR_LIST(compr_saved_block), + ATTR_LIST(compr_new_inode), #endif NULL, }; From ac2d750b2043cbe10d42ac974e07b9876cddfff8 Mon Sep 17 00:00:00 2001 From: Weichao Guo Date: Wed, 17 Mar 2021 17:27:23 +0800 Subject: [PATCH 21/48] f2fs: do not use AT_SSR mode in FG_GC & high urgent BG_GC AT_SSR mode is introduced by age threshold based GC for better hot/cold data seperation and avoiding free segment cost. However, LFS write mode is preferred in the scenario of foreground or high urgent GC, which should be finished ASAP. Let's only use AT_SSR in background GC and not high urgent GC modes. Signed-off-by: Weichao Guo Signed-off-by: Huang Jianan Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/gc.c | 3 ++- fs/f2fs/segment.c | 4 +++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 86ba8ed0b8a7..d96acc6531f2 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -1120,7 +1120,8 @@ static int move_data_block(struct inode *inode, block_t bidx, block_t newaddr; int err = 0; bool lfs_mode = f2fs_lfs_mode(fio.sbi); - int type = fio.sbi->am.atgc_enabled ? + int type = fio.sbi->am.atgc_enabled && (gc_type == BG_GC) && + (fio.sbi->gc_mode != GC_URGENT_HIGH) ? 
CURSEG_ALL_DATA_ATGC : CURSEG_COLD_DATA; /* do not read out */ diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index f2b22da8c134..b6f19518afd1 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -3263,7 +3263,9 @@ static int __get_segment_type_6(struct f2fs_io_info *fio) struct inode *inode = fio->page->mapping->host; if (is_cold_data(fio->page)) { - if (fio->sbi->am.atgc_enabled) + if (fio->sbi->am.atgc_enabled && + (fio->io_type == FS_DATA_IO) && + (fio->sbi->gc_mode != GC_URGENT_HIGH)) return CURSEG_ALL_DATA_ATGC; else return CURSEG_COLD_DATA; From 3f7070b05052f997d571a51e750583b9dea726f8 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 17 Mar 2021 17:56:03 +0800 Subject: [PATCH 22/48] f2fs: don't start checkpoint thread in readonly mountpoint In a readonly mountpoint, there should be no write IOs, including checkpoint IO, so there is no need to create the kernel checkpoint thread. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index e03b2f0f69d0..721dc64609b4 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -2069,8 +2069,10 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) } } - if (!test_opt(sbi, DISABLE_CHECKPOINT) && - test_opt(sbi, MERGE_CHECKPOINT)) { + if ((*flags & SB_RDONLY) || test_opt(sbi, DISABLE_CHECKPOINT) || + !test_opt(sbi, MERGE_CHECKPOINT)) { + f2fs_stop_ckpt_thread(sbi); + } else { err = f2fs_start_ckpt_thread(sbi); if (err) { f2fs_err(sbi, @@ -2078,8 +2080,6 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) err); goto restore_gc; } - } else { - f2fs_stop_ckpt_thread(sbi); } /* @@ -3835,7 +3835,7 @@ try_onemore: /* setup checkpoint request control and start checkpoint issue thread */ f2fs_init_ckpt_req_control(sbi); - if (!test_opt(sbi, DISABLE_CHECKPOINT) && + if (!f2fs_readonly(sb) && !test_opt(sbi, DISABLE_CHECKPOINT) && test_opt(sbi,
MERGE_CHECKPOINT)) { err = f2fs_start_ckpt_thread(sbi); if (err) { From b862676e371715456c9dade7990c8004996d0d9e Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 22 Mar 2021 19:47:30 +0800 Subject: [PATCH 23/48] f2fs: fix to avoid out-of-bounds memory access butt3rflyh4ck reported a bug found by syzkaller fuzzer with custom modifications in 5.12.0-rc3+ [1]: dump_stack+0xfa/0x151 lib/dump_stack.c:120 print_address_description.constprop.0.cold+0x82/0x32c mm/kasan/report.c:232 __kasan_report mm/kasan/report.c:399 [inline] kasan_report.cold+0x7c/0xd8 mm/kasan/report.c:416 f2fs_test_bit fs/f2fs/f2fs.h:2572 [inline] current_nat_addr fs/f2fs/node.h:213 [inline] get_next_nat_page fs/f2fs/node.c:123 [inline] __flush_nat_entry_set fs/f2fs/node.c:2888 [inline] f2fs_flush_nat_entries+0x258e/0x2960 fs/f2fs/node.c:2991 f2fs_write_checkpoint+0x1372/0x6a70 fs/f2fs/checkpoint.c:1640 f2fs_issue_checkpoint+0x149/0x410 fs/f2fs/checkpoint.c:1807 f2fs_sync_fs+0x20f/0x420 fs/f2fs/super.c:1454 __sync_filesystem fs/sync.c:39 [inline] sync_filesystem fs/sync.c:67 [inline] sync_filesystem+0x1b5/0x260 fs/sync.c:48 generic_shutdown_super+0x70/0x370 fs/super.c:448 kill_block_super+0x97/0xf0 fs/super.c:1394 The root cause is, if nat entry in checkpoint journal area is corrupted, e.g. nid of journalled nat entry exceeds max nid value, during checkpoint, once it tries to flush nat journal to NAT area, get_next_nat_page() may access out-of-bounds memory on nat_bitmap due to it uses wrong nid value as bitmap offset. 
[1] https://lore.kernel.org/lkml/CAFcO6XOMWdr8pObek6eN6-fs58KG9doRFadgJj-FnF-1x43s2g@mail.gmail.com/T/#u Reported-and-tested-by: butt3rflyh4ck Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 4b0e2e3c2c88..45c8cf1afe66 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -2785,6 +2785,9 @@ static void remove_nats_in_journal(struct f2fs_sb_info *sbi) struct f2fs_nat_entry raw_ne; nid_t nid = le32_to_cpu(nid_in_journal(journal, i)); + if (f2fs_check_nid_range(sbi, nid)) + continue; + raw_ne = nat_in_journal(journal, i); ne = __lookup_nat_cache(nm_i, nid); From f3e367d4fe2bcccb51d64cb974f73153d23adf15 Mon Sep 17 00:00:00 2001 From: qiulaibin Date: Tue, 23 Mar 2021 19:41:30 +0800 Subject: [PATCH 24/48] f2fs: fix wrong comment of nat_tree_lock Do trivial comment fix of nat_tree_lock. Signed-off-by: qiulaibin Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 165bfe2a5a0e..eb154d9cb063 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -865,7 +865,7 @@ struct f2fs_nm_info { /* NAT cache management */ struct radix_tree_root nat_root;/* root of the nat entry cache */ struct radix_tree_root nat_set_root;/* root of the nat set cache */ - struct rw_semaphore nat_tree_lock; /* protect nat_tree_lock */ + struct rw_semaphore nat_tree_lock; /* protect nat entry tree */ struct list_head nat_entries; /* cached nat entry list (clean) */ spinlock_t nat_list_lock; /* protect clean nat entry list */ unsigned int nat_cnt[MAX_NAT_STATE]; /* the # of cached nat entries */ From 3fd9735908287cdcd7dd04912e8ba7d749313f13 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 17 Mar 2021 17:56:04 +0800 Subject: [PATCH 25/48] f2fs: fix error path of f2fs_remount() In error path of f2fs_remount(), it missed to restart/stop kernel thread or enable/disable checkpoint, then 
mount option status may not be consistent with real condition of filesystem, so let's reorder remount flow a bit as below and do recovery correctly in error path: 1) handle gc thread 2) handle ckpt thread 3) handle flush thread 4) handle checkpoint disabling Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 47 ++++++++++++++++++++++++++++++++++------------- 1 file changed, 34 insertions(+), 13 deletions(-) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 721dc64609b4..b48281642e98 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1927,8 +1927,9 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) struct f2fs_mount_info org_mount_opt; unsigned long old_sb_flags; int err; - bool need_restart_gc = false; - bool need_stop_gc = false; + bool need_restart_gc = false, need_stop_gc = false; + bool need_restart_ckpt = false, need_stop_ckpt = false; + bool need_restart_flush = false, need_stop_flush = false; bool no_extent_cache = !test_opt(sbi, EXTENT_CACHE); bool disable_checkpoint = test_opt(sbi, DISABLE_CHECKPOINT); bool no_io_align = !F2FS_IO_ALIGNED(sbi); @@ -2059,19 +2060,10 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) clear_sbi_flag(sbi, SBI_IS_CLOSE); } - if (checkpoint_changed) { - if (test_opt(sbi, DISABLE_CHECKPOINT)) { - err = f2fs_disable_checkpoint(sbi); - if (err) - goto restore_gc; - } else { - f2fs_enable_checkpoint(sbi); - } - } - if ((*flags & SB_RDONLY) || test_opt(sbi, DISABLE_CHECKPOINT) || !test_opt(sbi, MERGE_CHECKPOINT)) { f2fs_stop_ckpt_thread(sbi); + need_restart_ckpt = true; } else { err = f2fs_start_ckpt_thread(sbi); if (err) { @@ -2080,6 +2072,7 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) err); goto restore_gc; } + need_stop_ckpt = true; } /* @@ -2089,11 +2082,24 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) if ((*flags & SB_RDONLY) || !test_opt(sbi, FLUSH_MERGE)) { clear_opt(sbi, FLUSH_MERGE); 
f2fs_destroy_flush_cmd_control(sbi, false); + need_restart_flush = true; } else { err = f2fs_create_flush_cmd_control(sbi); if (err) - goto restore_gc; + goto restore_ckpt; + need_stop_flush = true; } + + if (checkpoint_changed) { + if (test_opt(sbi, DISABLE_CHECKPOINT)) { + err = f2fs_disable_checkpoint(sbi); + if (err) + goto restore_flush; + } else { + f2fs_enable_checkpoint(sbi); + } + } + skip: #ifdef CONFIG_QUOTA /* Release old quota file names */ @@ -2108,6 +2114,21 @@ skip: adjust_unusable_cap_perc(sbi); *flags = (*flags & ~SB_LAZYTIME) | (sb->s_flags & SB_LAZYTIME); return 0; +restore_flush: + if (need_restart_flush) { + if (f2fs_create_flush_cmd_control(sbi)) + f2fs_warn(sbi, "background flush thread has stopped"); + } else if (need_stop_flush) { + clear_opt(sbi, FLUSH_MERGE); + f2fs_destroy_flush_cmd_control(sbi, false); + } +restore_ckpt: + if (need_restart_ckpt) { + if (f2fs_start_ckpt_thread(sbi)) + f2fs_warn(sbi, "background ckpt thread has stopped"); + } else if (need_stop_ckpt) { + f2fs_stop_ckpt_thread(sbi); + } restore_gc: if (need_restart_gc) { if (f2fs_start_gc_thread(sbi)) From 88f2cfc5fa90326edb569b4a81bb38ed4dcd3108 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 24 Mar 2021 11:24:33 +0800 Subject: [PATCH 26/48] f2fs: fix to update last i_size if fallocate partially succeeds In the case of expanding pinned file, map.m_lblk and map.m_len will update in each round of section allocation, so in error path, last i_size will be calculated with wrong m_lblk and m_len, fix it. 
Fixes: f5a53edcf01e ("f2fs: support aligned pinned file") Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index bd5a77091d23..dc79694e512c 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -1619,9 +1619,10 @@ static int expand_inode_data(struct inode *inode, loff_t offset, struct f2fs_map_blocks map = { .m_next_pgofs = NULL, .m_next_extent = NULL, .m_seg_type = NO_CHECK_TYPE, .m_may_create = true }; - pgoff_t pg_end; + pgoff_t pg_start, pg_end; loff_t new_size = i_size_read(inode); loff_t off_end; + block_t expanded = 0; int err; err = inode_newsize_ok(inode, (len + offset)); @@ -1634,11 +1635,12 @@ static int expand_inode_data(struct inode *inode, loff_t offset, f2fs_balance_fs(sbi, true); + pg_start = ((unsigned long long)offset) >> PAGE_SHIFT; pg_end = ((unsigned long long)offset + len) >> PAGE_SHIFT; off_end = (offset + len) & (PAGE_SIZE - 1); - map.m_lblk = ((unsigned long long)offset) >> PAGE_SHIFT; - map.m_len = pg_end - map.m_lblk; + map.m_lblk = pg_start; + map.m_len = pg_end - pg_start; if (off_end) map.m_len++; @@ -1648,7 +1650,6 @@ static int expand_inode_data(struct inode *inode, loff_t offset, if (f2fs_is_pinned_file(inode)) { block_t sec_blks = BLKS_PER_SEC(sbi); block_t sec_len = roundup(map.m_len, sec_blks); - block_t done = 0; map.m_len = sec_blks; next_alloc: @@ -1656,10 +1657,8 @@ next_alloc: GET_SEC_FROM_SEG(sbi, overprovision_segments(sbi)))) { down_write(&sbi->gc_lock); err = f2fs_gc(sbi, true, false, false, NULL_SEGNO); - if (err && err != -ENODATA && err != -EAGAIN) { - map.m_len = done; + if (err && err != -ENODATA && err != -EAGAIN) goto out_err; - } } down_write(&sbi->pin_sem); @@ -1673,24 +1672,25 @@ next_alloc: up_write(&sbi->pin_sem); - done += map.m_len; + expanded += map.m_len; sec_len -= map.m_len; map.m_lblk += map.m_len; if (!err && sec_len) goto next_alloc; - map.m_len = done; + 
map.m_len = expanded; } else { err = f2fs_map_blocks(inode, &map, 1, F2FS_GET_BLOCK_PRE_AIO); + expanded = map.m_len; } out_err: if (err) { pgoff_t last_off; - if (!map.m_len) + if (!expanded) return err; - last_off = map.m_lblk + map.m_len - 1; + last_off = pg_start + expanded - 1; /* update new size to the failed position */ new_size = (last_off == pg_end) ? offset + len : From 61461fc921b756ae16e64243f72af2bfc2e620db Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 24 Mar 2021 11:18:28 +0800 Subject: [PATCH 27/48] f2fs: fix to avoid touching checkpointed data in get_victim() In CP disabling mode, there are two issues when using LFS or SSR | AT_SSR mode to select victim: 1. LFS is set to find source section during GC, the victim should have no checkpointed data, since after GC, section could not be set free for reuse. Previously, we only check valid chpt blocks in current segment rather than section, fix it. 2. SSR | AT_SSR are set to find target segment for writes which can be fully filled by checkpointed and newly written blocks, we should never select such segment, otherwise it can cause panic or data corruption during allocation, potential case is described as below: a) target segment has 'n' (n < 512) ckpt valid blocks b) GC migrates 'n' valid blocks to other segment (segment is still in dirty list) c) GC migrates '512 - n' blocks to target segment (segment has 'n' cp_vblocks and '512 - n' vblocks) d) If GC selects target segment via {AT,}SSR allocator, however there is no free space in target segment.
Fixes: 4354994f097d ("f2fs: checkpoint disabling") Fixes: 093749e296e2 ("f2fs: support age threshold based garbage collection") Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 1 + fs/f2fs/gc.c | 28 ++++++++++++++++++++-------- fs/f2fs/segment.c | 36 +++++++++++++++++++++--------------- fs/f2fs/segment.h | 14 +++++++++++++- 4 files changed, 55 insertions(+), 24 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index eb154d9cb063..fe380bcf8d4d 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -3387,6 +3387,7 @@ block_t f2fs_get_unusable_blocks(struct f2fs_sb_info *sbi); int f2fs_disable_cp_again(struct f2fs_sb_info *sbi, block_t unusable); void f2fs_release_discard_addrs(struct f2fs_sb_info *sbi); int f2fs_npages_for_summary_flush(struct f2fs_sb_info *sbi, bool for_ra); +bool f2fs_segment_has_free_slot(struct f2fs_sb_info *sbi, int segno); void f2fs_init_inmem_curseg(struct f2fs_sb_info *sbi); void f2fs_save_inmem_curseg(struct f2fs_sb_info *sbi); void f2fs_restore_inmem_curseg(struct f2fs_sb_info *sbi); diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index d96acc6531f2..a2ca483f9855 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -392,10 +392,6 @@ static void add_victim_entry(struct f2fs_sb_info *sbi, if (p->gc_mode == GC_AT && get_valid_blocks(sbi, segno, true) == 0) return; - - if (p->alloc_mode == AT_SSR && - get_seg_entry(sbi, segno)->ckpt_valid_blocks == 0) - return; } for (i = 0; i < sbi->segs_per_sec; i++) @@ -728,11 +724,27 @@ retry: if (sec_usage_check(sbi, secno)) goto next; + /* Don't touch checkpointed data */ - if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED) && - get_ckpt_valid_blocks(sbi, segno) && - p.alloc_mode == LFS)) - goto next; + if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) { + if (p.alloc_mode == LFS) { + /* + * LFS is set to find source section during GC. + * The victim should have no checkpointed data. 
+ */ + if (get_ckpt_valid_blocks(sbi, segno, true)) + goto next; + } else { + /* + * SSR | AT_SSR are set to find target segment + * for writes which can be full by checkpointed + * and newly written blocks. + */ + if (!f2fs_segment_has_free_slot(sbi, segno)) + goto next; + } + } + if (gc_type == BG_GC && test_bit(secno, dirty_i->victim_secmap)) goto next; diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index b6f19518afd1..33cb8aa5ec8f 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -865,7 +865,7 @@ static void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno) mutex_lock(&dirty_i->seglist_lock); valid_blocks = get_valid_blocks(sbi, segno, false); - ckpt_valid_blocks = get_ckpt_valid_blocks(sbi, segno); + ckpt_valid_blocks = get_ckpt_valid_blocks(sbi, segno, false); if (valid_blocks == 0 && (!is_sbi_flag_set(sbi, SBI_CP_DISABLED) || ckpt_valid_blocks == usable_blocks)) { @@ -950,7 +950,7 @@ static unsigned int get_free_segment(struct f2fs_sb_info *sbi) for_each_set_bit(segno, dirty_i->dirty_segmap[DIRTY], MAIN_SEGS(sbi)) { if (get_valid_blocks(sbi, segno, false)) continue; - if (get_ckpt_valid_blocks(sbi, segno)) + if (get_ckpt_valid_blocks(sbi, segno, false)) continue; mutex_unlock(&dirty_i->seglist_lock); return segno; @@ -2642,6 +2642,23 @@ static void __refresh_next_blkoff(struct f2fs_sb_info *sbi, seg->next_blkoff++; } +bool f2fs_segment_has_free_slot(struct f2fs_sb_info *sbi, int segno) +{ + struct seg_entry *se = get_seg_entry(sbi, segno); + int entries = SIT_VBLOCK_MAP_SIZE / sizeof(unsigned long); + unsigned long *target_map = SIT_I(sbi)->tmp_map; + unsigned long *ckpt_map = (unsigned long *)se->ckpt_valid_map; + unsigned long *cur_map = (unsigned long *)se->cur_valid_map; + int i, pos; + + for (i = 0; i < entries; i++) + target_map[i] = ckpt_map[i] | cur_map[i]; + + pos = __find_rev_next_zero_bit(target_map, sbi->blocks_per_seg, 0); + + return pos < sbi->blocks_per_seg; +} + /* * This function always allocates a used 
segment(from dirty seglist) by SSR * manner, so it should recover the existing segment information of valid blocks @@ -2912,19 +2929,8 @@ static void __allocate_new_segment(struct f2fs_sb_info *sbi, int type, get_valid_blocks(sbi, curseg->segno, new_sec)) goto alloc; - if (new_sec) { - unsigned int segno = START_SEGNO(curseg->segno); - int i; - - for (i = 0; i < sbi->segs_per_sec; i++, segno++) { - if (get_ckpt_valid_blocks(sbi, segno)) - goto alloc; - } - } else { - if (!get_ckpt_valid_blocks(sbi, curseg->segno)) - return; - } - + if (!get_ckpt_valid_blocks(sbi, curseg->segno, new_sec)) + return; alloc: old_segno = curseg->segno; SIT_I(sbi)->s_ops->allocate_segment(sbi, type, true); diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index 144980b62f9e..dab87ecba2b5 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -359,8 +359,20 @@ static inline unsigned int get_valid_blocks(struct f2fs_sb_info *sbi, } static inline unsigned int get_ckpt_valid_blocks(struct f2fs_sb_info *sbi, - unsigned int segno) + unsigned int segno, bool use_section) { + if (use_section && __is_large_section(sbi)) { + unsigned int start_segno = START_SEGNO(segno); + unsigned int blocks = 0; + int i; + + for (i = 0; i < sbi->segs_per_sec; i++, start_segno++) { + struct seg_entry *se = get_seg_entry(sbi, start_segno); + + blocks += se->ckpt_valid_blocks; + } + return blocks; + } return get_seg_entry(sbi, segno)->ckpt_valid_blocks; } From d6d2b491a82e1e411a6766fbfb87c697d8701554 Mon Sep 17 00:00:00 2001 From: Sahitya Tummala Date: Tue, 16 Mar 2021 14:59:18 +0530 Subject: [PATCH 28/48] f2fs: allow to change discard policy based on cached discard cmds With the default DPOLICY_BG discard thread is ioaware, which prevents the discard thread from issuing the discard commands. On low RAM setups, it is observed that these discard commands in the cache are consuming high memory. 
This patch aims to relax the memory pressure on the system due to f2fs pending discard cmds by changing the policy to DPOLICY_FORCE based on the nm_i->ram_thresh configured. Signed-off-by: Sahitya Tummala Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 8 ++++++++ fs/f2fs/node.h | 1 + fs/f2fs/segment.c | 3 ++- 3 files changed, 11 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 45c8cf1afe66..3eb724bb6594 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -43,11 +43,15 @@ int f2fs_check_nid_range(struct f2fs_sb_info *sbi, nid_t nid) bool f2fs_available_free_memory(struct f2fs_sb_info *sbi, int type) { struct f2fs_nm_info *nm_i = NM_I(sbi); + struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; struct sysinfo val; unsigned long avail_ram; unsigned long mem_size = 0; bool res = false; + if (!nm_i) + return true; + si_meminfo(&val); /* only uses low memory */ @@ -89,6 +93,10 @@ bool f2fs_available_free_memory(struct f2fs_sb_info *sbi, int type) /* it allows 20% / total_ram for inmemory pages */ mem_size = get_pages(sbi, F2FS_INMEM_PAGES); res = mem_size < (val.totalram / 5); + } else if (type == DISCARD_CACHE) { + mem_size = (atomic_read(&dcc->discard_cmd_cnt) * + sizeof(struct discard_cmd)) >> PAGE_SHIFT; + res = mem_size < (avail_ram * nm_i->ram_thresh / 100); } else { if (!sbi->sb->s_bdi->wb.dirty_exceeded) return true; diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h index f84541b57acb..7a45c0f10629 100644 --- a/fs/f2fs/node.h +++ b/fs/f2fs/node.h @@ -147,6 +147,7 @@ enum mem_type { INO_ENTRIES, /* indicates inode entries */ EXTENT_CACHE, /* indicates extent cache */ INMEM_PAGES, /* indicates inmemory pages */ + DISCARD_CACHE, /* indicates memory of cached discard cmds */ BASE_CHECK, /* check kernel status */ }; diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 33cb8aa5ec8f..ad48f1f16387 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1762,7 +1762,8 @@ static int issue_discard_thread(void *data) if 
(!atomic_read(&dcc->discard_cmd_cnt)) continue; - if (sbi->gc_mode == GC_URGENT_HIGH) + if (sbi->gc_mode == GC_URGENT_HIGH || + !f2fs_available_free_memory(sbi, DISCARD_CACHE)) __init_discard_policy(sbi, &dpolicy, DPOLICY_FORCE, 1); sb_start_intwrite(sbi->sb); From 2c718feead3533647a061501122457a16a355736 Mon Sep 17 00:00:00 2001 From: Ruiqi Gong Date: Thu, 25 Mar 2021 02:38:11 -0400 Subject: [PATCH 29/48] f2fs: fix a typo in inode.c Do a trivial typo fix. s/runing/running Reported-by: Hulk Robot Signed-off-by: Ruiqi Gong Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 349d9cb933ee..5d2253d53f17 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -698,7 +698,7 @@ int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc) /* * We need to balance fs here to prevent from producing dirty node pages - * during the urgent cleaning time when runing out of free sections. + * during the urgent cleaning time when running out of free sections. */ f2fs_update_inode_page(inode); if (wbc && wbc->nr_to_write) From e8bf1f522aee3b3e1e7658e8f224dca1d88c3338 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Fri, 26 Mar 2021 22:41:43 +0800 Subject: [PATCH 30/48] f2fs: delete empty compress.h Commit 75e91c888989 ("f2fs: compress: fix compression chksum") wrongly introduced empty compress.h, delete it. 
Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/compress.h | 0 1 file changed, 0 insertions(+), 0 deletions(-) delete mode 100644 fs/f2fs/compress.h diff --git a/fs/f2fs/compress.h b/fs/f2fs/compress.h deleted file mode 100644 index e69de29bb2d1..000000000000 From 753a8ed0ae9c196a7d09a17aae1e354cabd1233d Mon Sep 17 00:00:00 2001 From: Wang Xiaojun Date: Thu, 25 Mar 2021 10:19:20 -0400 Subject: [PATCH 31/48] f2fs: fix wrong alloc_type in f2fs_do_replace_block If the alloc_type of the original curseg is LFS, when we change_curseg and then do recover curseg, the alloc_type becomes SSR. Signed-off-by: Wang Xiaojun Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index ad48f1f16387..c19114be554c 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -3572,6 +3572,7 @@ void f2fs_do_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, struct seg_entry *se; int type; unsigned short old_blkoff; + unsigned char old_alloc_type; segno = GET_SEGNO(sbi, new_blkaddr); se = get_seg_entry(sbi, segno); @@ -3605,6 +3606,7 @@ void f2fs_do_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, old_cursegno = curseg->segno; old_blkoff = curseg->next_blkoff; + old_alloc_type = curseg->alloc_type; /* change the current segment */ if (segno != curseg->segno) { @@ -3639,6 +3641,7 @@ void f2fs_do_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, change_curseg(sbi, type, true); } curseg->next_blkoff = old_blkoff; + curseg->alloc_type = old_alloc_type; } up_write(&sit_i->sentry_lock); From 823d13e12b6cbaef2f6e5d63c648643e7bc094dd Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Fri, 26 Mar 2021 09:46:22 +0800 Subject: [PATCH 32/48] f2fs: fix to cover __allocate_new_section() with curseg_lock In order to avoid race with f2fs_do_replace_block(). 
Fixes: f5a53edcf01e ("f2fs: support aligned pinned file") Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index c19114be554c..24ad45f5e335 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -2945,19 +2945,23 @@ static void __allocate_new_section(struct f2fs_sb_info *sbi, int type) void f2fs_allocate_new_section(struct f2fs_sb_info *sbi, int type) { + down_read(&SM_I(sbi)->curseg_lock); down_write(&SIT_I(sbi)->sentry_lock); __allocate_new_section(sbi, type); up_write(&SIT_I(sbi)->sentry_lock); + up_read(&SM_I(sbi)->curseg_lock); } void f2fs_allocate_new_segments(struct f2fs_sb_info *sbi) { int i; + down_read(&SM_I(sbi)->curseg_lock); down_write(&SIT_I(sbi)->sentry_lock); for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) __allocate_new_segment(sbi, i, false); up_write(&SIT_I(sbi)->sentry_lock); + up_read(&SM_I(sbi)->curseg_lock); } static const struct segment_allocation default_salloc_ops = { From 5911d2d1d1a38b26585383478bd71d9254e48bdf Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Sat, 27 Mar 2021 17:57:06 +0800 Subject: [PATCH 33/48] f2fs: introduce gc_merge mount option In this patch, we will add two new mount options: "gc_merge" and "nogc_merge", when background_gc is on, "gc_merge" option can be set to let background GC thread to handle foreground GC requests, it can eliminate the sluggish issue caused by slow foreground GC operation when GC is triggered from a process with limited I/O and CPU resources. Original idea is from Xiang. 
Signed-off-by: Gao Xiang Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- Documentation/filesystems/f2fs.rst | 6 ++++++ fs/f2fs/f2fs.h | 1 + fs/f2fs/gc.c | 26 ++++++++++++++++++++++---- fs/f2fs/gc.h | 6 ++++++ fs/f2fs/segment.c | 15 +++++++++++++-- fs/f2fs/super.c | 19 +++++++++++++++++-- 6 files changed, 65 insertions(+), 8 deletions(-) diff --git a/Documentation/filesystems/f2fs.rst b/Documentation/filesystems/f2fs.rst index 35ed01a5fbc9..63c0c49b726d 100644 --- a/Documentation/filesystems/f2fs.rst +++ b/Documentation/filesystems/f2fs.rst @@ -110,6 +110,12 @@ background_gc=%s Turn on/off cleaning operations, namely garbage on synchronous garbage collection running in background. Default value for this option is on. So garbage collection is on by default. +gc_merge When background_gc is on, this option can be enabled to + let background GC thread to handle foreground GC requests, + it can eliminate the sluggish issue caused by slow foreground + GC operation when GC is triggered from a process with limited + I/O and CPU resources. +nogc_merge Disable GC merge feature. 
disable_roll_forward Disable the roll-forward recovery routine norecovery Disable the roll-forward recovery routine, mounted read- only (i.e., -o ro,disable_roll_forward) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index fe380bcf8d4d..87d734f5589d 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -97,6 +97,7 @@ extern const char *f2fs_fault_name[FAULT_MAX]; #define F2FS_MOUNT_NORECOVERY 0x04000000 #define F2FS_MOUNT_ATGC 0x08000000 #define F2FS_MOUNT_MERGE_CHECKPOINT 0x10000000 +#define F2FS_MOUNT_GC_MERGE 0x20000000 #define F2FS_OPTION(sbi) ((sbi)->mount_opt) #define clear_opt(sbi, option) (F2FS_OPTION(sbi).opt &= ~F2FS_MOUNT_##option) diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index a2ca483f9855..5c48825fd12d 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -31,19 +31,24 @@ static int gc_thread_func(void *data) struct f2fs_sb_info *sbi = data; struct f2fs_gc_kthread *gc_th = sbi->gc_thread; wait_queue_head_t *wq = &sbi->gc_thread->gc_wait_queue_head; + wait_queue_head_t *fggc_wq = &sbi->gc_thread->fggc_wq; unsigned int wait_ms; wait_ms = gc_th->min_sleep_time; set_freezable(); do { - bool sync_mode; + bool sync_mode, foreground = false; wait_event_interruptible_timeout(*wq, kthread_should_stop() || freezing(current) || + waitqueue_active(fggc_wq) || gc_th->gc_wake, msecs_to_jiffies(wait_ms)); + if (test_opt(sbi, GC_MERGE) && waitqueue_active(fggc_wq)) + foreground = true; + /* give it a try one time */ if (gc_th->gc_wake) gc_th->gc_wake = 0; @@ -90,7 +95,10 @@ static int gc_thread_func(void *data) goto do_gc; } - if (!down_write_trylock(&sbi->gc_lock)) { + if (foreground) { + down_write(&sbi->gc_lock); + goto do_gc; + } else if (!down_write_trylock(&sbi->gc_lock)) { stat_other_skip_bggc_count(sbi); goto next; } @@ -107,14 +115,22 @@ static int gc_thread_func(void *data) else increase_sleep_time(gc_th, &wait_ms); do_gc: - stat_inc_bggc_count(sbi->stat_info); + if (!foreground) + stat_inc_bggc_count(sbi->stat_info); sync_mode = F2FS_OPTION(sbi).bggc_mode == 
BGGC_MODE_SYNC; + /* foreground GC was been triggered via f2fs_balance_fs() */ + if (foreground) + sync_mode = false; + /* if return value is not zero, no victim was selected */ - if (f2fs_gc(sbi, sync_mode, true, false, NULL_SEGNO)) + if (f2fs_gc(sbi, sync_mode, !foreground, false, NULL_SEGNO)) wait_ms = gc_th->no_gc_sleep_time; + if (foreground) + wake_up_all(&gc_th->fggc_wq); + trace_f2fs_background_gc(sbi->sb, wait_ms, prefree_segments(sbi), free_segments(sbi)); @@ -148,6 +164,7 @@ int f2fs_start_gc_thread(struct f2fs_sb_info *sbi) sbi->gc_thread = gc_th; init_waitqueue_head(&sbi->gc_thread->gc_wait_queue_head); + init_waitqueue_head(&sbi->gc_thread->fggc_wq); sbi->gc_thread->f2fs_gc_task = kthread_run(gc_thread_func, sbi, "f2fs_gc-%u:%u", MAJOR(dev), MINOR(dev)); if (IS_ERR(gc_th->f2fs_gc_task)) { @@ -165,6 +182,7 @@ void f2fs_stop_gc_thread(struct f2fs_sb_info *sbi) if (!gc_th) return; kthread_stop(gc_th->f2fs_gc_task); + wake_up_all(&gc_th->fggc_wq); kfree(gc_th); sbi->gc_thread = NULL; } diff --git a/fs/f2fs/gc.h b/fs/f2fs/gc.h index 0c8dae12dc51..3fe145e8e594 100644 --- a/fs/f2fs/gc.h +++ b/fs/f2fs/gc.h @@ -42,6 +42,12 @@ struct f2fs_gc_kthread { /* for changing gc mode */ unsigned int gc_wake; + + /* for GC_MERGE mount option */ + wait_queue_head_t fggc_wq; /* + * caller of f2fs_balance_fs() + * will wait on this wait queue. + */ }; struct gc_inode_list { diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 24ad45f5e335..31ccea1378fa 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -503,8 +503,19 @@ void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need) * dir/node pages without enough free segments. 
*/ if (has_not_enough_free_secs(sbi, 0, 0)) { - down_write(&sbi->gc_lock); - f2fs_gc(sbi, false, false, false, NULL_SEGNO); + if (test_opt(sbi, GC_MERGE) && sbi->gc_thread && + sbi->gc_thread->f2fs_gc_task) { + DEFINE_WAIT(wait); + + prepare_to_wait(&sbi->gc_thread->fggc_wq, &wait, + TASK_UNINTERRUPTIBLE); + wake_up(&sbi->gc_thread->gc_wait_queue_head); + io_schedule(); + finish_wait(&sbi->gc_thread->fggc_wq, &wait); + } else { + down_write(&sbi->gc_lock); + f2fs_gc(sbi, false, false, false, NULL_SEGNO); + } } } diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index b48281642e98..954b1fe97d67 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -151,6 +151,8 @@ enum { Opt_compress_chksum, Opt_compress_mode, Opt_atgc, + Opt_gc_merge, + Opt_nogc_merge, Opt_err, }; @@ -223,6 +225,8 @@ static match_table_t f2fs_tokens = { {Opt_compress_chksum, "compress_chksum"}, {Opt_compress_mode, "compress_mode=%s"}, {Opt_atgc, "atgc"}, + {Opt_gc_merge, "gc_merge"}, + {Opt_nogc_merge, "nogc_merge"}, {Opt_err, NULL}, }; @@ -1073,6 +1077,12 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount) case Opt_atgc: set_opt(sbi, ATGC); break; + case Opt_gc_merge: + set_opt(sbi, GC_MERGE); + break; + case Opt_nogc_merge: + clear_opt(sbi, GC_MERGE); + break; default: f2fs_err(sbi, "Unrecognized mount option \"%s\" or missing value", p); @@ -1675,6 +1685,9 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root) else if (F2FS_OPTION(sbi).bggc_mode == BGGC_MODE_OFF) seq_printf(seq, ",background_gc=%s", "off"); + if (test_opt(sbi, GC_MERGE)) + seq_puts(seq, ",gc_merge"); + if (test_opt(sbi, DISABLE_ROLL_FORWARD)) seq_puts(seq, ",disable_roll_forward"); if (test_opt(sbi, NORECOVERY)) @@ -2038,7 +2051,8 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) * option. Also sync the filesystem. 
*/ if ((*flags & SB_RDONLY) || - F2FS_OPTION(sbi).bggc_mode == BGGC_MODE_OFF) { + (F2FS_OPTION(sbi).bggc_mode == BGGC_MODE_OFF && + !test_opt(sbi, GC_MERGE))) { if (sbi->gc_thread) { f2fs_stop_gc_thread(sbi); need_restart_gc = true; @@ -4012,7 +4026,8 @@ reset_checkpoint: * If filesystem is not mounted as read-only then * do start the gc_thread. */ - if (F2FS_OPTION(sbi).bggc_mode != BGGC_MODE_OFF && !f2fs_readonly(sb)) { + if ((F2FS_OPTION(sbi).bggc_mode != BGGC_MODE_OFF || + test_opt(sbi, GC_MERGE)) && !f2fs_readonly(sb)) { /* After POR, we can run background GC thread.*/ err = f2fs_start_gc_thread(sbi); if (err) From 23738e74472f9c5f3a05a68724a2ccfba97d283d Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 31 Mar 2021 11:16:32 +0800 Subject: [PATCH 34/48] f2fs: fix to restrict mount condition on readonly block device When we mount an unclean f2fs image in a readonly block device, let's make mount() succeed only when there is no recoverable data in that image, otherwise after mount(), fsynced files won't be recovered as the user expects. Fixes: 938a184265d7 ("f2fs: give a warning only for readonly partition") Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 954b1fe97d67..14239e2b7ae7 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -3966,10 +3966,18 @@ try_onemore: * previous checkpoint was not done by clean system shutdown.
*/ if (f2fs_hw_is_readonly(sbi)) { - if (!is_set_ckpt_flags(sbi, CP_UMOUNT_FLAG)) - f2fs_err(sbi, "Need to recover fsync data, but write access unavailable"); - else - f2fs_info(sbi, "write access unavailable, skipping recovery"); + if (!is_set_ckpt_flags(sbi, CP_UMOUNT_FLAG)) { + err = f2fs_recover_fsync_data(sbi, true); + if (err > 0) { + err = -EROFS; + f2fs_err(sbi, "Need to recover fsync data, but " + "write access unavailable, please try " + "mount w/ disable_roll_forward or norecovery"); + } + if (err < 0) + goto free_meta; + } + f2fs_info(sbi, "write access unavailable, skipping recovery"); goto reset_checkpoint; } From be1ee45d51384161681ecf21085a42d316ae25f7 Mon Sep 17 00:00:00 2001 From: Yi Zhuang Date: Wed, 31 Mar 2021 17:34:14 +0800 Subject: [PATCH 35/48] f2fs: Fix a hungtask problem in atomic write MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In the cache writing process, if it is an atomic file, increase the page count of F2FS_WB_CP_DATA, otherwise increase the page count of F2FS_WB_DATA. When you step into the hook branch due to insufficient memory in f2fs_write_begin, f2fs_drop_inmem_pages_all will be called to traverse all atomic inodes and clear the FI_ATOMIC_FILE mark of all atomic files. In f2fs_drop_inmem_pages, first acquire the inmem_lock, revoke all the inmem_pages, and then clear the FI_ATOMIC_FILE mark. Before this mark is cleared, other threads may hold inmem_lock to add inmem_pages to the inode whose inmem_pages have just been emptied, and increase the page count of F2FS_WB_CP_DATA. When the IO returns, it is found that the FI_ATOMIC_FILE flag is cleared by f2fs_drop_inmem_pages_all, and f2fs_is_atomic_file returns false, which causes the page count of F2FS_WB_DATA to be decremented. The page count of F2FS_WB_CP_DATA cannot be cleared. Finally, hungtask is triggered in f2fs_wait_on_all_pages because get_pages will never return zero.
process A: process B: f2fs_drop_inmem_pages_all ->f2fs_drop_inmem_pages of inode#1 ->mutex_lock(&fi->inmem_lock) ->__revoke_inmem_pages of inode#1 f2fs_ioc_commit_atomic_write ->mutex_unlock(&fi->inmem_lock) ->f2fs_commit_inmem_pages of inode#1 ->mutex_lock(&fi->inmem_lock) ->__f2fs_commit_inmem_pages ->f2fs_do_write_data_page ->f2fs_outplace_write_data ->do_write_page ->f2fs_submit_page_write ->inc_page_count(sbi, F2FS_WB_CP_DATA ) ->mutex_unlock(&fi->inmem_lock) ->spin_lock(&sbi->inode_lock[ATOMIC_FILE]); ->clear_inode_flag(inode, FI_ATOMIC_FILE) ->spin_unlock(&sbi->inode_lock[ATOMIC_FILE]) f2fs_write_end_io ->dec_page_count(sbi, F2FS_WB_DATA ); We can fix the problem by putting the action of clearing the FI_ATOMIC_FILE mark into the inmem_lock lock. This operation can ensure that no one will submit the inmem pages before the FI_ATOMIC_FILE mark is cleared, so that there will be no atomic writes waiting for writeback. Fixes: 57864ae5ce3a ("f2fs: limit # of inmemory pages") Signed-off-by: Yi Zhuang Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 30 +++++++++++++++++------------- 1 file changed, 17 insertions(+), 13 deletions(-) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 31ccea1378fa..c517e689a9a3 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -324,23 +324,27 @@ void f2fs_drop_inmem_pages(struct inode *inode) struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct f2fs_inode_info *fi = F2FS_I(inode); - while (!list_empty(&fi->inmem_pages)) { + do { mutex_lock(&fi->inmem_lock); + if (list_empty(&fi->inmem_pages)) { + fi->i_gc_failures[GC_FAILURE_ATOMIC] = 0; + + spin_lock(&sbi->inode_lock[ATOMIC_FILE]); + if (!list_empty(&fi->inmem_ilist)) + list_del_init(&fi->inmem_ilist); + if (f2fs_is_atomic_file(inode)) { + clear_inode_flag(inode, FI_ATOMIC_FILE); + sbi->atomic_files--; + } + spin_unlock(&sbi->inode_lock[ATOMIC_FILE]); + + mutex_unlock(&fi->inmem_lock); + break; + } __revoke_inmem_pages(inode, &fi->inmem_pages, true, 
false, true); mutex_unlock(&fi->inmem_lock); - } - - fi->i_gc_failures[GC_FAILURE_ATOMIC] = 0; - - spin_lock(&sbi->inode_lock[ATOMIC_FILE]); - if (!list_empty(&fi->inmem_ilist)) - list_del_init(&fi->inmem_ilist); - if (f2fs_is_atomic_file(inode)) { - clear_inode_flag(inode, FI_ATOMIC_FILE); - sbi->atomic_files--; - } - spin_unlock(&sbi->inode_lock[ATOMIC_FILE]); + } while (1); } void f2fs_drop_inmem_page(struct inode *inode, struct page *page) From b5d15199a26f6dce624b43c82764cdb3827e7c89 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 1 Apr 2021 17:25:20 -0700 Subject: [PATCH 36/48] f2fs: set checkpoint_merge by default Once we introduced checkpoint_merge, we've seen some contention w/o the option. In order to avoid it, let's set it by default. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 14239e2b7ae7..c15800c3cdb1 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1839,6 +1839,7 @@ static void default_options(struct f2fs_sb_info *sbi) set_opt(sbi, EXTENT_CACHE); set_opt(sbi, NOHEAP); clear_opt(sbi, DISABLE_CHECKPOINT); + set_opt(sbi, MERGE_CHECKPOINT); F2FS_OPTION(sbi).unusable_cap = 0; sbi->sb->s_flags |= SB_LAZYTIME; set_opt(sbi, FLUSH_MERGE); From a303b0ac920d807cb7da4f1cd85759fbe44fa654 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 1 Apr 2021 11:01:53 +0800 Subject: [PATCH 37/48] f2fs: fix to avoid GC/mmap race with f2fs_truncate() It missed to hold i_gc_rwsem and i_map_sem around f2fs_truncate() in f2fs_file_write_iter() to avoid racing with background GC and mmap, fix it. 
Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index dc79694e512c..f3ca63b55843 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -4443,8 +4443,13 @@ write: clear_inode_flag(inode, FI_NO_PREALLOC); /* if we couldn't write data, we should deallocate blocks. */ - if (preallocated && i_size_read(inode) < target_size) + if (preallocated && i_size_read(inode) < target_size) { + down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); + down_write(&F2FS_I(inode)->i_mmap_sem); f2fs_truncate(inode); + up_write(&F2FS_I(inode)->i_mmap_sem); + up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); + } if (ret > 0) f2fs_update_iostat(F2FS_I_SB(inode), APP_WRITE_IO, ret); From 25ae837e61dee712b4b1df36602ebfe724b2a0b6 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Fri, 2 Apr 2021 17:22:23 +0800 Subject: [PATCH 38/48] f2fs: fix to avoid accessing invalid fio in f2fs_allocate_data_block() Callers may pass fio parameter with NULL value to f2fs_allocate_data_block(), so we should make sure accessing fio's field after fio's validation check. 
Fixes: f608c38c59c6 ("f2fs: clean up parameter of f2fs_allocate_data_block()") Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index c517e689a9a3..44897cfecb1e 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -3417,12 +3417,12 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, f2fs_inode_chksum_set(sbi, page); } - if (F2FS_IO_ALIGNED(sbi)) - fio->retry = false; - if (fio) { struct f2fs_bio_info *io; + if (F2FS_IO_ALIGNED(sbi)) + fio->retry = false; + INIT_LIST_HEAD(&fio->list); fio->in_list = true; io = sbi->write_io[fio->type] + fio->temp; From c35b8d5e757e0fd0144890b7b536f7b756f3a648 Mon Sep 17 00:00:00 2001 From: Sahitya Tummala Date: Tue, 6 Apr 2021 14:39:16 +0530 Subject: [PATCH 39/48] f2fs: fix the periodic wakeups of discard thread Fix the unnecessary periodic wakeups of discard thread that happens under below two conditions - 1. When f2fs is heavily utilized over 80%, the current discard policy sets the max sleep timeout of discard thread as 50ms (DEF_MIN_DISCARD_ISSUE_TIME). But this is set even when there are no pending discard commands to be issued. 2. In the issue_discard_thread() path when there are no pending discard commands, it fails to reset the wait_ms to max timeout value. 
Signed-off-by: Sahitya Tummala Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 44897cfecb1e..5bd0e1d7a00c 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1114,6 +1114,8 @@ static void __init_discard_policy(struct f2fs_sb_info *sbi, struct discard_policy *dpolicy, int discard_type, unsigned int granularity) { + struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; + /* common policy */ dpolicy->type = discard_type; dpolicy->sync = true; @@ -1133,7 +1135,9 @@ static void __init_discard_policy(struct f2fs_sb_info *sbi, dpolicy->ordered = true; if (utilization(sbi) > DEF_DISCARD_URGENT_UTIL) { dpolicy->granularity = 1; - dpolicy->max_interval = DEF_MIN_DISCARD_ISSUE_TIME; + if (atomic_read(&dcc->discard_cmd_cnt)) + dpolicy->max_interval = + DEF_MIN_DISCARD_ISSUE_TIME; } } else if (discard_type == DPOLICY_FORCE) { dpolicy->min_interval = DEF_MIN_DISCARD_ISSUE_TIME; @@ -1749,8 +1753,15 @@ static int issue_discard_thread(void *data) set_freezable(); do { - __init_discard_policy(sbi, &dpolicy, DPOLICY_BG, - dcc->discard_granularity); + if (sbi->gc_mode == GC_URGENT_HIGH || + !f2fs_available_free_memory(sbi, DISCARD_CACHE)) + __init_discard_policy(sbi, &dpolicy, DPOLICY_FORCE, 1); + else + __init_discard_policy(sbi, &dpolicy, DPOLICY_BG, + dcc->discard_granularity); + + if (!atomic_read(&dcc->discard_cmd_cnt)) + wait_ms = dpolicy.max_interval; wait_event_interruptible_timeout(*q, kthread_should_stop() || freezing(current) || @@ -1777,10 +1788,6 @@ static int issue_discard_thread(void *data) if (!atomic_read(&dcc->discard_cmd_cnt)) continue; - if (sbi->gc_mode == GC_URGENT_HIGH || - !f2fs_available_free_memory(sbi, DISCARD_CACHE)) - __init_discard_policy(sbi, &dpolicy, DPOLICY_FORCE, 1); - sb_start_intwrite(sbi->sb); issued = __issue_discard_cmd(sbi, &dpolicy); From 5f029c045c948b6cb8ccfda614e73240c4a8363b Mon 
Sep 17 00:00:00 2001 From: Yi Zhuang Date: Tue, 6 Apr 2021 09:47:35 +0800 Subject: [PATCH 40/48] f2fs: clean up build warnings This patch combined the below three clean-up patches. - modify open brace '{' following function definitions - ERROR: spaces required around that ':' - ERROR: spaces required before the open parenthesis '(' - ERROR: spaces prohibited before that ',' - Made suggested modifications from checkpatch in reference to WARNING: Missing a blank line after declarations Signed-off-by: Yi Zhuang Signed-off-by: Jia Yang Signed-off-by: Jack Qiu Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/acl.c | 1 + fs/f2fs/checkpoint.c | 1 + fs/f2fs/data.c | 2 ++ fs/f2fs/debug.c | 3 +++ fs/f2fs/dir.c | 1 + fs/f2fs/file.c | 2 +- fs/f2fs/gc.c | 6 +++++- fs/f2fs/inode.c | 1 + fs/f2fs/namei.c | 3 +++ fs/f2fs/node.c | 8 +++++--- fs/f2fs/recovery.c | 3 ++- fs/f2fs/segment.c | 18 +++++++++++++++--- fs/f2fs/super.c | 5 +++-- fs/f2fs/xattr.c | 1 + 14 files changed, 44 insertions(+), 11 deletions(-) diff --git a/fs/f2fs/acl.c b/fs/f2fs/acl.c index 965037a9c205..239ad9453b99 100644 --- a/fs/f2fs/acl.c +++ b/fs/f2fs/acl.c @@ -29,6 +29,7 @@ static inline size_t f2fs_acl_size(int count) static inline int f2fs_acl_count(size_t size) { ssize_t s; + size -= sizeof(struct f2fs_acl_header); s = size - 4 * sizeof(struct f2fs_acl_entry_short); if (s < 0) { diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index f6169611270f..817d0bcb5c67 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -719,6 +719,7 @@ int f2fs_recover_orphan_inodes(struct f2fs_sb_info *sbi) orphan_blk = (struct f2fs_orphan_block *)page_address(page); for (j = 0; j < le32_to_cpu(orphan_blk->entry_count); j++) { nid_t ino = le32_to_cpu(orphan_blk->ino[j]); + err = recover_orphan_inode(sbi, ino); if (err) { f2fs_put_page(page, 1); diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 4bf7e79c8342..cf935474ffba 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1086,6 +1086,7 @@ int 
f2fs_reserve_new_blocks(struct dnode_of_data *dn, blkcnt_t count) for (; count > 0; dn->ofs_in_node++) { block_t blkaddr = f2fs_data_blkaddr(dn); + if (blkaddr == NULL_ADDR) { dn->data_blkaddr = NEW_ADDR; __set_data_blkaddr(dn); @@ -3765,6 +3766,7 @@ int f2fs_migrate_page(struct address_space *mapping, if (atomic_written) { struct inmem_pages *cur; + list_for_each_entry(cur, &fi->inmem_pages, list) if (cur->page == page) { cur->page = newpage; diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index 91855d5721cd..c03949a7ccff 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -173,6 +173,7 @@ static void update_general_status(struct f2fs_sb_info *sbi) si->util_invalid = 50 - si->util_free - si->util_valid; for (i = CURSEG_HOT_DATA; i < NO_CHECK_TYPE; i++) { struct curseg_info *curseg = CURSEG_I(sbi, i); + si->curseg[i] = curseg->segno; si->cursec[i] = GET_SEC_FROM_SEG(sbi, curseg->segno); si->curzone[i] = GET_ZONE_FROM_SEC(sbi, si->cursec[i]); @@ -300,10 +301,12 @@ get_cache: si->page_mem = 0; if (sbi->node_inode) { unsigned npages = NODE_MAPPING(sbi)->nrpages; + si->page_mem += (unsigned long long)npages << PAGE_SHIFT; } if (sbi->meta_inode) { unsigned npages = META_MAPPING(sbi)->nrpages; + si->page_mem += (unsigned long long)npages << PAGE_SHIFT; } } diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index e6270a867be1..ebf65c5fac40 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -473,6 +473,7 @@ void f2fs_set_link(struct inode *dir, struct f2fs_dir_entry *de, struct page *page, struct inode *inode) { enum page_type type = f2fs_has_inline_dentry(dir) ? 
NODE : DATA; + lock_page(page); f2fs_wait_on_page_writeback(page, type, true, true); de->ino = cpu_to_le32(inode->i_ino); diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index f3ca63b55843..d697c8900fa7 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -2582,7 +2582,7 @@ static int f2fs_defragment_range(struct f2fs_sb_info *sbi, { struct inode *inode = file_inode(filp); struct f2fs_map_blocks map = { .m_next_extent = NULL, - .m_seg_type = NO_CHECK_TYPE , + .m_seg_type = NO_CHECK_TYPE, .m_may_create = false }; struct extent_info ei = {0, 0, 0}; pgoff_t pg_start, pg_end, next_pgofs; diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 5c48825fd12d..8d1f17ab94d8 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -160,7 +160,7 @@ int f2fs_start_gc_thread(struct f2fs_sb_info *sbi) gc_th->max_sleep_time = DEF_GC_THREAD_MAX_SLEEP_TIME; gc_th->no_gc_sleep_time = DEF_GC_THREAD_NOGC_SLEEP_TIME; - gc_th->gc_wake= 0; + gc_th->gc_wake = 0; sbi->gc_thread = gc_th; init_waitqueue_head(&sbi->gc_thread->gc_wait_queue_head); @@ -179,6 +179,7 @@ out: void f2fs_stop_gc_thread(struct f2fs_sb_info *sbi) { struct f2fs_gc_kthread *gc_th = sbi->gc_thread; + if (!gc_th) return; kthread_stop(gc_th->f2fs_gc_task); @@ -858,6 +859,7 @@ static void add_gc_inode(struct gc_inode_list *gc_list, struct inode *inode) static void put_gc_inode(struct gc_inode_list *gc_list) { struct inode_entry *ie, *next_ie; + list_for_each_entry_safe(ie, next_ie, &gc_list->ilist, list) { radix_tree_delete(&gc_list->iroot, ie->inode->i_ino); iput(ie->inode); @@ -982,9 +984,11 @@ block_t f2fs_start_bidx_of_node(unsigned int node_ofs, struct inode *inode) bidx = node_ofs - 1; } else if (node_ofs <= indirect_blks) { int dec = (node_ofs - 4) / (NIDS_PER_BLOCK + 1); + bidx = node_ofs - 2 - dec; } else { int dec = (node_ofs - indirect_blks - 3) / (NIDS_PER_BLOCK + 1); + bidx = node_ofs - 5 - dec; } return bidx * ADDRS_PER_BLOCK(inode) + ADDRS_PER_INODE(inode); diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 
5d2253d53f17..b401f08569f7 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -666,6 +666,7 @@ retry: node_page = f2fs_get_node_page(sbi, inode->i_ino); if (IS_ERR(node_page)) { int err = PTR_ERR(node_page); + if (err == -ENOMEM) { cond_resched(); goto retry; diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index 17bd072a5d39..405d85dbf9f1 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -419,6 +419,7 @@ struct dentry *f2fs_get_parent(struct dentry *child) struct qstr dotdot = QSTR_INIT("..", 2); struct page *page; unsigned long ino = f2fs_inode_by_name(d_inode(child), &dotdot, &page); + if (!ino) { if (IS_ERR(page)) return ERR_CAST(page); @@ -628,6 +629,7 @@ static const char *f2fs_get_link(struct dentry *dentry, struct delayed_call *done) { const char *link = page_get_link(dentry, inode, done); + if (!IS_ERR(link) && !*link) { /* this is broken symlink case */ do_delayed_call(done); @@ -766,6 +768,7 @@ out_fail: static int f2fs_rmdir(struct inode *dir, struct dentry *dentry) { struct inode *inode = d_inode(dentry); + if (f2fs_empty_dir(inode)) return f2fs_unlink(dir, dentry); return -ENOTEMPTY; diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 3eb724bb6594..e67ce5f13b98 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -470,6 +470,7 @@ static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni, /* increment version no as node is removed */ if (nat_get_blkaddr(e) != NEW_ADDR && new_blkaddr == NULL_ADDR) { unsigned char version = nat_get_version(e); + nat_set_version(e, inc_node_version(version)); } @@ -1391,7 +1392,7 @@ repeat: goto out_err; } page_hit: - if(unlikely(nid != nid_of_node(page))) { + if (unlikely(nid != nid_of_node(page))) { f2fs_warn(sbi, "inconsistent node block, nid:%lu, node_footer[nid:%u,ino:%u,ofs:%u,cpver:%llu,blkaddr:%u]", nid, nid_of_node(page), ino_of_node(page), ofs_of_node(page), cpver_of_node(page), @@ -1783,7 +1784,7 @@ continue_unlock: out: if (nwritten) f2fs_submit_merged_write_cond(sbi, NULL, NULL, ino, NODE); 
- return ret ? -EIO: 0; + return ret ? -EIO : 0; } static int f2fs_match_ino(struct inode *inode, unsigned long ino, void *data) @@ -2125,8 +2126,8 @@ static int __insert_free_nid(struct f2fs_sb_info *sbi, struct free_nid *i) { struct f2fs_nm_info *nm_i = NM_I(sbi); - int err = radix_tree_insert(&nm_i->free_nid_root, i->nid, i); + if (err) return err; @@ -2991,6 +2992,7 @@ int f2fs_flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc) while ((found = __gang_lookup_nat_set(nm_i, set_idx, SETVEC_SIZE, setvec))) { unsigned idx; + set_idx = setvec[found - 1]->set + 1; for (idx = 0; idx < found; idx++) __adjust_nat_entry_set(setvec[idx], &sets, diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index da75d5d52f0a..422146c6d866 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -458,6 +458,7 @@ static int check_index_in_prev_nodes(struct f2fs_sb_info *sbi, /* Get the previous summary */ for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) { struct curseg_info *curseg = CURSEG_I(sbi, i); + if (curseg->segno == segno) { sum = curseg->sum_blk->entries[blkoff]; goto got_it; @@ -875,5 +876,5 @@ out: #endif sbi->sb->s_flags = s_flags; /* Restore SB_RDONLY status */ - return ret ? ret: err; + return ret ? 
ret : err; } diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 5bd0e1d7a00c..0cb1ca88d4aa 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1794,7 +1794,7 @@ static int issue_discard_thread(void *data) if (issued > 0) { __wait_all_discard_cmd(sbi, &dpolicy); wait_ms = dpolicy.min_interval; - } else if (issued == -1){ + } else if (issued == -1) { wait_ms = f2fs_time_to_wait(sbi, DISCARD_TIME); if (!wait_ms) wait_ms = dpolicy.mid_interval; @@ -2171,6 +2171,7 @@ static void __set_sit_entry_type(struct f2fs_sb_info *sbi, int type, unsigned int segno, int modified) { struct seg_entry *se = get_seg_entry(sbi, segno); + se->type = type; if (modified) __mark_sit_entry_dirty(sbi, segno); @@ -2362,6 +2363,7 @@ static void __add_sum_entry(struct f2fs_sb_info *sbi, int type, { struct curseg_info *curseg = CURSEG_I(sbi, type); void *addr = curseg->sum_blk; + addr += curseg->next_blkoff * sizeof(struct f2fs_summary); memcpy(addr, sum, sizeof(struct f2fs_summary)); } @@ -3779,6 +3781,7 @@ static int read_compacted_summaries(struct f2fs_sb_info *sbi) for (j = 0; j < blk_off; j++) { struct f2fs_summary *s; + s = (struct f2fs_summary *)(kaddr + offset); seg_i->sum_blk->entries[j] = *s; offset += SUMMARY_SIZE; @@ -3841,6 +3844,7 @@ static int read_normal_summaries(struct f2fs_sb_info *sbi, int type) if (__exist_node_summaries(sbi)) { struct f2fs_summary *ns = &sum->entries[0]; int i; + for (i = 0; i < sbi->blocks_per_seg; i++, ns++) { ns->version = 0; ns->ofs_in_node = 0; @@ -3942,6 +3946,7 @@ static void write_compacted_summaries(struct f2fs_sb_info *sbi, block_t blkaddr) /* Step 3: write summary entries */ for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) { unsigned short blkoff; + seg_i = CURSEG_I(sbi, i); if (sbi->ckpt->alloc_type[i] == SSR) blkoff = sbi->blocks_per_seg; @@ -3978,6 +3983,7 @@ static void write_normal_summaries(struct f2fs_sb_info *sbi, block_t blkaddr, int type) { int i, end; + if (IS_DATASEG(type)) end = type + NR_CURSEG_DATA_TYPE; else @@ 
-4561,6 +4567,7 @@ static void init_free_segmap(struct f2fs_sb_info *sbi) /* set use the current segments */ for (type = CURSEG_HOT_DATA; type <= CURSEG_COLD_NODE; type++) { struct curseg_info *curseg_t = CURSEG_I(sbi, type); + __set_test_and_inuse(sbi, curseg_t->segno); } } @@ -4793,7 +4800,8 @@ static struct f2fs_dev_info *get_target_zoned_dev(struct f2fs_sb_info *sbi, } static int report_one_zone_cb(struct blk_zone *zone, unsigned int idx, - void *data) { + void *data) +{ memcpy(data, zone, sizeof(struct blk_zone)); return 0; } @@ -4909,8 +4917,10 @@ struct check_zone_write_pointer_args { }; static int check_zone_write_pointer_cb(struct blk_zone *zone, unsigned int idx, - void *data) { + void *data) +{ struct check_zone_write_pointer_args *args; + args = (struct check_zone_write_pointer_args *)data; return check_zone_write_pointer(args->sbi, args->fdev, zone); @@ -5189,6 +5199,7 @@ static void discard_dirty_segmap(struct f2fs_sb_info *sbi, static void destroy_victim_secmap(struct f2fs_sb_info *sbi) { struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); + kvfree(dirty_i->victim_secmap); } @@ -5233,6 +5244,7 @@ static void destroy_curseg(struct f2fs_sb_info *sbi) static void destroy_free_segmap(struct f2fs_sb_info *sbi) { struct free_segmap_info *free_i = SM_I(sbi)->free_info; + if (!free_i) return; SM_I(sbi)->free_info = NULL; diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index c15800c3cdb1..5020152aa8fc 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -559,6 +559,7 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount) while ((p = strsep(&options, ",")) != NULL) { int token; + if (!*p) continue; /* @@ -1892,7 +1893,7 @@ static int f2fs_disable_checkpoint(struct f2fs_sb_info *sbi) ret = sync_filesystem(sbi->sb); if (ret || err) { - err = ret ? ret: err; + err = ret ? 
ret : err; goto restore_flag; } @@ -3757,7 +3758,7 @@ try_onemore: sbi->iostat_period_ms = DEFAULT_IOSTAT_PERIOD_MS; for (i = 0; i < NR_PAGE_TYPE; i++) { - int n = (i == META) ? 1: NR_TEMP_TYPE; + int n = (i == META) ? 1 : NR_TEMP_TYPE; int j; sbi->write_io[i] = diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c index 490f843ec3bf..c8f34decbf8e 100644 --- a/fs/f2fs/xattr.c +++ b/fs/f2fs/xattr.c @@ -488,6 +488,7 @@ static inline int write_all_xattrs(struct inode *inode, __u32 hsize, f2fs_wait_on_page_writeback(xpage, NODE, true, true); } else { struct dnode_of_data dn; + set_new_dnode(&dn, inode, NULL, NULL, new_nid); xpage = f2fs_new_node_page(&dn, XATTR_NODE_OFFSET); if (IS_ERR(xpage)) { From 38740707c5bc1253069eb932bc6d244f80ec21f0 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 13 Apr 2021 17:56:53 +0800 Subject: [PATCH 41/48] f2fs: document: add description about compressed space handling User or developer may still be confused about why f2fs doesn't expose compressed space to userspace, add description about compressed space handling policy into f2fs documentation. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- Documentation/filesystems/f2fs.rst | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/Documentation/filesystems/f2fs.rst b/Documentation/filesystems/f2fs.rst index 63c0c49b726d..992bf91eeec8 100644 --- a/Documentation/filesystems/f2fs.rst +++ b/Documentation/filesystems/f2fs.rst @@ -819,6 +819,14 @@ Compression implementation * chattr +c file * chattr +c dir; touch dir/file * mount w/ -o compress_extension=ext; touch file.ext + * mount w/ -o compress_extension=*; touch any_file + +- At this point, compression feature doesn't expose compressed space to user + directly in order to guarantee potential data updates later to the space. + Instead, the main goal is to reduce data writes to flash disk as much as + possible, resulting in extending disk life time as well as relaxing IO + congestion. 
Alternatively, we've added ioctl interface to reclaim compressed + space and show it to user after putting the immutable bit. Compress metadata layout:: From 453e2ff8e4ff2747acee1799e7ef959970c5cc78 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 13 Apr 2021 17:56:18 +0800 Subject: [PATCH 42/48] f2fs: avoid duplicated codes for cleanup f2fs_segment_has_free_slot() was copied and modified from __next_free_blkoff(), they are almost the same, clean up to reuse common code as much as possible. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 32 ++++++++++---------------------- 1 file changed, 10 insertions(+), 22 deletions(-) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 0cb1ca88d4aa..41df26292d7d 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -2635,22 +2635,20 @@ static void new_curseg(struct f2fs_sb_info *sbi, int type, bool new_sec) curseg->alloc_type = LFS; } -static void __next_free_blkoff(struct f2fs_sb_info *sbi, - struct curseg_info *seg, block_t start) +static int __next_free_blkoff(struct f2fs_sb_info *sbi, + int segno, block_t start) { - struct seg_entry *se = get_seg_entry(sbi, seg->segno); + struct seg_entry *se = get_seg_entry(sbi, segno); int entries = SIT_VBLOCK_MAP_SIZE / sizeof(unsigned long); unsigned long *target_map = SIT_I(sbi)->tmp_map; unsigned long *ckpt_map = (unsigned long *)se->ckpt_valid_map; unsigned long *cur_map = (unsigned long *)se->cur_valid_map; - int i, pos; + int i; for (i = 0; i < entries; i++) target_map[i] = ckpt_map[i] | cur_map[i]; - pos = __find_rev_next_zero_bit(target_map, sbi->blocks_per_seg, start); - - seg->next_blkoff = pos; + return __find_rev_next_zero_bit(target_map, sbi->blocks_per_seg, start); } /* @@ -2662,26 +2660,16 @@ static void __refresh_next_blkoff(struct f2fs_sb_info *sbi, struct curseg_info *seg) { if (seg->alloc_type == SSR) - __next_free_blkoff(sbi, seg, seg->next_blkoff + 1); + seg->next_blkoff = + __next_free_blkoff(sbi, seg->segno, + seg->next_blkoff + 
1); else seg->next_blkoff++; } bool f2fs_segment_has_free_slot(struct f2fs_sb_info *sbi, int segno) { - struct seg_entry *se = get_seg_entry(sbi, segno); - int entries = SIT_VBLOCK_MAP_SIZE / sizeof(unsigned long); - unsigned long *target_map = SIT_I(sbi)->tmp_map; - unsigned long *ckpt_map = (unsigned long *)se->ckpt_valid_map; - unsigned long *cur_map = (unsigned long *)se->cur_valid_map; - int i, pos; - - for (i = 0; i < entries; i++) - target_map[i] = ckpt_map[i] | cur_map[i]; - - pos = __find_rev_next_zero_bit(target_map, sbi->blocks_per_seg, 0); - - return pos < sbi->blocks_per_seg; + return __next_free_blkoff(sbi, segno, 0) < sbi->blocks_per_seg; } /* @@ -2709,7 +2697,7 @@ static void change_curseg(struct f2fs_sb_info *sbi, int type, bool flush) reset_curseg(sbi, type, 1); curseg->alloc_type = SSR; - __next_free_blkoff(sbi, curseg, 0); + curseg->next_blkoff = __next_free_blkoff(sbi, curseg->segno, 0); sum_page = f2fs_get_sum_page(sbi, new_segno); if (IS_ERR(sum_page)) { From 594b6d0428ae304e0b44457398beb458b938f005 Mon Sep 17 00:00:00 2001 From: Yi Chen Date: Tue, 13 Apr 2021 17:30:50 +0800 Subject: [PATCH 43/48] f2fs: fix to avoid NULL pointer dereference Unable to handle kernel NULL pointer dereference at virtual address 0000000000000000 pc : f2fs_put_page+0x1c/0x26c lr : __revoke_inmem_pages+0x544/0x75c f2fs_put_page+0x1c/0x26c __revoke_inmem_pages+0x544/0x75c __f2fs_commit_inmem_pages+0x364/0x3c0 f2fs_commit_inmem_pages+0xc8/0x1a0 f2fs_ioc_commit_atomic_write+0xa4/0x15c f2fs_ioctl+0x5b0/0x1574 file_ioctl+0x154/0x320 do_vfs_ioctl+0x164/0x740 __arm64_sys_ioctl+0x78/0xa4 el0_svc_common+0xbc/0x1d0 el0_svc_handler+0x74/0x98 el0_svc+0x8/0xc In f2fs_put_page, the page->mapping we access is NULL. The root cause is: In some cases, the page refcount and ATOMIC_WRITTEN_PAGE flag are not set because the page-private flag has already been set. We add f2fs_bug_on like this: f2fs_register_inmem_page() { ... 
f2fs_set_page_private(page, ATOMIC_WRITTEN_PAGE); f2fs_bug_on(F2FS_I_SB(inode), !IS_ATOMIC_WRITTEN_PAGE(page)); ... } The bug-on stack looks like this: PC is at f2fs_register_inmem_page+0x238/0x2b4 LR is at f2fs_register_inmem_page+0x2a8/0x2b4 f2fs_register_inmem_page+0x238/0x2b4 f2fs_set_data_page_dirty+0x104/0x164 set_page_dirty+0x78/0xc8 f2fs_write_end+0x1b4/0x444 generic_perform_write+0x144/0x1cc __generic_file_write_iter+0xc4/0x174 f2fs_file_write_iter+0x2c0/0x350 __vfs_write+0x104/0x134 vfs_write+0xe8/0x19c SyS_pwrite64+0x78/0xb8 To fix this issue, let's add page refcount and page-private flag. The page-private flag is not cleared and needs further analysis. Signed-off-by: Chao Yu Signed-off-by: Ge Qiu Signed-off-by: Dehe Gu Signed-off-by: Yi Chen Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 41df26292d7d..6e740ecf0814 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -186,7 +186,10 @@ void f2fs_register_inmem_page(struct inode *inode, struct page *page) { struct inmem_pages *new; - f2fs_set_page_private(page, ATOMIC_WRITTEN_PAGE); + if (PagePrivate(page)) + set_page_private(page, (unsigned long)ATOMIC_WRITTEN_PAGE); + else + f2fs_set_page_private(page, ATOMIC_WRITTEN_PAGE); new = f2fs_kmem_cache_alloc(inmem_entry_slab, GFP_NOFS); From a7b4e506dcc461c214734d03816c1d47bd88c9a3 Mon Sep 17 00:00:00 2001 From: Wan Jiabing Date: Mon, 19 Apr 2021 10:20:03 +0800 Subject: [PATCH 44/48] f2fs: remove unnecessary struct declaration struct dnode_of_data is defined at 897th line. The declaration here is unnecessary. Remove it. 
Signed-off-by: Wan Jiabing Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 87d734f5589d..984a2a546745 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -3311,7 +3311,6 @@ void f2fs_hash_filename(const struct inode *dir, struct f2fs_filename *fname); /* * node.c */ -struct dnode_of_data; struct node_info; int f2fs_check_nid_range(struct f2fs_sb_info *sbi, nid_t nid); From 509f1010e4fc55e2dbfc036317afd573ccd0931c Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 21 Apr 2021 09:54:55 +0800 Subject: [PATCH 45/48] f2fs: avoid using native allocate_segment_by_default() As we did for other cases, in fix_curseg_write_pointer(), let's use wrapped f2fs_allocate_new_section() instead of native allocate_segment_by_default(), by this way, it fixes to cover segment allocation with curseg_lock and sentry_lock. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 2 +- fs/f2fs/file.c | 2 +- fs/f2fs/segment.c | 18 ++++++++++-------- 3 files changed, 12 insertions(+), 10 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 984a2a546745..3ebb951cc426 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -3395,7 +3395,7 @@ void f2fs_get_new_segment(struct f2fs_sb_info *sbi, unsigned int *newseg, bool new_sec, int dir); void f2fs_allocate_segment_for_resize(struct f2fs_sb_info *sbi, int type, unsigned int start, unsigned int end); -void f2fs_allocate_new_section(struct f2fs_sb_info *sbi, int type); +void f2fs_allocate_new_section(struct f2fs_sb_info *sbi, int type, bool force); void f2fs_allocate_new_segments(struct f2fs_sb_info *sbi); int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range); bool f2fs_exist_trim_candidates(struct f2fs_sb_info *sbi, diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index d697c8900fa7..af7230fb9c1f 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -1664,7 +1664,7 @@ next_alloc: down_write(&sbi->pin_sem); 
f2fs_lock_op(sbi); - f2fs_allocate_new_section(sbi, CURSEG_COLD_DATA_PINNED); + f2fs_allocate_new_section(sbi, CURSEG_COLD_DATA_PINNED, false); f2fs_unlock_op(sbi); map.m_seg_type = CURSEG_COLD_DATA_PINNED; diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 6e740ecf0814..efd1e57384b9 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -2933,7 +2933,7 @@ unlock: } static void __allocate_new_segment(struct f2fs_sb_info *sbi, int type, - bool new_sec) + bool new_sec, bool force) { struct curseg_info *curseg = CURSEG_I(sbi, type); unsigned int old_segno; @@ -2941,7 +2941,7 @@ static void __allocate_new_segment(struct f2fs_sb_info *sbi, int type, if (!curseg->inited) goto alloc; - if (curseg->next_blkoff || + if (force || curseg->next_blkoff || get_valid_blocks(sbi, curseg->segno, new_sec)) goto alloc; @@ -2953,16 +2953,17 @@ alloc: locate_dirty_segment(sbi, old_segno); } -static void __allocate_new_section(struct f2fs_sb_info *sbi, int type) +static void __allocate_new_section(struct f2fs_sb_info *sbi, + int type, bool force) { - __allocate_new_segment(sbi, type, true); + __allocate_new_segment(sbi, type, true, force); } -void f2fs_allocate_new_section(struct f2fs_sb_info *sbi, int type) +void f2fs_allocate_new_section(struct f2fs_sb_info *sbi, int type, bool force) { down_read(&SM_I(sbi)->curseg_lock); down_write(&SIT_I(sbi)->sentry_lock); - __allocate_new_section(sbi, type); + __allocate_new_section(sbi, type, force); up_write(&SIT_I(sbi)->sentry_lock); up_read(&SM_I(sbi)->curseg_lock); } @@ -2974,7 +2975,7 @@ void f2fs_allocate_new_segments(struct f2fs_sb_info *sbi) down_read(&SM_I(sbi)->curseg_lock); down_write(&SIT_I(sbi)->sentry_lock); for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) - __allocate_new_segment(sbi, i, false); + __allocate_new_segment(sbi, i, false, false); up_write(&SIT_I(sbi)->sentry_lock); up_read(&SM_I(sbi)->curseg_lock); } @@ -4844,7 +4845,8 @@ static int fix_curseg_write_pointer(struct f2fs_sb_info *sbi, int type) 
f2fs_notice(sbi, "Assign new section to curseg[%d]: " "curseg[0x%x,0x%x]", type, cs->segno, cs->next_blkoff); - allocate_segment_by_default(sbi, type, true); + + f2fs_allocate_new_section(sbi, type, true); /* check consistency of the zone curseg pointed to */ if (check_zone_write_pointer(sbi, zbd, &zone)) From 2e22d48dca0bc5b7fccca8d7b6caed80a9d07465 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Fri, 23 Apr 2021 14:09:38 +0800 Subject: [PATCH 46/48] f2fs: clean up left deprecated IO trace codes Commit d5f7bc0064e0 ("f2fs: deprecate f2fs_trace_io") left some dead codes, delete them. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/compress.c | 6 ------ fs/f2fs/f2fs.h | 8 -------- 2 files changed, 14 deletions(-) diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c index 3c9d797dbdd6..6e46a00c1930 100644 --- a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -76,12 +76,6 @@ bool f2fs_is_compressed_page(struct page *page) return false; if (IS_ATOMIC_WRITTEN_PAGE(page) || IS_DUMMY_WRITTEN_PAGE(page)) return false; - /* - * page->private may be set with pid. - * pid_max is enough to check if it is traced. 
- */ - if (IS_IO_TRACED_PAGE(page)) - return false; f2fs_bug_on(F2FS_M_SB(page->mapping), *((u32 *)page_private(page)) != F2FS_COMPRESSED_PAGE_MAGIC); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 3ebb951cc426..b9d5317db0a7 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1303,14 +1303,6 @@ enum { #define IS_DUMMY_WRITTEN_PAGE(page) \ (page_private(page) == DUMMY_WRITTEN_PAGE) -#ifdef CONFIG_F2FS_IO_TRACE -#define IS_IO_TRACED_PAGE(page) \ - (page_private(page) > 0 && \ - page_private(page) < (unsigned long)PID_MAX_LIMIT) -#else -#define IS_IO_TRACED_PAGE(page) (0) -#endif - /* For compression */ enum compress_algorithm_type { COMPRESS_LZO, From 8af85f712fce319dd9fe3d41046b5163e7eb0f93 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 21 Apr 2021 16:39:41 +0800 Subject: [PATCH 47/48] f2fs: compress: remove unneed check condition In only call path of __cluster_may_compress(), __f2fs_write_data_pages() has checked SBI_POR_DOING condition, and also cluster_may_compress() has checked CP_ERROR_FLAG condition, so remove redundant check condition in __cluster_may_compress() for cleanup. 
Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/compress.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c index 6e46a00c1930..53b13787eb2c 100644 --- a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -890,7 +890,6 @@ bool f2fs_cluster_can_merge_page(struct compress_ctx *cc, pgoff_t index) static bool __cluster_may_compress(struct compress_ctx *cc) { - struct f2fs_sb_info *sbi = F2FS_I_SB(cc->inode); loff_t i_size = i_size_read(cc->inode); unsigned nr_pages = DIV_ROUND_UP(i_size, PAGE_SIZE); int i; @@ -898,12 +897,7 @@ static bool __cluster_may_compress(struct compress_ctx *cc) for (i = 0; i < cc->cluster_size; i++) { struct page *page = cc->rpages[i]; - f2fs_bug_on(sbi, !page); - - if (unlikely(f2fs_cp_error(sbi))) - return false; - if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING))) - return false; + f2fs_bug_on(F2FS_I_SB(cc->inode), !page); /* beyond EOF */ if (page->index >= nr_pages) From 9557727876674893d35940fddbd03d3b505e7ed8 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 22 Apr 2021 18:19:25 +0800 Subject: [PATCH 48/48] f2fs: drop inplace IO if fs status is abnormal If filesystem has cp_error or need_fsck status, let's drop inplace IO to avoid further corruption of fs data. 
Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index efd1e57384b9..efac388d2468 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -3552,7 +3552,13 @@ int f2fs_inplace_write_data(struct f2fs_io_info *fio) set_sbi_flag(sbi, SBI_NEED_FSCK); f2fs_warn(sbi, "%s: incorrect segment(%u) type, run fsck to fix.", __func__, segno); - return -EFSCORRUPTED; + err = -EFSCORRUPTED; + goto drop_bio; + } + + if (is_sbi_flag_set(sbi, SBI_NEED_FSCK) || f2fs_cp_error(sbi)) { + err = -EIO; + goto drop_bio; } stat_inc_inplace_blocks(fio->sbi); @@ -3566,6 +3572,15 @@ int f2fs_inplace_write_data(struct f2fs_io_info *fio) f2fs_update_iostat(fio->sbi, fio->io_type, F2FS_BLKSIZE); } + return err; +drop_bio: + if (fio->bio) { + struct bio *bio = *(fio->bio); + + bio->bi_status = BLK_STS_IOERR; + bio_endio(bio); + fio->bio = NULL; + } return err; }