From 5db479f049c4fd6cb2d61ade28c73f51487c2f45 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 27 Apr 2021 11:07:30 +0800 Subject: [PATCH 01/32] f2fs: compress: rename __cluster_may_compress This patch renames __cluster_may_compress() to cluster_has_invalid_data() for better readability. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/compress.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c index 925a5ca3744a..19097a1d0125 100644 --- a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -876,7 +876,7 @@ bool f2fs_cluster_can_merge_page(struct compress_ctx *cc, pgoff_t index) return is_page_in_cluster(cc, index); } -static bool __cluster_may_compress(struct compress_ctx *cc) +static bool cluster_has_invalid_data(struct compress_ctx *cc) { loff_t i_size = i_size_read(cc->inode); unsigned nr_pages = DIV_ROUND_UP(i_size, PAGE_SIZE); @@ -889,9 +889,9 @@ static bool __cluster_may_compress(struct compress_ctx *cc) /* beyond EOF */ if (page->index >= nr_pages) - return false; + return true; } - return true; + return false; } static int __f2fs_cluster_blocks(struct compress_ctx *cc, bool compr) @@ -967,7 +967,7 @@ static bool cluster_may_compress(struct compress_ctx *cc) return false; if (unlikely(f2fs_cp_error(F2FS_I_SB(cc->inode)))) return false; - return __cluster_may_compress(cc); + return !cluster_has_invalid_data(cc); } static void set_cluster_writeback(struct compress_ctx *cc) From ee68d27181f060fab29e60d1d31aab6a42703dd4 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 27 Apr 2021 11:07:30 +0800 Subject: [PATCH 02/32] f2fs: add cp_error check in f2fs_write_compressed_pages This patch adds cp_error check in f2fs_write_compressed_pages() like we did in f2fs_write_single_data_page() Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/compress.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c index 19097a1d0125..ac321e217d57 100644 --- a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -1216,6 +1216,12 @@ static int f2fs_write_compressed_pages(struct compress_ctx *cc, loff_t psize; int i, err; + /* we should bypass data pages to proceed the kworkder jobs */ + if (unlikely(f2fs_cp_error(sbi))) { + mapping_set_error(cc->rpages[0]->mapping, -EIO); + goto out_free; + } + if (IS_NOQUOTA(inode)) { /* * We need to wait for node_write to avoid block allocation during From b763f3bedc2da2edf81bba550430847f561eae0e Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 28 Apr 2021 17:20:31 +0800 Subject: [PATCH 03/32] f2fs: restructure f2fs page.private layout Restruct f2fs page private layout for below reasons: There are some cases that f2fs wants to set a flag in a page to indicate a specified status of page: a) page is in transaction list for atomic write b) page contains dummy data for aligned write c) page is migrating for GC d) page contains inline data for inline inode flush e) page belongs to merkle tree, and is verified for fsverity f) page is dirty and has filesystem/inode reference count for writeback g) page is temporary and has decompress io context reference for compression There are existed places in page structure we can use to store f2fs private status/data: - page.flags: PG_checked, PG_private - page.private However it was a mess when we using them, which may cause potential confliction: page.private PG_private PG_checked page._refcount (+1 at most) a) -1 set +1 b) -2 set c), d), e) set f) 0 set +1 g) pointer set The other problem is page.flags has no free 
slot, if we can avoid set zero to page.private and set PG_private flag, then we use non-zero value to indicate PG_private status, so that we may have chance to reclaim PG_private slot for other usage. [1] The other concern is f2fs has bad scalability in aspect of indicating more page status. So in this patch, let's restructure f2fs' page.private as below to solve above issues: Layout A: lowest bit should be 1 | bit0 = 1 | bit1 | bit2 | ... | bit MAX | private data .... | bit 0 PAGE_PRIVATE_NOT_POINTER bit 1 PAGE_PRIVATE_ATOMIC_WRITE bit 2 PAGE_PRIVATE_DUMMY_WRITE bit 3 PAGE_PRIVATE_ONGOING_MIGRATION bit 4 PAGE_PRIVATE_INLINE_INODE bit 5 PAGE_PRIVATE_REF_RESOURCE bit 6- f2fs private data Layout B: lowest bit should be 0 page.private is a wrapped pointer. After the change: page.private PG_private PG_checked page._refcount (+1 at most) a) 11 set +1 b) 101 set +1 c) 1001 set +1 d) 10001 set +1 e) set f) 100001 set +1 g) pointer set +1 [1] https://lore.kernel.org/linux-f2fs-devel/20210422154705.GO3596236@casper.infradead.org/T/#u Cc: Matthew Wilcox Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 4 +- fs/f2fs/compress.c | 10 ++--- fs/f2fs/data.c | 65 +++++++++++++++------------ fs/f2fs/dir.c | 8 +++- fs/f2fs/f2fs.h | 102 +++++++++++++++++++++++++++++++++---------- fs/f2fs/gc.c | 6 +-- fs/f2fs/inline.c | 4 +- fs/f2fs/inode.c | 2 +- fs/f2fs/node.c | 10 ++--- fs/f2fs/node.h | 29 ------------ fs/f2fs/segment.c | 19 ++++---- 11 files changed, 148 insertions(+), 111 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index f795049e63d5..6c208108d69c 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -444,7 +444,7 @@ static int f2fs_set_meta_page_dirty(struct page *page) if (!PageDirty(page)) { __set_page_dirty_nobuffers(page); inc_page_count(F2FS_P_SB(page), F2FS_DIRTY_META); - f2fs_set_page_private(page, 0); + set_page_private_reference(page); return 1; } return 0; @@ -1018,7 +1018,7 @@ void f2fs_update_dirty_page(struct inode *inode, struct page *page) inode_inc_dirty_pages(inode); spin_unlock(&sbi->inode_lock[type]); - f2fs_set_page_private(page, 0); + set_page_private_reference(page); } void f2fs_remove_dirty_inode(struct inode *inode) diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c index ac321e217d57..6daa71ef0565 100644 --- a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -74,7 +74,7 @@ bool f2fs_is_compressed_page(struct page *page) return false; if (!page_private(page)) return false; - if (IS_ATOMIC_WRITTEN_PAGE(page) || IS_DUMMY_WRITTEN_PAGE(page)) + if (page_private_nonpointer(page)) return false; f2fs_bug_on(F2FS_M_SB(page->mapping), @@ -85,8 +85,7 @@ bool f2fs_is_compressed_page(struct page *page) static void f2fs_set_compressed_page(struct page *page, struct inode *inode, pgoff_t index, void *data) { - SetPagePrivate(page); - set_page_private(page, (unsigned long)data); + attach_page_private(page, (void *)data); /* i_crypto_info and iv index */ page->index = index; @@ -589,8 +588,7 @@ static void f2fs_compress_free_page(struct page *page) { if (!page) return; - set_page_private(page, (unsigned long)NULL); - ClearPagePrivate(page); + detach_page_private(page); page->mapping = NULL; unlock_page(page); mempool_free(page, compress_page_pool); @@ -1405,7 +1403,7 @@ void f2fs_compress_write_end_io(struct bio *bio, struct page *page) for (i = 0; i < cic->nr_rpages; i++) { WARN_ON(!cic->rpages[i]); - clear_cold_data(cic->rpages[i]); + clear_page_private_gcing(cic->rpages[i]); end_page_writeback(cic->rpages[i]); } diff --git 
a/fs/f2fs/data.c b/fs/f2fs/data.c index 009a09fb9d88..d352f2bea369 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -58,18 +58,19 @@ static bool __is_cp_guaranteed(struct page *page) if (!mapping) return false; - if (f2fs_is_compressed_page(page)) - return false; - inode = mapping->host; sbi = F2FS_I_SB(inode); if (inode->i_ino == F2FS_META_INO(sbi) || inode->i_ino == F2FS_NODE_INO(sbi) || - S_ISDIR(inode->i_mode) || - (S_ISREG(inode->i_mode) && + S_ISDIR(inode->i_mode)) + return true; + + if (f2fs_is_compressed_page(page)) + return false; + if ((S_ISREG(inode->i_mode) && (f2fs_is_atomic_file(inode) || IS_NOQUOTA(inode))) || - is_cold_data(page)) + page_private_gcing(page)) return true; return false; } @@ -299,9 +300,8 @@ static void f2fs_write_end_io(struct bio *bio) struct page *page = bvec->bv_page; enum count_type type = WB_DATA_TYPE(page); - if (IS_DUMMY_WRITTEN_PAGE(page)) { - set_page_private(page, (unsigned long)NULL); - ClearPagePrivate(page); + if (page_private_dummy(page)) { + clear_page_private_dummy(page); unlock_page(page); mempool_free(page, sbi->write_io_dummy); @@ -331,7 +331,7 @@ static void f2fs_write_end_io(struct bio *bio) dec_page_count(sbi, type); if (f2fs_in_warm_node_list(sbi, page)) f2fs_del_fsync_node_entry(sbi, page); - clear_cold_data(page); + clear_page_private_gcing(page); end_page_writeback(page); } if (!get_pages(sbi, F2FS_WB_CP_DATA) && @@ -455,10 +455,11 @@ static inline void __submit_bio(struct f2fs_sb_info *sbi, GFP_NOIO | __GFP_NOFAIL); f2fs_bug_on(sbi, !page); - zero_user_segment(page, 0, PAGE_SIZE); - SetPagePrivate(page); - set_page_private(page, DUMMY_WRITTEN_PAGE); lock_page(page); + + zero_user_segment(page, 0, PAGE_SIZE); + set_page_private_dummy(page); + if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) f2fs_bug_on(sbi, 1); } @@ -2482,9 +2483,9 @@ bool f2fs_should_update_outplace(struct inode *inode, struct f2fs_io_info *fio) if (f2fs_is_atomic_file(inode)) return true; if (fio) { - if (is_cold_data(fio->page)) + if (page_private_gcing(fio->page)) return true; - if (IS_ATOMIC_WRITTEN_PAGE(fio->page)) + if (page_private_dummy(fio->page)) return true; if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED) && f2fs_is_checkpointed_data(sbi, fio->old_blkaddr))) @@ -2540,7 +2541,7 @@ int f2fs_do_write_data_page(struct f2fs_io_info *fio) /* This page is already truncated */ if (fio->old_blkaddr == NULL_ADDR) { ClearPageUptodate(page); - clear_cold_data(page); + clear_page_private_gcing(page); goto out_writepage; } got_it: @@ -2750,7 +2751,7 @@ out: inode_dec_dirty_pages(inode); if (err) { ClearPageUptodate(page); - clear_cold_data(page); + clear_page_private_gcing(page); } if (wbc->for_reclaim) { @@ -3224,7 +3225,7 @@ restart: f2fs_do_read_inline_data(page, ipage); set_inode_flag(inode, FI_DATA_EXIST); if (inode->i_nlink) - set_inline_node(ipage); + set_page_private_inline(ipage); } else { err = f2fs_convert_inline_page(&dn, page); if (err) @@ -3615,12 +3616,13 @@ void f2fs_invalidate_page(struct page *page, unsigned int offset, } } - clear_cold_data(page); + clear_page_private_gcing(page); - if (IS_ATOMIC_WRITTEN_PAGE(page)) + if (page_private_atomic(page)) return f2fs_drop_inmem_page(inode, page); - f2fs_clear_page_private(page); + detach_page_private(page); + set_page_private(page, 0); } int f2fs_release_page(struct page *page, gfp_t wait) @@ -3630,11 +3632,13 @@ int f2fs_release_page(struct page *page, gfp_t wait) return 0; /* This is atomic written page, keep Private */ - if (IS_ATOMIC_WRITTEN_PAGE(page)) + if (page_private_atomic(page)) 
return 0; - clear_cold_data(page); - f2fs_clear_page_private(page); + clear_page_private_gcing(page); + + detach_page_private(page); + set_page_private(page, 0); return 1; } @@ -3650,7 +3654,7 @@ static int f2fs_set_data_page_dirty(struct page *page) return __set_page_dirty_nobuffers(page); if (f2fs_is_atomic_file(inode) && !f2fs_is_commit_atomic_write(inode)) { - if (!IS_ATOMIC_WRITTEN_PAGE(page)) { + if (!page_private_atomic(page)) { f2fs_register_inmem_page(inode, page); return 1; } @@ -3742,7 +3746,7 @@ int f2fs_migrate_page(struct address_space *mapping, { int rc, extra_count; struct f2fs_inode_info *fi = F2FS_I(mapping->host); - bool atomic_written = IS_ATOMIC_WRITTEN_PAGE(page); + bool atomic_written = page_private_atomic(page); BUG_ON(PageWriteback(page)); @@ -3778,8 +3782,13 @@ int f2fs_migrate_page(struct address_space *mapping, } if (PagePrivate(page)) { - f2fs_set_page_private(newpage, page_private(page)); - f2fs_clear_page_private(page); + set_page_private(newpage, page_private(page)); + SetPagePrivate(newpage); + get_page(newpage); + + set_page_private(page, 0); + ClearPagePrivate(page); + put_page(page); } if (mode != MIGRATE_SYNC_NO_COPY) diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index dc7ce79672b8..96dcc4aca639 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -929,11 +929,15 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page, !f2fs_truncate_hole(dir, page->index, page->index + 1)) { f2fs_clear_page_cache_dirty_tag(page); clear_page_dirty_for_io(page); - f2fs_clear_page_private(page); ClearPageUptodate(page); - clear_cold_data(page); + + clear_page_private_gcing(page); + inode_dec_dirty_pages(dir); f2fs_remove_dirty_inode(dir); + + detach_page_private(page); + set_page_private(page, 0); } f2fs_put_page(page, 1); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index c83d90125ebd..f26a1c34102a 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1291,17 +1291,85 @@ enum { */ }; -/* - * this value is set in page as a private data which indicate that - * the page is atomically written, and it is in inmem_pages list. - */ -#define ATOMIC_WRITTEN_PAGE ((unsigned long)-1) -#define DUMMY_WRITTEN_PAGE ((unsigned long)-2) +static inline int f2fs_test_bit(unsigned int nr, char *addr); +static inline void f2fs_set_bit(unsigned int nr, char *addr); +static inline void f2fs_clear_bit(unsigned int nr, char *addr); -#define IS_ATOMIC_WRITTEN_PAGE(page) \ - (page_private(page) == ATOMIC_WRITTEN_PAGE) -#define IS_DUMMY_WRITTEN_PAGE(page) \ - (page_private(page) == DUMMY_WRITTEN_PAGE) +/* + * Layout of f2fs page.private: + * + * Layout A: lowest bit should be 1 + * | bit0 = 1 | bit1 | bit2 | ... | bit MAX | private data .... | + * bit 0 PAGE_PRIVATE_NOT_POINTER + * bit 1 PAGE_PRIVATE_ATOMIC_WRITE + * bit 2 PAGE_PRIVATE_DUMMY_WRITE + * bit 3 PAGE_PRIVATE_ONGOING_MIGRATION + * bit 4 PAGE_PRIVATE_INLINE_INODE + * bit 5 PAGE_PRIVATE_REF_RESOURCE + * bit 6- f2fs private data + * + * Layout B: lowest bit should be 0 + * page.private is a wrapped pointer. 
+ */ +enum { + PAGE_PRIVATE_NOT_POINTER, /* private contains non-pointer data */ + PAGE_PRIVATE_ATOMIC_WRITE, /* data page from atomic write path */ + PAGE_PRIVATE_DUMMY_WRITE, /* data page for padding aligned IO */ + PAGE_PRIVATE_ONGOING_MIGRATION, /* data page which is on-going migrating */ + PAGE_PRIVATE_INLINE_INODE, /* inode page contains inline data */ + PAGE_PRIVATE_REF_RESOURCE, /* dirty page has referenced resources */ + PAGE_PRIVATE_MAX +}; + +#define PAGE_PRIVATE_GET_FUNC(name, flagname) \ +static inline bool page_private_##name(struct page *page) \ +{ \ + return test_bit(PAGE_PRIVATE_NOT_POINTER, &page_private(page)) && \ + test_bit(PAGE_PRIVATE_##flagname, &page_private(page)); \ +} + +#define PAGE_PRIVATE_SET_FUNC(name, flagname) \ +static inline void set_page_private_##name(struct page *page) \ +{ \ + if (!PagePrivate(page)) { \ + get_page(page); \ + SetPagePrivate(page); \ + } \ + set_bit(PAGE_PRIVATE_NOT_POINTER, &page_private(page)); \ + set_bit(PAGE_PRIVATE_##flagname, &page_private(page)); \ +} + +#define PAGE_PRIVATE_CLEAR_FUNC(name, flagname) \ +static inline void clear_page_private_##name(struct page *page) \ +{ \ + clear_bit(PAGE_PRIVATE_##flagname, &page_private(page)); \ + if (page_private(page) == 1 << PAGE_PRIVATE_NOT_POINTER) { \ + set_page_private(page, 0); \ + if (PagePrivate(page)) { \ + ClearPagePrivate(page); \ + put_page(page); \ + }\ + } \ +} + +PAGE_PRIVATE_GET_FUNC(nonpointer, NOT_POINTER); +PAGE_PRIVATE_GET_FUNC(reference, REF_RESOURCE); +PAGE_PRIVATE_GET_FUNC(inline, INLINE_INODE); +PAGE_PRIVATE_GET_FUNC(gcing, ONGOING_MIGRATION); +PAGE_PRIVATE_GET_FUNC(atomic, ATOMIC_WRITE); +PAGE_PRIVATE_GET_FUNC(dummy, DUMMY_WRITE); + +PAGE_PRIVATE_SET_FUNC(reference, REF_RESOURCE); +PAGE_PRIVATE_SET_FUNC(inline, INLINE_INODE); +PAGE_PRIVATE_SET_FUNC(gcing, ONGOING_MIGRATION); +PAGE_PRIVATE_SET_FUNC(atomic, ATOMIC_WRITE); +PAGE_PRIVATE_SET_FUNC(dummy, DUMMY_WRITE); + +PAGE_PRIVATE_CLEAR_FUNC(reference, REF_RESOURCE); +PAGE_PRIVATE_CLEAR_FUNC(inline, INLINE_INODE); +PAGE_PRIVATE_CLEAR_FUNC(gcing, ONGOING_MIGRATION); +PAGE_PRIVATE_CLEAR_FUNC(atomic, ATOMIC_WRITE); +PAGE_PRIVATE_CLEAR_FUNC(dummy, DUMMY_WRITE); /* For compression */ enum compress_algorithm_type { @@ -3169,20 +3237,6 @@ static inline bool __is_valid_data_blkaddr(block_t blkaddr) return true; } -static inline void f2fs_set_page_private(struct page *page, - unsigned long data) -{ - if (PagePrivate(page)) - return; - - attach_page_private(page, (void *)data); -} - -static inline void f2fs_clear_page_private(struct page *page) -{ - detach_page_private(page); -} - /* * file.c */ diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 8d1f17ab94d8..ff54db6eb1a1 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -1336,7 +1336,7 @@ static int move_data_page(struct inode *inode, block_t bidx, int gc_type, goto out; } set_page_dirty(page); - set_cold_data(page); + set_page_private_gcing(page); } else { struct f2fs_io_info fio = { .sbi = F2FS_I_SB(inode), @@ -1362,11 +1362,11 @@ retry: f2fs_remove_dirty_inode(inode); } - set_cold_data(page); + set_page_private_gcing(page); err = f2fs_do_write_data_page(&fio); if (err) { - clear_cold_data(page); + clear_page_private_gcing(page); if (err == -ENOMEM) { congestion_wait(BLK_RW_ASYNC, DEFAULT_IO_TIMEOUT); diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index 92652ca7a7c8..56a20d5c15da 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -173,7 +173,7 @@ int f2fs_convert_inline_page(struct dnode_of_data *dn, struct page *page) /* clear inline data and flag after data writeback 
*/ f2fs_truncate_inline_inode(dn->inode, dn->inode_page, 0); - clear_inline_node(dn->inode_page); + clear_page_private_inline(dn->inode_page); clear_out: stat_dec_inline_inode(dn->inode); clear_inode_flag(dn->inode, FI_INLINE_DATA); @@ -255,7 +255,7 @@ int f2fs_write_inline_data(struct inode *inode, struct page *page) set_inode_flag(inode, FI_APPEND_WRITE); set_inode_flag(inode, FI_DATA_EXIST); - clear_inline_node(dn.inode_page); + clear_page_private_inline(dn.inode_page); f2fs_put_dnode(&dn); return 0; } diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index b401f08569f7..cbda7ca3b3be 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -646,7 +646,7 @@ void f2fs_update_inode(struct inode *inode, struct page *node_page) /* deleted inode */ if (inode->i_nlink == 0) - clear_inline_node(node_page); + clear_page_private_inline(node_page); F2FS_I(inode)->i_disk_time[0] = inode->i_atime; F2FS_I(inode)->i_disk_time[1] = inode->i_ctime; diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index e67ce5f13b98..3a8f7afa5059 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1860,8 +1860,8 @@ continue_unlock: } /* flush inline_data, if it's async context. */ - if (is_inline_node(page)) { - clear_inline_node(page); + if (page_private_inline(page)) { + clear_page_private_inline(page); unlock_page(page); flush_inline_data(sbi, ino_of_node(page)); continue; @@ -1941,8 +1941,8 @@ continue_unlock: goto write_node; /* flush inline_data */ - if (is_inline_node(page)) { - clear_inline_node(page); + if (page_private_inline(page)) { + clear_page_private_inline(page); unlock_page(page); flush_inline_data(sbi, ino_of_node(page)); goto lock_node; @@ -2096,7 +2096,7 @@ static int f2fs_set_node_page_dirty(struct page *page) if (!PageDirty(page)) { __set_page_dirty_nobuffers(page); inc_page_count(F2FS_P_SB(page), F2FS_DIRTY_NODES); - f2fs_set_page_private(page, 0); + set_page_private_reference(page); return 1; } return 0; diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h index 7a45c0f10629..d85e8659cfda 100644 --- a/fs/f2fs/node.h +++ b/fs/f2fs/node.h @@ -389,20 +389,6 @@ static inline nid_t get_nid(struct page *p, int off, bool i) * - Mark cold node blocks in their node footer * - Mark cold data pages in page cache */ -static inline int is_cold_data(struct page *page) -{ - return PageChecked(page); -} - -static inline void set_cold_data(struct page *page) -{ - SetPageChecked(page); -} - -static inline void clear_cold_data(struct page *page) -{ - ClearPageChecked(page); -} static inline int is_node(struct page *page, int type) { @@ -414,21 +400,6 @@ static inline int is_node(struct page *page, int type) #define is_fsync_dnode(page) is_node(page, FSYNC_BIT_SHIFT) #define is_dent_dnode(page) is_node(page, DENT_BIT_SHIFT) -static inline int is_inline_node(struct page *page) -{ - return PageChecked(page); -} - -static inline void set_inline_node(struct page *page) -{ - SetPageChecked(page); -} - -static inline void clear_inline_node(struct page *page) -{ - ClearPageChecked(page); -} - static inline void set_cold_node(struct page *page, bool is_dir) { struct f2fs_node *rn = F2FS_NODE(page); diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 51dc79fad4fe..8668df7870d0 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -186,10 +186,7 @@ void f2fs_register_inmem_page(struct inode *inode, struct page *page) { struct inmem_pages *new; - if (PagePrivate(page)) - set_page_private(page, (unsigned long)ATOMIC_WRITTEN_PAGE); - else - f2fs_set_page_private(page, ATOMIC_WRITTEN_PAGE); + set_page_private_atomic(page); new = 
f2fs_kmem_cache_alloc(inmem_entry_slab, GFP_NOFS); @@ -272,9 +269,10 @@ next: /* we don't need to invalidate this in the sccessful status */ if (drop || recover) { ClearPageUptodate(page); - clear_cold_data(page); + clear_page_private_gcing(page); } - f2fs_clear_page_private(page); + detach_page_private(page); + set_page_private(page, 0); f2fs_put_page(page, 1); list_del(&cur->list); @@ -357,7 +355,7 @@ void f2fs_drop_inmem_page(struct inode *inode, struct page *page) struct list_head *head = &fi->inmem_pages; struct inmem_pages *cur = NULL; - f2fs_bug_on(sbi, !IS_ATOMIC_WRITTEN_PAGE(page)); + f2fs_bug_on(sbi, !page_private_atomic(page)); mutex_lock(&fi->inmem_lock); list_for_each_entry(cur, head, list) { @@ -373,9 +371,12 @@ void f2fs_drop_inmem_page(struct inode *inode, struct page *page) kmem_cache_free(inmem_entry_slab, cur); ClearPageUptodate(page); - f2fs_clear_page_private(page); + clear_page_private_atomic(page); f2fs_put_page(page, 0); + detach_page_private(page); + set_page_private(page, 0); + trace_f2fs_commit_inmem_page(page, INMEM_INVALIDATE); } @@ -3289,7 +3290,7 @@ static int __get_segment_type_6(struct f2fs_io_info *fio) if (fio->type == DATA) { struct inode *inode = fio->page->mapping->host; - if (is_cold_data(fio->page)) { + if (page_private_gcing(fio->page)) { if (fio->sbi->am.atgc_enabled && (fio->io_type == FS_DATA_IO) && (fio->sbi->gc_mode != GC_URGENT_HIGH)) From cad83c968c2ebe97905f900326988ed37146c347 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Fri, 7 May 2021 18:10:38 +0800 Subject: [PATCH 04/32] f2fs: fix to avoid racing on fsync_entry_slab by multi filesystem instances As syzbot reported, there is an use-after-free issue during f2fs recovery: Use-after-free write at 0xffff88823bc16040 (in kfence-#10): kmem_cache_destroy+0x1f/0x120 mm/slab_common.c:486 f2fs_recover_fsync_data+0x75b0/0x8380 fs/f2fs/recovery.c:869 f2fs_fill_super+0x9393/0xa420 fs/f2fs/super.c:3945 mount_bdev+0x26c/0x3a0 fs/super.c:1367 legacy_get_tree+0xea/0x180 fs/fs_context.c:592 vfs_get_tree+0x86/0x270 fs/super.c:1497 do_new_mount fs/namespace.c:2905 [inline] path_mount+0x196f/0x2be0 fs/namespace.c:3235 do_mount fs/namespace.c:3248 [inline] __do_sys_mount fs/namespace.c:3456 [inline] __se_sys_mount+0x2f9/0x3b0 fs/namespace.c:3433 do_syscall_64+0x3f/0xb0 arch/x86/entry/common.c:47 entry_SYSCALL_64_after_hwframe+0x44/0xae The root cause is multi f2fs filesystem instances can race on accessing global fsync_entry_slab pointer, result in use-after-free issue of slab cache, fixes to init/destroy this slab cache only once during module init/destroy procedure to avoid this issue. 
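For illustration only (this sketch is not part of the patch or of the syzbot report), the problematic lifetime can be pictured as two mounts racing on the single global pointer:

	  mount A                                 mount B
	  fsync_entry_slab = kmem_cache_create()
	                                          fsync_entry_slab = kmem_cache_create()
	  recovery finishes
	  kmem_cache_destroy(fsync_entry_slab)    <- destroys the cache B is still
	                                             allocating from (and leaks A's),
	                                             so B hits a use-after-free

Creating the cache once in module init and destroying it in module exit, as the patch below does, ties its lifetime to the module instead of to an individual mount.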
Reported-by: syzbot+9d90dad32dd9727ed084@syzkaller.appspotmail.com Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 2 ++ fs/f2fs/recovery.c | 23 ++++++++++++++--------- fs/f2fs/super.c | 8 +++++++- 3 files changed, 23 insertions(+), 10 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index f26a1c34102a..c0bead0df66a 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -3620,6 +3620,8 @@ void f2fs_destroy_garbage_collection_cache(void); */ int f2fs_recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only); bool f2fs_space_for_roll_forward(struct f2fs_sb_info *sbi); +int __init f2fs_create_recovery_cache(void); +void f2fs_destroy_recovery_cache(void); /* * debug.c diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index 422146c6d866..4b2f7d1d5bf4 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -788,13 +788,6 @@ int f2fs_recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only) quota_enabled = f2fs_enable_quota_files(sbi, s_flags & SB_RDONLY); #endif - fsync_entry_slab = f2fs_kmem_cache_create("f2fs_fsync_inode_entry", - sizeof(struct fsync_inode_entry)); - if (!fsync_entry_slab) { - err = -ENOMEM; - goto out; - } - INIT_LIST_HEAD(&inode_list); INIT_LIST_HEAD(&tmp_inode_list); INIT_LIST_HEAD(&dir_list); @@ -867,8 +860,6 @@ skip: } } - kmem_cache_destroy(fsync_entry_slab); -out: #ifdef CONFIG_QUOTA /* Turn quotas off */ if (quota_enabled) @@ -878,3 +869,17 @@ out: return ret ? ret : err; } + +int __init f2fs_create_recovery_cache(void) +{ + fsync_entry_slab = f2fs_kmem_cache_create("f2fs_fsync_inode_entry", + sizeof(struct fsync_inode_entry)); + if (!fsync_entry_slab) + return -ENOMEM; + return 0; +} + +void f2fs_destroy_recovery_cache(void) +{ + kmem_cache_destroy(fsync_entry_slab); +} diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 7d325bfaf65a..096492caaa6b 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -4227,9 +4227,12 @@ static int __init init_f2fs_fs(void) err = f2fs_create_checkpoint_caches(); if (err) goto free_segment_manager_caches; - err = f2fs_create_extent_cache(); + err = f2fs_create_recovery_cache(); if (err) goto free_checkpoint_caches; + err = f2fs_create_extent_cache(); + if (err) + goto free_recovery_cache; err = f2fs_create_garbage_collection_cache(); if (err) goto free_extent_cache; @@ -4278,6 +4281,8 @@ free_garbage_collection_cache: f2fs_destroy_garbage_collection_cache(); free_extent_cache: f2fs_destroy_extent_cache(); +free_recovery_cache: + f2fs_destroy_recovery_cache(); free_checkpoint_caches: f2fs_destroy_checkpoint_caches(); free_segment_manager_caches: @@ -4303,6 +4308,7 @@ static void __exit exit_f2fs_fs(void) f2fs_exit_sysfs(); f2fs_destroy_garbage_collection_cache(); f2fs_destroy_extent_cache(); + f2fs_destroy_recovery_cache(); f2fs_destroy_checkpoint_caches(); f2fs_destroy_segment_manager_caches(); f2fs_destroy_node_manager_caches(); From d927ccfccb009ede24448d69c08b12e7c8a6979b Mon Sep 17 00:00:00 2001 From: Shin'ichiro Kawasaki Date: Mon, 10 May 2021 20:24:44 +0900 Subject: [PATCH 05/32] f2fs: Prevent swap file in LFS mode The kernel writes to swap files on f2fs directly without the assistance of the filesystem. This direct write by kernel can be non-sequential even when the f2fs is in LFS mode. Such non-sequential write conflicts with the LFS semantics. Especially when f2fs is set up on zoned block devices, the non-sequential write causes unaligned write command errors. To avoid the non-sequential writes to swap files, prevent swap file activation when the filesystem is in LFS mode. 
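To sketch the conflict (an illustrative summary, not code taken from the patch): once swapon() has mapped the swap file's extents, swap I/O bypasses f2fs entirely, roughly

	swap_writepage() / swap_readpage()
	    -> bio submitted directly to the block device at
	       extent_start + swap_offset

so writes land at whatever offsets the mm layer picks inside the preallocated extents. LFS mode only ever appends within a segment, and on zoned block devices such out-of-order writes are rejected as unaligned write commands, hence the -EINVAL returned at swapon time below.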
Fixes: 4969c06a0d83 ("f2fs: support swap file w/ DIO") Signed-off-by: Shin'ichiro Kawasaki Cc: stable@vger.kernel.org # v5.10+ Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index d352f2bea369..d4795eda12fa 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -4076,6 +4076,12 @@ static int f2fs_swap_activate(struct swap_info_struct *sis, struct file *file, if (f2fs_readonly(F2FS_I_SB(inode)->sb)) return -EROFS; + if (f2fs_lfs_mode(F2FS_I_SB(inode))) { + f2fs_err(F2FS_I_SB(inode), + "Swapfile not supported in LFS mode"); + return -EINVAL; + } + ret = f2fs_convert_inline_inode(inode); if (ret) return ret; From 89e53ff1651a61cf2abef9356e2f60d0086215be Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 11 May 2021 18:17:34 +0800 Subject: [PATCH 06/32] f2fs: atgc: fix to set default age threshold Default age threshold value is missed to set, fix it. Fixes: 093749e296e2 ("f2fs: support age threshold based garbage collection") Reported-by: Sahitya Tummala Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/gc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index ff54db6eb1a1..bcb3b488dbca 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -1822,6 +1822,7 @@ static void init_atgc_management(struct f2fs_sb_info *sbi) am->candidate_ratio = DEF_GC_THREAD_CANDIDATE_RATIO; am->max_candidate_count = DEF_GC_THREAD_MAX_CANDIDATE_COUNT; am->age_weight = DEF_GC_THREAD_AGE_WEIGHT; + am->age_threshold = DEF_GC_THREAD_AGE_THRESHOLD; } void f2fs_build_gc_manager(struct f2fs_sb_info *sbi) From fbec3b963ae1d5610602c03336597cf0396cda62 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 12 May 2021 17:52:56 +0800 Subject: [PATCH 07/32] f2fs: compress: remove unneeded f2fs_put_dnode() If we don't initialize dn.inode_page for f2fs_get_block(), f2fs_get_block() will call f2fs_put_dnode() itself, so let's remove unneeded f2fs_put_dnode() in f2fs_vm_page_mkwrite(). Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index ceb575f99048..895dfcf45fe1 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -117,7 +117,6 @@ static vm_fault_t f2fs_vm_page_mkwrite(struct vm_fault *vmf) f2fs_do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO, true); set_new_dnode(&dn, inode, NULL, NULL, 0); err = f2fs_get_block(&dn, page->index); - f2fs_put_dnode(&dn); f2fs_do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO, false); } From 91f0fb6903ed30370135381f10c02a10c7872cdc Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 12 May 2021 17:52:57 +0800 Subject: [PATCH 08/32] f2fs: compress: clean up parameter of __f2fs_cluster_blocks() Previously, in order to reuse __f2fs_cluster_blocks(), f2fs_is_compressed_cluster() assigned a compress_ctx type variable, which is used to pass few parameters (cc.inode, cc.cluster_size, cc.cluster_idx), it's wasteful to allocate such large space in stack. Let's clean up parameters of __f2fs_cluster_blocks() to avoid that. 
Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/compress.c | 33 +++++++++++++-------------------- 1 file changed, 13 insertions(+), 20 deletions(-) diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c index 6daa71ef0565..ad30f8dde541 100644 --- a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -892,14 +892,17 @@ static bool cluster_has_invalid_data(struct compress_ctx *cc) return false; } -static int __f2fs_cluster_blocks(struct compress_ctx *cc, bool compr) +static int __f2fs_cluster_blocks(struct inode *inode, + unsigned int cluster_idx, bool compr) { struct dnode_of_data dn; + unsigned int cluster_size = F2FS_I(inode)->i_cluster_size; + unsigned int start_idx = cluster_idx << + F2FS_I(inode)->i_log_cluster_size; int ret; - set_new_dnode(&dn, cc->inode, NULL, NULL, 0); - ret = f2fs_get_dnode_of_data(&dn, start_idx_of_cluster(cc), - LOOKUP_NODE); + set_new_dnode(&dn, inode, NULL, NULL, 0); + ret = f2fs_get_dnode_of_data(&dn, start_idx, LOOKUP_NODE); if (ret) { if (ret == -ENOENT) ret = 0; @@ -910,7 +913,7 @@ static int __f2fs_cluster_blocks(struct compress_ctx *cc, bool compr) int i; ret = 1; - for (i = 1; i < cc->cluster_size; i++) { + for (i = 1; i < cluster_size; i++) { block_t blkaddr; blkaddr = data_blkaddr(dn.inode, @@ -932,25 +935,15 @@ fail: /* return # of compressed blocks in compressed cluster */ static int f2fs_compressed_blocks(struct compress_ctx *cc) { - return __f2fs_cluster_blocks(cc, true); + return __f2fs_cluster_blocks(cc->inode, cc->cluster_idx, true); } /* return # of valid blocks in compressed cluster */ -static int f2fs_cluster_blocks(struct compress_ctx *cc) -{ - return __f2fs_cluster_blocks(cc, false); -} - int f2fs_is_compressed_cluster(struct inode *inode, pgoff_t index) { - struct compress_ctx cc = { - .inode = inode, - .log_cluster_size = F2FS_I(inode)->i_log_cluster_size, - .cluster_size = F2FS_I(inode)->i_cluster_size, - .cluster_idx = index >> F2FS_I(inode)->i_log_cluster_size, - }; - - return f2fs_cluster_blocks(&cc); + return __f2fs_cluster_blocks(inode, + index >> F2FS_I(inode)->i_log_cluster_size, + false); } static bool cluster_may_compress(struct compress_ctx *cc) @@ -1001,7 +994,7 @@ static int prepare_compress_overwrite(struct compress_ctx *cc, bool prealloc; retry: - ret = f2fs_cluster_blocks(cc); + ret = f2fs_is_compressed_cluster(cc->inode, start_idx); if (ret <= 0) return ret; From 4f55dc2a988b304d3595887f1161151d1c3b1f33 Mon Sep 17 00:00:00 2001 From: Tom Rix Date: Sat, 15 May 2021 11:09:41 -0700 Subject: [PATCH 09/32] f2fs: return success if there is no work to do Static analysis reports this problem file.c:3206:2: warning: Undefined or garbage value returned to caller return err; ^~~~~~~~~~ err is only set if there is some work to do. Because the loop returns immediately on an error, if all the work was done, a 0 would be returned. Instead of checking the unlikely case that there was no work to do, change the return of err to 0. 
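A simplified sketch of the pattern being fixed (hypothetical helper names, not the real f2fs_precache_extents() body):

	int precache(void)
	{
		int err;				/* never initialized */

		while (have_more_extents()) {		/* may be false on entry */
			err = map_one_extent();
			if (err)
				return err;		/* every failure returns here */
		}
		return err;	/* reached only when all iterations succeeded,
				 * or when the loop never ran and err is garbage;
				 * returning 0 is therefore always correct */
	}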
Signed-off-by: Tom Rix Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 895dfcf45fe1..a6be76289452 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -3202,7 +3202,7 @@ int f2fs_precache_extents(struct inode *inode) map.m_lblk = m_next_extent; } - return err; + return 0; } static int f2fs_ioc_precache_extents(struct file *filp, unsigned long arg) From 0dd571785d61528d62cdd8aa49d76bc6085152fe Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 18 May 2021 09:57:54 +0800 Subject: [PATCH 10/32] f2fs: add MODULE_SOFTDEP to ensure crc32 is included in the initramfs As marcosfrm reported in bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=213089 Initramfs generators rely on "pre" softdeps (and "depends") to include additional required modules. F2FS does not declare "pre: crc32" softdep. Then every generator (dracut, mkinitcpio...) has to maintain a hardcoded list for this purpose. Hence let's use MODULE_SOFTDEP("pre: crc32") in f2fs code. Fixes: 43b6573bac95 ("f2fs: use cryptoapi crc32 functions") Reported-by: marcosfrm Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 096492caaa6b..b29de80ab60e 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -4321,4 +4321,5 @@ module_exit(exit_f2fs_fs) MODULE_AUTHOR("Samsung Electronics's Praesto Team"); MODULE_DESCRIPTION("Flash Friendly File System"); MODULE_LICENSE("GPL"); +MODULE_SOFTDEP("pre: crc32"); From e3c548323d32b11d3fba71f993e17b0ccdeca5cb Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 25 May 2021 11:10:53 -0700 Subject: [PATCH 11/32] f2fs: let's allow compression for mmap files This patch allows to compress mmap files. E.g., for so files. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/compress.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c index ad30f8dde541..279f79f4fb1c 100644 --- a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -952,8 +952,6 @@ static bool cluster_may_compress(struct compress_ctx *cc) return false; if (f2fs_is_atomic_file(cc->inode)) return false; - if (f2fs_is_mmap_file(cc->inode)) - return false; if (!f2fs_cluster_is_full(cc)) return false; if (unlikely(f2fs_cp_error(F2FS_I_SB(cc->inode)))) From 4a67d9b07ac8dce7f1034e0d887f2f4ee00fe118 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 18 May 2021 17:54:58 +0800 Subject: [PATCH 12/32] f2fs: compress: fix to disallow temp extension This patch restricts to configure compress extension as format of: [filename + '.' + extension] rather than: [filename + '.' + extension + (optional: '.' + temp extension)] in order to avoid to enable compression incorrectly: 1. compress_extension=so 2. touch file.soa 3. 
touch file.so.tmp Fixes: 4c8ff7095bef ("f2fs: support data compression") Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/namei.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index a9cd9cf97229..d4139e166b95 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -153,7 +153,8 @@ fail_drop: return ERR_PTR(err); } -static inline int is_extension_exist(const unsigned char *s, const char *sub) +static inline int is_extension_exist(const unsigned char *s, const char *sub, + bool tmp_ext) { size_t slen = strlen(s); size_t sublen = strlen(sub); @@ -169,6 +170,13 @@ static inline int is_extension_exist(const unsigned char *s, const char *sub) if (slen < sublen + 2) return 0; + if (!tmp_ext) { + /* file has no temp extension */ + if (s[slen - sublen - 1] != '.') + return 0; + return !strncasecmp(s + slen - sublen, sub, sublen); + } + for (i = 1; i < slen - sublen; i++) { if (s[i] != '.') continue; @@ -194,7 +202,7 @@ static inline void set_file_temperature(struct f2fs_sb_info *sbi, struct inode * hot_count = sbi->raw_super->hot_ext_count; for (i = 0; i < cold_count + hot_count; i++) { - if (is_extension_exist(name, extlist[i])) + if (is_extension_exist(name, extlist[i], true)) break; } @@ -295,7 +303,7 @@ static void set_compress_inode(struct f2fs_sb_info *sbi, struct inode *inode, hot_count = sbi->raw_super->hot_ext_count; for (i = cold_count; i < cold_count + hot_count; i++) { - if (is_extension_exist(name, extlist[i])) { + if (is_extension_exist(name, extlist[i], false)) { up_read(&sbi->sb_lock); return; } @@ -306,7 +314,7 @@ static void set_compress_inode(struct f2fs_sb_info *sbi, struct inode *inode, ext = F2FS_OPTION(sbi).extensions; for (i = 0; i < ext_cnt; i++) { - if (!is_extension_exist(name, ext[i])) + if (!is_extension_exist(name, ext[i], false)) continue; set_compress_context(inode); From 8939a8489ca64b56f49428b0d882709080a928d4 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 12 May 2021 10:07:19 +0800 Subject: [PATCH 13/32] f2fs: atgc: export entries for better tunability via sysfs This patch export below sysfs entries for better ATGC tunability. /sys/fs/f2fs//atgc_candidate_ratio /sys/fs/f2fs//atgc_candidate_count /sys/fs/f2fs//atgc_age_weight /sys/fs/f2fs//atgc_age_threshold Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- Documentation/ABI/testing/sysfs-fs-f2fs | 28 +++++++++++++++++++++++++ fs/f2fs/sysfs.c | 27 ++++++++++++++++++++++++ 2 files changed, 55 insertions(+) diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs index 4849b8e84e42..5088281e312e 100644 --- a/Documentation/ABI/testing/sysfs-fs-f2fs +++ b/Documentation/ABI/testing/sysfs-fs-f2fs @@ -438,3 +438,31 @@ Description: Show the count of inode newly enabled for compression since mount. Note that when the compression is disabled for the files, this count doesn't decrease. If you write "0" here, you can initialize compr_new_inode to "0". + +What: /sys/fs/f2fs//atgc_candidate_ratio +Date: May 2021 +Contact: "Chao Yu" +Description: When ATGC is on, it controls candidate ratio in order to limit total + number of potential victim in all candidates, the value should be in + range of [0, 100], by default it was initialized as 20(%). + +What: /sys/fs/f2fs//atgc_candidate_count +Date: May 2021 +Contact: "Chao Yu" +Description: When ATGC is on, it controls candidate count in order to limit total + number of potential victim in all candidates, by default it was + initialized as 10 (sections). 
+ +What: /sys/fs/f2fs//atgc_age_weight +Date: May 2021 +Contact: "Chao Yu" +Description: When ATGC is on, it controls age weight to balance weight proportion + in between aging and valid blocks, the value should be in range of + [0, 100], by default it was initialized as 60(%). + +What: /sys/fs/f2fs//atgc_age_threshold +Date: May 2021 +Contact: "Chao Yu" +Description: When ATGC is on, it controls age threshold to bypass GCing young + candidates whose age is not beyond the threshold, by default it was + initialized as 604800 seconds (equals to 7 days). diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index 39b522ec73e7..dc71bc968c72 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -37,6 +37,7 @@ enum { #endif RESERVED_BLOCKS, /* struct f2fs_sb_info */ CPRC_INFO, /* struct ckpt_req_control */ + ATGC_INFO, /* struct atgc_management */ }; struct f2fs_attr { @@ -75,6 +76,8 @@ static unsigned char *__struct_ptr(struct f2fs_sb_info *sbi, int struct_type) #endif else if (struct_type == CPRC_INFO) return (unsigned char *)&sbi->cprc_info; + else if (struct_type == ATGC_INFO) + return (unsigned char *)&sbi->am; return NULL; } @@ -495,6 +498,20 @@ out: } #endif + if (!strcmp(a->attr.name, "atgc_candidate_ratio")) { + if (t > 100) + return -EINVAL; + sbi->am.candidate_ratio = t; + return count; + } + + if (!strcmp(a->attr.name, "atgc_age_weight")) { + if (t > 100) + return -EINVAL; + sbi->am.age_weight = t; + return count; + } + *ui = (unsigned int)t; return count; @@ -710,6 +727,11 @@ F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, compr_written_block, compr_written_block); F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, compr_saved_block, compr_saved_block); F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, compr_new_inode, compr_new_inode); #endif +/* For ATGC */ +F2FS_RW_ATTR(ATGC_INFO, atgc_management, atgc_candidate_ratio, candidate_ratio); +F2FS_RW_ATTR(ATGC_INFO, atgc_management, atgc_candidate_count, max_candidate_count); +F2FS_RW_ATTR(ATGC_INFO, atgc_management, atgc_age_weight, age_weight); +F2FS_RW_ATTR(ATGC_INFO, atgc_management, atgc_age_threshold, age_threshold); #define ATTR_LIST(name) (&f2fs_attr_##name.attr) static struct attribute *f2fs_attrs[] = { @@ -778,6 +800,11 @@ static struct attribute *f2fs_attrs[] = { ATTR_LIST(compr_saved_block), ATTR_LIST(compr_new_inode), #endif + /* For ATGC */ + ATTR_LIST(atgc_candidate_ratio), + ATTR_LIST(atgc_candidate_count), + ATTR_LIST(atgc_age_weight), + ATTR_LIST(atgc_age_threshold), NULL, }; ATTRIBUTE_GROUPS(f2fs); From 8f1d49832636d514e949b29ce64370ebebf6d6d2 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 12 May 2021 17:52:58 +0800 Subject: [PATCH 14/32] f2fs: compress: remove unneeded preallocation We will reserve iblocks for compression saved, so during compressed cluster overwrite, we don't need to preallocate blocks for later write. In addition, it adds a bug_on to detect wrong reserved iblock number in __f2fs_cluster_blocks(). Bug fix in the original patch by Jaegeuk: If we released compressed blocks having an immutable bit, we can see less number of compressed block addresses. Let's fix wrong BUG_ON. 
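For context on why the new assertion holds (my reading of the block accounting, stated as an assumption rather than quoted from the patch): in a compressed cluster every dnode slot keeps a non-NULL address, e.g. for a 4-page cluster compressed into one block

	[COMPRESS_ADDR][compressed blk][NEW_ADDR][NEW_ADDR]

so counting with compr == false should always return cluster_size, while compr == true counts only the real data blocks. The exception is a cluster whose reserved blocks were dropped by F2FS_IOC_RELEASE_COMPRESS_BLOCKS, which is exactly what the immutable-bit test excludes.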
Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/compress.c | 27 +++------------------------ fs/f2fs/file.c | 4 ---- 2 files changed, 3 insertions(+), 28 deletions(-) diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c index 279f79f4fb1c..bec92ff5ee7d 100644 --- a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -926,6 +926,9 @@ static int __f2fs_cluster_blocks(struct inode *inode, ret++; } } + + f2fs_bug_on(F2FS_I_SB(inode), + !compr && ret != cluster_size && !IS_IMMUTABLE(inode)); } fail: f2fs_put_dnode(&dn); @@ -984,21 +987,16 @@ static int prepare_compress_overwrite(struct compress_ctx *cc, struct f2fs_sb_info *sbi = F2FS_I_SB(cc->inode); struct address_space *mapping = cc->inode->i_mapping; struct page *page; - struct dnode_of_data dn; sector_t last_block_in_bio; unsigned fgp_flag = FGP_LOCK | FGP_WRITE | FGP_CREAT; pgoff_t start_idx = start_idx_of_cluster(cc); int i, ret; - bool prealloc; retry: ret = f2fs_is_compressed_cluster(cc->inode, start_idx); if (ret <= 0) return ret; - /* compressed case */ - prealloc = (ret < cc->cluster_size); - ret = f2fs_init_compress_ctx(cc); if (ret) return ret; @@ -1056,25 +1054,6 @@ release_and_retry: } } - if (prealloc) { - f2fs_do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO, true); - - set_new_dnode(&dn, cc->inode, NULL, NULL, 0); - - for (i = cc->cluster_size - 1; i > 0; i--) { - ret = f2fs_get_block(&dn, start_idx + i); - if (ret) { - i = cc->cluster_size; - break; - } - - if (dn.data_blkaddr != NEW_ADDR) - break; - } - - f2fs_do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO, false); - } - if (likely(!ret)) { *fsdata = cc->rpages; *pagep = cc->rpages[offset_in_cluster(cc, index)]; diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index a6be76289452..4a8c3128b5a5 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -85,10 +85,6 @@ static vm_fault_t f2fs_vm_page_mkwrite(struct vm_fault *vmf) err = ret; goto err; } else if (ret) { - if (ret < F2FS_I(inode)->i_cluster_size) { - err = -EAGAIN; - goto err; - } need_alloc = false; } } From c61404153eb683da9c35aad133131554861ed561 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 25 May 2021 11:39:35 -0700 Subject: [PATCH 15/32] f2fs: introduce FI_COMPRESS_RELEASED instead of using IMMUTABLE bit Once we release compressed blocks, we used to set IMMUTABLE bit. But it turned out it disallows every fs operations which we don't need for compression. Let's just prevent writing data only. 
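To make the contrast concrete (the VFS behaviour summarized here is general background, not part of the diff): IS_IMMUTABLE() is honoured throughout the VFS, so the old approach also rejected unrelated operations such as truncate, rename, unlink and attribute changes on a file whose compressed blocks had merely been released. The new flag is checked only on the data-write paths touched below, e.g.

	if (is_inode_flag_set(inode, FI_COMPRESS_RELEASED))
		return -EPERM;		/* buffered/direct write */

and is persisted on disk through the new F2FS_COMPRESS_RELEASED bit in i_inline.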
Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/compress.c | 3 ++- fs/f2fs/f2fs.h | 6 ++++++ fs/f2fs/file.c | 18 ++++++++++++------ include/linux/f2fs_fs.h | 1 + 4 files changed, 21 insertions(+), 7 deletions(-) diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c index bec92ff5ee7d..1c3e98085591 100644 --- a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -928,7 +928,8 @@ static int __f2fs_cluster_blocks(struct inode *inode, } f2fs_bug_on(F2FS_I_SB(inode), - !compr && ret != cluster_size && !IS_IMMUTABLE(inode)); + !compr && ret != cluster_size && + !is_inode_flag_set(inode, FI_COMPRESS_RELEASED)); } fail: f2fs_put_dnode(&dn); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index c0bead0df66a..eaf57b5f3c4b 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -706,6 +706,7 @@ enum { FI_COMPRESS_CORRUPT, /* indicate compressed cluster is corrupted */ FI_MMAP_FILE, /* indicate file was mmapped */ FI_ENABLE_COMPRESS, /* enable compression in "user" compression mode */ + FI_COMPRESS_RELEASED, /* compressed blocks were released */ FI_MAX, /* max flag, never be used */ }; @@ -2746,6 +2747,7 @@ static inline void __mark_inode_dirty_flag(struct inode *inode, case FI_DATA_EXIST: case FI_INLINE_DOTS: case FI_PIN_FILE: + case FI_COMPRESS_RELEASED: f2fs_mark_inode_dirty_sync(inode, true); } } @@ -2867,6 +2869,8 @@ static inline void get_inline_info(struct inode *inode, struct f2fs_inode *ri) set_bit(FI_EXTRA_ATTR, fi->flags); if (ri->i_inline & F2FS_PIN_FILE) set_bit(FI_PIN_FILE, fi->flags); + if (ri->i_inline & F2FS_COMPRESS_RELEASED) + set_bit(FI_COMPRESS_RELEASED, fi->flags); } static inline void set_raw_inline(struct inode *inode, struct f2fs_inode *ri) @@ -2887,6 +2891,8 @@ static inline void set_raw_inline(struct inode *inode, struct f2fs_inode *ri) ri->i_inline |= F2FS_EXTRA_ATTR; if (is_inode_flag_set(inode, FI_PIN_FILE)) ri->i_inline |= F2FS_PIN_FILE; + if (is_inode_flag_set(inode, FI_COMPRESS_RELEASED)) + ri->i_inline |= F2FS_COMPRESS_RELEASED; } static inline int f2fs_has_extra_attr(struct inode *inode) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 4a8c3128b5a5..4714925e1974 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -63,6 +63,9 @@ static vm_fault_t f2fs_vm_page_mkwrite(struct vm_fault *vmf) if (unlikely(IS_IMMUTABLE(inode))) return VM_FAULT_SIGBUS; + if (is_inode_flag_set(inode, FI_COMPRESS_RELEASED)) + return VM_FAULT_SIGBUS; + if (unlikely(f2fs_cp_error(sbi))) { err = -EIO; goto err; @@ -3420,7 +3423,7 @@ static int f2fs_release_compress_blocks(struct file *filp, unsigned long arg) goto out; } - if (IS_IMMUTABLE(inode)) { + if (is_inode_flag_set(inode, FI_COMPRESS_RELEASED)) { ret = -EINVAL; goto out; } @@ -3429,8 +3432,7 @@ static int f2fs_release_compress_blocks(struct file *filp, unsigned long arg) if (ret) goto out; - F2FS_I(inode)->i_flags |= F2FS_IMMUTABLE_FL; - f2fs_set_inode_flags(inode); + set_inode_flag(inode, FI_COMPRESS_RELEASED); inode->i_ctime = current_time(inode); f2fs_mark_inode_dirty_sync(inode, true); @@ -3585,7 +3587,7 @@ static int f2fs_reserve_compress_blocks(struct file *filp, unsigned long arg) inode_lock(inode); - if (!IS_IMMUTABLE(inode)) { + if (!is_inode_flag_set(inode, FI_COMPRESS_RELEASED)) { ret = -EINVAL; goto unlock_inode; } @@ -3630,8 +3632,7 @@ static int f2fs_reserve_compress_blocks(struct file *filp, unsigned long arg) up_write(&F2FS_I(inode)->i_mmap_sem); if (ret >= 0) { - F2FS_I(inode)->i_flags &= ~F2FS_IMMUTABLE_FL; - f2fs_set_inode_flags(inode); + clear_inode_flag(inode, FI_COMPRESS_RELEASED); inode->i_ctime = 
current_time(inode); f2fs_mark_inode_dirty_sync(inode, true); } @@ -4249,6 +4250,11 @@ static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from) goto unlock; } + if (is_inode_flag_set(inode, FI_COMPRESS_RELEASED)) { + ret = -EPERM; + goto unlock; + } + ret = generic_write_checks(iocb, from); if (ret > 0) { bool preallocated = false; diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index 5487a80617a3..f93000c3a127 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -229,6 +229,7 @@ struct f2fs_extent { #define F2FS_INLINE_DOTS 0x10 /* file having implicit dot dentries */ #define F2FS_EXTRA_ATTR 0x20 /* file having extra attribute */ #define F2FS_PIN_FILE 0x40 /* file should not be gced */ +#define F2FS_COMPRESS_RELEASED 0x80 /* file released compressed blocks */ struct f2fs_inode { __le16 i_mode; /* file mode */ From 833dcd35453713ced96e086daecf7f023709e6a4 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Wed, 26 May 2021 13:05:36 -0700 Subject: [PATCH 16/32] f2fs: logging neatening Update the logging uses that have unnecessary newlines as the f2fs_printk function and so its f2fs_ macro callers already adds one. This allows searching single line logging entries with an easier grep and also avoids unnecessary blank lines in the logging. Miscellanea: o Coalesce formats o Align to open parenthesis Signed-off-by: Joe Perches Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 16 +++++++--------- fs/f2fs/file.c | 12 +++++------- fs/f2fs/gc.c | 4 ++-- fs/f2fs/segment.c | 2 +- fs/f2fs/super.c | 4 ++-- 5 files changed, 17 insertions(+), 21 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index d4795eda12fa..37f4ab79d014 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -3833,7 +3833,7 @@ static int f2fs_is_file_aligned(struct inode *inode) /* hole */ if (!(map.m_flags & F2FS_MAP_FLAGS)) { - f2fs_err(sbi, "Swapfile has holes\n"); + f2fs_err(sbi, "Swapfile has holes"); ret = -ENOENT; goto out; } @@ -3854,9 +3854,8 @@ static int f2fs_is_file_aligned(struct inode *inode) cur_lblock += nr_pblocks; } if (not_aligned) - f2fs_warn(sbi, "Swapfile (%u) is not align to section: \n" - "\t1) creat(), 2) ioctl(F2FS_IOC_SET_PIN_FILE), 3) fallocate()", - not_aligned); + f2fs_warn(sbi, "Swapfile (%u) is not align to section: 1) creat(), 2) ioctl(F2FS_IOC_SET_PIN_FILE), 3) fallocate()", + not_aligned); out: return ret; } @@ -3904,7 +3903,7 @@ static int check_swap_activate_fast(struct swap_info_struct *sis, /* hole */ if (!(map.m_flags & F2FS_MAP_FLAGS)) { - f2fs_err(sbi, "Swapfile has holes\n"); + f2fs_err(sbi, "Swapfile has holes"); ret = -EINVAL; goto out; } @@ -3950,9 +3949,8 @@ static int check_swap_activate_fast(struct swap_info_struct *sis, sis->highest_bit = cur_lblock - 1; if (not_aligned) - f2fs_warn(sbi, "Swapfile (%u) is not align to section: \n" - "\t1) creat(), 2) ioctl(F2FS_IOC_SET_PIN_FILE), 3) fallocate()", - not_aligned); + f2fs_warn(sbi, "Swapfile (%u) is not align to section: 1) creat(), 2) ioctl(F2FS_IOC_SET_PIN_FILE), 3) fallocate()", + not_aligned); out: return ret; } @@ -4060,7 +4058,7 @@ reprobe: out: return ret; bad_bmap: - f2fs_err(sbi, "Swapfile has holes\n"); + f2fs_err(sbi, "Swapfile has holes"); return -EINVAL; } diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 4714925e1974..6afd4562335f 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -3235,7 +3235,7 @@ static int f2fs_ioc_enable_verity(struct file *filp, unsigned long arg) if (!f2fs_sb_has_verity(F2FS_I_SB(inode))) { f2fs_warn(F2FS_I_SB(inode), - 
"Can't enable fs-verity on inode %lu: the verity feature is not enabled on this filesystem.\n", + "Can't enable fs-verity on inode %lu: the verity feature is not enabled on this filesystem", inode->i_ino); return -EOPNOTSUPP; } @@ -4019,9 +4019,8 @@ static int f2fs_ioc_decompress_file(struct file *filp, unsigned long arg) LLONG_MAX); if (ret) - f2fs_warn(sbi, "%s: The file might be partially decompressed " - "(errno=%d). Please delete the file.\n", - __func__, ret); + f2fs_warn(sbi, "%s: The file might be partially decompressed (errno=%d). Please delete the file.", + __func__, ret); out: inode_unlock(inode); file_end_write(filp); @@ -4093,9 +4092,8 @@ static int f2fs_ioc_compress_file(struct file *filp, unsigned long arg) clear_inode_flag(inode, FI_ENABLE_COMPRESS); if (ret) - f2fs_warn(sbi, "%s: The file might be partially compressed " - "(errno=%d). Please delete the file.\n", - __func__, ret); + f2fs_warn(sbi, "%s: The file might be partially compressed (errno=%d). Please delete the file.", + __func__, ret); out: inode_unlock(inode); file_end_write(filp); diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index bcb3b488dbca..ab1c0123904f 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -1031,8 +1031,8 @@ static bool is_alive(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, if (unlikely(check_valid_map(sbi, segno, offset))) { if (!test_and_set_bit(segno, SIT_I(sbi)->invalid_segmap)) { - f2fs_err(sbi, "mismatched blkaddr %u (source_blkaddr %u) in seg %u\n", - blkaddr, source_blkaddr, segno); + f2fs_err(sbi, "mismatched blkaddr %u (source_blkaddr %u) in seg %u", + blkaddr, source_blkaddr, segno); f2fs_bug_on(sbi, 1); } } diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 8668df7870d0..380ef34e1a59 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -3920,7 +3920,7 @@ static int restore_curseg_summaries(struct f2fs_sb_info *sbi) /* sanity check for summary blocks */ if (nats_in_cursum(nat_j) > NAT_JOURNAL_ENTRIES || sits_in_cursum(sit_j) > SIT_JOURNAL_ENTRIES) { - f2fs_err(sbi, "invalid journal entries nats %u sits %u\n", + f2fs_err(sbi, "invalid journal entries nats %u sits %u", nats_in_cursum(nat_j), sits_in_cursum(sit_j)); return -EINVAL; } diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index b29de80ab60e..56f2f2f449c5 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1153,7 +1153,7 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount) } if (test_opt(sbi, DISABLE_CHECKPOINT) && f2fs_lfs_mode(sbi)) { - f2fs_err(sbi, "LFS not compatible with checkpoint=disable\n"); + f2fs_err(sbi, "LFS not compatible with checkpoint=disable"); return -EINVAL; } @@ -3555,7 +3555,7 @@ static int f2fs_scan_devices(struct f2fs_sb_info *sbi) #ifdef CONFIG_BLK_DEV_ZONED if (bdev_zoned_model(FDEV(i).bdev) == BLK_ZONED_HM && !f2fs_sb_has_blkzoned(sbi)) { - f2fs_err(sbi, "Zoned block device feature not enabled\n"); + f2fs_err(sbi, "Zoned block device feature not enabled"); return -EINVAL; } if (bdev_zoned_model(FDEV(i).bdev) != BLK_ZONED_NONE) { From a7d9fe3c33887085a2e10c085d378126314dc222 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 21 May 2021 01:32:53 -0700 Subject: [PATCH 17/32] f2fs: support RO feature Given RO feature in superblock, we don't need to check provisioning/reserve spaces and SSA area. 
Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 3 +++ fs/f2fs/segment.c | 4 ++++ fs/f2fs/super.c | 37 +++++++++++++++++++++++++++++++------ fs/f2fs/sysfs.c | 8 ++++++++ 4 files changed, 46 insertions(+), 6 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index eaf57b5f3c4b..8903c43091f8 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -168,6 +168,7 @@ struct f2fs_mount_info { #define F2FS_FEATURE_SB_CHKSUM 0x0800 #define F2FS_FEATURE_CASEFOLD 0x1000 #define F2FS_FEATURE_COMPRESSION 0x2000 +#define F2FS_FEATURE_RO 0x4000 #define __F2FS_HAS_FEATURE(raw_super, mask) \ ((raw_super->feature & cpu_to_le32(mask)) != 0) @@ -940,6 +941,7 @@ static inline void set_new_dnode(struct dnode_of_data *dn, struct inode *inode, #define NR_CURSEG_DATA_TYPE (3) #define NR_CURSEG_NODE_TYPE (3) #define NR_CURSEG_INMEM_TYPE (2) +#define NR_CURSEG_RO_TYPE (2) #define NR_CURSEG_PERSIST_TYPE (NR_CURSEG_DATA_TYPE + NR_CURSEG_NODE_TYPE) #define NR_CURSEG_TYPE (NR_CURSEG_INMEM_TYPE + NR_CURSEG_PERSIST_TYPE) @@ -4128,6 +4130,7 @@ F2FS_FEATURE_FUNCS(verity, VERITY); F2FS_FEATURE_FUNCS(sb_chksum, SB_CHKSUM); F2FS_FEATURE_FUNCS(casefold, CASEFOLD); F2FS_FEATURE_FUNCS(compression, COMPRESSION); +F2FS_FEATURE_FUNCS(readonly, RO); #ifdef CONFIG_BLK_DEV_ZONED static inline bool f2fs_blkz_is_seq(struct f2fs_sb_info *sbi, int devi, diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 380ef34e1a59..54847eebc5ca 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -4683,6 +4683,10 @@ static int sanity_check_curseg(struct f2fs_sb_info *sbi) struct seg_entry *se = get_seg_entry(sbi, curseg->segno); unsigned int blkofs = curseg->next_blkoff; + if (f2fs_sb_has_readonly(sbi) && + i != CURSEG_HOT_DATA && i != CURSEG_HOT_NODE) + continue; + sanity_check_seg_type(sbi, curseg->seg_type); if (f2fs_test_bit(blkofs, se->cur_valid_map)) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 56f2f2f449c5..3e0e34b4680c 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -555,7 +555,7 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount) int ret; if (!options) - return 0; + goto default_check; while ((p = strsep(&options, ",")) != NULL) { int token; @@ -1090,6 +1090,7 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount) return -EINVAL; } } +default_check: #ifdef CONFIG_QUOTA if (f2fs_check_quota_options(sbi)) return -EINVAL; @@ -1162,6 +1163,11 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount) */ if (F2FS_OPTION(sbi).active_logs != NR_CURSEG_TYPE) F2FS_OPTION(sbi).whint_mode = WHINT_MODE_OFF; + + if (f2fs_sb_has_readonly(sbi) && !f2fs_readonly(sbi->sb)) { + f2fs_err(sbi, "Allow to mount readonly mode only"); + return -EROFS; + } return 0; } @@ -1819,7 +1825,11 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root) static void default_options(struct f2fs_sb_info *sbi) { /* init some FS parameters */ - F2FS_OPTION(sbi).active_logs = NR_CURSEG_PERSIST_TYPE; + if (f2fs_sb_has_readonly(sbi)) + F2FS_OPTION(sbi).active_logs = NR_CURSEG_RO_TYPE; + else + F2FS_OPTION(sbi).active_logs = NR_CURSEG_PERSIST_TYPE; + F2FS_OPTION(sbi).inline_xattr_size = DEFAULT_INLINE_XATTR_ADDRS; F2FS_OPTION(sbi).whint_mode = WHINT_MODE_OFF; F2FS_OPTION(sbi).alloc_mode = ALLOC_MODE_DEFAULT; @@ -2004,6 +2014,11 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) if (f2fs_readonly(sb) && (*flags & SB_RDONLY)) goto skip; + if (f2fs_sb_has_readonly(sbi) && !(*flags & SB_RDONLY)) { + err = -EROFS; + goto 
restore_opts; + } + #ifdef CONFIG_QUOTA if (!f2fs_readonly(sb) && (*flags & SB_RDONLY)) { err = dquot_suspend(sb, -1); @@ -3137,14 +3152,15 @@ int f2fs_sanity_check_ckpt(struct f2fs_sb_info *sbi) ovp_segments = le32_to_cpu(ckpt->overprov_segment_count); reserved_segments = le32_to_cpu(ckpt->rsvd_segment_count); - if (unlikely(fsmeta < F2FS_MIN_META_SEGMENTS || + if (!f2fs_sb_has_readonly(sbi) && + unlikely(fsmeta < F2FS_MIN_META_SEGMENTS || ovp_segments == 0 || reserved_segments == 0)) { f2fs_err(sbi, "Wrong layout: check mkfs.f2fs version"); return 1; } - user_block_count = le64_to_cpu(ckpt->user_block_count); - segment_count_main = le32_to_cpu(raw_super->segment_count_main); + segment_count_main = le32_to_cpu(raw_super->segment_count_main) + + (f2fs_sb_has_readonly(sbi) ? 1 : 0); log_blocks_per_seg = le32_to_cpu(raw_super->log_blocks_per_seg); if (!user_block_count || user_block_count >= segment_count_main << log_blocks_per_seg) { @@ -3175,6 +3191,10 @@ int f2fs_sanity_check_ckpt(struct f2fs_sb_info *sbi) if (le32_to_cpu(ckpt->cur_node_segno[i]) >= main_segs || le16_to_cpu(ckpt->cur_node_blkoff[i]) >= blocks_per_seg) return 1; + + if (f2fs_sb_has_readonly(sbi)) + goto check_data; + for (j = i + 1; j < NR_CURSEG_NODE_TYPE; j++) { if (le32_to_cpu(ckpt->cur_node_segno[i]) == le32_to_cpu(ckpt->cur_node_segno[j])) { @@ -3185,10 +3205,15 @@ int f2fs_sanity_check_ckpt(struct f2fs_sb_info *sbi) } } } +check_data: for (i = 0; i < NR_CURSEG_DATA_TYPE; i++) { if (le32_to_cpu(ckpt->cur_data_segno[i]) >= main_segs || le16_to_cpu(ckpt->cur_data_blkoff[i]) >= blocks_per_seg) return 1; + + if (f2fs_sb_has_readonly(sbi)) + goto skip_cross; + for (j = i + 1; j < NR_CURSEG_DATA_TYPE; j++) { if (le32_to_cpu(ckpt->cur_data_segno[i]) == le32_to_cpu(ckpt->cur_data_segno[j])) { @@ -3210,7 +3235,7 @@ int f2fs_sanity_check_ckpt(struct f2fs_sb_info *sbi) } } } - +skip_cross: sit_bitmap_size = le32_to_cpu(ckpt->sit_ver_bitmap_bytesize); nat_bitmap_size = le32_to_cpu(ckpt->nat_ver_bitmap_bytesize); diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index dc71bc968c72..c579d5d3a916 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -158,6 +158,9 @@ static ssize_t features_show(struct f2fs_attr *a, if (f2fs_sb_has_casefold(sbi)) len += scnprintf(buf + len, PAGE_SIZE - len, "%s%s", len ? ", " : "", "casefold"); + if (f2fs_sb_has_readonly(sbi)) + len += scnprintf(buf + len, PAGE_SIZE - len, "%s%s", + len ? ", " : "", "readonly"); if (f2fs_sb_has_compression(sbi)) len += scnprintf(buf + len, PAGE_SIZE - len, "%s%s", len ? 
", " : "", "compression"); @@ -578,6 +581,7 @@ enum feat_id { FEAT_SB_CHECKSUM, FEAT_CASEFOLD, FEAT_COMPRESSION, + FEAT_RO, FEAT_TEST_DUMMY_ENCRYPTION_V2, }; @@ -599,6 +603,7 @@ static ssize_t f2fs_feature_show(struct f2fs_attr *a, case FEAT_SB_CHECKSUM: case FEAT_CASEFOLD: case FEAT_COMPRESSION: + case FEAT_RO: case FEAT_TEST_DUMMY_ENCRYPTION_V2: return sprintf(buf, "supported\n"); } @@ -721,12 +726,14 @@ F2FS_FEATURE_RO_ATTR(verity, FEAT_VERITY); #endif F2FS_FEATURE_RO_ATTR(sb_checksum, FEAT_SB_CHECKSUM); F2FS_FEATURE_RO_ATTR(casefold, FEAT_CASEFOLD); +F2FS_FEATURE_RO_ATTR(readonly, FEAT_RO); #ifdef CONFIG_F2FS_FS_COMPRESSION F2FS_FEATURE_RO_ATTR(compression, FEAT_COMPRESSION); F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, compr_written_block, compr_written_block); F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, compr_saved_block, compr_saved_block); F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, compr_new_inode, compr_new_inode); #endif + /* For ATGC */ F2FS_RW_ATTR(ATGC_INFO, atgc_management, atgc_candidate_ratio, candidate_ratio); F2FS_RW_ATTR(ATGC_INFO, atgc_management, atgc_candidate_count, max_candidate_count); @@ -830,6 +837,7 @@ static struct attribute *f2fs_feat_attrs[] = { #endif ATTR_LIST(sb_checksum), ATTR_LIST(casefold), + ATTR_LIST(readonly), #ifdef CONFIG_F2FS_FS_COMPRESSION ATTR_LIST(compression), #endif From 39307f8ee3539478c28e71b4909b5b028cce14b1 Mon Sep 17 00:00:00 2001 From: Daniel Rosenberg Date: Thu, 3 Jun 2021 09:50:37 +0000 Subject: [PATCH 18/32] f2fs: Show casefolding support only when supported The casefolding feature is only supported when CONFIG_UNICODE is set. This modifies the feature list f2fs presents under sysfs accordingly. Fixes: 5aba54302a46 ("f2fs: include charset encoding information in the superblock") Cc: stable@vger.kernel.org # v5.4+ Signed-off-by: Daniel Rosenberg Reviewed-by: Eric Biggers Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/sysfs.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index c579d5d3a916..62fbe4f20dd6 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -725,7 +725,9 @@ F2FS_FEATURE_RO_ATTR(lost_found, FEAT_LOST_FOUND); F2FS_FEATURE_RO_ATTR(verity, FEAT_VERITY); #endif F2FS_FEATURE_RO_ATTR(sb_checksum, FEAT_SB_CHECKSUM); +#ifdef CONFIG_UNICODE F2FS_FEATURE_RO_ATTR(casefold, FEAT_CASEFOLD); +#endif F2FS_FEATURE_RO_ATTR(readonly, FEAT_RO); #ifdef CONFIG_F2FS_FS_COMPRESSION F2FS_FEATURE_RO_ATTR(compression, FEAT_COMPRESSION); @@ -836,7 +838,9 @@ static struct attribute *f2fs_feat_attrs[] = { ATTR_LIST(verity), #endif ATTR_LIST(sb_checksum), +#ifdef CONFIG_UNICODE ATTR_LIST(casefold), +#endif ATTR_LIST(readonly), #ifdef CONFIG_F2FS_FS_COMPRESSION ATTR_LIST(compression), From 4c039d5452691fe80260e4c3dd7b629a095bd0a7 Mon Sep 17 00:00:00 2001 From: Daniel Rosenberg Date: Thu, 3 Jun 2021 09:50:38 +0000 Subject: [PATCH 19/32] f2fs: Advertise encrypted casefolding in sysfs Older kernels don't support encryption with casefolding. This adds the sysfs entry encrypted_casefold to show support for those combined features. 
Support for this feature was originally added by commit 7ad08a58bf67 ("f2fs: Handle casefolding with Encryption") Fixes: 7ad08a58bf67 ("f2fs: Handle casefolding with Encryption") Cc: stable@vger.kernel.org # v5.11+ Signed-off-by: Daniel Rosenberg Reviewed-by: Eric Biggers Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/sysfs.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index 62fbe4f20dd6..4daa6aeb200b 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -583,6 +583,7 @@ enum feat_id { FEAT_COMPRESSION, FEAT_RO, FEAT_TEST_DUMMY_ENCRYPTION_V2, + FEAT_ENCRYPTED_CASEFOLD, }; static ssize_t f2fs_feature_show(struct f2fs_attr *a, @@ -605,6 +606,7 @@ static ssize_t f2fs_feature_show(struct f2fs_attr *a, case FEAT_COMPRESSION: case FEAT_RO: case FEAT_TEST_DUMMY_ENCRYPTION_V2: + case FEAT_ENCRYPTED_CASEFOLD: return sprintf(buf, "supported\n"); } return 0; @@ -709,7 +711,10 @@ F2FS_GENERAL_RO_ATTR(avg_vblocks); #ifdef CONFIG_FS_ENCRYPTION F2FS_FEATURE_RO_ATTR(encryption, FEAT_CRYPTO); F2FS_FEATURE_RO_ATTR(test_dummy_encryption_v2, FEAT_TEST_DUMMY_ENCRYPTION_V2); +#ifdef CONFIG_UNICODE +F2FS_FEATURE_RO_ATTR(encrypted_casefold, FEAT_ENCRYPTED_CASEFOLD); #endif +#endif /* CONFIG_FS_ENCRYPTION */ #ifdef CONFIG_BLK_DEV_ZONED F2FS_FEATURE_RO_ATTR(block_zoned, FEAT_BLKZONED); #endif @@ -822,7 +827,10 @@ static struct attribute *f2fs_feat_attrs[] = { #ifdef CONFIG_FS_ENCRYPTION ATTR_LIST(encryption), ATTR_LIST(test_dummy_encryption_v2), +#ifdef CONFIG_UNICODE + ATTR_LIST(encrypted_casefold), #endif +#endif /* CONFIG_FS_ENCRYPTION */ #ifdef CONFIG_BLK_DEV_ZONED ATTR_LIST(block_zoned), #endif From 4a196df4cfba0b6a74023e6b36427f2bf2ddcdba Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 3 Jun 2021 22:30:09 -0700 Subject: [PATCH 20/32] f2fs: add pin_file in feature list This patch adds missing pin_file feature supported by kernel. Fixes: f5a53edcf01e ("f2fs: support aligned pinned file") Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/sysfs.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index 4daa6aeb200b..e4d5090b7cb3 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -740,6 +740,7 @@ F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, compr_written_block, compr_written_block); F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, compr_saved_block, compr_saved_block); F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, compr_new_inode, compr_new_inode); #endif +F2FS_FEATURE_RO_ATTR(pin_file); /* For ATGC */ F2FS_RW_ATTR(ATGC_INFO, atgc_management, atgc_candidate_ratio, candidate_ratio); @@ -853,6 +854,7 @@ static struct attribute *f2fs_feat_attrs[] = { #ifdef CONFIG_F2FS_FS_COMPRESSION ATTR_LIST(compression), #endif + ATTR_LIST(pin_file), NULL, }; ATTRIBUTE_GROUPS(f2fs_feat); From 4c89b53d05f1f5d25e9aec09c00351994101cc97 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 3 Jun 2021 12:31:08 -0700 Subject: [PATCH 21/32] f2fs: clean up /sys/fs/f2fs//features Let's create /sys/fs/f2fs//feature_list/ to meet sysfs rule. Note that there are three feature list entries: 1) /sys/fs/f2fs/features : shows runtime features supported by in-kernel f2fs along with Kconfig. - ref. F2FS_FEATURE_RO_ATTR() 2) /sys/fs/f2fs/$s_id/features : shows on-disk features enabled by mkfs.f2fs, used for old kernels. This won't add new feature anymore, and thus, users should check entries in 3) instead of this 2). 
3) /sys/fs/f2fs/$s_id/feature_list : shows on-disk features enabled by mkfs.f2fs per instance, which follows sysfs entry rule where each entry should expose single value. This list covers old feature list provided by 2) and beyond. Therefore, please add new on-disk feature in this list only. - ref. F2FS_SB_FEATURE_RO_ATTR() Reviewed-by: Chao Yu Reviewed-by: Eric Biggers Signed-off-by: Jaegeuk Kim --- Documentation/ABI/testing/sysfs-fs-f2fs | 29 +++- fs/f2fs/f2fs.h | 3 + fs/f2fs/sysfs.c | 195 ++++++++++++++++-------- 3 files changed, 162 insertions(+), 65 deletions(-) diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs index 5088281e312e..95155e4ec7fe 100644 --- a/Documentation/ABI/testing/sysfs-fs-f2fs +++ b/Documentation/ABI/testing/sysfs-fs-f2fs @@ -203,7 +203,34 @@ Description: Shows total written kbytes issued to disk. What: /sys/fs/f2fs//features Date: July 2017 Contact: "Jaegeuk Kim" -Description: Shows all enabled features in current device. +Description: /feature_list/ + Shows all enabled features in current device. + Supported features: + encryption, blkzoned, extra_attr, projquota, inode_checksum, + flexible_inline_xattr, quota_ino, inode_crtime, lost_found, + verity, sb_checksum, casefold, readonly, compression, pin_file. + +What: /sys/fs/f2fs//feature_list/ +Date: June 2021 +Contact: "Jaegeuk Kim" +Description: Expand /sys/fs/f2fs//features to meet sysfs rule. + Supported on-disk features: + encryption, block_zoned (aka blkzoned), extra_attr, + project_quota (aka projquota), inode_checksum, + flexible_inline_xattr, quota_ino, inode_crtime, lost_found, + verity, sb_checksum, casefold, readonly, compression. + Note that, pin_file is moved into /sys/fs/f2fs/features/. + +What: /sys/fs/f2fs/features/ +Date: July 2017 +Contact: "Jaegeuk Kim" +Description: Shows all enabled kernel features. + Supported features: + encryption, block_zoned, extra_attr, project_quota, + inode_checksum, flexible_inline_xattr, quota_ino, + inode_crtime, lost_found, verity, sb_checksum, + casefold, readonly, compression, test_dummy_encryption_v2, + atomic_write, pin_file, encrypted_casefold. What: /sys/fs/f2fs//inject_rate Date: May 2016 diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 8903c43091f8..bbc36828a9d9 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1665,6 +1665,9 @@ struct f2fs_sb_info { struct kobject s_stat_kobj; /* /sys/fs/f2fs//stat */ struct completion s_stat_kobj_unregister; + struct kobject s_feature_list_kobj; /* /sys/fs/f2fs//feature_list */ + struct completion s_feature_list_kobj_unregister; + /* For shrinker support */ struct list_head s_list; int s_ndevs; /* number of devices */ diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index e4d5090b7cb3..6642246206bd 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -566,50 +566,49 @@ static void f2fs_sb_release(struct kobject *kobj) complete(&sbi->s_kobj_unregister); } -enum feat_id { - FEAT_CRYPTO = 0, - FEAT_BLKZONED, - FEAT_ATOMIC_WRITE, - FEAT_EXTRA_ATTR, - FEAT_PROJECT_QUOTA, - FEAT_INODE_CHECKSUM, - FEAT_FLEXIBLE_INLINE_XATTR, - FEAT_QUOTA_INO, - FEAT_INODE_CRTIME, - FEAT_LOST_FOUND, - FEAT_VERITY, - FEAT_SB_CHECKSUM, - FEAT_CASEFOLD, - FEAT_COMPRESSION, - FEAT_RO, - FEAT_TEST_DUMMY_ENCRYPTION_V2, - FEAT_ENCRYPTED_CASEFOLD, -}; - +/* + * Note that there are three feature list entries: + * 1) /sys/fs/f2fs/features + * : shows runtime features supported by in-kernel f2fs along with Kconfig. + * - ref. 
F2FS_FEATURE_RO_ATTR() + * + * 2) /sys/fs/f2fs/$s_id/features + * : shows on-disk features enabled by mkfs.f2fs, used for old kernels. This + * won't add new feature anymore, and thus, users should check entries in 3) + * instead of this 2). + * + * 3) /sys/fs/f2fs/$s_id/feature_list + * : shows on-disk features enabled by mkfs.f2fs per instance, which follows + * sysfs entry rule where each entry should expose single value. + * This list covers old feature list provided by 2) and beyond. Therefore, + * please add new on-disk feature in this list only. + * - ref. F2FS_SB_FEATURE_RO_ATTR() + */ static ssize_t f2fs_feature_show(struct f2fs_attr *a, struct f2fs_sb_info *sbi, char *buf) { - switch (a->id) { - case FEAT_CRYPTO: - case FEAT_BLKZONED: - case FEAT_ATOMIC_WRITE: - case FEAT_EXTRA_ATTR: - case FEAT_PROJECT_QUOTA: - case FEAT_INODE_CHECKSUM: - case FEAT_FLEXIBLE_INLINE_XATTR: - case FEAT_QUOTA_INO: - case FEAT_INODE_CRTIME: - case FEAT_LOST_FOUND: - case FEAT_VERITY: - case FEAT_SB_CHECKSUM: - case FEAT_CASEFOLD: - case FEAT_COMPRESSION: - case FEAT_RO: - case FEAT_TEST_DUMMY_ENCRYPTION_V2: - case FEAT_ENCRYPTED_CASEFOLD: + return sprintf(buf, "supported\n"); +} + +#define F2FS_FEATURE_RO_ATTR(_name) \ +static struct f2fs_attr f2fs_attr_##_name = { \ + .attr = {.name = __stringify(_name), .mode = 0444 }, \ + .show = f2fs_feature_show, \ +} + +static ssize_t f2fs_sb_feature_show(struct f2fs_attr *a, + struct f2fs_sb_info *sbi, char *buf) +{ + if (F2FS_HAS_FEATURE(sbi, a->id)) return sprintf(buf, "supported\n"); - } - return 0; + return sprintf(buf, "unsupported\n"); +} + +#define F2FS_SB_FEATURE_RO_ATTR(_name, _feat) \ +static struct f2fs_attr f2fs_attr_sb_##_name = { \ + .attr = {.name = __stringify(_name), .mode = 0444 }, \ + .show = f2fs_sb_feature_show, \ + .id = F2FS_FEATURE_##_feat, \ } #define F2FS_ATTR_OFFSET(_struct_type, _name, _mode, _show, _store, _offset) \ @@ -629,13 +628,6 @@ static struct f2fs_attr f2fs_attr_##_name = { \ #define F2FS_GENERAL_RO_ATTR(name) \ static struct f2fs_attr f2fs_attr_##name = __ATTR(name, 0444, name##_show, NULL) -#define F2FS_FEATURE_RO_ATTR(_name, _id) \ -static struct f2fs_attr f2fs_attr_##_name = { \ - .attr = {.name = __stringify(_name), .mode = 0444 }, \ - .show = f2fs_feature_show, \ - .id = _id, \ -} - #define F2FS_STAT_ATTR(_struct_type, _struct_name, _name, _elname) \ static struct f2fs_attr f2fs_attr_##_name = { \ .attr = {.name = __stringify(_name), .mode = 0444 }, \ @@ -709,33 +701,33 @@ F2FS_GENERAL_RO_ATTR(avg_vblocks); #endif #ifdef CONFIG_FS_ENCRYPTION -F2FS_FEATURE_RO_ATTR(encryption, FEAT_CRYPTO); -F2FS_FEATURE_RO_ATTR(test_dummy_encryption_v2, FEAT_TEST_DUMMY_ENCRYPTION_V2); +F2FS_FEATURE_RO_ATTR(encryption); +F2FS_FEATURE_RO_ATTR(test_dummy_encryption_v2); #ifdef CONFIG_UNICODE -F2FS_FEATURE_RO_ATTR(encrypted_casefold, FEAT_ENCRYPTED_CASEFOLD); +F2FS_FEATURE_RO_ATTR(encrypted_casefold); #endif #endif /* CONFIG_FS_ENCRYPTION */ #ifdef CONFIG_BLK_DEV_ZONED -F2FS_FEATURE_RO_ATTR(block_zoned, FEAT_BLKZONED); +F2FS_FEATURE_RO_ATTR(block_zoned); #endif -F2FS_FEATURE_RO_ATTR(atomic_write, FEAT_ATOMIC_WRITE); -F2FS_FEATURE_RO_ATTR(extra_attr, FEAT_EXTRA_ATTR); -F2FS_FEATURE_RO_ATTR(project_quota, FEAT_PROJECT_QUOTA); -F2FS_FEATURE_RO_ATTR(inode_checksum, FEAT_INODE_CHECKSUM); -F2FS_FEATURE_RO_ATTR(flexible_inline_xattr, FEAT_FLEXIBLE_INLINE_XATTR); -F2FS_FEATURE_RO_ATTR(quota_ino, FEAT_QUOTA_INO); -F2FS_FEATURE_RO_ATTR(inode_crtime, FEAT_INODE_CRTIME); -F2FS_FEATURE_RO_ATTR(lost_found, FEAT_LOST_FOUND); 
+F2FS_FEATURE_RO_ATTR(atomic_write); +F2FS_FEATURE_RO_ATTR(extra_attr); +F2FS_FEATURE_RO_ATTR(project_quota); +F2FS_FEATURE_RO_ATTR(inode_checksum); +F2FS_FEATURE_RO_ATTR(flexible_inline_xattr); +F2FS_FEATURE_RO_ATTR(quota_ino); +F2FS_FEATURE_RO_ATTR(inode_crtime); +F2FS_FEATURE_RO_ATTR(lost_found); #ifdef CONFIG_FS_VERITY -F2FS_FEATURE_RO_ATTR(verity, FEAT_VERITY); +F2FS_FEATURE_RO_ATTR(verity); #endif -F2FS_FEATURE_RO_ATTR(sb_checksum, FEAT_SB_CHECKSUM); +F2FS_FEATURE_RO_ATTR(sb_checksum); #ifdef CONFIG_UNICODE -F2FS_FEATURE_RO_ATTR(casefold, FEAT_CASEFOLD); +F2FS_FEATURE_RO_ATTR(casefold); #endif -F2FS_FEATURE_RO_ATTR(readonly, FEAT_RO); +F2FS_FEATURE_RO_ATTR(readonly); #ifdef CONFIG_F2FS_FS_COMPRESSION -F2FS_FEATURE_RO_ATTR(compression, FEAT_COMPRESSION); +F2FS_FEATURE_RO_ATTR(compression); F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, compr_written_block, compr_written_block); F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, compr_saved_block, compr_saved_block); F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, compr_new_inode, compr_new_inode); @@ -866,6 +858,40 @@ static struct attribute *f2fs_stat_attrs[] = { }; ATTRIBUTE_GROUPS(f2fs_stat); +F2FS_SB_FEATURE_RO_ATTR(encryption, ENCRYPT); +F2FS_SB_FEATURE_RO_ATTR(block_zoned, BLKZONED); +F2FS_SB_FEATURE_RO_ATTR(extra_attr, EXTRA_ATTR); +F2FS_SB_FEATURE_RO_ATTR(project_quota, PRJQUOTA); +F2FS_SB_FEATURE_RO_ATTR(inode_checksum, INODE_CHKSUM); +F2FS_SB_FEATURE_RO_ATTR(flexible_inline_xattr, FLEXIBLE_INLINE_XATTR); +F2FS_SB_FEATURE_RO_ATTR(quota_ino, QUOTA_INO); +F2FS_SB_FEATURE_RO_ATTR(inode_crtime, INODE_CRTIME); +F2FS_SB_FEATURE_RO_ATTR(lost_found, LOST_FOUND); +F2FS_SB_FEATURE_RO_ATTR(verity, VERITY); +F2FS_SB_FEATURE_RO_ATTR(sb_checksum, SB_CHKSUM); +F2FS_SB_FEATURE_RO_ATTR(casefold, CASEFOLD); +F2FS_SB_FEATURE_RO_ATTR(compression, COMPRESSION); +F2FS_SB_FEATURE_RO_ATTR(readonly, RO); + +static struct attribute *f2fs_sb_feat_attrs[] = { + ATTR_LIST(sb_encryption), + ATTR_LIST(sb_block_zoned), + ATTR_LIST(sb_extra_attr), + ATTR_LIST(sb_project_quota), + ATTR_LIST(sb_inode_checksum), + ATTR_LIST(sb_flexible_inline_xattr), + ATTR_LIST(sb_quota_ino), + ATTR_LIST(sb_inode_crtime), + ATTR_LIST(sb_lost_found), + ATTR_LIST(sb_verity), + ATTR_LIST(sb_sb_checksum), + ATTR_LIST(sb_casefold), + ATTR_LIST(sb_compression), + ATTR_LIST(sb_readonly), + NULL, +}; +ATTRIBUTE_GROUPS(f2fs_sb_feat); + static const struct sysfs_ops f2fs_attr_ops = { .show = f2fs_attr_show, .store = f2fs_attr_store, @@ -932,6 +958,33 @@ static struct kobj_type f2fs_stat_ktype = { .release = f2fs_stat_kobj_release, }; +static ssize_t f2fs_sb_feat_attr_show(struct kobject *kobj, + struct attribute *attr, char *buf) +{ + struct f2fs_sb_info *sbi = container_of(kobj, struct f2fs_sb_info, + s_feature_list_kobj); + struct f2fs_attr *a = container_of(attr, struct f2fs_attr, attr); + + return a->show ? 
a->show(a, sbi, buf) : 0; +} + +static void f2fs_feature_list_kobj_release(struct kobject *kobj) +{ + struct f2fs_sb_info *sbi = container_of(kobj, struct f2fs_sb_info, + s_feature_list_kobj); + complete(&sbi->s_feature_list_kobj_unregister); +} + +static const struct sysfs_ops f2fs_feature_list_attr_ops = { + .show = f2fs_sb_feat_attr_show, +}; + +static struct kobj_type f2fs_feature_list_ktype = { + .default_groups = f2fs_sb_feat_groups, + .sysfs_ops = &f2fs_feature_list_attr_ops, + .release = f2fs_feature_list_kobj_release, +}; + static int __maybe_unused segment_info_seq_show(struct seq_file *seq, void *offset) { @@ -1148,6 +1201,14 @@ int f2fs_register_sysfs(struct f2fs_sb_info *sbi) if (err) goto put_stat_kobj; + sbi->s_feature_list_kobj.kset = &f2fs_kset; + init_completion(&sbi->s_feature_list_kobj_unregister); + err = kobject_init_and_add(&sbi->s_feature_list_kobj, + &f2fs_feature_list_ktype, + &sbi->s_kobj, "feature_list"); + if (err) + goto put_feature_list_kobj; + if (f2fs_proc_root) sbi->s_proc = proc_mkdir(sb->s_id, f2fs_proc_root); @@ -1162,6 +1223,9 @@ int f2fs_register_sysfs(struct f2fs_sb_info *sbi) victim_bits_seq_show, sb); } return 0; +put_feature_list_kobj: + kobject_put(&sbi->s_feature_list_kobj); + wait_for_completion(&sbi->s_feature_list_kobj_unregister); put_stat_kobj: kobject_put(&sbi->s_stat_kobj); wait_for_completion(&sbi->s_stat_kobj_unregister); @@ -1184,6 +1248,9 @@ void f2fs_unregister_sysfs(struct f2fs_sb_info *sbi) kobject_del(&sbi->s_stat_kobj); kobject_put(&sbi->s_stat_kobj); wait_for_completion(&sbi->s_stat_kobj_unregister); + kobject_del(&sbi->s_feature_list_kobj); + kobject_put(&sbi->s_feature_list_kobj); + wait_for_completion(&sbi->s_feature_list_kobj_unregister); kobject_del(&sbi->s_kobj); kobject_put(&sbi->s_kobj); From 6ce19aff0b8cd386860855185c6cd79337fc4d2b Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 20 May 2021 19:51:50 +0800 Subject: [PATCH 22/32] f2fs: compress: add compress_inode to cache compressed blocks Support to use address space of inner inode to cache compressed block, in order to improve cache hit ratio of random read. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- Documentation/filesystems/f2fs.rst | 3 + fs/f2fs/compress.c | 168 ++++++++++++++++++++++++++++- fs/f2fs/data.c | 41 +++++-- fs/f2fs/debug.c | 13 +++ fs/f2fs/f2fs.h | 71 +++++++++++- fs/f2fs/gc.c | 1 + fs/f2fs/inode.c | 21 +++- fs/f2fs/node.c | 14 +++ fs/f2fs/node.h | 1 + fs/f2fs/segment.c | 6 +- fs/f2fs/super.c | 35 +++++- include/linux/f2fs_fs.h | 1 + 12 files changed, 361 insertions(+), 14 deletions(-) diff --git a/Documentation/filesystems/f2fs.rst b/Documentation/filesystems/f2fs.rst index 992bf91eeec8..809c4d0a696f 100644 --- a/Documentation/filesystems/f2fs.rst +++ b/Documentation/filesystems/f2fs.rst @@ -289,6 +289,9 @@ compress_mode=%s Control file compression mode. This supports "fs" and "user" choosing the target file and the timing. The user can do manual compression/decompression on the compression enabled files using ioctls. +compress_cache Support to use address space of a filesystem managed inode to + cache compressed block, in order to improve cache hit ratio of + random read. inlinecrypt When possible, encrypt/decrypt the contents of encrypted files using the blk-crypto framework rather than filesystem-layer encryption. 
This allows the use of diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c index 1c3e98085591..455561826c7d 100644 --- a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -12,9 +12,11 @@ #include #include #include +#include #include "f2fs.h" #include "node.h" +#include "segment.h" #include static struct kmem_cache *cic_entry_slab; @@ -736,7 +738,7 @@ out: return ret; } -static void f2fs_decompress_cluster(struct decompress_io_ctx *dic) +void f2fs_decompress_cluster(struct decompress_io_ctx *dic) { struct f2fs_sb_info *sbi = F2FS_I_SB(dic->inode); struct f2fs_inode_info *fi = F2FS_I(dic->inode); @@ -835,7 +837,8 @@ out_end_io: * page being waited on in the cluster, and if so, it decompresses the cluster * (or in the case of a failure, cleans up without actually decompressing). */ -void f2fs_end_read_compressed_page(struct page *page, bool failed) +void f2fs_end_read_compressed_page(struct page *page, bool failed, + block_t blkaddr) { struct decompress_io_ctx *dic = (struct decompress_io_ctx *)page_private(page); @@ -845,6 +848,9 @@ void f2fs_end_read_compressed_page(struct page *page, bool failed) if (failed) WRITE_ONCE(dic->failed, true); + else if (blkaddr) + f2fs_cache_compressed_page(sbi, page, + dic->inode->i_ino, blkaddr); if (atomic_dec_and_test(&dic->remaining_pages)) f2fs_decompress_cluster(dic); @@ -1660,6 +1666,164 @@ void f2fs_put_page_dic(struct page *page) f2fs_put_dic(dic); } +const struct address_space_operations f2fs_compress_aops = { + .releasepage = f2fs_release_page, + .invalidatepage = f2fs_invalidate_page, +}; + +struct address_space *COMPRESS_MAPPING(struct f2fs_sb_info *sbi) +{ + return sbi->compress_inode->i_mapping; +} + +void f2fs_invalidate_compress_page(struct f2fs_sb_info *sbi, block_t blkaddr) +{ + if (!sbi->compress_inode) + return; + invalidate_mapping_pages(COMPRESS_MAPPING(sbi), blkaddr, blkaddr); +} + +void f2fs_cache_compressed_page(struct f2fs_sb_info *sbi, struct page *page, + nid_t ino, block_t blkaddr) +{ + struct page *cpage; + int ret; + + if (!test_opt(sbi, COMPRESS_CACHE)) + return; + + if (!f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC_ENHANCE_READ)) + return; + + if (!f2fs_available_free_memory(sbi, COMPRESS_PAGE)) + return; + + cpage = find_get_page(COMPRESS_MAPPING(sbi), blkaddr); + if (cpage) { + f2fs_put_page(cpage, 0); + return; + } + + cpage = alloc_page(__GFP_NOWARN | __GFP_IO); + if (!cpage) + return; + + ret = add_to_page_cache_lru(cpage, COMPRESS_MAPPING(sbi), + blkaddr, GFP_NOFS); + if (ret) { + f2fs_put_page(cpage, 0); + return; + } + + set_page_private_data(cpage, ino); + + if (!f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC_ENHANCE_READ)) + goto out; + + memcpy(page_address(cpage), page_address(page), PAGE_SIZE); + SetPageUptodate(cpage); +out: + f2fs_put_page(cpage, 1); +} + +bool f2fs_load_compressed_page(struct f2fs_sb_info *sbi, struct page *page, + block_t blkaddr) +{ + struct page *cpage; + bool hitted = false; + + if (!test_opt(sbi, COMPRESS_CACHE)) + return false; + + cpage = f2fs_pagecache_get_page(COMPRESS_MAPPING(sbi), + blkaddr, FGP_LOCK | FGP_NOWAIT, GFP_NOFS); + if (cpage) { + if (PageUptodate(cpage)) { + atomic_inc(&sbi->compress_page_hit); + memcpy(page_address(page), + page_address(cpage), PAGE_SIZE); + hitted = true; + } + f2fs_put_page(cpage, 1); + } + + return hitted; +} + +void f2fs_invalidate_compress_pages(struct f2fs_sb_info *sbi, nid_t ino) +{ + struct address_space *mapping = sbi->compress_inode->i_mapping; + struct pagevec pvec; + pgoff_t index = 0; + pgoff_t end = MAX_BLKADDR(sbi); + + if 
(!mapping->nrpages) + return; + + pagevec_init(&pvec); + + do { + unsigned int nr_pages; + int i; + + nr_pages = pagevec_lookup_range(&pvec, mapping, + &index, end - 1); + if (!nr_pages) + break; + + for (i = 0; i < nr_pages; i++) { + struct page *page = pvec.pages[i]; + + if (page->index > end) + break; + + lock_page(page); + if (page->mapping != mapping) { + unlock_page(page); + continue; + } + + if (ino != get_page_private_data(page)) { + unlock_page(page); + continue; + } + + generic_error_remove_page(mapping, page); + unlock_page(page); + } + pagevec_release(&pvec); + cond_resched(); + } while (index < end); +} + +int f2fs_init_compress_inode(struct f2fs_sb_info *sbi) +{ + struct inode *inode; + + if (!test_opt(sbi, COMPRESS_CACHE)) + return 0; + + inode = f2fs_iget(sbi->sb, F2FS_COMPRESS_INO(sbi)); + if (IS_ERR(inode)) + return PTR_ERR(inode); + sbi->compress_inode = inode; + + sbi->compress_percent = COMPRESS_PERCENT; + sbi->compress_watermark = COMPRESS_WATERMARK; + + atomic_set(&sbi->compress_page_hit, 0); + + return 0; +} + +void f2fs_destroy_compress_inode(struct f2fs_sb_info *sbi) +{ + if (!sbi->compress_inode) + return; + iput(sbi->compress_inode); + sbi->compress_inode = NULL; +} + int f2fs_init_page_array_cache(struct f2fs_sb_info *sbi) { dev_t dev = sbi->sb->s_bdev->bd_dev; diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 37f4ab79d014..53af21ff6196 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -132,7 +132,7 @@ static void f2fs_finish_read_bio(struct bio *bio) if (f2fs_is_compressed_page(page)) { if (bio->bi_status) - f2fs_end_read_compressed_page(page, true); + f2fs_end_read_compressed_page(page, true, 0); f2fs_put_page_dic(page); continue; } @@ -228,15 +228,19 @@ static void f2fs_handle_step_decompress(struct bio_post_read_ctx *ctx) struct bio_vec *bv; struct bvec_iter_all iter_all; bool all_compressed = true; + block_t blkaddr = SECTOR_TO_BLOCK(ctx->bio->bi_iter.bi_sector); bio_for_each_segment_all(bv, ctx->bio, iter_all) { struct page *page = bv->bv_page; /* PG_error was set if decryption failed. 
*/ if (f2fs_is_compressed_page(page)) - f2fs_end_read_compressed_page(page, PageError(page)); + f2fs_end_read_compressed_page(page, PageError(page), + blkaddr); else all_compressed = false; + + blkaddr++; } /* @@ -1352,9 +1356,11 @@ alloc: old_blkaddr = dn->data_blkaddr; f2fs_allocate_data_block(sbi, NULL, old_blkaddr, &dn->data_blkaddr, &sum, seg_type, NULL); - if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO) + if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO) { invalidate_mapping_pages(META_MAPPING(sbi), old_blkaddr, old_blkaddr); + f2fs_invalidate_compress_page(sbi, old_blkaddr); + } f2fs_update_data_blkaddr(dn, dn->data_blkaddr); /* @@ -2174,7 +2180,7 @@ int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret, goto out_put_dnode; } - for (i = 0; i < dic->nr_cpages; i++) { + for (i = 0; i < cc->nr_cpages; i++) { struct page *page = dic->cpages[i]; block_t blkaddr; struct bio_post_read_ctx *ctx; @@ -2182,6 +2188,14 @@ int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret, blkaddr = data_blkaddr(dn.inode, dn.node_page, dn.ofs_in_node + i + 1); + f2fs_wait_on_block_writeback(inode, blkaddr); + + if (f2fs_load_compressed_page(sbi, page, blkaddr)) { + if (atomic_dec_and_test(&dic->remaining_pages)) + f2fs_decompress_cluster(dic); + continue; + } + if (bio && (!page_is_mergeable(sbi, bio, *last_block_in_bio, blkaddr) || !f2fs_crypt_mergeable_bio(bio, inode, page->index, NULL))) { @@ -2203,8 +2217,6 @@ submit_and_realloc: } } - f2fs_wait_on_block_writeback(inode, blkaddr); - if (bio_add_page(bio, page, blocksize, 0) < blocksize) goto submit_and_realloc; @@ -3618,6 +3630,13 @@ void f2fs_invalidate_page(struct page *page, unsigned int offset, clear_page_private_gcing(page); + if (test_opt(sbi, COMPRESS_CACHE)) { + if (f2fs_compressed_file(inode)) + f2fs_invalidate_compress_pages(sbi, inode->i_ino); + if (inode->i_ino == F2FS_COMPRESS_INO(sbi)) + clear_page_private_data(page); + } + if (page_private_atomic(page)) return f2fs_drop_inmem_page(inode, page); @@ -3635,6 +3654,16 @@ int f2fs_release_page(struct page *page, gfp_t wait) if (page_private_atomic(page)) return 0; + if (test_opt(F2FS_P_SB(page), COMPRESS_CACHE)) { + struct f2fs_sb_info *sbi = F2FS_P_SB(page); + struct inode *inode = page->mapping->host; + + if (f2fs_compressed_file(inode)) + f2fs_invalidate_compress_pages(sbi, inode->i_ino); + if (inode->i_ino == F2FS_COMPRESS_INO(sbi)) + clear_page_private_data(page); + } + clear_page_private_gcing(page); detach_page_private(page); diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index c03949a7ccff..833325038ef3 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -152,6 +152,12 @@ static void update_general_status(struct f2fs_sb_info *sbi) si->node_pages = NODE_MAPPING(sbi)->nrpages; if (sbi->meta_inode) si->meta_pages = META_MAPPING(sbi)->nrpages; +#ifdef CONFIG_F2FS_FS_COMPRESSION + if (sbi->compress_inode) { + si->compress_pages = COMPRESS_MAPPING(sbi)->nrpages; + si->compress_page_hit = atomic_read(&sbi->compress_page_hit); + } +#endif si->nats = NM_I(sbi)->nat_cnt[TOTAL_NAT]; si->dirty_nats = NM_I(sbi)->nat_cnt[DIRTY_NAT]; si->sits = MAIN_SEGS(sbi); @@ -309,6 +315,12 @@ get_cache: si->page_mem += (unsigned long long)npages << PAGE_SHIFT; } +#ifdef CONFIG_F2FS_FS_COMPRESSION + if (sbi->compress_inode) { + unsigned npages = COMPRESS_MAPPING(sbi)->nrpages; + si->page_mem += (unsigned long long)npages << PAGE_SHIFT; + } +#endif } static int stat_show(struct seq_file *s, void *v) @@ -476,6 +488,7 @@ static int stat_show(struct seq_file *s, void *v) "volatile IO: 
%4d (Max. %4d)\n", si->inmem_pages, si->aw_cnt, si->max_aw_cnt, si->vw_cnt, si->max_vw_cnt); + seq_printf(s, " - compress: %4d, hit:%8d\n", si->compress_pages, si->compress_page_hit); seq_printf(s, " - nodes: %4d in %4d\n", si->ndirty_node, si->node_pages); seq_printf(s, " - dents: %4d in dirs:%4d (%4d)\n", diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index bbc36828a9d9..561e52a07391 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -98,6 +98,7 @@ extern const char *f2fs_fault_name[FAULT_MAX]; #define F2FS_MOUNT_ATGC 0x08000000 #define F2FS_MOUNT_MERGE_CHECKPOINT 0x10000000 #define F2FS_MOUNT_GC_MERGE 0x20000000 +#define F2FS_MOUNT_COMPRESS_CACHE 0x40000000 #define F2FS_OPTION(sbi) ((sbi)->mount_opt) #define clear_opt(sbi, option) (F2FS_OPTION(sbi).opt &= ~F2FS_MOUNT_##option) @@ -1374,6 +1375,37 @@ PAGE_PRIVATE_CLEAR_FUNC(gcing, ONGOING_MIGRATION); PAGE_PRIVATE_CLEAR_FUNC(atomic, ATOMIC_WRITE); PAGE_PRIVATE_CLEAR_FUNC(dummy, DUMMY_WRITE); +static inline unsigned long get_page_private_data(struct page *page) +{ + unsigned long data = page_private(page); + + if (!test_bit(PAGE_PRIVATE_NOT_POINTER, &data)) + return 0; + return data >> PAGE_PRIVATE_MAX; +} + +static inline void set_page_private_data(struct page *page, unsigned long data) +{ + if (!PagePrivate(page)) { + get_page(page); + SetPagePrivate(page); + } + set_bit(PAGE_PRIVATE_NOT_POINTER, &page_private(page)); + page_private(page) |= data << PAGE_PRIVATE_MAX; +} + +static inline void clear_page_private_data(struct page *page) +{ + page_private(page) &= (1 << PAGE_PRIVATE_MAX) - 1; + if (page_private(page) == 1 << PAGE_PRIVATE_NOT_POINTER) { + set_page_private(page, 0); + if (PagePrivate(page)) { + ClearPagePrivate(page); + put_page(page); + } + } +} + /* For compression */ enum compress_algorithm_type { COMPRESS_LZO, @@ -1388,6 +1420,9 @@ enum compress_flag { COMPRESS_MAX_FLAG, }; +#define COMPRESS_WATERMARK 20 +#define COMPRESS_PERCENT 20 + #define COMPRESS_DATA_RESERVED_SIZE 4 struct compress_data { __le32 clen; /* compressed data size */ @@ -1700,6 +1735,12 @@ struct f2fs_sb_info { u64 compr_written_block; u64 compr_saved_block; u32 compr_new_inode; + + /* For compressed block cache */ + struct inode *compress_inode; /* cache compressed blocks */ + unsigned int compress_percent; /* cache page percentage */ + unsigned int compress_watermark; /* cache page watermark */ + atomic_t compress_page_hit; /* cache hit count */ #endif }; @@ -3671,7 +3712,8 @@ struct f2fs_stat_info { unsigned int bimodal, avg_vblocks; int util_free, util_valid, util_invalid; int rsvd_segs, overp_segs; - int dirty_count, node_pages, meta_pages; + int dirty_count, node_pages, meta_pages, compress_pages; + int compress_page_hit; int prefree_count, call_count, cp_count, bg_cp_count; int tot_segs, node_segs, data_segs, free_segs, free_secs; int bg_node_segs, bg_data_segs; @@ -4007,7 +4049,9 @@ void f2fs_compress_write_end_io(struct bio *bio, struct page *page); bool f2fs_is_compress_backend_ready(struct inode *inode); int f2fs_init_compress_mempool(void); void f2fs_destroy_compress_mempool(void); -void f2fs_end_read_compressed_page(struct page *page, bool failed); +void f2fs_decompress_cluster(struct decompress_io_ctx *dic); +void f2fs_end_read_compressed_page(struct page *page, bool failed, + block_t blkaddr); bool f2fs_cluster_is_empty(struct compress_ctx *cc); bool f2fs_cluster_can_merge_page(struct compress_ctx *cc, pgoff_t index); void f2fs_compress_ctx_add_page(struct compress_ctx *cc, struct page *page); @@ -4025,10 +4069,19 @@ void 
f2fs_put_page_dic(struct page *page); int f2fs_init_compress_ctx(struct compress_ctx *cc); void f2fs_destroy_compress_ctx(struct compress_ctx *cc, bool reuse); void f2fs_init_compress_info(struct f2fs_sb_info *sbi); +int f2fs_init_compress_inode(struct f2fs_sb_info *sbi); +void f2fs_destroy_compress_inode(struct f2fs_sb_info *sbi); int f2fs_init_page_array_cache(struct f2fs_sb_info *sbi); void f2fs_destroy_page_array_cache(struct f2fs_sb_info *sbi); int __init f2fs_init_compress_cache(void); void f2fs_destroy_compress_cache(void); +struct address_space *COMPRESS_MAPPING(struct f2fs_sb_info *sbi); +void f2fs_invalidate_compress_page(struct f2fs_sb_info *sbi, block_t blkaddr); +void f2fs_cache_compressed_page(struct f2fs_sb_info *sbi, struct page *page, + nid_t ino, block_t blkaddr); +bool f2fs_load_compressed_page(struct f2fs_sb_info *sbi, struct page *page, + block_t blkaddr); +void f2fs_invalidate_compress_pages(struct f2fs_sb_info *sbi, nid_t ino); #define inc_compr_inode_stat(inode) \ do { \ struct f2fs_sb_info *sbi = F2FS_I_SB(inode); \ @@ -4057,7 +4110,9 @@ static inline struct page *f2fs_compress_control_page(struct page *page) } static inline int f2fs_init_compress_mempool(void) { return 0; } static inline void f2fs_destroy_compress_mempool(void) { } -static inline void f2fs_end_read_compressed_page(struct page *page, bool failed) +static inline void f2fs_decompress_cluster(struct decompress_io_ctx *dic) { } +static inline void f2fs_end_read_compressed_page(struct page *page, + bool failed, block_t blkaddr) { WARN_ON_ONCE(1); } @@ -4065,10 +4120,20 @@ static inline void f2fs_put_page_dic(struct page *page) { WARN_ON_ONCE(1); } +static inline int f2fs_init_compress_inode(struct f2fs_sb_info *sbi) { return 0; } +static inline void f2fs_destroy_compress_inode(struct f2fs_sb_info *sbi) { } static inline int f2fs_init_page_array_cache(struct f2fs_sb_info *sbi) { return 0; } static inline void f2fs_destroy_page_array_cache(struct f2fs_sb_info *sbi) { } static inline int __init f2fs_init_compress_cache(void) { return 0; } static inline void f2fs_destroy_compress_cache(void) { } +static inline void f2fs_invalidate_compress_page(struct f2fs_sb_info *sbi, + block_t blkaddr) { } +static inline void f2fs_cache_compressed_page(struct f2fs_sb_info *sbi, + struct page *page, nid_t ino, block_t blkaddr) { } +static inline bool f2fs_load_compressed_page(struct f2fs_sb_info *sbi, + struct page *page, block_t blkaddr) { return false; } +static inline void f2fs_invalidate_compress_pages(struct f2fs_sb_info *sbi, + nid_t ino) { } #define inc_compr_inode_stat(inode) do { } while (0) #endif diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index ab1c0123904f..da5947b30142 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -1261,6 +1261,7 @@ static int move_data_block(struct inode *inode, block_t bidx, f2fs_put_page(mpage, 1); invalidate_mapping_pages(META_MAPPING(fio.sbi), fio.old_blkaddr, fio.old_blkaddr); + f2fs_invalidate_compress_page(fio.sbi, fio.old_blkaddr); set_page_dirty(fio.encrypted_page); if (clear_page_dirty_for_io(fio.encrypted_page)) diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index cbda7ca3b3be..9141147b5bb0 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -18,6 +18,10 @@ #include +#ifdef CONFIG_F2FS_FS_COMPRESSION +extern const struct address_space_operations f2fs_compress_aops; +#endif + void f2fs_mark_inode_dirty_sync(struct inode *inode, bool sync) { if (is_inode_flag_set(inode, FI_NEW_INODE)) @@ -494,6 +498,11 @@ struct inode *f2fs_iget(struct super_block *sb, unsigned long ino) if (ino 
== F2FS_NODE_INO(sbi) || ino == F2FS_META_INO(sbi)) goto make_now; +#ifdef CONFIG_F2FS_FS_COMPRESSION + if (ino == F2FS_COMPRESS_INO(sbi)) + goto make_now; +#endif + ret = do_read_inode(inode); if (ret) goto bad_inode; @@ -504,6 +513,12 @@ make_now: } else if (ino == F2FS_META_INO(sbi)) { inode->i_mapping->a_ops = &f2fs_meta_aops; mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS); + } else if (ino == F2FS_COMPRESS_INO(sbi)) { +#ifdef CONFIG_F2FS_FS_COMPRESSION + inode->i_mapping->a_ops = &f2fs_compress_aops; +#endif + mapping_set_gfp_mask(inode->i_mapping, + GFP_NOFS | __GFP_HIGHMEM | __GFP_MOVABLE); } else if (S_ISREG(inode->i_mode)) { inode->i_op = &f2fs_file_inode_operations; inode->i_fop = &f2fs_file_operations; @@ -723,8 +738,12 @@ void f2fs_evict_inode(struct inode *inode) trace_f2fs_evict_inode(inode); truncate_inode_pages_final(&inode->i_data); + if (test_opt(sbi, COMPRESS_CACHE) && f2fs_compressed_file(inode)) + f2fs_invalidate_compress_pages(sbi, inode->i_ino); + if (inode->i_ino == F2FS_NODE_INO(sbi) || - inode->i_ino == F2FS_META_INO(sbi)) + inode->i_ino == F2FS_META_INO(sbi) || + inode->i_ino == F2FS_COMPRESS_INO(sbi)) goto out_clear; f2fs_bug_on(sbi, get_dirty_pages(inode)); diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 3a8f7afa5059..dd611efa8aa4 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -97,6 +97,20 @@ bool f2fs_available_free_memory(struct f2fs_sb_info *sbi, int type) mem_size = (atomic_read(&dcc->discard_cmd_cnt) * sizeof(struct discard_cmd)) >> PAGE_SHIFT; res = mem_size < (avail_ram * nm_i->ram_thresh / 100); + } else if (type == COMPRESS_PAGE) { +#ifdef CONFIG_F2FS_FS_COMPRESSION + unsigned long free_ram = val.freeram; + + /* + * free memory is lower than watermark or cached page count + * exceed threshold, deny caching compress page. 
+ */ + res = (free_ram > avail_ram * sbi->compress_watermark / 100) && + (COMPRESS_MAPPING(sbi)->nrpages < + free_ram * sbi->compress_percent / 100); +#else + res = false; +#endif } else { if (!sbi->sb->s_bdi->wb.dirty_exceeded) return true; diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h index d85e8659cfda..84d45385d1f2 100644 --- a/fs/f2fs/node.h +++ b/fs/f2fs/node.h @@ -148,6 +148,7 @@ enum mem_type { EXTENT_CACHE, /* indicates extent cache */ INMEM_PAGES, /* indicates inmemory pages */ DISCARD_CACHE, /* indicates memory of cached discard cmds */ + COMPRESS_PAGE, /* indicates memory of cached compressed pages */ BASE_CHECK, /* check kernel status */ }; diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 54847eebc5ca..85fae9f3624d 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -2322,6 +2322,7 @@ void f2fs_invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr) return; invalidate_mapping_pages(META_MAPPING(sbi), addr, addr); + f2fs_invalidate_compress_page(sbi, addr); /* add it into sit main buffer */ down_write(&sit_i->sentry_lock); @@ -3469,9 +3470,11 @@ static void do_write_page(struct f2fs_summary *sum, struct f2fs_io_info *fio) reallocate: f2fs_allocate_data_block(fio->sbi, fio->page, fio->old_blkaddr, &fio->new_blkaddr, sum, type, fio); - if (GET_SEGNO(fio->sbi, fio->old_blkaddr) != NULL_SEGNO) + if (GET_SEGNO(fio->sbi, fio->old_blkaddr) != NULL_SEGNO) { invalidate_mapping_pages(META_MAPPING(fio->sbi), fio->old_blkaddr, fio->old_blkaddr); + f2fs_invalidate_compress_page(fio->sbi, fio->old_blkaddr); + } /* writeout dirty page into bdev */ f2fs_submit_page_write(fio); @@ -3661,6 +3664,7 @@ void f2fs_do_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO) { invalidate_mapping_pages(META_MAPPING(sbi), old_blkaddr, old_blkaddr); + f2fs_invalidate_compress_page(sbi, old_blkaddr); if (!from_gc) update_segment_mtime(sbi, old_blkaddr, 0); update_sit_entry(sbi, old_blkaddr, -1); diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 3e0e34b4680c..d70122da3f99 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -150,6 +150,7 @@ enum { Opt_compress_extension, Opt_compress_chksum, Opt_compress_mode, + Opt_compress_cache, Opt_atgc, Opt_gc_merge, Opt_nogc_merge, @@ -224,6 +225,7 @@ static match_table_t f2fs_tokens = { {Opt_compress_extension, "compress_extension=%s"}, {Opt_compress_chksum, "compress_chksum"}, {Opt_compress_mode, "compress_mode=%s"}, + {Opt_compress_cache, "compress_cache"}, {Opt_atgc, "atgc"}, {Opt_gc_merge, "gc_merge"}, {Opt_nogc_merge, "nogc_merge"}, @@ -1066,12 +1068,16 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount) } kfree(name); break; + case Opt_compress_cache: + set_opt(sbi, COMPRESS_CACHE); + break; #else case Opt_compress_algorithm: case Opt_compress_log_size: case Opt_compress_extension: case Opt_compress_chksum: case Opt_compress_mode: + case Opt_compress_cache: f2fs_info(sbi, "compression options not supported"); break; #endif @@ -1409,6 +1415,8 @@ static void f2fs_put_super(struct super_block *sb) f2fs_bug_on(sbi, sbi->fsync_node_num); + f2fs_destroy_compress_inode(sbi); + iput(sbi->node_inode); sbi->node_inode = NULL; @@ -1678,6 +1686,9 @@ static inline void f2fs_show_compress_options(struct seq_file *seq, seq_printf(seq, ",compress_mode=%s", "fs"); else if (F2FS_OPTION(sbi).compress_mode == COMPR_MODE_USER) seq_printf(seq, ",compress_mode=%s", "user"); + + if (test_opt(sbi, COMPRESS_CACHE)) + seq_puts(seq, ",compress_cache"); } #endif @@ -1959,6 +1970,7 @@ 
static int f2fs_remount(struct super_block *sb, int *flags, char *data) bool disable_checkpoint = test_opt(sbi, DISABLE_CHECKPOINT); bool no_io_align = !F2FS_IO_ALIGNED(sbi); bool no_atgc = !test_opt(sbi, ATGC); + bool no_compress_cache = !test_opt(sbi, COMPRESS_CACHE); bool checkpoint_changed; #ifdef CONFIG_QUOTA int i, j; @@ -2056,6 +2068,12 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) goto restore_opts; } + if (no_compress_cache == !!test_opt(sbi, COMPRESS_CACHE)) { + err = -EINVAL; + f2fs_warn(sbi, "switch compress_cache option is not allowed"); + goto restore_opts; + } + if ((*flags & SB_RDONLY) && test_opt(sbi, DISABLE_CHECKPOINT)) { err = -EINVAL; f2fs_warn(sbi, "disabling checkpoint not compatible with read-only"); @@ -3965,10 +3983,14 @@ try_onemore: goto free_node_inode; } - err = f2fs_register_sysfs(sbi); + err = f2fs_init_compress_inode(sbi); if (err) goto free_root_inode; + err = f2fs_register_sysfs(sbi); + if (err) + goto free_compress_inode; + #ifdef CONFIG_QUOTA /* Enable quota usage during mount */ if (f2fs_sb_has_quota_ino(sbi) && !f2fs_readonly(sb)) { @@ -4109,6 +4131,8 @@ free_meta: /* evict some inodes being cached by GC */ evict_inodes(sb); f2fs_unregister_sysfs(sbi); +free_compress_inode: + f2fs_destroy_compress_inode(sbi); free_root_inode: dput(sb->s_root); sb->s_root = NULL; @@ -4187,6 +4211,15 @@ static void kill_f2fs_super(struct super_block *sb) f2fs_stop_gc_thread(sbi); f2fs_stop_discard_thread(sbi); +#ifdef CONFIG_F2FS_FS_COMPRESSION + /* + * latter evict_inode() can bypass checking and invalidating + * compress inode cache. + */ + if (test_opt(sbi, COMPRESS_CACHE)) + truncate_inode_pages_final(COMPRESS_MAPPING(sbi)); +#endif + if (is_sbi_flag_set(sbi, SBI_IS_DIRTY) || !is_set_ckpt_flags(sbi, CP_UMOUNT_FLAG)) { struct cp_control cpc = { diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index f93000c3a127..d445150c5350 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -34,6 +34,7 @@ #define F2FS_ROOT_INO(sbi) ((sbi)->root_ino_num) #define F2FS_NODE_INO(sbi) ((sbi)->node_ino_num) #define F2FS_META_INO(sbi) ((sbi)->meta_ino_num) +#define F2FS_COMPRESS_INO(sbi) (NM_I(sbi)->max_nid) #define F2FS_MAX_QUOTAS 3 From 0b8fc00601c0d8bea19667bbc66f00e13d954e4a Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 26 May 2021 14:29:26 +0800 Subject: [PATCH 23/32] f2fs: swap: remove dead codes After commit af4b6b8edf6a ("f2fs: introduce check_swap_activate_fast()"), we will never run into original logic of check_swap_activate() before f2fs supports non 4k-sized page, so let's delete those dead codes. 
Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 168 +------------------------------------------------ 1 file changed, 1 insertion(+), 167 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 53af21ff6196..ee04e6b63218 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -3830,66 +3830,7 @@ int f2fs_migrate_page(struct address_space *mapping, #endif #ifdef CONFIG_SWAP -static int f2fs_is_file_aligned(struct inode *inode) -{ - struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - block_t main_blkaddr = SM_I(sbi)->main_blkaddr; - block_t cur_lblock; - block_t last_lblock; - block_t pblock; - unsigned long nr_pblocks; - unsigned int blocks_per_sec = BLKS_PER_SEC(sbi); - unsigned int not_aligned = 0; - int ret = 0; - - cur_lblock = 0; - last_lblock = bytes_to_blks(inode, i_size_read(inode)); - - while (cur_lblock < last_lblock) { - struct f2fs_map_blocks map; - - memset(&map, 0, sizeof(map)); - map.m_lblk = cur_lblock; - map.m_len = last_lblock - cur_lblock; - map.m_next_pgofs = NULL; - map.m_next_extent = NULL; - map.m_seg_type = NO_CHECK_TYPE; - map.m_may_create = false; - - ret = f2fs_map_blocks(inode, &map, 0, F2FS_GET_BLOCK_FIEMAP); - if (ret) - goto out; - - /* hole */ - if (!(map.m_flags & F2FS_MAP_FLAGS)) { - f2fs_err(sbi, "Swapfile has holes"); - ret = -ENOENT; - goto out; - } - - pblock = map.m_pblk; - nr_pblocks = map.m_len; - - if ((pblock - main_blkaddr) & (blocks_per_sec - 1) || - nr_pblocks & (blocks_per_sec - 1)) { - if (f2fs_is_pinned_file(inode)) { - f2fs_err(sbi, "Swapfile does not align to section"); - ret = -EINVAL; - goto out; - } - not_aligned++; - } - - cur_lblock += nr_pblocks; - } - if (not_aligned) - f2fs_warn(sbi, "Swapfile (%u) is not align to section: 1) creat(), 2) ioctl(F2FS_IOC_SET_PIN_FILE), 3) fallocate()", - not_aligned); -out: - return ret; -} - -static int check_swap_activate_fast(struct swap_info_struct *sis, +static int check_swap_activate(struct swap_info_struct *sis, struct file *swap_file, sector_t *span) { struct address_space *mapping = swap_file->f_mapping; @@ -3984,113 +3925,6 @@ out: return ret; } -/* Copied from generic_swapfile_activate() to check any holes */ -static int check_swap_activate(struct swap_info_struct *sis, - struct file *swap_file, sector_t *span) -{ - struct address_space *mapping = swap_file->f_mapping; - struct inode *inode = mapping->host; - struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - unsigned blocks_per_page; - unsigned long page_no; - sector_t probe_block; - sector_t last_block; - sector_t lowest_block = -1; - sector_t highest_block = 0; - int nr_extents = 0; - int ret = 0; - - if (PAGE_SIZE == F2FS_BLKSIZE) - return check_swap_activate_fast(sis, swap_file, span); - - ret = f2fs_is_file_aligned(inode); - if (ret) - goto out; - - blocks_per_page = bytes_to_blks(inode, PAGE_SIZE); - - /* - * Map all the blocks into the extent list. This code doesn't try - * to be very smart. 
- */ - probe_block = 0; - page_no = 0; - last_block = bytes_to_blks(inode, i_size_read(inode)); - while ((probe_block + blocks_per_page) <= last_block && - page_no < sis->max) { - unsigned block_in_page; - sector_t first_block; - sector_t block = 0; - - cond_resched(); - - block = probe_block; - ret = bmap(inode, &block); - if (ret) - goto out; - if (!block) - goto bad_bmap; - first_block = block; - - /* - * It must be PAGE_SIZE aligned on-disk - */ - if (first_block & (blocks_per_page - 1)) { - probe_block++; - goto reprobe; - } - - for (block_in_page = 1; block_in_page < blocks_per_page; - block_in_page++) { - - block = probe_block + block_in_page; - ret = bmap(inode, &block); - if (ret) - goto out; - if (!block) - goto bad_bmap; - - if (block != first_block + block_in_page) { - /* Discontiguity */ - probe_block++; - goto reprobe; - } - } - - first_block >>= (PAGE_SHIFT - inode->i_blkbits); - if (page_no) { /* exclude the header page */ - if (first_block < lowest_block) - lowest_block = first_block; - if (first_block > highest_block) - highest_block = first_block; - } - - /* - * We found a PAGE_SIZE-length, PAGE_SIZE-aligned run of blocks - */ - ret = add_swap_extent(sis, page_no, 1, first_block); - if (ret < 0) - goto out; - nr_extents += ret; - page_no++; - probe_block += blocks_per_page; -reprobe: - continue; - } - ret = nr_extents; - *span = 1 + highest_block - lowest_block; - if (page_no == 0) - page_no = 1; /* force Empty message */ - sis->max = page_no; - sis->pages = page_no - 1; - sis->highest_bit = page_no - 1; -out: - return ret; -bad_bmap: - f2fs_err(sbi, "Swapfile has holes"); - return -EINVAL; -} - static int f2fs_swap_activate(struct swap_info_struct *sis, struct file *file, sector_t *span) { From 859fca6b706e005f7cf19aa2ce7bb4005bcef427 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 26 May 2021 14:29:27 +0800 Subject: [PATCH 24/32] f2fs: swap: support migrating swapfile in aligned write mode This patch supports to migrate swapfile in aligned write mode during swapon in order to keep swapfile being aligned to section as much as possible, then pinned swapfile will locates fully filled section which may not affected by GC. However, for the case that swapfile's size is not aligned to section size, it will still leave last extent in file's tail as unaligned due to its size is smaller than section size, like case #2. 
case #1 xfs_io -f /mnt/f2fs/file -c "pwrite 0 4M" -c "fsync" Before swapon: EXT: FILE-OFFSET BLOCK-RANGE TOTAL FLAGS 0: [0..3047]: 1123352..1126399 3048 0x1000 1: [3048..7143]: 237568..241663 4096 0x1000 2: [7144..8191]: 245760..246807 1048 0x1001 After swapon: EXT: FILE-OFFSET BLOCK-RANGE TOTAL FLAGS 0: [0..8191]: 249856..258047 8192 0x1001 Kmsg: F2FS-fs (zram0): Swapfile (2) is not align to section: 1) creat(), 2) ioctl(F2FS_IOC_SET_PIN_FILE), 3) fallocate(2097152 * n) case #2 xfs_io -f /mnt/f2fs/file -c "pwrite 0 3M" -c "fsync" Before swapon: EXT: FILE-OFFSET BLOCK-RANGE TOTAL FLAGS 0: [0..3047]: 246808..249855 3048 0x1000 1: [3048..6143]: 237568..240663 3096 0x1001 After swapon: EXT: FILE-OFFSET BLOCK-RANGE TOTAL FLAGS 0: [0..4095]: 258048..262143 4096 0x1000 1: [4096..6143]: 238616..240663 2048 0x1001 Kmsg: F2FS-fs (zram0): Swapfile: last extent is not aligned to section F2FS-fs (zram0): Swapfile (2) is not align to section: 1) creat(), 2) ioctl(F2FS_IOC_SET_PIN_FILE), 3) fallocate(2097152 * n) Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 108 ++++++++++++++++++++++++++++++++++++++++------ fs/f2fs/f2fs.h | 1 + fs/f2fs/node.h | 3 ++ fs/f2fs/segment.c | 3 ++ 4 files changed, 101 insertions(+), 14 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index ee04e6b63218..3a01a1b50104 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -2472,6 +2472,10 @@ static inline bool check_inplace_update_policy(struct inode *inode, bool f2fs_should_update_inplace(struct inode *inode, struct f2fs_io_info *fio) { + /* swap file is migrating in aligned write mode */ + if (is_inode_flag_set(inode, FI_ALIGNED_WRITE)) + return false; + if (f2fs_is_pinned_file(inode)) return true; @@ -2494,6 +2498,11 @@ bool f2fs_should_update_outplace(struct inode *inode, struct f2fs_io_info *fio) return true; if (f2fs_is_atomic_file(inode)) return true; + + /* swap file is migrating in aligned write mode */ + if (is_inode_flag_set(inode, FI_ALIGNED_WRITE)) + return true; + if (fio) { if (page_private_gcing(fio->page)) return true; @@ -3830,6 +3839,65 @@ int f2fs_migrate_page(struct address_space *mapping, #endif #ifdef CONFIG_SWAP +static int f2fs_migrate_blocks(struct inode *inode, block_t start_blk, + unsigned int blkcnt) +{ + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + unsigned int blkofs; + unsigned int blk_per_sec = BLKS_PER_SEC(sbi); + unsigned int secidx = start_blk / blk_per_sec; + unsigned int end_sec = secidx + blkcnt / blk_per_sec; + int ret = 0; + + down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); + down_write(&F2FS_I(inode)->i_mmap_sem); + + set_inode_flag(inode, FI_ALIGNED_WRITE); + + for (; secidx < end_sec; secidx++) { + down_write(&sbi->pin_sem); + + f2fs_lock_op(sbi); + f2fs_allocate_new_section(sbi, CURSEG_COLD_DATA_PINNED, false); + f2fs_unlock_op(sbi); + + set_inode_flag(inode, FI_DO_DEFRAG); + + for (blkofs = 0; blkofs < blk_per_sec; blkofs++) { + struct page *page; + unsigned int blkidx = secidx * blk_per_sec + blkofs; + + page = f2fs_get_lock_data_page(inode, blkidx, true); + if (IS_ERR(page)) { + up_write(&sbi->pin_sem); + ret = PTR_ERR(page); + goto done; + } + + set_page_dirty(page); + f2fs_put_page(page, 1); + } + + clear_inode_flag(inode, FI_DO_DEFRAG); + + ret = filemap_fdatawrite(inode->i_mapping); + + up_write(&sbi->pin_sem); + + if (ret) + break; + } + +done: + clear_inode_flag(inode, FI_DO_DEFRAG); + clear_inode_flag(inode, FI_ALIGNED_WRITE); + + up_write(&F2FS_I(inode)->i_mmap_sem); + up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); + + return ret; +} + static 
int check_swap_activate(struct swap_info_struct *sis, struct file *swap_file, sector_t *span) { @@ -3843,7 +3911,8 @@ static int check_swap_activate(struct swap_info_struct *sis, sector_t highest_pblock = 0; int nr_extents = 0; unsigned long nr_pblocks; - unsigned int blocks_per_sec = BLKS_PER_SEC(sbi); + unsigned int blks_per_sec = BLKS_PER_SEC(sbi); + unsigned int sec_blks_mask = BLKS_PER_SEC(sbi) - 1; unsigned int not_aligned = 0; int ret = 0; @@ -3856,7 +3925,7 @@ static int check_swap_activate(struct swap_info_struct *sis, while (cur_lblock < last_lblock && cur_lblock < sis->max) { struct f2fs_map_blocks map; - +retry: cond_resched(); memset(&map, 0, sizeof(map)); @@ -3881,16 +3950,28 @@ static int check_swap_activate(struct swap_info_struct *sis, pblock = map.m_pblk; nr_pblocks = map.m_len; - if ((pblock - SM_I(sbi)->main_blkaddr) & (blocks_per_sec - 1) || - nr_pblocks & (blocks_per_sec - 1)) { - if (f2fs_is_pinned_file(inode)) { - f2fs_err(sbi, "Swapfile does not align to section"); - ret = -EINVAL; - goto out; - } + if ((pblock - SM_I(sbi)->main_blkaddr) & sec_blks_mask || + nr_pblocks & sec_blks_mask) { not_aligned++; - } + nr_pblocks = roundup(nr_pblocks, blks_per_sec); + if (cur_lblock + nr_pblocks > sis->max) + nr_pblocks -= blks_per_sec; + + if (!nr_pblocks) { + /* this extent is last one */ + nr_pblocks = map.m_len; + f2fs_warn(sbi, "Swapfile: last extent is not aligned to section"); + goto next; + } + + ret = f2fs_migrate_blocks(inode, cur_lblock, + nr_pblocks); + if (ret) + goto out; + goto retry; + } +next: if (cur_lblock + nr_pblocks >= sis->max) nr_pblocks = sis->max - cur_lblock; @@ -3917,11 +3998,10 @@ static int check_swap_activate(struct swap_info_struct *sis, sis->max = cur_lblock; sis->pages = cur_lblock - 1; sis->highest_bit = cur_lblock - 1; - - if (not_aligned) - f2fs_warn(sbi, "Swapfile (%u) is not align to section: 1) creat(), 2) ioctl(F2FS_IOC_SET_PIN_FILE), 3) fallocate()", - not_aligned); out: + if (not_aligned) + f2fs_warn(sbi, "Swapfile (%u) is not align to section: 1) creat(), 2) ioctl(F2FS_IOC_SET_PIN_FILE), 3) fallocate(%u * N)", + not_aligned, blks_per_sec * F2FS_BLKSIZE); return ret; } diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 561e52a07391..d84e78dabdbe 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -709,6 +709,7 @@ enum { FI_MMAP_FILE, /* indicate file was mmapped */ FI_ENABLE_COMPRESS, /* enable compression in "user" compression mode */ FI_COMPRESS_RELEASED, /* compressed blocks were released */ + FI_ALIGNED_WRITE, /* enable aligned write */ FI_MAX, /* max flag, never be used */ }; diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h index 84d45385d1f2..ff14a6e5ac1c 100644 --- a/fs/f2fs/node.h +++ b/fs/f2fs/node.h @@ -38,6 +38,9 @@ /* return value for read_node_page */ #define LOCKED_PAGE 1 +/* check pinned file's alignment status of physical blocks */ +#define FILE_NOT_ALIGNED 1 + /* For flag in struct node_info */ enum { IS_CHECKPOINTED, /* is it checkpointed before? 
*/ diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 85fae9f3624d..15cc89eef28d 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -3291,6 +3291,9 @@ static int __get_segment_type_6(struct f2fs_io_info *fio) if (fio->type == DATA) { struct inode *inode = fio->page->mapping->host; + if (is_inode_flag_set(inode, FI_ALIGNED_WRITE)) + return CURSEG_COLD_DATA_PINNED; + if (page_private_gcing(fio->page)) { if (fio->sbi->am.atgc_enabled && (fio->io_type == FS_DATA_IO) && From 4d9a2bb1a6babc9280a8b4e7a95ada9bf6e51e9a Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Fri, 11 Jun 2021 07:46:30 +0800 Subject: [PATCH 25/32] f2fs: introduce f2fs_casefolded_name slab cache Add a slab cache: "f2fs_casefolded_name" for memory allocation of casefold name. Reviewed-by: Eric Biggers Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/dir.c | 17 +++++++++++------ fs/f2fs/recovery.c | 6 +++++- fs/f2fs/super.c | 24 ++++++++++++++++++++++++ 3 files changed, 40 insertions(+), 7 deletions(-) diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 96dcc4aca639..456651682daf 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -16,6 +16,10 @@ #include "xattr.h" #include +#ifdef CONFIG_UNICODE +extern struct kmem_cache *f2fs_cf_name_slab; +#endif + static unsigned long dir_blocks(struct inode *inode) { return ((unsigned long long) (i_size_read(inode) + PAGE_SIZE - 1)) @@ -77,11 +81,10 @@ int f2fs_init_casefolded_name(const struct inode *dir, { #ifdef CONFIG_UNICODE struct super_block *sb = dir->i_sb; - struct f2fs_sb_info *sbi = F2FS_SB(sb); if (IS_CASEFOLDED(dir)) { - fname->cf_name.name = f2fs_kmalloc(sbi, F2FS_NAME_LEN, - GFP_NOFS); + fname->cf_name.name = kmem_cache_alloc(f2fs_cf_name_slab, + GFP_NOFS); if (!fname->cf_name.name) return -ENOMEM; fname->cf_name.len = utf8_casefold(sb->s_encoding, @@ -89,7 +92,7 @@ int f2fs_init_casefolded_name(const struct inode *dir, fname->cf_name.name, F2FS_NAME_LEN); if ((int)fname->cf_name.len <= 0) { - kfree(fname->cf_name.name); + kmem_cache_free(f2fs_cf_name_slab, fname->cf_name.name); fname->cf_name.name = NULL; if (sb_has_strict_encoding(sb)) return -EINVAL; @@ -172,8 +175,10 @@ void f2fs_free_filename(struct f2fs_filename *fname) fname->crypto_buf.name = NULL; #endif #ifdef CONFIG_UNICODE - kfree(fname->cf_name.name); - fname->cf_name.name = NULL; + if (fname->cf_name.name) { + kmem_cache_free(f2fs_cf_name_slab, fname->cf_name.name); + fname->cf_name.name = NULL; + } #endif } diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index 4b2f7d1d5bf4..695eacfe776c 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -45,6 +45,10 @@ static struct kmem_cache *fsync_entry_slab; +#ifdef CONFIG_UNICODE +extern struct kmem_cache *f2fs_cf_name_slab; +#endif + bool f2fs_space_for_roll_forward(struct f2fs_sb_info *sbi) { s64 nalloc = percpu_counter_sum_positive(&sbi->alloc_valid_block_count); @@ -145,7 +149,7 @@ static int init_recovered_filename(const struct inode *dir, f2fs_hash_filename(dir, fname); #ifdef CONFIG_UNICODE /* Case-sensitive match is fine for recovery */ - kfree(fname->cf_name.name); + kmem_cache_free(f2fs_cf_name_slab, fname->cf_name.name); fname->cf_name.name = NULL; #endif } else { diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index d70122da3f99..289cdda3d17b 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -277,6 +277,24 @@ static int f2fs_sb_read_encoding(const struct f2fs_super_block *sb, return 0; } + +struct kmem_cache *f2fs_cf_name_slab; +static int __init f2fs_create_casefold_cache(void) +{ + f2fs_cf_name_slab = 
f2fs_kmem_cache_create("f2fs_casefolded_name", + F2FS_NAME_LEN); + if (!f2fs_cf_name_slab) + return -ENOMEM; + return 0; +} + +static void f2fs_destroy_casefold_cache(void) +{ + kmem_cache_destroy(f2fs_cf_name_slab); +} +#else +static int __init f2fs_create_casefold_cache(void) { return 0; } +static void f2fs_destroy_casefold_cache(void) { } #endif static inline void limit_reserve_root(struct f2fs_sb_info *sbi) @@ -4319,7 +4337,12 @@ static int __init init_f2fs_fs(void) err = f2fs_init_compress_cache(); if (err) goto free_compress_mempool; + err = f2fs_create_casefold_cache(); + if (err) + goto free_compress_cache; return 0; +free_compress_cache: + f2fs_destroy_compress_cache(); free_compress_mempool: f2fs_destroy_compress_mempool(); free_bioset: @@ -4355,6 +4378,7 @@ fail: static void __exit exit_f2fs_fs(void) { + f2fs_destroy_casefold_cache(); f2fs_destroy_compress_cache(); f2fs_destroy_compress_mempool(); f2fs_destroy_bioset(); From 3c16dc40aab84bab9cf54c2b61a458bb86b180c3 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 8 Jun 2021 07:31:22 +0800 Subject: [PATCH 26/32] f2fs: fix to avoid adding tab before doc section Otherwise whole section after tab will be invisible in compiled html format document. Cc: Mauro Carvalho Chehab Fixes: 89272ca1102e ("docs: filesystems: convert f2fs.txt to ReST") Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- Documentation/filesystems/f2fs.rst | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/Documentation/filesystems/f2fs.rst b/Documentation/filesystems/f2fs.rst index 809c4d0a696f..b91e5a8444d5 100644 --- a/Documentation/filesystems/f2fs.rst +++ b/Documentation/filesystems/f2fs.rst @@ -720,10 +720,10 @@ users. ===================== ======================== =================== User F2FS Block ===================== ======================== =================== - META WRITE_LIFE_NOT_SET - HOT_NODE " - WARM_NODE " - COLD_NODE " +N/A META WRITE_LIFE_NOT_SET +N/A HOT_NODE " +N/A WARM_NODE " +N/A COLD_NODE " ioctl(COLD) COLD_DATA WRITE_LIFE_EXTREME extension list " " @@ -749,10 +749,10 @@ WRITE_LIFE_LONG " WRITE_LIFE_LONG ===================== ======================== =================== User F2FS Block ===================== ======================== =================== - META WRITE_LIFE_MEDIUM; - HOT_NODE WRITE_LIFE_NOT_SET - WARM_NODE " - COLD_NODE WRITE_LIFE_NONE +N/A META WRITE_LIFE_MEDIUM; +N/A HOT_NODE WRITE_LIFE_NOT_SET +N/A WARM_NODE " +N/A COLD_NODE WRITE_LIFE_NONE ioctl(COLD) COLD_DATA WRITE_LIFE_EXTREME extension list " " From 4215d054aea002ab36290504b1d8bb98df43d3eb Mon Sep 17 00:00:00 2001 From: Daeho Jeong Date: Tue, 15 Jun 2021 15:39:04 -0700 Subject: [PATCH 27/32] f2fs: enable extent cache for compression files in read-only Let's allow extent cache for RO partition. 
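With this change, the eligibility check in f2fs_may_extent_tree() effectively becomes the condition below (a condensed sketch restating the hunk that follows, not new logic): a compressed inode may keep an extent tree only when the image is read-only, where its block layout can no longer change.

	/* allow extent cache for compressed inodes only on a read-only image */
	if (!test_opt(sbi, EXTENT_CACHE) ||
	    is_inode_flag_set(inode, FI_NO_EXTENT) ||
	    (is_inode_flag_set(inode, FI_COMPRESSED_FILE) &&
	     !f2fs_sb_has_readonly(sbi)))
		return false;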
Signed-off-by: Daeho Jeong Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 39 ++++++++++++++++++++------------------- 1 file changed, 20 insertions(+), 19 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index d84e78dabdbe..16ce1ade9fa6 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -3148,25 +3148,6 @@ static inline bool is_dot_dotdot(const u8 *name, size_t len) return false; } -static inline bool f2fs_may_extent_tree(struct inode *inode) -{ - struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - - if (!test_opt(sbi, EXTENT_CACHE) || - is_inode_flag_set(inode, FI_NO_EXTENT) || - is_inode_flag_set(inode, FI_COMPRESSED_FILE)) - return false; - - /* - * for recovered files during mount do not create extents - * if shrinker is not registered. - */ - if (list_empty(&sbi->s_list)) - return false; - - return S_ISREG(inode->i_mode); -} - static inline void *f2fs_kmalloc(struct f2fs_sb_info *sbi, size_t size, gfp_t flags) { @@ -4201,6 +4182,26 @@ F2FS_FEATURE_FUNCS(casefold, CASEFOLD); F2FS_FEATURE_FUNCS(compression, COMPRESSION); F2FS_FEATURE_FUNCS(readonly, RO); +static inline bool f2fs_may_extent_tree(struct inode *inode) +{ + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + + if (!test_opt(sbi, EXTENT_CACHE) || + is_inode_flag_set(inode, FI_NO_EXTENT) || + (is_inode_flag_set(inode, FI_COMPRESSED_FILE) && + !f2fs_sb_has_readonly(sbi))) + return false; + + /* + * for recovered files during mount do not create extents + * if shrinker is not registered. + */ + if (list_empty(&sbi->s_list)) + return false; + + return S_ISREG(inode->i_mode); +} + #ifdef CONFIG_BLK_DEV_ZONED static inline bool f2fs_blkz_is_seq(struct f2fs_sb_info *sbi, int devi, block_t blkaddr) From 132e3209789c647e37dc398ef36af4de13f104b4 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 22 Jun 2021 12:56:44 -0700 Subject: [PATCH 28/32] f2fs: remove false alarm on iget failure during GC This patch removes setting SBI_NEED_FSCK when GC gets an error on f2fs_iget, since f2fs_iget can give ENOMEM and others by race condition. If we set this critical fsck flag, we'll get EIO during fsync via the below code path. In f2fs_inplace_write_data(), if (is_sbi_flag_set(sbi, SBI_NEED_FSCK) || f2fs_cp_error(sbi)) { err = -EIO; goto drop_bio; } Fixes: 9557727876674 ("f2fs: drop inplace IO if fs status is abnormal") Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/gc.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index da5947b30142..0e42ee5f7770 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -1451,10 +1451,8 @@ next_step: if (phase == 3) { inode = f2fs_iget(sb, dni.ino); - if (IS_ERR(inode) || is_bad_inode(inode)) { - set_sbi_flag(sbi, SBI_NEED_FSCK); + if (IS_ERR(inode) || is_bad_inode(inode)) continue; - } if (!down_write_trylock( &F2FS_I(inode)->i_gc_rwsem[WRITE])) { From bf1c5bc21b879bcddc8cf0fe0e1c3110fc8d25d6 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Sun, 27 Jun 2021 21:29:18 +0800 Subject: [PATCH 29/32] MAINTAINERS: f2fs: update my email address Old email address will be invalid after a few days, update it to kernel.org one. 
Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 93ce2b8c1b44..0693971c630f 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -6903,7 +6903,7 @@ F: drivers/iommu/exynos-iommu.c F2FS FILE SYSTEM M: Jaegeuk Kim -M: Chao Yu +M: Chao Yu L: linux-f2fs-devel@lists.sourceforge.net S: Maintained W: https://f2fs.wiki.kernel.org/ From 151b1982be5d9f4ca641687ee1a4bb4fba5d26cf Mon Sep 17 00:00:00 2001 From: Fengnan Chang Date: Tue, 8 Jun 2021 19:15:08 +0800 Subject: [PATCH 30/32] f2fs: compress: add nocompress extensions support When we create a directory with compression enabled, every file written into that directory will be considered for compression. But sometimes we know in advance that a new file cannot meet the compression ratio requirement, so we need a nocompress extension to skip such files and avoid the unnecessary compressed-page test. After adding nocompress_extension, the priority is: dir_flag < comp_extension,nocompress_extension < comp_file_flag,no_comp_file_flag. Priority in between FS_COMPR_FL, FS_NOCOMP_FL, extensions: * compress_extension=so; nocompress_extension=zip; chattr +c dir; touch dir/foo.so; touch dir/bar.zip; touch dir/baz.txt; then foo.so and baz.txt should be compressed, bar.zip should be non-compressed. chattr +c dir/bar.zip can enable compression on bar.zip. * compress_extension=so; nocompress_extension=zip; chattr -c dir; touch dir/foo.so; touch dir/bar.zip; touch dir/baz.txt; then foo.so should be compressed, bar.zip and baz.txt should be non-compressed. chattr +c dir/bar.zip; chattr +c dir/baz.txt; can enable compression on bar.zip and baz.txt. Signed-off-by: Fengnan Chang Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- Documentation/filesystems/f2fs.rst | 31 +++++++++++ fs/f2fs/f2fs.h | 2 + fs/f2fs/namei.c | 20 ++++++-- fs/f2fs/super.c | 79 +++++++++++++++++++++++++++++- 4 files changed, 125 insertions(+), 7 deletions(-) diff --git a/Documentation/filesystems/f2fs.rst b/Documentation/filesystems/f2fs.rst index b91e5a8444d5..ff9e7cc97c65 100644 --- a/Documentation/filesystems/f2fs.rst +++ b/Documentation/filesystems/f2fs.rst @@ -281,6 +281,18 @@ compress_extension=%s Support adding specified extension, so that f2fs can enab For other files, we can still enable compression via ioctl. Note that, there is one reserved special extension '*', it can be set to enable compression for all files. +nocompress_extension=%s Support adding specified extension, so that f2fs can disable + compression on those corresponding files, just contrary to compression extension. + If you know exactly which files cannot be compressed, you can use this. + The same extension name can't appear in both compress and nocompress + extension at the same time. + If the compress extension specifies all files, the types specified by the + nocompress extension will be treated as special cases and will not be compressed. + Don't allow using '*' to specify all files in the nocompress extension. + After adding nocompress_extension, the priority is: + dir_flag < comp_extension,nocompress_extension < comp_file_flag,no_comp_file_flag. + See more in the compression sections. + compress_chksum Support verifying chksum of raw data in compressed cluster. compress_mode=%s Control file compression mode. This supports "fs" and "user" modes. In "fs" mode (default), f2fs does automatic compression @@ -817,13 +829,30 @@ Compression implementation all logical blocks in cluster contain valid data and compress ratio of cluster data is lower than specified threshold.
-- To enable compression on regular inode, there are three ways: +- To enable compression on regular inode, there are four ways: * chattr +c file * chattr +c dir; touch dir/file * mount w/ -o compress_extension=ext; touch file.ext * mount w/ -o compress_extension=*; touch any_file +- To disable compression on regular inode, there are two ways: + + * chattr -c file + * mount w/ -o nocompress_extension=ext; touch file.ext + +- Priority in between FS_COMPR_FL, FS_NOCOMP_FL, extensions: + + * compress_extension=so; nocompress_extension=zip; chattr +c dir; touch + dir/foo.so; touch dir/bar.zip; touch dir/baz.txt; then foo.so and baz.txt + should be compressed, bar.zip should be non-compressed. chattr +c dir/bar.zip + can enable compression on bar.zip. + * compress_extension=so; nocompress_extension=zip; chattr -c dir; touch + dir/foo.so; touch dir/bar.zip; touch dir/baz.txt; then foo.so should be + compressed, bar.zip and baz.txt should be non-compressed. + chattr +c dir/bar.zip; chattr +c dir/baz.txt; can enable compression on bar.zip + and baz.txt. + - At this point, compression feature doesn't expose compressed space to user directly in order to guarantee potential data updates later to the space. Instead, the main goal is to reduce data writes to flash disk as much as diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 16ce1ade9fa6..65befc68d88e 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -151,8 +151,10 @@ struct f2fs_mount_info { unsigned char compress_level; /* compress level */ bool compress_chksum; /* compressed data chksum */ unsigned char compress_ext_cnt; /* extension count */ + unsigned char nocompress_ext_cnt; /* nocompress extension count */ int compress_mode; /* compression mode */ unsigned char extensions[COMPRESS_EXT_NUM][F2FS_EXTENSION_LEN]; /* extensions */ + unsigned char noextensions[COMPRESS_EXT_NUM][F2FS_EXTENSION_LEN]; /* extensions */ }; #define F2FS_FEATURE_ENCRYPT 0x0001 diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index d4139e166b95..e149c8c66a71 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -287,14 +287,16 @@ static void set_compress_inode(struct f2fs_sb_info *sbi, struct inode *inode, const unsigned char *name) { __u8 (*extlist)[F2FS_EXTENSION_LEN] = sbi->raw_super->extension_list; - unsigned char (*ext)[F2FS_EXTENSION_LEN]; - unsigned int ext_cnt = F2FS_OPTION(sbi).compress_ext_cnt; + unsigned char (*noext)[F2FS_EXTENSION_LEN] = F2FS_OPTION(sbi).noextensions; + unsigned char (*ext)[F2FS_EXTENSION_LEN] = F2FS_OPTION(sbi).extensions; + unsigned char ext_cnt = F2FS_OPTION(sbi).compress_ext_cnt; + unsigned char noext_cnt = F2FS_OPTION(sbi).nocompress_ext_cnt; int i, cold_count, hot_count; if (!f2fs_sb_has_compression(sbi) || - is_inode_flag_set(inode, FI_COMPRESSED_FILE) || F2FS_I(inode)->i_flags & F2FS_NOCOMP_FL || - !f2fs_may_compress(inode) + !f2fs_may_compress(inode) || + (!ext_cnt && !noext_cnt)) return; down_read(&sbi->sb_lock); @@ -311,7 +313,15 @@ static void set_compress_inode(struct f2fs_sb_info *sbi, struct inode *inode, up_read(&sbi->sb_lock); - ext = F2FS_OPTION(sbi).extensions; + for (i = 0; i < noext_cnt; i++) { + if (is_extension_exist(name, noext[i], false)) { + f2fs_disable_compressed_file(inode); + return; + } + } + + if (is_inode_flag_set(inode, FI_COMPRESSED_FILE)) + return; for (i = 0; i < ext_cnt; i++) { if (!is_extension_exist(name, ext[i], false)) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 289cdda3d17b..8fecd3050ccd 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -148,6 +148,7 @@ enum { Opt_compress_algorithm, 
Opt_compress_log_size, Opt_compress_extension, + Opt_nocompress_extension, Opt_compress_chksum, Opt_compress_mode, Opt_compress_cache, @@ -223,6 +224,7 @@ static match_table_t f2fs_tokens = { {Opt_compress_algorithm, "compress_algorithm=%s"}, {Opt_compress_log_size, "compress_log_size=%u"}, {Opt_compress_extension, "compress_extension=%s"}, + {Opt_nocompress_extension, "nocompress_extension=%s"}, {Opt_compress_chksum, "compress_chksum"}, {Opt_compress_mode, "compress_mode=%s"}, {Opt_compress_cache, "compress_cache"}, @@ -493,6 +495,43 @@ static int f2fs_set_test_dummy_encryption(struct super_block *sb, } #ifdef CONFIG_F2FS_FS_COMPRESSION +/* + * 1. The same extension name cannot appear in both compress and non-compress extension + * at the same time. + * 2. If the compress extension specifies all files, the types specified by the non-compress + * extension will be treated as special cases and will not be compressed. + * 3. Don't allow the non-compress extension to specify all files. + */ +static int f2fs_test_compress_extension(struct f2fs_sb_info *sbi) +{ + unsigned char (*ext)[F2FS_EXTENSION_LEN]; + unsigned char (*noext)[F2FS_EXTENSION_LEN]; + int ext_cnt, noext_cnt, index = 0, no_index = 0; + + ext = F2FS_OPTION(sbi).extensions; + ext_cnt = F2FS_OPTION(sbi).compress_ext_cnt; + noext = F2FS_OPTION(sbi).noextensions; + noext_cnt = F2FS_OPTION(sbi).nocompress_ext_cnt; + + if (!noext_cnt) + return 0; + + for (no_index = 0; no_index < noext_cnt; no_index++) { + if (!strcasecmp("*", noext[no_index])) { + f2fs_info(sbi, "Don't allow the nocompress extension specifies all files"); + return -EINVAL; + } + for (index = 0; index < ext_cnt; index++) { + if (!strcasecmp(ext[index], noext[no_index])) { + f2fs_info(sbi, "Don't allow the same extension %s appear in both compress and nocompress extension", + ext[index]); + return -EINVAL; + } + } + } + return 0; +} + #ifdef CONFIG_F2FS_FS_LZ4 static int f2fs_set_lz4hc_level(struct f2fs_sb_info *sbi, const char *str) { @@ -566,7 +605,8 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount) substring_t args[MAX_OPT_ARGS]; #ifdef CONFIG_F2FS_FS_COMPRESSION unsigned char (*ext)[F2FS_EXTENSION_LEN]; - int ext_cnt; + unsigned char (*noext)[F2FS_EXTENSION_LEN]; + int ext_cnt, noext_cnt; #endif char *p, *name; int arg = 0; @@ -1069,6 +1109,30 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount) F2FS_OPTION(sbi).compress_ext_cnt++; kfree(name); break; + case Opt_nocompress_extension: + if (!f2fs_sb_has_compression(sbi)) { + f2fs_info(sbi, "Image doesn't support compression"); + break; + } + name = match_strdup(&args[0]); + if (!name) + return -ENOMEM; + + noext = F2FS_OPTION(sbi).noextensions; + noext_cnt = F2FS_OPTION(sbi).nocompress_ext_cnt; + + if (strlen(name) >= F2FS_EXTENSION_LEN || + noext_cnt >= COMPRESS_EXT_NUM) { + f2fs_err(sbi, + "invalid extension length/number"); + kfree(name); + return -EINVAL; + } + + strcpy(noext[noext_cnt], name); + F2FS_OPTION(sbi).nocompress_ext_cnt++; + kfree(name); + break; case Opt_compress_chksum: F2FS_OPTION(sbi).compress_chksum = true; break; @@ -1093,6 +1157,7 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount) case Opt_compress_algorithm: case Opt_compress_log_size: case Opt_compress_extension: + case Opt_nocompress_extension: case Opt_compress_chksum: case Opt_compress_mode: case Opt_compress_cache: @@ -1147,6 +1212,13 @@ default_check: } #endif +#ifdef CONFIG_F2FS_FS_COMPRESSION + if (f2fs_test_compress_extension(sbi)) { + 
f2fs_err(sbi, "invalid compress or nocompress extension"); + return -EINVAL; + } +#endif + if (F2FS_IO_SIZE_BITS(sbi) && !f2fs_lfs_mode(sbi)) { f2fs_err(sbi, "Should set mode=lfs with %uKB-sized IO", F2FS_IO_SIZE_KB(sbi)); @@ -1697,6 +1769,11 @@ static inline void f2fs_show_compress_options(struct seq_file *seq, F2FS_OPTION(sbi).extensions[i]); } + for (i = 0; i < F2FS_OPTION(sbi).nocompress_ext_cnt; i++) { + seq_printf(seq, ",nocompress_extension=%s", + F2FS_OPTION(sbi).noextensions[i]); + } + if (F2FS_OPTION(sbi).compress_chksum) seq_puts(seq, ",compress_chksum"); From c9ebd3df43c067b57203737484076345b6df2fb4 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Sun, 4 Jul 2021 22:11:25 -0700 Subject: [PATCH 31/32] f2fs: initialize page->private when using for our internal use We need to guarantee it's initially zero. Otherwise, it'll hurt entire flag operations. Fixes: b763f3bedc2d ("f2fs: restructure f2fs page.private layout") Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 2 ++ fs/f2fs/f2fs.h | 5 ++++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 3a01a1b50104..d2cf48c5a2e4 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -3819,6 +3819,8 @@ int f2fs_migrate_page(struct address_space *mapping, get_page(newpage); } + /* guarantee to start from no stale private field */ + set_page_private(newpage, 0); if (PagePrivate(page)) { set_page_private(newpage, page_private(page)); SetPagePrivate(newpage); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 65befc68d88e..ee8eb33e2c25 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1331,7 +1331,8 @@ enum { #define PAGE_PRIVATE_GET_FUNC(name, flagname) \ static inline bool page_private_##name(struct page *page) \ { \ - return test_bit(PAGE_PRIVATE_NOT_POINTER, &page_private(page)) && \ + return PagePrivate(page) && \ + test_bit(PAGE_PRIVATE_NOT_POINTER, &page_private(page)) && \ test_bit(PAGE_PRIVATE_##flagname, &page_private(page)); \ } @@ -1341,6 +1342,7 @@ static inline void set_page_private_##name(struct page *page) \ if (!PagePrivate(page)) { \ get_page(page); \ SetPagePrivate(page); \ + set_page_private(page, 0); \ } \ set_bit(PAGE_PRIVATE_NOT_POINTER, &page_private(page)); \ set_bit(PAGE_PRIVATE_##flagname, &page_private(page)); \ @@ -1392,6 +1394,7 @@ static inline void set_page_private_data(struct page *page, unsigned long data) if (!PagePrivate(page)) { get_page(page); SetPagePrivate(page); + set_page_private(page, 0); } set_bit(PAGE_PRIVATE_NOT_POINTER, &page_private(page)); page_private(page) |= data << PAGE_PRIVATE_MAX; From 28607bf3aa6f9762b32dc7f1ed0488823c0651b8 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 6 Jul 2021 22:05:06 -0700 Subject: [PATCH 32/32] f2fs: drop dirty node pages when cp is in error status Otherwise, writeback is going to fall in a loop to flush dirty inode forever before getting SBI_CLOSING. 
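With this change, __write_node_page() drops the dirty node page as soon as a checkpoint error is detected, instead of redirtying it until SBI_IS_CLOSE is set (a condensed sketch of the hunk that follows):

	if (unlikely(f2fs_cp_error(sbi))) {
		/* drop the page so writeback cannot loop on it forever */
		ClearPageUptodate(page);
		dec_page_count(sbi, F2FS_DIRTY_NODES);
		unlock_page(page);
		return 0;
	}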
Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index dd611efa8aa4..0be9e2d7120e 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1549,13 +1549,10 @@ static int __write_node_page(struct page *page, bool atomic, bool *submitted, trace_f2fs_writepage(page, NODE); if (unlikely(f2fs_cp_error(sbi))) { - if (is_sbi_flag_set(sbi, SBI_IS_CLOSE)) { - ClearPageUptodate(page); - dec_page_count(sbi, F2FS_DIRTY_NODES); - unlock_page(page); - return 0; - } - goto redirty_out; + ClearPageUptodate(page); + dec_page_count(sbi, F2FS_DIRTY_NODES); + unlock_page(page); + return 0; } if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))