Changes since last update:

- get rid of magical page->mapping type marks;
 
  - switch to inplace I/O under low memory scenario;
 
  - return the correct block number for bmap();
 
  - some minor cleanups.
 -----BEGIN PGP SIGNATURE-----
 
 iIsEABYIADMWIQThPAmQN9sSA0DVxtI5NzHcH7XmBAUCX9iA5xUcaHNpYW5na2Fv
 QHJlZGhhdC5jb20ACgkQOTcx3B+15gT+gAD+N8HcFqJk0vgLih5ud1TmM9tWlYY0
 7UYQnvRn6OuXDEUA+waXg+zutWVzHBP6cnVHGmcZVp3elsZB1U05sg2TIKkP
 =m8At
 -----END PGP SIGNATURE-----

Merge tag 'erofs-for-5.11-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/xiang/erofs

Pull erofs updates from Gao Xiang:
 "This cycle we got rid of magical page->mapping type marks for
  temporary pages which had some concern before, now such usage is
  replaced with specific page->private.

  Also switch to inplace I/O instead of allocating extra cached pages to
  avoid direct reclaim under low memory scenario.

  There are some bmap bugfix and minor cleanups as well"

* tag 'erofs-for-5.11-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/xiang/erofs:
  erofs: avoid using generic_block_bmap
  erofs: force inplace I/O under low memory scenario
  erofs: simplify try_to_claim_pcluster()
  erofs: insert to managed cache after adding to pcl
  erofs: get rid of magical Z_EROFS_MAPPING_STAGING
  erofs: remove a void EROFS_VERSION macro set in Makefile
This commit is contained in:
Linus Torvalds 2020-12-15 18:58:27 -08:00
commit e88bd82698
6 changed files with 153 additions and 115 deletions

View File

@ -1,11 +1,6 @@
# SPDX-License-Identifier: GPL-2.0-only
EROFS_VERSION = "1.0"
ccflags-y += -DEROFS_VERSION=\"$(EROFS_VERSION)\"
obj-$(CONFIG_EROFS_FS) += erofs.o
erofs-objs := super.o inode.o data.o namei.o dir.o utils.o
erofs-$(CONFIG_EROFS_FS_XATTR) += xattr.o
erofs-$(CONFIG_EROFS_FS_ZIP) += decompressor.o zmap.o zdata.o

View File

@ -26,30 +26,58 @@ struct z_erofs_decompress_req {
bool inplace_io, partial_decoding;
};
/* some special page->private (unsigned long, see below) */
#define Z_EROFS_SHORTLIVED_PAGE (-1UL << 2)
#define Z_EROFS_PREALLOCATED_PAGE (-2UL << 2)
/*
* - 0x5A110C8D ('sallocated', Z_EROFS_MAPPING_STAGING) -
* used to mark temporary allocated pages from other
* file/cached pages and NULL mapping pages.
* For all pages in a pcluster, page->private should be one of
* Type Last 2bits page->private
* short-lived page 00 Z_EROFS_SHORTLIVED_PAGE
* preallocated page (tryalloc) 00 Z_EROFS_PREALLOCATED_PAGE
* cached/managed page 00 pointer to z_erofs_pcluster
* online page (file-backed, 01/10/11 sub-index << 2 | count
* some pages can be used for inplace I/O)
*
* page->mapping should be one of
* Type page->mapping
* short-lived page NULL
* preallocated page NULL
* cached/managed page non-NULL or NULL (invalidated/truncated page)
* online page non-NULL
*
* For all managed pages, PG_private should be set with 1 extra refcount,
* which is used for page reclaim / migration.
*/
#define Z_EROFS_MAPPING_STAGING ((void *)0x5A110C8D)
/* check if a page is marked as staging */
static inline bool z_erofs_page_is_staging(struct page *page)
/*
* short-lived pages are pages directly from buddy system with specific
* page->private (no need to set PagePrivate since these are non-LRU /
* non-movable pages and bypass reclaim / migration code).
*/
static inline bool z_erofs_is_shortlived_page(struct page *page)
{
return page->mapping == Z_EROFS_MAPPING_STAGING;
}
static inline bool z_erofs_put_stagingpage(struct list_head *pagepool,
struct page *page)
{
if (!z_erofs_page_is_staging(page))
if (page->private != Z_EROFS_SHORTLIVED_PAGE)
return false;
/* staging pages should not be used by others at the same time */
if (page_ref_count(page) > 1)
DBG_BUGON(page->mapping);
return true;
}
static inline bool z_erofs_put_shortlivedpage(struct list_head *pagepool,
struct page *page)
{
if (!z_erofs_is_shortlived_page(page))
return false;
/* short-lived pages should not be used by others at the same time */
if (page_ref_count(page) > 1) {
put_page(page);
else
} else {
/* follow the pcluster rule above. */
set_page_private(page, 0);
list_add(&page->lru, pagepool);
}
return true;
}

View File

@ -312,27 +312,12 @@ static void erofs_raw_access_readahead(struct readahead_control *rac)
submit_bio(bio);
}
static int erofs_get_block(struct inode *inode, sector_t iblock,
struct buffer_head *bh, int create)
{
struct erofs_map_blocks map = {
.m_la = iblock << 9,
};
int err;
err = erofs_map_blocks(inode, &map, EROFS_GET_BLOCKS_RAW);
if (err)
return err;
if (map.m_flags & EROFS_MAP_MAPPED)
bh->b_blocknr = erofs_blknr(map.m_pa);
return err;
}
static sector_t erofs_bmap(struct address_space *mapping, sector_t block)
{
struct inode *inode = mapping->host;
struct erofs_map_blocks map = {
.m_la = blknr_to_addr(block),
};
if (EROFS_I(inode)->datalayout == EROFS_INODE_FLAT_INLINE) {
erofs_blk_t blks = i_size_read(inode) >> LOG_BLOCK_SIZE;
@ -341,7 +326,10 @@ static sector_t erofs_bmap(struct address_space *mapping, sector_t block)
return 0;
}
return generic_block_bmap(mapping, block, erofs_get_block);
if (!erofs_map_blocks(inode, &map, EROFS_GET_BLOCKS_RAW))
return erofs_blknr(map.m_pa);
return 0;
}
/* for uncompressed (aligned) files and raw access for other files */

View File

@ -76,7 +76,7 @@ static int z_erofs_lz4_prepare_destpages(struct z_erofs_decompress_req *rq,
victim = erofs_allocpage(pagepool, GFP_KERNEL);
if (!victim)
return -ENOMEM;
victim->mapping = Z_EROFS_MAPPING_STAGING;
set_page_private(victim, Z_EROFS_SHORTLIVED_PAGE);
}
rq->out[i] = victim;
}

View File

@ -20,6 +20,11 @@
enum z_erofs_cache_alloctype {
DONTALLOC, /* don't allocate any cached pages */
DELAYEDALLOC, /* delayed allocation (at the time of submitting io) */
/*
* try to use cached I/O if page allocation succeeds or fallback
* to in-place I/O instead to avoid any direct reclaim.
*/
TRYALLOC,
};
/*
@ -154,13 +159,16 @@ static DEFINE_MUTEX(z_pagemap_global_lock);
static void preload_compressed_pages(struct z_erofs_collector *clt,
struct address_space *mc,
enum z_erofs_cache_alloctype type)
enum z_erofs_cache_alloctype type,
struct list_head *pagepool)
{
const struct z_erofs_pcluster *pcl = clt->pcl;
const unsigned int clusterpages = BIT(pcl->clusterbits);
struct page **pages = clt->compressedpages;
pgoff_t index = pcl->obj.index + (pages - pcl->compressed_pages);
bool standalone = true;
gfp_t gfp = (mapping_gfp_mask(mc) & ~__GFP_DIRECT_RECLAIM) |
__GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN;
if (clt->mode < COLLECT_PRIMARY_FOLLOWED)
return;
@ -168,6 +176,7 @@ static void preload_compressed_pages(struct z_erofs_collector *clt,
for (; pages < pcl->compressed_pages + clusterpages; ++pages) {
struct page *page;
compressed_page_t t;
struct page *newpage = NULL;
/* the compressed page was loaded before */
if (READ_ONCE(*pages))
@ -179,7 +188,15 @@ static void preload_compressed_pages(struct z_erofs_collector *clt,
t = tag_compressed_page_justfound(page);
} else if (type == DELAYEDALLOC) {
t = tagptr_init(compressed_page_t, PAGE_UNALLOCATED);
} else if (type == TRYALLOC) {
newpage = erofs_allocpage(pagepool, gfp);
if (!newpage)
goto dontalloc;
set_page_private(newpage, Z_EROFS_PREALLOCATED_PAGE);
t = tag_compressed_page_justfound(newpage);
} else { /* DONTALLOC */
dontalloc:
if (standalone)
clt->compressedpages = pages;
standalone = false;
@ -189,8 +206,12 @@ static void preload_compressed_pages(struct z_erofs_collector *clt,
if (!cmpxchg_relaxed(pages, NULL, tagptr_cast_ptr(t)))
continue;
if (page)
if (page) {
put_page(page);
} else if (newpage) {
set_page_private(newpage, 0);
list_add(&newpage->lru, pagepool);
}
}
if (standalone) /* downgrade to PRIMARY_FOLLOWED_NOINPLACE */
@ -226,11 +247,8 @@ int erofs_try_to_free_all_cached_pages(struct erofs_sb_info *sbi,
/* barrier is implied in the following 'unlock_page' */
WRITE_ONCE(pcl->compressed_pages[i], NULL);
set_page_private(page, 0);
ClearPagePrivate(page);
detach_page_private(page);
unlock_page(page);
put_page(page);
}
return 0;
}
@ -254,10 +272,8 @@ int erofs_try_to_free_cached_page(struct address_space *mapping,
}
erofs_workgroup_unfreeze(&pcl->obj, 1);
if (ret) {
ClearPagePrivate(page);
put_page(page);
}
if (ret)
detach_page_private(page);
}
return ret;
}
@ -297,34 +313,33 @@ static int z_erofs_attach_page(struct z_erofs_collector *clt,
return ret ? 0 : -EAGAIN;
}
static enum z_erofs_collectmode
try_to_claim_pcluster(struct z_erofs_pcluster *pcl,
z_erofs_next_pcluster_t *owned_head)
static void z_erofs_try_to_claim_pcluster(struct z_erofs_collector *clt)
{
/* let's claim these following types of pclusters */
retry:
if (pcl->next == Z_EROFS_PCLUSTER_NIL) {
/* type 1, nil pcluster */
if (cmpxchg(&pcl->next, Z_EROFS_PCLUSTER_NIL,
*owned_head) != Z_EROFS_PCLUSTER_NIL)
goto retry;
struct z_erofs_pcluster *pcl = clt->pcl;
z_erofs_next_pcluster_t *owned_head = &clt->owned_head;
/* type 1, nil pcluster (this pcluster doesn't belong to any chain.) */
if (cmpxchg(&pcl->next, Z_EROFS_PCLUSTER_NIL,
*owned_head) == Z_EROFS_PCLUSTER_NIL) {
*owned_head = &pcl->next;
/* lucky, I am the followee :) */
return COLLECT_PRIMARY_FOLLOWED;
} else if (pcl->next == Z_EROFS_PCLUSTER_TAIL) {
/* so we can attach this pcluster to our submission chain. */
clt->mode = COLLECT_PRIMARY_FOLLOWED;
return;
}
/*
* type 2, link to the end of a existing open chain,
* be careful that its submission itself is governed
* by the original owned chain.
* type 2, link to the end of an existing open chain, be careful
* that its submission is controlled by the original attached chain.
*/
if (cmpxchg(&pcl->next, Z_EROFS_PCLUSTER_TAIL,
*owned_head) != Z_EROFS_PCLUSTER_TAIL)
goto retry;
*owned_head) == Z_EROFS_PCLUSTER_TAIL) {
*owned_head = Z_EROFS_PCLUSTER_TAIL;
return COLLECT_PRIMARY_HOOKED;
clt->mode = COLLECT_PRIMARY_HOOKED;
clt->tailpcl = NULL;
return;
}
return COLLECT_PRIMARY; /* :( better luck next time */
/* type 3, it belongs to a chain, but it isn't the end of the chain */
clt->mode = COLLECT_PRIMARY;
}
static int z_erofs_lookup_collection(struct z_erofs_collector *clt,
@ -369,10 +384,8 @@ static int z_erofs_lookup_collection(struct z_erofs_collector *clt,
/* used to check tail merging loop due to corrupted images */
if (clt->owned_head == Z_EROFS_PCLUSTER_TAIL)
clt->tailpcl = pcl;
clt->mode = try_to_claim_pcluster(pcl, &clt->owned_head);
/* clean tailpcl if the current owned_head is Z_EROFS_PCLUSTER_TAIL */
if (clt->owned_head == Z_EROFS_PCLUSTER_TAIL)
clt->tailpcl = NULL;
z_erofs_try_to_claim_pcluster(clt);
clt->cl = cl;
return 0;
}
@ -562,7 +575,7 @@ static bool should_alloc_managed_pages(struct z_erofs_decompress_frontend *fe,
}
static int z_erofs_do_read_page(struct z_erofs_decompress_frontend *fe,
struct page *page)
struct page *page, struct list_head *pagepool)
{
struct inode *const inode = fe->inode;
struct erofs_sb_info *const sbi = EROFS_I_SB(inode);
@ -615,11 +628,12 @@ restart_now:
/* preload all compressed pages (maybe downgrade role if necessary) */
if (should_alloc_managed_pages(fe, sbi->ctx.cache_strategy, map->m_la))
cache_strategy = DELAYEDALLOC;
cache_strategy = TRYALLOC;
else
cache_strategy = DONTALLOC;
preload_compressed_pages(clt, MNGD_MAPPING(sbi), cache_strategy);
preload_compressed_pages(clt, MNGD_MAPPING(sbi),
cache_strategy, pagepool);
hitted:
/*
@ -648,12 +662,12 @@ hitted:
retry:
err = z_erofs_attach_page(clt, page, page_type);
/* should allocate an additional staging page for pagevec */
/* should allocate an additional short-lived page for pagevec */
if (err == -EAGAIN) {
struct page *const newpage =
alloc_page(GFP_NOFS | __GFP_NOFAIL);
newpage->mapping = Z_EROFS_MAPPING_STAGING;
set_page_private(newpage, Z_EROFS_SHORTLIVED_PAGE);
err = z_erofs_attach_page(clt, newpage,
Z_EROFS_PAGE_TYPE_EXCLUSIVE);
if (!err)
@ -710,6 +724,11 @@ static void z_erofs_decompress_kickoff(struct z_erofs_decompressqueue *io,
queue_work(z_erofs_workqueue, &io->u.work);
}
static bool z_erofs_page_is_invalidated(struct page *page)
{
return !page->mapping && !z_erofs_is_shortlived_page(page);
}
static void z_erofs_decompressqueue_endio(struct bio *bio)
{
tagptr1_t t = tagptr_init(tagptr1_t, bio->bi_private);
@ -722,7 +741,7 @@ static void z_erofs_decompressqueue_endio(struct bio *bio)
struct page *page = bvec->bv_page;
DBG_BUGON(PageUptodate(page));
DBG_BUGON(!page->mapping);
DBG_BUGON(z_erofs_page_is_invalidated(page));
if (err)
SetPageError(page);
@ -795,9 +814,9 @@ static int z_erofs_decompress_pcluster(struct super_block *sb,
/* all pages in pagevec ought to be valid */
DBG_BUGON(!page);
DBG_BUGON(!page->mapping);
DBG_BUGON(z_erofs_page_is_invalidated(page));
if (z_erofs_put_stagingpage(pagepool, page))
if (z_erofs_put_shortlivedpage(pagepool, page))
continue;
if (page_type == Z_EROFS_VLE_PAGE_TYPE_HEAD)
@ -831,9 +850,9 @@ static int z_erofs_decompress_pcluster(struct super_block *sb,
/* all compressed pages ought to be valid */
DBG_BUGON(!page);
DBG_BUGON(!page->mapping);
DBG_BUGON(z_erofs_page_is_invalidated(page));
if (!z_erofs_page_is_staging(page)) {
if (!z_erofs_is_shortlived_page(page)) {
if (erofs_page_is_managed(sbi, page)) {
if (!PageUptodate(page))
err = -EIO;
@ -858,7 +877,7 @@ static int z_erofs_decompress_pcluster(struct super_block *sb,
overlapped = true;
}
/* PG_error needs checking for inplaced and staging pages */
/* PG_error needs checking for all non-managed pages */
if (PageError(page)) {
DBG_BUGON(PageUptodate(page));
err = -EIO;
@ -897,8 +916,8 @@ out:
if (erofs_page_is_managed(sbi, page))
continue;
/* recycle all individual staging pages */
(void)z_erofs_put_stagingpage(pagepool, page);
/* recycle all individual short-lived pages */
(void)z_erofs_put_shortlivedpage(pagepool, page);
WRITE_ONCE(compressed_pages[i], NULL);
}
@ -908,10 +927,10 @@ out:
if (!page)
continue;
DBG_BUGON(!page->mapping);
DBG_BUGON(z_erofs_page_is_invalidated(page));
/* recycle all individual staging pages */
if (z_erofs_put_stagingpage(pagepool, page))
/* recycle all individual short-lived pages */
if (z_erofs_put_shortlivedpage(pagepool, page))
continue;
if (err < 0)
@ -1008,16 +1027,30 @@ repeat:
justfound = tagptr_unfold_tags(t);
page = tagptr_unfold_ptr(t);
/*
* preallocated cached pages, which is used to avoid direct reclaim
* otherwise, it will go inplace I/O path instead.
*/
if (page->private == Z_EROFS_PREALLOCATED_PAGE) {
WRITE_ONCE(pcl->compressed_pages[nr], page);
set_page_private(page, 0);
tocache = true;
goto out_tocache;
}
mapping = READ_ONCE(page->mapping);
/*
* unmanaged (file) pages are all locked solidly,
* file-backed online pages in plcuster are all locked steady,
* therefore it is impossible for `mapping' to be NULL.
*/
if (mapping && mapping != mc)
/* ought to be unmanaged pages */
goto out;
/* directly return for shortlived page as well */
if (z_erofs_is_shortlived_page(page))
goto out;
lock_page(page);
/* only true if page reclaim goes wrong, should never happen */
@ -1061,28 +1094,21 @@ repeat:
put_page(page);
out_allocpage:
page = erofs_allocpage(pagepool, gfp | __GFP_NOFAIL);
if (!tocache || add_to_page_cache_lru(page, mc, index + nr, gfp)) {
/* non-LRU / non-movable temporary page is needed */
page->mapping = Z_EROFS_MAPPING_STAGING;
tocache = false;
}
if (oldpage != cmpxchg(&pcl->compressed_pages[nr], oldpage, page)) {
if (tocache) {
/* since it added to managed cache successfully */
unlock_page(page);
put_page(page);
} else {
list_add(&page->lru, pagepool);
}
cond_resched();
goto repeat;
}
if (tocache) {
set_page_private(page, (unsigned long)pcl);
SetPagePrivate(page);
out_tocache:
if (!tocache || add_to_page_cache_lru(page, mc, index + nr, gfp)) {
/* turn into temporary page if fails (1 ref) */
set_page_private(page, Z_EROFS_SHORTLIVED_PAGE);
goto out;
}
attach_page_private(page, pcl);
/* drop a refcount added by allocpage (then we have 2 refs here) */
put_page(page);
out: /* the only exit (for tracing and debugging) */
return page;
}
@ -1284,7 +1310,7 @@ static int z_erofs_readpage(struct file *file, struct page *page)
f.headoffset = (erofs_off_t)page->index << PAGE_SHIFT;
err = z_erofs_do_read_page(&f, page);
err = z_erofs_do_read_page(&f, page, &pagepool);
(void)z_erofs_collector_end(&f.clt);
/* if some compressed cluster ready, need submit them anyway */
@ -1338,7 +1364,7 @@ static void z_erofs_readahead(struct readahead_control *rac)
/* traversal in reverse order */
head = (void *)page_private(page);
err = z_erofs_do_read_page(&f, page);
err = z_erofs_do_read_page(&f, page, &pagepool);
if (err)
erofs_err(inode->i_sb,
"readahead error at page %lu @ nid %llu",

View File

@ -173,6 +173,7 @@ static inline void z_erofs_onlinepage_endio(struct page *page)
v = atomic_dec_return(u.o);
if (!(v & Z_EROFS_ONLINEPAGE_COUNT_MASK)) {
set_page_private(page, 0);
ClearPagePrivate(page);
if (!PageError(page))
SetPageUptodate(page);