mm: introduce memalloc_retry_wait()
Various places in the kernel - largely in filesystems - respond to a memory allocation failure by looping around and re-trying. Some of these cannot conveniently use __GFP_NOFAIL, for reasons such as: - a GFP_ATOMIC allocation, which __GFP_NOFAIL doesn't work on - a need to check for the process being signalled between failures - the possibility that other recovery actions could be performed - the allocation is quite deep in support code, and passing down an extra flag to say if __GFP_NOFAIL is wanted would be clumsy. Many of these currently use congestion_wait() which (in almost all cases) simply waits the given timeout - congestion isn't tracked for most devices. It isn't clear what the best delay is for loops, but it is clear that the various filesystems shouldn't be responsible for choosing a timeout. This patch introduces memalloc_retry_wait() with takes on that responsibility. Code that wants to retry a memory allocation can call this function passing the GFP flags that were used. It will wait however is appropriate. For now, it only considers __GFP_NORETRY and whatever gfpflags_allow_blocking() tests. If blocking is allowed without __GFP_NORETRY, then alloc_page either made some reclaim progress, or waited for a while, before failing. So there is no need for much further waiting. memalloc_retry_wait() will wait until the current jiffie ends. If this condition is not met, then alloc_page() won't have waited much if at all. In that case memalloc_retry_wait() waits about 200ms. This is the delay that most current loops uses. linux/sched/mm.h needs to be included in some files now, but linux/backing-dev.h does not. Link: https://lkml.kernel.org/r/163754371968.13692.1277530886009912421@noble.neil.brown.name Signed-off-by: NeilBrown <neilb@suse.de> Cc: Dave Chinner <david@fromorbit.com> Cc: Michal Hocko <mhocko@suse.com> Cc: "Theodore Ts'o" <tytso@mit.edu> Cc: Jaegeuk Kim <jaegeuk@kernel.org> Cc: Chao Yu <chao@kernel.org> Cc: Darrick J. Wong <djwong@kernel.org> Cc: Chuck Lever <chuck.lever@oracle.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:
parent
704687deaa
commit
4034247a0d
@ -27,8 +27,8 @@
|
||||
#include <linux/slab.h>
|
||||
#include <linux/uaccess.h>
|
||||
#include <linux/fiemap.h>
|
||||
#include <linux/backing-dev.h>
|
||||
#include <linux/iomap.h>
|
||||
#include <linux/sched/mm.h>
|
||||
#include "ext4_jbd2.h"
|
||||
#include "ext4_extents.h"
|
||||
#include "xattr.h"
|
||||
@ -4407,8 +4407,7 @@ retry:
|
||||
err = ext4_es_remove_extent(inode, last_block,
|
||||
EXT_MAX_BLOCKS - last_block);
|
||||
if (err == -ENOMEM) {
|
||||
cond_resched();
|
||||
congestion_wait(BLK_RW_ASYNC, HZ/50);
|
||||
memalloc_retry_wait(GFP_ATOMIC);
|
||||
goto retry;
|
||||
}
|
||||
if (err)
|
||||
@ -4416,8 +4415,7 @@ retry:
|
||||
retry_remove_space:
|
||||
err = ext4_ext_remove_space(inode, last_block, EXT_MAX_BLOCKS - 1);
|
||||
if (err == -ENOMEM) {
|
||||
cond_resched();
|
||||
congestion_wait(BLK_RW_ASYNC, HZ/50);
|
||||
memalloc_retry_wait(GFP_ATOMIC);
|
||||
goto retry_remove_space;
|
||||
}
|
||||
return err;
|
||||
|
@ -7,7 +7,7 @@
|
||||
#include <linux/iomap.h>
|
||||
#include <linux/fiemap.h>
|
||||
#include <linux/iversion.h>
|
||||
#include <linux/backing-dev.h>
|
||||
#include <linux/sched/mm.h>
|
||||
|
||||
#include "ext4_jbd2.h"
|
||||
#include "ext4.h"
|
||||
@ -1929,8 +1929,7 @@ int ext4_inline_data_truncate(struct inode *inode, int *has_inline)
|
||||
retry:
|
||||
err = ext4_es_remove_extent(inode, 0, EXT_MAX_BLOCKS);
|
||||
if (err == -ENOMEM) {
|
||||
cond_resched();
|
||||
congestion_wait(BLK_RW_ASYNC, HZ/50);
|
||||
memalloc_retry_wait(GFP_ATOMIC);
|
||||
goto retry;
|
||||
}
|
||||
if (err)
|
||||
|
@ -24,7 +24,7 @@
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/mm.h>
|
||||
#include <linux/backing-dev.h>
|
||||
#include <linux/sched/mm.h>
|
||||
|
||||
#include "ext4_jbd2.h"
|
||||
#include "xattr.h"
|
||||
@ -523,12 +523,13 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
|
||||
ret = PTR_ERR(bounce_page);
|
||||
if (ret == -ENOMEM &&
|
||||
(io->io_bio || wbc->sync_mode == WB_SYNC_ALL)) {
|
||||
gfp_flags = GFP_NOFS;
|
||||
gfp_t new_gfp_flags = GFP_NOFS;
|
||||
if (io->io_bio)
|
||||
ext4_io_submit(io);
|
||||
else
|
||||
gfp_flags |= __GFP_NOFAIL;
|
||||
congestion_wait(BLK_RW_ASYNC, HZ/50);
|
||||
new_gfp_flags |= __GFP_NOFAIL;
|
||||
memalloc_retry_wait(gfp_flags);
|
||||
gfp_flags = new_gfp_flags;
|
||||
goto retry_encrypt;
|
||||
}
|
||||
|
||||
|
@ -8,9 +8,9 @@
|
||||
#include <linux/fs.h>
|
||||
#include <linux/f2fs_fs.h>
|
||||
#include <linux/buffer_head.h>
|
||||
#include <linux/sched/mm.h>
|
||||
#include <linux/mpage.h>
|
||||
#include <linux/writeback.h>
|
||||
#include <linux/backing-dev.h>
|
||||
#include <linux/pagevec.h>
|
||||
#include <linux/blkdev.h>
|
||||
#include <linux/bio.h>
|
||||
@ -2542,7 +2542,7 @@ retry_encrypt:
|
||||
/* flush pending IOs and wait for a while in the ENOMEM case */
|
||||
if (PTR_ERR(fio->encrypted_page) == -ENOMEM) {
|
||||
f2fs_flush_merged_writes(fio->sbi);
|
||||
congestion_wait(BLK_RW_ASYNC, DEFAULT_IO_TIMEOUT);
|
||||
memalloc_retry_wait(GFP_NOFS);
|
||||
gfp_flags |= __GFP_NOFAIL;
|
||||
goto retry_encrypt;
|
||||
}
|
||||
|
@ -7,7 +7,6 @@
|
||||
*/
|
||||
#include <linux/fs.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/backing-dev.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/f2fs_fs.h>
|
||||
#include <linux/kthread.h>
|
||||
@ -15,6 +14,7 @@
|
||||
#include <linux/freezer.h>
|
||||
#include <linux/sched/signal.h>
|
||||
#include <linux/random.h>
|
||||
#include <linux/sched/mm.h>
|
||||
|
||||
#include "f2fs.h"
|
||||
#include "node.h"
|
||||
@ -1375,8 +1375,7 @@ retry:
|
||||
if (err) {
|
||||
clear_page_private_gcing(page);
|
||||
if (err == -ENOMEM) {
|
||||
congestion_wait(BLK_RW_ASYNC,
|
||||
DEFAULT_IO_TIMEOUT);
|
||||
memalloc_retry_wait(GFP_NOFS);
|
||||
goto retry;
|
||||
}
|
||||
if (is_dirty)
|
||||
|
@ -8,8 +8,8 @@
|
||||
#include <linux/fs.h>
|
||||
#include <linux/f2fs_fs.h>
|
||||
#include <linux/buffer_head.h>
|
||||
#include <linux/backing-dev.h>
|
||||
#include <linux/writeback.h>
|
||||
#include <linux/sched/mm.h>
|
||||
|
||||
#include "f2fs.h"
|
||||
#include "node.h"
|
||||
@ -562,7 +562,7 @@ retry:
|
||||
inode = f2fs_iget(sb, ino);
|
||||
if (IS_ERR(inode)) {
|
||||
if (PTR_ERR(inode) == -ENOMEM) {
|
||||
congestion_wait(BLK_RW_ASYNC, DEFAULT_IO_TIMEOUT);
|
||||
memalloc_retry_wait(GFP_NOFS);
|
||||
goto retry;
|
||||
}
|
||||
}
|
||||
|
@ -8,7 +8,7 @@
|
||||
#include <linux/fs.h>
|
||||
#include <linux/f2fs_fs.h>
|
||||
#include <linux/mpage.h>
|
||||
#include <linux/backing-dev.h>
|
||||
#include <linux/sched/mm.h>
|
||||
#include <linux/blkdev.h>
|
||||
#include <linux/pagevec.h>
|
||||
#include <linux/swap.h>
|
||||
@ -2750,7 +2750,7 @@ int f2fs_recover_inode_page(struct f2fs_sb_info *sbi, struct page *page)
|
||||
retry:
|
||||
ipage = f2fs_grab_cache_page(NODE_MAPPING(sbi), ino, false);
|
||||
if (!ipage) {
|
||||
congestion_wait(BLK_RW_ASYNC, DEFAULT_IO_TIMEOUT);
|
||||
memalloc_retry_wait(GFP_NOFS);
|
||||
goto retry;
|
||||
}
|
||||
|
||||
|
@ -8,6 +8,7 @@
|
||||
#include <asm/unaligned.h>
|
||||
#include <linux/fs.h>
|
||||
#include <linux/f2fs_fs.h>
|
||||
#include <linux/sched/mm.h>
|
||||
#include "f2fs.h"
|
||||
#include "node.h"
|
||||
#include "segment.h"
|
||||
@ -587,7 +588,7 @@ retry_dn:
|
||||
err = f2fs_get_dnode_of_data(&dn, start, ALLOC_NODE);
|
||||
if (err) {
|
||||
if (err == -ENOMEM) {
|
||||
congestion_wait(BLK_RW_ASYNC, DEFAULT_IO_TIMEOUT);
|
||||
memalloc_retry_wait(GFP_NOFS);
|
||||
goto retry_dn;
|
||||
}
|
||||
goto out;
|
||||
@ -670,8 +671,7 @@ retry_prev:
|
||||
err = check_index_in_prev_nodes(sbi, dest, &dn);
|
||||
if (err) {
|
||||
if (err == -ENOMEM) {
|
||||
congestion_wait(BLK_RW_ASYNC,
|
||||
DEFAULT_IO_TIMEOUT);
|
||||
memalloc_retry_wait(GFP_NOFS);
|
||||
goto retry_prev;
|
||||
}
|
||||
goto err;
|
||||
|
@ -9,6 +9,7 @@
|
||||
#include <linux/f2fs_fs.h>
|
||||
#include <linux/bio.h>
|
||||
#include <linux/blkdev.h>
|
||||
#include <linux/sched/mm.h>
|
||||
#include <linux/prefetch.h>
|
||||
#include <linux/kthread.h>
|
||||
#include <linux/swap.h>
|
||||
@ -245,9 +246,7 @@ retry:
|
||||
LOOKUP_NODE);
|
||||
if (err) {
|
||||
if (err == -ENOMEM) {
|
||||
congestion_wait(BLK_RW_ASYNC,
|
||||
DEFAULT_IO_TIMEOUT);
|
||||
cond_resched();
|
||||
memalloc_retry_wait(GFP_NOFS);
|
||||
goto retry;
|
||||
}
|
||||
err = -EAGAIN;
|
||||
@ -424,9 +423,7 @@ retry:
|
||||
err = f2fs_do_write_data_page(&fio);
|
||||
if (err) {
|
||||
if (err == -ENOMEM) {
|
||||
congestion_wait(BLK_RW_ASYNC,
|
||||
DEFAULT_IO_TIMEOUT);
|
||||
cond_resched();
|
||||
memalloc_retry_wait(GFP_NOFS);
|
||||
goto retry;
|
||||
}
|
||||
unlock_page(page);
|
||||
|
@ -8,9 +8,9 @@
|
||||
#include <linux/module.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/fs.h>
|
||||
#include <linux/sched/mm.h>
|
||||
#include <linux/statfs.h>
|
||||
#include <linux/buffer_head.h>
|
||||
#include <linux/backing-dev.h>
|
||||
#include <linux/kthread.h>
|
||||
#include <linux/parser.h>
|
||||
#include <linux/mount.h>
|
||||
@ -2415,8 +2415,7 @@ repeat:
|
||||
page = read_cache_page_gfp(mapping, blkidx, GFP_NOFS);
|
||||
if (IS_ERR(page)) {
|
||||
if (PTR_ERR(page) == -ENOMEM) {
|
||||
congestion_wait(BLK_RW_ASYNC,
|
||||
DEFAULT_IO_TIMEOUT);
|
||||
memalloc_retry_wait(GFP_NOFS);
|
||||
goto repeat;
|
||||
}
|
||||
set_sbi_flag(F2FS_SB(sb), SBI_QUOTA_NEED_REPAIR);
|
||||
|
@ -4,7 +4,6 @@
|
||||
* All Rights Reserved.
|
||||
*/
|
||||
#include "xfs.h"
|
||||
#include <linux/backing-dev.h>
|
||||
#include "xfs_message.h"
|
||||
#include "xfs_trace.h"
|
||||
|
||||
@ -26,6 +25,6 @@ kmem_alloc(size_t size, xfs_km_flags_t flags)
|
||||
"%s(%u) possible memory allocation deadlock size %u in %s (mode:0x%x)",
|
||||
current->comm, current->pid,
|
||||
(unsigned int)size, __func__, lflags);
|
||||
congestion_wait(BLK_RW_ASYNC, HZ/50);
|
||||
memalloc_retry_wait(lflags);
|
||||
} while (1);
|
||||
}
|
||||
|
@ -394,7 +394,7 @@ xfs_buf_alloc_pages(
|
||||
}
|
||||
|
||||
XFS_STATS_INC(bp->b_mount, xb_page_retries);
|
||||
congestion_wait(BLK_RW_ASYNC, HZ / 50);
|
||||
memalloc_retry_wait(gfp_mask);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
@ -214,6 +214,32 @@ static inline void fs_reclaim_acquire(gfp_t gfp_mask) { }
|
||||
static inline void fs_reclaim_release(gfp_t gfp_mask) { }
|
||||
#endif
|
||||
|
||||
/* Any memory-allocation retry loop should use
|
||||
* memalloc_retry_wait(), and pass the flags for the most
|
||||
* constrained allocation attempt that might have failed.
|
||||
* This provides useful documentation of where loops are,
|
||||
* and a central place to fine tune the waiting as the MM
|
||||
* implementation changes.
|
||||
*/
|
||||
static inline void memalloc_retry_wait(gfp_t gfp_flags)
|
||||
{
|
||||
/* We use io_schedule_timeout because waiting for memory
|
||||
* typically included waiting for dirty pages to be
|
||||
* written out, which requires IO.
|
||||
*/
|
||||
__set_current_state(TASK_UNINTERRUPTIBLE);
|
||||
gfp_flags = current_gfp_context(gfp_flags);
|
||||
if (gfpflags_allow_blocking(gfp_flags) &&
|
||||
!(gfp_flags & __GFP_NORETRY))
|
||||
/* Probably waited already, no need for much more */
|
||||
io_schedule_timeout(1);
|
||||
else
|
||||
/* Probably didn't wait, and has now released a lock,
|
||||
* so now is a good time to wait
|
||||
*/
|
||||
io_schedule_timeout(HZ/50);
|
||||
}
|
||||
|
||||
/**
|
||||
* might_alloc - Mark possible allocation sites
|
||||
* @gfp_mask: gfp_t flags that would be used to allocate
|
||||
|
@ -6,6 +6,7 @@
|
||||
*/
|
||||
|
||||
#include <linux/sched.h>
|
||||
#include <linux/sched/mm.h>
|
||||
#include <linux/errno.h>
|
||||
#include <linux/freezer.h>
|
||||
#include <linux/kthread.h>
|
||||
@ -688,7 +689,7 @@ static int svc_alloc_arg(struct svc_rqst *rqstp)
|
||||
return -EINTR;
|
||||
}
|
||||
trace_svc_alloc_arg_err(pages);
|
||||
schedule_timeout(msecs_to_jiffies(500));
|
||||
memalloc_retry_wait(GFP_KERNEL);
|
||||
}
|
||||
rqstp->rq_page_end = &rqstp->rq_pages[pages];
|
||||
rqstp->rq_pages[pages] = NULL; /* this might be seen in nfsd_splice_actor() */
|
||||
|
Loading…
Reference in New Issue
Block a user