mm/vmscan.c: refactor shrink_node()
This patch refactors shrink_node() to improve readability for the upcoming changes to mm/vmscan.c.

Link: https://lkml.kernel.org/r/20220918080010.2920238-4-yuzhao@google.com
Signed-off-by: Yu Zhao <yuzhao@google.com>
Reviewed-by: Barry Song <baohua@kernel.org>
Reviewed-by: Miaohe Lin <linmiaohe@huawei.com>
Acked-by: Brian Geffon <bgeffon@google.com>
Acked-by: Jan Alexander Steffens (heftig) <heftig@archlinux.org>
Acked-by: Oleksandr Natalenko <oleksandr@natalenko.name>
Acked-by: Steven Barrett <steven@liquorix.net>
Acked-by: Suleiman Souhlal <suleiman@google.com>
Tested-by: Daniel Byrne <djbyrne@mtu.edu>
Tested-by: Donald Carr <d@chaos-reins.com>
Tested-by: Holger Hoffstätte <holger@applied-asynchrony.com>
Tested-by: Konstantin Kharlamov <Hi-Angel@yandex.ru>
Tested-by: Shuang Zhai <szhai2@cs.rochester.edu>
Tested-by: Sofia Trinh <sofia.trinh@edi.works>
Tested-by: Vaibhav Jain <vaibhav@linux.ibm.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Hillf Danton <hdanton@sina.com>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Michael Larabel <Michael@MichaelLarabel.com>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Mike Rapoport <rppt@linux.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Qi Zheng <zhengqi.arch@bytedance.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Will Deacon <will@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
This commit is contained in:
parent
eed9a328aa
commit
f1e1a7be47
198
mm/vmscan.c
198
mm/vmscan.c
@ -2728,6 +2728,109 @@ enum scan_balance {
|
||||
SCAN_FILE,
|
||||
};
|
||||
|
||||
/*
 * prepare_scan_count - refresh the reclaim heuristics kept in @sc
 * @pgdat: node that is about to be scanned
 * @sc: scan control to update
 *
 * Using the lruvec of sc->target_mem_cgroup on @pgdat, this fills in
 * sc->anon_cost, sc->file_cost, sc->may_deactivate and sc->cache_trim_mode.
 * For non-cgroup reclaim it additionally computes sc->file_is_tiny from
 * node-wide counters; for cgroup reclaim sc->file_is_tiny is left untouched.
 */
static void prepare_scan_count(pg_data_t *pgdat, struct scan_control *sc)
{
	struct lruvec *lruvec = mem_cgroup_lruvec(sc->target_mem_cgroup, pgdat);
	unsigned long nr_free, nr_file, nr_anon, inactive_file, refaults;
	unsigned long high_wmark_sum = 0;
	int zid;

	/*
	 * The heuristics below read per-memcg lruvec stats, so flush the
	 * memory cgroup stats first to get accurate values.
	 */
	mem_cgroup_flush_stats();

	/*
	 * Copy the anon and file costs while holding the LRU lock; they
	 * determine the scan balance between the anon and file LRUs.
	 */
	spin_lock_irq(&lruvec->lru_lock);
	sc->anon_cost = lruvec->anon_cost;
	sc->file_cost = lruvec->file_cost;
	spin_unlock_irq(&lruvec->lru_lock);

	/*
	 * Pick the LRU types that may be deactivated, aiming for desirable
	 * inactive:active ratios on both the anon and the file lists.
	 */
	if (sc->force_deactivate) {
		sc->may_deactivate = DEACTIVATE_ANON | DEACTIVATE_FILE;
	} else {
		/*
		 * A refault counter that moved since it was last recorded
		 * means a new workingset is being established: allow
		 * deactivation so stale active pages go away quickly. A low
		 * inactive list allows it as well.
		 */
		refaults = lruvec_page_state(lruvec, WORKINGSET_ACTIVATE_ANON);
		if (refaults == lruvec->refaults[WORKINGSET_ANON] &&
		    !inactive_is_low(lruvec, LRU_INACTIVE_ANON))
			sc->may_deactivate &= ~DEACTIVATE_ANON;
		else
			sc->may_deactivate |= DEACTIVATE_ANON;

		refaults = lruvec_page_state(lruvec, WORKINGSET_ACTIVATE_FILE);
		if (refaults == lruvec->refaults[WORKINGSET_FILE] &&
		    !inactive_is_low(lruvec, LRU_INACTIVE_FILE))
			sc->may_deactivate &= ~DEACTIVATE_FILE;
		else
			sc->may_deactivate |= DEACTIVATE_FILE;
	}

	/*
	 * With plenty of inactive file pages that are not thrashing,
	 * reclaim those first and leave anonymous pages alone.
	 */
	inactive_file = lruvec_page_state(lruvec, NR_INACTIVE_FILE);
	sc->cache_trim_mode = (inactive_file >> sc->priority) &&
			      !(sc->may_deactivate & DEACTIVATE_FILE);

	/* The file_is_tiny check below is only done for non-cgroup reclaim. */
	if (cgroup_reclaim(sc))
		return;

	/*
	 * Guard against the cache trap: cache pages start out inactive, so
	 * every cache fault tips the scan balance towards the file LRU, and
	 * a shrinking file LRU leaves less room for rotation from
	 * references. The resulting feedback loop makes a tiny thrashing
	 * file LRU look infinitely more attractive than anon pages. Detect
	 * it from the size of the file LRU.
	 */
	nr_free = sum_zone_node_page_state(pgdat->node_id, NR_FREE_PAGES);
	nr_file = node_page_state(pgdat, NR_ACTIVE_FILE) +
		  node_page_state(pgdat, NR_INACTIVE_FILE);

	/* Add up the high watermarks of every managed zone on this node. */
	for (zid = 0; zid < MAX_NR_ZONES; zid++) {
		struct zone *zone = &pgdat->node_zones[zid];

		if (managed_zone(zone))
			high_wmark_sum += high_wmark_pages(zone);
	}

	/*
	 * Take anon into account: when it is low as well, this is plain
	 * extreme pressure rather than runaway file reclaim, and reclaim
	 * proceeds as usual.
	 */
	nr_anon = node_page_state(pgdat, NR_INACTIVE_ANON);

	sc->file_is_tiny = nr_file + nr_free <= high_wmark_sum &&
			   !(sc->may_deactivate & DEACTIVATE_ANON) &&
			   nr_anon >> sc->priority;
}
|
||||
|
||||
/*
|
||||
* Determine how aggressively the anon and file LRU lists should be
|
||||
* scanned.
|
||||
@ -3197,109 +3300,16 @@ static void shrink_node(pg_data_t *pgdat, struct scan_control *sc)
|
||||
unsigned long nr_reclaimed, nr_scanned;
|
||||
struct lruvec *target_lruvec;
|
||||
bool reclaimable = false;
|
||||
unsigned long file;
|
||||
|
||||
target_lruvec = mem_cgroup_lruvec(sc->target_mem_cgroup, pgdat);
|
||||
|
||||
again:
|
||||
/*
|
||||
* Flush the memory cgroup stats, so that we read accurate per-memcg
|
||||
* lruvec stats for heuristics.
|
||||
*/
|
||||
mem_cgroup_flush_stats();
|
||||
|
||||
memset(&sc->nr, 0, sizeof(sc->nr));
|
||||
|
||||
nr_reclaimed = sc->nr_reclaimed;
|
||||
nr_scanned = sc->nr_scanned;
|
||||
|
||||
/*
|
||||
* Determine the scan balance between anon and file LRUs.
|
||||
*/
|
||||
spin_lock_irq(&target_lruvec->lru_lock);
|
||||
sc->anon_cost = target_lruvec->anon_cost;
|
||||
sc->file_cost = target_lruvec->file_cost;
|
||||
spin_unlock_irq(&target_lruvec->lru_lock);
|
||||
|
||||
/*
|
||||
* Target desirable inactive:active list ratios for the anon
|
||||
* and file LRU lists.
|
||||
*/
|
||||
if (!sc->force_deactivate) {
|
||||
unsigned long refaults;
|
||||
|
||||
/*
|
||||
* When refaults are being observed, it means a new
|
||||
* workingset is being established. Deactivate to get
|
||||
* rid of any stale active pages quickly.
|
||||
*/
|
||||
refaults = lruvec_page_state(target_lruvec,
|
||||
WORKINGSET_ACTIVATE_ANON);
|
||||
if (refaults != target_lruvec->refaults[WORKINGSET_ANON] ||
|
||||
inactive_is_low(target_lruvec, LRU_INACTIVE_ANON))
|
||||
sc->may_deactivate |= DEACTIVATE_ANON;
|
||||
else
|
||||
sc->may_deactivate &= ~DEACTIVATE_ANON;
|
||||
|
||||
refaults = lruvec_page_state(target_lruvec,
|
||||
WORKINGSET_ACTIVATE_FILE);
|
||||
if (refaults != target_lruvec->refaults[WORKINGSET_FILE] ||
|
||||
inactive_is_low(target_lruvec, LRU_INACTIVE_FILE))
|
||||
sc->may_deactivate |= DEACTIVATE_FILE;
|
||||
else
|
||||
sc->may_deactivate &= ~DEACTIVATE_FILE;
|
||||
} else
|
||||
sc->may_deactivate = DEACTIVATE_ANON | DEACTIVATE_FILE;
|
||||
|
||||
/*
|
||||
* If we have plenty of inactive file pages that aren't
|
||||
* thrashing, try to reclaim those first before touching
|
||||
* anonymous pages.
|
||||
*/
|
||||
file = lruvec_page_state(target_lruvec, NR_INACTIVE_FILE);
|
||||
if (file >> sc->priority && !(sc->may_deactivate & DEACTIVATE_FILE))
|
||||
sc->cache_trim_mode = 1;
|
||||
else
|
||||
sc->cache_trim_mode = 0;
|
||||
|
||||
/*
|
||||
* Prevent the reclaimer from falling into the cache trap: as
|
||||
* cache pages start out inactive, every cache fault will tip
|
||||
* the scan balance towards the file LRU. And as the file LRU
|
||||
* shrinks, so does the window for rotation from references.
|
||||
* This means we have a runaway feedback loop where a tiny
|
||||
* thrashing file LRU becomes infinitely more attractive than
|
||||
* anon pages. Try to detect this based on file LRU size.
|
||||
*/
|
||||
if (!cgroup_reclaim(sc)) {
|
||||
unsigned long total_high_wmark = 0;
|
||||
unsigned long free, anon;
|
||||
int z;
|
||||
|
||||
free = sum_zone_node_page_state(pgdat->node_id, NR_FREE_PAGES);
|
||||
file = node_page_state(pgdat, NR_ACTIVE_FILE) +
|
||||
node_page_state(pgdat, NR_INACTIVE_FILE);
|
||||
|
||||
for (z = 0; z < MAX_NR_ZONES; z++) {
|
||||
struct zone *zone = &pgdat->node_zones[z];
|
||||
if (!managed_zone(zone))
|
||||
continue;
|
||||
|
||||
total_high_wmark += high_wmark_pages(zone);
|
||||
}
|
||||
|
||||
/*
|
||||
* Consider anon: if that's low too, this isn't a
|
||||
* runaway file reclaim problem, but rather just
|
||||
* extreme pressure. Reclaim as per usual then.
|
||||
*/
|
||||
anon = node_page_state(pgdat, NR_INACTIVE_ANON);
|
||||
|
||||
sc->file_is_tiny =
|
||||
file + free <= total_high_wmark &&
|
||||
!(sc->may_deactivate & DEACTIVATE_ANON) &&
|
||||
anon >> sc->priority;
|
||||
}
|
||||
prepare_scan_count(pgdat, sc);
|
||||
|
||||
shrink_node_memcgs(pgdat, sc);
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user