commit 5b200f5789
Merge branch 'akpm' (patches from Andrew)

Merge more updates from Andrew Morton:
 "More MM work: a memcg scalability improvement"

* emailed patches from Andrew Morton <akpm@linux-foundation.org>:
  mm/lru: revise the comments of lru_lock
  mm/lru: introduce relock_page_lruvec()
  mm/lru: replace pgdat lru_lock with lruvec lock
  mm/swap.c: serialize memcg changes in pagevec_lru_move_fn
  mm/compaction: do page isolation first in compaction
  mm/lru: introduce TestClearPageLRU()
  mm/mlock: remove __munlock_isolate_lru_page()
  mm/mlock: remove lru_lock on TestClearPageMlocked
  mm/vmscan: remove lruvec reget in move_pages_to_lru
  mm/lru: move lock into lru_note_cost
  mm/swap.c: fold vm event PGROTATED into pagevec_move_tail_fn
  mm/memcg: add debug checking in lock_page_memcg
  mm: page_idle_get_page() does not need lru_lock
  mm/rmap: stop store reordering issue on page->mapping
  mm/vmscan: remove unnecessary lruvec adding
  mm/thp: narrow lru locking
  mm/thp: simplify lru_add_page_tail()
  mm/thp: use head for head page in lru_add_page_tail()
  mm/thp: move lru_add_page_tail() to huge_memory.c
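As a reading aid, here is a minimal sketch (not part of the commit) of the locking pattern this series converts callers to, modeled on the new pagevec_lru_move_fn() in the mm/swap.c hunk below. The move_page_fn() callback is a hypothetical stand-in; TestClearPageLRU(), relock_page_lruvec_irqsave() and unlock_page_lruvec_irqrestore() are the helpers the series itself adds.

    /* Sketch: walk a pagevec under per-memcg, per-node lruvec->lru_lock. */
    #include <linux/memcontrol.h>
    #include <linux/pagevec.h>
    #include <linux/page-flags.h>

    static void walk_pagevec_locked(struct pagevec *pvec,
                    void (*move_page_fn)(struct page *page,
                                 struct lruvec *lruvec))
    {
        struct lruvec *lruvec = NULL;
        unsigned long flags = 0;
        int i;

        for (i = 0; i < pagevec_count(pvec); i++) {
            struct page *page = pvec->pages[i];

            /* Skip pages some other path has already isolated. */
            if (!TestClearPageLRU(page))
                continue;

            /* Re-take the spinlock only when the page's lruvec changes. */
            lruvec = relock_page_lruvec_irqsave(page, lruvec, &flags);
            move_page_fn(page, lruvec);
            SetPageLRU(page);
        }
        if (lruvec)
            unlock_page_lruvec_irqrestore(lruvec, flags);
    }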
@@ -133,18 +133,9 @@ Under below explanation, we assume CONFIG_MEM_RES_CTRL_SWAP=y.
 
 8. LRU
 ======
-Each memcg has its own private LRU. Now, its handling is under global
-VM's control (means that it's handled under global pgdat->lru_lock).
-Almost all routines around memcg's LRU is called by global LRU's
-list management functions under pgdat->lru_lock.
-
-A special function is mem_cgroup_isolate_pages(). This scans
-memcg's private LRU and call __isolate_lru_page() to extract a page
-from LRU.
-
-(By __isolate_lru_page(), the page is removed from both of global and
-private LRU.)
-
+Each memcg has its own vector of LRUs (inactive anon, active anon,
+inactive file, active file, unevictable) of pages from each node,
+each LRU handled under a single lru_lock for that memcg and node.
 
 9. Typical Tests.
 =================
@@ -287,20 +287,17 @@ When oom event notifier is registered, event will be delivered.
 2.6 Locking
 -----------
 
-lock_page_cgroup()/unlock_page_cgroup() should not be called under
-the i_pages lock.
-
-Other lock order is following:
-
-PG_locked.
-mm->page_table_lock
-pgdat->lru_lock
-lock_page_cgroup.
-
-In many cases, just lock_page_cgroup() is called.
-
-per-zone-per-cgroup LRU (cgroup's private LRU) is just guarded by
-pgdat->lru_lock, it has no lock of its own.
+Lock order is as follows:
+
+Page lock (PG_locked bit of page->flags)
+  mm->page_table_lock or split pte_lock
+    lock_page_memcg (memcg->move_lock)
+      mapping->i_pages lock
+        lruvec->lru_lock.
+
+Per-node-per-memcgroup LRU (cgroup's private LRU) is guarded by
+lruvec->lru_lock; PG_lru bit of page->flags is cleared before
+isolating a page from its LRU under lruvec->lru_lock.
 
 2.7 Kernel Memory Extension (CONFIG_MEMCG_KMEM)
 -----------------------------------------------
@@ -69,7 +69,7 @@ When pages are freed in batch, the also mm_page_free_batched is triggered.
 Broadly speaking, pages are taken off the LRU lock in bulk and
 freed in batch with a page list. Significant amounts of activity here could
 indicate that the system is under memory pressure and can also indicate
-contention on the zone->lru_lock.
+contention on the lruvec->lru_lock.
 
 4. Per-CPU Allocator Activity
 =============================
@@ -33,7 +33,7 @@ reclaim in Linux. The problems have been observed at customer sites on large
 memory x86_64 systems.
 
 To illustrate this with an example, a non-NUMA x86_64 platform with 128GB of
-main memory will have over 32 million 4k pages in a single zone. When a large
+main memory will have over 32 million 4k pages in a single node. When a large
 fraction of these pages are not evictable for any reason [see below], vmscan
 will spend a lot of time scanning the LRU lists looking for the small fraction
 of pages that are evictable. This can result in a situation where all CPUs are
@@ -55,7 +55,7 @@ unevictable, either by definition or by circumstance, in the future.
 The Unevictable Page List
 -------------------------
 
-The Unevictable LRU infrastructure consists of an additional, per-zone, LRU list
+The Unevictable LRU infrastructure consists of an additional, per-node, LRU list
 called the "unevictable" list and an associated page flag, PG_unevictable, to
 indicate that the page is being managed on the unevictable list.
 
@@ -84,15 +84,9 @@ The unevictable list does not differentiate between file-backed and anonymous,
 swap-backed pages. This differentiation is only important while the pages are,
 in fact, evictable.
 
-The unevictable list benefits from the "arrayification" of the per-zone LRU
+The unevictable list benefits from the "arrayification" of the per-node LRU
 lists and statistics originally proposed and posted by Christoph Lameter.
 
-The unevictable list does not use the LRU pagevec mechanism. Rather,
-unevictable pages are placed directly on the page's zone's unevictable list
-under the zone lru_lock. This allows us to prevent the stranding of pages on
-the unevictable list when one task has the page isolated from the LRU and other
-tasks are changing the "evictability" state of the page.
-
 
 Memory Control Group Interaction
 --------------------------------
@@ -101,8 +95,8 @@ The unevictable LRU facility interacts with the memory control group [aka
 memory controller; see Documentation/admin-guide/cgroup-v1/memory.rst] by extending the
 lru_list enum.
 
-The memory controller data structure automatically gets a per-zone unevictable
-list as a result of the "arrayification" of the per-zone LRU lists (one per
+The memory controller data structure automatically gets a per-node unevictable
+list as a result of the "arrayification" of the per-node LRU lists (one per
 lru_list enum element). The memory controller tracks the movement of pages to
 and from the unevictable list.
 
@@ -196,7 +190,7 @@ for the sake of expediency, to leave a unevictable page on one of the regular
 active/inactive LRU lists for vmscan to deal with. vmscan checks for such
 pages in all of the shrink_{active|inactive|page}_list() functions and will
 "cull" such pages that it encounters: that is, it diverts those pages to the
-unevictable list for the zone being scanned.
+unevictable list for the node being scanned.
 
 There may be situations where a page is mapped into a VM_LOCKED VMA, but the
 page is not marked as PG_mlocked. Such pages will make it all the way to
@@ -328,7 +322,7 @@ If the page was NOT already mlocked, mlock_vma_page() attempts to isolate the
 page from the LRU, as it is likely on the appropriate active or inactive list
 at that time. If the isolate_lru_page() succeeds, mlock_vma_page() will put
 back the page - by calling putback_lru_page() - which will notice that the page
-is now mlocked and divert the page to the zone's unevictable list. If
+is now mlocked and divert the page to the node's unevictable list. If
 mlock_vma_page() is unable to isolate the page from the LRU, vmscan will handle
 it later if and when it attempts to reclaim the page.
 
@@ -603,7 +597,7 @@ Some examples of these unevictable pages on the LRU lists are:
 unevictable list in mlock_vma_page().
 
 shrink_inactive_list() also diverts any unevictable pages that it finds on the
-inactive lists to the appropriate zone's unevictable list.
+inactive lists to the appropriate node's unevictable list.
 
 shrink_inactive_list() should only see SHM_LOCK'd pages that became SHM_LOCK'd
 after shrink_active_list() had moved them to the inactive list, or pages mapped
@@ -654,12 +654,41 @@ out:
 
 struct lruvec *mem_cgroup_page_lruvec(struct page *, struct pglist_data *);
 
+static inline bool lruvec_holds_page_lru_lock(struct page *page,
+                struct lruvec *lruvec)
+{
+    pg_data_t *pgdat = page_pgdat(page);
+    const struct mem_cgroup *memcg;
+    struct mem_cgroup_per_node *mz;
+
+    if (mem_cgroup_disabled())
+        return lruvec == &pgdat->__lruvec;
+
+    mz = container_of(lruvec, struct mem_cgroup_per_node, lruvec);
+    memcg = page_memcg(page) ? : root_mem_cgroup;
+
+    return lruvec->pgdat == pgdat && mz->memcg == memcg;
+}
+
 struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p);
 
 struct mem_cgroup *get_mem_cgroup_from_mm(struct mm_struct *mm);
 
 struct mem_cgroup *get_mem_cgroup_from_page(struct page *page);
 
+struct lruvec *lock_page_lruvec(struct page *page);
+struct lruvec *lock_page_lruvec_irq(struct page *page);
+struct lruvec *lock_page_lruvec_irqsave(struct page *page,
+                unsigned long *flags);
+
+#ifdef CONFIG_DEBUG_VM
+void lruvec_memcg_debug(struct lruvec *lruvec, struct page *page);
+#else
+static inline void lruvec_memcg_debug(struct lruvec *lruvec, struct page *page)
+{
+}
+#endif
+
 static inline
 struct mem_cgroup *mem_cgroup_from_css(struct cgroup_subsys_state *css){
     return css ? container_of(css, struct mem_cgroup, css) : NULL;
@@ -1167,6 +1196,14 @@ static inline struct lruvec *mem_cgroup_page_lruvec(struct page *page,
     return &pgdat->__lruvec;
 }
 
+static inline bool lruvec_holds_page_lru_lock(struct page *page,
+                struct lruvec *lruvec)
+{
+    pg_data_t *pgdat = page_pgdat(page);
+
+    return lruvec == &pgdat->__lruvec;
+}
+
 static inline struct mem_cgroup *parent_mem_cgroup(struct mem_cgroup *memcg)
 {
     return NULL;
@@ -1192,6 +1229,31 @@ static inline void mem_cgroup_put(struct mem_cgroup *memcg)
 {
 }
 
+static inline struct lruvec *lock_page_lruvec(struct page *page)
+{
+    struct pglist_data *pgdat = page_pgdat(page);
+
+    spin_lock(&pgdat->__lruvec.lru_lock);
+    return &pgdat->__lruvec;
+}
+
+static inline struct lruvec *lock_page_lruvec_irq(struct page *page)
+{
+    struct pglist_data *pgdat = page_pgdat(page);
+
+    spin_lock_irq(&pgdat->__lruvec.lru_lock);
+    return &pgdat->__lruvec;
+}
+
+static inline struct lruvec *lock_page_lruvec_irqsave(struct page *page,
+        unsigned long *flagsp)
+{
+    struct pglist_data *pgdat = page_pgdat(page);
+
+    spin_lock_irqsave(&pgdat->__lruvec.lru_lock, *flagsp);
+    return &pgdat->__lruvec;
+}
+
 static inline struct mem_cgroup *
 mem_cgroup_iter(struct mem_cgroup *root,
         struct mem_cgroup *prev,
@@ -1411,6 +1473,10 @@ static inline
 void count_memcg_event_mm(struct mm_struct *mm, enum vm_event_item idx)
 {
 }
+
+static inline void lruvec_memcg_debug(struct lruvec *lruvec, struct page *page)
+{
+}
 #endif /* CONFIG_MEMCG */
 
 /* idx can be of type enum memcg_stat_item or node_stat_item */
@@ -1492,6 +1558,50 @@ static inline struct lruvec *parent_lruvec(struct lruvec *lruvec)
     return mem_cgroup_lruvec(memcg, lruvec_pgdat(lruvec));
 }
 
+static inline void unlock_page_lruvec(struct lruvec *lruvec)
+{
+    spin_unlock(&lruvec->lru_lock);
+}
+
+static inline void unlock_page_lruvec_irq(struct lruvec *lruvec)
+{
+    spin_unlock_irq(&lruvec->lru_lock);
+}
+
+static inline void unlock_page_lruvec_irqrestore(struct lruvec *lruvec,
+        unsigned long flags)
+{
+    spin_unlock_irqrestore(&lruvec->lru_lock, flags);
+}
+
+/* Don't lock again iff page's lruvec locked */
+static inline struct lruvec *relock_page_lruvec_irq(struct page *page,
+        struct lruvec *locked_lruvec)
+{
+    if (locked_lruvec) {
+        if (lruvec_holds_page_lru_lock(page, locked_lruvec))
+            return locked_lruvec;
+
+        unlock_page_lruvec_irq(locked_lruvec);
+    }
+
+    return lock_page_lruvec_irq(page);
+}
+
+/* Don't lock again iff page's lruvec locked */
+static inline struct lruvec *relock_page_lruvec_irqsave(struct page *page,
+        struct lruvec *locked_lruvec, unsigned long *flags)
+{
+    if (locked_lruvec) {
+        if (lruvec_holds_page_lru_lock(page, locked_lruvec))
+            return locked_lruvec;
+
+        unlock_page_lruvec_irqrestore(locked_lruvec, *flags);
+    }
+
+    return lock_page_lruvec_irqsave(page, flags);
+}
+
 #ifdef CONFIG_CGROUP_WRITEBACK
 
 struct wb_domain *mem_cgroup_wb_domain(struct bdi_writeback *wb);
@@ -79,7 +79,7 @@ struct page {
         struct {    /* Page cache and anonymous pages */
             /**
              * @lru: Pageout list, eg. active_list protected by
-             * pgdat->lru_lock. Sometimes used as a generic list
+             * lruvec->lru_lock. Sometimes used as a generic list
              * by the page owner.
              */
             struct list_head lru;
@@ -113,8 +113,7 @@ static inline bool free_area_empty(struct free_area *area, int migratetype)
 struct pglist_data;
 
 /*
- * zone->lock and the zone lru_lock are two of the hottest locks in the kernel.
- * So add a wild amount of padding here to ensure that they fall into separate
+ * Add a wild amount of padding here to ensure datas fall into separate
  * cachelines. There are very few zone structures in the machine, so space
  * consumption is not a concern here.
 */
@@ -276,6 +275,8 @@ enum lruvec_flags {
 
 struct lruvec {
     struct list_head lists[NR_LRU_LISTS];
+    /* per lruvec lru_lock for memcg */
+    spinlock_t lru_lock;
     /*
      * These track the cost of reclaiming one LRU - file or anon -
      * over the other. As the observed cost of reclaiming one LRU
@@ -782,7 +783,6 @@ typedef struct pglist_data {
 
     /* Write-intensive fields used by page reclaim */
     ZONE_PADDING(_pad1_)
-    spinlock_t lru_lock;
 
 #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
     /*
@@ -334,6 +334,7 @@ PAGEFLAG(Referenced, referenced, PF_HEAD)
 PAGEFLAG(Dirty, dirty, PF_HEAD) TESTSCFLAG(Dirty, dirty, PF_HEAD)
     __CLEARPAGEFLAG(Dirty, dirty, PF_HEAD)
 PAGEFLAG(LRU, lru, PF_HEAD) __CLEARPAGEFLAG(LRU, lru, PF_HEAD)
+    TESTCLEARFLAG(LRU, lru, PF_HEAD)
 PAGEFLAG(Active, active, PF_HEAD) __CLEARPAGEFLAG(Active, active, PF_HEAD)
     TESTCLEARFLAG(Active, active, PF_HEAD)
 PAGEFLAG(Workingset, workingset, PF_HEAD)
|
|||||||
unsigned int nr_pages);
|
unsigned int nr_pages);
|
||||||
extern void lru_note_cost_page(struct page *);
|
extern void lru_note_cost_page(struct page *);
|
||||||
extern void lru_cache_add(struct page *);
|
extern void lru_cache_add(struct page *);
|
||||||
extern void lru_add_page_tail(struct page *page, struct page *page_tail,
|
|
||||||
struct lruvec *lruvec, struct list_head *head);
|
|
||||||
extern void mark_page_accessed(struct page *);
|
extern void mark_page_accessed(struct page *);
|
||||||
extern void lru_add_drain(void);
|
extern void lru_add_drain(void);
|
||||||
extern void lru_add_drain_cpu(int cpu);
|
extern void lru_add_drain_cpu(int cpu);
|
||||||
@ -358,7 +356,7 @@ extern void lru_cache_add_inactive_or_unevictable(struct page *page,
|
|||||||
extern unsigned long zone_reclaimable_pages(struct zone *zone);
|
extern unsigned long zone_reclaimable_pages(struct zone *zone);
|
||||||
extern unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
|
extern unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
|
||||||
gfp_t gfp_mask, nodemask_t *mask);
|
gfp_t gfp_mask, nodemask_t *mask);
|
||||||
extern int __isolate_lru_page(struct page *page, isolate_mode_t mode);
|
extern int __isolate_lru_page_prepare(struct page *page, isolate_mode_t mode);
|
||||||
extern unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg,
|
extern unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg,
|
||||||
unsigned long nr_pages,
|
unsigned long nr_pages,
|
||||||
gfp_t gfp_mask,
|
gfp_t gfp_mask,
|
||||||
|
@@ -804,7 +804,7 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
     unsigned long nr_scanned = 0, nr_isolated = 0;
     struct lruvec *lruvec;
     unsigned long flags = 0;
-    bool locked = false;
+    struct lruvec *locked = NULL;
     struct page *page = NULL, *valid_page = NULL;
     unsigned long start_pfn = low_pfn;
     bool skip_on_failure = false;
@@ -868,11 +868,20 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
          * contention, to give chance to IRQs. Abort completely if
          * a fatal signal is pending.
          */
-        if (!(low_pfn % SWAP_CLUSTER_MAX)
-            && compact_unlock_should_abort(&pgdat->lru_lock,
-                        flags, &locked, cc)) {
-            low_pfn = 0;
-            goto fatal_pending;
+        if (!(low_pfn % SWAP_CLUSTER_MAX)) {
+            if (locked) {
+                unlock_page_lruvec_irqrestore(locked, flags);
+                locked = NULL;
+            }
+
+            if (fatal_signal_pending(current)) {
+                cc->contended = true;
+
+                low_pfn = 0;
+                goto fatal_pending;
+            }
+
+            cond_resched();
         }
 
         if (!pfn_valid_within(low_pfn))
@@ -890,6 +899,7 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
         if (!valid_page && IS_ALIGNED(low_pfn, pageblock_nr_pages)) {
             if (!cc->ignore_skip_hint && get_pageblock_skip(page)) {
                 low_pfn = end_pfn;
+                page = NULL;
                 goto isolate_abort;
             }
             valid_page = page;
@@ -943,9 +953,8 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
             if (unlikely(__PageMovable(page)) &&
                     !PageIsolated(page)) {
                 if (locked) {
-                    spin_unlock_irqrestore(&pgdat->lru_lock,
-                                    flags);
-                    locked = false;
+                    unlock_page_lruvec_irqrestore(locked, flags);
+                    locked = NULL;
                 }
 
                 if (!isolate_movable_page(page, isolate_mode))
@@ -971,10 +980,34 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
         if (!(cc->gfp_mask & __GFP_FS) && page_mapping(page))
             goto isolate_fail;
 
+        /*
+         * Be careful not to clear PageLRU until after we're
+         * sure the page is not being freed elsewhere -- the
+         * page release code relies on it.
+         */
+        if (unlikely(!get_page_unless_zero(page)))
+            goto isolate_fail;
+
+        if (__isolate_lru_page_prepare(page, isolate_mode) != 0)
+            goto isolate_fail_put;
+
+        /* Try isolate the page */
+        if (!TestClearPageLRU(page))
+            goto isolate_fail_put;
+
+        rcu_read_lock();
+        lruvec = mem_cgroup_page_lruvec(page, pgdat);
+
         /* If we already hold the lock, we can skip some rechecking */
-        if (!locked) {
-            locked = compact_lock_irqsave(&pgdat->lru_lock,
-                                &flags, cc);
+        if (lruvec != locked) {
+            if (locked)
+                unlock_page_lruvec_irqrestore(locked, flags);
+
+            compact_lock_irqsave(&lruvec->lru_lock, &flags, cc);
+            locked = lruvec;
+            rcu_read_unlock();
+
+            lruvec_memcg_debug(lruvec, page);
 
             /* Try get exclusive access under lock */
             if (!skip_updated) {
@@ -983,10 +1016,6 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
                     goto isolate_abort;
             }
 
-            /* Recheck PageLRU and PageCompound under lock */
-            if (!PageLRU(page))
-                goto isolate_fail;
-
             /*
              * Page become compound since the non-locked check,
              * and it's on LRU. It can only be a THP so the order
@@ -994,15 +1023,11 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
              */
             if (unlikely(PageCompound(page) && !cc->alloc_contig)) {
                 low_pfn += compound_nr(page) - 1;
-                goto isolate_fail;
+                SetPageLRU(page);
+                goto isolate_fail_put;
             }
-        }
-
-        lruvec = mem_cgroup_page_lruvec(page, pgdat);
-
-        /* Try isolate the page */
-        if (__isolate_lru_page(page, isolate_mode) != 0)
-            goto isolate_fail;
+        } else
+            rcu_read_unlock();
 
         /* The whole page is taken off the LRU; skip the tail pages. */
         if (PageCompound(page))
@@ -1032,6 +1057,15 @@ isolate_success:
         }
 
         continue;
+
+isolate_fail_put:
+        /* Avoid potential deadlock in freeing page under lru_lock */
+        if (locked) {
+            unlock_page_lruvec_irqrestore(locked, flags);
+            locked = NULL;
+        }
+        put_page(page);
+
 isolate_fail:
         if (!skip_on_failure)
             continue;
@@ -1043,8 +1077,8 @@ isolate_fail:
          */
         if (nr_isolated) {
             if (locked) {
-                spin_unlock_irqrestore(&pgdat->lru_lock, flags);
-                locked = false;
+                unlock_page_lruvec_irqrestore(locked, flags);
+                locked = NULL;
             }
             putback_movable_pages(&cc->migratepages);
             cc->nr_migratepages = 0;
@@ -1068,9 +1102,15 @@ isolate_fail:
     if (unlikely(low_pfn > end_pfn))
         low_pfn = end_pfn;
 
+    page = NULL;
+
 isolate_abort:
     if (locked)
-        spin_unlock_irqrestore(&pgdat->lru_lock, flags);
+        unlock_page_lruvec_irqrestore(locked, flags);
+    if (page) {
+        SetPageLRU(page);
+        put_page(page);
+    }
 
     /*
      * Updated the cached scanner pfn once the pageblock has been scanned
@@ -102,8 +102,8 @@
  *    ->swap_lock (try_to_unmap_one)
  *    ->private_lock (try_to_unmap_one)
  *    ->i_pages lock (try_to_unmap_one)
- *    ->pgdat->lru_lock (follow_page->mark_page_accessed)
- *    ->pgdat->lru_lock (check_pte_range->isolate_lru_page)
+ *    ->lruvec->lru_lock (follow_page->mark_page_accessed)
+ *    ->lruvec->lru_lock (check_pte_range->isolate_lru_page)
  *    ->private_lock (page_remove_rmap->set_page_dirty)
  *    ->i_pages lock (page_remove_rmap->set_page_dirty)
  *    bdi.wb->list_lock (page_remove_rmap->set_page_dirty)
@@ -2359,6 +2359,27 @@ static void remap_page(struct page *page, unsigned int nr)
     }
 }
 
+static void lru_add_page_tail(struct page *head, struct page *tail,
+        struct lruvec *lruvec, struct list_head *list)
+{
+    VM_BUG_ON_PAGE(!PageHead(head), head);
+    VM_BUG_ON_PAGE(PageCompound(tail), head);
+    VM_BUG_ON_PAGE(PageLRU(tail), head);
+    lockdep_assert_held(&lruvec->lru_lock);
+
+    if (list) {
+        /* page reclaim is reclaiming a huge page */
+        VM_WARN_ON(PageLRU(head));
+        get_page(tail);
+        list_add_tail(&tail->lru, list);
+    } else {
+        /* head is still on lru (and we have it frozen) */
+        VM_WARN_ON(!PageLRU(head));
+        SetPageLRU(tail);
+        list_add_tail(&tail->lru, &head->lru);
+    }
+}
+
 static void __split_huge_page_tail(struct page *head, int tail,
         struct lruvec *lruvec, struct list_head *list)
 {
@@ -2425,18 +2446,15 @@ static void __split_huge_page_tail(struct page *head, int tail,
 }
 
 static void __split_huge_page(struct page *page, struct list_head *list,
-        pgoff_t end, unsigned long flags)
+        pgoff_t end)
 {
     struct page *head = compound_head(page);
-    pg_data_t *pgdat = page_pgdat(head);
     struct lruvec *lruvec;
     struct address_space *swap_cache = NULL;
     unsigned long offset = 0;
     unsigned int nr = thp_nr_pages(head);
     int i;
 
-    lruvec = mem_cgroup_page_lruvec(head, pgdat);
-
     /* complete memcg works before add pages to LRU */
     mem_cgroup_split_huge_fixup(head);
 
@@ -2448,6 +2466,9 @@ static void __split_huge_page(struct page *page, struct list_head *list,
         xa_lock(&swap_cache->i_pages);
     }
 
+    /* lock lru list/PageCompound, ref freezed by page_ref_freeze */
+    lruvec = lock_page_lruvec(head);
+
     for (i = nr - 1; i >= 1; i--) {
         __split_huge_page_tail(head, i, lruvec, list);
         /* Some pages can be beyond i_size: drop them from page cache */
@@ -2467,6 +2488,8 @@ static void __split_huge_page(struct page *page, struct list_head *list,
     }
 
     ClearPageCompound(head);
+    unlock_page_lruvec(lruvec);
+    /* Caller disabled irqs, so they are still disabled here */
 
     split_page_owner(head, nr);
 
@@ -2484,8 +2507,7 @@ static void __split_huge_page(struct page *page, struct list_head *list,
         page_ref_add(head, 2);
         xa_unlock(&head->mapping->i_pages);
     }
-
-    spin_unlock_irqrestore(&pgdat->lru_lock, flags);
+    local_irq_enable();
 
     remap_page(head, nr);
 
@@ -2631,12 +2653,10 @@ bool can_split_huge_page(struct page *page, int *pextra_pins)
 int split_huge_page_to_list(struct page *page, struct list_head *list)
 {
     struct page *head = compound_head(page);
-    struct pglist_data *pgdata = NODE_DATA(page_to_nid(head));
     struct deferred_split *ds_queue = get_deferred_split_queue(head);
     struct anon_vma *anon_vma = NULL;
     struct address_space *mapping = NULL;
     int count, mapcount, extra_pins, ret;
-    unsigned long flags;
     pgoff_t end;
 
     VM_BUG_ON_PAGE(is_huge_zero_page(head), head);
@@ -2697,9 +2717,8 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
     unmap_page(head);
     VM_BUG_ON_PAGE(compound_mapcount(head), head);
 
-    /* prevent PageLRU to go away from under us, and freeze lru stats */
-    spin_lock_irqsave(&pgdata->lru_lock, flags);
-
+    /* block interrupt reentry in xa_lock and spinlock */
+    local_irq_disable();
     if (mapping) {
         XA_STATE(xas, &mapping->i_pages, page_index(head));
 
@@ -2729,7 +2748,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
                 __dec_lruvec_page_state(head, NR_FILE_THPS);
         }
 
-        __split_huge_page(page, list, end, flags);
+        __split_huge_page(page, list, end);
         ret = 0;
     } else {
         if (IS_ENABLED(CONFIG_DEBUG_VM) && mapcount) {
@@ -2743,7 +2762,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
         spin_unlock(&ds_queue->split_queue_lock);
 fail:       if (mapping)
             xa_unlock(&mapping->i_pages);
-        spin_unlock_irqrestore(&pgdata->lru_lock, flags);
+        local_irq_enable();
         remap_page(head, thp_nr_pages(head));
         ret = -EBUSY;
     }
|
|||||||
* Lockless page tracking & accounting
|
* Lockless page tracking & accounting
|
||||||
* Unified hierarchy configuration model
|
* Unified hierarchy configuration model
|
||||||
* Copyright (C) 2015 Red Hat, Inc., Johannes Weiner
|
* Copyright (C) 2015 Red Hat, Inc., Johannes Weiner
|
||||||
|
*
|
||||||
|
* Per memcg lru locking
|
||||||
|
* Copyright (C) 2020 Alibaba, Inc, Alex Shi
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include <linux/page_counter.h>
|
#include <linux/page_counter.h>
|
||||||
@ -1322,6 +1325,23 @@ int mem_cgroup_scan_tasks(struct mem_cgroup *memcg,
|
|||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef CONFIG_DEBUG_VM
|
||||||
|
void lruvec_memcg_debug(struct lruvec *lruvec, struct page *page)
|
||||||
|
{
|
||||||
|
struct mem_cgroup *memcg;
|
||||||
|
|
||||||
|
if (mem_cgroup_disabled())
|
||||||
|
return;
|
||||||
|
|
||||||
|
memcg = page_memcg(page);
|
||||||
|
|
||||||
|
if (!memcg)
|
||||||
|
VM_BUG_ON_PAGE(lruvec_memcg(lruvec) != root_mem_cgroup, page);
|
||||||
|
else
|
||||||
|
VM_BUG_ON_PAGE(lruvec_memcg(lruvec) != memcg, page);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* mem_cgroup_page_lruvec - return lruvec for isolating/putting an LRU page
|
* mem_cgroup_page_lruvec - return lruvec for isolating/putting an LRU page
|
||||||
* @page: the page
|
* @page: the page
|
||||||
@ -1362,6 +1382,60 @@ out:
|
|||||||
return lruvec;
|
return lruvec;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* lock_page_lruvec - lock and return lruvec for a given page.
|
||||||
|
* @page: the page
|
||||||
|
*
|
||||||
|
* This series functions should be used in either conditions:
|
||||||
|
* PageLRU is cleared or unset
|
||||||
|
* or page->_refcount is zero
|
||||||
|
* or page is locked.
|
||||||
|
*/
|
||||||
|
struct lruvec *lock_page_lruvec(struct page *page)
|
||||||
|
{
|
||||||
|
struct lruvec *lruvec;
|
||||||
|
struct pglist_data *pgdat = page_pgdat(page);
|
||||||
|
|
||||||
|
rcu_read_lock();
|
||||||
|
lruvec = mem_cgroup_page_lruvec(page, pgdat);
|
||||||
|
spin_lock(&lruvec->lru_lock);
|
||||||
|
rcu_read_unlock();
|
||||||
|
|
||||||
|
lruvec_memcg_debug(lruvec, page);
|
||||||
|
|
||||||
|
return lruvec;
|
||||||
|
}
|
||||||
|
|
||||||
|
struct lruvec *lock_page_lruvec_irq(struct page *page)
|
||||||
|
{
|
||||||
|
struct lruvec *lruvec;
|
||||||
|
struct pglist_data *pgdat = page_pgdat(page);
|
||||||
|
|
||||||
|
rcu_read_lock();
|
||||||
|
lruvec = mem_cgroup_page_lruvec(page, pgdat);
|
||||||
|
spin_lock_irq(&lruvec->lru_lock);
|
||||||
|
rcu_read_unlock();
|
||||||
|
|
||||||
|
lruvec_memcg_debug(lruvec, page);
|
||||||
|
|
||||||
|
return lruvec;
|
||||||
|
}
|
||||||
|
|
||||||
|
struct lruvec *lock_page_lruvec_irqsave(struct page *page, unsigned long *flags)
|
||||||
|
{
|
||||||
|
struct lruvec *lruvec;
|
||||||
|
struct pglist_data *pgdat = page_pgdat(page);
|
||||||
|
|
||||||
|
rcu_read_lock();
|
||||||
|
lruvec = mem_cgroup_page_lruvec(page, pgdat);
|
||||||
|
spin_lock_irqsave(&lruvec->lru_lock, *flags);
|
||||||
|
rcu_read_unlock();
|
||||||
|
|
||||||
|
lruvec_memcg_debug(lruvec, page);
|
||||||
|
|
||||||
|
return lruvec;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* mem_cgroup_update_lru_size - account for adding or removing an lru page
|
* mem_cgroup_update_lru_size - account for adding or removing an lru page
|
||||||
* @lruvec: mem_cgroup per zone lru vector
|
* @lruvec: mem_cgroup per zone lru vector
|
||||||
@ -2142,6 +2216,12 @@ again:
|
|||||||
if (unlikely(!memcg))
|
if (unlikely(!memcg))
|
||||||
return NULL;
|
return NULL;
|
||||||
|
|
||||||
|
#ifdef CONFIG_PROVE_LOCKING
|
||||||
|
local_irq_save(flags);
|
||||||
|
might_lock(&memcg->move_lock);
|
||||||
|
local_irq_restore(flags);
|
||||||
|
#endif
|
||||||
|
|
||||||
if (atomic_read(&memcg->moving_account) <= 0)
|
if (atomic_read(&memcg->moving_account) <= 0)
|
||||||
return memcg;
|
return memcg;
|
||||||
|
|
||||||
@ -3263,10 +3343,8 @@ void obj_cgroup_uncharge(struct obj_cgroup *objcg, size_t size)
|
|||||||
#endif /* CONFIG_MEMCG_KMEM */
|
#endif /* CONFIG_MEMCG_KMEM */
|
||||||
|
|
||||||
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
|
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Because tail pages are not marked as "used", set it. We're under
|
* Because page_memcg(head) is not set on compound tails, set it now.
|
||||||
* pgdat->lru_lock and migration entries setup in all page mappings.
|
|
||||||
*/
|
*/
|
||||||
void mem_cgroup_split_huge_fixup(struct page *head)
|
void mem_cgroup_split_huge_fixup(struct page *head)
|
||||||
{
|
{
|
||||||
|
63
mm/mlock.c
63
mm/mlock.c
@@ -105,26 +105,6 @@ void mlock_vma_page(struct page *page)
     }
 }
 
-/*
- * Isolate a page from LRU with optional get_page() pin.
- * Assumes lru_lock already held and page already pinned.
- */
-static bool __munlock_isolate_lru_page(struct page *page, bool getpage)
-{
-    if (PageLRU(page)) {
-        struct lruvec *lruvec;
-
-        lruvec = mem_cgroup_page_lruvec(page, page_pgdat(page));
-        if (getpage)
-            get_page(page);
-        ClearPageLRU(page);
-        del_page_from_lru_list(page, lruvec, page_lru(page));
-        return true;
-    }
-
-    return false;
-}
-
 /*
  * Finish munlock after successful page isolation
  *
@@ -187,40 +167,24 @@ static void __munlock_isolation_failed(struct page *page)
 unsigned int munlock_vma_page(struct page *page)
 {
     int nr_pages;
-    pg_data_t *pgdat = page_pgdat(page);
 
     /* For try_to_munlock() and to serialize with page migration */
     BUG_ON(!PageLocked(page));
 
     VM_BUG_ON_PAGE(PageTail(page), page);
 
-    /*
-     * Serialize with any parallel __split_huge_page_refcount() which
-     * might otherwise copy PageMlocked to part of the tail pages before
-     * we clear it in the head page. It also stabilizes thp_nr_pages().
-     */
-    spin_lock_irq(&pgdat->lru_lock);
-
     if (!TestClearPageMlocked(page)) {
         /* Potentially, PTE-mapped THP: do not skip the rest PTEs */
-        nr_pages = 1;
-        goto unlock_out;
+        return 0;
     }
 
     nr_pages = thp_nr_pages(page);
-    __mod_zone_page_state(page_zone(page), NR_MLOCK, -nr_pages);
+    mod_zone_page_state(page_zone(page), NR_MLOCK, -nr_pages);
 
-    if (__munlock_isolate_lru_page(page, true)) {
-        spin_unlock_irq(&pgdat->lru_lock);
+    if (!isolate_lru_page(page))
         __munlock_isolated_page(page);
-        goto out;
-    }
-    __munlock_isolation_failed(page);
-
-unlock_out:
-    spin_unlock_irq(&pgdat->lru_lock);
-
-out:
+    else
+        __munlock_isolation_failed(page);
+
     return nr_pages - 1;
 }
 
@@ -298,12 +262,12 @@ static void __munlock_pagevec(struct pagevec *pvec, struct zone *zone)
     int nr = pagevec_count(pvec);
     int delta_munlocked = -nr;
     struct pagevec pvec_putback;
+    struct lruvec *lruvec = NULL;
     int pgrescued = 0;
 
     pagevec_init(&pvec_putback);
 
     /* Phase 1: page isolation */
-    spin_lock_irq(&zone->zone_pgdat->lru_lock);
     for (i = 0; i < nr; i++) {
         struct page *page = pvec->pages[i];
 
@@ -312,9 +276,12 @@ static void __munlock_pagevec(struct pagevec *pvec, struct zone *zone)
              * We already have pin from follow_page_mask()
              * so we can spare the get_page() here.
              */
-            if (__munlock_isolate_lru_page(page, false))
+            if (TestClearPageLRU(page)) {
+                lruvec = relock_page_lruvec_irq(page, lruvec);
+                del_page_from_lru_list(page, lruvec,
+                            page_lru(page));
                 continue;
-            else
+            } else
                 __munlock_isolation_failed(page);
         } else {
             delta_munlocked++;
@@ -329,8 +296,12 @@ static void __munlock_pagevec(struct pagevec *pvec, struct zone *zone)
         pagevec_add(&pvec_putback, pvec->pages[i]);
         pvec->pages[i] = NULL;
     }
-    __mod_zone_page_state(zone, NR_MLOCK, delta_munlocked);
-    spin_unlock_irq(&zone->zone_pgdat->lru_lock);
+    if (lruvec) {
+        __mod_zone_page_state(zone, NR_MLOCK, delta_munlocked);
+        unlock_page_lruvec_irq(lruvec);
+    } else if (delta_munlocked) {
+        mod_zone_page_state(zone, NR_MLOCK, delta_munlocked);
+    }
 
     /* Now we can release pins of pages that we are not munlocking */
     pagevec_release(&pvec_putback);
|
|||||||
enum lru_list lru;
|
enum lru_list lru;
|
||||||
|
|
||||||
memset(lruvec, 0, sizeof(struct lruvec));
|
memset(lruvec, 0, sizeof(struct lruvec));
|
||||||
|
spin_lock_init(&lruvec->lru_lock);
|
||||||
|
|
||||||
for_each_lru(lru)
|
for_each_lru(lru)
|
||||||
INIT_LIST_HEAD(&lruvec->lists[lru]);
|
INIT_LIST_HEAD(&lruvec->lists[lru]);
|
||||||
|
@@ -6870,7 +6870,6 @@ static void __meminit pgdat_init_internals(struct pglist_data *pgdat)
     init_waitqueue_head(&pgdat->pfmemalloc_wait);
 
     pgdat_page_ext_init(pgdat);
-    spin_lock_init(&pgdat->lru_lock);
     lruvec_init(&pgdat->__lruvec);
 }
 
@@ -32,19 +32,15 @@
 static struct page *page_idle_get_page(unsigned long pfn)
 {
     struct page *page = pfn_to_online_page(pfn);
-    pg_data_t *pgdat;
 
     if (!page || !PageLRU(page) ||
         !get_page_unless_zero(page))
         return NULL;
 
-    pgdat = page_pgdat(page);
-    spin_lock_irq(&pgdat->lru_lock);
     if (unlikely(!PageLRU(page))) {
         put_page(page);
         page = NULL;
     }
-    spin_unlock_irq(&pgdat->lru_lock);
     return page;
 }
 

diff --git a/mm/rmap.c b/mm/rmap.c
@@ -28,12 +28,12 @@
  *           hugetlb_fault_mutex (hugetlbfs specific page fault mutex)
  *           anon_vma->rwsem
  *           mm->page_table_lock or pte_lock
- *           pgdat->lru_lock (in mark_page_accessed, isolate_lru_page)
  *           swap_lock (in swap_duplicate, swap_info_get)
  *           mmlist_lock (in mmput, drain_mmlist and others)
  *           mapping->private_lock (in __set_page_dirty_buffers)
- *           mem_cgroup_{begin,end}_page_stat (memcg->move_lock)
+ *           lock_page_memcg move_lock (in __set_page_dirty_buffers)
  *           i_pages lock (widely used)
+ *           lruvec->lru_lock (in lock_page_lruvec_irq)
  *           inode->i_lock (in set_page_dirty's __mark_inode_dirty)
  *           bdi.wb->list_lock (in set_page_dirty's __mark_inode_dirty)
  *           sb_lock (within inode_lock in fs/fs-writeback.c)
@@ -1054,8 +1054,14 @@ static void __page_set_anon_rmap(struct page *page,
     if (!exclusive)
         anon_vma = anon_vma->root;
 
+    /*
+     * page_idle does a lockless/optimistic rmap scan on page->mapping.
+     * Make sure the compiler doesn't split the stores of anon_vma and
+     * the PAGE_MAPPING_ANON type identifier, otherwise the rmap code
+     * could mistake the mapping for a struct address_space and crash.
+     */
     anon_vma = (void *) anon_vma + PAGE_MAPPING_ANON;
-    page->mapping = (struct address_space *) anon_vma;
+    WRITE_ONCE(page->mapping, (struct address_space *) anon_vma);
     page->index = linear_page_index(vma, address);
 }
 

diff --git a/mm/swap.c b/mm/swap.c
@ -79,16 +79,14 @@ static DEFINE_PER_CPU(struct lru_pvecs, lru_pvecs) = {
|
|||||||
static void __page_cache_release(struct page *page)
|
static void __page_cache_release(struct page *page)
|
||||||
{
|
{
|
||||||
if (PageLRU(page)) {
|
if (PageLRU(page)) {
|
||||||
pg_data_t *pgdat = page_pgdat(page);
|
|
||||||
struct lruvec *lruvec;
|
struct lruvec *lruvec;
|
||||||
unsigned long flags;
|
unsigned long flags;
|
||||||
|
|
||||||
spin_lock_irqsave(&pgdat->lru_lock, flags);
|
lruvec = lock_page_lruvec_irqsave(page, &flags);
|
||||||
lruvec = mem_cgroup_page_lruvec(page, pgdat);
|
|
||||||
VM_BUG_ON_PAGE(!PageLRU(page), page);
|
VM_BUG_ON_PAGE(!PageLRU(page), page);
|
||||||
__ClearPageLRU(page);
|
__ClearPageLRU(page);
|
||||||
del_page_from_lru_list(page, lruvec, page_off_lru(page));
|
del_page_from_lru_list(page, lruvec, page_off_lru(page));
|
||||||
spin_unlock_irqrestore(&pgdat->lru_lock, flags);
|
unlock_page_lruvec_irqrestore(lruvec, flags);
|
||||||
}
|
}
|
||||||
__ClearPageWaiters(page);
|
__ClearPageWaiters(page);
|
||||||
}
|
}
|
||||||
@ -204,63 +202,46 @@ int get_kernel_page(unsigned long start, int write, struct page **pages)
|
|||||||
EXPORT_SYMBOL_GPL(get_kernel_page);
|
EXPORT_SYMBOL_GPL(get_kernel_page);
|
||||||
|
|
||||||
static void pagevec_lru_move_fn(struct pagevec *pvec,
|
static void pagevec_lru_move_fn(struct pagevec *pvec,
|
||||||
void (*move_fn)(struct page *page, struct lruvec *lruvec, void *arg),
|
void (*move_fn)(struct page *page, struct lruvec *lruvec))
|
||||||
void *arg)
|
|
||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
struct pglist_data *pgdat = NULL;
|
struct lruvec *lruvec = NULL;
|
||||||
struct lruvec *lruvec;
|
|
||||||
unsigned long flags = 0;
|
unsigned long flags = 0;
|
||||||
|
|
||||||
for (i = 0; i < pagevec_count(pvec); i++) {
|
for (i = 0; i < pagevec_count(pvec); i++) {
|
||||||
struct page *page = pvec->pages[i];
|
struct page *page = pvec->pages[i];
|
||||||
struct pglist_data *pagepgdat = page_pgdat(page);
|
|
||||||
|
|
||||||
if (pagepgdat != pgdat) {
|
/* block memcg migration during page moving between lru */
|
||||||
if (pgdat)
|
if (!TestClearPageLRU(page))
|
||||||
spin_unlock_irqrestore(&pgdat->lru_lock, flags);
|
continue;
|
||||||
pgdat = pagepgdat;
|
|
||||||
spin_lock_irqsave(&pgdat->lru_lock, flags);
|
|
||||||
}
|
|
||||||
|
|
||||||
lruvec = mem_cgroup_page_lruvec(page, pgdat);
|
lruvec = relock_page_lruvec_irqsave(page, lruvec, &flags);
|
||||||
(*move_fn)(page, lruvec, arg);
|
(*move_fn)(page, lruvec);
|
||||||
|
|
||||||
|
SetPageLRU(page);
|
||||||
}
|
}
|
||||||
if (pgdat)
|
if (lruvec)
|
||||||
spin_unlock_irqrestore(&pgdat->lru_lock, flags);
|
unlock_page_lruvec_irqrestore(lruvec, flags);
|
||||||
release_pages(pvec->pages, pvec->nr);
|
release_pages(pvec->pages, pvec->nr);
|
||||||
pagevec_reinit(pvec);
|
pagevec_reinit(pvec);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void pagevec_move_tail_fn(struct page *page, struct lruvec *lruvec,
|
static void pagevec_move_tail_fn(struct page *page, struct lruvec *lruvec)
|
||||||
void *arg)
|
|
||||||
{
|
{
|
||||||
int *pgmoved = arg;
|
if (!PageUnevictable(page)) {
|
||||||
|
|
||||||
if (PageLRU(page) && !PageUnevictable(page)) {
|
|
||||||
del_page_from_lru_list(page, lruvec, page_lru(page));
|
del_page_from_lru_list(page, lruvec, page_lru(page));
|
||||||
ClearPageActive(page);
|
ClearPageActive(page);
|
||||||
add_page_to_lru_list_tail(page, lruvec, page_lru(page));
|
add_page_to_lru_list_tail(page, lruvec, page_lru(page));
|
||||||
(*pgmoved) += thp_nr_pages(page);
|
__count_vm_events(PGROTATED, thp_nr_pages(page));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
|
||||||
* pagevec_move_tail() must be called with IRQ disabled.
|
|
||||||
* Otherwise this may cause nasty races.
|
|
||||||
*/
|
|
||||||
static void pagevec_move_tail(struct pagevec *pvec)
|
|
||||||
{
|
|
||||||
int pgmoved = 0;
|
|
||||||
|
|
||||||
pagevec_lru_move_fn(pvec, pagevec_move_tail_fn, &pgmoved);
|
|
||||||
__count_vm_events(PGROTATED, pgmoved);
|
|
||||||
}
|
|
||||||
|
|
||||||
 /*
  * Writeback is about to end against a page which has been marked for immediate
  * reclaim. If it still appears to be reclaimable, move it to the tail of the
  * inactive list.
+ *
+ * rotate_reclaimable_page() must disable IRQs, to prevent nasty races.
  */
 void rotate_reclaimable_page(struct page *page)
 {
@@ -273,7 +254,7 @@ void rotate_reclaimable_page(struct page *page)
                 local_lock_irqsave(&lru_rotate.lock, flags);
                 pvec = this_cpu_ptr(&lru_rotate.pvec);
                 if (!pagevec_add(pvec, page) || PageCompound(page))
-                        pagevec_move_tail(pvec);
+                        pagevec_lru_move_fn(pvec, pagevec_move_tail_fn);
                 local_unlock_irqrestore(&lru_rotate.lock, flags);
         }
 }
@@ -283,6 +264,14 @@ void lru_note_cost(struct lruvec *lruvec, bool file, unsigned int nr_pages)
         do {
                 unsigned long lrusize;
 
+                /*
+                 * Hold lruvec->lru_lock is safe here, since
+                 * 1) The pinned lruvec in reclaim, or
+                 * 2) From a pre-LRU page during refault (which also holds the
+                 *    rcu lock, so would be safe even if the page was on the LRU
+                 *    and could move simultaneously to a new lruvec).
+                 */
+                spin_lock_irq(&lruvec->lru_lock);
                 /* Record cost event */
                 if (file)
                         lruvec->file_cost += nr_pages;
@@ -306,6 +295,7 @@ void lru_note_cost(struct lruvec *lruvec, bool file, unsigned int nr_pages)
                         lruvec->file_cost /= 2;
                         lruvec->anon_cost /= 2;
                 }
+                spin_unlock_irq(&lruvec->lru_lock);
         } while ((lruvec = parent_lruvec(lruvec)));
 }
 
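The two hunks above move the lock into lru_note_cost() itself: each level of the memcg hierarchy is locked only around the update of its own cost counters. A condensed sketch of the resulting shape (the periodic halving of the counters is elided, as indicated in the comment):

void lru_note_cost(struct lruvec *lruvec, bool file, unsigned int nr_pages)
{
        do {
                spin_lock_irq(&lruvec->lru_lock);
                if (file)
                        lruvec->file_cost += nr_pages;
                else
                        lruvec->anon_cost += nr_pages;
                /* ... periodic decay of both costs elided ... */
                spin_unlock_irq(&lruvec->lru_lock);
        } while ((lruvec = parent_lruvec(lruvec)));
}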
@@ -315,10 +305,9 @@ void lru_note_cost_page(struct page *page)
                       page_is_file_lru(page), thp_nr_pages(page));
 }
 
-static void __activate_page(struct page *page, struct lruvec *lruvec,
-                            void *arg)
+static void __activate_page(struct page *page, struct lruvec *lruvec)
 {
-        if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page)) {
+        if (!PageActive(page) && !PageUnevictable(page)) {
                 int lru = page_lru_base_type(page);
                 int nr_pages = thp_nr_pages(page);
 
@@ -340,7 +329,7 @@ static void activate_page_drain(int cpu)
         struct pagevec *pvec = &per_cpu(lru_pvecs.activate_page, cpu);
 
         if (pagevec_count(pvec))
-                pagevec_lru_move_fn(pvec, __activate_page, NULL);
+                pagevec_lru_move_fn(pvec, __activate_page);
 }
 
 static bool need_activate_page_drain(int cpu)
@@ -358,7 +347,7 @@ static void activate_page(struct page *page)
                 pvec = this_cpu_ptr(&lru_pvecs.activate_page);
                 get_page(page);
                 if (!pagevec_add(pvec, page) || PageCompound(page))
-                        pagevec_lru_move_fn(pvec, __activate_page, NULL);
+                        pagevec_lru_move_fn(pvec, __activate_page);
                 local_unlock(&lru_pvecs.lock);
         }
 }
@@ -370,12 +359,15 @@ static inline void activate_page_drain(int cpu)
 
 static void activate_page(struct page *page)
 {
-        pg_data_t *pgdat = page_pgdat(page);
+        struct lruvec *lruvec;
 
         page = compound_head(page);
-        spin_lock_irq(&pgdat->lru_lock);
-        __activate_page(page, mem_cgroup_page_lruvec(page, pgdat), NULL);
-        spin_unlock_irq(&pgdat->lru_lock);
+        if (TestClearPageLRU(page)) {
+                lruvec = lock_page_lruvec_irq(page);
+                __activate_page(page, lruvec);
+                unlock_page_lruvec_irq(lruvec);
+                SetPageLRU(page);
+        }
 }
 #endif
 
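Generalizing the new activate_page() above, the single-page pattern introduced by this series is: clear PG_lru first (which pins the page to its current lruvec and blocks memcg migration), take that lruvec's lock, operate, unlock, and restore PG_lru. An illustrative sketch with a made-up name, not part of the patch:

static void operate_on_lru_page(struct page *page,
                                void (*op)(struct page *, struct lruvec *))
{
        struct lruvec *lruvec;

        if (TestClearPageLRU(page)) {
                lruvec = lock_page_lruvec_irq(page);
                op(page, lruvec);       /* PG_lru is clear, nobody else can isolate it */
                unlock_page_lruvec_irq(lruvec);
                SetPageLRU(page);
        }
}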
@@ -525,16 +517,12 @@ void lru_cache_add_inactive_or_unevictable(struct page *page,
  * be write it out by flusher threads as this is much more effective
  * than the single-page writeout from reclaim.
  */
-static void lru_deactivate_file_fn(struct page *page, struct lruvec *lruvec,
-                              void *arg)
+static void lru_deactivate_file_fn(struct page *page, struct lruvec *lruvec)
 {
         int lru;
         bool active;
         int nr_pages = thp_nr_pages(page);
 
-        if (!PageLRU(page))
-                return;
-
         if (PageUnevictable(page))
                 return;
 
@@ -573,10 +561,9 @@ static void lru_deactivate_file_fn(struct page *page, struct lruvec *lruvec,
         }
 }
 
-static void lru_deactivate_fn(struct page *page, struct lruvec *lruvec,
-                            void *arg)
+static void lru_deactivate_fn(struct page *page, struct lruvec *lruvec)
 {
-        if (PageLRU(page) && PageActive(page) && !PageUnevictable(page)) {
+        if (PageActive(page) && !PageUnevictable(page)) {
                 int lru = page_lru_base_type(page);
                 int nr_pages = thp_nr_pages(page);
 
@@ -591,10 +578,9 @@ static void lru_deactivate_fn(struct page *page, struct lruvec *lruvec,
         }
 }
 
-static void lru_lazyfree_fn(struct page *page, struct lruvec *lruvec,
-                            void *arg)
+static void lru_lazyfree_fn(struct page *page, struct lruvec *lruvec)
 {
-        if (PageLRU(page) && PageAnon(page) && PageSwapBacked(page) &&
+        if (PageAnon(page) && PageSwapBacked(page) &&
             !PageSwapCache(page) && !PageUnevictable(page)) {
                 bool active = PageActive(page);
                 int nr_pages = thp_nr_pages(page);
@@ -636,21 +622,21 @@ void lru_add_drain_cpu(int cpu)
 
                 /* No harm done if a racing interrupt already did this */
                 local_lock_irqsave(&lru_rotate.lock, flags);
-                pagevec_move_tail(pvec);
+                pagevec_lru_move_fn(pvec, pagevec_move_tail_fn);
                 local_unlock_irqrestore(&lru_rotate.lock, flags);
         }
 
         pvec = &per_cpu(lru_pvecs.lru_deactivate_file, cpu);
         if (pagevec_count(pvec))
-                pagevec_lru_move_fn(pvec, lru_deactivate_file_fn, NULL);
+                pagevec_lru_move_fn(pvec, lru_deactivate_file_fn);
 
         pvec = &per_cpu(lru_pvecs.lru_deactivate, cpu);
         if (pagevec_count(pvec))
-                pagevec_lru_move_fn(pvec, lru_deactivate_fn, NULL);
+                pagevec_lru_move_fn(pvec, lru_deactivate_fn);
 
         pvec = &per_cpu(lru_pvecs.lru_lazyfree, cpu);
         if (pagevec_count(pvec))
-                pagevec_lru_move_fn(pvec, lru_lazyfree_fn, NULL);
+                pagevec_lru_move_fn(pvec, lru_lazyfree_fn);
 
         activate_page_drain(cpu);
 }
@@ -679,7 +665,7 @@ void deactivate_file_page(struct page *page)
                 pvec = this_cpu_ptr(&lru_pvecs.lru_deactivate_file);
 
                 if (!pagevec_add(pvec, page) || PageCompound(page))
-                        pagevec_lru_move_fn(pvec, lru_deactivate_file_fn, NULL);
+                        pagevec_lru_move_fn(pvec, lru_deactivate_file_fn);
                 local_unlock(&lru_pvecs.lock);
         }
 }
@@ -701,7 +687,7 @@ void deactivate_page(struct page *page)
                 pvec = this_cpu_ptr(&lru_pvecs.lru_deactivate);
                 get_page(page);
                 if (!pagevec_add(pvec, page) || PageCompound(page))
-                        pagevec_lru_move_fn(pvec, lru_deactivate_fn, NULL);
+                        pagevec_lru_move_fn(pvec, lru_deactivate_fn);
                 local_unlock(&lru_pvecs.lock);
         }
 }
@@ -723,7 +709,7 @@ void mark_page_lazyfree(struct page *page)
                 pvec = this_cpu_ptr(&lru_pvecs.lru_lazyfree);
                 get_page(page);
                 if (!pagevec_add(pvec, page) || PageCompound(page))
-                        pagevec_lru_move_fn(pvec, lru_lazyfree_fn, NULL);
+                        pagevec_lru_move_fn(pvec, lru_lazyfree_fn);
                 local_unlock(&lru_pvecs.lock);
         }
 }
@@ -871,8 +857,7 @@ void release_pages(struct page **pages, int nr)
 {
         int i;
         LIST_HEAD(pages_to_free);
-        struct pglist_data *locked_pgdat = NULL;
-        struct lruvec *lruvec;
+        struct lruvec *lruvec = NULL;
         unsigned long flags;
         unsigned int lock_batch;
 
@@ -882,11 +867,11 @@ void release_pages(struct page **pages, int nr)
                 /*
                  * Make sure the IRQ-safe lock-holding time does not get
                  * excessive with a continuous string of pages from the
-                 * same pgdat. The lock is held only if pgdat != NULL.
+                 * same lruvec. The lock is held only if lruvec != NULL.
                  */
-                if (locked_pgdat && ++lock_batch == SWAP_CLUSTER_MAX) {
-                        spin_unlock_irqrestore(&locked_pgdat->lru_lock, flags);
-                        locked_pgdat = NULL;
+                if (lruvec && ++lock_batch == SWAP_CLUSTER_MAX) {
+                        unlock_page_lruvec_irqrestore(lruvec, flags);
+                        lruvec = NULL;
                 }
 
                 page = compound_head(page);
@@ -894,10 +879,9 @@ void release_pages(struct page **pages, int nr)
                         continue;
 
                 if (is_zone_device_page(page)) {
-                        if (locked_pgdat) {
-                                spin_unlock_irqrestore(&locked_pgdat->lru_lock,
-                                                       flags);
-                                locked_pgdat = NULL;
+                        if (lruvec) {
+                                unlock_page_lruvec_irqrestore(lruvec, flags);
+                                lruvec = NULL;
                         }
                         /*
                          * ZONE_DEVICE pages that return 'false' from
@@ -918,27 +902,22 @@ void release_pages(struct page **pages, int nr)
                         continue;
 
                 if (PageCompound(page)) {
-                        if (locked_pgdat) {
-                                spin_unlock_irqrestore(&locked_pgdat->lru_lock, flags);
-                                locked_pgdat = NULL;
+                        if (lruvec) {
+                                unlock_page_lruvec_irqrestore(lruvec, flags);
+                                lruvec = NULL;
                         }
                         __put_compound_page(page);
                         continue;
                 }
 
                 if (PageLRU(page)) {
-                        struct pglist_data *pgdat = page_pgdat(page);
+                        struct lruvec *prev_lruvec = lruvec;
 
-                        if (pgdat != locked_pgdat) {
-                                if (locked_pgdat)
-                                        spin_unlock_irqrestore(&locked_pgdat->lru_lock,
-                                                               flags);
+                        lruvec = relock_page_lruvec_irqsave(page, lruvec,
+                                                            &flags);
+                        if (prev_lruvec != lruvec)
                                 lock_batch = 0;
-                                locked_pgdat = pgdat;
-                                spin_lock_irqsave(&locked_pgdat->lru_lock, flags);
-                        }
 
-                        lruvec = mem_cgroup_page_lruvec(page, locked_pgdat);
                         VM_BUG_ON_PAGE(!PageLRU(page), page);
                         __ClearPageLRU(page);
                         del_page_from_lru_list(page, lruvec, page_off_lru(page));
@@ -948,8 +927,8 @@ void release_pages(struct page **pages, int nr)
 
                 list_add(&page->lru, &pages_to_free);
         }
-        if (locked_pgdat)
-                spin_unlock_irqrestore(&locked_pgdat->lru_lock, flags);
+        if (lruvec)
+                unlock_page_lruvec_irqrestore(lruvec, flags);
 
         mem_cgroup_uncharge_list(&pages_to_free);
         free_unref_page_list(&pages_to_free);
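release_pages() above keeps one lruvec locked across a run of pages and only switches locks when a page belongs to a different lruvec, resetting lock_batch when that happens. relock_page_lruvec_irqsave() itself is added elsewhere in this series (in include/linux/memcontrol.h); the behaviour assumed by the loop above is roughly the following sketch, not the verbatim helper, and it presumes a lock_page_lruvec_irqsave() counterpart to the unlock_page_lruvec_irqrestore() used here:

/*
 * Approximate semantics of the relock helper as used above: keep the
 * current lock if the page still maps to the same lruvec, otherwise
 * drop it and take the lock of the page's own lruvec.
 */
static struct lruvec *relock_lruvec_sketch(struct page *page,
                struct lruvec *locked_lruvec, unsigned long *flags)
{
        if (locked_lruvec) {
                if (lruvec_holds_page_lru_lock(page, locked_lruvec))
                        return locked_lruvec;
                unlock_page_lruvec_irqrestore(locked_lruvec, *flags);
        }
        return lock_page_lruvec_irqsave(page, flags);
}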
@@ -977,41 +956,7 @@ void __pagevec_release(struct pagevec *pvec)
 }
 EXPORT_SYMBOL(__pagevec_release);
 
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
-/* used by __split_huge_page_refcount() */
-void lru_add_page_tail(struct page *page, struct page *page_tail,
-                       struct lruvec *lruvec, struct list_head *list)
-{
-        VM_BUG_ON_PAGE(!PageHead(page), page);
-        VM_BUG_ON_PAGE(PageCompound(page_tail), page);
-        VM_BUG_ON_PAGE(PageLRU(page_tail), page);
-        lockdep_assert_held(&lruvec_pgdat(lruvec)->lru_lock);
-
-        if (!list)
-                SetPageLRU(page_tail);
-
-        if (likely(PageLRU(page)))
-                list_add_tail(&page_tail->lru, &page->lru);
-        else if (list) {
-                /* page reclaim is reclaiming a huge page */
-                get_page(page_tail);
-                list_add_tail(&page_tail->lru, list);
-        } else {
-                /*
-                 * Head page has not yet been counted, as an hpage,
-                 * so we must account for each subpage individually.
-                 *
-                 * Put page_tail on the list at the correct position
-                 * so they all end up in order.
-                 */
-                add_page_to_lru_list_tail(page_tail, lruvec,
-                                          page_lru(page_tail));
-        }
-}
-#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
-
-static void __pagevec_lru_add_fn(struct page *page, struct lruvec *lruvec,
-                                 void *arg)
+static void __pagevec_lru_add_fn(struct page *page, struct lruvec *lruvec)
 {
         enum lru_list lru;
         int was_unevictable = TestClearPageUnevictable(page);
@@ -1070,7 +1015,20 @@ static void __pagevec_lru_add_fn(struct page *page, struct lruvec *lruvec,
  */
 void __pagevec_lru_add(struct pagevec *pvec)
 {
-        pagevec_lru_move_fn(pvec, __pagevec_lru_add_fn, NULL);
+        int i;
+        struct lruvec *lruvec = NULL;
+        unsigned long flags = 0;
+
+        for (i = 0; i < pagevec_count(pvec); i++) {
+                struct page *page = pvec->pages[i];
+
+                lruvec = relock_page_lruvec_irqsave(page, lruvec, &flags);
+                __pagevec_lru_add_fn(page, lruvec);
+        }
+        if (lruvec)
+                unlock_page_lruvec_irqrestore(lruvec, flags);
+        release_pages(pvec->pages, pvec->nr);
+        pagevec_reinit(pvec);
 }
 
 /**

mm/vmscan.c (207 lines changed)

@@ -1539,9 +1539,9 @@ unsigned int reclaim_clean_pages_from_list(struct zone *zone,
  *
  * returns 0 on success, -ve errno on failure.
  */
-int __isolate_lru_page(struct page *page, isolate_mode_t mode)
+int __isolate_lru_page_prepare(struct page *page, isolate_mode_t mode)
 {
-        int ret = -EINVAL;
+        int ret = -EBUSY;
 
         /* Only take pages on the LRU. */
         if (!PageLRU(page))
@@ -1551,8 +1551,6 @@ int __isolate_lru_page(struct page *page, isolate_mode_t mode)
         if (PageUnevictable(page) && !(mode & ISOLATE_UNEVICTABLE))
                 return ret;
 
-        ret = -EBUSY;
-
         /*
          * To minimise LRU disruption, the caller can indicate that it only
          * wants to isolate pages it will be able to operate on without
@@ -1593,20 +1591,9 @@ int __isolate_lru_page(struct page *page, isolate_mode_t mode)
         if ((mode & ISOLATE_UNMAPPED) && page_mapped(page))
                 return ret;
 
-        if (likely(get_page_unless_zero(page))) {
-                /*
-                 * Be careful not to clear PageLRU until after we're
-                 * sure the page is not being freed elsewhere -- the
-                 * page release code relies on it.
-                 */
-                ClearPageLRU(page);
-                ret = 0;
-        }
-
-        return ret;
+        return 0;
 }
 
 
 /*
  * Update LRU sizes after isolating pages. The LRU size updates must
  * be complete before mem_cgroup_update_lru_size due to a sanity check.
@@ -1626,14 +1613,16 @@ static __always_inline void update_lru_sizes(struct lruvec *lruvec,
 }
 
 /**
- * pgdat->lru_lock is heavily contended. Some of the functions that
+ * Isolating page from the lruvec to fill in @dst list by nr_to_scan times.
+ *
+ * lruvec->lru_lock is heavily contended. Some of the functions that
  * shrink the lists perform better by taking out a batch of pages
  * and working on them outside the LRU lock.
  *
 * For pagecache intensive workloads, this function is the hottest
 * spot in the kernel (apart from copy_*_user functions).
 *
- * Appropriate locks must be held before calling this function.
+ * Lru_lock must be held before calling this function.
 *
 * @nr_to_scan: The number of eligible pages to look through on the list.
 * @lruvec: The LRU vector to pull pages from.
@@ -1666,8 +1655,6 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
                 page = lru_to_page(src);
                 prefetchw_prev_lru_page(page, src, flags);
 
-                VM_BUG_ON_PAGE(!PageLRU(page), page);
-
                 nr_pages = compound_nr(page);
                 total_scan += nr_pages;
 
@@ -1688,20 +1675,34 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
                  * only when the page is being freed somewhere else.
                  */
                 scan += nr_pages;
-                switch (__isolate_lru_page(page, mode)) {
+                switch (__isolate_lru_page_prepare(page, mode)) {
                 case 0:
+                        /*
+                         * Be careful not to clear PageLRU until after we're
+                         * sure the page is not being freed elsewhere -- the
+                         * page release code relies on it.
+                         */
+                        if (unlikely(!get_page_unless_zero(page)))
+                                goto busy;
+
+                        if (!TestClearPageLRU(page)) {
+                                /*
+                                 * This page may in other isolation path,
+                                 * but we still hold lru_lock.
+                                 */
+                                put_page(page);
+                                goto busy;
+                        }
+
                         nr_taken += nr_pages;
                         nr_zone_taken[page_zonenum(page)] += nr_pages;
                         list_move(&page->lru, dst);
                         break;
 
-                case -EBUSY:
+                default:
+busy:
                         /* else it is being freed elsewhere */
                         list_move(&page->lru, src);
-                        continue;
-
-                default:
-                        BUG();
                 }
         }
 
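Taken together, the hunk above splits isolation into a check step and a claim step: __isolate_lru_page_prepare() only validates the page against the isolation mode, and the caller then pins and claims it. An illustrative skeleton of that protocol (hypothetical helper name, not in the patch):

/*
 * Sketch of the isolation protocol used by isolate_lru_pages() above:
 * validate first, then pin the page, then claim PG_lru.  The refcount
 * is taken before PG_lru is cleared because, as the comment above says,
 * the page release code relies on PG_lru still being set while the page
 * might be freed elsewhere.
 */
static bool try_isolate_lru_page_sketch(struct page *page, isolate_mode_t mode)
{
        if (__isolate_lru_page_prepare(page, mode) != 0)
                return false;           /* wrong mode, not on the LRU, ... */

        if (unlikely(!get_page_unless_zero(page)))
                return false;           /* already being freed */

        if (!TestClearPageLRU(page)) {
                put_page(page);         /* lost to another isolation path */
                return false;
        }
        return true;
}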
@@ -1764,21 +1765,16 @@ int isolate_lru_page(struct page *page)
         VM_BUG_ON_PAGE(!page_count(page), page);
         WARN_RATELIMIT(PageTail(page), "trying to isolate tail page");
 
-        if (PageLRU(page)) {
-                pg_data_t *pgdat = page_pgdat(page);
+        if (TestClearPageLRU(page)) {
                 struct lruvec *lruvec;
 
-                spin_lock_irq(&pgdat->lru_lock);
-                lruvec = mem_cgroup_page_lruvec(page, pgdat);
-                if (PageLRU(page)) {
-                        int lru = page_lru(page);
-                        get_page(page);
-                        ClearPageLRU(page);
-                        del_page_from_lru_list(page, lruvec, lru);
-                        ret = 0;
-                }
-                spin_unlock_irq(&pgdat->lru_lock);
+                get_page(page);
+                lruvec = lock_page_lruvec_irq(page);
+                del_page_from_lru_list(page, lruvec, page_lru(page));
+                unlock_page_lruvec_irq(lruvec);
+                ret = 0;
         }
 
         return ret;
 }
 
@@ -1820,29 +1816,14 @@ static int too_many_isolated(struct pglist_data *pgdat, int file,
 }
 
 /*
- * This moves pages from @list to corresponding LRU list.
- *
- * We move them the other way if the page is referenced by one or more
- * processes, from rmap.
- *
- * If the pages are mostly unmapped, the processing is fast and it is
- * appropriate to hold zone_lru_lock across the whole operation. But if
- * the pages are mapped, the processing is slow (page_referenced()) so we
- * should drop zone_lru_lock around each page. It's impossible to balance
- * this, so instead we remove the pages from the LRU while processing them.
- * It is safe to rely on PG_active against the non-LRU pages in here because
- * nobody will play with that bit on a non-LRU page.
- *
- * The downside is that we have to touch page->_refcount against each page.
- * But we had to alter page->flags anyway.
+ * move_pages_to_lru() moves pages from private @list to appropriate LRU list.
+ * On return, @list is reused as a list of pages to be freed by the caller.
 *
 * Returns the number of pages moved to the given lruvec.
 */
 static unsigned noinline_for_stack move_pages_to_lru(struct lruvec *lruvec,
                                                      struct list_head *list)
 {
-        struct pglist_data *pgdat = lruvec_pgdat(lruvec);
         int nr_pages, nr_moved = 0;
         LIST_HEAD(pages_to_free);
         struct page *page;
@@ -1851,38 +1832,54 @@ static unsigned noinline_for_stack move_pages_to_lru(struct lruvec *lruvec,
         while (!list_empty(list)) {
                 page = lru_to_page(list);
                 VM_BUG_ON_PAGE(PageLRU(page), page);
+                list_del(&page->lru);
                 if (unlikely(!page_evictable(page))) {
-                        list_del(&page->lru);
-                        spin_unlock_irq(&pgdat->lru_lock);
+                        spin_unlock_irq(&lruvec->lru_lock);
                         putback_lru_page(page);
-                        spin_lock_irq(&pgdat->lru_lock);
+                        spin_lock_irq(&lruvec->lru_lock);
                         continue;
                 }
-                lruvec = mem_cgroup_page_lruvec(page, pgdat);
 
+                /*
+                 * The SetPageLRU needs to be kept here for list integrity.
+                 * Otherwise:
+                 *   #0 move_pages_to_lru             #1 release_pages
+                 *   if !put_page_testzero
+                 *                                    if (put_page_testzero())
+                 *                                      !PageLRU //skip lru_lock
+                 *     SetPageLRU()
+                 *     list_add(&page->lru,)
+                 *                                      list_add(&page->lru,)
+                 */
                 SetPageLRU(page);
-                lru = page_lru(page);
 
-                nr_pages = thp_nr_pages(page);
-                update_lru_size(lruvec, lru, page_zonenum(page), nr_pages);
-                list_move(&page->lru, &lruvec->lists[lru]);
-
-                if (put_page_testzero(page)) {
+                if (unlikely(put_page_testzero(page))) {
                         __ClearPageLRU(page);
                         __ClearPageActive(page);
-                        del_page_from_lru_list(page, lruvec, lru);
 
                         if (unlikely(PageCompound(page))) {
-                                spin_unlock_irq(&pgdat->lru_lock);
+                                spin_unlock_irq(&lruvec->lru_lock);
                                 destroy_compound_page(page);
-                                spin_lock_irq(&pgdat->lru_lock);
+                                spin_lock_irq(&lruvec->lru_lock);
                         } else
                                 list_add(&page->lru, &pages_to_free);
-                } else {
-                        nr_moved += nr_pages;
-                        if (PageActive(page))
-                                workingset_age_nonresident(lruvec, nr_pages);
+
+                        continue;
                 }
+
+                /*
+                 * All pages were isolated from the same lruvec (and isolation
+                 * inhibits memcg migration).
+                 */
+                VM_BUG_ON_PAGE(!lruvec_holds_page_lru_lock(page, lruvec), page);
+                lru = page_lru(page);
+                nr_pages = thp_nr_pages(page);
+
+                update_lru_size(lruvec, lru, page_zonenum(page), nr_pages);
+                list_add(&page->lru, &lruvec->lists[lru]);
+                nr_moved += nr_pages;
+                if (PageActive(page))
+                        workingset_age_nonresident(lruvec, nr_pages);
         }
 
         /*
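As the rewritten comment above says, the caller hands in a private list under lruvec->lru_lock and gets back, on the same list, only the pages it must free. A condensed sketch of that calling pattern (a hypothetical wrapper; it is essentially what shrink_inactive_list() below does, with the statistics elided):

static void putback_and_free_sketch(struct lruvec *lruvec,
                                    struct list_head *page_list)
{
        spin_lock_irq(&lruvec->lru_lock);
        move_pages_to_lru(lruvec, page_list);
        /* ... vmstat / memcg event accounting elided ... */
        spin_unlock_irq(&lruvec->lru_lock);

        /* @page_list now holds only pages with no remaining references */
        mem_cgroup_uncharge_list(page_list);
        free_unref_page_list(page_list);
}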
@@ -1939,7 +1936,7 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec,
 
         lru_add_drain();
 
-        spin_lock_irq(&pgdat->lru_lock);
+        spin_lock_irq(&lruvec->lru_lock);
 
         nr_taken = isolate_lru_pages(nr_to_scan, lruvec, &page_list,
                                      &nr_scanned, sc, lru);
@@ -1951,27 +1948,25 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec,
         __count_memcg_events(lruvec_memcg(lruvec), item, nr_scanned);
         __count_vm_events(PGSCAN_ANON + file, nr_scanned);
 
-        spin_unlock_irq(&pgdat->lru_lock);
+        spin_unlock_irq(&lruvec->lru_lock);
 
         if (nr_taken == 0)
                 return 0;
 
         nr_reclaimed = shrink_page_list(&page_list, pgdat, sc, &stat, false);
 
-        spin_lock_irq(&pgdat->lru_lock);
+        spin_lock_irq(&lruvec->lru_lock);
 
         move_pages_to_lru(lruvec, &page_list);
 
         __mod_node_page_state(pgdat, NR_ISOLATED_ANON + file, -nr_taken);
-        lru_note_cost(lruvec, file, stat.nr_pageout);
         item = current_is_kswapd() ? PGSTEAL_KSWAPD : PGSTEAL_DIRECT;
         if (!cgroup_reclaim(sc))
                 __count_vm_events(item, nr_reclaimed);
         __count_memcg_events(lruvec_memcg(lruvec), item, nr_reclaimed);
         __count_vm_events(PGSTEAL_ANON + file, nr_reclaimed);
+        spin_unlock_irq(&lruvec->lru_lock);
 
-        spin_unlock_irq(&pgdat->lru_lock);
+        lru_note_cost(lruvec, file, stat.nr_pageout);
 
         mem_cgroup_uncharge_list(&page_list);
         free_unref_page_list(&page_list);
@@ -2003,6 +1998,23 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec,
         return nr_reclaimed;
 }
 
+/*
+ * shrink_active_list() moves pages from the active LRU to the inactive LRU.
+ *
+ * We move them the other way if the page is referenced by one or more
+ * processes.
+ *
+ * If the pages are mostly unmapped, the processing is fast and it is
+ * appropriate to hold lru_lock across the whole operation. But if
+ * the pages are mapped, the processing is slow (page_referenced()), so
+ * we should drop lru_lock around each page. It's impossible to balance
+ * this, so instead we remove the pages from the LRU while processing them.
+ * It is safe to rely on PG_active against the non-LRU pages in here because
+ * nobody will play with that bit on a non-LRU page.
+ *
+ * The downside is that we have to touch page->_refcount against each page.
+ * But we had to alter page->flags anyway.
+ */
 static void shrink_active_list(unsigned long nr_to_scan,
                                struct lruvec *lruvec,
                                struct scan_control *sc,
@@ -2022,7 +2034,7 @@ static void shrink_active_list(unsigned long nr_to_scan,
 
         lru_add_drain();
 
-        spin_lock_irq(&pgdat->lru_lock);
+        spin_lock_irq(&lruvec->lru_lock);
 
         nr_taken = isolate_lru_pages(nr_to_scan, lruvec, &l_hold,
                                      &nr_scanned, sc, lru);
@@ -2033,7 +2045,7 @@ static void shrink_active_list(unsigned long nr_to_scan,
         __count_vm_events(PGREFILL, nr_scanned);
         __count_memcg_events(lruvec_memcg(lruvec), PGREFILL, nr_scanned);
 
-        spin_unlock_irq(&pgdat->lru_lock);
+        spin_unlock_irq(&lruvec->lru_lock);
 
         while (!list_empty(&l_hold)) {
                 cond_resched();
@@ -2079,7 +2091,7 @@ static void shrink_active_list(unsigned long nr_to_scan,
         /*
          * Move pages back to the lru list.
          */
-        spin_lock_irq(&pgdat->lru_lock);
+        spin_lock_irq(&lruvec->lru_lock);
 
         nr_activate = move_pages_to_lru(lruvec, &l_active);
         nr_deactivate = move_pages_to_lru(lruvec, &l_inactive);
@@ -2090,7 +2102,7 @@ static void shrink_active_list(unsigned long nr_to_scan,
         __count_memcg_events(lruvec_memcg(lruvec), PGDEACTIVATE, nr_deactivate);
 
         __mod_node_page_state(pgdat, NR_ISOLATED_ANON + file, -nr_taken);
-        spin_unlock_irq(&pgdat->lru_lock);
+        spin_unlock_irq(&lruvec->lru_lock);
 
         mem_cgroup_uncharge_list(&l_active);
         free_unref_page_list(&l_active);
@@ -2678,10 +2690,10 @@ again:
         /*
          * Determine the scan balance between anon and file LRUs.
          */
-        spin_lock_irq(&pgdat->lru_lock);
+        spin_lock_irq(&target_lruvec->lru_lock);
         sc->anon_cost = target_lruvec->anon_cost;
         sc->file_cost = target_lruvec->file_cost;
-        spin_unlock_irq(&pgdat->lru_lock);
+        spin_unlock_irq(&target_lruvec->lru_lock);
 
         /*
          * Target desirable inactive:active list ratios for the anon
@@ -4257,15 +4269,13 @@ int node_reclaim(struct pglist_data *pgdat, gfp_t gfp_mask, unsigned int order)
  */
 void check_move_unevictable_pages(struct pagevec *pvec)
 {
-        struct lruvec *lruvec;
-        struct pglist_data *pgdat = NULL;
+        struct lruvec *lruvec = NULL;
         int pgscanned = 0;
         int pgrescued = 0;
         int i;
 
         for (i = 0; i < pvec->nr; i++) {
                 struct page *page = pvec->pages[i];
-                struct pglist_data *pagepgdat = page_pgdat(page);
                 int nr_pages;
 
                 if (PageTransTail(page))
@@ -4274,18 +4284,12 @@ void check_move_unevictable_pages(struct pagevec *pvec)
                 nr_pages = thp_nr_pages(page);
                 pgscanned += nr_pages;
 
-                if (pagepgdat != pgdat) {
-                        if (pgdat)
-                                spin_unlock_irq(&pgdat->lru_lock);
-                        pgdat = pagepgdat;
-                        spin_lock_irq(&pgdat->lru_lock);
-                }
-                lruvec = mem_cgroup_page_lruvec(page, pgdat);
-
-                if (!PageLRU(page) || !PageUnevictable(page))
+                /* block memcg migration during page moving between lru */
+                if (!TestClearPageLRU(page))
                         continue;
 
-                if (page_evictable(page)) {
+                lruvec = relock_page_lruvec_irq(page, lruvec);
+                if (page_evictable(page) && PageUnevictable(page)) {
                         enum lru_list lru = page_lru_base_type(page);
 
                         VM_BUG_ON_PAGE(PageActive(page), page);
@@ -4294,12 +4298,15 @@ void check_move_unevictable_pages(struct pagevec *pvec)
                         add_page_to_lru_list(page, lruvec, lru);
                         pgrescued += nr_pages;
                 }
+                SetPageLRU(page);
         }
 
-        if (pgdat) {
+        if (lruvec) {
                 __count_vm_events(UNEVICTABLE_PGRESCUED, pgrescued);
                 __count_vm_events(UNEVICTABLE_PGSCANNED, pgscanned);
-                spin_unlock_irq(&pgdat->lru_lock);
+                unlock_page_lruvec_irq(lruvec);
+        } else if (pgscanned) {
+                count_vm_events(UNEVICTABLE_PGSCANNED, pgscanned);
         }
 }
 EXPORT_SYMBOL_GPL(check_move_unevictable_pages);
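The function above combines the pieces introduced earlier in the series: TestClearPageLRU() to fence off concurrent isolation and memcg migration, and relock_page_lruvec_irq() to carry one lruvec lock across the whole pagevec. A stripped-down skeleton of that walk (hypothetical name, with the per-page work elided):

static void pagevec_lru_walk_sketch(struct pagevec *pvec)
{
        struct lruvec *lruvec = NULL;
        int i;

        for (i = 0; i < pagevec_count(pvec); i++) {
                struct page *page = pvec->pages[i];

                if (!TestClearPageLRU(page))    /* blocks memcg migration */
                        continue;

                lruvec = relock_page_lruvec_irq(page, lruvec);
                /* ... operate on the page's LRU state here ... */
                SetPageLRU(page);
        }
        if (lruvec)
                unlock_page_lruvec_irq(lruvec);
}

check_move_unevictable_pages() iterates over pvec->nr directly rather than using pagevec_count(), but the locking shape is the same.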

@@ -381,9 +381,7 @@ void workingset_refault(struct page *page, void *shadow)
         if (workingset) {
                 SetPageWorkingset(page);
                 /* XXX: Move to lru_cache_add() when it supports new vs putback */
-                spin_lock_irq(&page_pgdat(page)->lru_lock);
                 lru_note_cost_page(page);
-                spin_unlock_irq(&page_pgdat(page)->lru_lock);
                 inc_lruvec_state(lruvec, WORKINGSET_RESTORE_BASE + file);
         }
 out: