#include <linux/mm.h>
#include <linux/rmap.h>
#include <linux/hugetlb.h>
#include <linux/swap.h>
#include <linux/swapops.h>

#include "internal.h"
static inline bool check_pmd(struct page_vma_mapped_walk *pvmw)
{
        pmd_t pmde;
        /*
         * Make sure we don't re-load pmd between present and !trans_huge check.
         * We need a consistent view.
         */
        pmde = READ_ONCE(*pvmw->pmd);
        return pmd_present(pmde) && !pmd_trans_huge(pmde);
}
static inline bool not_found(struct page_vma_mapped_walk *pvmw)
{
        page_vma_mapped_walk_done(pvmw);
        return false;
}
static bool map_pte(struct page_vma_mapped_walk *pvmw)
{
        pvmw->pte = pte_offset_map(pvmw->pmd, pvmw->address);
        if (!(pvmw->flags & PVMW_SYNC)) {
                if (pvmw->flags & PVMW_MIGRATION) {
                        if (!is_swap_pte(*pvmw->pte))
                                return false;
                } else {
                        if (!pte_present(*pvmw->pte))
                                return false;
                }
        }
        pvmw->ptl = pte_lockptr(pvmw->vma->vm_mm, pvmw->pmd);
        spin_lock(pvmw->ptl);
        return true;
}
static bool check_pte(struct page_vma_mapped_walk *pvmw)
{
        if (pvmw->flags & PVMW_MIGRATION) {
#ifdef CONFIG_MIGRATION
                swp_entry_t entry;
                if (!is_swap_pte(*pvmw->pte))
                        return false;
                entry = pte_to_swp_entry(*pvmw->pte);
                if (!is_migration_entry(entry))
                        return false;
                if (migration_entry_to_page(entry) - pvmw->page >=
                                hpage_nr_pages(pvmw->page)) {
                        return false;
                }
                if (migration_entry_to_page(entry) < pvmw->page)
                        return false;
#else
                WARN_ON_ONCE(1);
#endif
        } else {
                if (!pte_present(*pvmw->pte))
                        return false;

                /* THP can be referenced by any subpage */
                if (pte_page(*pvmw->pte) - pvmw->page >=
                                hpage_nr_pages(pvmw->page)) {
                        return false;
                }
                if (pte_page(*pvmw->pte) < pvmw->page)
                        return false;
        }

        return true;
}
/**
 * page_vma_mapped_walk - check if @pvmw->page is mapped in @pvmw->vma at
 * @pvmw->address
 * @pvmw: pointer to struct page_vma_mapped_walk. page, vma, address and flags
 * must be set. pmd, pte and ptl must be NULL.
 *
 * Returns true if the page is mapped in the vma. @pvmw->pmd and @pvmw->pte
 * point to relevant page table entries. @pvmw->ptl is locked. @pvmw->address
 * is adjusted if needed (for PTE-mapped THPs).
 *
 * If @pvmw->pmd is set but @pvmw->pte is not, you have found a PMD-mapped page
 * (usually THP). For PTE-mapped THP, you should run page_vma_mapped_walk() in
 * a loop to find all PTEs that map the THP.
 *
 * For HugeTLB pages, @pvmw->pte is set to the relevant page table entry
 * regardless of which page table level the page is mapped at. @pvmw->pmd is
 * NULL.
 *
 * Returns false if there are no more page table entries for the page in
 * the vma. @pvmw->ptl is unlocked and @pvmw->pte is unmapped.
 *
 * If you need to stop the walk before page_vma_mapped_walk() returned false,
 * use page_vma_mapped_walk_done(). It will do the housekeeping.
 */
bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw)
{
        struct mm_struct *mm = pvmw->vma->vm_mm;
        struct page *page = pvmw->page;
        pgd_t *pgd;
        p4d_t *p4d;
        pud_t *pud;

        /* The only possible pmd mapping has been handled on last iteration */
        if (pvmw->pmd && !pvmw->pte)
                return not_found(pvmw);
        if (pvmw->pte)
                goto next_pte;

        if (unlikely(PageHuge(pvmw->page))) {
                /* when pud is not present, pte will be NULL */
                pvmw->pte = huge_pte_offset(mm, pvmw->address,
                                            PAGE_SIZE << compound_order(page));
                if (!pvmw->pte)
                        return false;

                pvmw->ptl = huge_pte_lockptr(page_hstate(page), mm, pvmw->pte);
                spin_lock(pvmw->ptl);
                if (!check_pte(pvmw))
                        return not_found(pvmw);
                return true;
        }
restart:
        pgd = pgd_offset(mm, pvmw->address);
        if (!pgd_present(*pgd))
                return false;
        p4d = p4d_offset(pgd, pvmw->address);
        if (!p4d_present(*p4d))
                return false;
        pud = pud_offset(p4d, pvmw->address);
        if (!pud_present(*pud))
                return false;
        pvmw->pmd = pmd_offset(pud, pvmw->address);
        if (pmd_trans_huge(*pvmw->pmd)) {
                pvmw->ptl = pmd_lock(mm, pvmw->pmd);
                if (!pmd_present(*pvmw->pmd))
                        return not_found(pvmw);
                if (likely(pmd_trans_huge(*pvmw->pmd))) {
                        if (pvmw->flags & PVMW_MIGRATION)
                                return not_found(pvmw);
                        if (pmd_page(*pvmw->pmd) != page)
                                return not_found(pvmw);
                        return true;
                } else {
                        /* THP pmd was split under us: handle on pte level */
                        spin_unlock(pvmw->ptl);
                        pvmw->ptl = NULL;
                }
        } else {
                if (!check_pmd(pvmw))
                        return false;
        }
        if (!map_pte(pvmw))
                goto next_pte;
        while (1) {
                if (check_pte(pvmw))
                        return true;
next_pte:
                /* Seek to next pte only makes sense for THP */
                if (!PageTransHuge(pvmw->page) || PageHuge(pvmw->page))
                        return not_found(pvmw);
                do {
                        pvmw->address += PAGE_SIZE;
                        if (pvmw->address >= pvmw->vma->vm_end ||
                            pvmw->address >=
                                        __vma_address(pvmw->page, pvmw->vma) +
                                        hpage_nr_pages(pvmw->page) * PAGE_SIZE)
                                return not_found(pvmw);
                        /* Did we cross page table boundary? */
                        if (pvmw->address % PMD_SIZE == 0) {
                                pte_unmap(pvmw->pte);
                                if (pvmw->ptl) {
                                        spin_unlock(pvmw->ptl);
                                        pvmw->ptl = NULL;
                                }
                                goto restart;
                        } else {
                                pvmw->pte++;
                        }
                } while (pte_none(*pvmw->pte));

                if (!pvmw->ptl) {
                        pvmw->ptl = pte_lockptr(mm, pvmw->pmd);
                        spin_lock(pvmw->ptl);
                }
        }
}
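
/*
 * Illustrative sketch, not part of the original file: the kernel-doc above
 * says that for a PTE-mapped THP the walk must be run in a loop so that every
 * mapping PTE is visited.  A minimal caller, in the spirit of
 * page_referenced_one(), could look like the hypothetical helper below.
 * example_count_mappings() is an invented name; the walk API itself is real.
 */
static int example_count_mappings(struct page *page,
                                  struct vm_area_struct *vma,
                                  unsigned long address)
{
        struct page_vma_mapped_walk pvmw = {
                .page = page,
                .vma = vma,
                .address = address,
        };
        int nr_mapped = 0;

        /* Each true return leaves pvmw.ptl locked and pvmw.pte/pmd valid. */
        while (page_vma_mapped_walk(&pvmw)) {
                if (pvmw.pte) {
                        /* PTE-mapped: one iteration per mapped subpage */
                        nr_mapped++;
                } else {
                        /* PMD-mapped THP: the whole huge page in one entry */
                        nr_mapped += hpage_nr_pages(page);
                }
                /*
                 * A caller that wants to stop before the walk returns false
                 * would call page_vma_mapped_walk_done(&pvmw) and break here.
                 */
        }
        /* The final false return has already unlocked and unmapped. */
        return nr_mapped;
}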
/**
 * page_mapped_in_vma - check whether a page is really mapped in a VMA
 * @page: the page to test
 * @vma: the VMA to test
 *
 * Returns 1 if the page is mapped into the page tables of the VMA, 0
 * if the page is not mapped into the page tables of this VMA.  Only
 * valid for normal file or anonymous VMAs.
 */
int page_mapped_in_vma(struct page *page, struct vm_area_struct *vma)
{
        struct page_vma_mapped_walk pvmw = {
                .page = page,
                .vma = vma,
                .flags = PVMW_SYNC,
        };
        unsigned long start, end;

        start = __vma_address(page, vma);
        end = start + PAGE_SIZE * (hpage_nr_pages(page) - 1);
        if (unlikely(end < vma->vm_start || start >= vma->vm_end))
                return 0;
        pvmw.address = max(start, vma->vm_start);
        if (!page_vma_mapped_walk(&pvmw))
                return 0;
        page_vma_mapped_walk_done(&pvmw);
        return 1;
}
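
/*
 * Illustrative sketch, not part of the original file: page_mapped_in_vma()
 * gives a plain yes/no answer for a single VMA, which is enough for callers
 * that only filter VMAs while walking the rmap.  The helper below and its
 * "vmas" array are hypothetical, used purely as a usage example.
 */
static bool example_page_in_any_vma(struct page *page,
                                    struct vm_area_struct **vmas, int nr)
{
        int i;

        for (i = 0; i < nr; i++) {
                /* PVMW_SYNC in page_mapped_in_vma() waits on the pte lock */
                if (page_mapped_in_vma(page, vmas[i]))
                        return true;
        }
        return false;
}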