// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright 2013 Red Hat Inc.
 *
 * Authors: Jérôme Glisse <jglisse@redhat.com>
 */
/*
 * Refer to include/linux/hmm.h for information about heterogeneous memory
 * management or HMM for short.
 */
2019-08-28 16:19:53 +02:00
# include <linux/pagewalk.h>
2017-09-08 16:11:23 -07:00
# include <linux/hmm.h>
2017-09-08 16:12:02 -07:00
# include <linux/init.h>
2017-09-08 16:11:31 -07:00
# include <linux/rmap.h>
# include <linux/swap.h>
2017-09-08 16:11:23 -07:00
# include <linux/slab.h>
# include <linux/sched.h>
2017-09-08 16:11:58 -07:00
# include <linux/mmzone.h>
# include <linux/pagemap.h>
2017-09-08 16:11:31 -07:00
# include <linux/swapops.h>
# include <linux/hugetlb.h>
2017-09-08 16:11:58 -07:00
# include <linux/memremap.h>
2019-05-23 10:36:46 -03:00
# include <linux/sched/mm.h>
2017-09-08 16:11:46 -07:00
# include <linux/jump_label.h>
2019-05-13 17:20:28 -07:00
# include <linux/dma-mapping.h>
2017-09-08 16:11:27 -07:00
# include <linux/mmu_notifier.h>
2017-09-08 16:11:58 -07:00
# include <linux/memory_hotplug.h>
2017-09-08 16:11:35 -07:00
struct hmm_vma_walk {
struct hmm_range * range ;
unsigned long last ;
} ;
2020-03-27 17:00:14 -03:00
/* Fault-requirement bits returned by hmm_pte_need_fault()/hmm_range_need_fault(). */
enum {
	HMM_NEED_FAULT = 1 << 0,	/* CPU entry must be faulted in */
	HMM_NEED_WRITE_FAULT = 1 << 1,	/* the fault must be for write access */
	HMM_NEED_ALL_BITS = HMM_NEED_FAULT | HMM_NEED_WRITE_FAULT,
};
2020-03-27 17:00:15 -03:00
/*
* hmm_device_entry_from_pfn ( ) - create a valid device entry value from pfn
* @ range : range use to encode HMM pfn value
* @ pfn : pfn value for which to create the device entry
* Return : valid device entry for the pfn
*/
static uint64_t hmm_device_entry_from_pfn ( const struct hmm_range * range ,
unsigned long pfn )
{
return ( pfn < < range - > pfn_shift ) | range - > flags [ HMM_PFN_VALID ] ;
}
2019-11-04 14:21:40 -08:00
static int hmm_pfns_fill ( unsigned long addr , unsigned long end ,
struct hmm_range * range , enum hmm_pfn_value_e value )
2017-09-08 16:11:31 -07:00
{
2018-04-10 16:28:38 -07:00
uint64_t * pfns = range - > pfns ;
2017-09-08 16:11:31 -07:00
unsigned long i ;
i = ( addr - range - > start ) > > PAGE_SHIFT ;
for ( ; addr < end ; addr + = PAGE_SIZE , i + + )
2019-11-04 14:21:40 -08:00
pfns [ i ] = range - > values [ value ] ;
2017-09-08 16:11:31 -07:00
return 0 ;
}
2018-04-10 16:28:46 -07:00
/*
2020-03-16 14:53:09 +01:00
* hmm_vma_fault ( ) - fault in a range lacking valid pmd or pte ( s )
2019-07-25 17:56:45 -07:00
* @ addr : range virtual start address ( inclusive )
2018-04-10 16:28:46 -07:00
* @ end : range virtual end address ( exclusive )
2020-03-27 17:00:14 -03:00
* @ required_fault : HMM_NEED_ * flags
2018-04-10 16:28:46 -07:00
* @ walk : mm_walk structure
2020-03-16 14:53:09 +01:00
* Return : - EBUSY after page fault , or page fault error
2018-04-10 16:28:46 -07:00
*
* This function will be called whenever pmd_none ( ) or pte_none ( ) returns true ,
* or whenever there is no page directory covering the virtual address range .
*/
2020-03-16 14:53:09 +01:00
static int hmm_vma_fault ( unsigned long addr , unsigned long end ,
2020-03-27 17:00:14 -03:00
unsigned int required_fault , struct mm_walk * walk )
2017-09-08 16:11:31 -07:00
{
2017-09-08 16:11:35 -07:00
struct hmm_vma_walk * hmm_vma_walk = walk - > private ;
2020-03-16 14:53:10 +01:00
struct vm_area_struct * vma = walk - > vma ;
unsigned int fault_flags = FAULT_FLAG_REMOTE ;
2017-09-08 16:11:31 -07:00
2020-03-27 17:00:14 -03:00
WARN_ON_ONCE ( ! required_fault ) ;
2017-09-08 16:11:35 -07:00
hmm_vma_walk - > last = addr ;
2019-05-13 17:20:18 -07:00
2020-03-27 17:00:14 -03:00
if ( required_fault & HMM_NEED_WRITE_FAULT ) {
2020-03-16 14:53:10 +01:00
if ( ! ( vma - > vm_flags & VM_WRITE ) )
return - EPERM ;
fault_flags | = FAULT_FLAG_WRITE ;
2017-09-08 16:11:35 -07:00
}
2020-03-27 17:00:20 -03:00
for ( ; addr < end ; addr + = PAGE_SIZE )
2020-03-16 14:53:10 +01:00
if ( handle_mm_fault ( vma , addr , fault_flags ) & VM_FAULT_ERROR )
2020-03-27 17:00:20 -03:00
return - EFAULT ;
2020-03-16 14:53:09 +01:00
return - EBUSY ;
2018-04-10 16:29:02 -07:00
}
2020-03-27 17:00:14 -03:00
static unsigned int hmm_pte_need_fault ( const struct hmm_vma_walk * hmm_vma_walk ,
uint64_t pfns , uint64_t cpu_flags )
2018-04-10 16:29:02 -07:00
{
2018-04-10 16:29:06 -07:00
struct hmm_range * range = hmm_vma_walk - > range ;
2019-05-13 17:20:05 -07:00
/*
* So we not only consider the individual per page request we also
* consider the default flags requested for the range . The API can
2019-07-25 17:56:45 -07:00
* be used 2 ways . The first one where the HMM user coalesces
* multiple page faults into one request and sets flags per pfn for
* those faults . The second one where the HMM user wants to pre -
2019-05-13 17:20:05 -07:00
* fault a range with specific flags . For the latter one it is a
* waste to have the user pre - fill the pfn arrays with a default
* flags value .
*/
pfns = ( pfns & range - > pfn_flags_mask ) | range - > default_flags ;
2018-04-10 16:29:02 -07:00
/* We aren't ask to do anything ... */
2018-04-10 16:29:06 -07:00
if ( ! ( pfns & range - > flags [ HMM_PFN_VALID ] ) )
2020-03-27 17:00:14 -03:00
return 0 ;
2018-04-10 16:29:06 -07:00
/* Need to write fault ? */
if ( ( pfns & range - > flags [ HMM_PFN_WRITE ] ) & &
2020-03-27 17:00:14 -03:00
! ( cpu_flags & range - > flags [ HMM_PFN_WRITE ] ) )
return HMM_NEED_FAULT | HMM_NEED_WRITE_FAULT ;
/* If CPU page table is not valid then we need to fault */
if ( ! ( cpu_flags & range - > flags [ HMM_PFN_VALID ] ) )
return HMM_NEED_FAULT ;
return 0 ;
2018-04-10 16:29:02 -07:00
}
2020-03-27 17:00:14 -03:00
static unsigned int
hmm_range_need_fault ( const struct hmm_vma_walk * hmm_vma_walk ,
const uint64_t * pfns , unsigned long npages ,
uint64_t cpu_flags )
2018-04-10 16:29:02 -07:00
{
2020-03-27 17:00:16 -03:00
struct hmm_range * range = hmm_vma_walk - > range ;
2020-03-27 17:00:14 -03:00
unsigned int required_fault = 0 ;
2018-04-10 16:29:02 -07:00
unsigned long i ;
2020-03-27 17:00:16 -03:00
/*
* If the default flags do not request to fault pages , and the mask does
* not allow for individual pages to be faulted , then
* hmm_pte_need_fault ( ) will always return 0.
*/
if ( ! ( ( range - > default_flags | range - > pfn_flags_mask ) &
range - > flags [ HMM_PFN_VALID ] ) )
2020-03-27 17:00:14 -03:00
return 0 ;
2018-04-10 16:29:02 -07:00
for ( i = 0 ; i < npages ; + + i ) {
2020-03-27 17:00:14 -03:00
required_fault | =
hmm_pte_need_fault ( hmm_vma_walk , pfns [ i ] , cpu_flags ) ;
if ( required_fault = = HMM_NEED_ALL_BITS )
return required_fault ;
2018-04-10 16:29:02 -07:00
}
2020-03-27 17:00:14 -03:00
return required_fault ;
2018-04-10 16:29:02 -07:00
}
static int hmm_vma_walk_hole ( unsigned long addr , unsigned long end ,
2020-02-03 17:36:03 -08:00
__always_unused int depth , struct mm_walk * walk )
2018-04-10 16:29:02 -07:00
{
struct hmm_vma_walk * hmm_vma_walk = walk - > private ;
struct hmm_range * range = hmm_vma_walk - > range ;
2020-03-27 17:00:14 -03:00
unsigned int required_fault ;
2018-04-10 16:29:02 -07:00
unsigned long i , npages ;
uint64_t * pfns ;
i = ( addr - range - > start ) > > PAGE_SHIFT ;
npages = ( end - addr ) > > PAGE_SHIFT ;
pfns = & range - > pfns [ i ] ;
2020-03-27 17:00:14 -03:00
required_fault = hmm_range_need_fault ( hmm_vma_walk , pfns , npages , 0 ) ;
2020-03-27 17:00:21 -03:00
if ( ! walk - > vma ) {
if ( required_fault )
return - EFAULT ;
return hmm_pfns_fill ( addr , end , range , HMM_PFN_ERROR ) ;
}
2020-03-27 17:00:14 -03:00
if ( required_fault )
return hmm_vma_fault ( addr , end , required_fault , walk ) ;
2020-03-16 14:53:09 +01:00
hmm_vma_walk - > last = addr ;
return hmm_pfns_fill ( addr , end , range , HMM_PFN_NONE ) ;
2018-04-10 16:29:02 -07:00
}
2018-04-10 16:29:06 -07:00
/* Translate the access permissions of a huge pmd into HMM pfn flags. */
static inline uint64_t pmd_to_hmm_pfn_flags(struct hmm_range *range, pmd_t pmd)
{
	uint64_t flags;

	if (pmd_protnone(pmd))
		return 0;

	flags = range->flags[HMM_PFN_VALID];
	if (pmd_write(pmd))
		flags |= range->flags[HMM_PFN_WRITE];
	return flags;
}
2019-05-13 17:20:21 -07:00
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
/*
 * Fill the pfn slots covered by a huge (transparent or devmap) pmd, faulting
 * first when the caller requested flags the CPU mapping cannot provide.
 */
static int hmm_vma_handle_pmd(struct mm_walk *walk, unsigned long addr,
		unsigned long end, uint64_t *pfns, pmd_t pmd)
{
	struct hmm_vma_walk *hmm_vma_walk = walk->private;
	struct hmm_range *range = hmm_vma_walk->range;
	unsigned long npages = (end - addr) >> PAGE_SHIFT;
	unsigned long pfn, idx;
	unsigned int required_fault;
	uint64_t cpu_flags;

	cpu_flags = pmd_to_hmm_pfn_flags(range, pmd);
	required_fault = hmm_range_need_fault(hmm_vma_walk, pfns, npages,
					      cpu_flags);
	if (required_fault)
		return hmm_vma_fault(addr, end, required_fault, walk);

	/* One device entry per PAGE_SIZE page within the huge mapping. */
	pfn = pmd_pfn(pmd) + ((addr & ~PMD_MASK) >> PAGE_SHIFT);
	for (idx = 0; idx < npages; idx++, pfn++)
		pfns[idx] = hmm_device_entry_from_pfn(range, pfn) | cpu_flags;
	hmm_vma_walk->last = end;
	return 0;
}
#else /* CONFIG_TRANSPARENT_HUGEPAGE */
/* stub to allow the code below to compile */
int hmm_vma_handle_pmd(struct mm_walk *walk, unsigned long addr,
		unsigned long end, uint64_t *pfns, pmd_t pmd);
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
2018-04-10 16:28:59 -07:00
2020-03-16 20:32:16 +01:00
/*
 * True when @entry is a device-private swap entry whose backing pgmap is
 * owned by this range's dev_private_owner (i.e. by the caller's device).
 */
static inline bool hmm_is_device_private_entry(struct hmm_range *range,
	swp_entry_t entry)
{
	struct page *page;

	if (!is_device_private_entry(entry))
		return false;
	page = device_private_entry_to_page(entry);
	return page->pgmap->owner == range->dev_private_owner;
}
2018-04-10 16:29:06 -07:00
/* Translate the access permissions of a pte into HMM pfn flags. */
static inline uint64_t pte_to_hmm_pfn_flags(struct hmm_range *range, pte_t pte)
{
	uint64_t flags;

	/* Empty, non-present or NUMA-protected entries grant no access. */
	if (pte_none(pte) || !pte_present(pte) || pte_protnone(pte))
		return 0;

	flags = range->flags[HMM_PFN_VALID];
	if (pte_write(pte))
		flags |= range->flags[HMM_PFN_WRITE];
	return flags;
}
2018-04-10 16:28:59 -07:00
/*
 * Handle one pte at @addr and store the result in *@pfn.
 *
 * NOTE(ownership): on every non-zero return this function has already done
 * pte_unmap(ptep) — either directly or via hmm_vma_fault() through the fault
 * label — so the caller (hmm_vma_walk_pmd()) must not unmap again on error.
 *
 * Return: 0 on success, -EBUSY after waiting on a migration entry (caller
 * retries), -EFAULT for entries that cannot be resolved, or the result of
 * hmm_vma_fault().
 */
static int hmm_vma_handle_pte(struct mm_walk *walk, unsigned long addr,
			      unsigned long end, pmd_t *pmdp, pte_t *ptep,
			      uint64_t *pfn)
{
	struct hmm_vma_walk *hmm_vma_walk = walk->private;
	struct hmm_range *range = hmm_vma_walk->range;
	unsigned int required_fault;
	uint64_t cpu_flags;
	pte_t pte = *ptep;
	uint64_t orig_pfn = *pfn;	/* caller's per-page request flags */

	if (pte_none(pte)) {
		required_fault = hmm_pte_need_fault(hmm_vma_walk, orig_pfn, 0);
		if (required_fault)
			goto fault;
		*pfn = range->values[HMM_PFN_NONE];
		return 0;
	}

	if (!pte_present(pte)) {
		swp_entry_t entry = pte_to_swp_entry(pte);

		/*
		 * Never fault in device private pages, but just report
		 * the PFN even if not present.
		 */
		if (hmm_is_device_private_entry(range, entry)) {
			*pfn = hmm_device_entry_from_pfn(range,
				device_private_entry_to_pfn(entry));
			*pfn |= range->flags[HMM_PFN_VALID];
			if (is_write_device_private_entry(entry))
				*pfn |= range->flags[HMM_PFN_WRITE];
			return 0;
		}

		required_fault = hmm_pte_need_fault(hmm_vma_walk, orig_pfn, 0);
		if (!required_fault) {
			*pfn = range->values[HMM_PFN_NONE];
			return 0;
		}

		/* A plain swap entry can simply be faulted back in. */
		if (!non_swap_entry(entry))
			goto fault;

		/* Wait for the migration to finish, then have caller retry. */
		if (is_migration_entry(entry)) {
			pte_unmap(ptep);
			hmm_vma_walk->last = addr;
			migration_entry_wait(walk->mm, pmdp, addr);
			return -EBUSY;
		}

		/* Report error for everything else */
		pte_unmap(ptep);
		return -EFAULT;
	}

	cpu_flags = pte_to_hmm_pfn_flags(range, pte);
	required_fault = hmm_pte_need_fault(hmm_vma_walk, orig_pfn, cpu_flags);
	if (required_fault)
		goto fault;

	/*
	 * Since each architecture defines a struct page for the zero page, just
	 * fall through and treat it like a normal page.
	 */
	if (pte_special(pte) && !is_zero_pfn(pte_pfn(pte))) {
		/* Special ptes cannot be faulted; error if a fault was asked. */
		if (hmm_pte_need_fault(hmm_vma_walk, orig_pfn, 0)) {
			pte_unmap(ptep);
			return -EFAULT;
		}
		*pfn = range->values[HMM_PFN_SPECIAL];
		return 0;
	}

	*pfn = hmm_device_entry_from_pfn(range, pte_pfn(pte)) | cpu_flags;
	return 0;

fault:
	pte_unmap(ptep);
	/* Fault any virtual address we were asked to fault */
	return hmm_vma_fault(addr, end, required_fault, walk);
}
2017-09-08 16:11:31 -07:00
/*
 * pmd_entry callback: classify the pmd (none, under migration, non-present,
 * huge/devmap, bad, or a pte table) and fill the matching slice of
 * range->pfns[] via the appropriate helper.
 *
 * Return: 0 on success, -EBUSY to retry (migration in progress), -EFAULT
 * for unresolvable entries, or an error from the helpers.
 */
static int hmm_vma_walk_pmd(pmd_t *pmdp,
			    unsigned long start,
			    unsigned long end,
			    struct mm_walk *walk)
{
	struct hmm_vma_walk *hmm_vma_walk = walk->private;
	struct hmm_range *range = hmm_vma_walk->range;
	uint64_t *pfns = &range->pfns[(start - range->start) >> PAGE_SHIFT];
	unsigned long npages = (end - start) >> PAGE_SHIFT;
	unsigned long addr = start;
	pte_t *ptep;
	pmd_t pmd;

again:
	pmd = READ_ONCE(*pmdp);
	if (pmd_none(pmd))
		return hmm_vma_walk_hole(start, end, -1, walk);

	if (thp_migration_supported() && is_pmd_migration_entry(pmd)) {
		/* Only wait for migration if the caller needs these pages. */
		if (hmm_range_need_fault(hmm_vma_walk, pfns, npages, 0)) {
			hmm_vma_walk->last = addr;
			pmd_migration_entry_wait(walk->mm, pmdp);
			return -EBUSY;
		}
		return hmm_pfns_fill(start, end, range, HMM_PFN_NONE);
	}

	if (!pmd_present(pmd)) {
		if (hmm_range_need_fault(hmm_vma_walk, pfns, npages, 0))
			return -EFAULT;
		return hmm_pfns_fill(start, end, range, HMM_PFN_ERROR);
	}

	if (pmd_devmap(pmd) || pmd_trans_huge(pmd)) {
		/*
		 * No need to take pmd_lock here, even if some other thread
		 * is splitting the huge pmd we will get that event through
		 * mmu_notifier callback.
		 *
		 * So just read pmd value and check again it's a transparent
		 * huge or device mapping one and compute corresponding pfn
		 * values.
		 */
		pmd = pmd_read_atomic(pmdp);
		barrier();
		if (!pmd_devmap(pmd) && !pmd_trans_huge(pmd))
			goto again;

		return hmm_vma_handle_pmd(walk, addr, end, pfns, pmd);
	}

	/*
	 * We have handled all the valid cases above ie either none, migration,
	 * huge or transparent huge. At this point either it is a valid pmd
	 * entry pointing to pte directory or it is a bad pmd that will not
	 * recover.
	 */
	if (pmd_bad(pmd)) {
		if (hmm_range_need_fault(hmm_vma_walk, pfns, npages, 0))
			return -EFAULT;
		return hmm_pfns_fill(start, end, range, HMM_PFN_ERROR);
	}

	ptep = pte_offset_map(pmdp, addr);
	for (; addr < end; addr += PAGE_SIZE, ptep++, pfns++) {
		int r;

		r = hmm_vma_handle_pte(walk, addr, end, pmdp, ptep, pfns);
		if (r) {
			/* hmm_vma_handle_pte() did pte_unmap() */
			hmm_vma_walk->last = addr;
			return r;
		}
	}
	/* Unmap the last mapped pte (ptep was post-incremented past it). */
	pte_unmap(ptep - 1);

	hmm_vma_walk->last = addr;
	return 0;
}
2019-08-06 19:05:48 +03:00
# if defined(CONFIG_ARCH_HAS_PTE_DEVMAP) && \
defined ( CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD )
static inline uint64_t pud_to_hmm_pfn_flags ( struct hmm_range * range , pud_t pud )
{
if ( ! pud_present ( pud ) )
return 0 ;
return pud_write ( pud ) ? range - > flags [ HMM_PFN_VALID ] |
range - > flags [ HMM_PFN_WRITE ] :
range - > flags [ HMM_PFN_VALID ] ;
}
static int hmm_vma_walk_pud ( pud_t * pudp , unsigned long start , unsigned long end ,
struct mm_walk * walk )
2019-05-13 17:20:21 -07:00
{
struct hmm_vma_walk * hmm_vma_walk = walk - > private ;
struct hmm_range * range = hmm_vma_walk - > range ;
2020-02-03 17:35:45 -08:00
unsigned long addr = start ;
2019-05-13 17:20:21 -07:00
pud_t pud ;
2020-02-03 17:35:45 -08:00
int ret = 0 ;
spinlock_t * ptl = pud_trans_huge_lock ( pudp , walk - > vma ) ;
if ( ! ptl )
return 0 ;
/* Normally we don't want to split the huge page */
walk - > action = ACTION_CONTINUE ;
2019-05-13 17:20:21 -07:00
pud = READ_ONCE ( * pudp ) ;
2020-02-03 17:35:45 -08:00
if ( pud_none ( pud ) ) {
2020-03-02 15:26:44 -04:00
spin_unlock ( ptl ) ;
return hmm_vma_walk_hole ( start , end , - 1 , walk ) ;
2020-02-03 17:35:45 -08:00
}
2019-05-13 17:20:21 -07:00
if ( pud_huge ( pud ) & & pud_devmap ( pud ) ) {
unsigned long i , npages , pfn ;
2020-03-27 17:00:14 -03:00
unsigned int required_fault ;
2019-05-13 17:20:21 -07:00
uint64_t * pfns , cpu_flags ;
2020-02-03 17:35:45 -08:00
if ( ! pud_present ( pud ) ) {
2020-03-02 15:26:44 -04:00
spin_unlock ( ptl ) ;
return hmm_vma_walk_hole ( start , end , - 1 , walk ) ;
2020-02-03 17:35:45 -08:00
}
2019-05-13 17:20:21 -07:00
i = ( addr - range - > start ) > > PAGE_SHIFT ;
npages = ( end - addr ) > > PAGE_SHIFT ;
pfns = & range - > pfns [ i ] ;
cpu_flags = pud_to_hmm_pfn_flags ( range , pud ) ;
2020-03-27 17:00:14 -03:00
required_fault = hmm_range_need_fault ( hmm_vma_walk , pfns ,
npages , cpu_flags ) ;
if ( required_fault ) {
2020-03-02 15:26:44 -04:00
spin_unlock ( ptl ) ;
2020-03-27 17:00:14 -03:00
return hmm_vma_fault ( addr , end , required_fault , walk ) ;
2020-02-03 17:35:45 -08:00
}
2019-05-13 17:20:21 -07:00
pfn = pud_pfn ( pud ) + ( ( addr & ~ PUD_MASK ) > > PAGE_SHIFT ) ;
2020-03-27 17:00:13 -03:00
for ( i = 0 ; i < npages ; + + i , + + pfn )
2019-05-13 17:20:31 -07:00
pfns [ i ] = hmm_device_entry_from_pfn ( range , pfn ) |
cpu_flags ;
2019-05-13 17:20:21 -07:00
hmm_vma_walk - > last = end ;
2020-02-03 17:35:45 -08:00
goto out_unlock ;
2019-05-13 17:20:21 -07:00
}
2020-02-03 17:35:45 -08:00
/* Ask for the PUD to be split */
walk - > action = ACTION_SUBTREE ;
2019-05-13 17:20:21 -07:00
2020-02-03 17:35:45 -08:00
out_unlock :
spin_unlock ( ptl ) ;
return ret ;
2019-05-13 17:20:21 -07:00
}
2019-08-06 19:05:48 +03:00
# else
# define hmm_vma_walk_pud NULL
# endif
2019-05-13 17:20:21 -07:00
2019-08-06 19:05:50 +03:00
#ifdef CONFIG_HUGETLB_PAGE
/*
 * hugetlb_entry callback: expand a single huge pte into per-PAGE_SIZE device
 * entries, faulting first if the caller requested flags the mapping cannot
 * satisfy. The huge pte is read and the pfn array filled under its lock.
 */
static int hmm_vma_walk_hugetlb_entry(pte_t *pte, unsigned long hmask,
				      unsigned long start, unsigned long end,
				      struct mm_walk *walk)
{
	struct hmm_vma_walk *hmm_vma_walk = walk->private;
	struct hmm_range *range = hmm_vma_walk->range;
	struct vm_area_struct *vma = walk->vma;
	unsigned long addr = start;
	unsigned long idx, pfn;
	unsigned int required_fault;
	uint64_t orig_pfn, cpu_flags;
	spinlock_t *ptl;
	pte_t entry;

	ptl = huge_pte_lock(hstate_vma(vma), walk->mm, pte);
	entry = huge_ptep_get(pte);

	idx = (start - range->start) >> PAGE_SHIFT;
	orig_pfn = range->pfns[idx];
	cpu_flags = pte_to_hmm_pfn_flags(range, entry);
	required_fault = hmm_pte_need_fault(hmm_vma_walk, orig_pfn, cpu_flags);
	if (required_fault) {
		/* Drop the lock before going to fault in the pages. */
		spin_unlock(ptl);
		return hmm_vma_fault(addr, end, required_fault, walk);
	}

	pfn = pte_pfn(entry) + ((start & ~hmask) >> PAGE_SHIFT);
	while (addr < end) {
		range->pfns[idx] = hmm_device_entry_from_pfn(range, pfn) |
				   cpu_flags;
		addr += PAGE_SIZE;
		idx++;
		pfn++;
	}
	hmm_vma_walk->last = end;
	spin_unlock(ptl);
	return 0;
}
#else
#define hmm_vma_walk_hugetlb_entry NULL
#endif /* CONFIG_HUGETLB_PAGE */
2019-05-13 17:20:18 -07:00
2019-11-04 14:21:40 -08:00
static int hmm_vma_walk_test ( unsigned long start , unsigned long end ,
struct mm_walk * walk )
2018-04-10 16:28:54 -07:00
{
2019-11-04 14:21:40 -08:00
struct hmm_vma_walk * hmm_vma_walk = walk - > private ;
struct hmm_range * range = hmm_vma_walk - > range ;
struct vm_area_struct * vma = walk - > vma ;
2020-03-27 17:00:14 -03:00
if ( ! ( vma - > vm_flags & ( VM_IO | VM_PFNMAP | VM_MIXEDMAP ) ) & &
vma - > vm_flags & VM_READ )
return 0 ;
2019-11-04 14:21:40 -08:00
/*
2020-03-27 17:00:14 -03:00
* vma ranges that don ' t have struct page backing them or map I / O
* devices directly cannot be handled by hmm_range_fault ( ) .
2020-03-05 12:00:22 -04:00
*
2019-11-04 14:21:40 -08:00
* If the vma does not allow read access , then assume that it does not
2020-03-05 12:00:22 -04:00
* allow write access either . HMM does not support architectures that
* allow write without read .
2020-03-27 17:00:14 -03:00
*
* If a fault is requested for an unsupported range then it is a hard
* failure .
2019-11-04 14:21:40 -08:00
*/
2020-03-27 17:00:14 -03:00
if ( hmm_range_need_fault ( hmm_vma_walk ,
range - > pfns +
( ( start - range - > start ) > > PAGE_SHIFT ) ,
( end - start ) > > PAGE_SHIFT , 0 ) )
return - EFAULT ;
2019-11-04 14:21:40 -08:00
2020-03-27 17:00:14 -03:00
hmm_pfns_fill ( start , end , range , HMM_PFN_ERROR ) ;
hmm_vma_walk - > last = end ;
2019-11-04 14:21:40 -08:00
2020-03-27 17:00:14 -03:00
/* Skip this vma and continue processing the next vma. */
return 1 ;
2018-04-10 16:28:54 -07:00
}
2019-08-28 16:19:54 +02:00
/* Pagewalk callbacks used by walk_page_range() in hmm_range_fault(). */
static const struct mm_walk_ops hmm_walk_ops = {
	.pud_entry	= hmm_vma_walk_pud,
	.pmd_entry	= hmm_vma_walk_pmd,
	.pte_hole	= hmm_vma_walk_hole,
	.hugetlb_entry	= hmm_vma_walk_hugetlb_entry,
	.test_walk	= hmm_vma_walk_test,
};
2019-07-25 17:56:46 -07:00
/**
* hmm_range_fault - try to fault some address in a virtual address range
2020-03-27 17:00:15 -03:00
* @ range : argument structure
2019-07-25 17:56:46 -07:00
*
* Return : the number of valid pages in range - > pfns [ ] ( from range start
* address ) , which may be zero . On error one of the following status codes
* can be returned :
2019-05-13 17:19:58 -07:00
*
2019-07-25 17:56:46 -07:00
* - EINVAL : Invalid arguments or mm or virtual address is in an invalid vma
* ( e . g . , device file vma ) .
* - ENOMEM : Out of memory .
* - EPERM : Invalid permission ( e . g . , asking for write and range is read
* only ) .
* - EBUSY : The range has been invalidated and the caller needs to wait for
* the invalidation to finish .
2020-03-27 17:00:15 -03:00
* - EFAULT : A page was requested to be valid and could not be made valid
* ie it has no backing VMA or it is illegal to access
2017-09-08 16:11:35 -07:00
*
2020-03-27 17:00:15 -03:00
* This is similar to get_user_pages ( ) , except that it can read the page tables
* without mutating them ( ie causing faults ) .
2017-09-08 16:11:35 -07:00
*/
2020-03-27 17:00:16 -03:00
long hmm_range_fault ( struct hmm_range * range )
2017-09-08 16:11:35 -07:00
{
2019-11-04 14:21:40 -08:00
struct hmm_vma_walk hmm_vma_walk = {
. range = range ,
. last = range - > start ,
} ;
2019-11-12 16:22:30 -04:00
struct mm_struct * mm = range - > notifier - > mm ;
2017-09-08 16:11:35 -07:00
int ret ;
2019-11-12 16:22:20 -04:00
lockdep_assert_held ( & mm - > mmap_sem ) ;
2019-05-13 17:19:48 -07:00
2019-05-13 17:20:01 -07:00
do {
/* If range is no longer valid force retry. */
2019-11-12 16:22:30 -04:00
if ( mmu_interval_check_retry ( range - > notifier ,
range - > notifier_seq ) )
2019-07-24 08:52:52 +02:00
return - EBUSY ;
2019-11-04 14:21:40 -08:00
ret = walk_page_range ( mm , hmm_vma_walk . last , range - > end ,
& hmm_walk_ops , & hmm_vma_walk ) ;
} while ( ret = = - EBUSY ) ;
2017-09-08 16:11:35 -07:00
2019-11-04 14:21:40 -08:00
if ( ret )
return ret ;
2019-05-13 17:19:58 -07:00
return ( hmm_vma_walk . last - range - > start ) > > PAGE_SHIFT ;
2017-09-08 16:11:35 -07:00
}
2019-05-13 17:19:58 -07:00
EXPORT_SYMBOL ( hmm_range_fault ) ;