/*
 * Copyright (C) 2012 - Virtual Open Systems and Columbia University
 * Author: Christoffer Dall <c.dall@virtualopensystems.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License, version 2, as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 */
#include <linux/mman.h>
#include <linux/kvm_host.h>
#include <linux/io.h>
#include <trace/events/kvm.h>
#include <asm/idmap.h>
#include <asm/pgalloc.h>
#include <asm/cacheflush.h>
#include <asm/kvm_arm.h>
#include <asm/kvm_mmu.h>
#include <asm/kvm_mmio.h>
#include <asm/kvm_asm.h>
#include <asm/kvm_emulate.h>
#include <asm/mach/map.h>

#include "trace.h"

extern char __hyp_idmap_text_start[], __hyp_idmap_text_end[];

static DEFINE_MUTEX(kvm_hyp_pgd_mutex);

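/*
 * Invalidate the stage-2 TLB entries tagged with this VM's VMID.  The actual
 * TLB maintenance is performed by the Hyp-mode assembly behind
 * __kvm_tlb_flush_vmid, reached from here via kvm_call_hyp().
 */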
static void kvm_tlb_flush_vmid(struct kvm *kvm)
{
	kvm_call_hyp(__kvm_tlb_flush_vmid, kvm);
}

static void kvm_set_pte(pte_t *pte, pte_t new_pte)
{
	pte_val(*pte) = new_pte;
	/*
	 * flush_pmd_entry just takes a void pointer and cleans the necessary
	 * cache entries, so we can reuse the function for ptes.
	 */
	flush_pmd_entry(pte);
}

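/*
 * Per-VCPU cache of pre-allocated page-table pages, modelled on the
 * kvm_mmu_memory_cache scheme used elsewhere in KVM: the cache is topped up
 * before taking the mmu_lock (so the allocation may sleep) and pages are
 * later handed out from mmu_memory_cache_alloc() while the lock is held.
 */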
static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *cache,
				  int min, int max)
{
	void *page;

	BUG_ON(max > KVM_NR_MEM_OBJS);
	if (cache->nobjs >= min)
		return 0;
	while (cache->nobjs < max) {
		page = (void *)__get_free_page(PGALLOC_GFP);
		if (!page)
			return -ENOMEM;
		cache->objects[cache->nobjs++] = page;
	}
	return 0;
}

static void mmu_free_memory_cache(struct kvm_mmu_memory_cache *mc)
{
	while (mc->nobjs)
		free_page((unsigned long)mc->objects[--mc->nobjs]);
}

static void *mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc)
{
	void *p;

	BUG_ON(!mc || !mc->nobjs);
	p = mc->objects[--mc->nobjs];
	return p;
}

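/*
 * Walk one Hyp-mode pmd (level-2) table and free every level-3 table it
 * points to.  The pmd table itself is left for the caller to free and clear.
 */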
static void free_ptes(pmd_t *pmd, unsigned long addr)
{
	pte_t *pte;
	unsigned int i;

	for (i = 0; i < PTRS_PER_PMD; i++, addr += PMD_SIZE) {
		if (!pmd_none(*pmd) && pmd_table(*pmd)) {
			pte = pte_offset_kernel(pmd, addr);
			pte_free_kernel(NULL, pte);
		}
		pmd++;
	}
}

/**
 * free_hyp_pmds - free the Hyp-mode level-2 tables and child level-3 tables
 *
 * Assumes this is a page table used strictly in Hyp-mode and therefore contains
 * only mappings in the kernel memory area, which is above PAGE_OFFSET.
 */
void free_hyp_pmds(void)
{
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;
	unsigned long addr;

	mutex_lock(&kvm_hyp_pgd_mutex);
	for (addr = PAGE_OFFSET; addr != 0; addr += PGDIR_SIZE) {
		pgd = hyp_pgd + pgd_index(addr);
		pud = pud_offset(pgd, addr);

		if (pud_none(*pud))
			continue;
		BUG_ON(pud_bad(*pud));

		pmd = pmd_offset(pud, addr);
		free_ptes(pmd, addr);
		pmd_free(NULL, pmd);
		pud_clear(pud);
	}
	mutex_unlock(&kvm_hyp_pgd_mutex);
}

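/*
 * Fill in the level-3 (pte) entries backing a Hyp-mode mapping.  The first
 * variant maps kernel pages at their kernel virtual address using PAGE_HYP;
 * the second maps a contiguous physical (I/O) region with PAGE_HYP_DEVICE,
 * advancing *pfn_base one page at a time.
 */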
static void create_hyp_pte_mappings(pmd_t *pmd, unsigned long start,
				    unsigned long end)
{
	pte_t *pte;
	unsigned long addr;
	struct page *page;

	for (addr = start & PAGE_MASK; addr < end; addr += PAGE_SIZE) {
		pte = pte_offset_kernel(pmd, addr);
		BUG_ON(!virt_addr_valid(addr));
		page = virt_to_page(addr);
		kvm_set_pte(pte, mk_pte(page, PAGE_HYP));
	}
}

static void create_hyp_io_pte_mappings(pmd_t *pmd, unsigned long start,
				       unsigned long end,
				       unsigned long *pfn_base)
{
	pte_t *pte;
	unsigned long addr;

	for (addr = start & PAGE_MASK; addr < end; addr += PAGE_SIZE) {
		pte = pte_offset_kernel(pmd, addr);
		BUG_ON(pfn_valid(*pfn_base));
		kvm_set_pte(pte, pfn_pte(*pfn_base, PAGE_HYP_DEVICE));
		(*pfn_base)++;
	}
}

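/*
 * Allocate level-3 tables as needed for [start, end[ within a single pud
 * range and hand each pmd-sized chunk to the pte-level helpers above.
 */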
static int create_hyp_pmd_mappings(pud_t *pud, unsigned long start,
				   unsigned long end, unsigned long *pfn_base)
{
	pmd_t *pmd;
	pte_t *pte;
	unsigned long addr, next;

	for (addr = start; addr < end; addr = next) {
		pmd = pmd_offset(pud, addr);

		BUG_ON(pmd_sect(*pmd));

		if (pmd_none(*pmd)) {
			pte = pte_alloc_one_kernel(NULL, addr);
			if (!pte) {
				kvm_err("Cannot allocate Hyp pte\n");
				return -ENOMEM;
			}
			pmd_populate_kernel(NULL, pmd, pte);
		}

		next = pmd_addr_end(addr, end);

		/*
		 * If pfn_base is NULL, we map kernel pages into HYP with the
		 * virtual address.  Otherwise, this is considered an I/O
		 * mapping and we map the physical region starting at
		 * *pfn_base to [start, end[.
		 */
		if (!pfn_base)
			create_hyp_pte_mappings(pmd, addr, next);
		else
			create_hyp_io_pte_mappings(pmd, addr, next, pfn_base);
	}

	return 0;
}

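/*
 * Common worker for the two create_hyp_*_mappings() entry points below:
 * walks [from, to[ under kvm_hyp_pgd_mutex, allocating intermediate tables
 * in hyp_pgd on demand.  A NULL pfn_base selects a normal kernel-memory
 * mapping, a non-NULL one an I/O mapping (see create_hyp_pmd_mappings).
 */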
static int __create_hyp_mappings(void *from, void *to, unsigned long *pfn_base)
{
	unsigned long start = (unsigned long)from;
	unsigned long end = (unsigned long)to;
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;
	unsigned long addr, next;
	int err = 0;

	BUG_ON(start > end);
	if (start < PAGE_OFFSET)
		return -EINVAL;

	mutex_lock(&kvm_hyp_pgd_mutex);
	for (addr = start; addr < end; addr = next) {
		pgd = hyp_pgd + pgd_index(addr);
		pud = pud_offset(pgd, addr);

		if (pud_none_or_clear_bad(pud)) {
			pmd = pmd_alloc_one(NULL, addr);
			if (!pmd) {
				kvm_err("Cannot allocate Hyp pmd\n");
				err = -ENOMEM;
				goto out;
			}
			pud_populate(NULL, pud, pmd);
		}

		next = pgd_addr_end(addr, end);
		err = create_hyp_pmd_mappings(pud, addr, next, pfn_base);
		if (err)
			goto out;
	}
out:
	mutex_unlock(&kvm_hyp_pgd_mutex);
	return err;
}

/**
 * create_hyp_mappings - map a kernel virtual address range in Hyp mode
 * @from:	The virtual kernel start address of the range
 * @to:		The virtual kernel end address of the range (exclusive)
 *
 * The same virtual address as the kernel virtual address is also used in
 * Hyp-mode mapping to the same underlying physical pages.
 *
 * Note: Wrapping around zero in the "to" address is not supported.
 */
int create_hyp_mappings(void *from, void *to)
{
	return __create_hyp_mappings(from, to, NULL);
}

/**
 * create_hyp_io_mappings - map a physical IO range in Hyp mode
 * @from:	The virtual HYP start address of the range
 * @to:		The virtual HYP end address of the range (exclusive)
 * @addr:	The physical start address which gets mapped
 */
int create_hyp_io_mappings(void *from, void *to, phys_addr_t addr)
{
	unsigned long pfn = __phys_to_pfn(addr);

	return __create_hyp_mappings(from, to, &pfn);
}

/**
 * kvm_alloc_stage2_pgd - allocate level-1 table for stage-2 translation.
 * @kvm:	The KVM struct pointer for the VM.
 *
 * Allocates the 1st level table only of size defined by S2_PGD_ORDER (can
 * support either full 40-bit input addresses or limited to 32-bit input
 * addresses).  Clears the allocated pages.
 *
 * Note we don't need locking here as this is only called when the VM is
 * created, which can only be done once.
 */
int kvm_alloc_stage2_pgd(struct kvm *kvm)
{
	pgd_t *pgd;

	if (kvm->arch.pgd != NULL) {
		kvm_err("kvm_arch already initialized?\n");
		return -EINVAL;
	}

	pgd = (pgd_t *)__get_free_pages(GFP_KERNEL, S2_PGD_ORDER);
	if (!pgd)
		return -ENOMEM;

	/* stage-2 pgd must be aligned to its size */
	VM_BUG_ON((unsigned long)pgd & (S2_PGD_SIZE - 1));

	memset(pgd, 0, PTRS_PER_S2_PGD * sizeof(pgd_t));
	clean_dcache_area(pgd, PTRS_PER_S2_PGD * sizeof(pgd_t));
	kvm->arch.pgd = pgd;

	return 0;
}

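/*
 * Stage-2 teardown helpers.  stage2_set_pte() takes a reference on the page
 * backing a table each time one of that table's entries is populated, so a
 * page_count() of 1 (tested by pte_empty/pmd_empty) means the table holds no
 * valid entries.  clear_pte_entry() drops a single mapping, while
 * clear_pmd_entry()/clear_pud_entry() also free the now-unused child table.
 */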
static void clear_pud_entry(pud_t *pud)
{
	pmd_t *pmd_table = pmd_offset(pud, 0);
	pud_clear(pud);
	pmd_free(NULL, pmd_table);
	put_page(virt_to_page(pud));
}

static void clear_pmd_entry(pmd_t *pmd)
{
	pte_t *pte_table = pte_offset_kernel(pmd, 0);
	pmd_clear(pmd);
	pte_free_kernel(NULL, pte_table);
	put_page(virt_to_page(pmd));
}

static bool pmd_empty(pmd_t *pmd)
{
	struct page *pmd_page = virt_to_page(pmd);
	return page_count(pmd_page) == 1;
}

static void clear_pte_entry(pte_t *pte)
{
	if (pte_present(*pte)) {
		kvm_set_pte(pte, __pte(0));
		put_page(virt_to_page(pte));
	}
}

static bool pte_empty(pte_t *pte)
{
	struct page *pte_page = virt_to_page(pte);
	return page_count(pte_page) == 1;
}

/**
 * unmap_stage2_range -- Clear stage2 page table entries to unmap a range
 * @kvm:   The VM pointer
 * @start: The intermediate physical base address of the range to unmap
 * @size:  The size of the area to unmap
 *
 * Clear a range of stage-2 mappings, lowering the various ref-counts.  Must
 * be called while holding mmu_lock (unless for freeing the stage2 pgd before
 * destroying the VM), otherwise another faulting VCPU may come in and mess
 * with things behind our backs.
 */
static void unmap_stage2_range(struct kvm *kvm, phys_addr_t start, u64 size)
{
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;
	pte_t *pte;
	phys_addr_t addr = start, end = start + size;
	u64 range;

	while (addr < end) {
		pgd = kvm->arch.pgd + pgd_index(addr);
		pud = pud_offset(pgd, addr);
		if (pud_none(*pud)) {
			addr += PUD_SIZE;
			continue;
		}

		pmd = pmd_offset(pud, addr);
		if (pmd_none(*pmd)) {
			addr += PMD_SIZE;
			continue;
		}

		pte = pte_offset_kernel(pmd, addr);
		clear_pte_entry(pte);
		range = PAGE_SIZE;

		/* If we emptied the pte, walk back up the ladder */
		if (pte_empty(pte)) {
			clear_pmd_entry(pmd);
			range = PMD_SIZE;
			if (pmd_empty(pmd)) {
				clear_pud_entry(pud);
				range = PUD_SIZE;
			}
		}

		addr += range;
	}
}

/**
 * kvm_free_stage2_pgd - free all stage-2 tables
 * @kvm:	The KVM struct pointer for the VM.
 *
 * Walks the level-1 page table pointed to by kvm->arch.pgd and frees all
 * underlying level-2 and level-3 tables before freeing the actual level-1 table
 * and setting the struct pointer to NULL.
 *
 * Note we don't need locking here as this is only called when the VM is
 * destroyed, which can only be done once.
 */
void kvm_free_stage2_pgd(struct kvm *kvm)
{
	if (kvm->arch.pgd == NULL)
		return;

	unmap_stage2_range(kvm, 0, KVM_PHYS_SIZE);
	free_pages((unsigned long)kvm->arch.pgd, S2_PGD_ORDER);
	kvm->arch.pgd = NULL;
}

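/*
 * Install *new_pte in the stage-2 tables at IPA @addr, allocating missing
 * intermediate tables from @cache (topped up by the caller before taking the
 * mmu_lock).  With a NULL cache, missing levels are simply skipped, which is
 * what the kvm_set_spte_hva() path relies on.  For device (iomap) mappings an
 * already-present pte is treated as an error.
 */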
static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
			  phys_addr_t addr, const pte_t *new_pte, bool iomap)
{
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;
	pte_t *pte, old_pte;

	/* Create 2nd stage page table mapping - Level 1 */
	pgd = kvm->arch.pgd + pgd_index(addr);
	pud = pud_offset(pgd, addr);
	if (pud_none(*pud)) {
		if (!cache)
			return 0; /* ignore calls from kvm_set_spte_hva */
		pmd = mmu_memory_cache_alloc(cache);
		pud_populate(NULL, pud, pmd);
		pmd += pmd_index(addr);
		get_page(virt_to_page(pud));
	} else
		pmd = pmd_offset(pud, addr);

	/* Create 2nd stage page table mapping - Level 2 */
	if (pmd_none(*pmd)) {
		if (!cache)
			return 0; /* ignore calls from kvm_set_spte_hva */
		pte = mmu_memory_cache_alloc(cache);
		clean_pte_table(pte);
		pmd_populate_kernel(NULL, pmd, pte);
		pte += pte_index(addr);
		get_page(virt_to_page(pmd));
	} else
		pte = pte_offset_kernel(pmd, addr);

	if (iomap && pte_present(*pte))
		return -EFAULT;

	/* Create 2nd stage page table mapping - Level 3 */
	old_pte = *pte;
	kvm_set_pte(pte, *new_pte);
	if (pte_present(old_pte))
		kvm_tlb_flush_vmid(kvm);
	else
		get_page(virt_to_page(pte));

	return 0;
}

/**
 * kvm_phys_addr_ioremap - map a device range to guest IPA
 *
 * @kvm:	The KVM pointer
 * @guest_ipa:	The IPA at which to insert the mapping
 * @pa:		The physical address of the device
 * @size:	The size of the mapping
 */
int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
			  phys_addr_t pa, unsigned long size)
{
	phys_addr_t addr, end;
	int ret = 0;
	unsigned long pfn;
	struct kvm_mmu_memory_cache cache = { 0, };

	end = (guest_ipa + size + PAGE_SIZE - 1) & PAGE_MASK;
	pfn = __phys_to_pfn(pa);

	for (addr = guest_ipa; addr < end; addr += PAGE_SIZE) {
		pte_t pte = pfn_pte(pfn, PAGE_S2_DEVICE | L_PTE_S2_RDWR);

		ret = mmu_topup_memory_cache(&cache, 2, 2);
		if (ret)
			goto out;
		spin_lock(&kvm->mmu_lock);
		ret = stage2_set_pte(kvm, &cache, addr, &pte, true);
		spin_unlock(&kvm->mmu_lock);
		if (ret)
			goto out;

		pfn++;
	}

out:
	mmu_free_memory_cache(&cache);
	return ret;
}

static void coherent_icache_guest_page(struct kvm *kvm, gfn_t gfn)
{
	/*
	 * If we are going to insert an instruction page and the icache is
	 * either VIPT or PIPT, there is a potential problem where the host
	 * (or another VM) may have used the same page as this guest, and we
	 * read incorrect data from the icache.  If we're using a PIPT cache,
	 * we can invalidate just that page, but if we are using a VIPT cache
	 * we need to invalidate the entire icache - damn shame - as written
	 * in the ARM ARM (DDI 0406C.b - Page B3-1393).
	 *
	 * VIVT caches are tagged using both the ASID and the VMID and don't
	 * need any kind of flushing (DDI 0406C.b - Page B3-1392).
	 */
	if (icache_is_pipt()) {
		unsigned long hva = gfn_to_hva(kvm, gfn);
		__cpuc_coherent_user_range(hva, hva + PAGE_SIZE);
	} else if (!icache_is_vivt_asid_tagged()) {
		/* any kind of VIPT cache */
		__flush_icache_all();
	}
}

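/*
 * Handle a stage-2 fault on an address backed by a memslot: pin the page
 * with gfn_to_pfn_prot(), make the icache coherent for it, and install the
 * new stage-2 pte under mmu_lock.  If an MMU notifier invalidated the page
 * in the meantime, the pte is not installed and the guest simply retries.
 */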
static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
			  gfn_t gfn, struct kvm_memory_slot *memslot,
			  unsigned long fault_status)
{
	pte_t new_pte;
	pfn_t pfn;
	int ret;
	bool write_fault, writable;
	unsigned long mmu_seq;
	struct kvm_mmu_memory_cache *memcache = &vcpu->arch.mmu_page_cache;

	write_fault = kvm_is_write_fault(kvm_vcpu_get_hsr(vcpu));
	if (fault_status == FSC_PERM && !write_fault) {
		kvm_err("Unexpected L2 read permission error\n");
		return -EFAULT;
	}

	/* We need minimum second+third level pages */
	ret = mmu_topup_memory_cache(memcache, 2, KVM_NR_MEM_OBJS);
	if (ret)
		return ret;

	mmu_seq = vcpu->kvm->mmu_notifier_seq;
	/*
	 * Ensure the read of mmu_notifier_seq happens before we call
	 * gfn_to_pfn_prot (which calls get_user_pages), so that we don't risk
	 * the page we just got a reference to getting unmapped before we have
	 * a chance to grab the mmu_lock, which ensures that if the page gets
	 * unmapped afterwards, the call to kvm_unmap_hva will take it away
	 * from us again properly.  This smp_rmb() interacts with the smp_wmb()
	 * in kvm_mmu_notifier_invalidate_<page|range_end>.
	 */
	smp_rmb();

	pfn = gfn_to_pfn_prot(vcpu->kvm, gfn, write_fault, &writable);
	if (is_error_pfn(pfn))
		return -EFAULT;

	new_pte = pfn_pte(pfn, PAGE_S2);
	coherent_icache_guest_page(vcpu->kvm, gfn);

	spin_lock(&vcpu->kvm->mmu_lock);
	if (mmu_notifier_retry(vcpu->kvm, mmu_seq))
		goto out_unlock;
	if (writable) {
		pte_val(new_pte) |= L_PTE_S2_RDWR;
		kvm_set_pfn_dirty(pfn);
	}
	stage2_set_pte(vcpu->kvm, memcache, fault_ipa, &new_pte, false);

out_unlock:
	spin_unlock(&vcpu->kvm->mmu_lock);
	kvm_release_pfn_clean(pfn);
	return 0;
}

/**
 * kvm_handle_guest_abort - handles all 2nd stage aborts
 * @vcpu:	the VCPU pointer
 * @run:	the kvm_run structure
 *
 * Any abort that gets to the host is almost guaranteed to be caused by a
 * missing second stage translation table entry, which can mean that either the
 * guest simply needs more memory and we must allocate an appropriate page or it
 * can mean that the guest tried to access I/O memory, which is emulated by user
 * space.  The distinction is based on the IPA causing the fault and whether
 * this memory region has been registered as standard RAM by user space.
 */
int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run)
{
	unsigned long hsr_ec;
	unsigned long fault_status;
	phys_addr_t fault_ipa;
	struct kvm_memory_slot *memslot;
	bool is_iabt;
	gfn_t gfn;
	int ret, idx;

	hsr_ec = kvm_vcpu_trap_get_class(vcpu);
	is_iabt = (hsr_ec == HSR_EC_IABT);
	fault_ipa = kvm_vcpu_get_fault_ipa(vcpu);

	trace_kvm_guest_fault(*vcpu_pc(vcpu), kvm_vcpu_get_hsr(vcpu),
			      kvm_vcpu_get_hfar(vcpu), fault_ipa);

	/* Check the stage-2 fault is trans. fault or write fault */
	fault_status = kvm_vcpu_trap_get_fault(vcpu);
	if (fault_status != FSC_FAULT && fault_status != FSC_PERM) {
		kvm_err("Unsupported fault status: EC=%#lx DFCS=%#lx\n",
			hsr_ec, fault_status);
		return -EFAULT;
	}

	idx = srcu_read_lock(&vcpu->kvm->srcu);

	gfn = fault_ipa >> PAGE_SHIFT;
	if (!kvm_is_visible_gfn(vcpu->kvm, gfn)) {
		if (is_iabt) {
			/* Prefetch Abort on I/O address */
			kvm_inject_pabt(vcpu, kvm_vcpu_get_hfar(vcpu));
			ret = 1;
			goto out_unlock;
		}

		if (fault_status != FSC_FAULT) {
			kvm_err("Unsupported fault status on io memory: %#lx\n",
				fault_status);
			ret = -EFAULT;
			goto out_unlock;
		}

		/* Adjust page offset */
		fault_ipa |= kvm_vcpu_get_hfar(vcpu) & ~PAGE_MASK;
		ret = io_mem_abort(vcpu, run, fault_ipa);
		goto out_unlock;
	}

	memslot = gfn_to_memslot(vcpu->kvm, gfn);

	ret = user_mem_abort(vcpu, fault_ipa, gfn, memslot, fault_status);
	if (ret == 0)
		ret = 1;
out_unlock:
	srcu_read_unlock(&vcpu->kvm->srcu, idx);
	return ret;
}

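/*
 * MMU notifier plumbing: translate a host virtual address range into the
 * guest physical ranges it backs (one memslot at a time) and invoke
 * @handler on each affected page.
 */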
static void handle_hva_to_gpa(struct kvm *kvm,
			      unsigned long start,
			      unsigned long end,
			      void (*handler)(struct kvm *kvm,
					      gpa_t gpa, void *data),
			      void *data)
{
	struct kvm_memslots *slots;
	struct kvm_memory_slot *memslot;

	slots = kvm_memslots(kvm);

	/* we only care about the pages that the guest sees */
	kvm_for_each_memslot(memslot, slots) {
		unsigned long hva_start, hva_end;
		gfn_t gfn, gfn_end;

		hva_start = max(start, memslot->userspace_addr);
		hva_end = min(end, memslot->userspace_addr +
					(memslot->npages << PAGE_SHIFT));
		if (hva_start >= hva_end)
			continue;

		/*
		 * {gfn(page) | page intersects with [hva_start, hva_end)} =
		 * {gfn_start, gfn_start+1, ..., gfn_end-1}.
		 */
		gfn = hva_to_gfn_memslot(hva_start, memslot);
		gfn_end = hva_to_gfn_memslot(hva_end + PAGE_SIZE - 1, memslot);

		for (; gfn < gfn_end; ++gfn) {
			gpa_t gpa = gfn << PAGE_SHIFT;
			handler(kvm, gpa, data);
		}
	}
}

static void kvm_unmap_hva_handler(struct kvm *kvm, gpa_t gpa, void *data)
{
	unmap_stage2_range(kvm, gpa, PAGE_SIZE);
	kvm_tlb_flush_vmid(kvm);
}

int kvm_unmap_hva(struct kvm *kvm, unsigned long hva)
{
	unsigned long end = hva + PAGE_SIZE;

	if (!kvm->arch.pgd)
		return 0;

	trace_kvm_unmap_hva(hva);
	handle_hva_to_gpa(kvm, hva, end, &kvm_unmap_hva_handler, NULL);
	return 0;
}

int kvm_unmap_hva_range(struct kvm *kvm,
			unsigned long start, unsigned long end)
{
	if (!kvm->arch.pgd)
		return 0;

	trace_kvm_unmap_hva_range(start, end);
	handle_hva_to_gpa(kvm, start, end, &kvm_unmap_hva_handler, NULL);
	return 0;
}

static void kvm_set_spte_handler(struct kvm *kvm, gpa_t gpa, void *data)
{
	pte_t *pte = (pte_t *)data;

	stage2_set_pte(kvm, NULL, gpa, pte, false);
}

void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte)
{
	unsigned long end = hva + PAGE_SIZE;
	pte_t stage2_pte;

	if (!kvm->arch.pgd)
		return;

	trace_kvm_set_spte_hva(hva);
	stage2_pte = pfn_pte(pte_pfn(pte), PAGE_S2);
	handle_hva_to_gpa(kvm, hva, end, &kvm_set_spte_handler, &stage2_pte);
}

void kvm_mmu_free_memory_caches(struct kvm_vcpu *vcpu)
{
	mmu_free_memory_cache(&vcpu->arch.mmu_page_cache);
}

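/*
 * kvm_mmu_get_httbr() hands the physical address of the Hyp-mode page table
 * (hyp_pgd) to the init code so it can be programmed into HTTBR, while
 * kvm_mmu_init() only checks that a Hyp PGD was allocated (hyp_pgd is set up
 * elsewhere, by the kernel's idmap code).
 */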
phys_addr_t kvm_mmu_get_httbr(void)
{
	VM_BUG_ON(!virt_addr_valid(hyp_pgd));
	return virt_to_phys(hyp_pgd);
}

int kvm_mmu_init(void)
{
	if (!hyp_pgd) {
		kvm_err("Hyp mode PGD not allocated\n");
		return -ENOMEM;
	}

	return 0;
}

/**
 * kvm_clear_hyp_idmap - remove all idmaps from the hyp pgd
 *
 * Free the underlying pmds for all pgds in range and clear the pgds (but
 * don't free them) afterwards.
 */
void kvm_clear_hyp_idmap(void)
{
	unsigned long addr, end;
	unsigned long next;
	pgd_t *pgd = hyp_pgd;
	pud_t *pud;
	pmd_t *pmd;

	addr = virt_to_phys(__hyp_idmap_text_start);
	end = virt_to_phys(__hyp_idmap_text_end);

	pgd += pgd_index(addr);
	do {
		next = pgd_addr_end(addr, end);
		if (pgd_none_or_clear_bad(pgd))
			continue;
		pud = pud_offset(pgd, addr);
		pmd = pmd_offset(pud, addr);

		pud_clear(pud);
		clean_pmd_entry(pmd);
		pmd_free(NULL, (pmd_t *)((unsigned long)pmd & PAGE_MASK));
	} while (pgd++, addr = next, addr < end);
}