/*
 * Copyright (C) 2012 - Virtual Open Systems and Columbia University
 * Author: Christoffer Dall <c.dall@virtualopensystems.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License, version 2, as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 */

#include <linux/mman.h>
#include <linux/kvm_host.h>
#include <linux/io.h>
#include <trace/events/kvm.h>
#include <asm/pgalloc.h>
#include <asm/cacheflush.h>
#include <asm/kvm_arm.h>
#include <asm/kvm_mmu.h>
#include <asm/kvm_mmio.h>
#include <asm/kvm_asm.h>
#include <asm/kvm_emulate.h>

#include "trace.h"

extern char __hyp_idmap_text_start[], __hyp_idmap_text_end[];

static pgd_t *hyp_pgd;
static DEFINE_MUTEX(kvm_hyp_pgd_mutex);

static void kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa)
{
	kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, kvm, ipa);
}

static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *cache,
				  int min, int max)
{
	void *page;

	BUG_ON(max > KVM_NR_MEM_OBJS);
	if (cache->nobjs >= min)
		return 0;
	while (cache->nobjs < max) {
		page = (void *)__get_free_page(PGALLOC_GFP);
		if (!page)
			return -ENOMEM;
		cache->objects[cache->nobjs++] = page;
	}
	return 0;
}

static void mmu_free_memory_cache(struct kvm_mmu_memory_cache *mc)
{
	while (mc->nobjs)
		free_page((unsigned long)mc->objects[--mc->nobjs]);
}

static void *mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc)
{
	void *p;

	BUG_ON(!mc || !mc->nobjs);
	p = mc->objects[--mc->nobjs];
	return p;
}
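
/*
 * Usage sketch (illustrative, mirroring user_mem_abort() below): the cache
 * is topped up while it is still safe to sleep, and the pre-allocated pages
 * are then consumed with the mmu_lock spinlock held:
 *
 *	ret = mmu_topup_memory_cache(&cache, 2, KVM_NR_MEM_OBJS);
 *	if (ret)
 *		return ret;
 *	spin_lock(&kvm->mmu_lock);
 *	ret = stage2_set_pte(kvm, &cache, addr, &pte, false);
 *	spin_unlock(&kvm->mmu_lock);
 */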

static void clear_pud_entry(pud_t *pud)
{
	pmd_t *pmd_table = pmd_offset(pud, 0);
	pud_clear(pud);
	pmd_free(NULL, pmd_table);
	put_page(virt_to_page(pud));
}

static void clear_pmd_entry(pmd_t *pmd)
{
	pte_t *pte_table = pte_offset_kernel(pmd, 0);
	pmd_clear(pmd);
	pte_free_kernel(NULL, pte_table);
	put_page(virt_to_page(pmd));
}

static bool pmd_empty(pmd_t *pmd)
{
	struct page *pmd_page = virt_to_page(pmd);
	return page_count(pmd_page) == 1;
}

static void clear_pte_entry(pte_t *pte)
{
	if (pte_present(*pte)) {
		kvm_set_pte(pte, __pte(0));
		put_page(virt_to_page(pte));
	}
}

static bool pte_empty(pte_t *pte)
{
	struct page *pte_page = virt_to_page(pte);
	return page_count(pte_page) == 1;
}

static void unmap_range(pgd_t *pgdp, unsigned long long start, u64 size)
{
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;
	pte_t *pte;
	unsigned long long addr = start, end = start + size;
	u64 range;

	while (addr < end) {
		pgd = pgdp + pgd_index(addr);
		pud = pud_offset(pgd, addr);
		if (pud_none(*pud)) {
			addr += PUD_SIZE;
			continue;
		}

		pmd = pmd_offset(pud, addr);
		if (pmd_none(*pmd)) {
			addr += PMD_SIZE;
			continue;
		}

		pte = pte_offset_kernel(pmd, addr);
		clear_pte_entry(pte);
		range = PAGE_SIZE;

		/* If we emptied the pte, walk back up the ladder */
		if (pte_empty(pte)) {
			clear_pmd_entry(pmd);
			range = PMD_SIZE;
			if (pmd_empty(pmd)) {
				clear_pud_entry(pud);
				range = PUD_SIZE;
			}
		}

		addr += range;
	}
}

/**
 * free_hyp_pgds - free Hyp-mode page tables
 *
 * Assumes hyp_pgd is a page table used strictly in Hyp-mode and therefore
 * contains either mappings in the kernel memory area (above PAGE_OFFSET),
 * or device mappings in the vmalloc range (from VMALLOC_START to
 * VMALLOC_END).
 */
void free_hyp_pgds(void)
{
	unsigned long addr;

	mutex_lock(&kvm_hyp_pgd_mutex);

	if (hyp_pgd) {
		for (addr = PAGE_OFFSET; virt_addr_valid(addr); addr += PGDIR_SIZE)
			unmap_range(hyp_pgd, KERN_TO_HYP(addr), PGDIR_SIZE);
		for (addr = VMALLOC_START; is_vmalloc_addr((void *)addr); addr += PGDIR_SIZE)
			unmap_range(hyp_pgd, KERN_TO_HYP(addr), PGDIR_SIZE);
		kfree(hyp_pgd);
	}

	mutex_unlock(&kvm_hyp_pgd_mutex);
}

static void create_hyp_pte_mappings(pmd_t *pmd, unsigned long start,
				    unsigned long end, unsigned long pfn,
				    pgprot_t prot)
{
	pte_t *pte;
	unsigned long addr;

	addr = start;
	do {
		pte = pte_offset_kernel(pmd, addr);
		kvm_set_pte(pte, pfn_pte(pfn, prot));
		get_page(virt_to_page(pte));
		pfn++;
	} while (addr += PAGE_SIZE, addr != end);
}

static int create_hyp_pmd_mappings(pud_t *pud, unsigned long start,
				   unsigned long end, unsigned long pfn,
				   pgprot_t prot)
{
	pmd_t *pmd;
	pte_t *pte;
	unsigned long addr, next;

	addr = start;
	do {
		pmd = pmd_offset(pud, addr);

		BUG_ON(pmd_sect(*pmd));

		if (pmd_none(*pmd)) {
			pte = pte_alloc_one_kernel(NULL, addr);
			if (!pte) {
				kvm_err("Cannot allocate Hyp pte\n");
				return -ENOMEM;
			}
			pmd_populate_kernel(NULL, pmd, pte);
			get_page(virt_to_page(pmd));
		}

		next = pmd_addr_end(addr, end);

		create_hyp_pte_mappings(pmd, addr, next, pfn, prot);
		pfn += (next - addr) >> PAGE_SHIFT;
	} while (addr = next, addr != end);

	return 0;
}

static int __create_hyp_mappings(pgd_t *pgdp,
				 unsigned long start, unsigned long end,
				 unsigned long pfn, pgprot_t prot)
{
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;
	unsigned long addr, next;
	int err = 0;

	mutex_lock(&kvm_hyp_pgd_mutex);
	addr = start & PAGE_MASK;
	end = PAGE_ALIGN(end);
	do {
		pgd = pgdp + pgd_index(addr);
		pud = pud_offset(pgd, addr);

		if (pud_none_or_clear_bad(pud)) {
			pmd = pmd_alloc_one(NULL, addr);
			if (!pmd) {
				kvm_err("Cannot allocate Hyp pmd\n");
				err = -ENOMEM;
				goto out;
			}
			pud_populate(NULL, pud, pmd);
			get_page(virt_to_page(pud));
		}

		next = pgd_addr_end(addr, end);
		err = create_hyp_pmd_mappings(pud, addr, next, pfn, prot);
		if (err)
			goto out;
		pfn += (next - addr) >> PAGE_SHIFT;
	} while (addr = next, addr != end);
out:
	mutex_unlock(&kvm_hyp_pgd_mutex);
	return err;
}

/**
 * create_hyp_mappings - duplicate a kernel virtual address range in Hyp mode
 * @from:	The virtual kernel start address of the range
 * @to:		The virtual kernel end address of the range (exclusive)
 *
 * The same virtual address as the kernel virtual address is also used
 * in Hyp-mode mapping (modulo HYP_PAGE_OFFSET) to the same underlying
 * physical pages.
 */
int create_hyp_mappings(void *from, void *to)
{
	unsigned long phys_addr = virt_to_phys(from);
	unsigned long start = KERN_TO_HYP((unsigned long)from);
	unsigned long end = KERN_TO_HYP((unsigned long)to);

	/* Check for a valid kernel memory mapping */
	if (!virt_addr_valid(from) || !virt_addr_valid(to - 1))
		return -EINVAL;

	return __create_hyp_mappings(hyp_pgd, start, end,
				     __phys_to_pfn(phys_addr), PAGE_HYP);
}
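
/*
 * Usage sketch (illustrative, not part of this file): the arch init code is
 * expected to mirror the kernel's Hyp code into Hyp mode roughly like this,
 * assuming linker-provided section markers for the Hyp text:
 *
 *	err = create_hyp_mappings(__kvm_hyp_code_start, __kvm_hyp_code_end);
 *	if (err)
 *		goto out_err;
 */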

/**
 * create_hyp_io_mappings - duplicate a kernel IO mapping into Hyp mode
 * @from:	The kernel start VA of the range
 * @to:		The kernel end VA of the range (exclusive)
 * @phys_addr:	The physical start address which gets mapped
 *
 * The resulting HYP VA is the same as the kernel VA, modulo
 * HYP_PAGE_OFFSET.
 */
int create_hyp_io_mappings(void *from, void *to, phys_addr_t phys_addr)
{
	unsigned long start = KERN_TO_HYP((unsigned long)from);
	unsigned long end = KERN_TO_HYP((unsigned long)to);

	/* Check for a valid kernel IO mapping */
	if (!is_vmalloc_addr(from) || !is_vmalloc_addr(to - 1))
		return -EINVAL;

	return __create_hyp_mappings(hyp_pgd, start, end,
				     __phys_to_pfn(phys_addr), PAGE_HYP_DEVICE);
}
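
/*
 * Usage sketch (illustrative, hypothetical names): a device region the
 * kernel has already ioremap()ed can be made visible to Hyp code at the
 * corresponding HYP VA:
 *
 *	vaddr = ioremap(dev_phys, SZ_4K);
 *	err = create_hyp_io_mappings(vaddr, vaddr + SZ_4K, dev_phys);
 */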

/**
 * kvm_alloc_stage2_pgd - allocate level-1 table for stage-2 translation.
 * @kvm:	The KVM struct pointer for the VM.
 *
 * Allocates the 1st level table only of size defined by S2_PGD_ORDER (can
 * support either full 40-bit input addresses or limited to 32-bit input
 * addresses). Clears the allocated pages.
 *
 * Note we don't need locking here as this is only called when the VM is
 * created, which can only be done once.
 */
int kvm_alloc_stage2_pgd(struct kvm *kvm)
{
	pgd_t *pgd;

	if (kvm->arch.pgd != NULL) {
		kvm_err("kvm_arch already initialized?\n");
		return -EINVAL;
	}

	pgd = (pgd_t *)__get_free_pages(GFP_KERNEL, S2_PGD_ORDER);
	if (!pgd)
		return -ENOMEM;

	/* stage-2 pgd must be aligned to its size */
	VM_BUG_ON((unsigned long)pgd & (S2_PGD_SIZE - 1));

	memset(pgd, 0, PTRS_PER_S2_PGD * sizeof(pgd_t));
	kvm_clean_pgd(pgd);
	kvm->arch.pgd = pgd;

	return 0;
}

/**
 * unmap_stage2_range -- Clear stage2 page table entries to unmap a range
 * @kvm:   The VM pointer
 * @start: The intermediate physical base address of the range to unmap
 * @size:  The size of the area to unmap
 *
 * Clear a range of stage-2 mappings, lowering the various ref-counts.  Must
 * be called while holding mmu_lock (unless for freeing the stage2 pgd before
 * destroying the VM), otherwise another faulting VCPU may come in and mess
 * with things behind our backs.
 */
static void unmap_stage2_range(struct kvm *kvm, phys_addr_t start, u64 size)
{
	unmap_range(kvm->arch.pgd, start, size);
}

/**
 * kvm_free_stage2_pgd - free all stage-2 tables
 * @kvm:	The KVM struct pointer for the VM.
 *
 * Walks the level-1 page table pointed to by kvm->arch.pgd and frees all
 * underlying level-2 and level-3 tables before freeing the actual level-1
 * table and setting the struct pointer to NULL.
 *
 * Note we don't need locking here as this is only called when the VM is
 * destroyed, which can only be done once.
 */
void kvm_free_stage2_pgd(struct kvm *kvm)
{
	if (kvm->arch.pgd == NULL)
		return;

	unmap_stage2_range(kvm, 0, KVM_PHYS_SIZE);
	free_pages((unsigned long)kvm->arch.pgd, S2_PGD_ORDER);
	kvm->arch.pgd = NULL;
}

static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
			  phys_addr_t addr, const pte_t *new_pte, bool iomap)
{
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;
	pte_t *pte, old_pte;

	/* Create 2nd stage page table mapping - Level 1 */
	pgd = kvm->arch.pgd + pgd_index(addr);
	pud = pud_offset(pgd, addr);
	if (pud_none(*pud)) {
		if (!cache)
			return 0; /* ignore calls from kvm_set_spte_hva */
		pmd = mmu_memory_cache_alloc(cache);
		pud_populate(NULL, pud, pmd);
		get_page(virt_to_page(pud));
	}

	pmd = pmd_offset(pud, addr);

	/* Create 2nd stage page table mapping - Level 2 */
	if (pmd_none(*pmd)) {
		if (!cache)
			return 0; /* ignore calls from kvm_set_spte_hva */
		pte = mmu_memory_cache_alloc(cache);
		kvm_clean_pte(pte);
		pmd_populate_kernel(NULL, pmd, pte);
		get_page(virt_to_page(pmd));
	}

	pte = pte_offset_kernel(pmd, addr);

	if (iomap && pte_present(*pte))
		return -EFAULT;

	/* Create 2nd stage page table mapping - Level 3 */
	old_pte = *pte;
	kvm_set_pte(pte, *new_pte);
	if (pte_present(old_pte))
		kvm_tlb_flush_vmid_ipa(kvm, addr);
	else
		get_page(virt_to_page(pte));

	return 0;
}

/**
 * kvm_phys_addr_ioremap - map a device range to guest IPA
 *
 * @kvm:	The KVM pointer
 * @guest_ipa:	The IPA at which to insert the mapping
 * @pa:		The physical address of the device
 * @size:	The size of the mapping
 */
int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
			  phys_addr_t pa, unsigned long size)
{
	phys_addr_t addr, end;
	int ret = 0;
	unsigned long pfn;
	struct kvm_mmu_memory_cache cache = { 0, };

	end = (guest_ipa + size + PAGE_SIZE - 1) & PAGE_MASK;
	pfn = __phys_to_pfn(pa);

	for (addr = guest_ipa; addr < end; addr += PAGE_SIZE) {
		pte_t pte = pfn_pte(pfn, PAGE_S2_DEVICE);
		kvm_set_s2pte_writable(&pte);

		ret = mmu_topup_memory_cache(&cache, 2, 2);
		if (ret)
			goto out;
		spin_lock(&kvm->mmu_lock);
		ret = stage2_set_pte(kvm, &cache, addr, &pte, true);
		spin_unlock(&kvm->mmu_lock);
		if (ret)
			goto out;

		pfn++;
	}

out:
	mmu_free_memory_cache(&cache);
	return ret;
}
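
/*
 * Usage sketch (illustrative, hypothetical addresses): to expose a device
 * living at physical address 0x2c002000 to the guest at IPA 0x10000000:
 *
 *	ret = kvm_phys_addr_ioremap(kvm, 0x10000000, 0x2c002000, SZ_8K);
 *	if (ret)
 *		return ret;
 */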

static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
			  gfn_t gfn, struct kvm_memory_slot *memslot,
			  unsigned long fault_status)
{
	pte_t new_pte;
	pfn_t pfn;
	int ret;
	bool write_fault, writable;
	unsigned long mmu_seq;
	struct kvm_mmu_memory_cache *memcache = &vcpu->arch.mmu_page_cache;

	write_fault = kvm_is_write_fault(kvm_vcpu_get_hsr(vcpu));
	if (fault_status == FSC_PERM && !write_fault) {
		kvm_err("Unexpected L2 read permission error\n");
		return -EFAULT;
	}

	/* We need minimum second+third level pages */
	ret = mmu_topup_memory_cache(memcache, 2, KVM_NR_MEM_OBJS);
	if (ret)
		return ret;

	mmu_seq = vcpu->kvm->mmu_notifier_seq;
	/*
	 * Ensure the read of mmu_notifier_seq happens before we call
	 * gfn_to_pfn_prot (which calls get_user_pages), so that we don't risk
	 * the page we just got a reference to getting unmapped before we have
	 * a chance to grab the mmu_lock, which ensures that if the page gets
	 * unmapped afterwards, the call to kvm_unmap_hva will take it away
	 * from us again properly. This smp_rmb() interacts with the smp_wmb()
	 * in kvm_mmu_notifier_invalidate_<page|range_end>.
	 */
	smp_rmb();

	pfn = gfn_to_pfn_prot(vcpu->kvm, gfn, write_fault, &writable);
	if (is_error_pfn(pfn))
		return -EFAULT;

	new_pte = pfn_pte(pfn, PAGE_S2);
	coherent_icache_guest_page(vcpu->kvm, gfn);

	spin_lock(&vcpu->kvm->mmu_lock);
	if (mmu_notifier_retry(vcpu->kvm, mmu_seq))
		goto out_unlock;
	if (writable) {
		kvm_set_s2pte_writable(&new_pte);
		kvm_set_pfn_dirty(pfn);
	}
	stage2_set_pte(vcpu->kvm, memcache, fault_ipa, &new_pte, false);

out_unlock:
	spin_unlock(&vcpu->kvm->mmu_lock);
	kvm_release_pfn_clean(pfn);
	return 0;
}

/**
 * kvm_handle_guest_abort - handles all 2nd stage aborts
 * @vcpu:	the VCPU pointer
 * @run:	the kvm_run structure
 *
 * Any abort that gets to the host is almost guaranteed to be caused by a
 * missing second stage translation table entry, which can mean either that
 * the guest simply needs more memory and we must allocate an appropriate
 * page, or that the guest tried to access I/O memory, which is emulated by
 * user space. The distinction is based on the IPA causing the fault and
 * whether this memory region has been registered as standard RAM by user
 * space.
 */
int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run)
{
	unsigned long fault_status;
	phys_addr_t fault_ipa;
	struct kvm_memory_slot *memslot;
	bool is_iabt;
	gfn_t gfn;
	int ret, idx;

	is_iabt = kvm_vcpu_trap_is_iabt(vcpu);
	fault_ipa = kvm_vcpu_get_fault_ipa(vcpu);

	trace_kvm_guest_fault(*vcpu_pc(vcpu), kvm_vcpu_get_hsr(vcpu),
			      kvm_vcpu_get_hfar(vcpu), fault_ipa);

	/* Check the stage-2 fault is trans. fault or write fault */
	fault_status = kvm_vcpu_trap_get_fault(vcpu);
	if (fault_status != FSC_FAULT && fault_status != FSC_PERM) {
		kvm_err("Unsupported fault status: EC=%#x DFCS=%#lx\n",
			kvm_vcpu_trap_get_class(vcpu), fault_status);
		return -EFAULT;
	}

	idx = srcu_read_lock(&vcpu->kvm->srcu);

	gfn = fault_ipa >> PAGE_SHIFT;
	if (!kvm_is_visible_gfn(vcpu->kvm, gfn)) {
		if (is_iabt) {
			/* Prefetch Abort on I/O address */
			kvm_inject_pabt(vcpu, kvm_vcpu_get_hfar(vcpu));
			ret = 1;
			goto out_unlock;
		}

		if (fault_status != FSC_FAULT) {
			kvm_err("Unsupported fault status on io memory: %#lx\n",
				fault_status);
			ret = -EFAULT;
			goto out_unlock;
		}

		/*
		 * The IPA is reported as [MAX:12], so we need to
		 * complement it with the bottom 12 bits from the
		 * faulting VA. This is always 12 bits, irrespective
		 * of the page size.
		 */
		fault_ipa |= kvm_vcpu_get_hfar(vcpu) & ((1 << 12) - 1);
		ret = io_mem_abort(vcpu, run, fault_ipa);
		goto out_unlock;
	}

	memslot = gfn_to_memslot(vcpu->kvm, gfn);

	ret = user_mem_abort(vcpu, fault_ipa, gfn, memslot, fault_status);
	if (ret == 0)
		ret = 1;
out_unlock:
	srcu_read_unlock(&vcpu->kvm->srcu, idx);
	return ret;
}

static void handle_hva_to_gpa(struct kvm *kvm,
			      unsigned long start,
			      unsigned long end,
			      void (*handler)(struct kvm *kvm,
					      gpa_t gpa, void *data),
			      void *data)
{
	struct kvm_memslots *slots;
	struct kvm_memory_slot *memslot;

	slots = kvm_memslots(kvm);

	/* we only care about the pages that the guest sees */
	kvm_for_each_memslot(memslot, slots) {
		unsigned long hva_start, hva_end;
		gfn_t gfn, gfn_end;

		hva_start = max(start, memslot->userspace_addr);
		hva_end = min(end, memslot->userspace_addr +
					(memslot->npages << PAGE_SHIFT));
		if (hva_start >= hva_end)
			continue;

		/*
		 * {gfn(page) | page intersects with [hva_start, hva_end)} =
		 * {gfn_start, gfn_start+1, ..., gfn_end-1}.
		 */
		gfn = hva_to_gfn_memslot(hva_start, memslot);
		gfn_end = hva_to_gfn_memslot(hva_end + PAGE_SIZE - 1, memslot);

		for (; gfn < gfn_end; ++gfn) {
			gpa_t gpa = gfn << PAGE_SHIFT;
			handler(kvm, gpa, data);
		}
	}
}

static void kvm_unmap_hva_handler(struct kvm *kvm, gpa_t gpa, void *data)
{
	unmap_stage2_range(kvm, gpa, PAGE_SIZE);
	kvm_tlb_flush_vmid_ipa(kvm, gpa);
}

int kvm_unmap_hva(struct kvm *kvm, unsigned long hva)
{
	unsigned long end = hva + PAGE_SIZE;

	if (!kvm->arch.pgd)
		return 0;

	trace_kvm_unmap_hva(hva);
	handle_hva_to_gpa(kvm, hva, end, &kvm_unmap_hva_handler, NULL);
	return 0;
}

int kvm_unmap_hva_range(struct kvm *kvm,
			unsigned long start, unsigned long end)
{
	if (!kvm->arch.pgd)
		return 0;

	trace_kvm_unmap_hva_range(start, end);
	handle_hva_to_gpa(kvm, start, end, &kvm_unmap_hva_handler, NULL);
	return 0;
}

static void kvm_set_spte_handler(struct kvm *kvm, gpa_t gpa, void *data)
{
	pte_t *pte = (pte_t *)data;

	stage2_set_pte(kvm, NULL, gpa, pte, false);
}

void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte)
{
	unsigned long end = hva + PAGE_SIZE;
	pte_t stage2_pte;

	if (!kvm->arch.pgd)
		return;

	trace_kvm_set_spte_hva(hva);
	stage2_pte = pfn_pte(pte_pfn(pte), PAGE_S2);
	handle_hva_to_gpa(kvm, hva, end, &kvm_set_spte_handler, &stage2_pte);
}

void kvm_mmu_free_memory_caches(struct kvm_vcpu *vcpu)
{
	mmu_free_memory_cache(&vcpu->arch.mmu_page_cache);
}

phys_addr_t kvm_mmu_get_httbr(void)
{
	VM_BUG_ON(!virt_addr_valid(hyp_pgd));
	return virt_to_phys(hyp_pgd);
}

int kvm_mmu_init(void)
{
	unsigned long hyp_idmap_start = virt_to_phys(__hyp_idmap_text_start);
	unsigned long hyp_idmap_end = virt_to_phys(__hyp_idmap_text_end);
	int err;

	hyp_pgd = kzalloc(PTRS_PER_PGD * sizeof(pgd_t), GFP_KERNEL);
	if (!hyp_pgd) {
		kvm_err("Hyp mode PGD not allocated\n");
		err = -ENOMEM;
		goto out;
	}

	/* Create the idmap in the Hyp page tables */
	err = __create_hyp_mappings(hyp_pgd,
				    hyp_idmap_start, hyp_idmap_end,
				    __phys_to_pfn(hyp_idmap_start),
				    PAGE_HYP);
	if (err) {
		kvm_err("Failed to idmap %lx-%lx\n",
			hyp_idmap_start, hyp_idmap_end);
		goto out;
	}

	return 0;
out:
	free_hyp_pgds();
	return err;
}

/**
 * kvm_clear_hyp_idmap - remove all idmaps from the hyp pgd
 *
 * Free the underlying pmds for all pgds in range and clear the pgds (but
 * don't free them) afterwards.
 */
void kvm_clear_hyp_idmap(void)
{
	unsigned long addr, end;
	unsigned long next;
	pgd_t *pgd = hyp_pgd;
	pud_t *pud;
	pmd_t *pmd;

	addr = virt_to_phys(__hyp_idmap_text_start);
	end = virt_to_phys(__hyp_idmap_text_end);

	pgd += pgd_index(addr);
	do {
		next = pgd_addr_end(addr, end);
		if (pgd_none_or_clear_bad(pgd))
			continue;
		pud = pud_offset(pgd, addr);
		pmd = pmd_offset(pud, addr);

		pud_clear(pud);
		kvm_clean_pmd_entry(pmd);
		pmd_free(NULL, (pmd_t *)((unsigned long)pmd & PAGE_MASK));
	} while (pgd++, addr = next, addr < end);
}