2009-10-30 08:47:13 +03:00
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License, version 2, as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 *
 * Copyright SUSE Linux Products GmbH 2009
 *
 * Authors: Alexander Graf <agraf@suse.de>
 */
# include <linux/types.h>
# include <linux/string.h>
# include <linux/kvm.h>
# include <linux/kvm_host.h>
# include <linux/highmem.h>
# include <asm/tlbflush.h>
# include <asm/kvm_ppc.h>
# include <asm/kvm_book3s.h>
/* #define DEBUG_MMU */
/* #define DEBUG_MMU_PTE */
/* #define DEBUG_MMU_PTE_IP 0xfff14c40 */
/*
 * General MMU tracing. Expands to printk(KERN_INFO ...) when DEBUG_MMU is
 * defined; otherwise it expands to an empty statement and its arguments
 * are not evaluated.
 */
#ifndef DEBUG_MMU
#define dprintk(X...) do { } while (0)
#else
#define dprintk(X...) printk(KERN_INFO X)
#endif
2010-02-19 13:00:36 +03:00
/*
 * PTE-level tracing. Expands to printk(KERN_INFO ...) when DEBUG_MMU_PTE is
 * defined; otherwise it expands to an empty statement and its arguments
 * are not evaluated.
 */
#ifdef DEBUG_MMU_PTE
#define dprintk_pte(X...) printk(KERN_INFO X)
#else
#define dprintk_pte(X...) do { } while (0)
#endif
/* Usage-tracking bits in the second 32-bit word of a guest PTE. */
#define PTEG_FLAG_ACCESSED	0x0100
#define PTEG_FLAG_DIRTY		0x0080
2010-04-16 02:11:53 +04:00
/* Fallback when no included header has already defined SID_SHIFT. */
#if !defined(SID_SHIFT)
#define SID_SHIFT	28
#endif
2009-10-30 08:47:13 +03:00
/*
 * Filter for PTE-level debug output.
 *
 * When DEBUG_MMU_PTE_IP is defined, only a vcpu whose arch.pc equals that
 * value passes the filter; without it every vcpu passes.
 */
static inline bool check_debug_ip(struct kvm_vcpu *vcpu)
{
#ifndef DEBUG_MMU_PTE_IP
	return true;
#else
	return vcpu->arch.pc == DEBUG_MMU_PTE_IP;
#endif
}
2010-08-03 03:06:11 +04:00
/* Return the low 28 bits of a raw segment register value (its VSID field). */
static inline u32 sr_vsid(u32 sr_raw)
{
	const u32 vsid_mask = 0x0fffffff;

	return sr_raw & vsid_mask;
}
/* A raw segment register value counts as valid when bit 0x80000000 is clear. */
static inline bool sr_valid(u32 sr_raw)
{
	return !(sr_raw & 0x80000000);
}
/*
 * Test bit 0x40000000 of a raw segment register value. The PTE lookup in
 * this file applies this key bit when the guest MSR has MSR_PR clear.
 */
static inline bool sr_ks(u32 sr_raw)
{
	return (sr_raw & 0x40000000) != 0;
}
/*
 * Test bit 0x20000000 of a raw segment register value. The PTE lookup in
 * this file applies this key bit when the guest MSR has MSR_PR set.
 */
static inline bool sr_kp(u32 sr_raw)
{
	return (sr_raw & 0x20000000) != 0;
}
/*
 * Test bit 0x10000000 of a raw segment register value.
 * NOTE(review): presumably the segment's no-execute bit, going by the name;
 * nothing in this part of the file consumes it.
 */
static inline bool sr_nx(u32 sr_raw)
{
	return (sr_raw & 0x10000000) != 0;
}
2009-10-30 08:47:13 +03:00
static int kvmppc_mmu_book3s_32_xlate_bat ( struct kvm_vcpu * vcpu , gva_t eaddr ,
struct kvmppc_pte * pte , bool data ) ;
2010-04-20 04:49:46 +04:00
static int kvmppc_mmu_book3s_32_esid_to_vsid ( struct kvm_vcpu * vcpu , ulong esid ,
2010-03-24 23:48:20 +03:00
u64 * vsid ) ;
2009-10-30 08:47:13 +03:00
2010-08-03 04:29:27 +04:00
/*
 * Return the raw guest segment register covering eaddr. The register is
 * selected by the top four bits of the 32-bit effective address.
 */
static u32 find_sr(struct kvm_vcpu *vcpu, gva_t eaddr)
{
	unsigned int srnum = (eaddr >> 28) & 0xf;

	return vcpu->arch.shared->sr[srnum];
}
/*
 * Convert a guest effective address into a virtual page number.
 *
 * @vcpu:  vcpu whose translation state is consulted
 * @eaddr: guest effective address
 * @data:  true for a data access, false for an instruction fetch
 *
 * If a BAT translation succeeds, its vpage is returned. Otherwise the
 * result is the VSID from esid_to_vsid() shifted left by 16, combined with
 * bits 12..27 of eaddr.
 */
static u64 kvmppc_mmu_book3s_32_ea_to_vp(struct kvm_vcpu *vcpu, gva_t eaddr,
					 bool data)
{
	struct kvmppc_pte bat_pte;
	u64 page_index;
	u64 vsid;

	if (kvmppc_mmu_book3s_32_xlate_bat(vcpu, eaddr, &bat_pte, data) == 0)
		return bat_pte.vpage;

	kvmppc_mmu_book3s_32_esid_to_vsid(vcpu, eaddr >> SID_SHIFT, &vsid);
	page_index = ((u64)eaddr >> 12) & 0xffff;

	return page_index | (vsid << 16);
}
/* MMU reset_msr hook: sets the guest MSR to 0 through kvmppc_set_msr(). */
static void kvmppc_mmu_book3s_32_reset_msr(struct kvm_vcpu *vcpu)
{
	kvmppc_set_msr(vcpu, 0);
}
2013-09-20 08:52:49 +04:00
static hva_t kvmppc_mmu_book3s_32_get_pteg ( struct kvm_vcpu * vcpu ,
2010-08-03 03:06:11 +04:00
u32 sre , gva_t eaddr ,
2009-10-30 08:47:13 +03:00
bool primary )
{
2013-09-20 08:52:49 +04:00
struct kvmppc_vcpu_book3s * vcpu_book3s = to_book3s ( vcpu ) ;
2009-10-30 08:47:13 +03:00
u32 page , hash , pteg , htabmask ;
hva_t r ;
page = ( eaddr & 0x0FFFFFFF ) > > 12 ;
htabmask = ( ( vcpu_book3s - > sdr1 & 0x1FF ) < < 16 ) | 0xFFC0 ;
2010-08-03 03:06:11 +04:00
hash = ( ( sr_vsid ( sre ) ^ page ) < < 6 ) ;
2009-10-30 08:47:13 +03:00
if ( ! primary )
hash = ~ hash ;
hash & = htabmask ;
pteg = ( vcpu_book3s - > sdr1 & 0xffff0000 ) | hash ;
dprintk ( " MMU: pc=0x%lx eaddr=0x%lx sdr1=0x%llx pteg=0x%x vsid=0x%x \n " ,
2010-07-29 17:04:16 +04:00
kvmppc_get_pc ( & vcpu_book3s - > vcpu ) , eaddr , vcpu_book3s - > sdr1 , pteg ,
2010-08-03 03:06:11 +04:00
sr_vsid ( sre ) ) ;
2009-10-30 08:47:13 +03:00
2013-09-20 08:52:49 +04:00
r = gfn_to_hva ( vcpu - > kvm , pteg > > PAGE_SHIFT ) ;
2009-10-30 08:47:13 +03:00
if ( kvm_is_error_hva ( r ) )
return r ;
return r | ( pteg & ~ PAGE_MASK ) ;
}
2010-08-03 03:06:11 +04:00
/*
 * Build the first PTE word that a matching guest PTE must carry.
 *
 * The value has bit 0x80000000 set, the VSID shifted left by 7, the
 * secondary-hash bit 0x40 when @primary is false, and the top six bits of
 * the in-segment address in the low bits.
 */
static u32 kvmppc_mmu_book3s_32_get_ptem(u32 sre, gva_t eaddr, bool primary)
{
	u32 ptem = 0x80000000;

	ptem |= sr_vsid(sre) << 7;
	ptem |= (eaddr & 0x0fffffff) >> 22;
	if (!primary)
		ptem |= 0x40;

	return ptem;
}
/*
 * Try to translate eaddr through the guest's BAT registers.
 *
 * @vcpu:  vcpu whose BATs are searched
 * @eaddr: guest effective address
 * @pte:   filled with vpage, raddr and permissions when a BAT matches
 * @data:  true searches the data BATs, false the instruction BATs
 *
 * Returns 0 when a BAT that is both readable and writable covers eaddr,
 * -ENOENT otherwise. Note that @pte may have been written even when
 * -ENOENT is returned (a matching BAT that was then rejected).
 */
static int kvmppc_mmu_book3s_32_xlate_bat(struct kvm_vcpu *vcpu, gva_t eaddr,
					  struct kvmppc_pte *pte, bool data)
{
	struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu);
	/* The array to search does not change across iterations. */
	struct kvmppc_bat *bats = data ? vcpu_book3s->dbat : vcpu_book3s->ibat;
	struct kvmppc_bat *bat;
	int i;

	for (i = 0; i < 8; i++) {
		bat = &bats[i];

		/* Skip BATs not enabled for the current privilege level. */
		if (vcpu->arch.shared->msr & MSR_PR) {
			if (!bat->vp)
				continue;
		} else {
			if (!bat->vs)
				continue;
		}

		if (check_debug_ip(vcpu)) {
			dprintk_pte("%cBAT %02d: 0x%lx - 0x%x (0x%x)\n",
				    data ? 'd' : 'i', i, eaddr, bat->bepi,
				    bat->bepi_mask);
		}

		if ((eaddr & bat->bepi_mask) == bat->bepi) {
			u64 vsid;

			kvmppc_mmu_book3s_32_esid_to_vsid(vcpu,
				eaddr >> SID_SHIFT, &vsid);
			vsid <<= 16;
			pte->vpage = (((u64)eaddr >> 12) & 0xffff) | vsid;

			pte->raddr = bat->brpn | (eaddr & ~bat->bepi_mask);
			pte->may_read = bat->pp;
			pte->may_write = bat->pp > 1;
			pte->may_execute = true;

			if (!pte->may_read) {
				printk(KERN_INFO "BAT is not readable!\n");
				continue;
			}
			if (!pte->may_write) {
				/* let's treat r/o BATs as not-readable for now */
				dprintk_pte("BAT is read-only!\n");
				continue;
			}

			return 0;
		}
	}

	return -ENOENT;
}
/*
 * Translate eaddr by searching one PTE group of the guest hashed page table.
 *
 * @vcpu:    vcpu performing the access
 * @eaddr:   guest effective address
 * @pte:     receives vpage, raddr and the access permissions
 * @data:    true for a data access, false for an instruction fetch
 * @primary: true to search the primary PTEG, false for the secondary one
 *
 * On a hit the accessed bit (and, if the mapping is writable, the dirty
 * bit) is set in the guest's PTE using single-byte writes, and 0 is
 * returned. Returns -ENOENT when the PTEG cannot be read or holds no
 * readable matching entry.
 */
static int kvmppc_mmu_book3s_32_xlate_pte(struct kvm_vcpu *vcpu, gva_t eaddr,
					  struct kvmppc_pte *pte, bool data,
					  bool primary)
{
	u32 sre;
	hva_t ptegp;
	/* Zeroed so the debug dump below never prints stack garbage. */
	u32 pteg[16] = { 0 };
	u32 ptem = 0;
	int i;
	int found = 0;

	sre = find_sr(vcpu, eaddr);

	dprintk_pte("SR 0x%lx: vsid=0x%x, raw=0x%x\n", eaddr >> 28,
		    sr_vsid(sre), sre);

	pte->vpage = kvmppc_mmu_book3s_32_ea_to_vp(vcpu, eaddr, data);

	ptegp = kvmppc_mmu_book3s_32_get_pteg(vcpu, sre, eaddr, primary);
	if (kvm_is_error_hva(ptegp)) {
		printk(KERN_INFO "KVM: Invalid PTEG!\n");
		goto no_page_found;
	}

	ptem = kvmppc_mmu_book3s_32_get_ptem(sre, eaddr, primary);

	/* Snapshot the whole group: 8 entries of two 32-bit words each. */
	if (copy_from_user(pteg, (void __user *)ptegp, sizeof(pteg))) {
		printk(KERN_ERR "KVM: Can't copy data from 0x%lx!\n", ptegp);
		goto no_page_found;
	}

	for (i = 0; i < 16; i += 2) {
		if (ptem == pteg[i]) {
			u8 pp;

			pte->raddr = (pteg[i + 1] & ~(0xFFFULL)) | (eaddr & 0xFFF);
			pp = pteg[i + 1] & 3;

			/*
			 * Fold the segment key that applies to the current
			 * privilege level into bit 2 of pp.
			 */
			if ((sr_kp(sre) && (vcpu->arch.shared->msr & MSR_PR)) ||
			    (sr_ks(sre) && !(vcpu->arch.shared->msr & MSR_PR)))
				pp |= 4;

			pte->may_write = false;
			pte->may_read = false;
			pte->may_execute = true;
			switch (pp) {
			case 0:
			case 1:
			case 2:
			case 6:
				pte->may_write = true;
				/* fall through: writable implies readable */
			case 3:
			case 5:
			case 7:
				pte->may_read = true;
				break;
			default:
				/* pp == 4: neither readable nor writable */
				break;
			}

			if (!pte->may_read)
				continue;

			dprintk_pte("MMU: Found PTE -> %x %x - %x\n",
				    pteg[i], pteg[i + 1], pp);
			found = 1;
			break;
		}
	}

	/* Update PTE C and A bits, so the guest's swapper knows we used the
	   page */
	if (found) {
		u32 pte_r = pteg[i + 1];
		/*
		 * Address of the second PTE word in the guest's table. The
		 * write must target ptegp (guest memory), not the local
		 * pteg[] copy, otherwise the guest never sees the update.
		 */
		char __user *addr = (char __user *)(ptegp + (i + 1) * sizeof(u32));

		/*
		 * Use single-byte writes to update the HPTE, to
		 * conform to what real hardware does.
		 */
		if (pte->may_read && !(pte_r & PTEG_FLAG_ACCESSED)) {
			pte_r |= PTEG_FLAG_ACCESSED;
			/* byte 2 of the word holds bits 8..15 */
			put_user(pte_r >> 8, addr + 2);
		}
		if (pte->may_write && !(pte_r & PTEG_FLAG_DIRTY)) {
			/* XXX should only set this for stores */
			pte_r |= PTEG_FLAG_DIRTY;
			/* byte 3 of the word holds bits 0..7 */
			put_user(pte_r, addr + 3);
		}

		return 0;
	}

no_page_found:

	if (check_debug_ip(vcpu)) {
		dprintk_pte("KVM MMU: No PTE found (sdr1=0x%llx ptegp=0x%lx)\n",
			    to_book3s(vcpu)->sdr1, ptegp);
		for (i = 0; i < 16; i += 2) {
			dprintk_pte("   %02d: 0x%x - 0x%x (0x%x)\n",
				    i, pteg[i], pteg[i + 1], ptem);
		}
	}

	return -ENOENT;
}
/*
 * Top-level guest address translation for the 32-bit Book3S MMU.
 *
 * @vcpu:  vcpu performing the access
 * @eaddr: guest effective address
 * @pte:   receives the translation result
 * @data:  true for a data access, false for an instruction fetch
 *
 * Lookup order: magic page (only with MSR_PR clear), then BATs, then the
 * primary PTEG, then the secondary PTEG. Returns 0 on success or the
 * negative error of the last lookup that was attempted.
 */
static int kvmppc_mmu_book3s_32_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
				      struct kvmppc_pte *pte, bool data)
{
	ulong mp_ea = vcpu->arch.magic_page_ea;
	int r;

	pte->eaddr = eaddr;
	pte->page_size = MMU_PAGE_4K;

	/* Magic page override */
	if (unlikely(mp_ea) &&
	    unlikely((eaddr & ~0xfffULL) == (mp_ea & ~0xfffULL)) &&
	    !(vcpu->arch.shared->msr & MSR_PR)) {
		pte->vpage = kvmppc_mmu_book3s_32_ea_to_vp(vcpu, eaddr, data);
		/*
		 * NOTE(review): the page offset is taken from whatever the
		 * caller left in pte->raddr, not from eaddr - confirm that
		 * this is intended.
		 */
		pte->raddr = vcpu->arch.magic_page_pa | (pte->raddr & 0xfff);
		pte->raddr &= KVM_PAM;
		pte->may_read = true;
		pte->may_write = true;
		pte->may_execute = true;

		return 0;
	}

	r = kvmppc_mmu_book3s_32_xlate_bat(vcpu, eaddr, pte, data);
	if (r >= 0)
		return r;

	r = kvmppc_mmu_book3s_32_xlate_pte(vcpu, eaddr, pte, data, true);
	if (r >= 0)
		return r;

	return kvmppc_mmu_book3s_32_xlate_pte(vcpu, eaddr, pte, data, false);
}
/*
 * mfsrin hook: read guest segment register @srnum from the shared area.
 * No range check is done here; @srnum is used as the array index as given.
 */
static u32 kvmppc_mmu_book3s_32_mfsrin(struct kvm_vcpu *vcpu, u32 srnum)
{
	u32 raw = vcpu->arch.shared->sr[srnum];

	return raw;
}
/*
 * mtsrin hook: store @value into guest segment register @srnum, then call
 * kvmppc_mmu_map_segment() for the effective address at which that segment
 * starts.
 */
static void kvmppc_mmu_book3s_32_mtsrin(struct kvm_vcpu *vcpu, u32 srnum,
					ulong value)
{
	vcpu->arch.shared->sr[srnum] = value;

	kvmppc_mmu_map_segment(vcpu, srnum << SID_SHIFT);
}
/*
 * tlbie hook: emulated TLB invalidation for effective address @ea.
 *
 * The flush is issued for every vcpu of the VM, not only the one executing
 * tlbie, so that the invalidation is effective across all vcpus. @large is
 * not used by this implementation.
 */
static void kvmppc_mmu_book3s_32_tlbie(struct kvm_vcpu *vcpu, ulong ea, bool large)
{
	struct kvm_vcpu *v;
	int i;

	/* flush this VA on all cpus */
	kvm_for_each_vcpu(i, v, vcpu->kvm) {
		kvmppc_mmu_pte_flush(v, ea, 0x0FFFF000);
	}
}
2010-04-20 04:49:46 +04:00
static int kvmppc_mmu_book3s_32_esid_to_vsid ( struct kvm_vcpu * vcpu , ulong esid ,
2009-10-30 08:47:13 +03:00
u64 * vsid )
{
2010-04-20 04:49:48 +04:00
ulong ea = esid < < SID_SHIFT ;
2010-08-03 03:06:11 +04:00
u32 sr ;
2010-04-20 04:49:48 +04:00
u64 gvsid = esid ;
2010-07-29 16:47:43 +04:00
if ( vcpu - > arch . shared - > msr & ( MSR_DR | MSR_IR ) ) {
2010-08-03 04:29:27 +04:00
sr = find_sr ( vcpu , ea ) ;
2010-08-03 03:06:11 +04:00
if ( sr_valid ( sr ) )
gvsid = sr_vsid ( sr ) ;
2010-04-20 04:49:48 +04:00
}
2009-10-30 08:47:13 +03:00
/* In case we only have one of MSR_IR or MSR_DR set, let's put
that in the real - mode context ( and hope RM doesn ' t access
high memory ) */
2010-07-29 16:47:43 +04:00
switch ( vcpu - > arch . shared - > msr & ( MSR_DR | MSR_IR ) ) {
2009-10-30 08:47:13 +03:00
case 0 :
2010-04-20 04:49:48 +04:00
* vsid = VSID_REAL | esid ;
2009-10-30 08:47:13 +03:00
break ;
case MSR_IR :
2010-04-20 04:49:48 +04:00
* vsid = VSID_REAL_IR | gvsid ;
2009-10-30 08:47:13 +03:00
break ;
case MSR_DR :
2010-04-20 04:49:48 +04:00
* vsid = VSID_REAL_DR | gvsid ;
2009-10-30 08:47:13 +03:00
break ;
case MSR_DR | MSR_IR :
2010-08-03 03:06:11 +04:00
if ( sr_valid ( sr ) )
* vsid = sr_vsid ( sr ) ;
2010-06-21 17:24:55 +04:00
else
* vsid = VSID_BAT | gvsid ;
2009-10-30 08:47:13 +03:00
break ;
default :
BUG ( ) ;
}
2010-07-29 16:47:43 +04:00
if ( vcpu - > arch . shared - > msr & MSR_PR )
2010-03-24 23:48:20 +03:00
* vsid | = VSID_PR ;
2009-10-30 08:47:13 +03:00
return 0 ;
}
/* is_dcbz32 hook: this MMU model unconditionally answers true. */
static bool kvmppc_mmu_book3s_32_is_dcbz32(struct kvm_vcpu *vcpu)
{
	return true;
}
void kvmppc_mmu_book3s_32_init ( struct kvm_vcpu * vcpu )
{
struct kvmppc_mmu * mmu = & vcpu - > arch . mmu ;
mmu - > mtsrin = kvmppc_mmu_book3s_32_mtsrin ;
mmu - > mfsrin = kvmppc_mmu_book3s_32_mfsrin ;
mmu - > xlate = kvmppc_mmu_book3s_32_xlate ;
mmu - > reset_msr = kvmppc_mmu_book3s_32_reset_msr ;
mmu - > tlbie = kvmppc_mmu_book3s_32_tlbie ;
mmu - > esid_to_vsid = kvmppc_mmu_book3s_32_esid_to_vsid ;
mmu - > ea_to_vp = kvmppc_mmu_book3s_32_ea_to_vp ;
mmu - > is_dcbz32 = kvmppc_mmu_book3s_32_is_dcbz32 ;
mmu - > slbmte = NULL ;
mmu - > slbmfee = NULL ;
mmu - > slbmfev = NULL ;
mmu - > slbie = NULL ;
mmu - > slbia = NULL ;
}