2009-10-30 08:47:05 +03:00
/*
* This program is free software ; you can redistribute it and / or modify
* it under the terms of the GNU General Public License , version 2 , as
* published by the Free Software Foundation .
*
* This program is distributed in the hope that it will be useful ,
* but WITHOUT ANY WARRANTY ; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE . See the
* GNU General Public License for more details .
*
* You should have received a copy of the GNU General Public License
* along with this program ; if not , write to the Free Software
* Foundation , 51 Franklin Street , Fifth Floor , Boston , MA 02110 - 1301 , USA .
*
* Copyright SUSE Linux Products GmbH 2009
*
* Authors : Alexander Graf < agraf @ suse . de >
*/
# ifndef __ASM_KVM_BOOK3S_H__
# define __ASM_KVM_BOOK3S_H__
# include <linux/types.h>
# include <linux/kvm_host.h>
2010-04-16 02:11:32 +04:00
# include <asm/kvm_book3s_asm.h>
2009-10-30 08:47:05 +03:00
/*
 * One BAT entry as tracked for a vcpu.  struct kvmppc_vcpu_book3s holds two
 * arrays of eight of these (ibat[] and dbat[]), and kvmppc_set_bat(), declared
 * later in this header, takes a pointer to one entry together with an
 * upper/lower selector and a 32-bit value.
 * NOTE(review): "BAT" is presumably Block Address Translation and the fields
 * other than raw look like decoded pieces of the register pair -- confirm
 * against the kvmppc_set_bat() implementation.
 */
struct kvmppc_bat {
2009-11-30 06:02:02 +03:00
/* 64-bit value; NOTE(review): presumably the undecoded register contents */
u64 raw ;
2009-10-30 08:47:05 +03:00
u32 bepi ;
u32 bepi_mask ;
u32 brpn ;
u8 wimg ;
u8 pp ;
2010-03-24 23:48:36 +03:00
/* Two single-bit flags packed as bitfields. */
bool vs : 1 ;
bool vp : 1 ;
2009-10-30 08:47:05 +03:00
} ;
/*
 * Element type of the sid_map[] table in struct kvmppc_vcpu_book3s
 * (SID_MAP_NUM entries).  Each entry carries a guest VSID, a guest ESID and a
 * host VSID plus a one-bit validity flag.
 * NOTE(review): the field names suggest a guest-to-host segment ID
 * translation cache; the lookup/insert logic is not in this header.
 */
struct kvmppc_sid_map {
u64 guest_vsid ;
u64 guest_esid ;
u64 host_vsid ;
2010-03-24 23:48:36 +03:00
/* Single-bit flag; presumably set while the entry holds a live mapping. */
bool valid : 1 ;
2009-10-30 08:47:05 +03:00
} ;
/*
 * Sizing of the sid_map[] array in struct kvmppc_vcpu_book3s.
 * SID_MAP_BITS is the log2 of the entry count, SID_MAP_NUM the entry count
 * itself (1 << 9 = 512) and SID_MAP_MASK the matching low-bit mask (0x1ff).
 */
# define SID_MAP_BITS 9
# define SID_MAP_NUM (1 << SID_MAP_BITS)
# define SID_MAP_MASK (SID_MAP_NUM - 1)
2010-08-15 10:04:24 +04:00
/*
 * SID_CONTEXTS sizes context_id[] in struct kvmppc_vcpu_book3s: a single
 * entry when CONFIG_PPC_BOOK3S_64 is set, 128 otherwise.
 * VSID_POOL_SIZE (16 per context, i.e. 2048) is only defined in the
 * non-64-bit branch; its one user in this header, vsid_pool[], is likewise
 * only compiled under CONFIG_PPC_BOOK3S_32.
 */
# ifdef CONFIG_PPC_BOOK3S_64
# define SID_CONTEXTS 1
# else
# define SID_CONTEXTS 128
# define VSID_POOL_SIZE (SID_CONTEXTS * 16)
# endif
2011-06-29 04:17:33 +04:00
/*
 * One cached shadow PTE.  Handed to kvmppc_mmu_hpte_cache_map() and
 * kvmppc_mmu_invalidate_pte(), and returned by kvmppc_mmu_hpte_cache_next()
 * (all declared later in this header).
 */
struct hpte_cache {
/*
 * Four hlist linkages whose names mirror the four hpte_hash_* bucket arrays
 * in struct kvmppc_vcpu_book3s, so one entry can sit on one chain of each.
 * NOTE(review): that every entry is on all four lists at once is an
 * assumption -- confirm in the hpte cache implementation.
 */
struct hlist_node list_pte ;
struct hlist_node list_pte_long ;
struct hlist_node list_vpte ;
struct hlist_node list_vpte_long ;
/* NOTE(review): presumably used to free the entry after an RCU grace period */
struct rcu_head rcu_head ;
2012-09-10 06:52:50 +04:00
u64 host_vpn ;
2011-06-29 04:17:33 +04:00
u64 pfn ;
ulong slot ;
/* Embedded by value, not a pointer. */
struct kvmppc_pte pte ;
} ;
2009-10-30 08:47:05 +03:00
/*
 * Book3S-specific vcpu container.  It embeds the generic struct kvm_vcpu and
 * adds the MMU and register bookkeeping declared below; to_book3s() converts a
 * struct kvm_vcpu pointer back into a pointer to this structure.
 */
struct kvmppc_vcpu_book3s {
/* Embedded generic vcpu; to_book3s() relies on this member via container_of(). */
struct kvm_vcpu vcpu ;
2010-04-16 02:11:40 +04:00
/* Held by pointer, so the shadow vcpu storage lives outside this struct. */
struct kvmppc_book3s_shadow_vcpu * shadow_vcpu ;
2009-10-30 08:47:05 +03:00
/* SID_MAP_NUM (512) guest/host segment ID entries. */
struct kvmppc_sid_map sid_map [ SID_MAP_NUM ] ;
/* 64 esid/vsid pairs; slb_shadow_max presumably bounds the slots in use. */
struct {
u64 esid ;
u64 vsid ;
} slb_shadow [ 64 ] ;
u8 slb_shadow_max ;
/* Eight BAT entries each; presumably instruction-side and data-side. */
struct kvmppc_bat ibat [ 8 ] ;
struct kvmppc_bat dbat [ 8 ] ;
u64 hid [ 6 ] ;
2010-02-19 13:00:33 +03:00
u64 gqr [ 8 ] ;
2009-10-30 08:47:05 +03:00
u64 sdr1 ;
u64 hior ;
u64 msr_mask ;
2012-11-04 22:15:43 +04:00
u64 purr_offset ;
u64 spurr_offset ;
2010-08-15 10:04:24 +04:00
/*
 * VSID allocation state differs by configuration: a fixed pool of
 * VSID_POOL_SIZE 32-bit values plus a cursor under CONFIG_PPC_BOOK3S_32,
 * otherwise three 64-bit proto-VSID values (first / max / next).
 */
# ifdef CONFIG_PPC_BOOK3S_32
u32 vsid_pool [ VSID_POOL_SIZE ] ;
2012-03-23 04:21:14 +04:00
u32 vsid_next ;
2010-08-15 10:04:24 +04:00
# else
2012-03-23 04:21:14 +04:00
u64 proto_vsid_first ;
u64 proto_vsid_max ;
u64 proto_vsid_next ;
2010-08-15 10:04:24 +04:00
# endif
/* SID_CONTEXTS entries: 1 with CONFIG_PPC_BOOK3S_64, 128 otherwise. */
int context_id [ SID_CONTEXTS ] ;
2011-06-29 04:17:33 +04:00
2011-09-14 23:45:23 +04:00
bool hior_explicit ; /* HIOR is set by ioctl, not PVR */
2011-06-29 04:17:33 +04:00
/*
 * Bucket heads for the hpte_cache chains; each array pairs with the
 * like-named hlist_node in struct hpte_cache.  The HPTEG_HASH_NUM_* sizes
 * are defined outside this file.
 */
struct hlist_head hpte_hash_pte [ HPTEG_HASH_NUM_PTE ] ;
struct hlist_head hpte_hash_pte_long [ HPTEG_HASH_NUM_PTE_LONG ] ;
struct hlist_head hpte_hash_vpte [ HPTEG_HASH_NUM_VPTE ] ;
struct hlist_head hpte_hash_vpte_long [ HPTEG_HASH_NUM_VPTE_LONG ] ;
int hpte_cache_count ;
/* NOTE(review): presumably guards the hpte cache lists above -- confirm scope */
spinlock_t mmu_lock ;
2009-10-30 08:47:05 +03:00
} ;
/*
 * Small integer context identifiers (0, 1, 2).  No user is visible in this
 * header.
 * NOTE(review): CONTEXT_GUEST_END presumably marks one past the last guest
 * context rather than a context of its own -- confirm at the use sites.
 */
# define CONTEXT_HOST 0
# define CONTEXT_GUEST 1
# define CONTEXT_GUEST_END 2
2010-04-20 04:49:48 +04:00
/*
 * Special 64-bit VSID constants.  VSID_REAL and VSID_BAT are values that fit
 * in the low 61 bits.  VSID_REAL_DR, VSID_REAL_IR and VSID_PR are the single
 * bits 61, 62 and 63 respectively, so they can be OR-ed onto a VSID without
 * disturbing it.
 * NOTE(review): how these are combined is not shown here; the names suggest
 * real-mode, BAT-mapped, data/instruction-relocation and problem-state
 * tagging -- confirm in the MMU code.
 */
# define VSID_REAL 0x1fffffffffc00000ULL
# define VSID_BAT 0x1fffffffffb00000ULL
# define VSID_REAL_DR 0x2000000000000000ULL
# define VSID_REAL_IR 0x4000000000000000ULL
2010-03-24 23:48:35 +03:00
# define VSID_PR 0x8000000000000000ULL
2009-10-30 08:47:05 +03:00
2010-04-20 04:49:46 +04:00
/*
 * Shadow PTE flush entry points, differing in how the victims are selected:
 * an address with a mask (ea / ea_mask), a 64-bit vp with a mask, or a
 * pa_start..pa_end range.  Implementations are not in this header.
 * NOTE(review): presumably effective-address, virtual-page and
 * physical-address selection respectively; whether pa_end is inclusive is
 * not visible here.
 */
extern void kvmppc_mmu_pte_flush ( struct kvm_vcpu * vcpu , ulong ea , ulong ea_mask ) ;
2009-10-30 08:47:05 +03:00
extern void kvmppc_mmu_pte_vflush ( struct kvm_vcpu * vcpu , u64 vp , u64 vp_mask ) ;
2010-04-20 04:49:46 +04:00
extern void kvmppc_mmu_pte_pflush ( struct kvm_vcpu * vcpu , ulong pa_start , ulong pa_end ) ;
2009-10-30 08:47:05 +03:00
/* Setters taking the new 64-bit MSR / 32-bit PVR value for the given vcpu. */
extern void kvmppc_set_msr ( struct kvm_vcpu * vcpu , u64 new_msr ) ;
2011-06-29 04:17:58 +04:00
extern void kvmppc_set_pvr ( struct kvm_vcpu * vcpu , u32 pvr ) ;
2009-10-30 08:47:05 +03:00
/* Per-vcpu MMU initialisers, one per MMU flavour (64-bit and 32-bit). */
extern void kvmppc_mmu_book3s_64_init ( struct kvm_vcpu * vcpu ) ;
extern void kvmppc_mmu_book3s_32_init ( struct kvm_vcpu * vcpu ) ;
KVM: PPC: Add support for Book3S processors in hypervisor mode
This adds support for KVM running on 64-bit Book 3S processors,
specifically POWER7, in hypervisor mode. Using hypervisor mode means
that the guest can use the processor's supervisor mode. That means
that the guest can execute privileged instructions and access privileged
registers itself without trapping to the host. This gives excellent
performance, but does mean that KVM cannot emulate a processor
architecture other than the one that the hardware implements.
This code assumes that the guest is running paravirtualized using the
PAPR (Power Architecture Platform Requirements) interface, which is the
interface that IBM's PowerVM hypervisor uses. That means that existing
Linux distributions that run on IBM pSeries machines will also run
under KVM without modification. In order to communicate the PAPR
hypercalls to qemu, this adds a new KVM_EXIT_PAPR_HCALL exit code
to include/linux/kvm.h.
Currently the choice between book3s_hv support and book3s_pr support
(i.e. the existing code, which runs the guest in user mode) has to be
made at kernel configuration time, so a given kernel binary can only
do one or the other.
This new book3s_hv code doesn't support MMIO emulation at present.
Since we are running paravirtualized guests, this isn't a serious
restriction.
With the guest running in supervisor mode, most exceptions go straight
to the guest. We will never get data or instruction storage or segment
interrupts, alignment interrupts, decrementer interrupts, program
interrupts, single-step interrupts, etc., coming to the hypervisor from
the guest. Therefore this introduces a new KVMTEST_NONHV macro for the
exception entry path so that we don't have to do the KVM test on entry
to those exception handlers.
We do however get hypervisor decrementer, hypervisor data storage,
hypervisor instruction storage, and hypervisor emulation assist
interrupts, so we have to handle those.
In hypervisor mode, real-mode accesses can access all of RAM, not just
a limited amount. Therefore we put all the guest state in the vcpu.arch
and use the shadow_vcpu in the PACA only for temporary scratch space.
We allocate the vcpu with kzalloc rather than vzalloc, and we don't use
anything in the kvmppc_vcpu_book3s struct, so we don't allocate it.
We don't have a shared page with the guest, but we still need a
kvm_vcpu_arch_shared struct to store the values of various registers,
so we include one in the vcpu_arch struct.
The POWER7 processor has a restriction that all threads in a core have
to be in the same partition. MMU-on kernel code counts as a partition
(partition 0), so we have to do a partition switch on every entry to and
exit from the guest. At present we require the host and guest to run
in single-thread mode because of this hardware restriction.
This code allocates a hashed page table for the guest and initializes
it with HPTEs for the guest's Virtual Real Memory Area (VRMA). We
require that the guest memory is allocated using 16MB huge pages, in
order to simplify the low-level memory management. This also means that
we can get away without tracking paging activity in the host for now,
since huge pages can't be paged or swapped.
This also adds a few new exports needed by the book3s_hv code.
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
2011-06-29 04:21:34 +04:00
/*
 * Per-vcpu MMU initialiser for the Book3S HV (hypervisor mode) flavour,
 * alongside the _64 and _32 variants declared in this header.
 */
extern void kvmppc_mmu_book3s_hv_init ( struct kvm_vcpu * vcpu ) ;
2009-10-30 08:47:05 +03:00
/*
 * Shadow MMU mapping entry points: map one page described by *pte, map the
 * segment covering eaddr, and flush the vcpu's segments.  Bodies are not in
 * this header.
 * NOTE(review): the int results are presumably 0 on success and a negative
 * or KVM-specific code otherwise -- confirm.
 */
extern int kvmppc_mmu_map_page ( struct kvm_vcpu * vcpu , struct kvmppc_pte * pte ) ;
extern int kvmppc_mmu_map_segment ( struct kvm_vcpu * vcpu , ulong eaddr ) ;
extern void kvmppc_mmu_flush_segments ( struct kvm_vcpu * vcpu ) ;
KVM: PPC: Implement MMIO emulation support for Book3S HV guests
This provides the low-level support for MMIO emulation in Book3S HV
guests. When the guest tries to map a page which is not covered by
any memslot, that page is taken to be an MMIO emulation page. Instead
of inserting a valid HPTE, we insert an HPTE that has the valid bit
clear but another hypervisor software-use bit set, which we call
HPTE_V_ABSENT, to indicate that this is an absent page. An
absent page is treated much like a valid page as far as guest hcalls
(H_ENTER, H_REMOVE, H_READ etc.) are concerned, except of course that
an absent HPTE doesn't need to be invalidated with tlbie since it
was never valid as far as the hardware is concerned.
When the guest accesses a page for which there is an absent HPTE, it
will take a hypervisor data storage interrupt (HDSI) since we now set
the VPM1 bit in the LPCR. Our HDSI handler for HPTE-not-present faults
looks up the hash table and if it finds an absent HPTE mapping the
requested virtual address, will switch to kernel mode and handle the
fault in kvmppc_book3s_hv_page_fault(), which at present just calls
kvmppc_hv_emulate_mmio() to set up the MMIO emulation.
This is based on an earlier patch by Benjamin Herrenschmidt, but since
heavily reworked.
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
Signed-off-by: Avi Kivity <avi@redhat.com>
2011-12-12 16:36:37 +04:00
/*
 * Kernel-mode fault handler for Book3S HV guests.  Per the change
 * descriptions accompanying it, it is reached from the hypervisor data
 * storage interrupt path when the faulting address maps to an absent HPTE;
 * it first only set up MMIO emulation and was later extended to
 * re-instantiate HPTEs for pages that had been paged out.
 * NOTE(review): meaning of the status argument and of the return value is
 * not visible here.
 */
extern int kvmppc_book3s_hv_page_fault ( struct kvm_run * run ,
struct kvm_vcpu * vcpu , unsigned long addr ,
unsigned long status ) ;
/*
 * NOTE(review): by name, looks up the HPTE for eaddr under SLB value slb_v
 * and returns with it locked; presumably the long result is an HPTE index or
 * a negative value when nothing matches -- confirm.
 */
extern long kvmppc_hv_find_lock_hpte ( struct kvm * kvm , gva_t eaddr ,
unsigned long slb_v , unsigned long valid ) ;
2010-06-30 17:18:46 +04:00
/*
 * hpte_cache management.  The per-vcpu calls operate on struct hpte_cache
 * entries; the two (void) calls take no vcpu and are presumably one-time
 * global setup and teardown.  Implementations are not in this header.
 */
extern void kvmppc_mmu_hpte_cache_map ( struct kvm_vcpu * vcpu , struct hpte_cache * pte ) ;
extern struct hpte_cache * kvmppc_mmu_hpte_cache_next ( struct kvm_vcpu * vcpu ) ;
extern void kvmppc_mmu_hpte_destroy ( struct kvm_vcpu * vcpu ) ;
extern int kvmppc_mmu_hpte_init ( struct kvm_vcpu * vcpu ) ;
extern void kvmppc_mmu_invalidate_pte ( struct kvm_vcpu * vcpu , struct hpte_cache * pte ) ;
extern int kvmppc_mmu_hpte_sysinit ( void ) ;
extern void kvmppc_mmu_hpte_sysexit ( void ) ;
KVM: PPC: Add support for Book3S processors in hypervisor mode
This adds support for KVM running on 64-bit Book 3S processors,
specifically POWER7, in hypervisor mode. Using hypervisor mode means
that the guest can use the processor's supervisor mode. That means
that the guest can execute privileged instructions and access privileged
registers itself without trapping to the host. This gives excellent
performance, but does mean that KVM cannot emulate a processor
architecture other than the one that the hardware implements.
This code assumes that the guest is running paravirtualized using the
PAPR (Power Architecture Platform Requirements) interface, which is the
interface that IBM's PowerVM hypervisor uses. That means that existing
Linux distributions that run on IBM pSeries machines will also run
under KVM without modification. In order to communicate the PAPR
hypercalls to qemu, this adds a new KVM_EXIT_PAPR_HCALL exit code
to include/linux/kvm.h.
Currently the choice between book3s_hv support and book3s_pr support
(i.e. the existing code, which runs the guest in user mode) has to be
made at kernel configuration time, so a given kernel binary can only
do one or the other.
This new book3s_hv code doesn't support MMIO emulation at present.
Since we are running paravirtualized guests, this isn't a serious
restriction.
With the guest running in supervisor mode, most exceptions go straight
to the guest. We will never get data or instruction storage or segment
interrupts, alignment interrupts, decrementer interrupts, program
interrupts, single-step interrupts, etc., coming to the hypervisor from
the guest. Therefore this introduces a new KVMTEST_NONHV macro for the
exception entry path so that we don't have to do the KVM test on entry
to those exception handlers.
We do however get hypervisor decrementer, hypervisor data storage,
hypervisor instruction storage, and hypervisor emulation assist
interrupts, so we have to handle those.
In hypervisor mode, real-mode accesses can access all of RAM, not just
a limited amount. Therefore we put all the guest state in the vcpu.arch
and use the shadow_vcpu in the PACA only for temporary scratch space.
We allocate the vcpu with kzalloc rather than vzalloc, and we don't use
anything in the kvmppc_vcpu_book3s struct, so we don't allocate it.
We don't have a shared page with the guest, but we still need a
kvm_vcpu_arch_shared struct to store the values of various registers,
so we include one in the vcpu_arch struct.
The POWER7 processor has a restriction that all threads in a core have
to be in the same partition. MMU-on kernel code counts as a partition
(partition 0), so we have to do a partition switch on every entry to and
exit from the guest. At present we require the host and guest to run
in single-thread mode because of this hardware restriction.
This code allocates a hashed page table for the guest and initializes
it with HPTEs for the guest's Virtual Real Memory Area (VRMA). We
require that the guest memory is allocated using 16MB huge pages, in
order to simplify the low-level memory management. This also means that
we can get away without tracking paging activity in the host for now,
since huge pages can't be paged or swapped.
This also adds a few new exports needed by the book3s_hv code.
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
2011-06-29 04:21:34 +04:00
/*
 * Takes no vcpu, unlike kvmppc_mmu_book3s_hv_init(); presumably one-time
 * global MMU setup for HV mode -- confirm.
 */
extern int kvmppc_mmu_hv_init ( void ) ;
2010-06-30 17:18:46 +04:00
2010-02-19 13:00:38 +03:00
/*
 * Guest memory load/store helpers with identical signatures.
 * NOTE(review): presumably transfer size bytes between the guest address
 * *eaddr and the host buffer ptr, with data selecting data versus
 * instruction translation.  eaddr is passed by pointer, so the callee may
 * rewrite it -- confirm before reusing the value after a call.
 */
extern int kvmppc_ld ( struct kvm_vcpu * vcpu , ulong * eaddr , int size , void * ptr , bool data ) ;
extern int kvmppc_st ( struct kvm_vcpu * vcpu , ulong * eaddr , int size , void * ptr , bool data ) ;
2009-10-30 08:47:05 +03:00
/* Queue / dequeue a pending interrupt for the vcpu, identified by vec. */
extern void kvmppc_book3s_queue_irqprio ( struct kvm_vcpu * vcpu , unsigned int vec ) ;
2013-04-18 00:30:26 +04:00
extern void kvmppc_book3s_dequeue_irqprio ( struct kvm_vcpu * vcpu ,
unsigned int vec ) ;
KVM: PPC: Add support for Book3S processors in hypervisor mode
This adds support for KVM running on 64-bit Book 3S processors,
specifically POWER7, in hypervisor mode. Using hypervisor mode means
that the guest can use the processor's supervisor mode. That means
that the guest can execute privileged instructions and access privileged
registers itself without trapping to the host. This gives excellent
performance, but does mean that KVM cannot emulate a processor
architecture other than the one that the hardware implements.
This code assumes that the guest is running paravirtualized using the
PAPR (Power Architecture Platform Requirements) interface, which is the
interface that IBM's PowerVM hypervisor uses. That means that existing
Linux distributions that run on IBM pSeries machines will also run
under KVM without modification. In order to communicate the PAPR
hypercalls to qemu, this adds a new KVM_EXIT_PAPR_HCALL exit code
to include/linux/kvm.h.
Currently the choice between book3s_hv support and book3s_pr support
(i.e. the existing code, which runs the guest in user mode) has to be
made at kernel configuration time, so a given kernel binary can only
do one or the other.
This new book3s_hv code doesn't support MMIO emulation at present.
Since we are running paravirtualized guests, this isn't a serious
restriction.
With the guest running in supervisor mode, most exceptions go straight
to the guest. We will never get data or instruction storage or segment
interrupts, alignment interrupts, decrementer interrupts, program
interrupts, single-step interrupts, etc., coming to the hypervisor from
the guest. Therefore this introduces a new KVMTEST_NONHV macro for the
exception entry path so that we don't have to do the KVM test on entry
to those exception handlers.
We do however get hypervisor decrementer, hypervisor data storage,
hypervisor instruction storage, and hypervisor emulation assist
interrupts, so we have to handle those.
In hypervisor mode, real-mode accesses can access all of RAM, not just
a limited amount. Therefore we put all the guest state in the vcpu.arch
and use the shadow_vcpu in the PACA only for temporary scratch space.
We allocate the vcpu with kzalloc rather than vzalloc, and we don't use
anything in the kvmppc_vcpu_book3s struct, so we don't allocate it.
We don't have a shared page with the guest, but we still need a
kvm_vcpu_arch_shared struct to store the values of various registers,
so we include one in the vcpu_arch struct.
The POWER7 processor has a restriction that all threads in a core have
to be in the same partition. MMU-on kernel code counts as a partition
(partition 0), so we have to do a partition switch on every entry to and
exit from the guest. At present we require the host and guest to run
in single-thread mode because of this hardware restriction.
This code allocates a hashed page table for the guest and initializes
it with HPTEs for the guest's Virtual Real Memory Area (VRMA). We
require that the guest memory is allocated using 16MB huge pages, in
order to simplify the low-level memory management. This also means that
we can get away without tracking paging activity in the host for now,
since huge pages can't be paged or swapped.
This also adds a few new exports needed by the book3s_hv code.
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
2011-06-29 04:21:34 +04:00
/*
 * NOTE(review): by name, delivers interrupt vector vec to the guest; flags
 * is presumably merged into the guest's saved machine state -- confirm.
 */
extern void kvmppc_inject_interrupt ( struct kvm_vcpu * vcpu , int vec , u64 flags ) ;
2009-11-30 06:02:02 +03:00
/*
 * Updates *bat from a 32-bit value; upper selects which half of the BAT is
 * being written.
 */
extern void kvmppc_set_bat ( struct kvm_vcpu * vcpu , struct kvmppc_bat * bat ,
bool upper , u32 val ) ;
2010-02-19 13:00:39 +03:00
/* NOTE(review): msr presumably names the facility bit(s) to give up. */
extern void kvmppc_giveup_ext ( struct kvm_vcpu * vcpu , ulong msr ) ;
2010-02-19 13:00:44 +03:00
extern int kvmppc_emulate_paired_single ( struct kvm_run * run , struct kvm_vcpu * vcpu ) ;
2010-07-29 16:47:54 +04:00
/* Translates a guest frame number to a host page frame number. */
extern pfn_t kvmppc_gfn_to_pfn ( struct kvm_vcpu * vcpu , gfn_t gfn ) ;
KVM: PPC: Implement MMU notifiers for Book3S HV guests
This adds the infrastructure to enable us to page out pages underneath
a Book3S HV guest, on processors that support virtualized partition
memory, that is, POWER7. Instead of pinning all the guest's pages,
we now look in the host userspace Linux page tables to find the
mapping for a given guest page. Then, if the userspace Linux PTE
gets invalidated, kvm_unmap_hva() gets called for that address, and
we replace all the guest HPTEs that refer to that page with absent
HPTEs, i.e. ones with the valid bit clear and the HPTE_V_ABSENT bit
set, which will cause an HDSI when the guest tries to access them.
Finally, the page fault handler is extended to reinstantiate the
guest HPTE when the guest tries to access a page which has been paged
out.
Since we can't intercept the guest DSI and ISI interrupts on PPC970,
we still have to pin all the guest pages on PPC970. We have a new flag,
kvm->arch.using_mmu_notifiers, that indicates whether we can page
guest pages out. If it is not set, the MMU notifier callbacks do
nothing and everything operates as before.
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
Signed-off-by: Avi Kivity <avi@redhat.com>
2011-12-12 16:38:05 +04:00
/*
 * Reverse-map and HPTE maintenance helpers for HV guests; bodies are not in
 * this header.  realmode is a plain int flag here.
 * NOTE(review): presumably kvmppc_add_revmap_chain() links the HPTE at
 * pte_index onto the chain headed by *rmap, and kvmppc_invalidate_hpte() /
 * kvmppc_clear_ref_hpte() act on the single HPTE at hptep -- confirm.
 */
extern void kvmppc_add_revmap_chain ( struct kvm * kvm , struct revmap_entry * rev ,
unsigned long * rmap , long pte_index , int realmode ) ;
extern void kvmppc_invalidate_hpte ( struct kvm * kvm , unsigned long * hptep ,
unsigned long pte_index ) ;
2011-12-15 06:02:47 +04:00
void kvmppc_clear_ref_hpte ( struct kvm * kvm , unsigned long * hptep ,
unsigned long pte_index ) ;
2011-12-12 16:28:55 +04:00
/*
 * Returns a host pointer for guest address addr and reports a byte count
 * through *nb_ret.  Counterpart of kvmppc_unpin_guest_page().
 * NOTE(review): presumably returns NULL on failure and *nb_ret is the number
 * of bytes usable from the returned pointer -- confirm.
 */
extern void * kvmppc_pin_guest_page ( struct kvm * kvm , unsigned long addr ,
unsigned long * nb_ret ) ;
KVM: PPC: Book3S HV: Report VPA and DTL modifications in dirty map
At present, the KVM_GET_DIRTY_LOG ioctl doesn't report modifications
done by the host to the virtual processor areas (VPAs) and dispatch
trace logs (DTLs) registered by the guest. This is because those
modifications are done either in real mode or in the host kernel
context, and in neither case does the access go through the guest's
HPT, and thus no change (C) bit gets set in the guest's HPT.
However, the changes done by the host do need to be tracked so that
the modified pages get transferred when doing live migration. In
order to track these modifications, this adds a dirty flag to the
struct representing the VPA/DTL areas, and arranges to set the flag
when the VPA/DTL gets modified by the host. Then, when we are
collecting the dirty log, we also check the dirty flags for the
VPA and DTL for each vcpu and set the relevant bit in the dirty log
if necessary. Doing this also means we now need to keep track of
the guest physical address of the VPA/DTL areas.
So as not to lose track of modifications to a VPA/DTL area when it gets
unregistered, or when a new area gets registered in its place, we need
to transfer the dirty state to the rmap chain. This adds code to
kvmppc_unpin_guest_page() to do that if the area was dirty. To simplify
that code, we now require that all VPA, DTL and SLB shadow buffer areas
fit within a single host page. Guests already comply with this
requirement because pHyp requires that these areas not cross a 4k
boundary.
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
2013-04-18 23:51:04 +04:00
/*
 * Releases a page obtained from kvmppc_pin_guest_page().  gpa is the guest
 * physical address of the area.  Per the change description that added the
 * last two parameters, when dirty is set the dirty state is transferred to
 * the rmap chain so that host-side modifications of VPA/DTL areas still show
 * up in the dirty log after the area is unregistered or replaced.
 */
extern void kvmppc_unpin_guest_page ( struct kvm * kvm , void * addr ,
unsigned long gpa , bool dirty ) ;
KVM: PPC: Only get pages when actually needed, not in prepare_memory_region()
This removes the code from kvmppc_core_prepare_memory_region() that
looked up the VMA for the region being added and called hva_to_page
to get the pfns for the memory. We have no guarantee that there will
be anything mapped there at the time of the KVM_SET_USER_MEMORY_REGION
ioctl call; userspace can do that ioctl and then map memory into the
region later.
Instead we defer looking up the pfn for each memory page until it is
needed, which generally means when the guest does an H_ENTER hcall on
the page. Since we can't call get_user_pages in real mode, if we don't
already have the pfn for the page, kvmppc_h_enter() will return
H_TOO_HARD and we then call kvmppc_virtmode_h_enter() once we get back
to kernel context. That calls kvmppc_get_guest_page() to get the pfn
for the page, and then calls back to kvmppc_h_enter() to redo the HPTE
insertion.
When the first vcpu starts executing, we need to have the RMO or VRMA
region mapped so that the guest's real mode accesses will work. Thus
we now have a check in kvmppc_vcpu_run() to see if the RMO/VRMA is set
up and if not, call kvmppc_hv_setup_rma(). It checks if the memslot
starting at guest physical 0 now has RMO memory mapped there; if so it
sets it up for the guest, otherwise on POWER7 it sets up the VRMA.
The function that does that, kvmppc_map_vrma, is now a bit simpler,
as it calls kvmppc_virtmode_h_enter instead of creating the HPTE itself.
Since we are now potentially updating entries in the slot_phys[]
arrays from multiple vcpu threads, we now have a spinlock protecting
those updates to ensure that we don't lose track of any references
to pages.
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
Signed-off-by: Avi Kivity <avi@redhat.com>
2011-12-12 16:31:00 +04:00
/*
 * Kernel-context (virtual mode) handling of the H_ENTER hypercall.  Per the
 * change descriptions accompanying it, it runs after the real-mode
 * kvmppc_h_enter() gave up with H_TOO_HARD because the pfn for the page was
 * not yet known, and it stores the resulting HPTE index in the guest's R4.
 * Callers without a guest register context to update should use the
 * kvm-based kvmppc_do_h_enter() path instead.
 */
extern long kvmppc_virtmode_h_enter ( struct kvm_vcpu * vcpu , unsigned long flags ,
long pte_index , unsigned long pteh , unsigned long ptel ) ;
KVM: PPC: Book3S HV: Restructure HPT entry creation code
This restructures the code that creates HPT (hashed page table)
entries so that it can be called in situations where we don't have a
struct vcpu pointer, only a struct kvm pointer. It also fixes a bug
where kvmppc_map_vrma() would corrupt the guest R4 value.
Most of the work of kvmppc_virtmode_h_enter is now done by a new
function, kvmppc_virtmode_do_h_enter, which itself calls another new
function, kvmppc_do_h_enter, which contains most of the old
kvmppc_h_enter. The new kvmppc_do_h_enter takes explicit arguments
for the place to return the HPTE index, the Linux page tables to use,
and whether it is being called in real mode, thus removing the need
for it to have the vcpu as an argument.
Currently kvmppc_map_vrma creates the VRMA (virtual real mode area)
HPTEs by calling kvmppc_virtmode_h_enter, which is designed primarily
to handle H_ENTER hcalls from the guest that need to pin a page of
memory. Since H_ENTER returns the index of the created HPTE in R4,
kvmppc_virtmode_h_enter updates the guest R4, corrupting the guest R4
in the case when it gets called from kvmppc_map_vrma on the first
VCPU_RUN ioctl. With this, kvmppc_map_vrma instead calls
kvmppc_virtmode_do_h_enter with the address of a dummy word as the
place to store the HPTE index, thus avoiding corrupting the guest R4.
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
2012-11-13 22:31:32 +04:00
/*
 * Core of HPT entry creation, callable with only a struct kvm.  Per the
 * restructuring change description, the explicit arguments replace what used
 * to come from the vcpu: pgdir is the Linux page table to use, realmode says
 * whether the call is made in real mode, and *idx_ret receives the index of
 * the HPTE that was created.
 */
extern long kvmppc_do_h_enter ( struct kvm * kvm , unsigned long flags ,
long pte_index , unsigned long pteh , unsigned long ptel ,
pgd_t * pgdir , bool realmode , unsigned long * idx_ret ) ;
2012-11-20 02:55:44 +04:00
/*
 * NOTE(review): by name, the struct-kvm-based core of H_REMOVE; hpret is an
 * out parameter, presumably receiving the removed HPTE contents -- confirm.
 */
extern long kvmppc_do_h_remove ( struct kvm * kvm , unsigned long flags ,
unsigned long pte_index , unsigned long avpn ,
unsigned long * hpret ) ;
2011-12-15 06:03:22 +04:00
/* NOTE(review): presumably fills the bitmap at map for memslot -- confirm. */
extern long kvmppc_hv_get_dirty_log ( struct kvm * kvm ,
2012-09-11 17:28:18 +04:00
struct kvm_memory_slot * memslot , unsigned long * map ) ;
2009-10-30 08:47:05 +03:00
2011-07-23 11:41:44 +04:00
/*
 * Guest entry trampoline symbol; an HV-specific counterpart,
 * kvmppc_hv_entry_trampoline(), is declared separately in this header.
 * NOTE(review): presumably an assembly entry point that is not meant to be
 * called as an ordinary C function -- confirm.
 */
extern void kvmppc_entry_trampoline ( void ) ;
KVM: PPC: Add support for Book3S processors in hypervisor mode
This adds support for KVM running on 64-bit Book 3S processors,
specifically POWER7, in hypervisor mode. Using hypervisor mode means
that the guest can use the processor's supervisor mode. That means
that the guest can execute privileged instructions and access privileged
registers itself without trapping to the host. This gives excellent
performance, but does mean that KVM cannot emulate a processor
architecture other than the one that the hardware implements.
This code assumes that the guest is running paravirtualized using the
PAPR (Power Architecture Platform Requirements) interface, which is the
interface that IBM's PowerVM hypervisor uses. That means that existing
Linux distributions that run on IBM pSeries machines will also run
under KVM without modification. In order to communicate the PAPR
hypercalls to qemu, this adds a new KVM_EXIT_PAPR_HCALL exit code
to include/linux/kvm.h.
Currently the choice between book3s_hv support and book3s_pr support
(i.e. the existing code, which runs the guest in user mode) has to be
made at kernel configuration time, so a given kernel binary can only
do one or the other.
This new book3s_hv code doesn't support MMIO emulation at present.
Since we are running paravirtualized guests, this isn't a serious
restriction.
With the guest running in supervisor mode, most exceptions go straight
to the guest. We will never get data or instruction storage or segment
interrupts, alignment interrupts, decrementer interrupts, program
interrupts, single-step interrupts, etc., coming to the hypervisor from
the guest. Therefore this introduces a new KVMTEST_NONHV macro for the
exception entry path so that we don't have to do the KVM test on entry
to those exception handlers.
We do however get hypervisor decrementer, hypervisor data storage,
hypervisor instruction storage, and hypervisor emulation assist
interrupts, so we have to handle those.
In hypervisor mode, real-mode accesses can access all of RAM, not just
a limited amount. Therefore we put all the guest state in the vcpu.arch
and use the shadow_vcpu in the PACA only for temporary scratch space.
We allocate the vcpu with kzalloc rather than vzalloc, and we don't use
anything in the kvmppc_vcpu_book3s struct, so we don't allocate it.
We don't have a shared page with the guest, but we still need a
kvm_vcpu_arch_shared struct to store the values of various registers,
so we include one in the vcpu_arch struct.
The POWER7 processor has a restriction that all threads in a core have
to be in the same partition. MMU-on kernel code counts as a partition
(partition 0), so we have to do a partition switch on every entry to and
exit from the guest. At present we require the host and guest to run
in single-thread mode because of this hardware restriction.
This code allocates a hashed page table for the guest and initializes
it with HPTEs for the guest's Virtual Real Memory Area (VRMA). We
require that the guest memory is allocated using 16MB huge pages, in
order to simplify the low-level memory management. This also means that
we can get away without tracking paging activity in the host for now,
since huge pages can't be paged or swapped.
This also adds a few new exports needed by the book3s_hv code.
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
2011-06-29 04:21:34 +04:00
/* HV-mode counterpart of kvmppc_entry_trampoline(). */
extern void kvmppc_hv_entry_trampoline ( void ) ;
2010-01-15 16:49:10 +03:00
/*
 * Argument-less loaders for the FPU, AltiVec and VSX facilities.
 * NOTE(review): presumably assembly routines with their own register
 * conventions -- confirm before calling from C.
 */
extern void kvmppc_load_up_fpu ( void ) ;
extern void kvmppc_load_up_altivec ( void ) ;
extern void kvmppc_load_up_vsx ( void ) ;
2010-03-24 23:48:28 +03:00
/*
 * Derive the DSISR (32-bit) and DAR (ulong) values for an alignment
 * interrupt from the instruction word inst.
 */
extern u32 kvmppc_alignment_dsisr ( struct kvm_vcpu * vcpu , unsigned int inst ) ;
extern ulong kvmppc_alignment_dar ( struct kvm_vcpu * vcpu , unsigned int inst ) ;
2011-08-08 19:21:15 +04:00
/* NOTE(review): cmd is presumably the hypercall number being handled. */
extern int kvmppc_h_pr ( struct kvm_vcpu * vcpu , unsigned long cmd ) ;
2009-10-30 08:47:05 +03:00
/*
 * Recover the Book3S container from the generic vcpu embedded in it.
 *
 * @vcpu must point at the vcpu member of a struct kvmppc_vcpu_book3s; the
 * result is the address of that enclosing structure.
 * NOTE(review): a vcpu that was not allocated as part of a
 * struct kvmppc_vcpu_book3s would yield a bogus pointer -- confirm every
 * caller only passes embedded vcpus.
 */
static inline struct kvmppc_vcpu_book3s * to_book3s ( struct kvm_vcpu * vcpu )
{
	struct kvmppc_vcpu_book3s * book3s ;

	book3s = container_of ( vcpu , struct kvmppc_vcpu_book3s , vcpu ) ;
	return book3s ;
}
KVM: PPC: Add support for Book3S processors in hypervisor mode
This adds support for KVM running on 64-bit Book 3S processors,
specifically POWER7, in hypervisor mode. Using hypervisor mode means
that the guest can use the processor's supervisor mode. That means
that the guest can execute privileged instructions and access privileged
registers itself without trapping to the host. This gives excellent
performance, but does mean that KVM cannot emulate a processor
architecture other than the one that the hardware implements.
This code assumes that the guest is running paravirtualized using the
PAPR (Power Architecture Platform Requirements) interface, which is the
interface that IBM's PowerVM hypervisor uses. That means that existing
Linux distributions that run on IBM pSeries machines will also run
under KVM without modification. In order to communicate the PAPR
hypercalls to qemu, this adds a new KVM_EXIT_PAPR_HCALL exit code
to include/linux/kvm.h.
Currently the choice between book3s_hv support and book3s_pr support
(i.e. the existing code, which runs the guest in user mode) has to be
made at kernel configuration time, so a given kernel binary can only
do one or the other.
This new book3s_hv code doesn't support MMIO emulation at present.
Since we are running paravirtualized guests, this isn't a serious
restriction.
With the guest running in supervisor mode, most exceptions go straight
to the guest. We will never get data or instruction storage or segment
interrupts, alignment interrupts, decrementer interrupts, program
interrupts, single-step interrupts, etc., coming to the hypervisor from
the guest. Therefore this introduces a new KVMTEST_NONHV macro for the
exception entry path so that we don't have to do the KVM test on entry
to those exception handlers.
We do however get hypervisor decrementer, hypervisor data storage,
hypervisor instruction storage, and hypervisor emulation assist
interrupts, so we have to handle those.
In hypervisor mode, real-mode accesses can access all of RAM, not just
a limited amount. Therefore we put all the guest state in the vcpu.arch
and use the shadow_vcpu in the PACA only for temporary scratch space.
We allocate the vcpu with kzalloc rather than vzalloc, and we don't use
anything in the kvmppc_vcpu_book3s struct, so we don't allocate it.
We don't have a shared page with the guest, but we still need a
kvm_vcpu_arch_shared struct to store the values of various registers,
so we include one in the vcpu_arch struct.
The POWER7 processor has a restriction that all threads in a core have
to be in the same partition. MMU-on kernel code counts as a partition
(partition 0), so we have to do a partition switch on every entry to and
exit from the guest. At present we require the host and guest to run
in single-thread mode because of this hardware restriction.
This code allocates a hashed page table for the guest and initializes
it with HPTEs for the guest's Virtual Real Memory Area (VRMA). We
require that the guest memory is allocated using 16MB huge pages, in
order to simplify the low-level memory management. This also means that
we can get away without tracking paging activity in the host for now,
since huge pages can't be paged or swapped.
This also adds a few new exports needed by the book3s_hv code.
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
2011-06-29 04:21:34 +04:00
/* Declaration only; the symbol is defined outside this header.
 * NOTE(review): the name suggests a code label rather than a callable
 * routine -- confirm before calling it from C. */
extern void kvm_return_point ( void ) ;
/* Also add subarch specific defines */
# ifdef CONFIG_KVM_BOOK3S_32_HANDLER
# include <asm/kvm_book3s_32.h>
# endif
# ifdef CONFIG_KVM_BOOK3S_64_HANDLER
# include <asm/kvm_book3s_64.h>
# endif
# ifdef CONFIG_KVM_BOOK3S_PR
2011-06-29 04:17:58 +04:00
static inline unsigned long kvmppc_interrupt_offset ( struct kvm_vcpu * vcpu )
{
return to_book3s ( vcpu ) - > hior ;
}
static inline void kvmppc_update_int_pending ( struct kvm_vcpu * vcpu ,
unsigned long pending_now , unsigned long old_pending )
{
if ( pending_now )
vcpu - > arch . shared - > int_pending = 1 ;
else if ( old_pending )
vcpu - > arch . shared - > int_pending = 0 ;
}
2010-04-16 02:11:40 +04:00
/*
 * Write general purpose register @num.
 *
 * Register numbers below 14 are stored in the shadow vcpu (both through
 * the svcpu_get()/svcpu_put() handle and through the book3s container's
 * shadow_vcpu pointer); all other registers go to vcpu->arch.gpr[].
 */
static inline void kvmppc_set_gpr(struct kvm_vcpu *vcpu, int num, ulong val)
{
	struct kvmppc_book3s_shadow_vcpu *shadow;

	if (num >= 14) {
		vcpu->arch.gpr[num] = val;
		return;
	}

	shadow = svcpu_get(vcpu);
	shadow->gpr[num] = val;
	svcpu_put(shadow);

	/* Second copy, written after the handle has been released. */
	to_book3s(vcpu)->shadow_vcpu->gpr[num] = val;
}
static inline ulong kvmppc_get_gpr ( struct kvm_vcpu * vcpu , int num )
{
2011-12-09 17:44:13 +04:00
if ( num < 14 ) {
struct kvmppc_book3s_shadow_vcpu * svcpu = svcpu_get ( vcpu ) ;
ulong r = svcpu - > gpr [ num ] ;
svcpu_put ( svcpu ) ;
return r ;
} else
2010-04-16 02:11:40 +04:00
return vcpu - > arch . gpr [ num ] ;
}
/*
 * Store @val as the guest CR, both via the svcpu_get()/svcpu_put() handle
 * and via the book3s container's shadow_vcpu pointer.
 */
static inline void kvmppc_set_cr(struct kvm_vcpu *vcpu, u32 val)
{
	struct kvmppc_book3s_shadow_vcpu *shadow;

	shadow = svcpu_get(vcpu);
	shadow->cr = val;
	svcpu_put(shadow);

	/* Second copy, written after the handle has been released. */
	to_book3s(vcpu)->shadow_vcpu->cr = val;
}
static inline u32 kvmppc_get_cr ( struct kvm_vcpu * vcpu )
{
2011-12-09 17:44:13 +04:00
struct kvmppc_book3s_shadow_vcpu * svcpu = svcpu_get ( vcpu ) ;
u32 r ;
r = svcpu - > cr ;
svcpu_put ( svcpu ) ;
return r ;
2010-04-16 02:11:40 +04:00
}
/*
 * Store @val as the guest XER, both via the svcpu_get()/svcpu_put() handle
 * and via the book3s container's shadow_vcpu pointer.  Both stores happen
 * before the handle is released.
 */
static inline void kvmppc_set_xer(struct kvm_vcpu *vcpu, u32 val)
{
	struct kvmppc_book3s_shadow_vcpu *shadow;

	shadow = svcpu_get(vcpu);
	shadow->xer = val;
	to_book3s(vcpu)->shadow_vcpu->xer = val;
	svcpu_put(shadow);
}
static inline u32 kvmppc_get_xer ( struct kvm_vcpu * vcpu )
{
2011-12-09 17:44:13 +04:00
struct kvmppc_book3s_shadow_vcpu * svcpu = svcpu_get ( vcpu ) ;
u32 r ;
r = svcpu - > xer ;
svcpu_put ( svcpu ) ;
return r ;
2010-04-16 02:11:40 +04:00
}
/* Store @val as the guest CTR in the shadow vcpu. */
static inline void kvmppc_set_ctr(struct kvm_vcpu *vcpu, ulong val)
{
	struct kvmppc_book3s_shadow_vcpu *shadow;

	shadow = svcpu_get(vcpu);
	shadow->ctr = val;
	svcpu_put(shadow);
}
static inline ulong kvmppc_get_ctr ( struct kvm_vcpu * vcpu )
{
2011-12-09 17:44:13 +04:00
struct kvmppc_book3s_shadow_vcpu * svcpu = svcpu_get ( vcpu ) ;
ulong r ;
r = svcpu - > ctr ;
svcpu_put ( svcpu ) ;
return r ;
2010-04-16 02:11:40 +04:00
}
/* Store @val as the guest LR in the shadow vcpu. */
static inline void kvmppc_set_lr(struct kvm_vcpu *vcpu, ulong val)
{
	struct kvmppc_book3s_shadow_vcpu *shadow;

	shadow = svcpu_get(vcpu);
	shadow->lr = val;
	svcpu_put(shadow);
}
static inline ulong kvmppc_get_lr ( struct kvm_vcpu * vcpu )
{
2011-12-09 17:44:13 +04:00
struct kvmppc_book3s_shadow_vcpu * svcpu = svcpu_get ( vcpu ) ;
ulong r ;
r = svcpu - > lr ;
svcpu_put ( svcpu ) ;
return r ;
2010-04-16 02:11:40 +04:00
}
/* Store @val as the guest PC in the shadow vcpu. */
static inline void kvmppc_set_pc(struct kvm_vcpu *vcpu, ulong val)
{
	struct kvmppc_book3s_shadow_vcpu *shadow;

	shadow = svcpu_get(vcpu);
	shadow->pc = val;
	svcpu_put(shadow);
}
static inline ulong kvmppc_get_pc ( struct kvm_vcpu * vcpu )
{
2011-12-09 17:44:13 +04:00
struct kvmppc_book3s_shadow_vcpu * svcpu = svcpu_get ( vcpu ) ;
ulong r ;
r = svcpu - > pc ;
svcpu_put ( svcpu ) ;
return r ;
2010-04-16 02:11:40 +04:00
}
static inline u32 kvmppc_get_last_inst ( struct kvm_vcpu * vcpu )
{
ulong pc = kvmppc_get_pc ( vcpu ) ;
2011-12-09 17:44:13 +04:00
struct kvmppc_book3s_shadow_vcpu * svcpu = svcpu_get ( vcpu ) ;
u32 r ;
2010-04-16 02:11:40 +04:00
/* Load the instruction manually if it failed to do so in the
* exit path */
if ( svcpu - > last_inst = = KVM_INST_FETCH_FAILED )
kvmppc_ld ( vcpu , & pc , sizeof ( u32 ) , & svcpu - > last_inst , false ) ;
2011-12-09 17:44:13 +04:00
r = svcpu - > last_inst ;
svcpu_put ( svcpu ) ;
return r ;
2010-04-16 02:11:40 +04:00
}
static inline ulong kvmppc_get_fault_dar ( struct kvm_vcpu * vcpu )
{
2011-12-09 17:44:13 +04:00
struct kvmppc_book3s_shadow_vcpu * svcpu = svcpu_get ( vcpu ) ;
ulong r ;
r = svcpu - > fault_dar ;
svcpu_put ( svcpu ) ;
return r ;
2010-04-16 02:11:40 +04:00
}
2009-10-30 08:47:05 +03:00
2011-06-29 04:17:58 +04:00
static inline bool kvmppc_critical_section ( struct kvm_vcpu * vcpu )
{
ulong crit_raw = vcpu - > arch . shared - > critical ;
ulong crit_r1 = kvmppc_get_gpr ( vcpu , 1 ) ;
bool crit ;
/* Truncate crit indicators in 32 bit mode */
if ( ! ( vcpu - > arch . shared - > msr & MSR_SF ) ) {
crit_raw & = 0xffffffff ;
crit_r1 & = 0xffffffff ;
}
/* Critical section when crit == r1 */
crit = ( crit_raw = = crit_r1 ) ;
/* ... and we're in supervisor mode */
crit = crit & & ! ( vcpu - > arch . shared - > msr & MSR_PR ) ;
return crit ;
}
KVM: PPC: Add support for Book3S processors in hypervisor mode
This adds support for KVM running on 64-bit Book 3S processors,
specifically POWER7, in hypervisor mode. Using hypervisor mode means
that the guest can use the processor's supervisor mode. That means
that the guest can execute privileged instructions and access privileged
registers itself without trapping to the host. This gives excellent
performance, but does mean that KVM cannot emulate a processor
architecture other than the one that the hardware implements.
This code assumes that the guest is running paravirtualized using the
PAPR (Power Architecture Platform Requirements) interface, which is the
interface that IBM's PowerVM hypervisor uses. That means that existing
Linux distributions that run on IBM pSeries machines will also run
under KVM without modification. In order to communicate the PAPR
hypercalls to qemu, this adds a new KVM_EXIT_PAPR_HCALL exit code
to include/linux/kvm.h.
Currently the choice between book3s_hv support and book3s_pr support
(i.e. the existing code, which runs the guest in user mode) has to be
made at kernel configuration time, so a given kernel binary can only
do one or the other.
This new book3s_hv code doesn't support MMIO emulation at present.
Since we are running paravirtualized guests, this isn't a serious
restriction.
With the guest running in supervisor mode, most exceptions go straight
to the guest. We will never get data or instruction storage or segment
interrupts, alignment interrupts, decrementer interrupts, program
interrupts, single-step interrupts, etc., coming to the hypervisor from
the guest. Therefore this introduces a new KVMTEST_NONHV macro for the
exception entry path so that we don't have to do the KVM test on entry
to those exception handlers.
We do however get hypervisor decrementer, hypervisor data storage,
hypervisor instruction storage, and hypervisor emulation assist
interrupts, so we have to handle those.
In hypervisor mode, real-mode accesses can access all of RAM, not just
a limited amount. Therefore we put all the guest state in the vcpu.arch
and use the shadow_vcpu in the PACA only for temporary scratch space.
We allocate the vcpu with kzalloc rather than vzalloc, and we don't use
anything in the kvmppc_vcpu_book3s struct, so we don't allocate it.
We don't have a shared page with the guest, but we still need a
kvm_vcpu_arch_shared struct to store the values of various registers,
so we include one in the vcpu_arch struct.
The POWER7 processor has a restriction that all threads in a core have
to be in the same partition. MMU-on kernel code counts as a partition
(partition 0), so we have to do a partition switch on every entry to and
exit from the guest. At present we require the host and guest to run
in single-thread mode because of this hardware restriction.
This code allocates a hashed page table for the guest and initializes
it with HPTEs for the guest's Virtual Real Memory Area (VRMA). We
require that the guest memory is allocated using 16MB huge pages, in
order to simplify the low-level memory management. This also means that
we can get away without tracking paging activity in the host for now,
since huge pages can't be paged or swapped.
This also adds a few new exports needed by the book3s_hv code.
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
2011-06-29 04:21:34 +04:00
# else /* CONFIG_KVM_BOOK3S_PR */
/*
 * Variant used when CONFIG_KVM_BOOK3S_PR is not set: the interrupt offset
 * is always zero and @vcpu is not consulted.
 */
static inline unsigned long kvmppc_interrupt_offset(struct kvm_vcpu *vcpu)
{
	const unsigned long offset = 0;

	return offset;
}
/*
 * Variant used when CONFIG_KVM_BOOK3S_PR is not set: intentionally a
 * no-op, none of the arguments are used.
 */
static inline void kvmppc_update_int_pending(struct kvm_vcpu *vcpu,
			unsigned long pending_now, unsigned long old_pending)
{
	/* nothing to do */
}
/*
 * Write general purpose register @num.  In this (non-PR) variant every
 * register is stored in vcpu->arch.gpr[].
 */
static inline void kvmppc_set_gpr(struct kvm_vcpu *vcpu, int num, ulong val)
{
	vcpu->arch.gpr[num] = val;
}
static inline ulong kvmppc_get_gpr ( struct kvm_vcpu * vcpu , int num )
{
return vcpu - > arch . gpr [ num ] ;
}
/* Store @val as the guest CR in vcpu->arch (non-PR variant). */
static inline void kvmppc_set_cr(struct kvm_vcpu *vcpu, u32 val)
{
	vcpu->arch.cr = val;
}
static inline u32 kvmppc_get_cr ( struct kvm_vcpu * vcpu )
{
return vcpu - > arch . cr ;
}
/* Store @val as the guest XER in vcpu->arch (non-PR variant). */
static inline void kvmppc_set_xer(struct kvm_vcpu *vcpu, u32 val)
{
	vcpu->arch.xer = val;
}
static inline u32 kvmppc_get_xer ( struct kvm_vcpu * vcpu )
{
return vcpu - > arch . xer ;
}
/* Store @val as the guest CTR in vcpu->arch (non-PR variant). */
static inline void kvmppc_set_ctr(struct kvm_vcpu *vcpu, ulong val)
{
	vcpu->arch.ctr = val;
}
static inline ulong kvmppc_get_ctr ( struct kvm_vcpu * vcpu )
{
return vcpu - > arch . ctr ;
}
/* Store @val as the guest LR in vcpu->arch (non-PR variant). */
static inline void kvmppc_set_lr(struct kvm_vcpu *vcpu, ulong val)
{
	vcpu->arch.lr = val;
}
static inline ulong kvmppc_get_lr ( struct kvm_vcpu * vcpu )
{
return vcpu - > arch . lr ;
}
/* Store @val as the guest PC in vcpu->arch (non-PR variant). */
static inline void kvmppc_set_pc(struct kvm_vcpu *vcpu, ulong val)
{
	vcpu->arch.pc = val;
}
static inline ulong kvmppc_get_pc ( struct kvm_vcpu * vcpu )
{
return vcpu - > arch . pc ;
}
static inline u32 kvmppc_get_last_inst ( struct kvm_vcpu * vcpu )
{
ulong pc = kvmppc_get_pc ( vcpu ) ;
/* Load the instruction manually if it failed to do so in the
* exit path */
if ( vcpu - > arch . last_inst = = KVM_INST_FETCH_FAILED )
kvmppc_ld ( vcpu , & pc , sizeof ( u32 ) , & vcpu - > arch . last_inst , false ) ;
return vcpu - > arch . last_inst ;
}
static inline ulong kvmppc_get_fault_dar ( struct kvm_vcpu * vcpu )
{
return vcpu - > arch . fault_dar ;
}
/*
 * Variant used when CONFIG_KVM_BOOK3S_PR is not set: always reports that
 * the guest is not in a critical section; @vcpu is not consulted.
 */
static inline bool kvmppc_critical_section(struct kvm_vcpu *vcpu)
{
	const bool in_critical = false;

	return in_critical;
}
# endif
2011-06-29 04:17:58 +04:00
2010-03-24 23:48:30 +03:00
/*
 * Magic register values loaded into r3 and r4 before the 'sc' assembly
 * instruction for the OSI hypercalls.
 */
# define OSI_SC_MAGIC_R3 0x113724FA
# define OSI_SC_MAGIC_R4 0x77810F9B
2009-10-30 08:47:05 +03:00
/* 32-bit instruction word: primary opcode 31, extended opcode 1014, all
 * register fields zero.  NOTE(review): per the name this is the dcbz
 * encoding -- confirm against the Power ISA opcode tables. */
# define INS_DCBZ 0x7c0007ec
2013-03-21 00:24:58 +04:00
/* tw instruction word with TO = 31 (unconditional trap); the two register
 * fields are zero. */
# define INS_TW 0x7fe00008
2009-10-30 08:47:05 +03:00
2011-12-20 19:34:20 +04:00
/* LPIDs we support with this build -- runtime limit may be lower.
 * Derived from LPID_RSVD, which is defined outside this header. */
# define KVMPPC_NR_LPIDS (LPID_RSVD + 1)
2009-10-30 08:47:05 +03:00
# endif /* __ASM_KVM_BOOK3S_H__ */