Merge branch 'kvm-x86-mmu-6.6' into HEAD
KVM x86 MMU changes for 6.6:

 - Rip out the ancient MMU_DEBUG crud and replace the useful bits with
   CONFIG_KVM_PROVE_MMU

 - Overhaul KVM's page-track APIs, and KVMGT's usage, to reduce the API
   surface that is needed by external users (currently only KVMGT), and
   fix a variety of issues in the process

 - Fix KVM's handling of !visible guest roots to avoid premature triple
   fault injection by loading a dummy root backed by the zero page
commit d011151616
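For orientation before the diff: the page-track rework below shrinks the API that external users see to a notifier with track_write()/track_remove_region() callbacks plus kvm_write_track_add_gfn()/kvm_write_track_remove_gfn(). The following is a minimal sketch of how an external user such as KVMGT might consume the reworked API, based only on the declarations in the hunks below; it assumes CONFIG_KVM_EXTERNAL_WRITE_TRACKING, the demo_* names are hypothetical, and teardown/error handling is omitted.

/*
 * Illustrative sketch only: consuming the reworked write-track API.
 * The demo_* symbols are hypothetical.
 */
#include <linux/kvm_host.h>
#include <asm/kvm_page_track.h>

static void demo_track_write(gpa_t gpa, const u8 *new, int bytes,
                             struct kvm_page_track_notifier_node *node)
{
        /* The guest wrote @bytes at @gpa to a write-tracked gfn. */
}

static void demo_track_remove_region(gfn_t gfn, unsigned long nr_pages,
                                     struct kvm_page_track_notifier_node *node)
{
        /* The memslot covering [gfn, gfn + nr_pages) is being deleted. */
}

static struct kvm_page_track_notifier_node demo_node = {
        .track_write            = demo_track_write,
        .track_remove_region    = demo_track_remove_region,
};

static int demo_start_tracking(struct kvm *kvm, gfn_t gfn)
{
        int r;

        /* Registration can now fail, e.g. for a foreign mm, so check it. */
        r = kvm_page_track_register_notifier(kvm, &demo_node);
        if (r)
                return r;

        /* Write-protect @gfn; KVM resolves the memslot internally now. */
        return kvm_write_track_add_gfn(kvm, gfn);
}

Compared with the old API, the callbacks no longer receive a vcpu or memslot, and callers no longer pass a memslot or a tracking mode when protecting or unprotecting a gfn.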
@@ -288,13 +288,13 @@ struct kvm_kernel_irq_routing_entry;
  * kvm_mmu_page_role tracks the properties of a shadow page (where shadow page
  * also includes TDP pages) to determine whether or not a page can be used in
  * the given MMU context. This is a subset of the overall kvm_cpu_role to
- * minimize the size of kvm_memory_slot.arch.gfn_track, i.e. allows allocating
- * 2 bytes per gfn instead of 4 bytes per gfn.
+ * minimize the size of kvm_memory_slot.arch.gfn_write_track, i.e. allows
+ * allocating 2 bytes per gfn instead of 4 bytes per gfn.
  *
  * Upper-level shadow pages having gptes are tracked for write-protection via
- * gfn_track. As above, gfn_track is a 16 bit counter, so KVM must not create
- * more than 2^16-1 upper-level shadow pages at a single gfn, otherwise
- * gfn_track will overflow and explosions will ensure.
+ * gfn_write_track. As above, gfn_write_track is a 16 bit counter, so KVM must
+ * not create more than 2^16-1 upper-level shadow pages at a single gfn,
+ * otherwise gfn_write_track will overflow and explosions will ensue.
  *
  * A unique shadow page (SP) for a gfn is created if and only if an existing SP
  * cannot be reused. The ability to reuse a SP is tracked by its role, which
@@ -1023,7 +1023,7 @@ struct kvm_lpage_info {
 struct kvm_arch_memory_slot {
        struct kvm_rmap_head *rmap[KVM_NR_PAGE_SIZES];
        struct kvm_lpage_info *lpage_info[KVM_NR_PAGE_SIZES - 1];
-       unsigned short *gfn_track[KVM_PAGE_TRACK_MAX];
+       unsigned short *gfn_write_track;
 };
 
 /*
@@ -1265,8 +1265,9 @@ struct kvm_arch {
         * create an NX huge page (without hanging the guest).
         */
        struct list_head possible_nx_huge_pages;
-       struct kvm_page_track_notifier_node mmu_sp_tracker;
+#ifdef CONFIG_KVM_EXTERNAL_WRITE_TRACKING
        struct kvm_page_track_notifier_head track_notifier_head;
+#endif
        /*
         * Protects marking pages unsync during page faults, as TDP MMU page
         * faults only take mmu_lock for read. For simplicity, the unsync
@@ -1853,7 +1854,6 @@ void kvm_mmu_zap_collapsible_sptes(struct kvm *kvm,
                                   const struct kvm_memory_slot *memslot);
 void kvm_mmu_slot_leaf_clear_dirty(struct kvm *kvm,
                                   const struct kvm_memory_slot *memslot);
-void kvm_mmu_zap_all(struct kvm *kvm);
 void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm, u64 gen);
 void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned long kvm_nr_mmu_pages);
 
@ -2,11 +2,9 @@
|
||||
#ifndef _ASM_X86_KVM_PAGE_TRACK_H
|
||||
#define _ASM_X86_KVM_PAGE_TRACK_H
|
||||
|
||||
enum kvm_page_track_mode {
|
||||
KVM_PAGE_TRACK_WRITE,
|
||||
KVM_PAGE_TRACK_MAX,
|
||||
};
|
||||
#include <linux/kvm_types.h>
|
||||
|
||||
#ifdef CONFIG_KVM_EXTERNAL_WRITE_TRACKING
|
||||
/*
|
||||
* The notifier represented by @kvm_page_track_notifier_node is linked into
|
||||
* the head which will be notified when guest is triggering the track event.
|
||||
@ -26,54 +24,39 @@ struct kvm_page_track_notifier_node {
|
||||
* It is called when guest is writing the write-tracked page
|
||||
* and write emulation is finished at that time.
|
||||
*
|
||||
* @vcpu: the vcpu where the write access happened.
|
||||
* @gpa: the physical address written by guest.
|
||||
* @new: the data was written to the address.
|
||||
* @bytes: the written length.
|
||||
* @node: this node
|
||||
*/
|
||||
void (*track_write)(struct kvm_vcpu *vcpu, gpa_t gpa, const u8 *new,
|
||||
int bytes, struct kvm_page_track_notifier_node *node);
|
||||
/*
|
||||
* It is called when memory slot is being moved or removed
|
||||
* users can drop write-protection for the pages in that memory slot
|
||||
*
|
||||
* @kvm: the kvm where memory slot being moved or removed
|
||||
* @slot: the memory slot being moved or removed
|
||||
* @node: this node
|
||||
*/
|
||||
void (*track_flush_slot)(struct kvm *kvm, struct kvm_memory_slot *slot,
|
||||
void (*track_write)(gpa_t gpa, const u8 *new, int bytes,
|
||||
struct kvm_page_track_notifier_node *node);
|
||||
|
||||
/*
|
||||
* Invoked when a memory region is removed from the guest. Or in KVM
|
||||
* terms, when a memslot is deleted.
|
||||
*
|
||||
* @gfn: base gfn of the region being removed
|
||||
* @nr_pages: number of pages in the to-be-removed region
|
||||
* @node: this node
|
||||
*/
|
||||
void (*track_remove_region)(gfn_t gfn, unsigned long nr_pages,
|
||||
struct kvm_page_track_notifier_node *node);
|
||||
};
|
||||
|
||||
int kvm_page_track_init(struct kvm *kvm);
|
||||
void kvm_page_track_cleanup(struct kvm *kvm);
|
||||
int kvm_page_track_register_notifier(struct kvm *kvm,
|
||||
struct kvm_page_track_notifier_node *n);
|
||||
void kvm_page_track_unregister_notifier(struct kvm *kvm,
|
||||
struct kvm_page_track_notifier_node *n);
|
||||
|
||||
bool kvm_page_track_write_tracking_enabled(struct kvm *kvm);
|
||||
int kvm_page_track_write_tracking_alloc(struct kvm_memory_slot *slot);
|
||||
int kvm_write_track_add_gfn(struct kvm *kvm, gfn_t gfn);
|
||||
int kvm_write_track_remove_gfn(struct kvm *kvm, gfn_t gfn);
|
||||
#else
|
||||
/*
|
||||
* Allow defining a node in a structure even if page tracking is disabled, e.g.
|
||||
* to play nice with testing headers via direct inclusion from the command line.
|
||||
*/
|
||||
struct kvm_page_track_notifier_node {};
|
||||
#endif /* CONFIG_KVM_EXTERNAL_WRITE_TRACKING */
|
||||
|
||||
void kvm_page_track_free_memslot(struct kvm_memory_slot *slot);
|
||||
int kvm_page_track_create_memslot(struct kvm *kvm,
|
||||
struct kvm_memory_slot *slot,
|
||||
unsigned long npages);
|
||||
|
||||
void kvm_slot_page_track_add_page(struct kvm *kvm,
|
||||
struct kvm_memory_slot *slot, gfn_t gfn,
|
||||
enum kvm_page_track_mode mode);
|
||||
void kvm_slot_page_track_remove_page(struct kvm *kvm,
|
||||
struct kvm_memory_slot *slot, gfn_t gfn,
|
||||
enum kvm_page_track_mode mode);
|
||||
bool kvm_slot_page_track_is_active(struct kvm *kvm,
|
||||
const struct kvm_memory_slot *slot,
|
||||
gfn_t gfn, enum kvm_page_track_mode mode);
|
||||
|
||||
void
|
||||
kvm_page_track_register_notifier(struct kvm *kvm,
|
||||
struct kvm_page_track_notifier_node *n);
|
||||
void
|
||||
kvm_page_track_unregister_notifier(struct kvm *kvm,
|
||||
struct kvm_page_track_notifier_node *n);
|
||||
void kvm_page_track_write(struct kvm_vcpu *vcpu, gpa_t gpa, const u8 *new,
|
||||
int bytes);
|
||||
void kvm_page_track_flush_slot(struct kvm *kvm, struct kvm_memory_slot *slot);
|
||||
#endif
|
||||
|
@@ -138,6 +138,19 @@ config KVM_XEN
 
          If in doubt, say "N".
 
+config KVM_PROVE_MMU
+       bool "Prove KVM MMU correctness"
+       depends on DEBUG_KERNEL
+       depends on KVM
+       depends on EXPERT
+       help
+         Enables runtime assertions in KVM's MMU that are too costly to enable
+         in anything remotely resembling a production environment, e.g. this
+         gates code that verifies a to-be-freed page table doesn't have any
+         present SPTEs.
+
+         If in doubt, say "N".
+
 config KVM_EXTERNAL_WRITE_TRACKING
        bool
 
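As a usage note for the new entry above, a hypothetical .config fragment for a debug build that satisfies the listed dependencies and turns the assertions on (not part of this change; KVM_PROVE_MMU defaults to off):

# Hypothetical debug-build fragment; KVM_PROVE_MMU is gated on the
# "depends on" lines in the Kconfig hunk above.
CONFIG_DEBUG_KERNEL=y
CONFIG_EXPERT=y
CONFIG_KVM=m
CONFIG_KVM_PROVE_MMU=y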
@@ -121,6 +121,8 @@ void kvm_mmu_unload(struct kvm_vcpu *vcpu);
 void kvm_mmu_free_obsolete_roots(struct kvm_vcpu *vcpu);
 void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu);
 void kvm_mmu_sync_prev_roots(struct kvm_vcpu *vcpu);
+void kvm_mmu_track_write(struct kvm_vcpu *vcpu, gpa_t gpa, const u8 *new,
+                        int bytes);
 
 static inline int kvm_mmu_reload(struct kvm_vcpu *vcpu)
 {
@ -25,6 +25,7 @@
|
||||
#include "kvm_cache_regs.h"
|
||||
#include "smm.h"
|
||||
#include "kvm_emulate.h"
|
||||
#include "page_track.h"
|
||||
#include "cpuid.h"
|
||||
#include "spte.h"
|
||||
|
||||
@ -53,7 +54,7 @@
|
||||
#include <asm/io.h>
|
||||
#include <asm/set_memory.h>
|
||||
#include <asm/vmx.h>
|
||||
#include <asm/kvm_page_track.h>
|
||||
|
||||
#include "trace.h"
|
||||
|
||||
extern bool itlb_multihit_kvm_mitigation;
|
||||
@ -115,11 +116,6 @@ static int max_huge_page_level __read_mostly;
|
||||
static int tdp_root_level __read_mostly;
|
||||
static int max_tdp_level __read_mostly;
|
||||
|
||||
#ifdef MMU_DEBUG
|
||||
bool dbg = 0;
|
||||
module_param(dbg, bool, 0644);
|
||||
#endif
|
||||
|
||||
#define PTE_PREFETCH_NUM 8
|
||||
|
||||
#include <trace/events/kvm.h>
|
||||
@ -486,7 +482,7 @@ retry:
|
||||
*/
|
||||
static void mmu_spte_set(u64 *sptep, u64 new_spte)
|
||||
{
|
||||
WARN_ON(is_shadow_present_pte(*sptep));
|
||||
WARN_ON_ONCE(is_shadow_present_pte(*sptep));
|
||||
__set_spte(sptep, new_spte);
|
||||
}
|
||||
|
||||
@ -498,7 +494,7 @@ static u64 mmu_spte_update_no_track(u64 *sptep, u64 new_spte)
|
||||
{
|
||||
u64 old_spte = *sptep;
|
||||
|
||||
WARN_ON(!is_shadow_present_pte(new_spte));
|
||||
WARN_ON_ONCE(!is_shadow_present_pte(new_spte));
|
||||
check_spte_writable_invariants(new_spte);
|
||||
|
||||
if (!is_shadow_present_pte(old_spte)) {
|
||||
@ -511,7 +507,7 @@ static u64 mmu_spte_update_no_track(u64 *sptep, u64 new_spte)
|
||||
else
|
||||
old_spte = __update_clear_spte_slow(sptep, new_spte);
|
||||
|
||||
WARN_ON(spte_to_pfn(old_spte) != spte_to_pfn(new_spte));
|
||||
WARN_ON_ONCE(spte_to_pfn(old_spte) != spte_to_pfn(new_spte));
|
||||
|
||||
return old_spte;
|
||||
}
|
||||
@ -593,7 +589,7 @@ static u64 mmu_spte_clear_track_bits(struct kvm *kvm, u64 *sptep)
|
||||
* by a refcounted page, the refcount is elevated.
|
||||
*/
|
||||
page = kvm_pfn_to_refcounted_page(pfn);
|
||||
WARN_ON(page && !page_count(page));
|
||||
WARN_ON_ONCE(page && !page_count(page));
|
||||
|
||||
if (is_accessed_spte(old_spte))
|
||||
kvm_set_pfn_accessed(pfn);
|
||||
@ -808,7 +804,7 @@ static void update_gfn_disallow_lpage_count(const struct kvm_memory_slot *slot,
|
||||
for (i = PG_LEVEL_2M; i <= KVM_MAX_HUGEPAGE_LEVEL; ++i) {
|
||||
linfo = lpage_info_slot(gfn, slot, i);
|
||||
linfo->disallow_lpage += count;
|
||||
WARN_ON(linfo->disallow_lpage < 0);
|
||||
WARN_ON_ONCE(linfo->disallow_lpage < 0);
|
||||
}
|
||||
}
|
||||
|
||||
@ -835,8 +831,7 @@ static void account_shadowed(struct kvm *kvm, struct kvm_mmu_page *sp)
|
||||
|
||||
/* the non-leaf shadow pages are keeping readonly. */
|
||||
if (sp->role.level > PG_LEVEL_4K)
|
||||
return kvm_slot_page_track_add_page(kvm, slot, gfn,
|
||||
KVM_PAGE_TRACK_WRITE);
|
||||
return __kvm_write_track_add_gfn(kvm, slot, gfn);
|
||||
|
||||
kvm_mmu_gfn_disallow_lpage(slot, gfn);
|
||||
|
||||
@ -882,8 +877,7 @@ static void unaccount_shadowed(struct kvm *kvm, struct kvm_mmu_page *sp)
|
||||
slots = kvm_memslots_for_spte_role(kvm, sp->role);
|
||||
slot = __gfn_to_memslot(slots, gfn);
|
||||
if (sp->role.level > PG_LEVEL_4K)
|
||||
return kvm_slot_page_track_remove_page(kvm, slot, gfn,
|
||||
KVM_PAGE_TRACK_WRITE);
|
||||
return __kvm_write_track_remove_gfn(kvm, slot, gfn);
|
||||
|
||||
kvm_mmu_gfn_allow_lpage(slot, gfn);
|
||||
}
|
||||
@ -937,10 +931,8 @@ static int pte_list_add(struct kvm_mmu_memory_cache *cache, u64 *spte,
|
||||
int count = 0;
|
||||
|
||||
if (!rmap_head->val) {
|
||||
rmap_printk("%p %llx 0->1\n", spte, *spte);
|
||||
rmap_head->val = (unsigned long)spte;
|
||||
} else if (!(rmap_head->val & 1)) {
|
||||
rmap_printk("%p %llx 1->many\n", spte, *spte);
|
||||
desc = kvm_mmu_memory_cache_alloc(cache);
|
||||
desc->sptes[0] = (u64 *)rmap_head->val;
|
||||
desc->sptes[1] = spte;
|
||||
@ -949,7 +941,6 @@ static int pte_list_add(struct kvm_mmu_memory_cache *cache, u64 *spte,
|
||||
rmap_head->val = (unsigned long)desc | 1;
|
||||
++count;
|
||||
} else {
|
||||
rmap_printk("%p %llx many->many\n", spte, *spte);
|
||||
desc = (struct pte_list_desc *)(rmap_head->val & ~1ul);
|
||||
count = desc->tail_count + desc->spte_count;
|
||||
|
||||
@ -969,7 +960,8 @@ static int pte_list_add(struct kvm_mmu_memory_cache *cache, u64 *spte,
|
||||
return count;
|
||||
}
|
||||
|
||||
static void pte_list_desc_remove_entry(struct kvm_rmap_head *rmap_head,
|
||||
static void pte_list_desc_remove_entry(struct kvm *kvm,
|
||||
struct kvm_rmap_head *rmap_head,
|
||||
struct pte_list_desc *desc, int i)
|
||||
{
|
||||
struct pte_list_desc *head_desc = (struct pte_list_desc *)(rmap_head->val & ~1ul);
|
||||
@ -980,7 +972,7 @@ static void pte_list_desc_remove_entry(struct kvm_rmap_head *rmap_head,
|
||||
* when adding an entry and the previous head is full, and heads are
|
||||
* removed (this flow) when they become empty.
|
||||
*/
|
||||
BUG_ON(j < 0);
|
||||
KVM_BUG_ON_DATA_CORRUPTION(j < 0, kvm);
|
||||
|
||||
/*
|
||||
* Replace the to-be-freed SPTE with the last valid entry from the head
|
||||
@ -1005,35 +997,34 @@ static void pte_list_desc_remove_entry(struct kvm_rmap_head *rmap_head,
|
||||
mmu_free_pte_list_desc(head_desc);
|
||||
}
|
||||
|
||||
static void pte_list_remove(u64 *spte, struct kvm_rmap_head *rmap_head)
|
||||
static void pte_list_remove(struct kvm *kvm, u64 *spte,
|
||||
struct kvm_rmap_head *rmap_head)
|
||||
{
|
||||
struct pte_list_desc *desc;
|
||||
int i;
|
||||
|
||||
if (!rmap_head->val) {
|
||||
pr_err("%s: %p 0->BUG\n", __func__, spte);
|
||||
BUG();
|
||||
} else if (!(rmap_head->val & 1)) {
|
||||
rmap_printk("%p 1->0\n", spte);
|
||||
if ((u64 *)rmap_head->val != spte) {
|
||||
pr_err("%s: %p 1->BUG\n", __func__, spte);
|
||||
BUG();
|
||||
}
|
||||
if (KVM_BUG_ON_DATA_CORRUPTION(!rmap_head->val, kvm))
|
||||
return;
|
||||
|
||||
if (!(rmap_head->val & 1)) {
|
||||
if (KVM_BUG_ON_DATA_CORRUPTION((u64 *)rmap_head->val != spte, kvm))
|
||||
return;
|
||||
|
||||
rmap_head->val = 0;
|
||||
} else {
|
||||
rmap_printk("%p many->many\n", spte);
|
||||
desc = (struct pte_list_desc *)(rmap_head->val & ~1ul);
|
||||
while (desc) {
|
||||
for (i = 0; i < desc->spte_count; ++i) {
|
||||
if (desc->sptes[i] == spte) {
|
||||
pte_list_desc_remove_entry(rmap_head, desc, i);
|
||||
pte_list_desc_remove_entry(kvm, rmap_head,
|
||||
desc, i);
|
||||
return;
|
||||
}
|
||||
}
|
||||
desc = desc->more;
|
||||
}
|
||||
pr_err("%s: %p many->many\n", __func__, spte);
|
||||
BUG();
|
||||
|
||||
KVM_BUG_ON_DATA_CORRUPTION(true, kvm);
|
||||
}
|
||||
}
|
||||
|
||||
@ -1041,7 +1032,7 @@ static void kvm_zap_one_rmap_spte(struct kvm *kvm,
|
||||
struct kvm_rmap_head *rmap_head, u64 *sptep)
|
||||
{
|
||||
mmu_spte_clear_track_bits(kvm, sptep);
|
||||
pte_list_remove(sptep, rmap_head);
|
||||
pte_list_remove(kvm, sptep, rmap_head);
|
||||
}
|
||||
|
||||
/* Return true if at least one SPTE was zapped, false otherwise */
|
||||
@ -1116,7 +1107,7 @@ static void rmap_remove(struct kvm *kvm, u64 *spte)
|
||||
slot = __gfn_to_memslot(slots, gfn);
|
||||
rmap_head = gfn_to_rmap(gfn, sp->role.level, slot);
|
||||
|
||||
pte_list_remove(spte, rmap_head);
|
||||
pte_list_remove(kvm, spte, rmap_head);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -1208,7 +1199,7 @@ static void drop_large_spte(struct kvm *kvm, u64 *sptep, bool flush)
|
||||
struct kvm_mmu_page *sp;
|
||||
|
||||
sp = sptep_to_sp(sptep);
|
||||
WARN_ON(sp->role.level == PG_LEVEL_4K);
|
||||
WARN_ON_ONCE(sp->role.level == PG_LEVEL_4K);
|
||||
|
||||
drop_spte(kvm, sptep);
|
||||
|
||||
@ -1237,8 +1228,6 @@ static bool spte_write_protect(u64 *sptep, bool pt_protect)
|
||||
!(pt_protect && is_mmu_writable_spte(spte)))
|
||||
return false;
|
||||
|
||||
rmap_printk("spte %p %llx\n", sptep, *sptep);
|
||||
|
||||
if (pt_protect)
|
||||
spte &= ~shadow_mmu_writable_mask;
|
||||
spte = spte & ~PT_WRITABLE_MASK;
|
||||
@ -1263,9 +1252,7 @@ static bool spte_clear_dirty(u64 *sptep)
|
||||
{
|
||||
u64 spte = *sptep;
|
||||
|
||||
rmap_printk("spte %p %llx\n", sptep, *sptep);
|
||||
|
||||
MMU_WARN_ON(!spte_ad_enabled(spte));
|
||||
KVM_MMU_WARN_ON(!spte_ad_enabled(spte));
|
||||
spte &= ~shadow_dirty_mask;
|
||||
return mmu_spte_update(sptep, spte);
|
||||
}
|
||||
@ -1471,14 +1458,11 @@ static bool kvm_set_pte_rmap(struct kvm *kvm, struct kvm_rmap_head *rmap_head,
|
||||
u64 new_spte;
|
||||
kvm_pfn_t new_pfn;
|
||||
|
||||
WARN_ON(pte_huge(pte));
|
||||
WARN_ON_ONCE(pte_huge(pte));
|
||||
new_pfn = pte_pfn(pte);
|
||||
|
||||
restart:
|
||||
for_each_rmap_spte(rmap_head, &iter, sptep) {
|
||||
rmap_printk("spte %p %llx gfn %llx (%d)\n",
|
||||
sptep, *sptep, gfn, level);
|
||||
|
||||
need_flush = true;
|
||||
|
||||
if (pte_write(pte)) {
|
||||
@ -1706,21 +1690,19 @@ bool kvm_test_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
|
||||
return young;
|
||||
}
|
||||
|
||||
#ifdef MMU_DEBUG
|
||||
static int is_empty_shadow_page(u64 *spt)
|
||||
static void kvm_mmu_check_sptes_at_free(struct kvm_mmu_page *sp)
|
||||
{
|
||||
u64 *pos;
|
||||
u64 *end;
|
||||
#ifdef CONFIG_KVM_PROVE_MMU
|
||||
int i;
|
||||
|
||||
for (pos = spt, end = pos + SPTE_ENT_PER_PAGE; pos != end; pos++)
|
||||
if (is_shadow_present_pte(*pos)) {
|
||||
printk(KERN_ERR "%s: %p %llx\n", __func__,
|
||||
pos, *pos);
|
||||
return 0;
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
for (i = 0; i < SPTE_ENT_PER_PAGE; i++) {
|
||||
if (KVM_MMU_WARN_ON(is_shadow_present_pte(sp->spt[i])))
|
||||
pr_err_ratelimited("SPTE %llx (@ %p) for gfn %llx shadow-present at free",
|
||||
sp->spt[i], &sp->spt[i],
|
||||
kvm_mmu_page_get_gfn(sp, i));
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
/*
|
||||
* This value is the sum of all of the kvm instances's
|
||||
@ -1748,7 +1730,8 @@ static void kvm_unaccount_mmu_page(struct kvm *kvm, struct kvm_mmu_page *sp)
|
||||
|
||||
static void kvm_mmu_free_shadow_page(struct kvm_mmu_page *sp)
|
||||
{
|
||||
MMU_WARN_ON(!is_empty_shadow_page(sp->spt));
|
||||
kvm_mmu_check_sptes_at_free(sp);
|
||||
|
||||
hlist_del(&sp->hash_link);
|
||||
list_del(&sp->link);
|
||||
free_page((unsigned long)sp->spt);
|
||||
@ -1771,16 +1754,16 @@ static void mmu_page_add_parent_pte(struct kvm_mmu_memory_cache *cache,
|
||||
pte_list_add(cache, parent_pte, &sp->parent_ptes);
|
||||
}
|
||||
|
||||
static void mmu_page_remove_parent_pte(struct kvm_mmu_page *sp,
|
||||
static void mmu_page_remove_parent_pte(struct kvm *kvm, struct kvm_mmu_page *sp,
|
||||
u64 *parent_pte)
|
||||
{
|
||||
pte_list_remove(parent_pte, &sp->parent_ptes);
|
||||
pte_list_remove(kvm, parent_pte, &sp->parent_ptes);
|
||||
}
|
||||
|
||||
static void drop_parent_pte(struct kvm_mmu_page *sp,
|
||||
static void drop_parent_pte(struct kvm *kvm, struct kvm_mmu_page *sp,
|
||||
u64 *parent_pte)
|
||||
{
|
||||
mmu_page_remove_parent_pte(sp, parent_pte);
|
||||
mmu_page_remove_parent_pte(kvm, sp, parent_pte);
|
||||
mmu_spte_clear_no_track(parent_pte);
|
||||
}
|
||||
|
||||
@ -1836,7 +1819,7 @@ static int mmu_pages_add(struct kvm_mmu_pages *pvec, struct kvm_mmu_page *sp,
|
||||
static inline void clear_unsync_child_bit(struct kvm_mmu_page *sp, int idx)
|
||||
{
|
||||
--sp->unsync_children;
|
||||
WARN_ON((int)sp->unsync_children < 0);
|
||||
WARN_ON_ONCE((int)sp->unsync_children < 0);
|
||||
__clear_bit(idx, sp->unsync_child_bitmap);
|
||||
}
|
||||
|
||||
@ -1894,7 +1877,7 @@ static int mmu_unsync_walk(struct kvm_mmu_page *sp,
|
||||
|
||||
static void kvm_unlink_unsync_page(struct kvm *kvm, struct kvm_mmu_page *sp)
|
||||
{
|
||||
WARN_ON(!sp->unsync);
|
||||
WARN_ON_ONCE(!sp->unsync);
|
||||
trace_kvm_mmu_sync_page(sp);
|
||||
sp->unsync = 0;
|
||||
--kvm->stat.mmu_unsync;
|
||||
@ -2069,11 +2052,11 @@ static int mmu_pages_first(struct kvm_mmu_pages *pvec,
|
||||
if (pvec->nr == 0)
|
||||
return 0;
|
||||
|
||||
WARN_ON(pvec->page[0].idx != INVALID_INDEX);
|
||||
WARN_ON_ONCE(pvec->page[0].idx != INVALID_INDEX);
|
||||
|
||||
sp = pvec->page[0].sp;
|
||||
level = sp->role.level;
|
||||
WARN_ON(level == PG_LEVEL_4K);
|
||||
WARN_ON_ONCE(level == PG_LEVEL_4K);
|
||||
|
||||
parents->parent[level-2] = sp;
|
||||
|
||||
@ -2095,7 +2078,7 @@ static void mmu_pages_clear_parents(struct mmu_page_path *parents)
|
||||
if (!sp)
|
||||
return;
|
||||
|
||||
WARN_ON(idx == INVALID_INDEX);
|
||||
WARN_ON_ONCE(idx == INVALID_INDEX);
|
||||
clear_unsync_child_bit(sp, idx);
|
||||
level++;
|
||||
} while (!sp->unsync_children);
|
||||
@ -2216,7 +2199,7 @@ static struct kvm_mmu_page *kvm_mmu_find_shadow_page(struct kvm *kvm,
|
||||
if (ret < 0)
|
||||
break;
|
||||
|
||||
WARN_ON(!list_empty(&invalid_list));
|
||||
WARN_ON_ONCE(!list_empty(&invalid_list));
|
||||
if (ret > 0)
|
||||
kvm_flush_remote_tlbs(kvm);
|
||||
}
|
||||
@ -2495,7 +2478,7 @@ static void validate_direct_spte(struct kvm_vcpu *vcpu, u64 *sptep,
|
||||
if (child->role.access == direct_access)
|
||||
return;
|
||||
|
||||
drop_parent_pte(child, sptep);
|
||||
drop_parent_pte(vcpu->kvm, child, sptep);
|
||||
kvm_flush_remote_tlbs_sptep(vcpu->kvm, sptep);
|
||||
}
|
||||
}
|
||||
@ -2513,7 +2496,7 @@ static int mmu_page_zap_pte(struct kvm *kvm, struct kvm_mmu_page *sp,
|
||||
drop_spte(kvm, spte);
|
||||
} else {
|
||||
child = spte_to_child_sp(pte);
|
||||
drop_parent_pte(child, spte);
|
||||
drop_parent_pte(kvm, child, spte);
|
||||
|
||||
/*
|
||||
* Recursively zap nested TDP SPs, parentless SPs are
|
||||
@ -2544,13 +2527,13 @@ static int kvm_mmu_page_unlink_children(struct kvm *kvm,
|
||||
return zapped;
|
||||
}
|
||||
|
||||
static void kvm_mmu_unlink_parents(struct kvm_mmu_page *sp)
|
||||
static void kvm_mmu_unlink_parents(struct kvm *kvm, struct kvm_mmu_page *sp)
|
||||
{
|
||||
u64 *sptep;
|
||||
struct rmap_iterator iter;
|
||||
|
||||
while ((sptep = rmap_get_first(&sp->parent_ptes, &iter)))
|
||||
drop_parent_pte(sp, sptep);
|
||||
drop_parent_pte(kvm, sp, sptep);
|
||||
}
|
||||
|
||||
static int mmu_zap_unsync_children(struct kvm *kvm,
|
||||
@ -2589,7 +2572,7 @@ static bool __kvm_mmu_prepare_zap_page(struct kvm *kvm,
|
||||
++kvm->stat.mmu_shadow_zapped;
|
||||
*nr_zapped = mmu_zap_unsync_children(kvm, sp, invalid_list);
|
||||
*nr_zapped += kvm_mmu_page_unlink_children(kvm, sp, invalid_list);
|
||||
kvm_mmu_unlink_parents(sp);
|
||||
kvm_mmu_unlink_parents(kvm, sp);
|
||||
|
||||
/* Zapping children means active_mmu_pages has become unstable. */
|
||||
list_unstable = *nr_zapped;
|
||||
@ -2671,7 +2654,7 @@ static void kvm_mmu_commit_zap_page(struct kvm *kvm,
|
||||
kvm_flush_remote_tlbs(kvm);
|
||||
|
||||
list_for_each_entry_safe(sp, nsp, invalid_list, link) {
|
||||
WARN_ON(!sp->role.invalid || sp->root_count);
|
||||
WARN_ON_ONCE(!sp->role.invalid || sp->root_count);
|
||||
kvm_mmu_free_shadow_page(sp);
|
||||
}
|
||||
}
|
||||
@ -2771,12 +2754,9 @@ int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn)
|
||||
LIST_HEAD(invalid_list);
|
||||
int r;
|
||||
|
||||
pgprintk("%s: looking for gfn %llx\n", __func__, gfn);
|
||||
r = 0;
|
||||
write_lock(&kvm->mmu_lock);
|
||||
for_each_gfn_valid_sp_with_gptes(kvm, sp, gfn) {
|
||||
pgprintk("%s: gfn %llx role %x\n", __func__, gfn,
|
||||
sp->role.word);
|
||||
r = 1;
|
||||
kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list);
|
||||
}
|
||||
@ -2827,7 +2807,7 @@ int mmu_try_to_unsync_pages(struct kvm *kvm, const struct kvm_memory_slot *slot,
|
||||
* track machinery is used to write-protect upper-level shadow pages,
|
||||
* i.e. this guards the role.level == 4K assertion below!
|
||||
*/
|
||||
if (kvm_slot_page_track_is_active(kvm, slot, gfn, KVM_PAGE_TRACK_WRITE))
|
||||
if (kvm_gfn_is_write_tracked(kvm, slot, gfn))
|
||||
return -EPERM;
|
||||
|
||||
/*
|
||||
@ -2869,7 +2849,7 @@ int mmu_try_to_unsync_pages(struct kvm *kvm, const struct kvm_memory_slot *slot,
|
||||
continue;
|
||||
}
|
||||
|
||||
WARN_ON(sp->role.level != PG_LEVEL_4K);
|
||||
WARN_ON_ONCE(sp->role.level != PG_LEVEL_4K);
|
||||
kvm_unsync_page(kvm, sp);
|
||||
}
|
||||
if (locked)
|
||||
@ -2934,9 +2914,6 @@ static int mmu_set_spte(struct kvm_vcpu *vcpu, struct kvm_memory_slot *slot,
|
||||
bool prefetch = !fault || fault->prefetch;
|
||||
bool write_fault = fault && fault->write;
|
||||
|
||||
pgprintk("%s: spte %llx write_fault %d gfn %llx\n", __func__,
|
||||
*sptep, write_fault, gfn);
|
||||
|
||||
if (unlikely(is_noslot_pfn(pfn))) {
|
||||
vcpu->stat.pf_mmio_spte_created++;
|
||||
mark_mmio_spte(vcpu, sptep, gfn, pte_access);
|
||||
@ -2953,11 +2930,9 @@ static int mmu_set_spte(struct kvm_vcpu *vcpu, struct kvm_memory_slot *slot,
|
||||
u64 pte = *sptep;
|
||||
|
||||
child = spte_to_child_sp(pte);
|
||||
drop_parent_pte(child, sptep);
|
||||
drop_parent_pte(vcpu->kvm, child, sptep);
|
||||
flush = true;
|
||||
} else if (pfn != spte_to_pfn(*sptep)) {
|
||||
pgprintk("hfn old %llx new %llx\n",
|
||||
spte_to_pfn(*sptep), pfn);
|
||||
drop_spte(vcpu->kvm, sptep);
|
||||
flush = true;
|
||||
} else
|
||||
@ -2982,8 +2957,6 @@ static int mmu_set_spte(struct kvm_vcpu *vcpu, struct kvm_memory_slot *slot,
|
||||
if (flush)
|
||||
kvm_flush_remote_tlbs_gfn(vcpu->kvm, gfn, level);
|
||||
|
||||
pgprintk("%s: setting spte %llx\n", __func__, *sptep);
|
||||
|
||||
if (!was_rmapped) {
|
||||
WARN_ON_ONCE(ret == RET_PF_SPURIOUS);
|
||||
rmap_add(vcpu, slot, sptep, gfn, pte_access);
|
||||
@ -3029,7 +3002,7 @@ static void __direct_pte_prefetch(struct kvm_vcpu *vcpu,
|
||||
u64 *spte, *start = NULL;
|
||||
int i;
|
||||
|
||||
WARN_ON(!sp->role.direct);
|
||||
WARN_ON_ONCE(!sp->role.direct);
|
||||
|
||||
i = spte_index(sptep) & ~(PTE_PREFETCH_NUM - 1);
|
||||
spte = sp->spt + i;
|
||||
@ -3570,12 +3543,8 @@ static void mmu_free_root_page(struct kvm *kvm, hpa_t *root_hpa,
|
||||
if (!VALID_PAGE(*root_hpa))
|
||||
return;
|
||||
|
||||
/*
|
||||
* The "root" may be a special root, e.g. a PAE entry, treat it as a
|
||||
* SPTE to ensure any non-PA bits are dropped.
|
||||
*/
|
||||
sp = spte_to_child_sp(*root_hpa);
|
||||
if (WARN_ON(!sp))
|
||||
sp = root_to_sp(*root_hpa);
|
||||
if (WARN_ON_ONCE(!sp))
|
||||
return;
|
||||
|
||||
if (is_tdp_mmu_page(sp))
|
||||
@ -3620,7 +3589,9 @@ void kvm_mmu_free_roots(struct kvm *kvm, struct kvm_mmu *mmu,
|
||||
&invalid_list);
|
||||
|
||||
if (free_active_root) {
|
||||
if (to_shadow_page(mmu->root.hpa)) {
|
||||
if (kvm_mmu_is_dummy_root(mmu->root.hpa)) {
|
||||
/* Nothing to cleanup for dummy roots. */
|
||||
} else if (root_to_sp(mmu->root.hpa)) {
|
||||
mmu_free_root_page(kvm, &mmu->root.hpa, &invalid_list);
|
||||
} else if (mmu->pae_root) {
|
||||
for (i = 0; i < 4; ++i) {
|
||||
@ -3644,6 +3615,7 @@ EXPORT_SYMBOL_GPL(kvm_mmu_free_roots);
|
||||
void kvm_mmu_free_guest_mode_roots(struct kvm *kvm, struct kvm_mmu *mmu)
|
||||
{
|
||||
unsigned long roots_to_free = 0;
|
||||
struct kvm_mmu_page *sp;
|
||||
hpa_t root_hpa;
|
||||
int i;
|
||||
|
||||
@ -3658,8 +3630,8 @@ void kvm_mmu_free_guest_mode_roots(struct kvm *kvm, struct kvm_mmu *mmu)
|
||||
if (!VALID_PAGE(root_hpa))
|
||||
continue;
|
||||
|
||||
if (!to_shadow_page(root_hpa) ||
|
||||
to_shadow_page(root_hpa)->role.guest_mode)
|
||||
sp = root_to_sp(root_hpa);
|
||||
if (!sp || sp->role.guest_mode)
|
||||
roots_to_free |= KVM_MMU_ROOT_PREVIOUS(i);
|
||||
}
|
||||
|
||||
@ -3667,19 +3639,6 @@ void kvm_mmu_free_guest_mode_roots(struct kvm *kvm, struct kvm_mmu *mmu)
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kvm_mmu_free_guest_mode_roots);
|
||||
|
||||
|
||||
static int mmu_check_root(struct kvm_vcpu *vcpu, gfn_t root_gfn)
|
||||
{
|
||||
int ret = 0;
|
||||
|
||||
if (!kvm_vcpu_is_visible_gfn(vcpu, root_gfn)) {
|
||||
kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
|
||||
ret = 1;
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static hpa_t mmu_alloc_root(struct kvm_vcpu *vcpu, gfn_t gfn, int quadrant,
|
||||
u8 level)
|
||||
{
|
||||
@ -3817,8 +3776,10 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
|
||||
root_pgd = kvm_mmu_get_guest_pgd(vcpu, mmu);
|
||||
root_gfn = root_pgd >> PAGE_SHIFT;
|
||||
|
||||
if (mmu_check_root(vcpu, root_gfn))
|
||||
return 1;
|
||||
if (!kvm_vcpu_is_visible_gfn(vcpu, root_gfn)) {
|
||||
mmu->root.hpa = kvm_mmu_get_dummy_root();
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* On SVM, reading PDPTRs might access guest memory, which might fault
|
||||
@ -3830,8 +3791,8 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
|
||||
if (!(pdptrs[i] & PT_PRESENT_MASK))
|
||||
continue;
|
||||
|
||||
if (mmu_check_root(vcpu, pdptrs[i] >> PAGE_SHIFT))
|
||||
return 1;
|
||||
if (!kvm_vcpu_is_visible_gfn(vcpu, pdptrs[i] >> PAGE_SHIFT))
|
||||
pdptrs[i] = 0;
|
||||
}
|
||||
}
|
||||
|
||||
@ -3998,7 +3959,7 @@ static bool is_unsync_root(hpa_t root)
|
||||
{
|
||||
struct kvm_mmu_page *sp;
|
||||
|
||||
if (!VALID_PAGE(root))
|
||||
if (!VALID_PAGE(root) || kvm_mmu_is_dummy_root(root))
|
||||
return false;
|
||||
|
||||
/*
|
||||
@ -4014,7 +3975,7 @@ static bool is_unsync_root(hpa_t root)
|
||||
* requirement isn't satisfied.
|
||||
*/
|
||||
smp_rmb();
|
||||
sp = to_shadow_page(root);
|
||||
sp = root_to_sp(root);
|
||||
|
||||
/*
|
||||
* PAE roots (somewhat arbitrarily) aren't backed by shadow pages, the
|
||||
@ -4044,11 +4005,12 @@ void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu)
|
||||
|
||||
if (vcpu->arch.mmu->cpu_role.base.level >= PT64_ROOT_4LEVEL) {
|
||||
hpa_t root = vcpu->arch.mmu->root.hpa;
|
||||
sp = to_shadow_page(root);
|
||||
|
||||
if (!is_unsync_root(root))
|
||||
return;
|
||||
|
||||
sp = root_to_sp(root);
|
||||
|
||||
write_lock(&vcpu->kvm->mmu_lock);
|
||||
mmu_sync_children(vcpu, sp, true);
|
||||
write_unlock(&vcpu->kvm->mmu_lock);
|
||||
@ -4190,7 +4152,7 @@ static int handle_mmio_page_fault(struct kvm_vcpu *vcpu, u64 addr, bool direct)
|
||||
return RET_PF_EMULATE;
|
||||
|
||||
reserved = get_mmio_spte(vcpu, addr, &spte);
|
||||
if (WARN_ON(reserved))
|
||||
if (WARN_ON_ONCE(reserved))
|
||||
return -EINVAL;
|
||||
|
||||
if (is_mmio_spte(spte)) {
|
||||
@ -4228,7 +4190,7 @@ static bool page_fault_handle_page_track(struct kvm_vcpu *vcpu,
|
||||
* guest is writing the page which is write tracked which can
|
||||
* not be fixed by page fault handler.
|
||||
*/
|
||||
if (kvm_slot_page_track_is_active(vcpu->kvm, fault->slot, fault->gfn, KVM_PAGE_TRACK_WRITE))
|
||||
if (kvm_gfn_is_write_tracked(vcpu->kvm, fault->slot, fault->gfn))
|
||||
return true;
|
||||
|
||||
return false;
|
||||
@ -4378,7 +4340,7 @@ static int kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault,
|
||||
static bool is_page_fault_stale(struct kvm_vcpu *vcpu,
|
||||
struct kvm_page_fault *fault)
|
||||
{
|
||||
struct kvm_mmu_page *sp = to_shadow_page(vcpu->arch.mmu->root.hpa);
|
||||
struct kvm_mmu_page *sp = root_to_sp(vcpu->arch.mmu->root.hpa);
|
||||
|
||||
/* Special roots, e.g. pae_root, are not backed by shadow pages. */
|
||||
if (sp && is_obsolete_sp(vcpu->kvm, sp))
|
||||
@ -4403,6 +4365,10 @@ static int direct_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault
|
||||
{
|
||||
int r;
|
||||
|
||||
/* Dummy roots are used only for shadowing bad guest roots. */
|
||||
if (WARN_ON_ONCE(kvm_mmu_is_dummy_root(vcpu->arch.mmu->root.hpa)))
|
||||
return RET_PF_RETRY;
|
||||
|
||||
if (page_fault_handle_page_track(vcpu, fault))
|
||||
return RET_PF_EMULATE;
|
||||
|
||||
@ -4439,8 +4405,6 @@ out_unlock:
|
||||
static int nonpaging_page_fault(struct kvm_vcpu *vcpu,
|
||||
struct kvm_page_fault *fault)
|
||||
{
|
||||
pgprintk("%s: gva %lx error %x\n", __func__, fault->addr, fault->error_code);
|
||||
|
||||
/* This path builds a PAE pagetable, we can map 2mb pages at maximum. */
|
||||
fault->max_level = PG_LEVEL_2M;
|
||||
return direct_page_fault(vcpu, fault);
|
||||
@ -4558,9 +4522,19 @@ static void nonpaging_init_context(struct kvm_mmu *context)
|
||||
static inline bool is_root_usable(struct kvm_mmu_root_info *root, gpa_t pgd,
|
||||
union kvm_mmu_page_role role)
|
||||
{
|
||||
return (role.direct || pgd == root->pgd) &&
|
||||
VALID_PAGE(root->hpa) &&
|
||||
role.word == to_shadow_page(root->hpa)->role.word;
|
||||
struct kvm_mmu_page *sp;
|
||||
|
||||
if (!VALID_PAGE(root->hpa))
|
||||
return false;
|
||||
|
||||
if (!role.direct && pgd != root->pgd)
|
||||
return false;
|
||||
|
||||
sp = root_to_sp(root->hpa);
|
||||
if (WARN_ON_ONCE(!sp))
|
||||
return false;
|
||||
|
||||
return role.word == sp->role.word;
|
||||
}
|
||||
|
||||
/*
|
||||
@ -4630,11 +4604,10 @@ static bool fast_pgd_switch(struct kvm *kvm, struct kvm_mmu *mmu,
|
||||
gpa_t new_pgd, union kvm_mmu_page_role new_role)
|
||||
{
|
||||
/*
|
||||
* For now, limit the caching to 64-bit hosts+VMs in order to avoid
|
||||
* having to deal with PDPTEs. We may add support for 32-bit hosts/VMs
|
||||
* later if necessary.
|
||||
* Limit reuse to 64-bit hosts+VMs without "special" roots in order to
|
||||
* avoid having to deal with PDPTEs and other complexities.
|
||||
*/
|
||||
if (VALID_PAGE(mmu->root.hpa) && !to_shadow_page(mmu->root.hpa))
|
||||
if (VALID_PAGE(mmu->root.hpa) && !root_to_sp(mmu->root.hpa))
|
||||
kvm_mmu_free_roots(kvm, mmu, KVM_MMU_ROOT_CURRENT);
|
||||
|
||||
if (VALID_PAGE(mmu->root.hpa))
|
||||
@ -4680,9 +4653,12 @@ void kvm_mmu_new_pgd(struct kvm_vcpu *vcpu, gpa_t new_pgd)
|
||||
* If this is a direct root page, it doesn't have a write flooding
|
||||
* count. Otherwise, clear the write flooding count.
|
||||
*/
|
||||
if (!new_role.direct)
|
||||
__clear_sp_write_flooding_count(
|
||||
to_shadow_page(vcpu->arch.mmu->root.hpa));
|
||||
if (!new_role.direct) {
|
||||
struct kvm_mmu_page *sp = root_to_sp(vcpu->arch.mmu->root.hpa);
|
||||
|
||||
if (!WARN_ON_ONCE(!sp))
|
||||
__clear_sp_write_flooding_count(sp);
|
||||
}
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kvm_mmu_new_pgd);
|
||||
|
||||
@ -5449,8 +5425,8 @@ void kvm_mmu_after_set_cpuid(struct kvm_vcpu *vcpu)
|
||||
* physical address properties) in a single VM would require tracking
|
||||
* all relevant CPUID information in kvm_mmu_page_role. That is very
|
||||
* undesirable as it would increase the memory requirements for
|
||||
* gfn_track (see struct kvm_mmu_page_role comments). For now that
|
||||
* problem is swept under the rug; KVM's CPUID API is horrific and
|
||||
* gfn_write_track (see struct kvm_mmu_page_role comments). For now
|
||||
* that problem is swept under the rug; KVM's CPUID API is horrific and
|
||||
* it's all but impossible to solve it without introducing a new API.
|
||||
*/
|
||||
vcpu->arch.root_mmu.root_role.word = 0;
|
||||
@ -5513,9 +5489,9 @@ void kvm_mmu_unload(struct kvm_vcpu *vcpu)
|
||||
struct kvm *kvm = vcpu->kvm;
|
||||
|
||||
kvm_mmu_free_roots(kvm, &vcpu->arch.root_mmu, KVM_MMU_ROOTS_ALL);
|
||||
WARN_ON(VALID_PAGE(vcpu->arch.root_mmu.root.hpa));
|
||||
WARN_ON_ONCE(VALID_PAGE(vcpu->arch.root_mmu.root.hpa));
|
||||
kvm_mmu_free_roots(kvm, &vcpu->arch.guest_mmu, KVM_MMU_ROOTS_ALL);
|
||||
WARN_ON(VALID_PAGE(vcpu->arch.guest_mmu.root.hpa));
|
||||
WARN_ON_ONCE(VALID_PAGE(vcpu->arch.guest_mmu.root.hpa));
|
||||
vcpu_clear_mmio_info(vcpu, MMIO_GVA_ANY);
|
||||
}
|
||||
|
||||
@ -5528,16 +5504,21 @@ static bool is_obsolete_root(struct kvm *kvm, hpa_t root_hpa)
|
||||
|
||||
/*
|
||||
* When freeing obsolete roots, treat roots as obsolete if they don't
|
||||
* have an associated shadow page. This does mean KVM will get false
|
||||
* have an associated shadow page, as it's impossible to determine if
|
||||
* such roots are fresh or stale. This does mean KVM will get false
|
||||
* positives and free roots that don't strictly need to be freed, but
|
||||
* such false positives are relatively rare:
|
||||
*
|
||||
* (a) only PAE paging and nested NPT has roots without shadow pages
|
||||
* (a) only PAE paging and nested NPT have roots without shadow pages
|
||||
* (or any shadow paging flavor with a dummy root, see note below)
|
||||
* (b) remote reloads due to a memslot update obsoletes _all_ roots
|
||||
* (c) KVM doesn't track previous roots for PAE paging, and the guest
|
||||
* is unlikely to zap an in-use PGD.
|
||||
*
|
||||
* Note! Dummy roots are unique in that they are obsoleted by memslot
|
||||
* _creation_! See also FNAME(fetch).
|
||||
*/
|
||||
sp = to_shadow_page(root_hpa);
|
||||
sp = root_to_sp(root_hpa);
|
||||
return !sp || is_obsolete_sp(kvm, sp);
|
||||
}
|
||||
|
||||
@ -5616,9 +5597,6 @@ static bool detect_write_misaligned(struct kvm_mmu_page *sp, gpa_t gpa,
|
||||
{
|
||||
unsigned offset, pte_size, misaligned;
|
||||
|
||||
pgprintk("misaligned: gpa %llx bytes %d role %x\n",
|
||||
gpa, bytes, sp->role.word);
|
||||
|
||||
offset = offset_in_page(gpa);
|
||||
pte_size = sp->role.has_4_byte_gpte ? 4 : 8;
|
||||
|
||||
@ -5666,9 +5644,8 @@ static u64 *get_written_sptes(struct kvm_mmu_page *sp, gpa_t gpa, int *nspte)
|
||||
return spte;
|
||||
}
|
||||
|
||||
static void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
|
||||
const u8 *new, int bytes,
|
||||
struct kvm_page_track_notifier_node *node)
|
||||
void kvm_mmu_track_write(struct kvm_vcpu *vcpu, gpa_t gpa, const u8 *new,
|
||||
int bytes)
|
||||
{
|
||||
gfn_t gfn = gpa >> PAGE_SHIFT;
|
||||
struct kvm_mmu_page *sp;
|
||||
@ -5684,8 +5661,6 @@ static void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
|
||||
if (!READ_ONCE(vcpu->kvm->arch.indirect_shadow_pages))
|
||||
return;
|
||||
|
||||
pgprintk("%s: gpa %llx bytes %d\n", __func__, gpa, bytes);
|
||||
|
||||
write_lock(&vcpu->kvm->mmu_lock);
|
||||
|
||||
gentry = mmu_pte_write_fetch_gpte(vcpu, &gpa, &bytes);
|
||||
@ -5724,7 +5699,18 @@ int noinline kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u64 err
|
||||
int r, emulation_type = EMULTYPE_PF;
|
||||
bool direct = vcpu->arch.mmu->root_role.direct;
|
||||
|
||||
if (WARN_ON(!VALID_PAGE(vcpu->arch.mmu->root.hpa)))
|
||||
/*
|
||||
* IMPLICIT_ACCESS is a KVM-defined flag used to correctly perform SMAP
|
||||
* checks when emulating instructions that triggers implicit access.
|
||||
* WARN if hardware generates a fault with an error code that collides
|
||||
* with the KVM-defined value. Clear the flag and continue on, i.e.
|
||||
* don't terminate the VM, as KVM can't possibly be relying on a flag
|
||||
* that KVM doesn't know about.
|
||||
*/
|
||||
if (WARN_ON_ONCE(error_code & PFERR_IMPLICIT_ACCESS))
|
||||
error_code &= ~PFERR_IMPLICIT_ACCESS;
|
||||
|
||||
if (WARN_ON_ONCE(!VALID_PAGE(vcpu->arch.mmu->root.hpa)))
|
||||
return RET_PF_RETRY;
|
||||
|
||||
r = RET_PF_INVALID;
|
||||
@ -6081,7 +6067,7 @@ restart:
|
||||
* pages. Skip the bogus page, otherwise we'll get stuck in an
|
||||
* infinite loop if the page gets put back on the list (again).
|
||||
*/
|
||||
if (WARN_ON(sp->role.invalid))
|
||||
if (WARN_ON_ONCE(sp->role.invalid))
|
||||
continue;
|
||||
|
||||
/*
|
||||
@ -6181,16 +6167,8 @@ static bool kvm_has_zapped_obsolete_pages(struct kvm *kvm)
|
||||
return unlikely(!list_empty_careful(&kvm->arch.zapped_obsolete_pages));
|
||||
}
|
||||
|
||||
static void kvm_mmu_invalidate_zap_pages_in_memslot(struct kvm *kvm,
|
||||
struct kvm_memory_slot *slot,
|
||||
struct kvm_page_track_notifier_node *node)
|
||||
{
|
||||
kvm_mmu_zap_all_fast(kvm);
|
||||
}
|
||||
|
||||
int kvm_mmu_init_vm(struct kvm *kvm)
|
||||
{
|
||||
struct kvm_page_track_notifier_node *node = &kvm->arch.mmu_sp_tracker;
|
||||
int r;
|
||||
|
||||
INIT_LIST_HEAD(&kvm->arch.active_mmu_pages);
|
||||
@ -6204,10 +6182,6 @@ int kvm_mmu_init_vm(struct kvm *kvm)
|
||||
return r;
|
||||
}
|
||||
|
||||
node->track_write = kvm_mmu_pte_write;
|
||||
node->track_flush_slot = kvm_mmu_invalidate_zap_pages_in_memslot;
|
||||
kvm_page_track_register_notifier(kvm, node);
|
||||
|
||||
kvm->arch.split_page_header_cache.kmem_cache = mmu_page_header_cache;
|
||||
kvm->arch.split_page_header_cache.gfp_zero = __GFP_ZERO;
|
||||
|
||||
@ -6228,10 +6202,6 @@ static void mmu_free_vm_memory_caches(struct kvm *kvm)
|
||||
|
||||
void kvm_mmu_uninit_vm(struct kvm *kvm)
|
||||
{
|
||||
struct kvm_page_track_notifier_node *node = &kvm->arch.mmu_sp_tracker;
|
||||
|
||||
kvm_page_track_unregister_notifier(kvm, node);
|
||||
|
||||
if (tdp_mmu_enabled)
|
||||
kvm_mmu_uninit_tdp_mmu(kvm);
|
||||
|
||||
@ -6700,7 +6670,7 @@ void kvm_mmu_slot_leaf_clear_dirty(struct kvm *kvm,
|
||||
*/
|
||||
}
|
||||
|
||||
void kvm_mmu_zap_all(struct kvm *kvm)
|
||||
static void kvm_mmu_zap_all(struct kvm *kvm)
|
||||
{
|
||||
struct kvm_mmu_page *sp, *node;
|
||||
LIST_HEAD(invalid_list);
|
||||
@ -6709,7 +6679,7 @@ void kvm_mmu_zap_all(struct kvm *kvm)
|
||||
write_lock(&kvm->mmu_lock);
|
||||
restart:
|
||||
list_for_each_entry_safe(sp, node, &kvm->arch.active_mmu_pages, link) {
|
||||
if (WARN_ON(sp->role.invalid))
|
||||
if (WARN_ON_ONCE(sp->role.invalid))
|
||||
continue;
|
||||
if (__kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list, &ign))
|
||||
goto restart;
|
||||
@ -6725,9 +6695,20 @@ restart:
|
||||
write_unlock(&kvm->mmu_lock);
|
||||
}
|
||||
|
||||
void kvm_arch_flush_shadow_all(struct kvm *kvm)
|
||||
{
|
||||
kvm_mmu_zap_all(kvm);
|
||||
}
|
||||
|
||||
void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
|
||||
struct kvm_memory_slot *slot)
|
||||
{
|
||||
kvm_mmu_zap_all_fast(kvm);
|
||||
}
|
||||
|
||||
void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm, u64 gen)
|
||||
{
|
||||
WARN_ON(gen & KVM_MEMSLOT_GEN_UPDATE_IN_PROGRESS);
|
||||
WARN_ON_ONCE(gen & KVM_MEMSLOT_GEN_UPDATE_IN_PROGRESS);
|
||||
|
||||
gen &= MMIO_SPTE_GEN_MASK;
|
||||
|
||||
|
@@ -6,18 +6,10 @@
 #include <linux/kvm_host.h>
 #include <asm/kvm_host.h>
 
-#undef MMU_DEBUG
-
-#ifdef MMU_DEBUG
-extern bool dbg;
-
-#define pgprintk(x...) do { if (dbg) printk(x); } while (0)
-#define rmap_printk(fmt, args...) do { if (dbg) printk("%s: " fmt, __func__, ## args); } while (0)
-#define MMU_WARN_ON(x) WARN_ON(x)
+#ifdef CONFIG_KVM_PROVE_MMU
+#define KVM_MMU_WARN_ON(x) WARN_ON_ONCE(x)
 #else
-#define pgprintk(x...) do { } while (0)
-#define rmap_printk(x...) do { } while (0)
-#define MMU_WARN_ON(x) do { } while (0)
+#define KVM_MMU_WARN_ON(x) BUILD_BUG_ON_INVALID(x)
 #endif
 
 /* Page table builder macros common to shadow (host) PTEs and guest PTEs. */
@@ -44,6 +36,16 @@ extern bool dbg;
 #define INVALID_PAE_ROOT 0
 #define IS_VALID_PAE_ROOT(x) (!!(x))
 
+static inline hpa_t kvm_mmu_get_dummy_root(void)
+{
+       return my_zero_pfn(0) << PAGE_SHIFT;
+}
+
+static inline bool kvm_mmu_is_dummy_root(hpa_t shadow_page)
+{
+       return is_zero_pfn(shadow_page >> PAGE_SHIFT);
+}
+
 typedef u64 __rcu *tdp_ptep_t;
 
 struct kvm_mmu_page {
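The two helpers added above make the dummy root cheap to detect: it is simply the zero page's physical address, so no shadow page ever backs it. A condensed, illustrative pattern (the demo_* name is hypothetical) mirroring the checks added to mmu.c later in this diff, e.g. in is_unsync_root() and direct_page_fault():

/* Illustrative only: how callers treat the dummy root elsewhere in this diff. */
static bool demo_root_has_shadow_pages(hpa_t root)
{
        /*
         * A dummy root stands in for a !visible guest root: there is nothing
         * to walk, sync or zap, and no triple fault is injected at load time.
         */
        if (!VALID_PAGE(root) || kvm_mmu_is_dummy_root(root))
                return false;

        return true;
}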
@ -12,13 +12,13 @@
|
||||
*/
|
||||
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
|
||||
|
||||
#include <linux/lockdep.h>
|
||||
#include <linux/kvm_host.h>
|
||||
#include <linux/rculist.h>
|
||||
|
||||
#include <asm/kvm_page_track.h>
|
||||
|
||||
#include "mmu.h"
|
||||
#include "mmu_internal.h"
|
||||
#include "page_track.h"
|
||||
|
||||
bool kvm_page_track_write_tracking_enabled(struct kvm *kvm)
|
||||
{
|
||||
@ -28,103 +28,64 @@ bool kvm_page_track_write_tracking_enabled(struct kvm *kvm)
|
||||
|
||||
void kvm_page_track_free_memslot(struct kvm_memory_slot *slot)
|
||||
{
|
||||
int i;
|
||||
kvfree(slot->arch.gfn_write_track);
|
||||
slot->arch.gfn_write_track = NULL;
|
||||
}
|
||||
|
||||
for (i = 0; i < KVM_PAGE_TRACK_MAX; i++) {
|
||||
kvfree(slot->arch.gfn_track[i]);
|
||||
slot->arch.gfn_track[i] = NULL;
|
||||
}
|
||||
static int __kvm_page_track_write_tracking_alloc(struct kvm_memory_slot *slot,
|
||||
unsigned long npages)
|
||||
{
|
||||
const size_t size = sizeof(*slot->arch.gfn_write_track);
|
||||
|
||||
if (!slot->arch.gfn_write_track)
|
||||
slot->arch.gfn_write_track = __vcalloc(npages, size,
|
||||
GFP_KERNEL_ACCOUNT);
|
||||
|
||||
return slot->arch.gfn_write_track ? 0 : -ENOMEM;
|
||||
}
|
||||
|
||||
int kvm_page_track_create_memslot(struct kvm *kvm,
|
||||
struct kvm_memory_slot *slot,
|
||||
unsigned long npages)
|
||||
{
|
||||
int i;
|
||||
if (!kvm_page_track_write_tracking_enabled(kvm))
|
||||
return 0;
|
||||
|
||||
for (i = 0; i < KVM_PAGE_TRACK_MAX; i++) {
|
||||
if (i == KVM_PAGE_TRACK_WRITE &&
|
||||
!kvm_page_track_write_tracking_enabled(kvm))
|
||||
continue;
|
||||
|
||||
slot->arch.gfn_track[i] =
|
||||
__vcalloc(npages, sizeof(*slot->arch.gfn_track[i]),
|
||||
GFP_KERNEL_ACCOUNT);
|
||||
if (!slot->arch.gfn_track[i])
|
||||
goto track_free;
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
||||
track_free:
|
||||
kvm_page_track_free_memslot(slot);
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
static inline bool page_track_mode_is_valid(enum kvm_page_track_mode mode)
|
||||
{
|
||||
if (mode < 0 || mode >= KVM_PAGE_TRACK_MAX)
|
||||
return false;
|
||||
|
||||
return true;
|
||||
return __kvm_page_track_write_tracking_alloc(slot, npages);
|
||||
}
|
||||
|
||||
int kvm_page_track_write_tracking_alloc(struct kvm_memory_slot *slot)
|
||||
{
|
||||
unsigned short *gfn_track;
|
||||
|
||||
if (slot->arch.gfn_track[KVM_PAGE_TRACK_WRITE])
|
||||
return 0;
|
||||
|
||||
gfn_track = __vcalloc(slot->npages, sizeof(*gfn_track),
|
||||
GFP_KERNEL_ACCOUNT);
|
||||
if (gfn_track == NULL)
|
||||
return -ENOMEM;
|
||||
|
||||
slot->arch.gfn_track[KVM_PAGE_TRACK_WRITE] = gfn_track;
|
||||
return 0;
|
||||
return __kvm_page_track_write_tracking_alloc(slot, slot->npages);
|
||||
}
|
||||
|
||||
static void update_gfn_track(struct kvm_memory_slot *slot, gfn_t gfn,
|
||||
enum kvm_page_track_mode mode, short count)
|
||||
static void update_gfn_write_track(struct kvm_memory_slot *slot, gfn_t gfn,
|
||||
short count)
|
||||
{
|
||||
int index, val;
|
||||
|
||||
index = gfn_to_index(gfn, slot->base_gfn, PG_LEVEL_4K);
|
||||
|
||||
val = slot->arch.gfn_track[mode][index];
|
||||
val = slot->arch.gfn_write_track[index];
|
||||
|
||||
if (WARN_ON(val + count < 0 || val + count > USHRT_MAX))
|
||||
if (WARN_ON_ONCE(val + count < 0 || val + count > USHRT_MAX))
|
||||
return;
|
||||
|
||||
slot->arch.gfn_track[mode][index] += count;
|
||||
slot->arch.gfn_write_track[index] += count;
|
||||
}
|
||||
|
||||
/*
|
||||
* add guest page to the tracking pool so that corresponding access on that
|
||||
* page will be intercepted.
|
||||
*
|
||||
* It should be called under the protection both of mmu-lock and kvm->srcu
|
||||
* or kvm->slots_lock.
|
||||
*
|
||||
* @kvm: the guest instance we are interested in.
|
||||
* @slot: the @gfn belongs to.
|
||||
* @gfn: the guest page.
|
||||
* @mode: tracking mode, currently only write track is supported.
|
||||
*/
|
||||
void kvm_slot_page_track_add_page(struct kvm *kvm,
|
||||
struct kvm_memory_slot *slot, gfn_t gfn,
|
||||
enum kvm_page_track_mode mode)
|
||||
void __kvm_write_track_add_gfn(struct kvm *kvm, struct kvm_memory_slot *slot,
|
||||
gfn_t gfn)
|
||||
{
|
||||
lockdep_assert_held_write(&kvm->mmu_lock);
|
||||
|
||||
if (WARN_ON(!page_track_mode_is_valid(mode)))
|
||||
lockdep_assert_once(lockdep_is_held(&kvm->slots_lock) ||
|
||||
srcu_read_lock_held(&kvm->srcu));
|
||||
|
||||
if (KVM_BUG_ON(!kvm_page_track_write_tracking_enabled(kvm), kvm))
|
||||
return;
|
||||
|
||||
if (WARN_ON(mode == KVM_PAGE_TRACK_WRITE &&
|
||||
!kvm_page_track_write_tracking_enabled(kvm)))
|
||||
return;
|
||||
|
||||
update_gfn_track(slot, gfn, mode, 1);
|
||||
update_gfn_write_track(slot, gfn, 1);
|
||||
|
||||
/*
|
||||
* new track stops large page mapping for the
|
||||
@ -132,37 +93,22 @@ void kvm_slot_page_track_add_page(struct kvm *kvm,
|
||||
*/
|
||||
kvm_mmu_gfn_disallow_lpage(slot, gfn);
|
||||
|
||||
if (mode == KVM_PAGE_TRACK_WRITE)
|
||||
if (kvm_mmu_slot_gfn_write_protect(kvm, slot, gfn, PG_LEVEL_4K))
|
||||
kvm_flush_remote_tlbs(kvm);
|
||||
if (kvm_mmu_slot_gfn_write_protect(kvm, slot, gfn, PG_LEVEL_4K))
|
||||
kvm_flush_remote_tlbs(kvm);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kvm_slot_page_track_add_page);
|
||||
|
||||
/*
|
||||
* remove the guest page from the tracking pool which stops the interception
|
||||
* of corresponding access on that page. It is the opposed operation of
|
||||
* kvm_slot_page_track_add_page().
|
||||
*
|
||||
* It should be called under the protection both of mmu-lock and kvm->srcu
|
||||
* or kvm->slots_lock.
|
||||
*
|
||||
* @kvm: the guest instance we are interested in.
|
||||
* @slot: the @gfn belongs to.
|
||||
* @gfn: the guest page.
|
||||
* @mode: tracking mode, currently only write track is supported.
|
||||
*/
|
||||
void kvm_slot_page_track_remove_page(struct kvm *kvm,
|
||||
struct kvm_memory_slot *slot, gfn_t gfn,
|
||||
enum kvm_page_track_mode mode)
|
||||
void __kvm_write_track_remove_gfn(struct kvm *kvm,
|
||||
struct kvm_memory_slot *slot, gfn_t gfn)
|
||||
{
|
||||
if (WARN_ON(!page_track_mode_is_valid(mode)))
|
||||
lockdep_assert_held_write(&kvm->mmu_lock);
|
||||
|
||||
lockdep_assert_once(lockdep_is_held(&kvm->slots_lock) ||
|
||||
srcu_read_lock_held(&kvm->srcu));
|
||||
|
||||
if (KVM_BUG_ON(!kvm_page_track_write_tracking_enabled(kvm), kvm))
|
||||
return;
|
||||
|
||||
if (WARN_ON(mode == KVM_PAGE_TRACK_WRITE &&
|
||||
!kvm_page_track_write_tracking_enabled(kvm)))
|
||||
return;
|
||||
|
||||
update_gfn_track(slot, gfn, mode, -1);
|
||||
update_gfn_write_track(slot, gfn, -1);
|
||||
|
||||
/*
|
||||
* allow large page mapping for the tracked page
|
||||
@ -170,31 +116,26 @@ void kvm_slot_page_track_remove_page(struct kvm *kvm,
|
||||
*/
|
||||
kvm_mmu_gfn_allow_lpage(slot, gfn);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kvm_slot_page_track_remove_page);
|
||||
|
||||
/*
|
||||
* check if the corresponding access on the specified guest page is tracked.
|
||||
*/
|
||||
bool kvm_slot_page_track_is_active(struct kvm *kvm,
|
||||
const struct kvm_memory_slot *slot,
|
||||
gfn_t gfn, enum kvm_page_track_mode mode)
|
||||
bool kvm_gfn_is_write_tracked(struct kvm *kvm,
|
||||
const struct kvm_memory_slot *slot, gfn_t gfn)
|
||||
{
|
||||
int index;
|
||||
|
||||
if (WARN_ON(!page_track_mode_is_valid(mode)))
|
||||
return false;
|
||||
|
||||
if (!slot)
|
||||
return false;
|
||||
|
||||
if (mode == KVM_PAGE_TRACK_WRITE &&
|
||||
!kvm_page_track_write_tracking_enabled(kvm))
|
||||
if (!kvm_page_track_write_tracking_enabled(kvm))
|
||||
return false;
|
||||
|
||||
index = gfn_to_index(gfn, slot->base_gfn, PG_LEVEL_4K);
|
||||
return !!READ_ONCE(slot->arch.gfn_track[mode][index]);
|
||||
return !!READ_ONCE(slot->arch.gfn_write_track[index]);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_KVM_EXTERNAL_WRITE_TRACKING
|
||||
void kvm_page_track_cleanup(struct kvm *kvm)
|
||||
{
|
||||
struct kvm_page_track_notifier_head *head;
|
||||
@ -216,17 +157,22 @@ int kvm_page_track_init(struct kvm *kvm)
|
||||
* register the notifier so that event interception for the tracked guest
|
||||
* pages can be received.
|
||||
*/
|
||||
void
|
||||
kvm_page_track_register_notifier(struct kvm *kvm,
|
||||
struct kvm_page_track_notifier_node *n)
|
||||
int kvm_page_track_register_notifier(struct kvm *kvm,
|
||||
struct kvm_page_track_notifier_node *n)
|
||||
{
|
||||
struct kvm_page_track_notifier_head *head;
|
||||
|
||||
if (!kvm || kvm->mm != current->mm)
|
||||
return -ESRCH;
|
||||
|
||||
kvm_get_kvm(kvm);
|
||||
|
||||
head = &kvm->arch.track_notifier_head;
|
||||
|
||||
write_lock(&kvm->mmu_lock);
|
||||
hlist_add_head_rcu(&n->node, &head->track_notifier_list);
|
||||
write_unlock(&kvm->mmu_lock);
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kvm_page_track_register_notifier);
|
||||
|
||||
@ -234,9 +180,8 @@ EXPORT_SYMBOL_GPL(kvm_page_track_register_notifier);
|
||||
* stop receiving the event interception. It is the opposed operation of
|
||||
* kvm_page_track_register_notifier().
|
||||
*/
|
||||
void
|
||||
kvm_page_track_unregister_notifier(struct kvm *kvm,
|
||||
struct kvm_page_track_notifier_node *n)
|
||||
void kvm_page_track_unregister_notifier(struct kvm *kvm,
|
||||
struct kvm_page_track_notifier_node *n)
|
||||
{
|
||||
struct kvm_page_track_notifier_head *head;
|
||||
|
||||
@ -246,6 +191,8 @@ kvm_page_track_unregister_notifier(struct kvm *kvm,
|
||||
hlist_del_rcu(&n->node);
|
||||
write_unlock(&kvm->mmu_lock);
|
||||
synchronize_srcu(&head->track_srcu);
|
||||
|
||||
kvm_put_kvm(kvm);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kvm_page_track_unregister_notifier);
|
||||
|
||||
@ -256,34 +203,7 @@ EXPORT_SYMBOL_GPL(kvm_page_track_unregister_notifier);
|
||||
* The node should figure out if the written page is the one that node is
|
||||
* interested in by itself.
|
||||
*/
|
||||
void kvm_page_track_write(struct kvm_vcpu *vcpu, gpa_t gpa, const u8 *new,
|
||||
int bytes)
|
||||
{
|
||||
struct kvm_page_track_notifier_head *head;
|
||||
struct kvm_page_track_notifier_node *n;
|
||||
int idx;
|
||||
|
||||
head = &vcpu->kvm->arch.track_notifier_head;
|
||||
|
||||
if (hlist_empty(&head->track_notifier_list))
|
||||
return;
|
||||
|
||||
idx = srcu_read_lock(&head->track_srcu);
|
||||
hlist_for_each_entry_srcu(n, &head->track_notifier_list, node,
|
||||
srcu_read_lock_held(&head->track_srcu))
|
||||
if (n->track_write)
|
||||
n->track_write(vcpu, gpa, new, bytes, n);
|
||||
srcu_read_unlock(&head->track_srcu, idx);
|
||||
}
|
||||
|
||||
/*
|
||||
* Notify the node that memory slot is being removed or moved so that it can
|
||||
* drop write-protection for the pages in the memory slot.
|
||||
*
|
||||
* The node should figure out it has any write-protected pages in this slot
|
||||
* by itself.
|
||||
*/
|
||||
void kvm_page_track_flush_slot(struct kvm *kvm, struct kvm_memory_slot *slot)
|
||||
void __kvm_page_track_write(struct kvm *kvm, gpa_t gpa, const u8 *new, int bytes)
|
||||
{
|
||||
struct kvm_page_track_notifier_head *head;
|
||||
struct kvm_page_track_notifier_node *n;
|
||||
@ -296,8 +216,92 @@ void kvm_page_track_flush_slot(struct kvm *kvm, struct kvm_memory_slot *slot)
|
||||
|
||||
idx = srcu_read_lock(&head->track_srcu);
|
||||
hlist_for_each_entry_srcu(n, &head->track_notifier_list, node,
|
||||
srcu_read_lock_held(&head->track_srcu))
|
||||
if (n->track_flush_slot)
|
||||
n->track_flush_slot(kvm, slot, n);
|
||||
srcu_read_lock_held(&head->track_srcu))
|
||||
if (n->track_write)
|
||||
n->track_write(gpa, new, bytes, n);
|
||||
srcu_read_unlock(&head->track_srcu, idx);
|
||||
}
|
||||
|
||||
/*
|
||||
* Notify external page track nodes that a memory region is being removed from
|
||||
* the VM, e.g. so that users can free any associated metadata.
|
||||
*/
|
||||
void kvm_page_track_delete_slot(struct kvm *kvm, struct kvm_memory_slot *slot)
|
||||
{
|
||||
struct kvm_page_track_notifier_head *head;
|
||||
struct kvm_page_track_notifier_node *n;
|
||||
int idx;
|
||||
|
||||
head = &kvm->arch.track_notifier_head;
|
||||
|
||||
if (hlist_empty(&head->track_notifier_list))
|
||||
return;
|
||||
|
||||
idx = srcu_read_lock(&head->track_srcu);
|
||||
hlist_for_each_entry_srcu(n, &head->track_notifier_list, node,
|
||||
srcu_read_lock_held(&head->track_srcu))
|
||||
if (n->track_remove_region)
|
||||
n->track_remove_region(slot->base_gfn, slot->npages, n);
|
||||
srcu_read_unlock(&head->track_srcu, idx);
|
||||
}
|
||||
|
||||
/*
|
||||
* add guest page to the tracking pool so that corresponding access on that
|
||||
* page will be intercepted.
|
||||
*
|
||||
* @kvm: the guest instance we are interested in.
|
||||
* @gfn: the guest page.
|
||||
*/
|
||||
int kvm_write_track_add_gfn(struct kvm *kvm, gfn_t gfn)
|
||||
{
|
||||
struct kvm_memory_slot *slot;
|
||||
int idx;
|
||||
|
||||
idx = srcu_read_lock(&kvm->srcu);
|
||||
|
||||
slot = gfn_to_memslot(kvm, gfn);
|
||||
if (!slot) {
|
||||
srcu_read_unlock(&kvm->srcu, idx);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
write_lock(&kvm->mmu_lock);
|
||||
__kvm_write_track_add_gfn(kvm, slot, gfn);
|
||||
write_unlock(&kvm->mmu_lock);
|
||||
|
||||
srcu_read_unlock(&kvm->srcu, idx);
|
||||
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kvm_write_track_add_gfn);
|
||||
|
||||
/*
|
||||
* remove the guest page from the tracking pool which stops the interception
|
||||
* of corresponding access on that page.
|
||||
*
|
||||
* @kvm: the guest instance we are interested in.
|
||||
* @gfn: the guest page.
|
||||
*/
|
||||
int kvm_write_track_remove_gfn(struct kvm *kvm, gfn_t gfn)
|
||||
{
|
||||
struct kvm_memory_slot *slot;
|
||||
int idx;
|
||||
|
||||
idx = srcu_read_lock(&kvm->srcu);
|
||||
|
||||
slot = gfn_to_memslot(kvm, gfn);
|
||||
if (!slot) {
|
||||
srcu_read_unlock(&kvm->srcu, idx);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
write_lock(&kvm->mmu_lock);
|
||||
__kvm_write_track_remove_gfn(kvm, slot, gfn);
|
||||
write_unlock(&kvm->mmu_lock);
|
||||
|
||||
srcu_read_unlock(&kvm->srcu, idx);
|
||||
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kvm_write_track_remove_gfn);
|
||||
#endif
|
||||
|
arch/x86/kvm/mmu/page_track.h (new file, 58 lines)
@@ -0,0 +1,58 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __KVM_X86_PAGE_TRACK_H
#define __KVM_X86_PAGE_TRACK_H

#include <linux/kvm_host.h>

#include <asm/kvm_page_track.h>


bool kvm_page_track_write_tracking_enabled(struct kvm *kvm);
int kvm_page_track_write_tracking_alloc(struct kvm_memory_slot *slot);

void kvm_page_track_free_memslot(struct kvm_memory_slot *slot);
int kvm_page_track_create_memslot(struct kvm *kvm,
struct kvm_memory_slot *slot,
unsigned long npages);

void __kvm_write_track_add_gfn(struct kvm *kvm, struct kvm_memory_slot *slot,
gfn_t gfn);
void __kvm_write_track_remove_gfn(struct kvm *kvm,
struct kvm_memory_slot *slot, gfn_t gfn);

bool kvm_gfn_is_write_tracked(struct kvm *kvm,
const struct kvm_memory_slot *slot, gfn_t gfn);

#ifdef CONFIG_KVM_EXTERNAL_WRITE_TRACKING
int kvm_page_track_init(struct kvm *kvm);
void kvm_page_track_cleanup(struct kvm *kvm);

void __kvm_page_track_write(struct kvm *kvm, gpa_t gpa, const u8 *new, int bytes);
void kvm_page_track_delete_slot(struct kvm *kvm, struct kvm_memory_slot *slot);

static inline bool kvm_page_track_has_external_user(struct kvm *kvm)
{
return !hlist_empty(&kvm->arch.track_notifier_head.track_notifier_list);
}
#else
static inline int kvm_page_track_init(struct kvm *kvm) { return 0; }
static inline void kvm_page_track_cleanup(struct kvm *kvm) { }

static inline void __kvm_page_track_write(struct kvm *kvm, gpa_t gpa,
const u8 *new, int bytes) { }
static inline void kvm_page_track_delete_slot(struct kvm *kvm,
struct kvm_memory_slot *slot) { }

static inline bool kvm_page_track_has_external_user(struct kvm *kvm) { return false; }

#endif /* CONFIG_KVM_EXTERNAL_WRITE_TRACKING */

static inline void kvm_page_track_write(struct kvm_vcpu *vcpu, gpa_t gpa,
const u8 *new, int bytes)
{
__kvm_page_track_write(vcpu->kvm, gpa, new, bytes);

kvm_mmu_track_write(vcpu, gpa, new, bytes);
}

#endif /* __KVM_X86_PAGE_TRACK_H */
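The kvm_page_track_write() wrapper above is meant to be invoked from the emulator once a guest write has actually landed; roughly along these lines (a sketch, not a verbatim excerpt of x86.c):

static int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa,
			       const void *val, int bytes)
{
	int ret;

	ret = kvm_vcpu_write_guest(vcpu, gpa, val, bytes);
	if (ret < 0)
		return 0;

	/* Notify external trackers first, then let the MMU unprotect/zap. */
	kvm_page_track_write(vcpu, gpa, val, bytes);
	return 1;
}
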
@ -338,7 +338,6 @@ retry_walk:
}
#endif
walker->max_level = walker->level;
ASSERT(!(is_long_mode(vcpu) && !is_pae(vcpu)));

/*
* FIXME: on Intel processors, loads of the PDPTE registers for PAE paging
@ -348,9 +347,21 @@ retry_walk:
nested_access = (have_ad ? PFERR_WRITE_MASK : 0) | PFERR_USER_MASK;

pte_access = ~0;

/*
* Queue a page fault for injection if this assertion fails, as callers
* assume that walker.fault contains sane info on a walk failure. I.e.
* avoid making the situation worse by inducing even worse badness
* between when the assertion fails and when KVM kicks the vCPU out to
* userspace (because the VM is bugged).
*/
if (KVM_BUG_ON(is_long_mode(vcpu) && !is_pae(vcpu), vcpu->kvm))
goto error;

++walker->level;

do {
struct kvm_memory_slot *slot;
unsigned long host_addr;

pt_access = pte_access;
@ -381,7 +392,11 @@ retry_walk:
if (unlikely(real_gpa == INVALID_GPA))
return 0;

host_addr = kvm_vcpu_gfn_to_hva_prot(vcpu, gpa_to_gfn(real_gpa),
slot = kvm_vcpu_gfn_to_memslot(vcpu, gpa_to_gfn(real_gpa));
if (!kvm_is_visible_memslot(slot))
goto error;

host_addr = gfn_to_hva_memslot_prot(slot, gpa_to_gfn(real_gpa),
&walker->pte_writable[walker->level - 1]);
if (unlikely(kvm_is_error_hva(host_addr)))
goto error;
@ -456,9 +471,6 @@ retry_walk:
goto retry_walk;
}

pgprintk("%s: pte %llx pte_access %x pt_access %x\n",
__func__, (u64)pte, walker->pte_access,
walker->pt_access[walker->level - 1]);
return 1;

error:
@ -529,8 +541,6 @@ FNAME(prefetch_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
if (FNAME(prefetch_invalid_gpte)(vcpu, sp, spte, gpte))
return false;

pgprintk("%s: gpte %llx spte %p\n", __func__, (u64)gpte, spte);

gfn = gpte_to_gfn(gpte);
pte_access = sp->role.access & FNAME(gpte_access)(gpte);
FNAME(protect_clean_gpte)(vcpu->arch.mmu, &pte_access, gpte);
@ -638,9 +648,20 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault,
if (FNAME(gpte_changed)(vcpu, gw, top_level))
goto out_gpte_changed;

if (WARN_ON(!VALID_PAGE(vcpu->arch.mmu->root.hpa)))
if (WARN_ON_ONCE(!VALID_PAGE(vcpu->arch.mmu->root.hpa)))
goto out_gpte_changed;

/*
* Load a new root and retry the faulting instruction in the extremely
* unlikely scenario that the guest root gfn became visible between
* loading a dummy root and handling the resulting page fault, e.g. if
* userspace creates a memslot in the interim.
*/
if (unlikely(kvm_mmu_is_dummy_root(vcpu->arch.mmu->root.hpa))) {
kvm_make_request(KVM_REQ_MMU_FREE_OBSOLETE_ROOTS, vcpu);
goto out_gpte_changed;
}

for_each_shadow_entry(vcpu, fault->addr, it) {
gfn_t table_gfn;

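kvm_mmu_is_dummy_root() is not part of this hunk; given the changelog's description of a dummy root backed by the zero page, its definition is presumably along these lines (sketch only, the real helper lives in the MMU-internal headers):

static inline bool kvm_mmu_is_dummy_root(hpa_t shadow_page)
{
	/* Assumption: the dummy root is backed by the shared zero page. */
	return is_zero_pfn(shadow_page >> PAGE_SHIFT);
}
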
@ -758,7 +779,6 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault
struct guest_walker walker;
int r;

pgprintk("%s: addr %lx err %x\n", __func__, fault->addr, fault->error_code);
WARN_ON_ONCE(fault->is_tdp);

/*
@ -773,7 +793,6 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault
* The page is not mapped by the guest. Let the guest handle it.
*/
if (!r) {
pgprintk("%s: guest page fault\n", __func__);
if (!fault->prefetch)
kvm_inject_emulated_page_fault(vcpu, &walker.fault);

@ -837,7 +856,7 @@ static gpa_t FNAME(get_level1_sp_gpa)(struct kvm_mmu_page *sp)
{
int offset = 0;

WARN_ON(sp->role.level != PG_LEVEL_4K);
WARN_ON_ONCE(sp->role.level != PG_LEVEL_4K);

if (PTTYPE == 32)
offset = sp->role.quadrant << SPTE_LEVEL_BITS;

@ -61,7 +61,7 @@ static u64 generation_mmio_spte_mask(u64 gen)
{
u64 mask;

WARN_ON(gen & ~MMIO_SPTE_GEN_MASK);
WARN_ON_ONCE(gen & ~MMIO_SPTE_GEN_MASK);

mask = (gen << MMIO_SPTE_GEN_LOW_SHIFT) & MMIO_SPTE_GEN_LOW_MASK;
mask |= (gen << MMIO_SPTE_GEN_HIGH_SHIFT) & MMIO_SPTE_GEN_HIGH_MASK;
@ -221,8 +221,6 @@ bool make_spte(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
* shadow pages and unsync'ing pages is not allowed.
*/
if (mmu_try_to_unsync_pages(vcpu->kvm, slot, gfn, can_unsync, prefetch)) {
pgprintk("%s: found shadow page for %llx, marking ro\n",
__func__, gfn);
wrprot = true;
pte_access &= ~ACC_WRITE_MASK;
spte &= ~(PT_WRITABLE_MASK | shadow_mmu_writable_mask);
@ -242,7 +240,7 @@ out:

if ((spte & PT_WRITABLE_MASK) && kvm_slot_dirty_track_enabled(slot)) {
/* Enforced by kvm_mmu_hugepage_adjust. */
WARN_ON(level > PG_LEVEL_4K);
WARN_ON_ONCE(level > PG_LEVEL_4K);
mark_page_dirty_in_slot(vcpu->kvm, slot, gfn);
}

@ -3,6 +3,7 @@
#ifndef KVM_X86_MMU_SPTE_H
#define KVM_X86_MMU_SPTE_H

#include "mmu.h"
#include "mmu_internal.h"

/*
@ -236,6 +237,18 @@ static inline struct kvm_mmu_page *sptep_to_sp(u64 *sptep)
return to_shadow_page(__pa(sptep));
}

static inline struct kvm_mmu_page *root_to_sp(hpa_t root)
{
if (kvm_mmu_is_dummy_root(root))
return NULL;

/*
* The "root" may be a special root, e.g. a PAE entry, treat it as a
* SPTE to ensure any non-PA bits are dropped.
*/
return spte_to_child_sp(root);
}

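root_to_sp() returning NULL for the dummy root is what lets callers bail out instead of dereferencing a shadow page that does not exist; a hedged sketch of the caller pattern (the tdp_mmu_for_each_pte() hunk further down is the real instance, demo_handle_root() is hypothetical):

static void demo_handle_root(struct kvm_vcpu *vcpu)
{
	struct kvm_mmu_page *sp = root_to_sp(vcpu->arch.mmu->root.hpa);

	if (!sp)	/* dummy root: nothing to walk */
		return;

	/* ... operate on the root shadow page ... */
}
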
static inline bool is_mmio_spte(u64 spte)
{
return (spte & shadow_mmio_mask) == shadow_mmio_value &&
@ -265,13 +278,13 @@ static inline bool sp_ad_disabled(struct kvm_mmu_page *sp)

static inline bool spte_ad_enabled(u64 spte)
{
MMU_WARN_ON(!is_shadow_present_pte(spte));
KVM_MMU_WARN_ON(!is_shadow_present_pte(spte));
return (spte & SPTE_TDP_AD_MASK) != SPTE_TDP_AD_DISABLED;
}

static inline bool spte_ad_need_write_protect(u64 spte)
{
MMU_WARN_ON(!is_shadow_present_pte(spte));
KVM_MMU_WARN_ON(!is_shadow_present_pte(spte));
/*
* This is benign for non-TDP SPTEs as SPTE_TDP_AD_ENABLED is '0',
* and non-TDP SPTEs will never set these bits. Optimize for 64-bit
@ -282,13 +295,13 @@ static inline bool spte_ad_need_write_protect(u64 spte)

static inline u64 spte_shadow_accessed_mask(u64 spte)
{
MMU_WARN_ON(!is_shadow_present_pte(spte));
KVM_MMU_WARN_ON(!is_shadow_present_pte(spte));
return spte_ad_enabled(spte) ? shadow_accessed_mask : 0;
}

static inline u64 spte_shadow_dirty_mask(u64 spte)
{
MMU_WARN_ON(!is_shadow_present_pte(spte));
KVM_MMU_WARN_ON(!is_shadow_present_pte(spte));
return spte_ad_enabled(spte) ? shadow_dirty_mask : 0;
}

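The MMU_WARN_ON() -> KVM_MMU_WARN_ON() conversions here and in the TDP MMU hunks below are tied to the new CONFIG_KVM_PROVE_MMU knob from this series; the macro is presumably defined roughly as follows (sketch, the real definition lives in mmu_internal.h), so the assertions still compile-check their expressions but emit no code unless the option is enabled:

#ifdef CONFIG_KVM_PROVE_MMU
#define KVM_MMU_WARN_ON(x) WARN_ON_ONCE(x)
#else
#define KVM_MMU_WARN_ON(x) BUILD_BUG_ON_INVALID(x)
#endif
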
@ -39,13 +39,14 @@ void tdp_iter_restart(struct tdp_iter *iter)
void tdp_iter_start(struct tdp_iter *iter, struct kvm_mmu_page *root,
int min_level, gfn_t next_last_level_gfn)
{
int root_level = root->role.level;

WARN_ON(root_level < 1);
WARN_ON(root_level > PT64_ROOT_MAX_LEVEL);
if (WARN_ON_ONCE(!root || (root->role.level < 1) ||
(root->role.level > PT64_ROOT_MAX_LEVEL))) {
iter->valid = false;
return;
}

iter->next_last_level_gfn = next_last_level_gfn;
iter->root_level = root_level;
iter->root_level = root->role.level;
iter->min_level = min_level;
iter->pt_path[iter->root_level - 1] = (tdp_ptep_t)root->spt;
iter->as_id = kvm_mmu_page_as_id(root);

@ -475,9 +475,9 @@ static void handle_changed_spte(struct kvm *kvm, int as_id, gfn_t gfn,
bool is_leaf = is_present && is_last_spte(new_spte, level);
bool pfn_changed = spte_to_pfn(old_spte) != spte_to_pfn(new_spte);

WARN_ON(level > PT64_ROOT_MAX_LEVEL);
WARN_ON(level < PG_LEVEL_4K);
WARN_ON(gfn & (KVM_PAGES_PER_HPAGE(level) - 1));
WARN_ON_ONCE(level > PT64_ROOT_MAX_LEVEL);
WARN_ON_ONCE(level < PG_LEVEL_4K);
WARN_ON_ONCE(gfn & (KVM_PAGES_PER_HPAGE(level) - 1));

/*
* If this warning were to trigger it would indicate that there was a
@ -522,9 +522,9 @@ static void handle_changed_spte(struct kvm *kvm, int as_id, gfn_t gfn,
* impact the guest since both the former and current SPTEs
* are nonpresent.
*/
if (WARN_ON(!is_mmio_spte(old_spte) &&
!is_mmio_spte(new_spte) &&
!is_removed_spte(new_spte)))
if (WARN_ON_ONCE(!is_mmio_spte(old_spte) &&
!is_mmio_spte(new_spte) &&
!is_removed_spte(new_spte)))
pr_err("Unexpected SPTE change! Nonpresent SPTEs\n"
"should not be replaced with another,\n"
"different nonpresent SPTE, unless one or both\n"
@ -661,7 +661,7 @@ static u64 tdp_mmu_set_spte(struct kvm *kvm, int as_id, tdp_ptep_t sptep,
* should be used. If operating under the MMU lock in write mode, the
* use of the removed SPTE should not be necessary.
*/
WARN_ON(is_removed_spte(old_spte) || is_removed_spte(new_spte));
WARN_ON_ONCE(is_removed_spte(old_spte) || is_removed_spte(new_spte));

old_spte = kvm_tdp_mmu_write_spte(sptep, old_spte, new_spte, level);

@ -689,7 +689,7 @@ static inline void tdp_mmu_iter_set_spte(struct kvm *kvm, struct tdp_iter *iter,
else

#define tdp_mmu_for_each_pte(_iter, _mmu, _start, _end) \
for_each_tdp_pte(_iter, to_shadow_page(_mmu->root.hpa), _start, _end)
for_each_tdp_pte(_iter, root_to_sp(_mmu->root.hpa), _start, _end)

/*
* Yield if the MMU lock is contended or this thread needs to return control
@ -709,7 +709,7 @@ static inline bool __must_check tdp_mmu_iter_cond_resched(struct kvm *kvm,
struct tdp_iter *iter,
bool flush, bool shared)
{
WARN_ON(iter->yielded);
WARN_ON_ONCE(iter->yielded);

/* Ensure forward progress has been made before yielding. */
if (iter->next_last_level_gfn == iter->yielded_gfn)
@ -728,7 +728,7 @@ static inline bool __must_check tdp_mmu_iter_cond_resched(struct kvm *kvm,

rcu_read_lock();

WARN_ON(iter->gfn > iter->next_last_level_gfn);
WARN_ON_ONCE(iter->gfn > iter->next_last_level_gfn);

iter->yielded = true;
}
@ -1241,7 +1241,7 @@ static bool set_spte_gfn(struct kvm *kvm, struct tdp_iter *iter,
u64 new_spte;

/* Huge pages aren't expected to be modified without first being zapped. */
WARN_ON(pte_huge(range->arg.pte) || range->start + 1 != range->end);
WARN_ON_ONCE(pte_huge(range->arg.pte) || range->start + 1 != range->end);

if (iter->level != PG_LEVEL_4K ||
!is_shadow_present_pte(iter->old_spte))
@ -1548,8 +1548,8 @@ retry:
if (!is_shadow_present_pte(iter.old_spte))
continue;

MMU_WARN_ON(kvm_ad_enabled() &&
spte_ad_need_write_protect(iter.old_spte));
KVM_MMU_WARN_ON(kvm_ad_enabled() &&
spte_ad_need_write_protect(iter.old_spte));

if (!(iter.old_spte & dbit))
continue;
@ -1600,6 +1600,8 @@ static void clear_dirty_pt_masked(struct kvm *kvm, struct kvm_mmu_page *root,
shadow_dirty_mask;
struct tdp_iter iter;

lockdep_assert_held_write(&kvm->mmu_lock);

rcu_read_lock();

tdp_root_for_each_leaf_pte(iter, root, gfn + __ffs(mask),
@ -1607,8 +1609,8 @@ static void clear_dirty_pt_masked(struct kvm *kvm, struct kvm_mmu_page *root,
if (!mask)
break;

MMU_WARN_ON(kvm_ad_enabled() &&
spte_ad_need_write_protect(iter.old_spte));
KVM_MMU_WARN_ON(kvm_ad_enabled() &&
spte_ad_need_write_protect(iter.old_spte));

if (iter.level > PG_LEVEL_4K ||
!(mask & (1UL << (iter.gfn - gfn))))
@ -1646,7 +1648,6 @@ void kvm_tdp_mmu_clear_dirty_pt_masked(struct kvm *kvm,
{
struct kvm_mmu_page *root;

lockdep_assert_held_write(&kvm->mmu_lock);
for_each_tdp_mmu_root(kvm, root, slot->as_id)
clear_dirty_pt_masked(kvm, root, gfn, mask, wrprot);
}

@ -25,6 +25,7 @@
#include "tss.h"
#include "kvm_cache_regs.h"
#include "kvm_emulate.h"
#include "mmu/page_track.h"
#include "x86.h"
#include "cpuid.h"
#include "pmu.h"
@ -12632,6 +12633,13 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
struct kvm_memory_slot *new,
enum kvm_mr_change change)
{
/*
* KVM doesn't support moving memslots when there are external page
* trackers attached to the VM, i.e. if KVMGT is in use.
*/
if (change == KVM_MR_MOVE && kvm_page_track_has_external_user(kvm))
return -EINVAL;

if (change == KVM_MR_CREATE || change == KVM_MR_MOVE) {
if ((new->base_gfn + new->npages - 1) > kvm_mmu_max_gfn())
return -EINVAL;
@ -12786,6 +12794,9 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
const struct kvm_memory_slot *new,
enum kvm_mr_change change)
{
if (change == KVM_MR_DELETE)
kvm_page_track_delete_slot(kvm, old);

if (!kvm->arch.n_requested_mmu_pages &&
(change == KVM_MR_CREATE || change == KVM_MR_DELETE)) {
unsigned long nr_mmu_pages;
@ -12802,17 +12813,6 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
kvm_arch_free_memslot(kvm, old);
}

void kvm_arch_flush_shadow_all(struct kvm *kvm)
{
kvm_mmu_zap_all(kvm);
}

void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
struct kvm_memory_slot *slot)
{
kvm_page_track_flush_slot(kvm, slot);
}

static inline bool kvm_guest_apic_has_interrupt(struct kvm_vcpu *vcpu)
{
return (is_guest_mode(vcpu) &&

@ -49,22 +49,6 @@
static bool enable_out_of_sync = false;
static int preallocated_oos_pages = 8192;

static bool intel_gvt_is_valid_gfn(struct intel_vgpu *vgpu, unsigned long gfn)
{
struct kvm *kvm = vgpu->vfio_device.kvm;
int idx;
bool ret;

if (!test_bit(INTEL_VGPU_STATUS_ATTACHED, vgpu->status))
return false;

idx = srcu_read_lock(&kvm->srcu);
ret = kvm_is_visible_gfn(kvm, gfn);
srcu_read_unlock(&kvm->srcu, idx);

return ret;
}

/*
* validate a gm address and related range size,
* translate it to host gm address
@ -1161,31 +1145,6 @@ static inline void ppgtt_generate_shadow_entry(struct intel_gvt_gtt_entry *se,
ops->set_pfn(se, s->shadow_page.mfn);
}

/*
* Check if a 2M page can be used
* @vgpu: target vgpu
* @entry: target pfn's gtt entry
*
* Return 1 if 2MB huge gtt shadowing is possible, 0 if not,
* or a negative error code on failure.
*/
static int is_2MB_gtt_possible(struct intel_vgpu *vgpu,
struct intel_gvt_gtt_entry *entry)
{
const struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
kvm_pfn_t pfn;

if (!HAS_PAGE_SIZES(vgpu->gvt->gt->i915, I915_GTT_PAGE_SIZE_2M))
return 0;

if (!test_bit(INTEL_VGPU_STATUS_ATTACHED, vgpu->status))
return -EINVAL;
pfn = gfn_to_pfn(vgpu->vfio_device.kvm, ops->get_pfn(entry));
if (is_error_noslot_pfn(pfn))
return -EINVAL;
return PageTransHuge(pfn_to_page(pfn));
}

static int split_2MB_gtt_entry(struct intel_vgpu *vgpu,
struct intel_vgpu_ppgtt_spt *spt, unsigned long index,
struct intel_gvt_gtt_entry *se)
@ -1279,7 +1238,7 @@ static int ppgtt_populate_shadow_entry(struct intel_vgpu *vgpu,
{
const struct intel_gvt_gtt_pte_ops *pte_ops = vgpu->gvt->gtt.pte_ops;
struct intel_gvt_gtt_entry se = *ge;
unsigned long gfn, page_size = PAGE_SIZE;
unsigned long gfn;
dma_addr_t dma_addr;
int ret;

@ -1291,6 +1250,9 @@ static int ppgtt_populate_shadow_entry(struct intel_vgpu *vgpu,
switch (ge->type) {
case GTT_TYPE_PPGTT_PTE_4K_ENTRY:
gvt_vdbg_mm("shadow 4K gtt entry\n");
ret = intel_gvt_dma_map_guest_page(vgpu, gfn, PAGE_SIZE, &dma_addr);
if (ret)
return -ENXIO;
break;
case GTT_TYPE_PPGTT_PTE_64K_ENTRY:
gvt_vdbg_mm("shadow 64K gtt entry\n");
@ -1302,25 +1264,20 @@ static int ppgtt_populate_shadow_entry(struct intel_vgpu *vgpu,
return split_64KB_gtt_entry(vgpu, spt, index, &se);
case GTT_TYPE_PPGTT_PTE_2M_ENTRY:
gvt_vdbg_mm("shadow 2M gtt entry\n");
ret = is_2MB_gtt_possible(vgpu, ge);
if (ret == 0)
if (!HAS_PAGE_SIZES(vgpu->gvt->gt->i915, I915_GTT_PAGE_SIZE_2M) ||
intel_gvt_dma_map_guest_page(vgpu, gfn,
I915_GTT_PAGE_SIZE_2M, &dma_addr))
return split_2MB_gtt_entry(vgpu, spt, index, &se);
else if (ret < 0)
return ret;
page_size = I915_GTT_PAGE_SIZE_2M;
break;
case GTT_TYPE_PPGTT_PTE_1G_ENTRY:
gvt_vgpu_err("GVT doesn't support 1GB entry\n");
return -EINVAL;
default:
GEM_BUG_ON(1);
return -EINVAL;
}

/* direct shadow */
ret = intel_gvt_dma_map_guest_page(vgpu, gfn, page_size, &dma_addr);
if (ret)
return -ENXIO;

/* Successfully shadowed a 4K or 2M page (without splitting). */
pte_ops->set_pfn(&se, dma_addr >> PAGE_SHIFT);
ppgtt_set_shadow_entry(spt, &se, index);
return 0;
@ -1329,11 +1286,9 @@ static int ppgtt_populate_shadow_entry(struct intel_vgpu *vgpu,
static int ppgtt_populate_spt(struct intel_vgpu_ppgtt_spt *spt)
{
struct intel_vgpu *vgpu = spt->vgpu;
struct intel_gvt *gvt = vgpu->gvt;
const struct intel_gvt_gtt_pte_ops *ops = gvt->gtt.pte_ops;
struct intel_vgpu_ppgtt_spt *s;
struct intel_gvt_gtt_entry se, ge;
unsigned long gfn, i;
unsigned long i;
int ret;

trace_spt_change(spt->vgpu->id, "born", spt,
@ -1350,13 +1305,6 @@ static int ppgtt_populate_spt(struct intel_vgpu_ppgtt_spt *spt)
ppgtt_generate_shadow_entry(&se, s, &ge);
ppgtt_set_shadow_entry(spt, &se, i);
} else {
gfn = ops->get_pfn(&ge);
if (!intel_gvt_is_valid_gfn(vgpu, gfn)) {
ops->set_pfn(&se, gvt->gtt.scratch_mfn);
ppgtt_set_shadow_entry(spt, &se, i);
continue;
}

ret = ppgtt_populate_shadow_entry(vgpu, spt, i, &ge);
if (ret)
goto fail;
@ -1845,6 +1793,9 @@ static int shadow_ppgtt_mm(struct intel_vgpu_mm *mm)
if (mm->ppgtt_mm.shadowed)
return 0;

if (!test_bit(INTEL_VGPU_STATUS_ATTACHED, vgpu->status))
return -EINVAL;

mm->ppgtt_mm.shadowed = true;

for (index = 0; index < ARRAY_SIZE(mm->ppgtt_mm.guest_pdps); index++) {
@ -2331,14 +2282,6 @@ static int emulate_ggtt_mmio_write(struct intel_vgpu *vgpu, unsigned int off,
m.val64 = e.val64;
m.type = e.type;

/* one PTE update may be issued in multiple writes and the
* first write may not construct a valid gfn
*/
if (!intel_gvt_is_valid_gfn(vgpu, gfn)) {
ops->set_pfn(&m, gvt->gtt.scratch_mfn);
goto out;
}

ret = intel_gvt_dma_map_guest_page(vgpu, gfn, PAGE_SIZE,
&dma_addr);
if (ret) {
@ -2355,7 +2298,6 @@ static int emulate_ggtt_mmio_write(struct intel_vgpu *vgpu, unsigned int off,
ops->clear_present(&m);
}

out:
ggtt_set_guest_entry(ggtt_mm, &e, g_gtt_index);

ggtt_get_host_entry(ggtt_mm, &e, g_gtt_index);
@ -2875,24 +2817,6 @@ void intel_vgpu_reset_ggtt(struct intel_vgpu *vgpu, bool invalidate_old)
ggtt_invalidate(gvt->gt);
}

/**
* intel_vgpu_reset_gtt - reset all the GTT related status
* @vgpu: a vGPU
*
* This function is called from the vfio core to reset all
* GTT related status, including GGTT, PPGTT, scratch page.
*
*/
void intel_vgpu_reset_gtt(struct intel_vgpu *vgpu)
{
/* Shadow pages are only created when there is no page
* table tracking data, so remove page tracking data after
* removing the shadow pages.
*/
intel_vgpu_destroy_all_ppgtt_mm(vgpu);
intel_vgpu_reset_ggtt(vgpu, true);
}

/**
* intel_gvt_restore_ggtt - restore all vGPU's ggtt entries
* @gvt: intel gvt device

@ -224,7 +224,6 @@ void intel_vgpu_reset_ggtt(struct intel_vgpu *vgpu, bool invalidate_old);
void intel_vgpu_invalidate_ppgtt(struct intel_vgpu *vgpu);

int intel_gvt_init_gtt(struct intel_gvt *gvt);
void intel_vgpu_reset_gtt(struct intel_vgpu *vgpu);
void intel_gvt_clean_gtt(struct intel_gvt *gvt);

struct intel_vgpu_mm *intel_gvt_find_ppgtt_mm(struct intel_vgpu *vgpu,

@ -34,10 +34,11 @@
#define _GVT_H_

#include <uapi/linux/pci_regs.h>
#include <linux/kvm_host.h>
#include <linux/vfio.h>
#include <linux/mdev.h>

#include <asm/kvm_page_track.h>

#include "i915_drv.h"
#include "intel_gvt.h"

@ -106,12 +106,10 @@ struct gvt_dma {
#define vfio_dev_to_vgpu(vfio_dev) \
container_of((vfio_dev), struct intel_vgpu, vfio_device)

static void kvmgt_page_track_write(struct kvm_vcpu *vcpu, gpa_t gpa,
const u8 *val, int len,
struct kvm_page_track_notifier_node *node);
static void kvmgt_page_track_flush_slot(struct kvm *kvm,
struct kvm_memory_slot *slot,
struct kvm_page_track_notifier_node *node);
static void kvmgt_page_track_write(gpa_t gpa, const u8 *val, int len,
struct kvm_page_track_notifier_node *node);
static void kvmgt_page_track_remove_region(gfn_t gfn, unsigned long nr_pages,
struct kvm_page_track_notifier_node *node);

static ssize_t intel_vgpu_show_description(struct mdev_type *mtype, char *buf)
{
@ -161,8 +159,7 @@ static int gvt_pin_guest_page(struct intel_vgpu *vgpu, unsigned long gfn,

if (npage == 0)
base_page = cur_page;
else if (base_page + npage != cur_page) {
gvt_vgpu_err("The pages are not continuous\n");
else if (page_to_pfn(base_page) + npage != page_to_pfn(cur_page)) {
ret = -EINVAL;
npage++;
goto err;
@ -172,7 +169,8 @@ static int gvt_pin_guest_page(struct intel_vgpu *vgpu, unsigned long gfn,
*page = base_page;
return 0;
err:
gvt_unpin_guest_page(vgpu, gfn, npage * PAGE_SIZE);
if (npage)
gvt_unpin_guest_page(vgpu, gfn, npage * PAGE_SIZE);
return ret;
}

@ -352,6 +350,8 @@ __kvmgt_protect_table_find(struct intel_vgpu *info, gfn_t gfn)
{
struct kvmgt_pgfn *p, *res = NULL;

lockdep_assert_held(&info->vgpu_lock);

hash_for_each_possible(info->ptable, p, hnode, gfn) {
if (gfn == p->gfn) {
res = p;
@ -654,21 +654,19 @@ out:
static int intel_vgpu_open_device(struct vfio_device *vfio_dev)
{
struct intel_vgpu *vgpu = vfio_dev_to_vgpu(vfio_dev);

if (!vgpu->vfio_device.kvm ||
vgpu->vfio_device.kvm->mm != current->mm) {
gvt_vgpu_err("KVM is required to use Intel vGPU\n");
return -ESRCH;
}
int ret;

if (__kvmgt_vgpu_exist(vgpu))
return -EEXIST;

vgpu->track_node.track_write = kvmgt_page_track_write;
vgpu->track_node.track_flush_slot = kvmgt_page_track_flush_slot;
kvm_get_kvm(vgpu->vfio_device.kvm);
kvm_page_track_register_notifier(vgpu->vfio_device.kvm,
&vgpu->track_node);
vgpu->track_node.track_remove_region = kvmgt_page_track_remove_region;
ret = kvm_page_track_register_notifier(vgpu->vfio_device.kvm,
&vgpu->track_node);
if (ret) {
gvt_vgpu_err("KVM is required to use Intel vGPU\n");
return ret;
}

set_bit(INTEL_VGPU_STATUS_ATTACHED, vgpu->status);

@ -703,7 +701,6 @@ static void intel_vgpu_close_device(struct vfio_device *vfio_dev)

kvm_page_track_unregister_notifier(vgpu->vfio_device.kvm,
&vgpu->track_node);
kvm_put_kvm(vgpu->vfio_device.kvm);

kvmgt_protect_table_destroy(vgpu);
gvt_cache_destroy(vgpu);
@ -1546,95 +1543,70 @@ static struct mdev_driver intel_vgpu_mdev_driver = {

int intel_gvt_page_track_add(struct intel_vgpu *info, u64 gfn)
{
struct kvm *kvm = info->vfio_device.kvm;
struct kvm_memory_slot *slot;
int idx;
int r;

if (!test_bit(INTEL_VGPU_STATUS_ATTACHED, info->status))
return -ESRCH;

idx = srcu_read_lock(&kvm->srcu);
slot = gfn_to_memslot(kvm, gfn);
if (!slot) {
srcu_read_unlock(&kvm->srcu, idx);
return -EINVAL;
}

write_lock(&kvm->mmu_lock);

if (kvmgt_gfn_is_write_protected(info, gfn))
goto out;
return 0;

r = kvm_write_track_add_gfn(info->vfio_device.kvm, gfn);
if (r)
return r;

kvm_slot_page_track_add_page(kvm, slot, gfn, KVM_PAGE_TRACK_WRITE);
kvmgt_protect_table_add(info, gfn);

out:
write_unlock(&kvm->mmu_lock);
srcu_read_unlock(&kvm->srcu, idx);
return 0;
}

int intel_gvt_page_track_remove(struct intel_vgpu *info, u64 gfn)
{
struct kvm *kvm = info->vfio_device.kvm;
struct kvm_memory_slot *slot;
int idx;
int r;

if (!test_bit(INTEL_VGPU_STATUS_ATTACHED, info->status))
return -ESRCH;

idx = srcu_read_lock(&kvm->srcu);
slot = gfn_to_memslot(kvm, gfn);
if (!slot) {
srcu_read_unlock(&kvm->srcu, idx);
return -EINVAL;
}

write_lock(&kvm->mmu_lock);

if (!kvmgt_gfn_is_write_protected(info, gfn))
goto out;
return 0;

r = kvm_write_track_remove_gfn(info->vfio_device.kvm, gfn);
if (r)
return r;

kvm_slot_page_track_remove_page(kvm, slot, gfn, KVM_PAGE_TRACK_WRITE);
kvmgt_protect_table_del(info, gfn);

out:
write_unlock(&kvm->mmu_lock);
srcu_read_unlock(&kvm->srcu, idx);
return 0;
}

static void kvmgt_page_track_write(struct kvm_vcpu *vcpu, gpa_t gpa,
const u8 *val, int len,
struct kvm_page_track_notifier_node *node)
static void kvmgt_page_track_write(gpa_t gpa, const u8 *val, int len,
struct kvm_page_track_notifier_node *node)
{
struct intel_vgpu *info =
container_of(node, struct intel_vgpu, track_node);

if (kvmgt_gfn_is_write_protected(info, gpa_to_gfn(gpa)))
mutex_lock(&info->vgpu_lock);

if (kvmgt_gfn_is_write_protected(info, gpa >> PAGE_SHIFT))
intel_vgpu_page_track_handler(info, gpa,
(void *)val, len);

mutex_unlock(&info->vgpu_lock);
}

static void kvmgt_page_track_flush_slot(struct kvm *kvm,
struct kvm_memory_slot *slot,
struct kvm_page_track_notifier_node *node)
static void kvmgt_page_track_remove_region(gfn_t gfn, unsigned long nr_pages,
struct kvm_page_track_notifier_node *node)
{
int i;
gfn_t gfn;
unsigned long i;
struct intel_vgpu *info =
container_of(node, struct intel_vgpu, track_node);

write_lock(&kvm->mmu_lock);
for (i = 0; i < slot->npages; i++) {
gfn = slot->base_gfn + i;
if (kvmgt_gfn_is_write_protected(info, gfn)) {
kvm_slot_page_track_remove_page(kvm, slot, gfn,
KVM_PAGE_TRACK_WRITE);
kvmgt_protect_table_del(info, gfn);
}
mutex_lock(&info->vgpu_lock);

for (i = 0; i < nr_pages; i++) {
if (kvmgt_gfn_is_write_protected(info, gfn + i))
kvmgt_protect_table_del(info, gfn + i);
}
write_unlock(&kvm->mmu_lock);

mutex_unlock(&info->vgpu_lock);
}

void intel_vgpu_detach_regions(struct intel_vgpu *vgpu)

@ -162,13 +162,9 @@ int intel_vgpu_page_track_handler(struct intel_vgpu *vgpu, u64 gpa,
struct intel_vgpu_page_track *page_track;
int ret = 0;

mutex_lock(&vgpu->vgpu_lock);

page_track = intel_vgpu_find_page_track(vgpu, gpa >> PAGE_SHIFT);
if (!page_track) {
ret = -ENXIO;
goto out;
}
if (!page_track)
return -ENXIO;

if (unlikely(vgpu->failsafe)) {
/* Remove write protection to prevent future traps. */
@ -179,7 +175,5 @@ int intel_vgpu_page_track_handler(struct intel_vgpu *vgpu, u64 gpa,
gvt_err("guest page write error, gpa %llx\n", gpa);
}

out:
mutex_unlock(&vgpu->vgpu_lock);
return ret;
}

@ -867,6 +867,25 @@ static inline void kvm_vm_bugged(struct kvm *kvm)
unlikely(__ret); \
})

/*
* Note, "data corruption" refers to corruption of host kernel data structures,
* not guest data. Guest data corruption, suspected or confirmed, that is tied
* and contained to a single VM should *never* BUG() and potentially panic the
* host, i.e. use this variant of KVM_BUG() if and only if a KVM data structure
* is corrupted and that corruption can have a cascading effect to other parts
* of the host and/or to other VMs.
*/
#define KVM_BUG_ON_DATA_CORRUPTION(cond, kvm) \
({ \
bool __ret = !!(cond); \
\
if (IS_ENABLED(CONFIG_BUG_ON_DATA_CORRUPTION)) \
BUG_ON(__ret); \
else if (WARN_ON_ONCE(__ret && !(kvm)->vm_bugged)) \
kvm_vm_bugged(kvm); \
unlikely(__ret); \
})

static inline void kvm_vcpu_srcu_read_lock(struct kvm_vcpu *vcpu)
{
#ifdef CONFIG_PROVE_RCU
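
A hypothetical caller, purely to illustrate the intended semantics (this is not a call site from this series): the check either BUG()s outright when CONFIG_BUG_ON_DATA_CORRUPTION is enabled, or marks only the offending VM as bugged and evaluates to true so the caller can bail out.

static void demo_validate_list(struct kvm *kvm, struct list_head *list)
{
	/*
	 * Corruption of a host-side list is never the guest's fault; refuse
	 * to walk it and take down just this VM (or the whole host, if
	 * BUG_ON_DATA_CORRUPTION is enabled).
	 */
	if (KVM_BUG_ON_DATA_CORRUPTION(!list_empty(list) &&
				       list->next->prev != list, kvm))
		return;

	/* ... safe to walk the list ... */
}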