Merge branch 'kvm-tdpmmu-fixes' into HEAD
Merge topic branch with fixes for 5.14-rc6 and 5.15 merge window.
commit 9a63b4517c
@@ -31,10 +31,10 @@ On x86:

 - vcpu->mutex is taken outside kvm->arch.hyperv.hv_lock

-- kvm->arch.mmu_lock is an rwlock. kvm->arch.tdp_mmu_pages_lock is
-  taken inside kvm->arch.mmu_lock, and cannot be taken without already
-  holding kvm->arch.mmu_lock (typically with ``read_lock``, otherwise
-  there's no need to take kvm->arch.tdp_mmu_pages_lock at all).
+- kvm->arch.mmu_lock is an rwlock. kvm->arch.tdp_mmu_pages_lock and
+  kvm->arch.mmu_unsync_pages_lock are taken inside kvm->arch.mmu_lock, and
+  cannot be taken without already holding kvm->arch.mmu_lock (typically with
+  ``read_lock`` for the TDP MMU, thus the need for additional spinlocks).

 Everything else is a leaf: no other lock is taken inside the critical
 sections.
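As an aside, a minimal userspace sketch of the nesting documented above (plain pthreads, not KVM code; demo_tdp_mmu_path() is an invented name): the two arch spinlocks are only ever taken while mmu_lock is already held, typically for read on the TDP MMU path.

/* Minimal userspace sketch of the documented lock nesting (not KVM code). */
#include <pthread.h>
#include <stdio.h>

static pthread_rwlock_t mmu_lock = PTHREAD_RWLOCK_INITIALIZER;  /* stands in for kvm->arch.mmu_lock */
static pthread_spinlock_t tdp_mmu_pages_lock;                   /* stands in for kvm->arch.tdp_mmu_pages_lock */
static pthread_spinlock_t mmu_unsync_pages_lock;                /* stands in for kvm->arch.mmu_unsync_pages_lock */

/* Hypothetical path: both spinlocks nest strictly inside mmu_lock. */
static void demo_tdp_mmu_path(void)
{
        pthread_rwlock_rdlock(&mmu_lock);        /* mmu_lock held for read, as on TDP MMU faults */
        pthread_spin_lock(&tdp_mmu_pages_lock);  /* only taken while mmu_lock is already held */
        pthread_spin_unlock(&tdp_mmu_pages_lock);
        pthread_spin_lock(&mmu_unsync_pages_lock);
        pthread_spin_unlock(&mmu_unsync_pages_lock);
        pthread_rwlock_unlock(&mmu_lock);
}

int main(void)
{
        pthread_spin_init(&tdp_mmu_pages_lock, PTHREAD_PROCESS_PRIVATE);
        pthread_spin_init(&mmu_unsync_pages_lock, PTHREAD_PROCESS_PRIVATE);
        demo_tdp_mmu_path();
        puts("lock ordering exercised");
        return 0;
}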
@@ -1038,6 +1038,13 @@ struct kvm_arch {
         struct list_head lpage_disallowed_mmu_pages;
         struct kvm_page_track_notifier_node mmu_sp_tracker;
         struct kvm_page_track_notifier_head track_notifier_head;
+        /*
+         * Protects marking pages unsync during page faults, as TDP MMU page
+         * faults only take mmu_lock for read.  For simplicity, the unsync
+         * pages lock is always taken when marking pages unsync regardless of
+         * whether mmu_lock is held for read or write.
+         */
+        spinlock_t mmu_unsync_pages_lock;

         struct list_head assigned_dev_head;
         struct iommu_domain *iommu_domain;
@@ -2575,6 +2575,7 @@ static void kvm_unsync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
 int mmu_try_to_unsync_pages(struct kvm_vcpu *vcpu, gfn_t gfn, bool can_unsync)
 {
         struct kvm_mmu_page *sp;
+        bool locked = false;

         /*
          * Force write-protection if the page is being tracked.  Note, the page
@@ -2597,9 +2598,34 @@ int mmu_try_to_unsync_pages(struct kvm_vcpu *vcpu, gfn_t gfn, bool can_unsync)
                 if (sp->unsync)
                         continue;

+                /*
+                 * TDP MMU page faults require an additional spinlock as they
+                 * run with mmu_lock held for read, not write, and the unsync
+                 * logic is not thread safe.  Take the spinlock regardless of
+                 * the MMU type to avoid extra conditionals/parameters, there's
+                 * no meaningful penalty if mmu_lock is held for write.
+                 */
+                if (!locked) {
+                        locked = true;
+                        spin_lock(&vcpu->kvm->arch.mmu_unsync_pages_lock);
+
+                        /*
+                         * Recheck after taking the spinlock, a different vCPU
+                         * may have since marked the page unsync.  A false
+                         * positive on the unprotected check above is not
+                         * possible as clearing sp->unsync _must_ hold mmu_lock
+                         * for write, i.e. unsync cannot transition from 0->1
+                         * while this CPU holds mmu_lock for read (or write).
+                         */
+                        if (READ_ONCE(sp->unsync))
+                                continue;
+                }
+
                 WARN_ON(sp->role.level != PG_LEVEL_4K);
                 kvm_unsync_page(vcpu, sp);
         }
+        if (locked)
+                spin_unlock(&vcpu->kvm->arch.mmu_unsync_pages_lock);

         /*
          * We need to ensure that the marking of unsync pages is visible
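The hunk above follows a check / lock / recheck pattern: sp->unsync is tested without the spinlock, then re-tested with READ_ONCE() once mmu_unsync_pages_lock is held, which is sufficient because clearing unsync requires mmu_lock for write. A self-contained sketch of that pattern under simplified assumptions (plain pthreads and C11 atomics; mark_unsync() and struct page_state are invented for illustration, not KVM code):

/* Standalone sketch of the check/lock/recheck pattern used above (not KVM code). */
#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

struct page_state {
        atomic_bool unsync;
};

static pthread_spinlock_t unsync_lock;

/* Hypothetical helper: mark a page unsync exactly once, even with concurrent callers. */
static void mark_unsync(struct page_state *sp)
{
        /* Cheap unlocked check first; races are resolved under the lock. */
        if (atomic_load(&sp->unsync))
                return;

        pthread_spin_lock(&unsync_lock);
        /* Recheck after taking the lock: another thread may have won the race. */
        if (!atomic_load(&sp->unsync)) {
                /* ...the expensive bookkeeping would go here... */
                atomic_store(&sp->unsync, true);
        }
        pthread_spin_unlock(&unsync_lock);
}

int main(void)
{
        struct page_state sp = { .unsync = false };

        pthread_spin_init(&unsync_lock, PTHREAD_PROCESS_PRIVATE);
        mark_unsync(&sp);
        mark_unsync(&sp);       /* second call is a no-op */
        printf("unsync = %d\n", atomic_load(&sp.unsync));
        return 0;
}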
@@ -5605,6 +5631,8 @@ void kvm_mmu_init_vm(struct kvm *kvm)
 {
         struct kvm_page_track_notifier_node *node = &kvm->arch.mmu_sp_tracker;

+        spin_lock_init(&kvm->arch.mmu_unsync_pages_lock);
+
         if (!kvm_mmu_init_tdp_mmu(kvm))
                 /*
                  * No smp_load/store wrappers needed here as we are in
@@ -43,6 +43,7 @@ void kvm_mmu_uninit_tdp_mmu(struct kvm *kvm)
         if (!kvm->arch.tdp_mmu_enabled)
                 return;

+        WARN_ON(!list_empty(&kvm->arch.tdp_mmu_pages));
         WARN_ON(!list_empty(&kvm->arch.tdp_mmu_roots));

         /*
@@ -81,8 +82,6 @@ static void tdp_mmu_free_sp_rcu_callback(struct rcu_head *head)
 void kvm_tdp_mmu_put_root(struct kvm *kvm, struct kvm_mmu_page *root,
                           bool shared)
 {
-        gfn_t max_gfn = 1ULL << (shadow_phys_bits - PAGE_SHIFT);
-
         kvm_lockdep_assert_mmu_lock_held(kvm, shared);

         if (!refcount_dec_and_test(&root->tdp_mmu_root_count))
@@ -94,7 +93,7 @@ void kvm_tdp_mmu_put_root(struct kvm *kvm, struct kvm_mmu_page *root,
         list_del_rcu(&root->link);
         spin_unlock(&kvm->arch.tdp_mmu_pages_lock);

-        zap_gfn_range(kvm, root, 0, max_gfn, false, false, shared);
+        zap_gfn_range(kvm, root, 0, -1ull, false, false, shared);

         call_rcu(&root->rcu_head, tdp_mmu_free_sp_rcu_callback);
 }
@@ -753,13 +752,29 @@ static bool zap_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root,
                           gfn_t start, gfn_t end, bool can_yield, bool flush,
                           bool shared)
 {
+        gfn_t max_gfn_host = 1ULL << (shadow_phys_bits - PAGE_SHIFT);
+        bool zap_all = (start == 0 && end >= max_gfn_host);
         struct tdp_iter iter;

+        /*
+         * No need to try to step down in the iterator when zapping all SPTEs,
+         * zapping the top-level non-leaf SPTEs will recurse on their children.
+         */
+        int min_level = zap_all ? root->role.level : PG_LEVEL_4K;
+
+        /*
+         * Bound the walk at host.MAXPHYADDR, guest accesses beyond that will
+         * hit a #PF(RSVD) and never get to an EPT Violation/Misconfig / #NPF,
+         * and so KVM will never install a SPTE for such addresses.
+         */
+        end = min(end, max_gfn_host);
+
         kvm_lockdep_assert_mmu_lock_held(kvm, shared);

         rcu_read_lock();

-        tdp_root_for_each_pte(iter, root, start, end) {
+        for_each_tdp_pte_min_level(iter, root->spt, root->role.level,
+                                   min_level, start, end) {
 retry:
                 if (can_yield &&
                     tdp_mmu_iter_cond_resched(kvm, &iter, flush, shared)) {
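To make the new bounds logic concrete: callers may now pass -1ull as end, zap_gfn_range() clamps it to the host's maximum GFN, and a (start == 0, end >= max_gfn_host) range is treated as a zap-all walk. A standalone arithmetic sketch, assuming 4 KiB pages and a hypothetical 46-bit host MAXPHYADDR purely for illustration:

/* Standalone illustration of the max_gfn_host clamp and zap_all test (not KVM code). */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT      12
#define DEMO_PHYS_BITS  46      /* hypothetical host MAXPHYADDR for the example */

typedef uint64_t gfn_t;

int main(void)
{
        gfn_t max_gfn_host = 1ULL << (DEMO_PHYS_BITS - PAGE_SHIFT);
        gfn_t start = 0, end = -1ull;   /* the sentinel now used by the callers above */

        bool zap_all = (start == 0 && end >= max_gfn_host);

        if (end > max_gfn_host)
                end = max_gfn_host;     /* equivalent of end = min(end, max_gfn_host) */

        printf("max_gfn_host = 0x%llx\n", (unsigned long long)max_gfn_host);
        printf("clamped end  = 0x%llx, zap_all = %d\n", (unsigned long long)end, zap_all);
        return 0;
}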
@@ -773,9 +788,10 @@ retry:
                 /*
                  * If this is a non-last-level SPTE that covers a larger range
                  * than should be zapped, continue, and zap the mappings at a
-                 * lower level.
+                 * lower level, except when zapping all SPTEs.
                  */
-                if ((iter.gfn < start ||
+                if (!zap_all &&
+                    (iter.gfn < start ||
                      iter.gfn + KVM_PAGES_PER_HPAGE(iter.level) > end) &&
                     !is_last_spte(iter.old_spte, iter.level))
                         continue;
@@ -823,12 +839,11 @@ bool __kvm_tdp_mmu_zap_gfn_range(struct kvm *kvm, int as_id, gfn_t start,

 void kvm_tdp_mmu_zap_all(struct kvm *kvm)
 {
-        gfn_t max_gfn = 1ULL << (shadow_phys_bits - PAGE_SHIFT);
         bool flush = false;
         int i;

         for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++)
-                flush = kvm_tdp_mmu_zap_gfn_range(kvm, i, 0, max_gfn,
+                flush = kvm_tdp_mmu_zap_gfn_range(kvm, i, 0, -1ull,
                                                   flush, false);

         if (flush)
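The loop above threads the flush result through every call so that at most one remote TLB flush is issued afterwards. A minimal standalone sketch of that accumulation pattern (zap_range() and NUM_ADDRESS_SPACES are invented stand-ins, not KVM code):

/* Standalone sketch of accumulating a single deferred flush across calls (not KVM code). */
#include <stdbool.h>
#include <stdio.h>

#define NUM_ADDRESS_SPACES      2       /* stand-in for KVM_ADDRESS_SPACE_NUM */

/* Hypothetical worker: returns true if it changed anything or a flush was already pending. */
static bool zap_range(int as_id, bool flush)
{
        bool did_work = (as_id == 1);   /* pretend only address space 1 had mappings */

        return flush || did_work;
}

int main(void)
{
        bool flush = false;
        int i;

        for (i = 0; i < NUM_ADDRESS_SPACES; i++)
                flush = zap_range(i, flush);

        if (flush)
                puts("issuing one remote TLB flush");    /* stand-in for the flush done above */
        return 0;
}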
@@ -867,7 +882,6 @@ static struct kvm_mmu_page *next_invalidated_root(struct kvm *kvm,
  */
 void kvm_tdp_mmu_zap_invalidated_roots(struct kvm *kvm)
 {
-        gfn_t max_gfn = 1ULL << (shadow_phys_bits - PAGE_SHIFT);
         struct kvm_mmu_page *next_root;
         struct kvm_mmu_page *root;
         bool flush = false;
@@ -883,8 +897,7 @@ void kvm_tdp_mmu_zap_invalidated_roots(struct kvm *kvm)

                 rcu_read_unlock();

-                flush = zap_gfn_range(kvm, root, 0, max_gfn, true, flush,
-                                      true);
+                flush = zap_gfn_range(kvm, root, 0, -1ull, true, flush, true);

                 /*
                  * Put the reference acquired in