diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
index decf1fb3ce08..8e31627bca59 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.c
+++ b/arch/x86/kvm/mmu/tdp_mmu.c
@@ -822,14 +822,36 @@ static inline gfn_t tdp_mmu_max_gfn_host(void)
 	return 1ULL << (shadow_phys_bits - PAGE_SHIFT);
 }
 
-static void tdp_mmu_zap_root(struct kvm *kvm, struct kvm_mmu_page *root,
-			     bool shared)
+static void __tdp_mmu_zap_root(struct kvm *kvm, struct kvm_mmu_page *root,
+			       bool shared, int zap_level)
 {
 	struct tdp_iter iter;
 
 	gfn_t end = tdp_mmu_max_gfn_host();
 	gfn_t start = 0;
 
+	for_each_tdp_pte_min_level(iter, root, zap_level, start, end) {
+retry:
+		if (tdp_mmu_iter_cond_resched(kvm, &iter, false, shared))
+			continue;
+
+		if (!is_shadow_present_pte(iter.old_spte))
+			continue;
+
+		if (iter.level > zap_level)
+			continue;
+
+		if (!shared)
+			tdp_mmu_set_spte(kvm, &iter, 0);
+		else if (tdp_mmu_set_spte_atomic(kvm, &iter, 0))
+			goto retry;
+	}
+}
+
+static void tdp_mmu_zap_root(struct kvm *kvm, struct kvm_mmu_page *root,
+			     bool shared)
+{
+
 	/*
 	 * The root must have an elevated refcount so that it's reachable via
 	 * mmu_notifier callbacks, which allows this path to yield and drop
@@ -847,22 +869,17 @@ static void tdp_mmu_zap_root(struct kvm *kvm, struct kvm_mmu_page *root,
 	rcu_read_lock();
 
 	/*
-	 * No need to try to step down in the iterator when zapping an entire
-	 * root, zapping an upper-level SPTE will recurse on its children.
+	 * To avoid RCU stalls due to recursively removing huge swaths of SPs,
+	 * split the zap into two passes.  On the first pass, zap at the 1gb
+	 * level, and then zap top-level SPs on the second pass.  "1gb" is not
+	 * arbitrary, as KVM must be able to zap a 1gb shadow page without
+	 * inducing a stall to allow in-place replacement with a 1gb hugepage.
+	 *
+	 * Because zapping a SP recurses on its children, stepping down to
+	 * PG_LEVEL_4K in the iterator itself is unnecessary.
 	 */
-	for_each_tdp_pte_min_level(iter, root, root->role.level, start, end) {
-retry:
-		if (tdp_mmu_iter_cond_resched(kvm, &iter, false, shared))
-			continue;
-
-		if (!is_shadow_present_pte(iter.old_spte))
-			continue;
-
-		if (!shared)
-			tdp_mmu_set_spte(kvm, &iter, 0);
-		else if (tdp_mmu_set_spte_atomic(kvm, &iter, 0))
-			goto retry;
-	}
+	__tdp_mmu_zap_root(kvm, root, shared, PG_LEVEL_1G);
+	__tdp_mmu_zap_root(kvm, root, shared, root->role.level);
 
 	rcu_read_unlock();
 }
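
For readers following along outside the KVM tree, here is a minimal, self-contained model of the two-pass idea, assuming a toy radix tree in place of KVM's shadow pages. Every name in it (struct sp, sp_build, sp_free_subtree, __zap_root, zap_root, FANOUT) is a hypothetical stand-in, not a KVM API; the point is only that emptying tables at a low level first bounds the work done between yield points, after which the leafless skeleton is cheap to tear down in a second pass.

/* Toy model of the two-pass zap; not KVM code. */
#include <stdlib.h>

#define FANOUT	8	/* toy fan-out; x86 page tables hold 512 entries */

struct sp {
	int level;			/* 1 is the leaf level */
	struct sp *children[FANOUT];	/* all NULL at level 1 */
};

static struct sp *sp_build(int level)
{
	struct sp *sp = calloc(1, sizeof(*sp));
	int i;

	sp->level = level;
	if (level > 1)
		for (i = 0; i < FANOUT; i++)
			sp->children[i] = sp_build(level - 1);
	return sp;
}

/* Recursively free @sp and everything below it: unbounded work. */
static void sp_free_subtree(struct sp *sp)
{
	int i;

	if (!sp)
		return;
	for (i = 0; i < FANOUT; i++)
		sp_free_subtree(sp->children[i]);
	free(sp);
}

/* Clear every entry of every table at exactly @zap_level. */
static void __zap_root(struct sp *sp, int zap_level)
{
	int i;

	if (!sp || sp->level < zap_level)
		return;

	for (i = 0; i < FANOUT; i++) {
		if (sp->level > zap_level) {
			__zap_root(sp->children[i], zap_level);
		} else {
			/* Bounded chunk: a subtree only zap_level - 1 deep. */
			sp_free_subtree(sp->children[i]);
			sp->children[i] = NULL;
			/* a cond_resched()-style yield is safe here */
		}
	}
}

static void zap_root(struct sp *root)
{
	/* Pass 1: free the bulk of the tree in bounded chunks. */
	__zap_root(root, 2);
	/* Pass 2: only the leafless skeleton remains. */
	__zap_root(root, root->level);
	free(root);
}

int main(void)
{
	zap_root(sp_build(4));
	return 0;
}

The sketch uses level 2 as its intermediate pass purely for illustration; the real patch uses PG_LEVEL_1G, which the new comment above justifies: KVM must already be able to zap a 1gb shadow page without stalling, so a pass that zaps one 1gb entry at a time cannot be worse.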