Merge branch 'mm-delete-change-gpte' into HEAD

The .change_pte() MMU notifier callback was intended as an optimization
and for this reason it was initially called without a surrounding
mmu_notifier_invalidate_range_{start,end}() pair.  It was only ever
implemented by KVM (which was also the original user of MMU notifiers)
and the rules on when to call set_pte_at_notify() rather than set_pte_at()
have always been pretty obscure.
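
For reference, the removed helper amounted to "notify the secondary MMUs via .change_pte(), then update the primary MMU's PTE". A simplified sketch of its contract (the verbatim macro is visible in the mmu_notifier.h hunk below; the real definition was a macro, not an inline function):

/* Simplified sketch of the removed helper, not the verbatim kernel macro. */
static inline void set_pte_at_notify(struct mm_struct *mm, unsigned long address,
				     pte_t *ptep, pte_t pte)
{
	mmu_notifier_change_pte(mm, address, pte);	/* run every registered .change_pte() */
	set_pte_at(mm, address, ptep, pte);		/* then update the primary MMU's PTE */
}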

It may seem a miracle that it has never caused any hard-to-trigger
bugs, but there's a good reason for that: KVM's implementation has
been nonfunctional for a good part of its existence.  Already in
2012, commit 6bdb913f0a70 ("mm: wrap calls to set_pte_at_notify with
invalidate_range_start and invalidate_range_end", 2012-10-09) changed the
.change_pte() callback to occur within an invalidate_range_start/end()
pair; and because KVM unmaps the sPTEs during .invalidate_range_start(),
.change_pte() has no hope of finding a sPTE to change.
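
In other words, since that commit the primary MMU has driven the callbacks in roughly this order (an illustrative sketch of the sequence as seen from KVM, not verbatim kernel code):

mmu_notifier_invalidate_range_start(&range);	/* KVM zaps the sPTEs covering the range */
set_pte_at_notify(mm, addr, ptep, new_pte);	/* .change_pte() fires, but no sPTE is left */
mmu_notifier_invalidate_range_end(&range);	/* the mapping is rebuilt on the next guest fault */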

Therefore, all the code for .change_pte() can be removed from both KVM
and mm/, and set_pte_at_notify() can be replaced with just set_pte_at().
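
The conversion at each call site is purely mechanical, as the mm/ hunks below show; for example:

set_pte_at_notify(mm, addr, ptep, newpte);	/* before */
set_pte_at(mm, addr, ptep, newpte);		/* after  */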

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Paolo Bonzini 2024-04-11 13:06:24 -04:00
commit 531f520024
26 changed files with 16 additions and 419 deletions


@ -1768,40 +1768,6 @@ bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range)
return false;
}
bool kvm_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
{
kvm_pfn_t pfn = pte_pfn(range->arg.pte);
if (!kvm->arch.mmu.pgt)
return false;
WARN_ON(range->end - range->start != 1);
/*
* If the page isn't tagged, defer to user_mem_abort() for sanitising
* the MTE tags. The S2 pte should have been unmapped by
* mmu_notifier_invalidate_range_end().
*/
if (kvm_has_mte(kvm) && !page_mte_tagged(pfn_to_page(pfn)))
return false;
/*
* We've moved a page around, probably through CoW, so let's treat
* it just like a translation fault and the map handler will clean
* the cache to the PoC.
*
* The MMU notifiers will have unmapped a huge PMD before calling
* ->change_pte() (which in turn calls kvm_set_spte_gfn()) and
* therefore we never need to clear out a huge PMD through this
* calling path and a memcache is not required.
*/
kvm_pgtable_stage2_map(kvm->arch.mmu.pgt, range->start << PAGE_SHIFT,
PAGE_SIZE, __pfn_to_phys(pfn),
KVM_PGTABLE_PROT_R, NULL, 0);
return false;
}
bool kvm_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
{
u64 size = (range->end - range->start) << PAGE_SHIFT;


@ -203,7 +203,6 @@ void kvm_flush_tlb_all(void);
void kvm_flush_tlb_gpa(struct kvm_vcpu *vcpu, unsigned long gpa);
int kvm_handle_mm_fault(struct kvm_vcpu *vcpu, unsigned long badv, bool write);
void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end, bool blockable);
int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end);
int kvm_test_age_hva(struct kvm *kvm, unsigned long hva);


@ -494,38 +494,6 @@ bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range)
range->end << PAGE_SHIFT, &ctx);
}
bool kvm_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
{
unsigned long prot_bits;
kvm_pte_t *ptep;
kvm_pfn_t pfn = pte_pfn(range->arg.pte);
gpa_t gpa = range->start << PAGE_SHIFT;
ptep = kvm_populate_gpa(kvm, NULL, gpa, 0);
if (!ptep)
return false;
/* Replacing an absent or old page doesn't need flushes */
if (!kvm_pte_present(NULL, ptep) || !kvm_pte_young(*ptep)) {
kvm_set_pte(ptep, 0);
return false;
}
/* Fill new pte if write protected or page migrated */
prot_bits = _PAGE_PRESENT | __READABLE;
prot_bits |= _CACHE_MASK & pte_val(range->arg.pte);
/*
* Set _PAGE_WRITE or _PAGE_DIRTY iff old and new pte both support
* _PAGE_WRITE for map_page_fast if next page write fault
* _PAGE_DIRTY since gpa has already recorded as dirty page
*/
prot_bits |= __WRITEABLE & *ptep & pte_val(range->arg.pte);
kvm_set_pte(ptep, kvm_pfn_pte(pfn, __pgprot(prot_bits)));
return true;
}
bool kvm_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
{
kvm_ptw_ctx ctx;


@ -444,36 +444,6 @@ bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range)
return true;
}
bool kvm_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
{
gpa_t gpa = range->start << PAGE_SHIFT;
pte_t hva_pte = range->arg.pte;
pte_t *gpa_pte = kvm_mips_pte_for_gpa(kvm, NULL, gpa);
pte_t old_pte;
if (!gpa_pte)
return false;
/* Mapping may need adjusting depending on memslot flags */
old_pte = *gpa_pte;
if (range->slot->flags & KVM_MEM_LOG_DIRTY_PAGES && !pte_dirty(old_pte))
hva_pte = pte_mkclean(hva_pte);
else if (range->slot->flags & KVM_MEM_READONLY)
hva_pte = pte_wrprotect(hva_pte);
set_pte(gpa_pte, hva_pte);
/* Replacing an absent or old page doesn't need flushes */
if (!pte_present(old_pte) || !pte_young(old_pte))
return false;
/* Pages swapped, aged, moved, or cleaned require flushes */
return !pte_present(hva_pte) ||
!pte_young(hva_pte) ||
pte_pfn(old_pte) != pte_pfn(hva_pte) ||
(pte_dirty(old_pte) && !pte_dirty(hva_pte));
}
bool kvm_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
{
return kvm_mips_mkold_gpa_pt(kvm, range->start, range->end);


@ -287,7 +287,6 @@ struct kvmppc_ops {
bool (*unmap_gfn_range)(struct kvm *kvm, struct kvm_gfn_range *range);
bool (*age_gfn)(struct kvm *kvm, struct kvm_gfn_range *range);
bool (*test_age_gfn)(struct kvm *kvm, struct kvm_gfn_range *range);
bool (*set_spte_gfn)(struct kvm *kvm, struct kvm_gfn_range *range);
void (*free_memslot)(struct kvm_memory_slot *slot);
int (*init_vm)(struct kvm *kvm);
void (*destroy_vm)(struct kvm *kvm);


@ -899,11 +899,6 @@ bool kvm_test_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
return kvm->arch.kvm_ops->test_age_gfn(kvm, range);
}
bool kvm_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
{
return kvm->arch.kvm_ops->set_spte_gfn(kvm, range);
}
int kvmppc_core_init_vm(struct kvm *kvm)
{


@ -12,7 +12,6 @@ extern void kvmppc_core_flush_memslot_hv(struct kvm *kvm,
extern bool kvm_unmap_gfn_range_hv(struct kvm *kvm, struct kvm_gfn_range *range);
extern bool kvm_age_gfn_hv(struct kvm *kvm, struct kvm_gfn_range *range);
extern bool kvm_test_age_gfn_hv(struct kvm *kvm, struct kvm_gfn_range *range);
extern bool kvm_set_spte_gfn_hv(struct kvm *kvm, struct kvm_gfn_range *range);
extern int kvmppc_mmu_init_pr(struct kvm_vcpu *vcpu);
extern void kvmppc_mmu_destroy_pr(struct kvm_vcpu *vcpu);


@ -1010,18 +1010,6 @@ bool kvm_test_age_gfn_hv(struct kvm *kvm, struct kvm_gfn_range *range)
return kvm_test_age_rmapp(kvm, range->slot, range->start);
}
bool kvm_set_spte_gfn_hv(struct kvm *kvm, struct kvm_gfn_range *range)
{
WARN_ON(range->start + 1 != range->end);
if (kvm_is_radix(kvm))
kvm_unmap_radix(kvm, range->slot, range->start);
else
kvm_unmap_rmapp(kvm, range->slot, range->start);
return false;
}
static int vcpus_running(struct kvm *kvm)
{
return atomic_read(&kvm->arch.vcpus_running) != 0;


@ -6364,7 +6364,6 @@ static struct kvmppc_ops kvm_ops_hv = {
.unmap_gfn_range = kvm_unmap_gfn_range_hv,
.age_gfn = kvm_age_gfn_hv,
.test_age_gfn = kvm_test_age_gfn_hv,
.set_spte_gfn = kvm_set_spte_gfn_hv,
.free_memslot = kvmppc_core_free_memslot_hv,
.init_vm = kvmppc_core_init_vm_hv,
.destroy_vm = kvmppc_core_destroy_vm_hv,


@ -461,12 +461,6 @@ static bool kvm_test_age_gfn_pr(struct kvm *kvm, struct kvm_gfn_range *range)
return false;
}
static bool kvm_set_spte_gfn_pr(struct kvm *kvm, struct kvm_gfn_range *range)
{
/* The page will get remapped properly on its next fault */
return do_kvm_unmap_gfn(kvm, range);
}
/*****************************************/
static void kvmppc_set_msr_pr(struct kvm_vcpu *vcpu, u64 msr)
@ -2071,7 +2065,6 @@ static struct kvmppc_ops kvm_ops_pr = {
.unmap_gfn_range = kvm_unmap_gfn_range_pr,
.age_gfn = kvm_age_gfn_pr,
.test_age_gfn = kvm_test_age_gfn_pr,
.set_spte_gfn = kvm_set_spte_gfn_pr,
.free_memslot = kvmppc_core_free_memslot_pr,
.init_vm = kvmppc_core_init_vm_pr,
.destroy_vm = kvmppc_core_destroy_vm_pr,


@ -747,12 +747,6 @@ bool kvm_test_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
return false;
}
bool kvm_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
{
/* The page will get remapped properly on its next fault */
return kvm_e500_mmu_unmap_gfn(kvm, range);
}
/*****************************************/
int e500_mmu_host_init(struct kvmppc_vcpu_e500 *vcpu_e500)


@ -550,26 +550,6 @@ bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range)
return false;
}
bool kvm_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
{
int ret;
kvm_pfn_t pfn = pte_pfn(range->arg.pte);
if (!kvm->arch.pgd)
return false;
WARN_ON(range->end - range->start != 1);
ret = gstage_map_page(kvm, NULL, range->start << PAGE_SHIFT,
__pfn_to_phys(pfn), PAGE_SIZE, true, true);
if (ret) {
kvm_debug("Failed to map G-stage page (error %d)\n", ret);
return true;
}
return false;
}
bool kvm_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
{
pte_t *ptep;


@ -432,8 +432,8 @@ static u64 __update_clear_spte_slow(u64 *sptep, u64 spte)
* The idea using the light way get the spte on x86_32 guest is from
* gup_get_pte (mm/gup.c).
*
* An spte tlb flush may be pending, because kvm_set_pte_rmap
* coalesces them and we are running out of the MMU lock. Therefore
* An spte tlb flush may be pending, because they are coalesced and
* we are running out of the MMU lock. Therefore
* we need to protect against in-progress updates of the spte.
*
* Reading the spte while an update is in progress may get the old value
@ -1448,49 +1448,11 @@ static bool __kvm_zap_rmap(struct kvm *kvm, struct kvm_rmap_head *rmap_head,
}
static bool kvm_zap_rmap(struct kvm *kvm, struct kvm_rmap_head *rmap_head,
struct kvm_memory_slot *slot, gfn_t gfn, int level,
pte_t unused)
struct kvm_memory_slot *slot, gfn_t gfn, int level)
{
return __kvm_zap_rmap(kvm, rmap_head, slot);
}
static bool kvm_set_pte_rmap(struct kvm *kvm, struct kvm_rmap_head *rmap_head,
struct kvm_memory_slot *slot, gfn_t gfn, int level,
pte_t pte)
{
u64 *sptep;
struct rmap_iterator iter;
bool need_flush = false;
u64 new_spte;
kvm_pfn_t new_pfn;
WARN_ON_ONCE(pte_huge(pte));
new_pfn = pte_pfn(pte);
restart:
for_each_rmap_spte(rmap_head, &iter, sptep) {
need_flush = true;
if (pte_write(pte)) {
kvm_zap_one_rmap_spte(kvm, rmap_head, sptep);
goto restart;
} else {
new_spte = kvm_mmu_changed_pte_notifier_make_spte(
*sptep, new_pfn);
mmu_spte_clear_track_bits(kvm, sptep);
mmu_spte_set(sptep, new_spte);
}
}
if (need_flush && kvm_available_flush_remote_tlbs_range()) {
kvm_flush_remote_tlbs_gfn(kvm, gfn, level);
return false;
}
return need_flush;
}
struct slot_rmap_walk_iterator {
/* input fields. */
const struct kvm_memory_slot *slot;
@ -1562,7 +1524,7 @@ static void slot_rmap_walk_next(struct slot_rmap_walk_iterator *iterator)
typedef bool (*rmap_handler_t)(struct kvm *kvm, struct kvm_rmap_head *rmap_head,
struct kvm_memory_slot *slot, gfn_t gfn,
int level, pte_t pte);
int level);
static __always_inline bool kvm_handle_gfn_range(struct kvm *kvm,
struct kvm_gfn_range *range,
@ -1574,7 +1536,7 @@ static __always_inline bool kvm_handle_gfn_range(struct kvm *kvm,
for_each_slot_rmap_range(range->slot, PG_LEVEL_4K, KVM_MAX_HUGEPAGE_LEVEL,
range->start, range->end - 1, &iterator)
ret |= handler(kvm, iterator.rmap, range->slot, iterator.gfn,
iterator.level, range->arg.pte);
iterator.level);
return ret;
}
@ -1596,22 +1558,8 @@ bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range)
return flush;
}
bool kvm_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
{
bool flush = false;
if (kvm_memslots_have_rmaps(kvm))
flush = kvm_handle_gfn_range(kvm, range, kvm_set_pte_rmap);
if (tdp_mmu_enabled)
flush |= kvm_tdp_mmu_set_spte_gfn(kvm, range);
return flush;
}
static bool kvm_age_rmap(struct kvm *kvm, struct kvm_rmap_head *rmap_head,
struct kvm_memory_slot *slot, gfn_t gfn, int level,
pte_t unused)
struct kvm_memory_slot *slot, gfn_t gfn, int level)
{
u64 *sptep;
struct rmap_iterator iter;
@ -1624,8 +1572,7 @@ static bool kvm_age_rmap(struct kvm *kvm, struct kvm_rmap_head *rmap_head,
}
static bool kvm_test_age_rmap(struct kvm *kvm, struct kvm_rmap_head *rmap_head,
struct kvm_memory_slot *slot, gfn_t gfn,
int level, pte_t unused)
struct kvm_memory_slot *slot, gfn_t gfn, int level)
{
u64 *sptep;
struct rmap_iterator iter;


@ -322,22 +322,6 @@ u64 make_nonleaf_spte(u64 *child_pt, bool ad_disabled)
return spte;
}
u64 kvm_mmu_changed_pte_notifier_make_spte(u64 old_spte, kvm_pfn_t new_pfn)
{
u64 new_spte;
new_spte = old_spte & ~SPTE_BASE_ADDR_MASK;
new_spte |= (u64)new_pfn << PAGE_SHIFT;
new_spte &= ~PT_WRITABLE_MASK;
new_spte &= ~shadow_host_writable_mask;
new_spte &= ~shadow_mmu_writable_mask;
new_spte = mark_spte_for_access_track(new_spte);
return new_spte;
}
u64 mark_spte_for_access_track(u64 spte)
{
if (spte_ad_enabled(spte))


@ -496,8 +496,6 @@ static inline u64 restore_acc_track_spte(u64 spte)
return spte;
}
u64 kvm_mmu_changed_pte_notifier_make_spte(u64 old_spte, kvm_pfn_t new_pfn);
void __init kvm_mmu_spte_module_init(void);
void kvm_mmu_reset_all_pte_masks(void);


@ -1258,52 +1258,6 @@ bool kvm_tdp_mmu_test_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
return kvm_tdp_mmu_handle_gfn(kvm, range, test_age_gfn);
}
static bool set_spte_gfn(struct kvm *kvm, struct tdp_iter *iter,
struct kvm_gfn_range *range)
{
u64 new_spte;
/* Huge pages aren't expected to be modified without first being zapped. */
WARN_ON_ONCE(pte_huge(range->arg.pte) || range->start + 1 != range->end);
if (iter->level != PG_LEVEL_4K ||
!is_shadow_present_pte(iter->old_spte))
return false;
/*
* Note, when changing a read-only SPTE, it's not strictly necessary to
* zero the SPTE before setting the new PFN, but doing so preserves the
* invariant that the PFN of a present * leaf SPTE can never change.
* See handle_changed_spte().
*/
tdp_mmu_iter_set_spte(kvm, iter, 0);
if (!pte_write(range->arg.pte)) {
new_spte = kvm_mmu_changed_pte_notifier_make_spte(iter->old_spte,
pte_pfn(range->arg.pte));
tdp_mmu_iter_set_spte(kvm, iter, new_spte);
}
return true;
}
/*
* Handle the changed_pte MMU notifier for the TDP MMU.
* data is a pointer to the new pte_t mapping the HVA specified by the MMU
* notifier.
* Returns non-zero if a flush is needed before releasing the MMU lock.
*/
bool kvm_tdp_mmu_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
{
/*
* No need to handle the remote TLB flush under RCU protection, the
* target SPTE _must_ be a leaf SPTE, i.e. cannot result in freeing a
* shadow page. See the WARN on pfn_changed in handle_changed_spte().
*/
return kvm_tdp_mmu_handle_gfn(kvm, range, set_spte_gfn);
}
/*
* Remove write access from all SPTEs at or above min_level that map GFNs
* [start, end). Returns true if an SPTE has been changed and the TLBs need to


@ -31,7 +31,6 @@ bool kvm_tdp_mmu_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range,
bool flush);
bool kvm_tdp_mmu_age_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range);
bool kvm_tdp_mmu_test_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range);
bool kvm_tdp_mmu_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range);
bool kvm_tdp_mmu_wrprot_slot(struct kvm *kvm,
const struct kvm_memory_slot *slot, int min_level);


@ -259,7 +259,6 @@ int kvm_async_pf_wakeup_all(struct kvm_vcpu *vcpu);
#ifdef CONFIG_KVM_GENERIC_MMU_NOTIFIER
union kvm_mmu_notifier_arg {
pte_t pte;
unsigned long attributes;
};
@ -273,7 +272,6 @@ struct kvm_gfn_range {
bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range);
bool kvm_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range);
bool kvm_test_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range);
bool kvm_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range);
#endif
enum {


@ -122,15 +122,6 @@ struct mmu_notifier_ops {
struct mm_struct *mm,
unsigned long address);
/*
* change_pte is called in cases that pte mapping to page is changed:
* for example, when ksm remaps pte to point to a new shared page.
*/
void (*change_pte)(struct mmu_notifier *subscription,
struct mm_struct *mm,
unsigned long address,
pte_t pte);
/*
* invalidate_range_start() and invalidate_range_end() must be
* paired and are called only when the mmap_lock and/or the
@ -392,8 +383,6 @@ extern int __mmu_notifier_clear_young(struct mm_struct *mm,
unsigned long end);
extern int __mmu_notifier_test_young(struct mm_struct *mm,
unsigned long address);
extern void __mmu_notifier_change_pte(struct mm_struct *mm,
unsigned long address, pte_t pte);
extern int __mmu_notifier_invalidate_range_start(struct mmu_notifier_range *r);
extern void __mmu_notifier_invalidate_range_end(struct mmu_notifier_range *r);
extern void __mmu_notifier_arch_invalidate_secondary_tlbs(struct mm_struct *mm,
@ -439,13 +428,6 @@ static inline int mmu_notifier_test_young(struct mm_struct *mm,
return 0;
}
static inline void mmu_notifier_change_pte(struct mm_struct *mm,
unsigned long address, pte_t pte)
{
if (mm_has_notifiers(mm))
__mmu_notifier_change_pte(mm, address, pte);
}
static inline void
mmu_notifier_invalidate_range_start(struct mmu_notifier_range *range)
{
@ -581,26 +563,6 @@ static inline void mmu_notifier_range_init_owner(
__young; \
})
/*
* set_pte_at_notify() sets the pte _after_ running the notifier.
* This is safe to start by updating the secondary MMUs, because the primary MMU
* pte invalidate must have already happened with a ptep_clear_flush() before
* set_pte_at_notify() has been invoked. Updating the secondary MMUs first is
* required when we change both the protection of the mapping from read-only to
* read-write and the pfn (like during copy on write page faults). Otherwise the
* old page would remain mapped readonly in the secondary MMUs after the new
* page is already writable by some CPU through the primary MMU.
*/
#define set_pte_at_notify(__mm, __address, __ptep, __pte) \
({ \
struct mm_struct *___mm = __mm; \
unsigned long ___address = __address; \
pte_t ___pte = __pte; \
\
mmu_notifier_change_pte(___mm, ___address, ___pte); \
set_pte_at(___mm, ___address, __ptep, ___pte); \
})
#else /* CONFIG_MMU_NOTIFIER */
struct mmu_notifier_range {
@ -650,11 +612,6 @@ static inline int mmu_notifier_test_young(struct mm_struct *mm,
return 0;
}
static inline void mmu_notifier_change_pte(struct mm_struct *mm,
unsigned long address, pte_t pte)
{
}
static inline void
mmu_notifier_invalidate_range_start(struct mmu_notifier_range *range)
{
@ -693,7 +650,6 @@ static inline void mmu_notifier_subscriptions_destroy(struct mm_struct *mm)
#define ptep_clear_flush_notify ptep_clear_flush
#define pmdp_huge_clear_flush_notify pmdp_huge_clear_flush
#define pudp_huge_clear_flush_notify pudp_huge_clear_flush
#define set_pte_at_notify set_pte_at
static inline void mmu_notifier_synchronize(void)
{


@ -456,21 +456,6 @@ TRACE_EVENT(kvm_unmap_hva_range,
__entry->start, __entry->end)
);
TRACE_EVENT(kvm_set_spte_hva,
TP_PROTO(unsigned long hva),
TP_ARGS(hva),
TP_STRUCT__entry(
__field( unsigned long, hva )
),
TP_fast_assign(
__entry->hva = hva;
),
TP_printk("mmu notifier set pte hva: %#016lx", __entry->hva)
);
TRACE_EVENT(kvm_age_hva,
TP_PROTO(unsigned long start, unsigned long end),
TP_ARGS(start, end),


@ -18,7 +18,7 @@
#include <linux/sched/coredump.h>
#include <linux/export.h>
#include <linux/rmap.h> /* anon_vma_prepare */
#include <linux/mmu_notifier.h> /* set_pte_at_notify */
#include <linux/mmu_notifier.h>
#include <linux/swap.h> /* folio_free_swap */
#include <linux/ptrace.h> /* user_enable_single_step */
#include <linux/kdebug.h> /* notifier mechanism */
@ -195,8 +195,8 @@ static int __replace_page(struct vm_area_struct *vma, unsigned long addr,
flush_cache_page(vma, addr, pte_pfn(ptep_get(pvmw.pte)));
ptep_clear_flush(vma, addr, pvmw.pte);
if (new_page)
set_pte_at_notify(mm, addr, pvmw.pte,
mk_pte(new_page, vma->vm_page_prot));
set_pte_at(mm, addr, pvmw.pte,
mk_pte(new_page, vma->vm_page_prot));
folio_remove_rmap_pte(old_folio, old_page, vma);
if (!folio_mapped(old_folio))


@ -1345,7 +1345,7 @@ static int write_protect_page(struct vm_area_struct *vma, struct page *page,
if (pte_write(entry))
entry = pte_wrprotect(entry);
set_pte_at_notify(mm, pvmw.address, pvmw.pte, entry);
set_pte_at(mm, pvmw.address, pvmw.pte, entry);
}
*orig_pte = entry;
err = 0;
@ -1447,7 +1447,7 @@ static int replace_page(struct vm_area_struct *vma, struct page *page,
* See Documentation/mm/mmu_notifier.rst
*/
ptep_clear_flush(vma, addr, ptep);
set_pte_at_notify(mm, addr, ptep, newpte);
set_pte_at(mm, addr, ptep, newpte);
folio = page_folio(page);
folio_remove_rmap_pte(folio, page, vma);


@ -3327,13 +3327,8 @@ static vm_fault_t wp_page_copy(struct vm_fault *vmf)
ptep_clear_flush(vma, vmf->address, vmf->pte);
folio_add_new_anon_rmap(new_folio, vma, vmf->address);
folio_add_lru_vma(new_folio, vma);
/*
* We call the notify macro here because, when using secondary
* mmu page tables (such as kvm shadow page tables), we want the
* new page to be mapped directly into the secondary page table.
*/
BUG_ON(unshare && pte_write(entry));
set_pte_at_notify(mm, vmf->address, vmf->pte, entry);
set_pte_at(mm, vmf->address, vmf->pte, entry);
update_mmu_cache_range(vmf, vma, vmf->address, vmf->pte, 1);
if (old_folio) {
/*


@ -664,13 +664,9 @@ static void migrate_vma_insert_page(struct migrate_vma *migrate,
if (flush) {
flush_cache_page(vma, addr, pte_pfn(orig_pte));
ptep_clear_flush(vma, addr, ptep);
set_pte_at_notify(mm, addr, ptep, entry);
update_mmu_cache(vma, addr, ptep);
} else {
/* No need to invalidate - it was non-present before */
set_pte_at(mm, addr, ptep, entry);
update_mmu_cache(vma, addr, ptep);
}
set_pte_at(mm, addr, ptep, entry);
update_mmu_cache(vma, addr, ptep);
pte_unmap_unlock(ptep, ptl);
*src = MIGRATE_PFN_MIGRATE;


@ -424,23 +424,6 @@ int __mmu_notifier_test_young(struct mm_struct *mm,
return young;
}
void __mmu_notifier_change_pte(struct mm_struct *mm, unsigned long address,
pte_t pte)
{
struct mmu_notifier *subscription;
int id;
id = srcu_read_lock(&srcu);
hlist_for_each_entry_rcu(subscription,
&mm->notifier_subscriptions->list, hlist,
srcu_read_lock_held(&srcu)) {
if (subscription->ops->change_pte)
subscription->ops->change_pte(subscription, mm, address,
pte);
}
srcu_read_unlock(&srcu, id);
}
static int mn_itree_invalidate(struct mmu_notifier_subscriptions *subscriptions,
const struct mmu_notifier_range *range)
{


@ -583,8 +583,6 @@ static void kvm_null_fn(void)
}
#define IS_KVM_NULL_FN(fn) ((fn) == (void *)kvm_null_fn)
static const union kvm_mmu_notifier_arg KVM_MMU_NOTIFIER_NO_ARG;
/* Iterate over each memslot intersecting [start, last] (inclusive) range */
#define kvm_for_each_memslot_in_hva_range(node, slots, start, last) \
for (node = interval_tree_iter_first(&slots->hva_tree, start, last); \
@ -670,14 +668,12 @@ static __always_inline kvm_mn_ret_t __kvm_handle_hva_range(struct kvm *kvm,
static __always_inline int kvm_handle_hva_range(struct mmu_notifier *mn,
unsigned long start,
unsigned long end,
union kvm_mmu_notifier_arg arg,
gfn_handler_t handler)
{
struct kvm *kvm = mmu_notifier_to_kvm(mn);
const struct kvm_mmu_notifier_range range = {
.start = start,
.end = end,
.arg = arg,
.handler = handler,
.on_lock = (void *)kvm_null_fn,
.flush_on_ret = true,
@ -705,48 +701,6 @@ static __always_inline int kvm_handle_hva_range_no_flush(struct mmu_notifier *mn
return __kvm_handle_hva_range(kvm, &range).ret;
}
static bool kvm_change_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
{
/*
* Skipping invalid memslots is correct if and only change_pte() is
* surrounded by invalidate_range_{start,end}(), which is currently
* guaranteed by the primary MMU. If that ever changes, KVM needs to
* unmap the memslot instead of skipping the memslot to ensure that KVM
* doesn't hold references to the old PFN.
*/
WARN_ON_ONCE(!READ_ONCE(kvm->mn_active_invalidate_count));
if (range->slot->flags & KVM_MEMSLOT_INVALID)
return false;
return kvm_set_spte_gfn(kvm, range);
}
static void kvm_mmu_notifier_change_pte(struct mmu_notifier *mn,
struct mm_struct *mm,
unsigned long address,
pte_t pte)
{
struct kvm *kvm = mmu_notifier_to_kvm(mn);
const union kvm_mmu_notifier_arg arg = { .pte = pte };
trace_kvm_set_spte_hva(address);
/*
* .change_pte() must be surrounded by .invalidate_range_{start,end}().
* If mmu_invalidate_in_progress is zero, then no in-progress
* invalidations, including this one, found a relevant memslot at
* start(); rechecking memslots here is unnecessary. Note, a false
* positive (count elevated by a different invalidation) is sub-optimal
* but functionally ok.
*/
WARN_ON_ONCE(!READ_ONCE(kvm->mn_active_invalidate_count));
if (!READ_ONCE(kvm->mmu_invalidate_in_progress))
return;
kvm_handle_hva_range(mn, address, address + 1, arg, kvm_change_spte_gfn);
}
void kvm_mmu_invalidate_begin(struct kvm *kvm)
{
lockdep_assert_held_write(&kvm->mmu_lock);
@ -910,8 +864,7 @@ static int kvm_mmu_notifier_clear_flush_young(struct mmu_notifier *mn,
{
trace_kvm_age_hva(start, end);
return kvm_handle_hva_range(mn, start, end, KVM_MMU_NOTIFIER_NO_ARG,
kvm_age_gfn);
return kvm_handle_hva_range(mn, start, end, kvm_age_gfn);
}
static int kvm_mmu_notifier_clear_young(struct mmu_notifier *mn,
@ -964,7 +917,6 @@ static const struct mmu_notifier_ops kvm_mmu_notifier_ops = {
.clear_flush_young = kvm_mmu_notifier_clear_flush_young,
.clear_young = kvm_mmu_notifier_clear_young,
.test_young = kvm_mmu_notifier_test_young,
.change_pte = kvm_mmu_notifier_change_pte,
.release = kvm_mmu_notifier_release,
};