userfaultfd: wp: support swap and page migration
For both swap and page migration, we use bit 2 of the entry to identify whether the entry is uffd write-protected. It plays a similar role to the existing soft dirty bit in swap entries, but is used only to keep the uffd-wp tracking for a specific PTE/PMD. One special point is that when we recover the uffd-wp bit from a swap/migration entry into the PTE bit, we also need to take care of the _PAGE_RW bit and make sure it is cleared; otherwise, even with the _PAGE_UFFD_WP bit set, we cannot trap the write at all. Before this patch, change_pte_range() did nothing for uffd if the PTE was a swap entry. That can lead to a data mismatch if the page that we are going to write-protect is swapped out when UFFDIO_WRITEPROTECT is sent. This patch therefore also applies/removes the uffd-wp bit for swap entries. Signed-off-by: Peter Xu <peterx@redhat.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Cc: Andrea Arcangeli <aarcange@redhat.com> Cc: Bobby Powers <bobbypowers@gmail.com> Cc: Brian Geffon <bgeffon@google.com> Cc: David Hildenbrand <david@redhat.com> Cc: Denis Plotnikov <dplotnikov@virtuozzo.com> Cc: "Dr . David Alan Gilbert" <dgilbert@redhat.com> Cc: Hugh Dickins <hughd@google.com> Cc: Jerome Glisse <jglisse@redhat.com> Cc: Johannes Weiner <hannes@cmpxchg.org> Cc: "Kirill A . Shutemov" <kirill@shutemov.name> Cc: Martin Cracauer <cracauer@cons.org> Cc: Marty McFadden <mcfadden8@llnl.gov> Cc: Maya Gokhale <gokhale2@llnl.gov> Cc: Mel Gorman <mgorman@suse.de> Cc: Mike Kravetz <mike.kravetz@oracle.com> Cc: Mike Rapoport <rppt@linux.vnet.ibm.com> Cc: Pavel Emelyanov <xemul@openvz.org> Cc: Rik van Riel <riel@redhat.com> Cc: Shaohua Li <shli@fb.com> Link: http://lkml.kernel.org/r/20200220163112.11409-11-peterx@redhat.com Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:
parent
2e3d5dc508
commit
f45ec5ff16
@ -68,6 +68,8 @@ static inline swp_entry_t pte_to_swp_entry(pte_t pte)
|
||||
|
||||
if (pte_swp_soft_dirty(pte))
|
||||
pte = pte_swp_clear_soft_dirty(pte);
|
||||
if (pte_swp_uffd_wp(pte))
|
||||
pte = pte_swp_clear_uffd_wp(pte);
|
||||
arch_entry = __pte_to_swp_entry(pte);
|
||||
return swp_entry(__swp_type(arch_entry), __swp_offset(arch_entry));
|
||||
}
|
||||
|
@ -2297,6 +2297,7 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
|
||||
write = is_write_migration_entry(entry);
|
||||
young = false;
|
||||
soft_dirty = pmd_swp_soft_dirty(old_pmd);
|
||||
uffd_wp = pmd_swp_uffd_wp(old_pmd);
|
||||
} else {
|
||||
page = pmd_page(old_pmd);
|
||||
if (pmd_dirty(old_pmd))
|
||||
@ -2329,6 +2330,8 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
|
||||
entry = swp_entry_to_pte(swp_entry);
|
||||
if (soft_dirty)
|
||||
entry = pte_swp_mksoft_dirty(entry);
|
||||
if (uffd_wp)
|
||||
entry = pte_swp_mkuffd_wp(entry);
|
||||
} else {
|
||||
entry = mk_pte(page + i, READ_ONCE(vma->vm_page_prot));
|
||||
entry = maybe_mkwrite(entry, vma);
|
||||
|
@ -733,6 +733,8 @@ copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
|
||||
pte = swp_entry_to_pte(entry);
|
||||
if (pte_swp_soft_dirty(*src_pte))
|
||||
pte = pte_swp_mksoft_dirty(pte);
|
||||
if (pte_swp_uffd_wp(*src_pte))
|
||||
pte = pte_swp_mkuffd_wp(pte);
|
||||
set_pte_at(src_mm, addr, src_pte, pte);
|
||||
}
|
||||
} else if (is_device_private_entry(entry)) {
|
||||
@ -762,6 +764,8 @@ copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
|
||||
is_cow_mapping(vm_flags)) {
|
||||
make_device_private_entry_read(&entry);
|
||||
pte = swp_entry_to_pte(entry);
|
||||
if (pte_swp_uffd_wp(*src_pte))
|
||||
pte = pte_swp_mkuffd_wp(pte);
|
||||
set_pte_at(src_mm, addr, src_pte, pte);
|
||||
}
|
||||
}
|
||||
@ -3098,6 +3102,10 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
|
||||
flush_icache_page(vma, page);
|
||||
if (pte_swp_soft_dirty(vmf->orig_pte))
|
||||
pte = pte_mksoft_dirty(pte);
|
||||
if (pte_swp_uffd_wp(vmf->orig_pte)) {
|
||||
pte = pte_mkuffd_wp(pte);
|
||||
pte = pte_wrprotect(pte);
|
||||
}
|
||||
set_pte_at(vma->vm_mm, vmf->address, vmf->pte, pte);
|
||||
arch_do_swap_page(vma->vm_mm, vma, vmf->address, pte, vmf->orig_pte);
|
||||
vmf->orig_pte = pte;
|
||||
|
@ -243,11 +243,15 @@ static bool remove_migration_pte(struct page *page, struct vm_area_struct *vma,
|
||||
entry = pte_to_swp_entry(*pvmw.pte);
|
||||
if (is_write_migration_entry(entry))
|
||||
pte = maybe_mkwrite(pte, vma);
|
||||
else if (pte_swp_uffd_wp(*pvmw.pte))
|
||||
pte = pte_mkuffd_wp(pte);
|
||||
|
||||
if (unlikely(is_zone_device_page(new))) {
|
||||
if (is_device_private_page(new)) {
|
||||
entry = make_device_private_entry(new, pte_write(pte));
|
||||
pte = swp_entry_to_pte(entry);
|
||||
if (pte_swp_uffd_wp(*pvmw.pte))
|
||||
pte = pte_mkuffd_wp(pte);
|
||||
}
|
||||
}
|
||||
|
||||
@ -2338,6 +2342,8 @@ again:
|
||||
swp_pte = swp_entry_to_pte(entry);
|
||||
if (pte_soft_dirty(pte))
|
||||
swp_pte = pte_swp_mksoft_dirty(swp_pte);
|
||||
if (pte_uffd_wp(pte))
|
||||
swp_pte = pte_swp_mkuffd_wp(swp_pte);
|
||||
set_pte_at(mm, addr, ptep, swp_pte);
|
||||
|
||||
/*
|
||||
|
@ -139,11 +139,11 @@ static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
|
||||
}
|
||||
ptep_modify_prot_commit(vma, addr, pte, oldpte, ptent);
|
||||
pages++;
|
||||
} else if (IS_ENABLED(CONFIG_MIGRATION)) {
|
||||
} else if (is_swap_pte(oldpte)) {
|
||||
swp_entry_t entry = pte_to_swp_entry(oldpte);
|
||||
pte_t newpte;
|
||||
|
||||
if (is_write_migration_entry(entry)) {
|
||||
pte_t newpte;
|
||||
/*
|
||||
* A protection check is difficult so
|
||||
* just be safe and disable write
|
||||
@ -152,22 +152,28 @@ static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
|
||||
newpte = swp_entry_to_pte(entry);
|
||||
if (pte_swp_soft_dirty(oldpte))
|
||||
newpte = pte_swp_mksoft_dirty(newpte);
|
||||
set_pte_at(vma->vm_mm, addr, pte, newpte);
|
||||
|
||||
pages++;
|
||||
}
|
||||
|
||||
if (is_write_device_private_entry(entry)) {
|
||||
pte_t newpte;
|
||||
|
||||
if (pte_swp_uffd_wp(oldpte))
|
||||
newpte = pte_swp_mkuffd_wp(newpte);
|
||||
} else if (is_write_device_private_entry(entry)) {
|
||||
/*
|
||||
* We do not preserve soft-dirtiness. See
|
||||
* copy_one_pte() for explanation.
|
||||
*/
|
||||
make_device_private_entry_read(&entry);
|
||||
newpte = swp_entry_to_pte(entry);
|
||||
set_pte_at(vma->vm_mm, addr, pte, newpte);
|
||||
if (pte_swp_uffd_wp(oldpte))
|
||||
newpte = pte_swp_mkuffd_wp(newpte);
|
||||
} else {
|
||||
newpte = oldpte;
|
||||
}
|
||||
|
||||
if (uffd_wp)
|
||||
newpte = pte_swp_mkuffd_wp(newpte);
|
||||
else if (uffd_wp_resolve)
|
||||
newpte = pte_swp_clear_uffd_wp(newpte);
|
||||
|
||||
if (!pte_same(oldpte, newpte)) {
|
||||
set_pte_at(vma->vm_mm, addr, pte, newpte);
|
||||
pages++;
|
||||
}
|
||||
}
|
||||
|
@ -1502,6 +1502,8 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
|
||||
swp_pte = swp_entry_to_pte(entry);
|
||||
if (pte_soft_dirty(pteval))
|
||||
swp_pte = pte_swp_mksoft_dirty(swp_pte);
|
||||
if (pte_uffd_wp(pteval))
|
||||
swp_pte = pte_swp_mkuffd_wp(swp_pte);
|
||||
set_pte_at(mm, pvmw.address, pvmw.pte, swp_pte);
|
||||
/*
|
||||
* No need to invalidate here it will synchronize on
|
||||
@ -1601,6 +1603,8 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
|
||||
swp_pte = swp_entry_to_pte(entry);
|
||||
if (pte_soft_dirty(pteval))
|
||||
swp_pte = pte_swp_mksoft_dirty(swp_pte);
|
||||
if (pte_uffd_wp(pteval))
|
||||
swp_pte = pte_swp_mkuffd_wp(swp_pte);
|
||||
set_pte_at(mm, address, pvmw.pte, swp_pte);
|
||||
/*
|
||||
* No need to invalidate here it will synchronize on
|
||||
@ -1667,6 +1671,8 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
|
||||
swp_pte = swp_entry_to_pte(entry);
|
||||
if (pte_soft_dirty(pteval))
|
||||
swp_pte = pte_swp_mksoft_dirty(swp_pte);
|
||||
if (pte_uffd_wp(pteval))
|
||||
swp_pte = pte_swp_mkuffd_wp(swp_pte);
|
||||
set_pte_at(mm, address, pvmw.pte, swp_pte);
|
||||
/* Invalidate as we cleared the pte */
|
||||
mmu_notifier_invalidate_range(mm, address,
|
||||
|
Loading…
Reference in New Issue
Block a user