mm: allow ->huge_fault() to be called without the mmap_lock held
Remove the checks for the VMA lock being held, allowing the page fault
path to call into the filesystem instead of retrying with the mmap_lock
held.  This will improve scalability for DAX page faults.  Also update
the documentation to match (and fix some other changes that have
happened recently).

Link: https://lkml.kernel.org/r/20230818202335.2739663-3-willy@infradead.org
Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
parent 051ddcfeb1
commit 40d49a3c9e
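For orientation before the diff: a minimal sketch, not part of this commit, of the handler shape the series documents — ->huge_fault() receiving a page order and possibly running without the mmap_lock. All myfs_* names are hypothetical, and PMD_ORDER is assumed from the page-order convention the porting note below describes; real DAX filesystems route such faults through dax_iomap_fault().

static vm_fault_t myfs_fault(struct vm_fault *vmf);	/* PTE-sized path, hypothetical */
static vm_fault_t myfs_pmd_fault(struct vm_fault *vmf);	/* PMD worker, hypothetical */

static vm_fault_t myfs_huge_fault(struct vm_fault *vmf, unsigned int order)
{
	/* After this commit, the mmap_lock may not be held here. */
	if (order == PMD_ORDER)
		return myfs_pmd_fault(vmf);
	/* Sizes we cannot serve: let the VM retry with a smaller mapping. */
	return VM_FAULT_FALLBACK;
}

static const struct vm_operations_struct myfs_vm_ops = {
	.fault		= myfs_fault,
	.huge_fault	= myfs_huge_fault,
};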
diff --git a/Documentation/filesystems/locking.rst b/Documentation/filesystems/locking.rst
--- a/Documentation/filesystems/locking.rst
+++ b/Documentation/filesystems/locking.rst
@@ -628,26 +628,29 @@ vm_operations_struct
 
 prototypes::
 
-	void (*open)(struct vm_area_struct*);
-	void (*close)(struct vm_area_struct*);
-	vm_fault_t (*fault)(struct vm_area_struct*, struct vm_fault *);
+	void (*open)(struct vm_area_struct *);
+	void (*close)(struct vm_area_struct *);
+	vm_fault_t (*fault)(struct vm_fault *);
+	vm_fault_t (*huge_fault)(struct vm_fault *, unsigned int order);
 	vm_fault_t (*map_pages)(struct vm_fault *, pgoff_t start, pgoff_t end);
 	vm_fault_t (*page_mkwrite)(struct vm_area_struct *, struct vm_fault *);
 	vm_fault_t (*pfn_mkwrite)(struct vm_area_struct *, struct vm_fault *);
 	int (*access)(struct vm_area_struct *, unsigned long, void*, int, int);
 
 locking rules:
 
-============= ========= ===========================
+============= ========== ===========================
 ops           mmap_lock PageLocked(page)
-============= ========= ===========================
-open:         yes
-close:        yes
-fault:        yes       can return with page locked
-map_pages:    read
-page_mkwrite: yes       can return with page locked
-pfn_mkwrite:  yes
-access:       yes
-============= ========= ===========================
+============= ========== ===========================
+open:         write
+close:        read/write
+fault:        read       can return with page locked
+huge_fault:   maybe-read
+map_pages:    maybe-read
+page_mkwrite: read       can return with page locked
+pfn_mkwrite:  read
+access:       read
+============= ========== ===========================
 
 ->fault() is called when a previously not present pte is about to be faulted
 in. The filesystem must find and return the page associated with the passed in
@@ -657,6 +660,13 @@ then ensure the page is not already truncated (invalidate_lock will block
 subsequent truncate), and then return with VM_FAULT_LOCKED, and the page
 locked. The VM will unlock the page.
 
+->huge_fault() is called when there is no PUD or PMD entry present.  This
+gives the filesystem the opportunity to install a PUD or PMD sized page.
+Filesystems can also use the ->fault method to return a PMD sized page,
+so implementing this function may not be necessary.  In particular,
+filesystems should not call filemap_fault() from ->huge_fault().
+The mmap_lock may not be held when this method is called.
+
 ->map_pages() is called when VM asks to map easy accessible pages.
 Filesystem should find and map pages associated with offsets from "start_pgoff"
 till "end_pgoff". ->map_pages() is called with the RCU lock held and must
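To make the fallback rule in the new locking.rst text concrete: returning VM_FAULT_FALLBACK hands the fault back to the VM, which retries at a smaller size and ultimately through ->fault(), where filemap_fault() is allowed to appear. A hedged sketch under the same assumptions as the example above (hypothetical myfs_* names, PMD_ORDER from the page-order convention):

static vm_fault_t myfs_huge_fault(struct vm_fault *vmf, unsigned int order)
{
	/* No PUD-sized support: fall back rather than fail the fault. */
	if (order != PMD_ORDER)
		return VM_FAULT_FALLBACK;
	/* A misaligned fault cannot take a PMD entry either. */
	if (!IS_ALIGNED(vmf->address, PMD_SIZE))
		return VM_FAULT_FALLBACK;
	/*
	 * Do NOT call filemap_fault() here: it expects ->fault() context
	 * and may drop locks this caller did not take.
	 */
	return myfs_map_pmd(vmf);	/* hypothetical PMD-install helper */
}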
diff --git a/Documentation/filesystems/porting.rst b/Documentation/filesystems/porting.rst
--- a/Documentation/filesystems/porting.rst
+++ b/Documentation/filesystems/porting.rst
@@ -943,3 +943,14 @@ file pointer instead of struct dentry pointer. d_tmpfile() is similarly
 changed to simplify callers.  The passed file is in a non-open state and on
 success must be opened before returning (e.g. by calling
 finish_open_simple()).
+
+---
+
+**mandatory**
+
+Calling convention for ->huge_fault has changed.  It now takes a page
+order instead of an enum page_entry_size, and it may be called without the
+mmap_lock held.  All in-tree users have been audited and do not seem to
+depend on the mmap_lock being held, but out of tree users should verify
+for themselves.  If they do need it, they can return VM_FAULT_RETRY to
+be called with the mmap_lock held.
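For an out-of-tree implementation that genuinely needs the mmap_lock, the porting note above suggests returning VM_FAULT_RETRY. A sketch that mirrors the exact pattern this commit removes from mm/memory.c, so the lock handling matches what the core fault path used to do (the handler itself is hypothetical):

static vm_fault_t legacy_huge_fault(struct vm_fault *vmf, unsigned int order)
{
	/*
	 * The same dance the core performed before this commit: drop the
	 * per-VMA read lock and ask for a retry; the fault then comes
	 * back with the mmap_lock held.
	 */
	if (vmf->flags & FAULT_FLAG_VMA_LOCK) {
		vma_end_read(vmf->vma);
		return VM_FAULT_RETRY;
	}
	/* ... original implementation that relied on the mmap_lock ... */
	return VM_FAULT_FALLBACK;
}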
 mm/memory.c | 22 ++--------------------

diff --git a/mm/memory.c b/mm/memory.c
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -4854,13 +4854,8 @@ static inline vm_fault_t create_huge_pmd(struct vm_fault *vmf)
 {
 	struct vm_area_struct *vma = vmf->vma;
 	if (vma_is_anonymous(vma))
 		return do_huge_pmd_anonymous_page(vmf);
-	if (vma->vm_ops->huge_fault) {
-		if (vmf->flags & FAULT_FLAG_VMA_LOCK) {
-			vma_end_read(vma);
-			return VM_FAULT_RETRY;
-		}
+	if (vma->vm_ops->huge_fault)
 		return vma->vm_ops->huge_fault(vmf, PE_SIZE_PMD);
-	}
 	return VM_FAULT_FALLBACK;
 }
@@ -4880,10 +4875,6 @@ static inline vm_fault_t wp_huge_pmd(struct vm_fault *vmf)
 
 	if (vma->vm_flags & (VM_SHARED | VM_MAYSHARE)) {
 		if (vma->vm_ops->huge_fault) {
-			if (vmf->flags & FAULT_FLAG_VMA_LOCK) {
-				vma_end_read(vma);
-				return VM_FAULT_RETRY;
-			}
 			ret = vma->vm_ops->huge_fault(vmf, PE_SIZE_PMD);
 			if (!(ret & VM_FAULT_FALLBACK))
 				return ret;
@@ -4904,13 +4895,8 @@ static vm_fault_t create_huge_pud(struct vm_fault *vmf)
 	/* No support for anonymous transparent PUD pages yet */
 	if (vma_is_anonymous(vma))
 		return VM_FAULT_FALLBACK;
-	if (vma->vm_ops->huge_fault) {
-		if (vmf->flags & FAULT_FLAG_VMA_LOCK) {
-			vma_end_read(vma);
-			return VM_FAULT_RETRY;
-		}
+	if (vma->vm_ops->huge_fault)
 		return vma->vm_ops->huge_fault(vmf, PE_SIZE_PUD);
-	}
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 	return VM_FAULT_FALLBACK;
 }
@@ -4927,10 +4913,6 @@ static vm_fault_t wp_huge_pud(struct vm_fault *vmf, pud_t orig_pud)
 		goto split;
 	if (vma->vm_flags & (VM_SHARED | VM_MAYSHARE)) {
 		if (vma->vm_ops->huge_fault) {
-			if (vmf->flags & FAULT_FLAG_VMA_LOCK) {
-				vma_end_read(vma);
-				return VM_FAULT_RETRY;
-			}
 			ret = vma->vm_ops->huge_fault(vmf, PE_SIZE_PUD);
 			if (!(ret & VM_FAULT_FALLBACK))
 				return ret;