mm/mmap: move mmap_region() below do_munmap()
Relocation of code for the next commit. There should be no functional changes here.

Link: https://lkml.kernel.org/r/20220906194824.2110408-28-Liam.Howlett@oracle.com
Signed-off-by: Liam R. Howlett <Liam.Howlett@Oracle.com>
Tested-by: Yu Zhao <yuzhao@google.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: David Howells <dhowells@redhat.com>
Cc: Davidlohr Bueso <dave@stgolabs.net>
Cc: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Cc: SeongJae Park <sj@kernel.org>
Cc: Sven Schnelle <svens@linux.ibm.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Will Deacon <will@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
This commit is contained in:
parent
d7c6229557
commit
e99668a564
490
mm/mmap.c
490
mm/mmap.c
@ -1720,251 +1720,6 @@ static inline int accountable_mapping(struct file *file, vm_flags_t vm_flags)
|
||||
return (vm_flags & (VM_NORESERVE | VM_SHARED | VM_WRITE)) == VM_WRITE;
|
||||
}
|
||||
|
||||
/**
 * mmap_region() - Set up a mapping of @len bytes at @addr in current->mm.
 * @file: File to map, or NULL when there is no backing file.
 * @addr: Start address of the mapping.
 * @len: Length in bytes; page counts below are computed as len >> PAGE_SHIFT.
 * @vm_flags: Flags for the mapping. VM_ACCOUNT is OR-ed in when
 *            accountable_mapping() says the mapping must be charged.
 * @pgoff: Page offset stored in the new vma's vm_pgoff.
 * @uf: List handed through to do_munmap().
 *
 * In order: check may_expand_vm(), do_munmap() the range, charge memory with
 * security_vm_enough_memory_mm() if accountable, try to expand a neighbouring
 * vma (next and/or prev) over the range, and otherwise allocate, initialise
 * and link a new vma.
 *
 * Return: The address of the mapping on success. On failure a negative errno
 * (-ENOMEM, -EINVAL, or the error from mapping_map_writable(), call_mmap() or
 * shmem_zero_setup()) is returned through the unsigned long return type.
 */
unsigned long mmap_region(struct file *file, unsigned long addr,
|
||||
unsigned long len, vm_flags_t vm_flags, unsigned long pgoff,
|
||||
struct list_head *uf)
|
||||
{
|
||||
struct mm_struct *mm = current->mm;
|
||||
struct vm_area_struct *vma = NULL;
|
||||
struct vm_area_struct *next, *prev, *merge;
|
||||
pgoff_t pglen = len >> PAGE_SHIFT;
|
||||
unsigned long charged = 0;
|
||||
unsigned long end = addr + len;
|
||||
unsigned long merge_start = addr, merge_end = end;
|
||||
pgoff_t vm_pgoff;
|
||||
int error;
|
||||
/* Maple tree state on mm->mm_mt, initialised to the range [addr, end - 1]. */
MA_STATE(mas, &mm->mm_mt, addr, end - 1);
|
||||
|
||||
/* Check against address space limit. */
|
||||
if (!may_expand_vm(mm, vm_flags, len >> PAGE_SHIFT)) {
|
||||
unsigned long nr_pages;
|
||||
|
||||
/*
|
||||
* MAP_FIXED may remove pages of mappings that intersect with
|
||||
* requested mapping. Account for the pages it would unmap.
|
||||
*/
|
||||
nr_pages = count_vma_pages_range(mm, addr, end);
|
||||
|
||||
if (!may_expand_vm(mm, vm_flags,
|
||||
(len >> PAGE_SHIFT) - nr_pages))
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
/* Unmap any existing mapping in the area */
|
||||
/* Any do_munmap() failure is reported to the caller as -ENOMEM. */
if (do_munmap(mm, addr, len, uf))
|
||||
return -ENOMEM;
|
||||
|
||||
/*
|
||||
* Private writable mapping: check memory availability
|
||||
*/
|
||||
if (accountable_mapping(file, vm_flags)) {
|
||||
charged = len >> PAGE_SHIFT;
|
||||
if (security_vm_enough_memory_mm(mm, charged))
|
||||
return -ENOMEM;
|
||||
vm_flags |= VM_ACCOUNT;
|
||||
}
|
||||
|
||||
/* next/prev (from mas_next()/mas_prev()) are the merge candidates tested below. */
next = mas_next(&mas, ULONG_MAX);
|
||||
prev = mas_prev(&mas, 0);
|
||||
/* VM_SPECIAL mappings skip the expansion attempt entirely. */
if (vm_flags & VM_SPECIAL)
|
||||
goto cannot_expand;
|
||||
|
||||
/* Attempt to expand an old mapping */
|
||||
/* Check next */
|
||||
if (next && next->vm_start == end && !vma_policy(next) &&
|
||||
can_vma_merge_before(next, vm_flags, NULL, file, pgoff+pglen,
|
||||
NULL_VM_UFFD_CTX, NULL)) {
|
||||
merge_end = next->vm_end;
|
||||
vma = next;
|
||||
vm_pgoff = next->vm_pgoff - pglen;
|
||||
}
|
||||
|
||||
/* Check prev */
|
||||
if (prev && prev->vm_end == addr && !vma_policy(prev) &&
|
||||
(vma ? can_vma_merge_after(prev, vm_flags, vma->anon_vma, file,
|
||||
pgoff, vma->vm_userfaultfd_ctx, NULL) :
|
||||
can_vma_merge_after(prev, vm_flags, NULL, file, pgoff,
|
||||
NULL_VM_UFFD_CTX, NULL))) {
|
||||
merge_start = prev->vm_start;
|
||||
vma = prev;
|
||||
vm_pgoff = prev->vm_pgoff;
|
||||
}
|
||||
|
||||
|
||||
/* Actually expand, if possible */
|
||||
/*
 * vma is non-NULL only if a candidate was picked above, in which case
 * vm_pgoff has been set. A zero return from vma_expand() takes the
 * "expanded" path and no new vma is allocated.
 */
if (vma &&
|
||||
!vma_expand(&mas, vma, merge_start, merge_end, vm_pgoff, next)) {
|
||||
khugepaged_enter_vma(vma, vm_flags);
|
||||
goto expanded;
|
||||
}
|
||||
|
||||
/* No expansion happened: point mas back at [addr, end - 1]. */
mas.index = addr;
|
||||
mas.last = end - 1;
|
||||
cannot_expand:
|
||||
/*
|
||||
* Determine the object being mapped and call the appropriate
|
||||
* specific mapper. The address has already been validated, but
|
||||
* not unmapped, but the maps are removed from the list.
|
||||
*/
|
||||
vma = vm_area_alloc(mm);
|
||||
if (!vma) {
|
||||
error = -ENOMEM;
|
||||
goto unacct_error;
|
||||
}
|
||||
|
||||
vma->vm_start = addr;
|
||||
vma->vm_end = end;
|
||||
vma->vm_flags = vm_flags;
|
||||
vma->vm_page_prot = vm_get_page_prot(vm_flags);
|
||||
vma->vm_pgoff = pgoff;
|
||||
|
||||
if (file) {
|
||||
if (vm_flags & VM_SHARED) {
|
||||
error = mapping_map_writable(file->f_mapping);
|
||||
if (error)
|
||||
goto free_vma;
|
||||
}
|
||||
|
||||
vma->vm_file = get_file(file);
|
||||
error = call_mmap(file, vma);
|
||||
if (error)
|
||||
goto unmap_and_free_vma;
|
||||
|
||||
/* Can addr have changed??
|
||||
*
|
||||
* Answer: Yes, several device drivers can do it in their
|
||||
* f_op->mmap method. -DaveM
|
||||
*/
|
||||
WARN_ON_ONCE(addr != vma->vm_start);
|
||||
|
||||
addr = vma->vm_start;
|
||||
mas_reset(&mas);
|
||||
|
||||
/*
|
||||
* If vm_flags changed after call_mmap(), we should try merge
|
||||
* vma again as we may succeed this time.
|
||||
*/
|
||||
if (unlikely(vm_flags != vma->vm_flags && prev)) {
|
||||
merge = vma_merge(mm, prev, vma->vm_start, vma->vm_end, vma->vm_flags,
|
||||
NULL, vma->vm_file, vma->vm_pgoff, NULL, NULL_VM_UFFD_CTX, NULL);
|
||||
if (merge) {
|
||||
/*
|
||||
* ->mmap() can change vma->vm_file and fput
|
||||
* the original file. So fput the vma->vm_file
|
||||
* here or we would add an extra fput for file
|
||||
* and cause general protection fault
|
||||
* ultimately.
|
||||
*/
|
||||
fput(vma->vm_file);
|
||||
vm_area_free(vma);
|
||||
vma = merge;
|
||||
/* Update vm_flags to pick up the change. */
|
||||
addr = vma->vm_start;
|
||||
vm_flags = vma->vm_flags;
|
||||
goto unmap_writable;
|
||||
}
|
||||
}
|
||||
|
||||
/* Pick up any flag changes made by call_mmap(). */
vm_flags = vma->vm_flags;
|
||||
} else if (vm_flags & VM_SHARED) {
|
||||
error = shmem_zero_setup(vma);
|
||||
if (error)
|
||||
goto free_vma;
|
||||
} else {
|
||||
vma_set_anonymous(vma);
|
||||
}
|
||||
|
||||
/* Allow architectures to sanity-check the vm_flags */
|
||||
if (!arch_validate_flags(vma->vm_flags)) {
|
||||
error = -EINVAL;
|
||||
if (file)
|
||||
goto unmap_and_free_vma;
|
||||
else
|
||||
goto free_vma;
|
||||
}
|
||||
|
||||
if (mas_preallocate(&mas, vma, GFP_KERNEL)) {
|
||||
error = -ENOMEM;
|
||||
if (file)
|
||||
goto unmap_and_free_vma;
|
||||
else
|
||||
goto free_vma;
|
||||
}
|
||||
|
||||
/* The i_mmap lock taken here is dropped after the interval tree insert below. */
if (vma->vm_file)
|
||||
i_mmap_lock_write(vma->vm_file->f_mapping);
|
||||
|
||||
vma_mas_store(vma, &mas);
|
||||
__vma_link_list(mm, vma, prev);
|
||||
mm->map_count++;
|
||||
if (vma->vm_file) {
|
||||
if (vma->vm_flags & VM_SHARED)
|
||||
mapping_allow_writable(vma->vm_file->f_mapping);
|
||||
|
||||
flush_dcache_mmap_lock(vma->vm_file->f_mapping);
|
||||
vma_interval_tree_insert(vma, &vma->vm_file->f_mapping->i_mmap);
|
||||
flush_dcache_mmap_unlock(vma->vm_file->f_mapping);
|
||||
i_mmap_unlock_write(vma->vm_file->f_mapping);
|
||||
}
|
||||
|
||||
/*
|
||||
* vma_merge() calls khugepaged_enter_vma() as well, the below
|
||||
* call covers the non-merge case.
|
||||
*/
|
||||
khugepaged_enter_vma(vma, vma->vm_flags);
|
||||
|
||||
/* Once vma denies write, undo our temporary denial count */
|
||||
unmap_writable:
|
||||
if (file && vm_flags & VM_SHARED)
|
||||
mapping_unmap_writable(file->f_mapping);
|
||||
/* Continue with the vma's file, which ->mmap() may have replaced. */
file = vma->vm_file;
|
||||
expanded:
|
||||
perf_event_mmap(vma);
|
||||
|
||||
vm_stat_account(mm, vm_flags, len >> PAGE_SHIFT);
|
||||
/*
 * For VM_LOCKED requests: special, DAX, hugetlb and gate vmas get their
 * flags masked with VM_LOCKED_CLEAR_MASK; every other vma adds its page
 * count to mm->locked_vm.
 */
if (vm_flags & VM_LOCKED) {
|
||||
if ((vm_flags & VM_SPECIAL) || vma_is_dax(vma) ||
|
||||
is_vm_hugetlb_page(vma) ||
|
||||
vma == get_gate_vma(current->mm))
|
||||
vma->vm_flags &= VM_LOCKED_CLEAR_MASK;
|
||||
else
|
||||
mm->locked_vm += (len >> PAGE_SHIFT);
|
||||
}
|
||||
|
||||
if (file)
|
||||
uprobe_mmap(vma);
|
||||
|
||||
/*
|
||||
* New (or expanded) vma always gets soft dirty status.
|
||||
* Otherwise user-space soft-dirty page tracker won't
|
||||
* be able to distinguish situation when vma area unmapped,
|
||||
* then new mapped in-place (which must be aimed as
|
||||
* a completely new data area).
|
||||
*/
|
||||
vma->vm_flags |= VM_SOFTDIRTY;
|
||||
|
||||
vma_set_page_prot(vma);
|
||||
|
||||
validate_mm(mm);
|
||||
return addr;
|
||||
|
||||
/* Error unwinding: each label below falls through to the ones after it. */
unmap_and_free_vma:
|
||||
fput(vma->vm_file);
|
||||
vma->vm_file = NULL;
|
||||
|
||||
/* Undo any partial mapping done by a device driver. */
|
||||
unmap_region(mm, vma, prev, vma->vm_start, vma->vm_end);
|
||||
if (vm_flags & VM_SHARED)
|
||||
mapping_unmap_writable(file->f_mapping);
|
||||
free_vma:
|
||||
vm_area_free(vma);
|
||||
unacct_error:
|
||||
/* Give back the pages charged by security_vm_enough_memory_mm() above. */
if (charged)
|
||||
vm_unacct_memory(charged);
|
||||
validate_mm(mm);
|
||||
return error;
|
||||
}
|
||||
|
||||
/**
|
||||
* unmapped_area() - Find an area between the low_limit and the high_limit with
|
||||
* the correct alignment and offset, all from @info. Note: current->mm is used
|
||||
@ -2840,6 +2595,251 @@ int do_munmap(struct mm_struct *mm, unsigned long start, size_t len,
|
||||
return __do_munmap(mm, start, len, uf, false);
|
||||
}
|
||||
|
||||
/**
 * mmap_region() - Set up a mapping of @len bytes at @addr in current->mm.
 * @file: File to map, or NULL when there is no backing file.
 * @addr: Start address of the mapping.
 * @len: Length in bytes; page counts below are computed as len >> PAGE_SHIFT.
 * @vm_flags: Flags for the mapping. VM_ACCOUNT is OR-ed in when
 *            accountable_mapping() says the mapping must be charged.
 * @pgoff: Page offset stored in the new vma's vm_pgoff.
 * @uf: List handed through to do_munmap().
 *
 * In order: check may_expand_vm(), do_munmap() the range, charge memory with
 * security_vm_enough_memory_mm() if accountable, try to expand a neighbouring
 * vma (next and/or prev) over the range, and otherwise allocate, initialise
 * and link a new vma.
 *
 * Return: The address of the mapping on success. On failure a negative errno
 * (-ENOMEM, -EINVAL, or the error from mapping_map_writable(), call_mmap() or
 * shmem_zero_setup()) is returned through the unsigned long return type.
 */
unsigned long mmap_region(struct file *file, unsigned long addr,
|
||||
unsigned long len, vm_flags_t vm_flags, unsigned long pgoff,
|
||||
struct list_head *uf)
|
||||
{
|
||||
struct mm_struct *mm = current->mm;
|
||||
struct vm_area_struct *vma = NULL;
|
||||
struct vm_area_struct *next, *prev, *merge;
|
||||
pgoff_t pglen = len >> PAGE_SHIFT;
|
||||
unsigned long charged = 0;
|
||||
unsigned long end = addr + len;
|
||||
unsigned long merge_start = addr, merge_end = end;
|
||||
pgoff_t vm_pgoff;
|
||||
int error;
|
||||
/* Maple tree state on mm->mm_mt, initialised to the range [addr, end - 1]. */
MA_STATE(mas, &mm->mm_mt, addr, end - 1);
|
||||
|
||||
/* Check against address space limit. */
|
||||
if (!may_expand_vm(mm, vm_flags, len >> PAGE_SHIFT)) {
|
||||
unsigned long nr_pages;
|
||||
|
||||
/*
|
||||
* MAP_FIXED may remove pages of mappings that intersect with
|
||||
* requested mapping. Account for the pages it would unmap.
|
||||
*/
|
||||
nr_pages = count_vma_pages_range(mm, addr, end);
|
||||
|
||||
if (!may_expand_vm(mm, vm_flags,
|
||||
(len >> PAGE_SHIFT) - nr_pages))
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
/* Unmap any existing mapping in the area */
|
||||
/* Any do_munmap() failure is reported to the caller as -ENOMEM. */
if (do_munmap(mm, addr, len, uf))
|
||||
return -ENOMEM;
|
||||
|
||||
/*
|
||||
* Private writable mapping: check memory availability
|
||||
*/
|
||||
if (accountable_mapping(file, vm_flags)) {
|
||||
charged = len >> PAGE_SHIFT;
|
||||
if (security_vm_enough_memory_mm(mm, charged))
|
||||
return -ENOMEM;
|
||||
vm_flags |= VM_ACCOUNT;
|
||||
}
|
||||
|
||||
/* next/prev (from mas_next()/mas_prev()) are the merge candidates tested below. */
next = mas_next(&mas, ULONG_MAX);
|
||||
prev = mas_prev(&mas, 0);
|
||||
/* VM_SPECIAL mappings skip the expansion attempt entirely. */
if (vm_flags & VM_SPECIAL)
|
||||
goto cannot_expand;
|
||||
|
||||
/* Attempt to expand an old mapping */
|
||||
/* Check next */
|
||||
if (next && next->vm_start == end && !vma_policy(next) &&
|
||||
can_vma_merge_before(next, vm_flags, NULL, file, pgoff+pglen,
|
||||
NULL_VM_UFFD_CTX, NULL)) {
|
||||
merge_end = next->vm_end;
|
||||
vma = next;
|
||||
vm_pgoff = next->vm_pgoff - pglen;
|
||||
}
|
||||
|
||||
/* Check prev */
|
||||
if (prev && prev->vm_end == addr && !vma_policy(prev) &&
|
||||
(vma ? can_vma_merge_after(prev, vm_flags, vma->anon_vma, file,
|
||||
pgoff, vma->vm_userfaultfd_ctx, NULL) :
|
||||
can_vma_merge_after(prev, vm_flags, NULL, file, pgoff,
|
||||
NULL_VM_UFFD_CTX, NULL))) {
|
||||
merge_start = prev->vm_start;
|
||||
vma = prev;
|
||||
vm_pgoff = prev->vm_pgoff;
|
||||
}
|
||||
|
||||
|
||||
/* Actually expand, if possible */
|
||||
/*
 * vma is non-NULL only if a candidate was picked above, in which case
 * vm_pgoff has been set. A zero return from vma_expand() takes the
 * "expanded" path and no new vma is allocated.
 */
if (vma &&
|
||||
!vma_expand(&mas, vma, merge_start, merge_end, vm_pgoff, next)) {
|
||||
khugepaged_enter_vma(vma, vm_flags);
|
||||
goto expanded;
|
||||
}
|
||||
|
||||
/* No expansion happened: point mas back at [addr, end - 1]. */
mas.index = addr;
|
||||
mas.last = end - 1;
|
||||
cannot_expand:
|
||||
/*
|
||||
* Determine the object being mapped and call the appropriate
|
||||
* specific mapper. The address has already been validated, but
|
||||
* not unmapped, but the maps are removed from the list.
|
||||
*/
|
||||
vma = vm_area_alloc(mm);
|
||||
if (!vma) {
|
||||
error = -ENOMEM;
|
||||
goto unacct_error;
|
||||
}
|
||||
|
||||
vma->vm_start = addr;
|
||||
vma->vm_end = end;
|
||||
vma->vm_flags = vm_flags;
|
||||
vma->vm_page_prot = vm_get_page_prot(vm_flags);
|
||||
vma->vm_pgoff = pgoff;
|
||||
|
||||
if (file) {
|
||||
if (vm_flags & VM_SHARED) {
|
||||
error = mapping_map_writable(file->f_mapping);
|
||||
if (error)
|
||||
goto free_vma;
|
||||
}
|
||||
|
||||
vma->vm_file = get_file(file);
|
||||
error = call_mmap(file, vma);
|
||||
if (error)
|
||||
goto unmap_and_free_vma;
|
||||
|
||||
/* Can addr have changed??
|
||||
*
|
||||
* Answer: Yes, several device drivers can do it in their
|
||||
* f_op->mmap method. -DaveM
|
||||
*/
|
||||
WARN_ON_ONCE(addr != vma->vm_start);
|
||||
|
||||
addr = vma->vm_start;
|
||||
mas_reset(&mas);
|
||||
|
||||
/*
|
||||
* If vm_flags changed after call_mmap(), we should try merge
|
||||
* vma again as we may succeed this time.
|
||||
*/
|
||||
if (unlikely(vm_flags != vma->vm_flags && prev)) {
|
||||
merge = vma_merge(mm, prev, vma->vm_start, vma->vm_end, vma->vm_flags,
|
||||
NULL, vma->vm_file, vma->vm_pgoff, NULL, NULL_VM_UFFD_CTX, NULL);
|
||||
if (merge) {
|
||||
/*
|
||||
* ->mmap() can change vma->vm_file and fput
|
||||
* the original file. So fput the vma->vm_file
|
||||
* here or we would add an extra fput for file
|
||||
* and cause general protection fault
|
||||
* ultimately.
|
||||
*/
|
||||
fput(vma->vm_file);
|
||||
vm_area_free(vma);
|
||||
vma = merge;
|
||||
/* Update vm_flags to pick up the change. */
|
||||
addr = vma->vm_start;
|
||||
vm_flags = vma->vm_flags;
|
||||
goto unmap_writable;
|
||||
}
|
||||
}
|
||||
|
||||
/* Pick up any flag changes made by call_mmap(). */
vm_flags = vma->vm_flags;
|
||||
} else if (vm_flags & VM_SHARED) {
|
||||
error = shmem_zero_setup(vma);
|
||||
if (error)
|
||||
goto free_vma;
|
||||
} else {
|
||||
vma_set_anonymous(vma);
|
||||
}
|
||||
|
||||
/* Allow architectures to sanity-check the vm_flags */
|
||||
if (!arch_validate_flags(vma->vm_flags)) {
|
||||
error = -EINVAL;
|
||||
if (file)
|
||||
goto unmap_and_free_vma;
|
||||
else
|
||||
goto free_vma;
|
||||
}
|
||||
|
||||
if (mas_preallocate(&mas, vma, GFP_KERNEL)) {
|
||||
error = -ENOMEM;
|
||||
if (file)
|
||||
goto unmap_and_free_vma;
|
||||
else
|
||||
goto free_vma;
|
||||
}
|
||||
|
||||
/* The i_mmap lock taken here is dropped after the interval tree insert below. */
if (vma->vm_file)
|
||||
i_mmap_lock_write(vma->vm_file->f_mapping);
|
||||
|
||||
vma_mas_store(vma, &mas);
|
||||
__vma_link_list(mm, vma, prev);
|
||||
mm->map_count++;
|
||||
if (vma->vm_file) {
|
||||
if (vma->vm_flags & VM_SHARED)
|
||||
mapping_allow_writable(vma->vm_file->f_mapping);
|
||||
|
||||
flush_dcache_mmap_lock(vma->vm_file->f_mapping);
|
||||
vma_interval_tree_insert(vma, &vma->vm_file->f_mapping->i_mmap);
|
||||
flush_dcache_mmap_unlock(vma->vm_file->f_mapping);
|
||||
i_mmap_unlock_write(vma->vm_file->f_mapping);
|
||||
}
|
||||
|
||||
/*
|
||||
* vma_merge() calls khugepaged_enter_vma() as well, the below
|
||||
* call covers the non-merge case.
|
||||
*/
|
||||
khugepaged_enter_vma(vma, vma->vm_flags);
|
||||
|
||||
/* Once vma denies write, undo our temporary denial count */
|
||||
unmap_writable:
|
||||
if (file && vm_flags & VM_SHARED)
|
||||
mapping_unmap_writable(file->f_mapping);
|
||||
/* Continue with the vma's file, which ->mmap() may have replaced. */
file = vma->vm_file;
|
||||
expanded:
|
||||
perf_event_mmap(vma);
|
||||
|
||||
vm_stat_account(mm, vm_flags, len >> PAGE_SHIFT);
|
||||
/*
 * For VM_LOCKED requests: special, DAX, hugetlb and gate vmas get their
 * flags masked with VM_LOCKED_CLEAR_MASK; every other vma adds its page
 * count to mm->locked_vm.
 */
if (vm_flags & VM_LOCKED) {
|
||||
if ((vm_flags & VM_SPECIAL) || vma_is_dax(vma) ||
|
||||
is_vm_hugetlb_page(vma) ||
|
||||
vma == get_gate_vma(current->mm))
|
||||
vma->vm_flags &= VM_LOCKED_CLEAR_MASK;
|
||||
else
|
||||
mm->locked_vm += (len >> PAGE_SHIFT);
|
||||
}
|
||||
|
||||
if (file)
|
||||
uprobe_mmap(vma);
|
||||
|
||||
/*
|
||||
* New (or expanded) vma always gets soft dirty status.
|
||||
* Otherwise user-space soft-dirty page tracker won't
|
||||
* be able to distinguish situation when vma area unmapped,
|
||||
* then new mapped in-place (which must be aimed as
|
||||
* a completely new data area).
|
||||
*/
|
||||
vma->vm_flags |= VM_SOFTDIRTY;
|
||||
|
||||
vma_set_page_prot(vma);
|
||||
|
||||
validate_mm(mm);
|
||||
return addr;
|
||||
|
||||
/* Error unwinding: each label below falls through to the ones after it. */
unmap_and_free_vma:
|
||||
fput(vma->vm_file);
|
||||
vma->vm_file = NULL;
|
||||
|
||||
/* Undo any partial mapping done by a device driver. */
|
||||
unmap_region(mm, vma, prev, vma->vm_start, vma->vm_end);
|
||||
if (vm_flags & VM_SHARED)
|
||||
mapping_unmap_writable(file->f_mapping);
|
||||
free_vma:
|
||||
vm_area_free(vma);
|
||||
unacct_error:
|
||||
/* Give back the pages charged by security_vm_enough_memory_mm() above. */
if (charged)
|
||||
vm_unacct_memory(charged);
|
||||
validate_mm(mm);
|
||||
return error;
|
||||
}
|
||||
|
||||
static int __vm_munmap(unsigned long start, size_t len, bool downgrade)
|
||||
{
|
||||
int ret;
|
||||
|
Loading…
Reference in New Issue
Block a user