mlock: do not hold mmap_sem for extended periods of time
__get_user_pages gets a new 'nonblocking' parameter to signal that the
caller is prepared to re-acquire mmap_sem and retry the operation if
needed. This is used to split off long operations if they are going to
block on a disk transfer, or when we detect contention on the mmap_sem.

[akpm@linux-foundation.org: remove ref to rwsem_is_contended()]
Signed-off-by: Michel Lespinasse <walken@google.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Nick Piggin <npiggin@kernel.dk>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: David Howells <dhowells@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:
parent
5fdb200213
commit
53a7706d5e
@ -243,7 +243,8 @@ static inline void mminit_validate_memmodel_limits(unsigned long *start_pfn,
|
|||||||
|
|
||||||
int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
|
int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
|
||||||
unsigned long start, int len, unsigned int foll_flags,
|
unsigned long start, int len, unsigned int foll_flags,
|
||||||
struct page **pages, struct vm_area_struct **vmas);
|
struct page **pages, struct vm_area_struct **vmas,
|
||||||
|
int *nonblocking);
|
||||||
|
|
||||||
#define ZONE_RECLAIM_NOSCAN -2
|
#define ZONE_RECLAIM_NOSCAN -2
|
||||||
#define ZONE_RECLAIM_FULL -1
|
#define ZONE_RECLAIM_FULL -1
|
||||||
|
23
mm/memory.c
23
mm/memory.c
@ -1363,7 +1363,8 @@ no_page_table:
|
|||||||
|
|
||||||
int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
|
int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
|
||||||
unsigned long start, int nr_pages, unsigned int gup_flags,
|
unsigned long start, int nr_pages, unsigned int gup_flags,
|
||||||
struct page **pages, struct vm_area_struct **vmas)
|
struct page **pages, struct vm_area_struct **vmas,
|
||||||
|
int *nonblocking)
|
||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
unsigned long vm_flags;
|
unsigned long vm_flags;
|
||||||
@ -1463,10 +1464,15 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
|
|||||||
cond_resched();
|
cond_resched();
|
||||||
while (!(page = follow_page(vma, start, foll_flags))) {
|
while (!(page = follow_page(vma, start, foll_flags))) {
|
||||||
int ret;
|
int ret;
|
||||||
|
unsigned int fault_flags = 0;
|
||||||
|
|
||||||
|
if (foll_flags & FOLL_WRITE)
|
||||||
|
fault_flags |= FAULT_FLAG_WRITE;
|
||||||
|
if (nonblocking)
|
||||||
|
fault_flags |= FAULT_FLAG_ALLOW_RETRY;
|
||||||
|
|
||||||
ret = handle_mm_fault(mm, vma, start,
|
ret = handle_mm_fault(mm, vma, start,
|
||||||
(foll_flags & FOLL_WRITE) ?
|
fault_flags);
|
||||||
FAULT_FLAG_WRITE : 0);
|
|
||||||
|
|
||||||
if (ret & VM_FAULT_ERROR) {
|
if (ret & VM_FAULT_ERROR) {
|
||||||
if (ret & VM_FAULT_OOM)
|
if (ret & VM_FAULT_OOM)
|
||||||
@ -1482,6 +1488,11 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
|
|||||||
else
|
else
|
||||||
tsk->min_flt++;
|
tsk->min_flt++;
|
||||||
|
|
||||||
|
if (ret & VM_FAULT_RETRY) {
|
||||||
|
*nonblocking = 0;
|
||||||
|
return i;
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* The VM_FAULT_WRITE bit tells us that
|
* The VM_FAULT_WRITE bit tells us that
|
||||||
* do_wp_page has broken COW when necessary,
|
* do_wp_page has broken COW when necessary,
|
||||||
@ -1581,7 +1592,8 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
|
|||||||
if (force)
|
if (force)
|
||||||
flags |= FOLL_FORCE;
|
flags |= FOLL_FORCE;
|
||||||
|
|
||||||
return __get_user_pages(tsk, mm, start, nr_pages, flags, pages, vmas);
|
return __get_user_pages(tsk, mm, start, nr_pages, flags, pages, vmas,
|
||||||
|
NULL);
|
||||||
}
|
}
|
||||||
EXPORT_SYMBOL(get_user_pages);
|
EXPORT_SYMBOL(get_user_pages);
|
||||||
|
|
||||||
@ -1606,7 +1618,8 @@ struct page *get_dump_page(unsigned long addr)
|
|||||||
struct page *page;
|
struct page *page;
|
||||||
|
|
||||||
if (__get_user_pages(current, current->mm, addr, 1,
|
if (__get_user_pages(current, current->mm, addr, 1,
|
||||||
FOLL_FORCE | FOLL_DUMP | FOLL_GET, &page, &vma) < 1)
|
FOLL_FORCE | FOLL_DUMP | FOLL_GET, &page, &vma,
|
||||||
|
NULL) < 1)
|
||||||
return NULL;
|
return NULL;
|
||||||
flush_cache_page(vma, addr, page_to_pfn(page));
|
flush_cache_page(vma, addr, page_to_pfn(page));
|
||||||
return page;
|
return page;
|
||||||
|
40
mm/mlock.c
40
mm/mlock.c
@ -155,13 +155,13 @@ static inline int stack_guard_page(struct vm_area_struct *vma, unsigned long add
|
|||||||
* vma->vm_mm->mmap_sem must be held for at least read.
|
* vma->vm_mm->mmap_sem must be held for at least read.
|
||||||
*/
|
*/
|
||||||
static long __mlock_vma_pages_range(struct vm_area_struct *vma,
|
static long __mlock_vma_pages_range(struct vm_area_struct *vma,
|
||||||
unsigned long start, unsigned long end)
|
unsigned long start, unsigned long end,
|
||||||
|
int *nonblocking)
|
||||||
{
|
{
|
||||||
struct mm_struct *mm = vma->vm_mm;
|
struct mm_struct *mm = vma->vm_mm;
|
||||||
unsigned long addr = start;
|
unsigned long addr = start;
|
||||||
int nr_pages = (end - start) / PAGE_SIZE;
|
int nr_pages = (end - start) / PAGE_SIZE;
|
||||||
int gup_flags;
|
int gup_flags;
|
||||||
int ret;
|
|
||||||
|
|
||||||
VM_BUG_ON(start & ~PAGE_MASK);
|
VM_BUG_ON(start & ~PAGE_MASK);
|
||||||
VM_BUG_ON(end & ~PAGE_MASK);
|
VM_BUG_ON(end & ~PAGE_MASK);
|
||||||
@ -187,9 +187,8 @@ static long __mlock_vma_pages_range(struct vm_area_struct *vma,
|
|||||||
nr_pages--;
|
nr_pages--;
|
||||||
}
|
}
|
||||||
|
|
||||||
ret = __get_user_pages(current, mm, addr, nr_pages, gup_flags,
|
return __get_user_pages(current, mm, addr, nr_pages, gup_flags,
|
||||||
NULL, NULL);
|
NULL, NULL, nonblocking);
|
||||||
return max(ret, 0); /* 0 or negative error code */
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -233,7 +232,7 @@ long mlock_vma_pages_range(struct vm_area_struct *vma,
|
|||||||
is_vm_hugetlb_page(vma) ||
|
is_vm_hugetlb_page(vma) ||
|
||||||
vma == get_gate_vma(current))) {
|
vma == get_gate_vma(current))) {
|
||||||
|
|
||||||
__mlock_vma_pages_range(vma, start, end);
|
__mlock_vma_pages_range(vma, start, end, NULL);
|
||||||
|
|
||||||
/* Hide errors from mmap() and other callers */
|
/* Hide errors from mmap() and other callers */
|
||||||
return 0;
|
return 0;
|
||||||
@ -429,21 +428,23 @@ static int do_mlock_pages(unsigned long start, size_t len, int ignore_errors)
|
|||||||
struct mm_struct *mm = current->mm;
|
struct mm_struct *mm = current->mm;
|
||||||
unsigned long end, nstart, nend;
|
unsigned long end, nstart, nend;
|
||||||
struct vm_area_struct *vma = NULL;
|
struct vm_area_struct *vma = NULL;
|
||||||
|
int locked = 0;
|
||||||
int ret = 0;
|
int ret = 0;
|
||||||
|
|
||||||
VM_BUG_ON(start & ~PAGE_MASK);
|
VM_BUG_ON(start & ~PAGE_MASK);
|
||||||
VM_BUG_ON(len != PAGE_ALIGN(len));
|
VM_BUG_ON(len != PAGE_ALIGN(len));
|
||||||
end = start + len;
|
end = start + len;
|
||||||
|
|
||||||
down_read(&mm->mmap_sem);
|
|
||||||
for (nstart = start; nstart < end; nstart = nend) {
|
for (nstart = start; nstart < end; nstart = nend) {
|
||||||
/*
|
/*
|
||||||
* We want to fault in pages for [nstart; end) address range.
|
* We want to fault in pages for [nstart; end) address range.
|
||||||
* Find first corresponding VMA.
|
* Find first corresponding VMA.
|
||||||
*/
|
*/
|
||||||
if (!vma)
|
if (!locked) {
|
||||||
|
locked = 1;
|
||||||
|
down_read(&mm->mmap_sem);
|
||||||
vma = find_vma(mm, nstart);
|
vma = find_vma(mm, nstart);
|
||||||
else
|
} else if (nstart >= vma->vm_end)
|
||||||
vma = vma->vm_next;
|
vma = vma->vm_next;
|
||||||
if (!vma || vma->vm_start >= end)
|
if (!vma || vma->vm_start >= end)
|
||||||
break;
|
break;
|
||||||
@ -457,19 +458,24 @@ static int do_mlock_pages(unsigned long start, size_t len, int ignore_errors)
|
|||||||
if (nstart < vma->vm_start)
|
if (nstart < vma->vm_start)
|
||||||
nstart = vma->vm_start;
|
nstart = vma->vm_start;
|
||||||
/*
|
/*
|
||||||
* Now fault in a range of pages within the first VMA.
|
* Now fault in a range of pages. __mlock_vma_pages_range()
|
||||||
|
* double checks the vma flags, so that it won't mlock pages
|
||||||
|
* if the vma was already munlocked.
|
||||||
*/
|
*/
|
||||||
ret = __mlock_vma_pages_range(vma, nstart, nend);
|
ret = __mlock_vma_pages_range(vma, nstart, nend, &locked);
|
||||||
if (ret < 0 && ignore_errors) {
|
if (ret < 0) {
|
||||||
ret = 0;
|
if (ignore_errors) {
|
||||||
continue; /* continue at next VMA */
|
ret = 0;
|
||||||
}
|
continue; /* continue at next VMA */
|
||||||
if (ret) {
|
}
|
||||||
ret = __mlock_posix_error_return(ret);
|
ret = __mlock_posix_error_return(ret);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
nend = nstart + ret * PAGE_SIZE;
|
||||||
|
ret = 0;
|
||||||
}
|
}
|
||||||
up_read(&mm->mmap_sem);
|
if (locked)
|
||||||
|
up_read(&mm->mmap_sem);
|
||||||
return ret; /* 0 or negative error code */
|
return ret; /* 0 or negative error code */
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -127,7 +127,8 @@ unsigned int kobjsize(const void *objp)
|
|||||||
|
|
||||||
int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
|
int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
|
||||||
unsigned long start, int nr_pages, unsigned int foll_flags,
|
unsigned long start, int nr_pages, unsigned int foll_flags,
|
||||||
struct page **pages, struct vm_area_struct **vmas)
|
struct page **pages, struct vm_area_struct **vmas,
|
||||||
|
int *retry)
|
||||||
{
|
{
|
||||||
struct vm_area_struct *vma;
|
struct vm_area_struct *vma;
|
||||||
unsigned long vm_flags;
|
unsigned long vm_flags;
|
||||||
@ -185,7 +186,8 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
|
|||||||
if (force)
|
if (force)
|
||||||
flags |= FOLL_FORCE;
|
flags |= FOLL_FORCE;
|
||||||
|
|
||||||
return __get_user_pages(tsk, mm, start, nr_pages, flags, pages, vmas);
|
return __get_user_pages(tsk, mm, start, nr_pages, flags, pages, vmas,
|
||||||
|
NULL);
|
||||||
}
|
}
|
||||||
EXPORT_SYMBOL(get_user_pages);
|
EXPORT_SYMBOL(get_user_pages);
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user