[PATCH] mm: pte_offset_map_lock loops
Convert those common loops which take page_table_lock on the outside and use pte_offset_map within, so that they use just pte_offset_map_lock within instead. These all hold mmap_sem (some exclusively, some not), so at no level can a page table be whipped away from beneath them. But whereas pte_alloc loops tested with the "atomic" pmd_present, these loops are testing with pmd_none, which on i386 PAE tests both lower and upper halves. That's now unsafe, so add a cast into pmd_none to test only the vital lower half: we lose a little sensitivity to a corrupt middle directory, but not enough to worry about. It appears that i386 and UML were the only architectures vulnerable in this way, and that pgd and pud pose no problem. Signed-off-by: Hugh Dickins <hugh@veritas.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
This commit is contained in:
parent
8f4e2101fd
commit
705e87c0c3
@ -203,13 +203,14 @@ static void smaps_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
|
|||||||
struct mem_size_stats *mss)
|
struct mem_size_stats *mss)
|
||||||
{
|
{
|
||||||
pte_t *pte, ptent;
|
pte_t *pte, ptent;
|
||||||
|
spinlock_t *ptl;
|
||||||
unsigned long pfn;
|
unsigned long pfn;
|
||||||
struct page *page;
|
struct page *page;
|
||||||
|
|
||||||
pte = pte_offset_map(pmd, addr);
|
pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
|
||||||
do {
|
do {
|
||||||
ptent = *pte;
|
ptent = *pte;
|
||||||
if (pte_none(ptent) || !pte_present(ptent))
|
if (!pte_present(ptent))
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
mss->resident += PAGE_SIZE;
|
mss->resident += PAGE_SIZE;
|
||||||
@ -230,8 +231,8 @@ static void smaps_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
|
|||||||
mss->private_clean += PAGE_SIZE;
|
mss->private_clean += PAGE_SIZE;
|
||||||
}
|
}
|
||||||
} while (pte++, addr += PAGE_SIZE, addr != end);
|
} while (pte++, addr += PAGE_SIZE, addr != end);
|
||||||
pte_unmap(pte - 1);
|
pte_unmap_unlock(pte - 1, ptl);
|
||||||
cond_resched_lock(&vma->vm_mm->page_table_lock);
|
cond_resched();
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void smaps_pmd_range(struct vm_area_struct *vma, pud_t *pud,
|
static inline void smaps_pmd_range(struct vm_area_struct *vma, pud_t *pud,
|
||||||
@ -285,17 +286,11 @@ static inline void smaps_pgd_range(struct vm_area_struct *vma,
|
|||||||
static int show_smap(struct seq_file *m, void *v)
|
static int show_smap(struct seq_file *m, void *v)
|
||||||
{
|
{
|
||||||
struct vm_area_struct *vma = v;
|
struct vm_area_struct *vma = v;
|
||||||
struct mm_struct *mm = vma->vm_mm;
|
|
||||||
struct mem_size_stats mss;
|
struct mem_size_stats mss;
|
||||||
|
|
||||||
memset(&mss, 0, sizeof mss);
|
memset(&mss, 0, sizeof mss);
|
||||||
|
if (vma->vm_mm)
|
||||||
if (mm) {
|
|
||||||
spin_lock(&mm->page_table_lock);
|
|
||||||
smaps_pgd_range(vma, vma->vm_start, vma->vm_end, &mss);
|
smaps_pgd_range(vma, vma->vm_start, vma->vm_end, &mss);
|
||||||
spin_unlock(&mm->page_table_lock);
|
|
||||||
}
|
|
||||||
|
|
||||||
return show_map_internal(m, v, &mss);
|
return show_map_internal(m, v, &mss);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -203,7 +203,8 @@ extern unsigned long pg0[];
|
|||||||
#define pte_present(x) ((x).pte_low & (_PAGE_PRESENT | _PAGE_PROTNONE))
|
#define pte_present(x) ((x).pte_low & (_PAGE_PRESENT | _PAGE_PROTNONE))
|
||||||
#define pte_clear(mm,addr,xp) do { set_pte_at(mm, addr, xp, __pte(0)); } while (0)
|
#define pte_clear(mm,addr,xp) do { set_pte_at(mm, addr, xp, __pte(0)); } while (0)
|
||||||
|
|
||||||
#define pmd_none(x) (!pmd_val(x))
|
/* To avoid harmful races, pmd_none(x) should check only the lower when PAE */
|
||||||
|
#define pmd_none(x) (!(unsigned long)pmd_val(x))
|
||||||
#define pmd_present(x) (pmd_val(x) & _PAGE_PRESENT)
|
#define pmd_present(x) (pmd_val(x) & _PAGE_PRESENT)
|
||||||
#define pmd_clear(xp) do { set_pmd(xp, __pmd(0)); } while (0)
|
#define pmd_clear(xp) do { set_pmd(xp, __pmd(0)); } while (0)
|
||||||
#define pmd_bad(x) ((pmd_val(x) & (~PAGE_MASK & ~_PAGE_USER)) != _KERNPG_TABLE)
|
#define pmd_bad(x) ((pmd_val(x) & (~PAGE_MASK & ~_PAGE_USER)) != _KERNPG_TABLE)
|
||||||
|
@ -138,7 +138,7 @@ extern unsigned long pg0[1024];
|
|||||||
|
|
||||||
#define pte_clear(mm,addr,xp) pte_set_val(*(xp), (phys_t) 0, __pgprot(_PAGE_NEWPAGE))
|
#define pte_clear(mm,addr,xp) pte_set_val(*(xp), (phys_t) 0, __pgprot(_PAGE_NEWPAGE))
|
||||||
|
|
||||||
#define pmd_none(x) (!(pmd_val(x) & ~_PAGE_NEWPAGE))
|
#define pmd_none(x) (!((unsigned long)pmd_val(x) & ~_PAGE_NEWPAGE))
|
||||||
#define pmd_bad(x) ((pmd_val(x) & (~PAGE_MASK & ~_PAGE_USER)) != _KERNPG_TABLE)
|
#define pmd_bad(x) ((pmd_val(x) & (~PAGE_MASK & ~_PAGE_USER)) != _KERNPG_TABLE)
|
||||||
#define pmd_present(x) (pmd_val(x) & _PAGE_PRESENT)
|
#define pmd_present(x) (pmd_val(x) & _PAGE_PRESENT)
|
||||||
#define pmd_clear(xp) do { pmd_val(*(xp)) = _PAGE_NEWPAGE; } while (0)
|
#define pmd_clear(xp) do { pmd_val(*(xp)) = _PAGE_NEWPAGE; } while (0)
|
||||||
|
@ -228,9 +228,9 @@ static int check_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
|
|||||||
{
|
{
|
||||||
pte_t *orig_pte;
|
pte_t *orig_pte;
|
||||||
pte_t *pte;
|
pte_t *pte;
|
||||||
|
spinlock_t *ptl;
|
||||||
|
|
||||||
spin_lock(&vma->vm_mm->page_table_lock);
|
orig_pte = pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
|
||||||
orig_pte = pte = pte_offset_map(pmd, addr);
|
|
||||||
do {
|
do {
|
||||||
unsigned long pfn;
|
unsigned long pfn;
|
||||||
unsigned int nid;
|
unsigned int nid;
|
||||||
@ -246,8 +246,7 @@ static int check_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
|
|||||||
if (!node_isset(nid, *nodes))
|
if (!node_isset(nid, *nodes))
|
||||||
break;
|
break;
|
||||||
} while (pte++, addr += PAGE_SIZE, addr != end);
|
} while (pte++, addr += PAGE_SIZE, addr != end);
|
||||||
pte_unmap(orig_pte);
|
pte_unmap_unlock(orig_pte, ptl);
|
||||||
spin_unlock(&vma->vm_mm->page_table_lock);
|
|
||||||
return addr != end;
|
return addr != end;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -29,8 +29,9 @@ static void change_pte_range(struct mm_struct *mm, pmd_t *pmd,
|
|||||||
unsigned long addr, unsigned long end, pgprot_t newprot)
|
unsigned long addr, unsigned long end, pgprot_t newprot)
|
||||||
{
|
{
|
||||||
pte_t *pte;
|
pte_t *pte;
|
||||||
|
spinlock_t *ptl;
|
||||||
|
|
||||||
pte = pte_offset_map(pmd, addr);
|
pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
|
||||||
do {
|
do {
|
||||||
if (pte_present(*pte)) {
|
if (pte_present(*pte)) {
|
||||||
pte_t ptent;
|
pte_t ptent;
|
||||||
@ -44,7 +45,7 @@ static void change_pte_range(struct mm_struct *mm, pmd_t *pmd,
|
|||||||
lazy_mmu_prot_update(ptent);
|
lazy_mmu_prot_update(ptent);
|
||||||
}
|
}
|
||||||
} while (pte++, addr += PAGE_SIZE, addr != end);
|
} while (pte++, addr += PAGE_SIZE, addr != end);
|
||||||
pte_unmap(pte - 1);
|
pte_unmap_unlock(pte - 1, ptl);
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void change_pmd_range(struct mm_struct *mm, pud_t *pud,
|
static inline void change_pmd_range(struct mm_struct *mm, pud_t *pud,
|
||||||
@ -88,7 +89,6 @@ static void change_protection(struct vm_area_struct *vma,
|
|||||||
BUG_ON(addr >= end);
|
BUG_ON(addr >= end);
|
||||||
pgd = pgd_offset(mm, addr);
|
pgd = pgd_offset(mm, addr);
|
||||||
flush_cache_range(vma, addr, end);
|
flush_cache_range(vma, addr, end);
|
||||||
spin_lock(&mm->page_table_lock);
|
|
||||||
do {
|
do {
|
||||||
next = pgd_addr_end(addr, end);
|
next = pgd_addr_end(addr, end);
|
||||||
if (pgd_none_or_clear_bad(pgd))
|
if (pgd_none_or_clear_bad(pgd))
|
||||||
@ -96,7 +96,6 @@ static void change_protection(struct vm_area_struct *vma,
|
|||||||
change_pud_range(mm, pgd, addr, next, newprot);
|
change_pud_range(mm, pgd, addr, next, newprot);
|
||||||
} while (pgd++, addr = next, addr != end);
|
} while (pgd++, addr = next, addr != end);
|
||||||
flush_tlb_range(vma, start, end);
|
flush_tlb_range(vma, start, end);
|
||||||
spin_unlock(&mm->page_table_lock);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static int
|
static int
|
||||||
|
21
mm/msync.c
21
mm/msync.c
@ -17,28 +17,22 @@
|
|||||||
#include <asm/pgtable.h>
|
#include <asm/pgtable.h>
|
||||||
#include <asm/tlbflush.h>
|
#include <asm/tlbflush.h>
|
||||||
|
|
||||||
/*
|
|
||||||
* Called with mm->page_table_lock held to protect against other
|
|
||||||
* threads/the swapper from ripping pte's out from under us.
|
|
||||||
*/
|
|
||||||
|
|
||||||
static void msync_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
|
static void msync_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
|
||||||
unsigned long addr, unsigned long end)
|
unsigned long addr, unsigned long end)
|
||||||
{
|
{
|
||||||
struct mm_struct *mm = vma->vm_mm;
|
|
||||||
pte_t *pte;
|
pte_t *pte;
|
||||||
|
spinlock_t *ptl;
|
||||||
int progress = 0;
|
int progress = 0;
|
||||||
|
|
||||||
again:
|
again:
|
||||||
pte = pte_offset_map(pmd, addr);
|
pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
|
||||||
do {
|
do {
|
||||||
unsigned long pfn;
|
unsigned long pfn;
|
||||||
struct page *page;
|
struct page *page;
|
||||||
|
|
||||||
if (progress >= 64) {
|
if (progress >= 64) {
|
||||||
progress = 0;
|
progress = 0;
|
||||||
if (need_resched() ||
|
if (need_resched() || need_lockbreak(ptl))
|
||||||
need_lockbreak(&mm->page_table_lock))
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
progress++;
|
progress++;
|
||||||
@ -58,8 +52,8 @@ again:
|
|||||||
set_page_dirty(page);
|
set_page_dirty(page);
|
||||||
progress += 3;
|
progress += 3;
|
||||||
} while (pte++, addr += PAGE_SIZE, addr != end);
|
} while (pte++, addr += PAGE_SIZE, addr != end);
|
||||||
pte_unmap(pte - 1);
|
pte_unmap_unlock(pte - 1, ptl);
|
||||||
cond_resched_lock(&mm->page_table_lock);
|
cond_resched();
|
||||||
if (addr != end)
|
if (addr != end)
|
||||||
goto again;
|
goto again;
|
||||||
}
|
}
|
||||||
@ -97,7 +91,6 @@ static inline void msync_pud_range(struct vm_area_struct *vma, pgd_t *pgd,
|
|||||||
static void msync_page_range(struct vm_area_struct *vma,
|
static void msync_page_range(struct vm_area_struct *vma,
|
||||||
unsigned long addr, unsigned long end)
|
unsigned long addr, unsigned long end)
|
||||||
{
|
{
|
||||||
struct mm_struct *mm = vma->vm_mm;
|
|
||||||
pgd_t *pgd;
|
pgd_t *pgd;
|
||||||
unsigned long next;
|
unsigned long next;
|
||||||
|
|
||||||
@ -110,16 +103,14 @@ static void msync_page_range(struct vm_area_struct *vma,
|
|||||||
return;
|
return;
|
||||||
|
|
||||||
BUG_ON(addr >= end);
|
BUG_ON(addr >= end);
|
||||||
pgd = pgd_offset(mm, addr);
|
pgd = pgd_offset(vma->vm_mm, addr);
|
||||||
flush_cache_range(vma, addr, end);
|
flush_cache_range(vma, addr, end);
|
||||||
spin_lock(&mm->page_table_lock);
|
|
||||||
do {
|
do {
|
||||||
next = pgd_addr_end(addr, end);
|
next = pgd_addr_end(addr, end);
|
||||||
if (pgd_none_or_clear_bad(pgd))
|
if (pgd_none_or_clear_bad(pgd))
|
||||||
continue;
|
continue;
|
||||||
msync_pud_range(vma, pgd, addr, next);
|
msync_pud_range(vma, pgd, addr, next);
|
||||||
} while (pgd++, addr = next, addr != end);
|
} while (pgd++, addr = next, addr != end);
|
||||||
spin_unlock(&mm->page_table_lock);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -401,8 +401,6 @@ void free_swap_and_cache(swp_entry_t entry)
|
|||||||
* No need to decide whether this PTE shares the swap entry with others,
|
* No need to decide whether this PTE shares the swap entry with others,
|
||||||
* just let do_wp_page work it out if a write is requested later - to
|
* just let do_wp_page work it out if a write is requested later - to
|
||||||
* force COW, vm_page_prot omits write permission from any private vma.
|
* force COW, vm_page_prot omits write permission from any private vma.
|
||||||
*
|
|
||||||
* vma->vm_mm->page_table_lock is held.
|
|
||||||
*/
|
*/
|
||||||
static void unuse_pte(struct vm_area_struct *vma, pte_t *pte,
|
static void unuse_pte(struct vm_area_struct *vma, pte_t *pte,
|
||||||
unsigned long addr, swp_entry_t entry, struct page *page)
|
unsigned long addr, swp_entry_t entry, struct page *page)
|
||||||
@ -424,23 +422,25 @@ static int unuse_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
|
|||||||
unsigned long addr, unsigned long end,
|
unsigned long addr, unsigned long end,
|
||||||
swp_entry_t entry, struct page *page)
|
swp_entry_t entry, struct page *page)
|
||||||
{
|
{
|
||||||
pte_t *pte;
|
|
||||||
pte_t swp_pte = swp_entry_to_pte(entry);
|
pte_t swp_pte = swp_entry_to_pte(entry);
|
||||||
|
pte_t *pte;
|
||||||
|
spinlock_t *ptl;
|
||||||
|
int found = 0;
|
||||||
|
|
||||||
pte = pte_offset_map(pmd, addr);
|
pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
|
||||||
do {
|
do {
|
||||||
/*
|
/*
|
||||||
* swapoff spends a _lot_ of time in this loop!
|
* swapoff spends a _lot_ of time in this loop!
|
||||||
* Test inline before going to call unuse_pte.
|
* Test inline before going to call unuse_pte.
|
||||||
*/
|
*/
|
||||||
if (unlikely(pte_same(*pte, swp_pte))) {
|
if (unlikely(pte_same(*pte, swp_pte))) {
|
||||||
unuse_pte(vma, pte, addr, entry, page);
|
unuse_pte(vma, pte++, addr, entry, page);
|
||||||
pte_unmap(pte);
|
found = 1;
|
||||||
return 1;
|
break;
|
||||||
}
|
}
|
||||||
} while (pte++, addr += PAGE_SIZE, addr != end);
|
} while (pte++, addr += PAGE_SIZE, addr != end);
|
||||||
pte_unmap(pte - 1);
|
pte_unmap_unlock(pte - 1, ptl);
|
||||||
return 0;
|
return found;
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline int unuse_pmd_range(struct vm_area_struct *vma, pud_t *pud,
|
static inline int unuse_pmd_range(struct vm_area_struct *vma, pud_t *pud,
|
||||||
@ -522,12 +522,10 @@ static int unuse_mm(struct mm_struct *mm,
|
|||||||
down_read(&mm->mmap_sem);
|
down_read(&mm->mmap_sem);
|
||||||
lock_page(page);
|
lock_page(page);
|
||||||
}
|
}
|
||||||
spin_lock(&mm->page_table_lock);
|
|
||||||
for (vma = mm->mmap; vma; vma = vma->vm_next) {
|
for (vma = mm->mmap; vma; vma = vma->vm_next) {
|
||||||
if (vma->anon_vma && unuse_vma(vma, entry, page))
|
if (vma->anon_vma && unuse_vma(vma, entry, page))
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
spin_unlock(&mm->page_table_lock);
|
|
||||||
up_read(&mm->mmap_sem);
|
up_read(&mm->mmap_sem);
|
||||||
/*
|
/*
|
||||||
* Currently unuse_mm cannot fail, but leave error handling
|
* Currently unuse_mm cannot fail, but leave error handling
|
||||||
|
Loading…
x
Reference in New Issue
Block a user