58d5d0d8dd
lockdep just caught this one: ================================= [ INFO: inconsistent lock state ] 2.6.24 #38 --------------------------------- inconsistent {in-softirq-W} -> {softirq-on-W} usage. swapper/1 [HC0[0]:SC0[0]:HE1:SE1] takes: (pgd_lock){-+..}, at: [<ffffffff8022a9ea>] mm_init+0x1da/0x250 {in-softirq-W} state was registered at: [<ffffffffffffffff>] 0xffffffffffffffff irq event stamp: 394559 hardirqs last enabled at (394559): [<ffffffff80267f0a>] get_page_from_freelist+0x30a/0x4c0 hardirqs last disabled at (394558): [<ffffffff80267d25>] get_page_from_freelist+0x125/0x4c0 softirqs last enabled at (393952): [<ffffffff80232f8e>] __do_softirq+0xce/0xe0 softirqs last disabled at (393945): [<ffffffff8020c57c>] call_softirq+0x1c/0x30 other info that might help us debug this: no locks held by swapper/1. stack backtrace: Pid: 1, comm: swapper Not tainted 2.6.24 #38 Call Trace: [<ffffffff8024e1fb>] print_usage_bug+0x18b/0x190 [<ffffffff8024f55d>] mark_lock+0x53d/0x560 [<ffffffff8024fffa>] __lock_acquire+0x3ca/0xed0 [<ffffffff80250ba8>] lock_acquire+0xa8/0xe0 [<ffffffff8022a9ea>] ? mm_init+0x1da/0x250 [<ffffffff809bcd10>] _spin_lock+0x30/0x70 [<ffffffff8022a9ea>] mm_init+0x1da/0x250 [<ffffffff8022aa99>] mm_alloc+0x39/0x50 [<ffffffff8028b95a>] bprm_mm_init+0x2a/0x1a0 [<ffffffff8028d12b>] do_execve+0x7b/0x220 [<ffffffff80209776>] sys_execve+0x46/0x70 [<ffffffff8020c214>] kernel_execve+0x64/0xd0 [<ffffffff8020901e>] ? _stext+0x1e/0x20 [<ffffffff802090ba>] init_post+0x9a/0xf0 [<ffffffff809bc5f6>] ? trace_hardirqs_on_thunk+0x35/0x3a [<ffffffff8024f75a>] ? trace_hardirqs_on+0xba/0xd0 [<ffffffff8020c1a8>] ? child_rip+0xa/0x12 [<ffffffff8020bcbc>] ? restore_args+0x0/0x44 [<ffffffff8020c19e>] ? child_rip+0x0/0x12 turns out that pgd_lock has been used on 64-bit x86 in an irq-unsafe way for almost two years, since commit 8c914cb704a11460e. Signed-off-by: Ingo Molnar <mingo@elte.hu> Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
122 lines
3.1 KiB
C
122 lines
3.1 KiB
C
#ifndef _X86_64_PGALLOC_H
|
|
#define _X86_64_PGALLOC_H
|
|
|
|
#include <asm/pda.h>
|
|
#include <linux/threads.h>
|
|
#include <linux/mm.h>
|
|
|
|
/*
 * Install a kernel pte table into a pmd entry: the entry written through
 * set_pmd() is _PAGE_TABLE ORed with __pa(pte).  'mm' is accepted but not
 * used by the expansion.
 */
#define pmd_populate_kernel(mm, pmd, pte) \
	set_pmd((pmd), __pmd(_PAGE_TABLE | __pa(pte)))
|
|
/*
 * Install a pmd table into a pud entry: the entry written through
 * set_pud() is _PAGE_TABLE ORed with __pa(pmd).  'mm' is accepted but not
 * used by the expansion.
 */
#define pud_populate(mm, pud, pmd) \
	set_pud((pud), __pud(_PAGE_TABLE | __pa(pmd)))
|
|
/*
 * Install a pud table into a pgd entry: the entry written through
 * set_pgd() is _PAGE_TABLE ORed with __pa(pud).  'mm' is accepted but not
 * used by the expansion.
 */
#define pgd_populate(mm, pgd, pud) \
	set_pgd((pgd), __pgd(_PAGE_TABLE | __pa(pud)))
|
|
|
|
/*
 * Install a pte table, identified by its struct page, into a pmd entry.
 *
 * The entry value is _PAGE_TABLE ORed with the page frame number of 'pte'
 * shifted left by PAGE_SHIFT.  'mm' is not used.
 */
static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, struct page *pte)
{
	unsigned long pte_addr = page_to_pfn(pte) << PAGE_SHIFT;

	set_pmd(pmd, __pmd(_PAGE_TABLE | pte_addr));
}
|
|
|
|
/*
 * Release the page that holds a pmd table.
 *
 * BUG_ON() fires if 'pmd' has any of the low (PAGE_SIZE-1) bits set, i.e.
 * it does not sit on a page boundary.  'mm' is not used.
 */
static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
{
	unsigned long table = (unsigned long)pmd;

	BUG_ON(table & (PAGE_SIZE-1));
	free_page(table);
}
|
|
|
|
static inline pmd_t *pmd_alloc_one (struct mm_struct *mm, unsigned long addr)
|
|
{
|
|
return (pmd_t *)get_zeroed_page(GFP_KERNEL|__GFP_REPEAT);
|
|
}
|
|
|
|
static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
|
|
{
|
|
return (pud_t *)get_zeroed_page(GFP_KERNEL|__GFP_REPEAT);
|
|
}
|
|
|
|
/*
 * Release the page that holds a pud table.
 *
 * BUG_ON() fires if 'pud' has any of the low (PAGE_SIZE-1) bits set, i.e.
 * it does not sit on a page boundary.  'mm' is not used.
 */
static inline void pud_free(struct mm_struct *mm, pud_t *pud)
{
	unsigned long table = (unsigned long)pud;

	BUG_ON(table & (PAGE_SIZE-1));
	free_page(table);
}
|
|
|
|
/*
 * Link the struct page backing 'pgd' onto the global pgd_list.
 *
 * The list update is done under pgd_lock, taken with the irq-saving
 * variant so the previous interrupt state is restored on unlock.
 */
static inline void pgd_list_add(pgd_t *pgd)
{
	unsigned long irq_state;
	struct page *pgd_page = virt_to_page(pgd);

	spin_lock_irqsave(&pgd_lock, irq_state);
	list_add(&pgd_page->lru, &pgd_list);
	spin_unlock_irqrestore(&pgd_lock, irq_state);
}
|
|
|
|
/*
 * Unlink the struct page backing 'pgd' from the list it was put on via
 * its lru member.
 *
 * The list update is done under pgd_lock, taken with the irq-saving
 * variant so the previous interrupt state is restored on unlock.
 */
static inline void pgd_list_del(pgd_t *pgd)
{
	unsigned long irq_state;
	struct page *pgd_page = virt_to_page(pgd);

	spin_lock_irqsave(&pgd_lock, irq_state);
	list_del(&pgd_page->lru);
	spin_unlock_irqrestore(&pgd_lock, irq_state);
}
|
|
|
|
static inline pgd_t *pgd_alloc(struct mm_struct *mm)
|
|
{
|
|
unsigned boundary;
|
|
pgd_t *pgd = (pgd_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT);
|
|
if (!pgd)
|
|
return NULL;
|
|
pgd_list_add(pgd);
|
|
/*
|
|
* Copy kernel pointers in from init.
|
|
* Could keep a freelist or slab cache of those because the kernel
|
|
* part never changes.
|
|
*/
|
|
boundary = pgd_index(__PAGE_OFFSET);
|
|
memset(pgd, 0, boundary * sizeof(pgd_t));
|
|
memcpy(pgd + boundary,
|
|
init_level4_pgt + boundary,
|
|
(PTRS_PER_PGD - boundary) * sizeof(pgd_t));
|
|
return pgd;
|
|
}
|
|
|
|
/*
 * Tear down a pgd page: unregister it with pgd_list_del() and then
 * release the page.
 *
 * BUG_ON() fires if 'pgd' has any of the low (PAGE_SIZE-1) bits set, i.e.
 * it does not sit on a page boundary.  'mm' is not used.
 */
static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
{
	unsigned long table = (unsigned long)pgd;

	BUG_ON(table & (PAGE_SIZE-1));
	pgd_list_del(pgd);
	free_page(table);
}
|
|
|
|
static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
|
|
{
|
|
return (pte_t *)get_zeroed_page(GFP_KERNEL|__GFP_REPEAT);
|
|
}
|
|
|
|
static inline struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address)
|
|
{
|
|
void *p = (void *)get_zeroed_page(GFP_KERNEL|__GFP_REPEAT);
|
|
if (!p)
|
|
return NULL;
|
|
return virt_to_page(p);
|
|
}
|
|
|
|
/*
 * TODO: free page-table pages should really be garbage collected.  This
 * could be done with a reference count in struct page.
 */
|
|
|
|
/*
 * Release the page that holds a kernel pte table.
 *
 * BUG_ON() fires if 'pte' has any of the low (PAGE_SIZE-1) bits set, i.e.
 * it does not sit on a page boundary.  'mm' is not used.
 */
static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
{
	unsigned long table = (unsigned long)pte;

	BUG_ON(table & (PAGE_SIZE-1));
	free_page(table);
}
|
|
|
|
/*
 * Release a pte table given as a struct page by handing it to
 * __free_page().  'mm' is not used.
 */
static inline void pte_free(struct mm_struct *mm, struct page *pte)
{
	struct page *table_page = pte;

	__free_page(table_page);
}
|
|
|
|
/* Pass a pte table, already a struct page, to tlb_remove_page(). */
#define __pte_free_tlb(tlb, pte)	tlb_remove_page((tlb), (pte))
|
|
|
|
/* Pass the struct page behind pmd table 'x' to tlb_remove_page(). */
#define __pmd_free_tlb(tlb, x)	tlb_remove_page((tlb), virt_to_page(x))
|
|
/* Pass the struct page behind pud table 'x' to tlb_remove_page(). */
#define __pud_free_tlb(tlb, x)	tlb_remove_page((tlb), virt_to_page(x))
|
|
|
|
#endif /* _X86_64_PGALLOC_H */
|