2005-06-25 14:58:02 -07:00
/*
2007-10-12 21:10:53 -04:00
* handle transition of Linux booting another kernel
2005-06-25 14:58:02 -07:00
* Copyright ( C ) 2002 - 2005 Eric Biederman < ebiederm @ xmission . com >
*
* This source code is licensed under the GNU General Public License ,
* Version 2. See the file COPYING for more details .
*/
# include <linux/mm.h>
# include <linux/kexec.h>
# include <linux/string.h>
# include <linux/reboot.h>
2007-10-16 23:27:27 -07:00
# include <linux/numa.h>
2005-06-25 14:58:02 -07:00
# include <asm/pgtable.h>
# include <asm/tlbflush.h>
# include <asm/mmu_context.h>
# include <asm/io.h>
2005-07-29 13:25:28 -06:00
2006-09-26 10:52:38 +02:00
/*
 * Statically allocated tables, each aligned to PAGE_SIZE.
 *
 * machine_kexec() stores both the physical address (PA_* slots) and the
 * virtual address (VA_* slots) of every table below in page_list[] before
 * it calls relocate_kernel().  Nothing in the visible C code writes to
 * the tables themselves.
 *
 * NOTE(review): 512 u64 entries are presumably exactly one page each --
 * confirm against PAGE_SIZE for this architecture.
 */
# define PAGE_ALIGNED __attribute__ ((__aligned__(PAGE_SIZE)))
static u64 kexec_pgd [ 512 ] PAGE_ALIGNED ;
static u64 kexec_pud0 [ 512 ] PAGE_ALIGNED ;
static u64 kexec_pmd0 [ 512 ] PAGE_ALIGNED ;
static u64 kexec_pte0 [ 512 ] PAGE_ALIGNED ;
static u64 kexec_pud1 [ 512 ] PAGE_ALIGNED ;
static u64 kexec_pmd1 [ 512 ] PAGE_ALIGNED ;
static u64 kexec_pte1 [ 512 ] PAGE_ALIGNED ;
2005-07-29 13:25:28 -06:00
/*
 * Populate one level-2 (PMD) table with large-page entries.
 *
 * @level2p: first entry of the table to fill
 * @addr:    address mapped by the first entry; masked with PAGE_MASK
 *
 * One entry is written per PMD_SIZE step until PUD_SIZE bytes starting
 * at @addr are covered.  Every entry is the address itself or'ed with
 * __PAGE_KERNEL_LARGE_EXEC.
 */
static void init_level2_page(pmd_t *level2p, unsigned long addr)
{
	unsigned long limit;

	addr &= PAGE_MASK;

	for (limit = addr + PUD_SIZE; addr < limit; addr += PMD_SIZE)
		set_pmd(level2p++, __pmd(addr | __PAGE_KERNEL_LARGE_EXEC));
}
2005-07-29 13:25:28 -06:00
/*
 * Populate one level-3 (PUD) table.
 *
 * @image:     kexec image the control pages are allocated from
 * @level3p:   first entry of the table to fill
 * @addr:      address mapped by the first entry; masked with PAGE_MASK
 * @last_addr: exclusive upper bound of the range that needs mappings
 *
 * For every PUD_SIZE step below both @last_addr and the end of this
 * table's PGDIR_SIZE span, a fresh control page is allocated, filled by
 * init_level2_page() and hooked in with _KERNPG_TABLE.  The remaining
 * entries of the span are cleared.
 *
 * Returns 0 on success or -ENOMEM when a control page cannot be
 * allocated.  Pages allocated before the failure are not released here;
 * presumably they are freed together with @image -- confirm in caller.
 */
static int init_level3_page(struct kimage *image, pud_t *level3p,
				unsigned long addr, unsigned long last_addr)
{
	unsigned long span_end;

	addr &= PAGE_MASK;
	span_end = addr + PGDIR_SIZE;

	for (; addr < last_addr && addr < span_end; addr += PUD_SIZE) {
		struct page *page = kimage_alloc_control_pages(image, 0);
		pmd_t *level2p;

		if (!page)
			return -ENOMEM;

		level2p = (pmd_t *)page_address(page);
		init_level2_page(level2p, addr);
		set_pud(level3p++, __pud(__pa(level2p) | _KERNPG_TABLE));
	}

	/* clear the unused entries */
	for (; addr < span_end; addr += PUD_SIZE)
		pud_clear(level3p++);

	return 0;
}
2005-07-29 13:25:28 -06:00
/*
 * Populate the top-level (PGD) table.
 *
 * @image:     kexec image the control pages are allocated from
 * @level4p:   first entry of the table to fill
 * @addr:      address mapped by the first entry; masked with PAGE_MASK
 * @last_addr: exclusive upper bound of the range that needs mappings
 *
 * For every PGDIR_SIZE step below both @last_addr and the end of the
 * PTRS_PER_PGD * PGDIR_SIZE span, a control page is allocated, filled by
 * init_level3_page() and hooked in with _KERNPG_TABLE.  The remaining
 * entries of the span are cleared.
 *
 * Returns 0 on success, -ENOMEM when a control page cannot be allocated,
 * or whatever non-zero value init_level3_page() reported.
 */
static int init_level4_page(struct kimage *image, pgd_t *level4p,
				unsigned long addr, unsigned long last_addr)
{
	unsigned long span_end;

	addr &= PAGE_MASK;
	span_end = addr + (PTRS_PER_PGD * PGDIR_SIZE);

	for (; addr < last_addr && addr < span_end; addr += PGDIR_SIZE) {
		struct page *page = kimage_alloc_control_pages(image, 0);
		pud_t *level3p;
		int err;

		if (!page)
			return -ENOMEM;

		level3p = (pud_t *)page_address(page);
		err = init_level3_page(image, level3p, addr, last_addr);
		if (err)
			return err;

		set_pgd(level4p++, __pgd(__pa(level3p) | _KERNPG_TABLE));
	}

	/* clear the unused entries */
	for (; addr < span_end; addr += PGDIR_SIZE)
		pgd_clear(level4p++);

	return 0;
}
static int init_pgtable ( struct kimage * image , unsigned long start_pgtable )
{
2005-07-29 13:25:28 -06:00
pgd_t * level4p ;
level4p = ( pgd_t * ) __va ( start_pgtable ) ;
2005-06-25 14:58:28 -07:00
return init_level4_page ( image , level4p , 0 , end_pfn < < PAGE_SHIFT ) ;
2005-06-25 14:58:02 -07:00
}
/*
 * Load the interrupt descriptor table register.
 *
 * @newidt: value stored in the descriptor's address field
 * @limit:  value stored in the descriptor's size field
 *
 * The descriptor is built on the stack and handed to lidtq as a memory
 * operand.
 */
static void set_idt(void *newidt, u16 limit)
{
	struct desc_ptr curidt;

	/* x86-64 supports unaligned loads & stores */
	curidt.size    = limit;
	curidt.address = (unsigned long)newidt;

	__asm__ __volatile__ (
		"lidtq %0\n"
		: : "m" (curidt)
		);
}
/*
 * Load the global descriptor table register.
 *
 * @newgdt: value stored in the descriptor's address field
 * @limit:  value stored in the descriptor's size field
 *
 * The descriptor is built on the stack and handed to lgdtq as a memory
 * operand.
 */
static void set_gdt(void *newgdt, u16 limit)
{
	struct desc_ptr curgdt;

	/* x86-64 supports unaligned loads & stores */
	curgdt.size    = limit;
	curgdt.address = (unsigned long)newgdt;

	__asm__ __volatile__ (
		"lgdtq %0\n"
		: : "m" (curgdt)
		);
}
static void load_segments ( void )
{
__asm__ __volatile__ (
2005-07-29 13:02:09 -06:00
" \t movl %0,%%ds \n "
" \t movl %0,%%es \n "
" \t movl %0,%%ss \n "
" \t movl %0,%%fs \n "
" \t movl %0,%%gs \n "
2006-03-07 21:55:48 -08:00
: : " a " ( __KERNEL_DS ) : " memory "
2005-06-25 14:58:02 -07:00
) ;
}
int machine_kexec_prepare ( struct kimage * image )
{
2006-09-26 10:52:38 +02:00
unsigned long start_pgtable ;
2005-06-25 14:58:02 -07:00
int result ;
/* Calculate the offsets */
2005-06-25 14:58:28 -07:00
start_pgtable = page_to_pfn ( image - > control_code_page ) < < PAGE_SHIFT ;
2005-06-25 14:58:02 -07:00
/* Setup the identity mapped 64bit page table */
result = init_pgtable ( image , start_pgtable ) ;
2005-06-25 14:58:28 -07:00
if ( result )
2005-06-25 14:58:02 -07:00
return result ;
return 0 ;
}
/* Nothing to undo: this hook intentionally does no work for @image. */
void machine_kexec_cleanup(struct kimage *image)
{
}
/*
* Do not allocate memory ( or fail in any way ) in machine_kexec ( ) .
* We are past the point of no return , committed to rebooting now .
*/
NORET_TYPE void machine_kexec ( struct kimage * image )
{
2006-09-26 10:52:38 +02:00
unsigned long page_list [ PAGES_NR ] ;
void * control_page ;
2005-06-25 14:58:02 -07:00
/* Interrupts aren't acceptable while we reboot */
local_irq_disable ( ) ;
2006-09-26 10:52:38 +02:00
control_page = page_address ( image - > control_code_page ) + PAGE_SIZE ;
memcpy ( control_page , relocate_kernel , PAGE_SIZE ) ;
Revert "[PATCH] x86: __pa and __pa_symbol address space separation"
This was broken. It adds complexity, for no good reason. Rather than
separate __pa() and __pa_symbol(), we should deprecate __pa_symbol(),
and preferably __pa() too - and just use "virt_to_phys()" instead, which
is more readable and has nicer semantics.
However, right now, just undo the separation, and make __pa_symbol() be
the exact same as __pa(). That fixes the bugs this patch introduced,
and we can do the fairly obvious cleanups later.
Do the new __phys_addr() function (which is now the actual workhorse for
the unified __pa()/__pa_symbol()) as a real external function, that way
all the potential issues with compile/link-time optimizations of
constant symbol addresses go away, and we can also, if we choose to, add
more sanity-checking of the argument.
Cc: Eric W. Biederman <ebiederm@xmission.com>
Cc: Vivek Goyal <vgoyal@in.ibm.com>
Cc: Andi Kleen <ak@suse.de>
Cc: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2007-05-07 08:44:24 -07:00
page_list [ PA_CONTROL_PAGE ] = virt_to_phys ( control_page ) ;
2006-09-26 10:52:38 +02:00
page_list [ VA_CONTROL_PAGE ] = ( unsigned long ) relocate_kernel ;
Revert "[PATCH] x86: __pa and __pa_symbol address space separation"
This was broken. It adds complexity, for no good reason. Rather than
separate __pa() and __pa_symbol(), we should deprecate __pa_symbol(),
and preferably __pa() too - and just use "virt_to_phys()" instead, which
is more readable and has nicer semantics.
However, right now, just undo the separation, and make __pa_symbol() be
the exact same as __pa(). That fixes the bugs this patch introduced,
and we can do the fairly obvious cleanups later.
Do the new __phys_addr() function (which is now the actual workhorse for
the unified __pa()/__pa_symbol()) as a real external function, that way
all the potential issues with compile/link-time optimizations of
constant symbol addresses go away, and we can also, if we choose to, add
more sanity-checking of the argument.
Cc: Eric W. Biederman <ebiederm@xmission.com>
Cc: Vivek Goyal <vgoyal@in.ibm.com>
Cc: Andi Kleen <ak@suse.de>
Cc: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2007-05-07 08:44:24 -07:00
page_list [ PA_PGD ] = virt_to_phys ( & kexec_pgd ) ;
2006-09-26 10:52:38 +02:00
page_list [ VA_PGD ] = ( unsigned long ) kexec_pgd ;
Revert "[PATCH] x86: __pa and __pa_symbol address space separation"
This was broken. It adds complexity, for no good reason. Rather than
separate __pa() and __pa_symbol(), we should deprecate __pa_symbol(),
and preferably __pa() too - and just use "virt_to_phys()" instead, which
is more readable and has nicer semantics.
However, right now, just undo the separation, and make __pa_symbol() be
the exact same as __pa(). That fixes the bugs this patch introduced,
and we can do the fairly obvious cleanups later.
Do the new __phys_addr() function (which is now the actual workhorse for
the unified __pa()/__pa_symbol()) as a real external function, that way
all the potential issues with compile/link-time optimizations of
constant symbol addresses go away, and we can also, if we choose to, add
more sanity-checking of the argument.
Cc: Eric W. Biederman <ebiederm@xmission.com>
Cc: Vivek Goyal <vgoyal@in.ibm.com>
Cc: Andi Kleen <ak@suse.de>
Cc: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2007-05-07 08:44:24 -07:00
page_list [ PA_PUD_0 ] = virt_to_phys ( & kexec_pud0 ) ;
2006-09-26 10:52:38 +02:00
page_list [ VA_PUD_0 ] = ( unsigned long ) kexec_pud0 ;
Revert "[PATCH] x86: __pa and __pa_symbol address space separation"
This was broken. It adds complexity, for no good reason. Rather than
separate __pa() and __pa_symbol(), we should deprecate __pa_symbol(),
and preferably __pa() too - and just use "virt_to_phys()" instead, which
is more readable and has nicer semantics.
However, right now, just undo the separation, and make __pa_symbol() be
the exact same as __pa(). That fixes the bugs this patch introduced,
and we can do the fairly obvious cleanups later.
Do the new __phys_addr() function (which is now the actual workhorse for
the unified __pa()/__pa_symbol()) as a real external function, that way
all the potential issues with compile/link-time optimizations of
constant symbol addresses go away, and we can also, if we choose to, add
more sanity-checking of the argument.
Cc: Eric W. Biederman <ebiederm@xmission.com>
Cc: Vivek Goyal <vgoyal@in.ibm.com>
Cc: Andi Kleen <ak@suse.de>
Cc: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2007-05-07 08:44:24 -07:00
page_list [ PA_PMD_0 ] = virt_to_phys ( & kexec_pmd0 ) ;
2006-09-26 10:52:38 +02:00
page_list [ VA_PMD_0 ] = ( unsigned long ) kexec_pmd0 ;
Revert "[PATCH] x86: __pa and __pa_symbol address space separation"
This was broken. It adds complexity, for no good reason. Rather than
separate __pa() and __pa_symbol(), we should deprecate __pa_symbol(),
and preferably __pa() too - and just use "virt_to_phys()" instead, which
is more readable and has nicer semantics.
However, right now, just undo the separation, and make __pa_symbol() be
the exact same as __pa(). That fixes the bugs this patch introduced,
and we can do the fairly obvious cleanups later.
Do the new __phys_addr() function (which is now the actual workhorse for
the unified __pa()/__pa_symbol()) as a real external function, that way
all the potential issues with compile/link-time optimizations of
constant symbol addresses go away, and we can also, if we choose to, add
more sanity-checking of the argument.
Cc: Eric W. Biederman <ebiederm@xmission.com>
Cc: Vivek Goyal <vgoyal@in.ibm.com>
Cc: Andi Kleen <ak@suse.de>
Cc: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2007-05-07 08:44:24 -07:00
page_list [ PA_PTE_0 ] = virt_to_phys ( & kexec_pte0 ) ;
2006-09-26 10:52:38 +02:00
page_list [ VA_PTE_0 ] = ( unsigned long ) kexec_pte0 ;
Revert "[PATCH] x86: __pa and __pa_symbol address space separation"
This was broken. It adds complexity, for no good reason. Rather than
separate __pa() and __pa_symbol(), we should deprecate __pa_symbol(),
and preferably __pa() too - and just use "virt_to_phys()" instead, which
is more readable and has nicer semantics.
However, right now, just undo the separation, and make __pa_symbol() be
the exact same as __pa(). That fixes the bugs this patch introduced,
and we can do the fairly obvious cleanups later.
Do the new __phys_addr() function (which is now the actual workhorse for
the unified __pa()/__pa_symbol()) as a real external function, that way
all the potential issues with compile/link-time optimizations of
constant symbol addresses go away, and we can also, if we choose to, add
more sanity-checking of the argument.
Cc: Eric W. Biederman <ebiederm@xmission.com>
Cc: Vivek Goyal <vgoyal@in.ibm.com>
Cc: Andi Kleen <ak@suse.de>
Cc: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2007-05-07 08:44:24 -07:00
page_list [ PA_PUD_1 ] = virt_to_phys ( & kexec_pud1 ) ;
2006-09-26 10:52:38 +02:00
page_list [ VA_PUD_1 ] = ( unsigned long ) kexec_pud1 ;
Revert "[PATCH] x86: __pa and __pa_symbol address space separation"
This was broken. It adds complexity, for no good reason. Rather than
separate __pa() and __pa_symbol(), we should deprecate __pa_symbol(),
and preferably __pa() too - and just use "virt_to_phys()" instead, which
is more readable and has nicer semantics.
However, right now, just undo the separation, and make __pa_symbol() be
the exact same as __pa(). That fixes the bugs this patch introduced,
and we can do the fairly obvious cleanups later.
Do the new __phys_addr() function (which is now the actual workhorse for
the unified __pa()/__pa_symbol()) as a real external function, that way
all the potential issues with compile/link-time optimizations of
constant symbol addresses go away, and we can also, if we choose to, add
more sanity-checking of the argument.
Cc: Eric W. Biederman <ebiederm@xmission.com>
Cc: Vivek Goyal <vgoyal@in.ibm.com>
Cc: Andi Kleen <ak@suse.de>
Cc: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2007-05-07 08:44:24 -07:00
page_list [ PA_PMD_1 ] = virt_to_phys ( & kexec_pmd1 ) ;
2006-09-26 10:52:38 +02:00
page_list [ VA_PMD_1 ] = ( unsigned long ) kexec_pmd1 ;
Revert "[PATCH] x86: __pa and __pa_symbol address space separation"
This was broken. It adds complexity, for no good reason. Rather than
separate __pa() and __pa_symbol(), we should deprecate __pa_symbol(),
and preferably __pa() too - and just use "virt_to_phys()" instead, which
is more readable and has nicer semantics.
However, right now, just undo the separation, and make __pa_symbol() be
the exact same as __pa(). That fixes the bugs this patch introduced,
and we can do the fairly obvious cleanups later.
Do the new __phys_addr() function (which is now the actual workhorse for
the unified __pa()/__pa_symbol()) as a real external function, that way
all the potential issues with compile/link-time optimizations of
constant symbol addresses go away, and we can also, if we choose to, add
more sanity-checking of the argument.
Cc: Eric W. Biederman <ebiederm@xmission.com>
Cc: Vivek Goyal <vgoyal@in.ibm.com>
Cc: Andi Kleen <ak@suse.de>
Cc: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2007-05-07 08:44:24 -07:00
page_list [ PA_PTE_1 ] = virt_to_phys ( & kexec_pte1 ) ;
2006-09-26 10:52:38 +02:00
page_list [ VA_PTE_1 ] = ( unsigned long ) kexec_pte1 ;
page_list [ PA_TABLE_PAGE ] =
( unsigned long ) __pa ( page_address ( image - > control_code_page ) ) ;
2005-06-25 14:58:02 -07:00
2006-07-30 03:03:20 -07:00
/* The segment registers are funny things, they have both a
* visible and an invisible part . Whenever the visible part is
* set to a specific selector , the invisible part is loaded
* with from a table in memory . At no other time is the
* descriptor table in memory accessed .
2005-06-25 14:58:02 -07:00
*
* I take advantage of this here by force loading the
* segments , before I zap the gdt with an invalid value .
*/
load_segments ( ) ;
/* The gdt & idt are now invalid.
* If you want to load them you must set up your own idt & gdt .
*/
set_gdt ( phys_to_virt ( 0 ) , 0 ) ;
set_idt ( phys_to_virt ( 0 ) , 0 ) ;
2006-09-26 10:52:38 +02:00
2005-06-25 14:58:02 -07:00
/* now call it */
2006-09-26 10:52:38 +02:00
relocate_kernel ( ( unsigned long ) image - > head , ( unsigned long ) page_list ,
image - > start ) ;
2005-06-25 14:58:02 -07:00
}
2006-09-26 10:52:32 +02:00
2007-10-16 23:27:27 -07:00
void arch_crash_save_vmcoreinfo ( void )
{
2007-10-26 14:19:26 +09:00
VMCOREINFO_SYMBOL ( init_level4_pgt ) ;
2007-10-16 23:27:27 -07:00
}