2005-04-16 15:20:36 -07:00
/*
* Copyright ( C ) 1995 Linus Torvalds
*
* Support of BIGMEM added by Gerhard Wichert , Siemens AG , July 1999
*
* Memory region support
* David Parsons < orc @ pell . chi . il . us > , July - August 1999
*
* Added E820 sanitization routine ( removes overlapping memory regions ) ;
* Brian Moyle < bmoyle @ mvista . com > , February 2001
*
* Moved CPU detection code to cpu / $ { cpu } . c
* Patrick Mochel < mochel @ osdl . org > , March 2002
*
* Provisions for empty E820 memory regions ( reported by certain BIOSes ) .
* Alex Achenbach < xela @ slit . de > , December 2002.
*
*/
/*
* This file handles the architecture - dependent parts of initialization
*/
# include <linux/sched.h>
# include <linux/mm.h>
2005-06-23 00:07:57 -07:00
# include <linux/mmzone.h>
2006-07-10 04:44:13 -07:00
# include <linux/screen_info.h>
2005-04-16 15:20:36 -07:00
# include <linux/ioport.h>
# include <linux/acpi.h>
# include <linux/apm_bios.h>
# include <linux/initrd.h>
# include <linux/bootmem.h>
# include <linux/seq_file.h>
# include <linux/console.h>
# include <linux/mca.h>
# include <linux/root_dev.h>
# include <linux/highmem.h>
# include <linux/module.h>
# include <linux/efi.h>
# include <linux/init.h>
# include <linux/edd.h>
2008-04-09 19:50:41 -07:00
# include <linux/iscsi_ibft.h>
2005-04-16 15:20:36 -07:00
# include <linux/nodemask.h>
2005-06-25 14:58:01 -07:00
# include <linux/kexec.h>
2005-06-25 14:58:20 -07:00
# include <linux/crash_dump.h>
2006-01-11 22:43:33 +01:00
# include <linux/dmi.h>
2006-03-27 01:16:04 -08:00
# include <linux/pfn.h>
2008-01-30 13:30:16 +01:00
# include <linux/pci.h>
x86: early boot debugging via FireWire (ohci1394_dma=early)
This patch adds a new configuration option, which adds support for a new
early_param which gets checked in arch/x86/kernel/setup_{32,64}.c:setup_arch()
to decide whether OHCI-1394 FireWire controllers should be initialized and
enabled for physical DMA access to allow remote debugging of early problems
like issues in ACPI or other subsystems which are executed very early.
If the config option is not enabled, no code is changed, and if the boot
parameter is not given, no new code is executed, and independent of that,
all new code is freed after boot, so the config option can be even enabled
in standard, non-debug kernels.
With specialized tools, it is then possible to get debugging information
from machines which have no serial ports (notebooks) such as the printk
buffer contents, or any data which can be referenced from global pointers,
if it is stored below the 4GB limit and even memory dumps of the physical
RAM region below the 4GB limit can be taken without any cooperation from the
CPU of the host, so the machine can be crashed early, it does not matter.
In the extreme, even kernel debuggers can be accessed in this way. I wrote
a small kgdb module and an accompanying gdb stub for FireWire which allows
gdb to talk to kgdb using remote memory reads and writes over FireWire.
A version of the gdb stub for FireWire is able to read all global data
from a system which is running a normal kernel without any kernel debugger,
without any interruption or support of the system's CPU. That way, e.g. the
task struct and so on can be read and even manipulated when the physical DMA
access is granted.
A HOWTO is included in this patch, in Documentation/debugging-via-ohci1394.txt
and I've put a copy online at
ftp://ftp.suse.de/private/bk/firewire/docs/debugging-via-ohci1394.txt
It also has links to all the tools which are available to make use of it
another copy of it is online at:
ftp://ftp.suse.de/private/bk/firewire/kernel/ohci1394_dma_early-v2.diff
Signed-Off-By: Bernhard Kaindl <bk@suse.de>
Tested-By: Thomas Renninger <trenn@suse.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
2008-01-30 13:34:11 +01:00
# include <linux/init_ohci1394_dma.h>
2008-02-15 17:52:48 -02:00
# include <linux/kvm_para.h>
2005-06-25 14:58:01 -07:00
2005-04-16 15:20:36 -07:00
# include <video/edid.h>
2005-06-25 14:58:01 -07:00
2008-01-30 13:33:32 +01:00
# include <asm/mtrr.h>
2005-06-25 14:57:41 -07:00
# include <asm/apic.h>
2005-04-16 15:20:36 -07:00
# include <asm/e820.h>
# include <asm/mpspec.h>
2006-09-25 23:31:03 -07:00
# include <asm/mmzone.h>
2005-04-16 15:20:36 -07:00
# include <asm/setup.h>
# include <asm/arch_hooks.h>
# include <asm/sections.h>
# include <asm/io_apic.h>
# include <asm/ist.h>
# include <asm/io.h>
2007-02-13 13:26:21 +01:00
# include <asm/vmi.h>
2006-06-25 05:46:50 -07:00
# include <setup_arch.h>
2008-03-17 22:08:17 +03:00
# include <asm/bios_ebda.h>
2007-10-21 16:42:01 -07:00
# include <asm/cacheflush.h>
2008-03-04 19:57:42 +01:00
# include <asm/processor.h>
2005-04-16 15:20:36 -07:00
/*
 * Set up by the early boot code to hold the address immediately after
 * the boot-time page tables. It contains a *physical* address, and must
 * not be in the .bss segment! The static initialiser is ~0UL.
 */
unsigned long init_pg_tables_end __initdata = ~ 0UL ;
/*
* Machine setup . .
*/
2008-01-30 13:30:32 +01:00
/*
 * Memory resources describing the kernel image. start/end are left at 0
 * here; setup_arch() fills them in from the linker symbols
 * (_text/_etext for code, _etext/_edata for data,
 * __bss_start/__bss_stop for bss).
 */
static struct resource data_resource = {
. name = " Kernel data " ,
. start = 0 ,
. end = 0 ,
. flags = IORESOURCE_BUSY | IORESOURCE_MEM
} ;
static struct resource code_resource = {
. name = " Kernel code " ,
. start = 0 ,
. end = 0 ,
. flags = IORESOURCE_BUSY | IORESOURCE_MEM
} ;
static struct resource bss_resource = {
. name = " Kernel bss " ,
. start = 0 ,
. end = 0 ,
. flags = IORESOURCE_BUSY | IORESOURCE_MEM
} ;
/* Fixed physical range 0xa0000-0xbffff, marked as a busy memory region. */
static struct resource video_ram_resource = {
. name = " Video RAM area " ,
. start = 0xa0000 ,
. end = 0xbffff ,
. flags = IORESOURCE_BUSY | IORESOURCE_MEM
} ;
/*
 * Table of fixed I/O port ranges, every entry flagged
 * IORESOURCE_BUSY | IORESOURCE_IO. start and end are both inclusive.
 * Note that "keyboard" appears twice, as two single-port entries
 * (0x0060 and 0x0064), rather than as one range.
 */
static struct resource standard_io_resources [ ] = { {
. name = " dma1 " ,
. start = 0x0000 ,
. end = 0x001f ,
. flags = IORESOURCE_BUSY | IORESOURCE_IO
} , {
. name = " pic1 " ,
. start = 0x0020 ,
. end = 0x0021 ,
. flags = IORESOURCE_BUSY | IORESOURCE_IO
} , {
. name = " timer0 " ,
. start = 0x0040 ,
. end = 0x0043 ,
. flags = IORESOURCE_BUSY | IORESOURCE_IO
} , {
. name = " timer1 " ,
. start = 0x0050 ,
. end = 0x0053 ,
. flags = IORESOURCE_BUSY | IORESOURCE_IO
} , {
. name = " keyboard " ,
. start = 0x0060 ,
2008-04-29 14:20:40 +02:00
. end = 0x0060 ,
. flags = IORESOURCE_BUSY | IORESOURCE_IO
} , {
. name = " keyboard " ,
. start = 0x0064 ,
. end = 0x0064 ,
2008-01-30 13:30:32 +01:00
. flags = IORESOURCE_BUSY | IORESOURCE_IO
} , {
. name = " dma page reg " ,
. start = 0x0080 ,
. end = 0x008f ,
. flags = IORESOURCE_BUSY | IORESOURCE_IO
} , {
. name = " pic2 " ,
. start = 0x00a0 ,
. end = 0x00a1 ,
. flags = IORESOURCE_BUSY | IORESOURCE_IO
} , {
. name = " dma2 " ,
. start = 0x00c0 ,
. end = 0x00df ,
. flags = IORESOURCE_BUSY | IORESOURCE_IO
} , {
. name = " fpu " ,
. start = 0x00f0 ,
. end = 0x00ff ,
. flags = IORESOURCE_BUSY | IORESOURCE_IO
} } ;
2005-04-16 15:20:36 -07:00
/*
 * cpu data as detected by the assembly code in head.S.
 * setup_arch() copies this into boot_cpu_data as its first step.
 * NOTE(review): the initialiser is positional; which struct cpuinfo_x86
 * members the -1 and 1 values land on depends on the struct layout,
 * which is not visible here.
 */
2007-01-11 01:52:44 +01:00
struct cpuinfo_x86 new_cpu_data __cpuinitdata = { 0 , 0 , 0 , 0 , - 1 , 1 , 0 , 0 , - 1 } ;
2005-04-16 15:20:36 -07:00
/* common cpu data for all cpus; same static initialiser as new_cpu_data */
2005-09-06 15:16:33 -07:00
struct cpuinfo_x86 boot_cpu_data __read_mostly = { 0 , 0 , 0 , 0 , - 1 , 1 , 0 , 0 , - 1 } ;
2005-06-23 00:08:33 -07:00
EXPORT_SYMBOL ( boot_cpu_data ) ;
2005-04-16 15:20:36 -07:00
2008-03-27 23:55:04 +03:00
unsigned int def_to_bigsmp ;
2008-02-09 23:24:09 +01:00
/*
 * mmu_cr4_features has no initialiser on non-PAE builds; with
 * CONFIG_X86_PAE it starts out with X86_CR4_PAE already set.
 */
# ifndef CONFIG_X86_PAE
2005-04-16 15:20:36 -07:00
unsigned long mmu_cr4_features ;
2008-02-09 23:24:09 +01:00
# else
unsigned long mmu_cr4_features = X86_CR4_PAE ;
# endif
2005-04-16 15:20:36 -07:00
/*
 * for MCA, but anyone else can use it if they want.
 * setup_arch() fills these three from boot_params.sys_desc_table
 * (table[0], table[1], table[2]) when that table's length is non-zero.
 */
unsigned int machine_id ;
unsigned int machine_submodel_id ;
unsigned int BIOS_revision ;
/*
 * Boot loader ID as an integer, for the benefit of proc_dointvec.
 * setup_arch() copies it from boot_params.hdr.type_of_loader.
 */
int bootloader_type ;
/*
 * user-defined highmem size, in pages. -1 (all bits set, since the type
 * is unsigned) means "not given"; parse_highmem() stores the value and
 * find_max_low_pfn() consumes it.
 */
static unsigned int highmem_pages = - 1 ;
/*
 * Setup options
 *
 * setup_arch() fills screen_info, edid_info, apm_info.bios and ist_info
 * from the members of the same name in boot_params (apm_bios_info for
 * apm_info.bios), and saved_video_mode from boot_params.hdr.vid_mode.
 */
struct screen_info screen_info ;
2005-06-23 00:08:33 -07:00
EXPORT_SYMBOL ( screen_info ) ;
2005-04-16 15:20:36 -07:00
struct apm_info apm_info ;
2005-06-23 00:08:33 -07:00
EXPORT_SYMBOL ( apm_info ) ;
2005-04-16 15:20:36 -07:00
struct edid_info edid_info ;
2005-09-09 13:04:34 -07:00
EXPORT_SYMBOL_GPL ( edid_info ) ;
2005-04-16 15:20:36 -07:00
struct ist_info ist_info ;
2005-06-23 00:08:33 -07:00
/* ist_info is only exported when the SpeedStep SMI driver is configured */
# if defined(CONFIG_X86_SPEEDSTEP_SMI) || \
defined ( CONFIG_X86_SPEEDSTEP_SMI_MODULE )
EXPORT_SYMBOL ( ist_info ) ;
# endif
2005-04-16 15:20:36 -07:00
/* defined elsewhere; both are used by setup_arch() */
extern void early_cpu_init ( void ) ;
extern int root_mountflags ;
2008-04-10 23:28:10 +02:00
unsigned long saved_video_mode ;
2005-04-16 15:20:36 -07:00
2008-01-30 13:32:51 +01:00
/*
 * Bit fields of boot_params.hdr.ram_size, decoded by setup_arch() into
 * rd_image_start, rd_prompt and rd_doload when CONFIG_BLK_DEV_RAM is set.
 */
# define RAMDISK_IMAGE_START_MASK 0x07FF
2005-04-16 15:20:36 -07:00
# define RAMDISK_PROMPT_FLAG 0x8000
2008-01-30 13:32:51 +01:00
# define RAMDISK_LOAD_FLAG 0x4000
2005-04-16 15:20:36 -07:00
2007-02-12 00:54:11 -08:00
/*
 * setup_arch() copies boot_command_line into this buffer with strlcpy()
 * and hands it back to its caller through *cmdline_p.
 */
static char __initdata command_line [ COMMAND_LINE_SIZE ] ;
2005-04-16 15:20:36 -07:00
2008-01-30 13:32:51 +01:00
/*
 * boot_params is placed in __initdata unless CONFIG_DEBUG_BOOT_PARAMS
 * is set, in which case it is an ordinary global.
 */
# ifndef CONFIG_DEBUG_BOOT_PARAMS
2007-07-11 12:18:35 -07:00
struct boot_params __initdata boot_params ;
2008-01-30 13:32:51 +01:00
# else
struct boot_params boot_params ;
# endif
2005-04-16 15:20:36 -07:00
#if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE)
struct edd edd;
#ifdef CONFIG_EDD_MODULE
EXPORT_SYMBOL(edd);
#endif
#endif

/**
 * copy_edd() - Copy the BIOS EDD information
 *              from boot_params into a safe place.
 *
 * Compiles to an empty function when neither CONFIG_EDD nor
 * CONFIG_EDD_MODULE is enabled.
 */
static inline void copy_edd(void)
{
#if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE)
	/* entry counts first, then the two buffers they describe */
	edd.mbr_signature_nr = boot_params.edd_mbr_sig_buf_entries;
	edd.edd_info_nr = boot_params.eddbuf_entries;

	memcpy(edd.edd_info, boot_params.eddbuf, sizeof(edd.edd_info));
	memcpy(edd.mbr_signature, boot_params.edd_mbr_sig_buffer,
	       sizeof(edd.mbr_signature));
#endif
}
2008-03-07 19:26:26 +01:00
/*
 * Set to 1 by parse_mem() once a "mem=<size>" limit has been applied;
 * setup_arch() then prints the resulting user-defined RAM map.
 */
int __initdata user_defined_memmap ;
2005-04-16 15:20:36 -07:00
2006-09-26 10:52:32 +02:00
/*
 * Early handler for the "mem=" boot option.
 *
 * "mem=nopentium" disables the 4 MB page tables (the PSE CPU capability
 * is cleared).
 * "mem=XXX[kKmM]" is parsed as a size; the BIOS-provided memory map is
 * trimmed to that size and user_defined_memmap is set.
 *
 * HPA tells me bootloaders need to parse mem=, so no new
 * option should be mem=  [also see Documentation/i386/boot.txt]
 *
 * Returns -EINVAL when the option carries no value, 0 otherwise.
 */
static int __init parse_mem(char *arg)
{
	unsigned long long limit;

	if (!arg)
		return -EINVAL;

	if (!strcmp(arg, " nopentium ")) {
		setup_clear_cpu_cap(X86_FEATURE_PSE);
		return 0;
	}

	/*
	 * The user gave a memory size: limit the BIOS-provided memory map
	 * to it.  exactmap can be used to specify the exact map;
	 * mem=number only trims the existing one.
	 */
	limit = memparse(arg, &arg);
	limit_regions(limit);
	user_defined_memmap = 1;

	return 0;
}
early_param ( " mem " , parse_mem ) ;
2005-04-16 15:20:36 -07:00
2006-09-26 10:52:32 +02:00
# ifdef CONFIG_PROC_VMCORE
/*
 * elfcorehdr= specifies the location of the ELF core header
 * stored by the crashed kernel.
 *
 * The value is parsed with memparse() and stored in elfcorehdr_addr.
 * Returns -EINVAL when the option carries no value, 0 otherwise.
 */
static int __init parse_elfcorehdr(char *arg)
{
	char *rest;

	if (!arg)
		return -EINVAL;

	rest = arg;
	elfcorehdr_addr = memparse(arg, &rest);

	return 0;
}
early_param ( " elfcorehdr " , parse_elfcorehdr ) ;
# endif /* CONFIG_PROC_VMCORE */
2005-04-16 15:20:36 -07:00
2006-09-26 10:52:32 +02:00
/*
 * highmem=size forces highmem to be exactly 'size' bytes.
 * This works even on boxes that have no highmem otherwise.
 * This also works to reduce highmem size on bigger boxes.
 *
 * The byte count is converted to pages and kept in highmem_pages.
 * Returns -EINVAL when the option carries no value, 0 otherwise.
 */
static int __init parse_highmem(char *arg)
{
	unsigned long long bytes;

	if (!arg)
		return -EINVAL;

	bytes = memparse(arg, &arg);
	highmem_pages = bytes >> PAGE_SHIFT;

	return 0;
}
early_param ( " highmem " , parse_highmem ) ;
2005-06-25 14:57:41 -07:00
2006-09-26 10:52:32 +02:00
/*
 * vmalloc=size forces the vmalloc area to be exactly 'size'
 * bytes. This can be used to increase (or decrease) the
 * vmalloc area - the default is 128m.
 *
 * Returns -EINVAL when the option carries no value, 0 otherwise.
 */
static int __init parse_vmalloc(char *arg)
{
	char *rest;

	if (!arg)
		return -EINVAL;

	rest = arg;
	__VMALLOC_RESERVE = memparse(arg, &rest);

	return 0;
}
2006-09-26 10:52:32 +02:00
early_param ( " vmalloc " , parse_vmalloc ) ;
2005-04-16 15:20:36 -07:00
2006-09-25 23:32:25 -07:00
/*
 * reservetop=size reserves a hole at the top of the kernel address space
 * which a hypervisor can load into later. Needed for dynamically loaded
 * hypervisors, so relocating the fixmap can be done before paging
 * initialization.
 *
 * Returns -EINVAL when the option carries no value, 0 otherwise.
 */
static int __init parse_reservetop(char *arg)
{
	if (!arg)
		return -EINVAL;

	reserve_top_address((unsigned long)memparse(arg, &arg));

	return 0;
}
early_param ( " reservetop " , parse_reservetop ) ;
2005-04-16 15:20:36 -07:00
/*
 * Determine low and high memory ranges:
 *
 * Returns the highest page frame number that will be treated as low
 * memory. As side effects it may lower the global max_pfn and it
 * normalises highmem_pages (the "highmem=" request, -1 when not given).
 *
 * Two cases:
 *  - max_pfn exceeds MAXMEM_PFN: low memory is capped at MAXMEM_PFN and
 *    the remainder is highmem, optionally shrunk by "highmem=".
 *  - otherwise everything fits in low memory; with CONFIG_HIGHMEM a
 *    "highmem=" request carves highmem out of the top of low memory.
 */
unsigned long __init find_max_low_pfn ( void )
{
unsigned long max_low_pfn ;
max_low_pfn = max_pfn ;
if ( max_low_pfn > MAXMEM_PFN ) {
/* no "highmem=" given: everything above MAXMEM_PFN is highmem */
if ( highmem_pages = = - 1 )
highmem_pages = max_pfn - MAXMEM_PFN ;
/* user asked for less highmem than exists: drop the excess pages */
if ( highmem_pages + MAXMEM_PFN < max_pfn )
max_pfn = MAXMEM_PFN + highmem_pages ;
/* user asked for more highmem than exists: report and reset to 0 */
if ( highmem_pages + MAXMEM_PFN > max_pfn ) {
printk ( " only %luMB highmem pages available, ignoring highmem size of %uMB. \n " , pages_to_mb ( max_pfn - MAXMEM_PFN ) , pages_to_mb ( highmem_pages ) ) ;
highmem_pages = 0 ;
}
max_low_pfn = MAXMEM_PFN ;
# ifndef CONFIG_HIGHMEM
/* Maximum memory usable is what is directly addressable */
printk ( KERN_WARNING " Warning only %ldMB will be used. \n " ,
MAXMEM > > 20 ) ;
if ( max_pfn > MAX_NONPAE_PFN )
2007-07-21 17:11:13 +02:00
printk ( KERN_WARNING " Use a HIGHMEM64G enabled kernel. \n " ) ;
2005-04-16 15:20:36 -07:00
else
printk ( KERN_WARNING " Use a HIGHMEM enabled kernel. \n " ) ;
/* without highmem support nothing above MAXMEM_PFN is usable */
max_pfn = MAXMEM_PFN ;
# else /* !CONFIG_HIGHMEM */
2007-07-21 17:11:13 +02:00
# ifndef CONFIG_HIGHMEM64G
2005-04-16 15:20:36 -07:00
/* highmem without HIGHMEM64G: clamp max_pfn to MAX_NONPAE_PFN */
if ( max_pfn > MAX_NONPAE_PFN ) {
max_pfn = MAX_NONPAE_PFN ;
printk ( KERN_WARNING " Warning only 4GB will be used. \n " ) ;
2007-07-21 17:11:13 +02:00
printk ( KERN_WARNING " Use a HIGHMEM64G enabled kernel. \n " ) ;
2005-04-16 15:20:36 -07:00
}
2007-07-21 17:11:13 +02:00
# endif /* !CONFIG_HIGHMEM64G */
2005-04-16 15:20:36 -07:00
# endif /* !CONFIG_HIGHMEM */
} else {
/* all memory fits below MAXMEM_PFN; default is no highmem at all */
if ( highmem_pages = = - 1 )
highmem_pages = 0 ;
# ifdef CONFIG_HIGHMEM
/* request is at least as large as all of memory: ignore it */
if ( highmem_pages > = max_pfn ) {
printk ( KERN_ERR " highmem size specified (%uMB) is bigger than pages available (%luMB)!. \n " , pages_to_mb ( highmem_pages ) , pages_to_mb ( max_pfn ) ) ;
highmem_pages = 0 ;
}
if ( highmem_pages ) {
/* refuse a request that would leave less than 64MB of lowmem */
if ( max_low_pfn - highmem_pages < 64 * 1024 * 1024 / PAGE_SIZE ) {
printk ( KERN_ERR " highmem size %uMB results in smaller than 64MB lowmem, ignoring it. \n " , pages_to_mb ( highmem_pages ) ) ;
highmem_pages = 0 ;
}
/* highmem_pages is 0 here if the request was just rejected */
max_low_pfn - = highmem_pages ;
}
# else
if ( highmem_pages )
printk ( KERN_ERR " ignoring highmem size on non-highmem kernel! \n " ) ;
# endif
}
return max_low_pfn ;
}
2008-03-01 17:09:12 +01:00
# define BIOS_LOWMEM_KILOBYTES 0x413
2005-04-16 15:20:36 -07:00
/*
 * The BIOS places the EBDA/XBDA at the top of conventional
 * memory, and usually decreases the reported amount of
 * conventional memory (int 0x12) too. This also contains a
 * workaround for Dell systems that neglect to reserve EBDA.
 * The same workaround also avoids a problem with the AMD768MPX
 * chipset: reserve a page before VGA to prevent PCI prefetch
 * into it (errata #56). Usually the page is reserved anyways,
 * unless you have no PS/2 mouse plugged in.
 */
static void __init reserve_ebda_region(void)
{
	unsigned int low_end, ebda_start;

	/*
	 * Locating the EBDA and the end of conventional memory requires
	 * the BIOS data area, which is absent in a paravirtual
	 * environment.  Assume the paravirt case handles memory setup
	 * correctly without our help.
	 */
	if (paravirt_enabled())
		return;

	/* end of low (conventional) memory: BIOS reports it in KiB */
	low_end = *(unsigned short *)__va(BIOS_LOWMEM_KILOBYTES);
	low_end <<= 10;

	/* start of EBDA area */
	ebda_start = get_bios_ebda();

	/*
	 * Fixup: bios puts an EBDA in the top 64K segment of
	 * conventional memory, but does not adjust lowmem.
	 */
	if (low_end - ebda_start <= 0x10000)
		low_end = ebda_start;

	/*
	 * Fixup: bios does not report an EBDA at all.
	 * Some old Dells seem to need 4k anyhow (bugzilla 2990)
	 */
	if (ebda_start == 0 && low_end >= 0x9f000)
		low_end = 0x9f000;

	/* Paranoia: should never happen, but... */
	if (low_end == 0 || low_end >= 0x100000)
		low_end = 0x9f000;

	/* reserve all memory between lowmem and the 1MB mark */
	reserve_bootmem(low_end, 0x100000 - low_end, BOOTMEM_DEFAULT);
}
2005-06-23 00:07:57 -07:00
# ifndef CONFIG_NEED_MULTIPLE_NODES
2008-04-23 13:20:56 +02:00
static void __init setup_bootmem_allocator ( void ) ;
2005-04-16 15:20:36 -07:00
/*
 * Work out the low/high memory page-frame boundaries, publish them in
 * the global pfn variables, report the sizes and bring up the bootmem
 * allocator.  Returns max_low_pfn.
 */
static unsigned long __init setup_memory(void)
{
	/* partially used pages are not usable, so round upwards */
	min_low_pfn = PFN_UP(init_pg_tables_end);
	max_low_pfn = find_max_low_pfn();

#ifdef CONFIG_HIGHMEM
	/* highmem is [max_low_pfn, max_pfn), or empty if nothing lies above */
	highend_pfn = max_pfn;
	highstart_pfn = (max_pfn > max_low_pfn) ? max_low_pfn : max_pfn;

	printk(KERN_NOTICE " %ldMB HIGHMEM available. \n ",
	       pages_to_mb(highend_pfn - highstart_pfn));

	num_physpages = highend_pfn;
	high_memory = (void *) __va(highstart_pfn * PAGE_SIZE - 1) + 1;
#else
	num_physpages = max_low_pfn;
	high_memory = (void *) __va(max_low_pfn * PAGE_SIZE - 1) + 1;
#endif

#ifdef CONFIG_FLATMEM
	max_mapnr = num_physpages;
#endif

	printk(KERN_NOTICE " %ldMB LOWMEM available. \n ",
	       pages_to_mb(max_low_pfn));

	setup_bootmem_allocator();

	return max_low_pfn;
}
2008-04-23 13:20:56 +02:00
/*
 * Fill in the upper pfn limit of each zone, register node 0's active
 * pfn range and hand the limits to free_area_init_nodes().
 */
static void __init zone_sizes_init(void)
{
	unsigned long max_zone_pfns[MAX_NR_ZONES] = { 0 };
	unsigned long last_pfn = max_low_pfn;

	max_zone_pfns[ZONE_DMA] =
		virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT;
	max_zone_pfns[ZONE_NORMAL] = max_low_pfn;
#ifdef CONFIG_HIGHMEM
	/* with highmem the active range extends to the end of highmem */
	max_zone_pfns[ZONE_HIGHMEM] = highend_pfn;
	last_pfn = highend_pfn;
#endif

	add_active_range(0, 0, last_pfn);
	free_area_init_nodes(max_zone_pfns);
}
# else
2005-06-23 00:07:57 -07:00
extern unsigned long __init setup_memory ( void ) ;
2005-04-16 15:20:36 -07:00
extern void zone_sizes_init ( void ) ;
2005-06-23 00:07:57 -07:00
# endif /* !CONFIG_NEED_MULTIPLE_NODES */
2005-04-16 15:20:36 -07:00
2007-10-18 23:40:59 -07:00
static inline unsigned long long get_total_mem ( void )
{
unsigned long long total ;
total = max_low_pfn - min_low_pfn ;
# ifdef CONFIG_HIGHMEM
total + = highend_pfn - highstart_pfn ;
# endif
return total < < PAGE_SHIFT ;
}
# ifdef CONFIG_KEXEC
/*
 * Parse the crashkernel= request from the boot command line and, when
 * it names both a size and a base address, reserve that range in
 * bootmem and record it in crashk_res.
 */
static void __init reserve_crashkernel(void)
{
	unsigned long long total_mem = get_total_mem();
	unsigned long long crash_size, crash_base;
	int ret;

	ret = parse_crashkernel(boot_command_line, total_mem,
				&crash_size, &crash_base);

	/* nothing requested, or the option could not be parsed */
	if (ret != 0 || crash_size == 0)
		return;

	if (crash_base == 0) {
		printk(KERN_INFO " crashkernel reservation failed - "
				" you have to specify a base address \n ");
		return;
	}

	printk(KERN_INFO " Reserving %ldMB of memory at %ldMB "
			" for crashkernel (System RAM: %ldMB) \n ",
			(unsigned long)(crash_size >> 20),
			(unsigned long)(crash_base >> 20),
			(unsigned long)(total_mem >> 20));

	/* exclusive: fails if any part of the range is already reserved */
	if (reserve_bootmem(crash_base, crash_size, BOOTMEM_EXCLUSIVE) < 0) {
		printk(KERN_INFO " crashkernel reservation "
				" failed - memory is in use \n ");
		return;
	}

	crashk_res.start = crash_base;
	crashk_res.end = crash_base + crash_size - 1;
}
# else
static inline void __init reserve_crashkernel ( void )
{ }
# endif
2008-01-30 13:32:51 +01:00
# ifdef CONFIG_BLK_DEV_INITRD
/* set by reserve_initrd() when the image must be copied down into lowmem */
static bool do_relocate_initrd = false;

/*
 * Reserve bootmem for the initrd handed over by the boot loader and set
 * initrd_start/initrd_end.  initrd_start is left at 0 when there is no
 * usable initrd.  If the image does not lie entirely in low memory, a
 * destination just below the end of lowmem is reserved instead and
 * do_relocate_initrd is raised so relocate_initrd() copies it later.
 */
static void __init reserve_initrd(void)
{
	unsigned long img_start = boot_params.hdr.ramdisk_image;
	unsigned long img_size = boot_params.hdr.ramdisk_size;
	unsigned long img_end = img_start + img_size;
	unsigned long lowmem_top = max_low_pfn << PAGE_SHIFT;
	unsigned long dest;
	bool in_lowmem;

	initrd_start = 0;

	/* No initrd provided by bootloader */
	if (!boot_params.hdr.type_of_loader || !img_start || !img_size)
		return;

	if (img_end < img_start) {
		printk(KERN_ERR " initrd wraps around end of memory, "
		       " disabling initrd \n ");
		return;
	}

	if (img_size >= lowmem_top / 2) {
		printk(KERN_ERR " initrd too large to handle, "
		       " disabling initrd \n ");
		return;
	}

	/*
	 * All in lowmem is the easy case: reserve it where it is.
	 * Otherwise we need to move the initrd down into lowmem.
	 * Note: the relocation target includes all the lowmem currently
	 * occupied by the initrd, we rely on that fact to keep the data
	 * intact.
	 */
	in_lowmem = (img_end <= lowmem_top);
	if (in_lowmem)
		dest = img_start;
	else
		dest = (lowmem_top - img_size) & PAGE_MASK;

	reserve_bootmem(dest, img_size, BOOTMEM_DEFAULT);
	initrd_start = dest + PAGE_OFFSET;
	initrd_end = initrd_start + img_size;

	if (!in_lowmem)
		do_relocate_initrd = true;
}
/* largest span that one early_ioremap() call is asked to map here */
#define MAX_MAP_CHUNK	(NR_FIX_BTMAPS << PAGE_SHIFT)

/*
 * Copy the initrd to the lowmem destination chosen by reserve_initrd().
 * Does nothing unless reserve_initrd() raised do_relocate_initrd.
 *
 * The part of the image that already sits in lowmem is copied directly;
 * the remainder is mapped piecewise with early_ioremap(), at most
 * MAX_MAP_CHUNK bytes (including page-alignment slop) per mapping.
 */
static void __init relocate_initrd(void)
{
	unsigned long ramdisk_image = boot_params.hdr.ramdisk_image;
	unsigned long ramdisk_size  = boot_params.hdr.ramdisk_size;
	unsigned long end_of_lowmem = max_low_pfn << PAGE_SHIFT;
	unsigned long slop, clen, mapaddr;
	char *p, *q;

	if (!do_relocate_initrd)
		return;

	q = (char *)initrd_start;

	/* Copy any lowmem portion of the initrd */
	if (ramdisk_image < end_of_lowmem) {
		clen = end_of_lowmem - ramdisk_image;
		p = (char *)__va(ramdisk_image);
		/*
		 * The destination reserved by reserve_initrd() ends at or
		 * below end_of_lowmem and may include the lowmem the image
		 * currently occupies, so source and destination can
		 * overlap: memmove(), not memcpy().
		 */
		memmove(q, p, clen);
		q += clen;
		ramdisk_image += clen;
		ramdisk_size -= clen;
	}

	/* Copy the highmem portion of the initrd */
	while (ramdisk_size) {
		/* offset of the current position within its page */
		slop = ramdisk_image & ~PAGE_MASK;
		clen = ramdisk_size;
		if (clen > MAX_MAP_CHUNK - slop)
			clen = MAX_MAP_CHUNK - slop;
		mapaddr = ramdisk_image & PAGE_MASK;
		p = early_ioremap(mapaddr, clen + slop);
		memcpy(q, p + slop, clen);
		early_iounmap(p, clen + slop);
		q += clen;
		ramdisk_image += clen;
		ramdisk_size -= clen;
	}
}
# endif /* CONFIG_BLK_DEV_INITRD */
2005-04-16 15:20:36 -07:00
/*
 * Bring up the boot-time allocator over [min_low_pfn, max_low_pfn) and
 * then reserve every region that must not be handed out: the kernel
 * image plus the bootmem bitmap, physical page 0, the EBDA region, and
 * (depending on configuration) the SMP trampoline page, the ACPI sleep
 * area, the SMP configuration table, the initrd, the NUMA KVA area,
 * the crash kernel and the iBFT region.
 * The calls are order-dependent; do not reorder them.
 */
void __init setup_bootmem_allocator ( void )
{
unsigned long bootmap_size ;
/*
 * Initialize the boot-time allocator (with low memory only):
 */
bootmap_size = init_bootmem ( min_low_pfn , max_low_pfn ) ;
register_bootmem_low_pages ( max_low_pfn ) ;
/*
 * Reserve the bootmem bitmap itself as well. We do this in two
 * steps (first step was init_bootmem()) because this catches
 * the (very unlikely) case of us accidentally initializing the
 * bootmem allocator with an invalid RAM area.
 *
 * The reserved span starts at the physical address of _text and runs
 * to the end of the bitmap, which sits at PFN_PHYS(min_low_pfn).
 */
2006-12-07 02:14:03 +01:00
reserve_bootmem ( __pa_symbol ( _text ) , ( PFN_PHYS ( min_low_pfn ) +
2008-02-07 00:15:17 -08:00
bootmap_size + PAGE_SIZE - 1 ) - __pa_symbol ( _text ) ,
BOOTMEM_DEFAULT ) ;
2005-04-16 15:20:36 -07:00
/*
 * reserve physical page 0 - it's a special BIOS page on many boxes,
 * enabling clean reboots, SMP operation, laptop functions.
 */
2008-02-07 00:15:17 -08:00
reserve_bootmem ( 0 , PAGE_SIZE , BOOTMEM_DEFAULT ) ;
2005-04-16 15:20:36 -07:00
2008-03-01 17:09:12 +01:00
/* reserve EBDA region */
2005-04-16 15:20:36 -07:00
reserve_ebda_region ( ) ;
# ifdef CONFIG_SMP
/*
 * But first pinch a few for the stack/trampoline stuff
 * FIXME: Don't need the extra page at 4K, but need to fix
 * trampoline before removing it. (see the GDT stuff)
 */
2008-02-07 00:15:17 -08:00
reserve_bootmem ( PAGE_SIZE , PAGE_SIZE , BOOTMEM_DEFAULT ) ;
2005-04-16 15:20:36 -07:00
# endif
2007-07-28 03:33:16 -04:00
# ifdef CONFIG_ACPI_SLEEP
2005-04-16 15:20:36 -07:00
/*
 * Reserve low memory region for sleep support.
 */
acpi_reserve_bootmem ( ) ;
# endif
# ifdef CONFIG_X86_FIND_SMP_CONFIG
/*
 * Find and reserve possible boot-time SMP configuration:
 */
find_smp_config ( ) ;
# endif
# ifdef CONFIG_BLK_DEV_INITRD
2008-01-30 13:32:51 +01:00
reserve_initrd ( ) ;
2005-04-16 15:20:36 -07:00
# endif
2008-01-30 13:32:51 +01:00
numa_kva_reserve ( ) ;
2007-10-18 23:40:59 -07:00
reserve_crashkernel ( ) ;
2008-04-09 19:50:41 -07:00
reserve_ibft_region ( ) ;
2005-04-16 15:20:36 -07:00
}
/*
* The node 0 pgdat is initialized before all of these because
* it ' s needed for bootmem . node > 0 pgdats have their virtual
* space allocated before the pagetables are in place to access
* them , so they can ' t be cleared then .
*
* This should all compile down to nothing when NUMA is off .
*/
2007-07-21 17:10:27 +02:00
static void __init remapped_pgdat_init ( void )
2005-04-16 15:20:36 -07:00
{
int nid ;
for_each_online_node ( nid ) {
if ( nid ! = 0 )
memset ( NODE_DATA ( nid ) , 0 , sizeof ( struct pglist_data ) ) ;
}
}
/* Record the MCA bus flag in MCA_bus; a no-op without CONFIG_MCA. */
static void set_mca_bus(int x)
{
#ifdef CONFIG_MCA
	MCA_bus = x;
#endif
}
2006-12-07 02:14:07 +01:00
/* Overridden in paravirt.c if CONFIG_PARAVIRT */
2007-01-11 01:52:44 +01:00
char * __init __attribute__ ( ( weak ) ) memory_setup ( void )
2006-12-07 02:14:07 +01:00
{
return machine_specific_memory_setup ( ) ;
}
2008-03-19 14:25:20 -03:00
# ifdef CONFIG_NUMA
/*
 * In the golden day, when everything among i386 and x86_64 will be
 * integrated, this will not live here
 */
void * x86_cpu_to_node_map_early_ptr ;
/* one slot per possible CPU, every slot starting out as NUMA_NO_NODE */
int x86_cpu_to_node_map_init [ NR_CPUS ] = {
[ 0 . . . NR_CPUS - 1 ] = NUMA_NO_NODE
} ;
/* per-CPU variable, also starting out as NUMA_NO_NODE */
DEFINE_PER_CPU ( int , x86_cpu_to_node_map ) = NUMA_NO_NODE ;
# endif
2005-04-16 15:20:36 -07:00
/*
* Determine if we were loaded by an EFI loader . If so , then we have also been
* passed the efi memmap , systab , etc . , so we should use these data structures
* for initialization . Note , the efi init code path is determined by the
* global efi_enabled . This allows the same kernel image to be used on existing
* systems ( with a traditional BIOS ) as well as on EFI systems .
*/
void __init setup_arch ( char * * cmdline_p )
{
unsigned long max_low_pfn ;
memcpy ( & boot_cpu_data , & new_cpu_data , sizeof ( new_cpu_data ) ) ;
pre_setup_arch_hook ( ) ;
early_cpu_init ( ) ;
2008-01-30 13:33:44 +01:00
early_ioremap_init ( ) ;
2005-04-16 15:20:36 -07:00
# ifdef CONFIG_EFI
2008-01-30 13:31:19 +01:00
if ( ! strncmp ( ( char * ) & boot_params . efi_info . efi_loader_signature ,
" EL32 " , 4 ) )
2005-04-16 15:20:36 -07:00
efi_enabled = 1 ;
# endif
2007-10-15 17:13:22 -07:00
ROOT_DEV = old_decode_dev ( boot_params . hdr . root_dev ) ;
screen_info = boot_params . screen_info ;
edid_info = boot_params . edid_info ;
apm_info . bios = boot_params . apm_bios_info ;
ist_info = boot_params . ist_info ;
2008-04-10 23:28:10 +02:00
saved_video_mode = boot_params . hdr . vid_mode ;
2007-10-15 17:13:22 -07:00
if ( boot_params . sys_desc_table . length ! = 0 ) {
set_mca_bus ( boot_params . sys_desc_table . table [ 3 ] & 0x2 ) ;
machine_id = boot_params . sys_desc_table . table [ 0 ] ;
machine_submodel_id = boot_params . sys_desc_table . table [ 1 ] ;
BIOS_revision = boot_params . sys_desc_table . table [ 2 ] ;
2005-04-16 15:20:36 -07:00
}
2007-10-15 17:13:22 -07:00
bootloader_type = boot_params . hdr . type_of_loader ;
2005-04-16 15:20:36 -07:00
# ifdef CONFIG_BLK_DEV_RAM
2007-10-15 17:13:22 -07:00
rd_image_start = boot_params . hdr . ram_size & RAMDISK_IMAGE_START_MASK ;
rd_prompt = ( ( boot_params . hdr . ram_size & RAMDISK_PROMPT_FLAG ) ! = 0 ) ;
rd_doload = ( ( boot_params . hdr . ram_size & RAMDISK_LOAD_FLAG ) ! = 0 ) ;
2005-04-16 15:20:36 -07:00
# endif
ARCH_SETUP
2008-01-30 13:31:19 +01:00
printk ( KERN_INFO " BIOS-provided physical RAM map: \n " ) ;
print_memory_map ( memory_setup ( ) ) ;
2005-04-16 15:20:36 -07:00
copy_edd ( ) ;
2007-10-15 17:13:22 -07:00
if ( ! boot_params . hdr . root_flags )
2005-04-16 15:20:36 -07:00
root_mountflags & = ~ MS_RDONLY ;
init_mm . start_code = ( unsigned long ) _text ;
init_mm . end_code = ( unsigned long ) _etext ;
init_mm . end_data = ( unsigned long ) _edata ;
init_mm . brk = init_pg_tables_end + PAGE_OFFSET ;
code_resource . start = virt_to_phys ( _text ) ;
code_resource . end = virt_to_phys ( _etext ) - 1 ;
data_resource . start = virt_to_phys ( _etext ) ;
data_resource . end = virt_to_phys ( _edata ) - 1 ;
2007-10-21 16:42:01 -07:00
bss_resource . start = virt_to_phys ( & __bss_start ) ;
bss_resource . end = virt_to_phys ( & __bss_stop ) - 1 ;
2005-04-16 15:20:36 -07:00
2006-09-26 10:52:32 +02:00
parse_early_param ( ) ;
2005-04-16 15:20:36 -07:00
2006-09-26 10:52:32 +02:00
if ( user_defined_memmap ) {
printk ( KERN_INFO " user-defined physical RAM map: \n " ) ;
print_memory_map ( " user " ) ;
2006-03-23 02:59:41 -08:00
}
2006-09-26 10:52:32 +02:00
2007-02-12 00:54:11 -08:00
strlcpy ( command_line , boot_command_line , COMMAND_LINE_SIZE ) ;
2006-09-26 10:52:32 +02:00
* cmdline_p = command_line ;
2006-03-23 02:59:41 -08:00
2008-01-30 13:32:11 +01:00
if ( efi_enabled )
efi_init ( ) ;
2008-01-30 13:33:32 +01:00
/* update e820 for memory not covered by WB MTRRs */
2008-04-16 02:29:42 +02:00
propagate_e820_map ( ) ;
2008-01-30 13:33:32 +01:00
mtrr_bp_init ( ) ;
if ( mtrr_trim_uncached_memory ( max_pfn ) )
2008-04-16 02:29:42 +02:00
propagate_e820_map ( ) ;
2008-03-23 00:16:49 -07:00
max_low_pfn = setup_memory ( ) ;
2008-01-30 13:33:32 +01:00
2008-02-15 17:52:48 -02:00
# ifdef CONFIG_KVM_CLOCK
kvmclock_init ( ) ;
# endif
2007-02-13 13:26:21 +01:00
# ifdef CONFIG_VMI
/*
* Must be after max_low_pfn is determined , and before kernel
* pagetables are setup .
*/
vmi_init ( ) ;
# endif
2008-02-22 12:21:36 -05:00
kvm_guest_init ( ) ;
2007-02-13 13:26:21 +01:00
2005-04-16 15:20:36 -07:00
/*
* NOTE : before this point _nobody_ is allowed to allocate
* any memory using the bootmem allocator . Although the
2007-10-20 01:13:56 +02:00
* allocator is now initialised only the first 8 Mb of the kernel
2005-04-16 15:20:36 -07:00
* virtual address space has been mapped . All allocations before
* paging_init ( ) has completed must use the alloc_bootmem_low_pages ( )
* variant ( which allocates DMA ' able memory ) and care must be taken
* not to exceed the 8 Mb limit .
*/
# ifdef CONFIG_SMP
smp_alloc_memory ( ) ; /* AP processor realmode stacks in low memory*/
# endif
paging_init ( ) ;
x86: early boot debugging via FireWire (ohci1394_dma=early)
This patch adds a new configuration option, which adds support for a new
early_param which gets checked in arch/x86/kernel/setup_{32,64}.c:setup_arch()
to decide whether OHCI-1394 FireWire controllers should be initialized and
enabled for physical DMA access to allow remote debugging of early problems
like issues in ACPI or other subsystems which are executed very early.
If the config option is not enabled, no code is changed, and if the boot
parameter is not given, no new code is executed, and independent of that,
all new code is freed after boot, so the config option can be even enabled
in standard, non-debug kernels.
With specialized tools, it is then possible to get debugging information
from machines which have no serial ports (notebooks) such as the printk
buffer contents, or any data which can be referenced from global pointers,
if it is stored below the 4GB limit and even memory dumps of the physical
RAM region below the 4GB limit can be taken without any cooperation from the
CPU of the host, so the machine can be crashed early, it does not matter.
In the extreme, even kernel debuggers can be accessed in this way. I wrote
a small kgdb module and an accompanying gdb stub for FireWire which allows
gdb to talk to kgdb using remote memory reads and writes over FireWire.
A version of the gdb stub for FireWire is able to read all global data
from a system which is running a normal kernel without any kernel debugger,
without any interruption or support of the system's CPU. That way, e.g. the
task struct and so on can be read and even manipulated when the physical DMA
access is granted.
A HOWTO is included in this patch, in Documentation/debugging-via-ohci1394.txt
and I've put a copy online at
ftp://ftp.suse.de/private/bk/firewire/docs/debugging-via-ohci1394.txt
It also has links to all the tools which are available to make use of it.
Another copy of it is online at:
ftp://ftp.suse.de/private/bk/firewire/kernel/ohci1394_dma_early-v2.diff
Signed-Off-By: Bernhard Kaindl <bk@suse.de>
Tested-By: Thomas Renninger <trenn@suse.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
2008-01-30 13:34:11 +01:00
/*
* NOTE : On x86 - 32 , only from this point on , fixmaps are ready for use .
*/
# ifdef CONFIG_PROVIDE_OHCI1394_DMA_INIT
if ( init_ohci1394_dma_early )
init_ohci1394_dma_on_all_controllers ( ) ;
# endif
2005-04-16 15:20:36 -07:00
remapped_pgdat_init ( ) ;
2005-06-23 00:07:57 -07:00
sparse_init ( ) ;
2005-04-16 15:20:36 -07:00
zone_sizes_init ( ) ;
/*
* NOTE : at this point the bootmem allocator is fully available .
*/
2008-01-30 13:32:51 +01:00
# ifdef CONFIG_BLK_DEV_INITRD
relocate_initrd ( ) ;
# endif
2007-07-17 18:37:03 -07:00
paravirt_post_allocator_init ( ) ;
2005-04-16 15:20:36 -07:00
dmi_scan_machine ( ) ;
2008-01-30 13:32:51 +01:00
io_delay_init ( ) ;
2008-01-30 13:30:05 +01:00
2008-03-19 14:25:20 -03:00
# ifdef CONFIG_X86_SMP
/*
* setup to use the early static init tables during kernel startup
* X86_SMP will exclude sub - arches that don ' t deal well with it .
*/
x86_cpu_to_apicid_early_ptr = ( void * ) x86_cpu_to_apicid_init ;
x86_bios_cpu_apicid_early_ptr = ( void * ) x86_bios_cpu_apicid_init ;
# ifdef CONFIG_NUMA
x86_cpu_to_node_map_early_ptr = ( void * ) x86_cpu_to_node_map_init ;
# endif
# endif
2005-04-16 15:20:36 -07:00
# ifdef CONFIG_X86_GENERICARCH
2006-09-26 10:52:32 +02:00
generic_apic_probe ( ) ;
2008-01-30 13:32:51 +01:00
# endif
2005-04-16 15:20:36 -07:00
2005-08-24 12:07:20 -04:00
# ifdef CONFIG_ACPI
2005-04-16 15:20:36 -07:00
/*
* Parse the ACPI tables for possible boot - time SMP configuration .
*/
acpi_boot_table_init ( ) ;
2006-06-08 00:43:38 -07:00
# endif
2007-10-19 20:35:03 +02:00
early_quirks ( ) ;
2006-06-08 00:43:38 -07:00
# ifdef CONFIG_ACPI
2005-04-16 15:20:36 -07:00
acpi_boot_init ( ) ;
2005-09-03 15:56:31 -07:00
# if defined(CONFIG_SMP) && defined(CONFIG_X86_PC)
if ( def_to_bigsmp )
printk ( KERN_WARNING " More than 8 CPUs detected and "
" CONFIG_X86_PC cannot handle it. \n Use "
" CONFIG_X86_GENERICARCH or CONFIG_X86_BIGSMP. \n " ) ;
# endif
# endif
2005-04-16 15:20:36 -07:00
# ifdef CONFIG_X86_LOCAL_APIC
if ( smp_found_config )
get_smp_config ( ) ;
# endif
2006-12-22 01:09:54 -08:00
e820_register_memory ( ) ;
2007-07-21 17:11:09 +02:00
e820_mark_nosave_regions ( ) ;
2005-04-16 15:20:36 -07:00
# ifdef CONFIG_VT
# if defined(CONFIG_VGA_CONSOLE)
if ( ! efi_enabled | | ( efi_mem_type ( 0xa0000 ) ! = EFI_CONVENTIONAL_MEMORY ) )
conswitchp = & vga_con ;
# elif defined(CONFIG_DUMMY_CONSOLE)
conswitchp = & dummy_con ;
# endif
# endif
}
2008-01-30 13:30:32 +01:00
/*
 * request_standard_resources - claim the standard resource ranges
 *
 * Hands code_resource, data_resource and bss_resource to
 * init_iomem_resources(), requests video_ram_resource from
 * iomem_resource, and then requests every entry of
 * standard_io_resources[] from ioport_resource.
 *
 * Registered as a subsys_initcall.  It is called just before
 * pcibios_init(), which is also a subsys_initcall, but is linked in
 * later (in arch/i386/pci/common.c).
 *
 * The results of the individual request_resource() calls are not
 * inspected; the function always returns 0.
 */
static int __init request_standard_resources(void)
{
	int idx = 0;

	printk(KERN_INFO " Setting up standard PCI resources \n ");

	init_iomem_resources(&code_resource, &data_resource, &bss_resource);
	request_resource(&iomem_resource, &video_ram_resource);

	/* request I/O space for devices used on all i[345]86 PCs */
	while (idx < ARRAY_SIZE(standard_io_resources)) {
		request_resource(&ioport_resource, &standard_io_resources[idx]);
		idx++;
	}

	return 0;
}
subsys_initcall(request_standard_resources);