2008-01-30 13:30:12 +01:00
/*
2005-04-16 15:20:36 -07:00
* Handle the memory map .
* The functions here do the job until bootmem takes over .
2005-05-01 08:58:52 -07:00
*
* Getting sanitize_e820_map ( ) in sync with i386 version by applying change :
* - Provisions for empty E820 memory regions ( reported by certain BIOSes ) .
* Alex Achenbach < xela @ slit . de > , December 2002.
* Venkatesh Pallipadi < venkatesh . pallipadi @ intel . com >
*
2005-04-16 15:20:36 -07:00
*/
# include <linux/kernel.h>
# include <linux/types.h>
# include <linux/init.h>
# include <linux/bootmem.h>
# include <linux/ioport.h>
# include <linux/string.h>
2005-06-25 14:58:04 -07:00
# include <linux/kexec.h>
2005-09-16 19:27:54 -07:00
# include <linux/module.h>
2006-09-25 23:32:46 -07:00
# include <linux/mm.h>
2007-05-06 14:50:43 -07:00
# include <linux/suspend.h>
# include <linux/pfn.h>
2005-09-16 19:27:54 -07:00
2006-07-10 04:43:49 -07:00
# include <asm/pgtable.h>
2005-04-16 15:20:36 -07:00
# include <asm/page.h>
# include <asm/e820.h>
# include <asm/proto.h>
2007-10-15 17:13:22 -07:00
# include <asm/setup.h>
2005-11-05 17:25:53 +01:00
# include <asm/sections.h>
2008-01-30 13:30:17 +01:00
# include <asm/kdebug.h>
2005-04-16 15:20:36 -07:00
2007-05-02 19:27:11 +02:00
struct e820map e820 ;
2006-09-26 10:52:33 +02:00
2008-01-30 13:30:12 +01:00
/*
2005-04-16 15:20:36 -07:00
* PFN of last memory page .
*/
2008-01-30 13:30:12 +01:00
unsigned long end_pfn ;
2005-04-16 15:20:36 -07:00
2008-01-30 13:30:12 +01:00
/*
2005-04-16 15:20:36 -07:00
* end_pfn only includes RAM , while end_pfn_map includes all e820 entries .
* The direct mapping extends to end_pfn_map , so that we can directly access
* apertures , ACPI and other tables without having to play with fixmaps .
2008-01-30 13:30:12 +01:00
*/
unsigned long end_pfn_map ;
2005-04-16 15:20:36 -07:00
2008-01-30 13:30:12 +01:00
/*
2005-04-16 15:20:36 -07:00
* Last pfn which the user wants to use .
*/
2006-09-26 10:52:31 +02:00
static unsigned long __initdata end_user_pfn = MAXMEM > > PAGE_SHIFT ;
2005-04-16 15:20:36 -07:00
2008-01-30 13:33:17 +01:00
/*
 * Early reserved memory areas.
 *
 * Each entry describes the range [start, end) together with a short label.
 * The table is scanned from the front; the first entry whose end is zero
 * marks the first unused slot.
 */
#define MAX_EARLY_RES 20

struct early_res {
	unsigned long start, end;
	/*
	 * Printed with %s, so the label must be NUL-terminated: at most
	 * sizeof(name) - 1 characters plus the terminator.
	 */
	char name[16];
};

static struct early_res early_res[MAX_EARLY_RES] __initdata = {
	/*
	 * The labels below are kept shorter than sizeof(name); a 16-character
	 * initializer would fill the array without a terminating NUL.
	 */
	{ 0, PAGE_SIZE, "BIOS data page" },	/* BIOS data page */
#ifdef CONFIG_SMP
	{ SMP_TRAMPOLINE_BASE, SMP_TRAMPOLINE_BASE + 2 * PAGE_SIZE,
	  "SMP_TRAMPOLINE" },
#endif
	{}
};
2008-02-01 17:49:41 +01:00
/*
 * Record [start, end) as an early reservation, optionally labelled @name.
 *
 * Panics if the range intersects a reservation that is already recorded,
 * or if every slot of the table is in use.
 */
void __init reserve_early(unsigned long start, unsigned long end, char *name)
{
	struct early_res *slot = early_res;
	int used = 0;

	/* Walk the occupied slots; a zero end marks the first free one. */
	while (used < MAX_EARLY_RES && slot->end) {
		if (start < slot->end && end > slot->start)
			panic(" Overlapping early reservations %lx-%lx %s to %lx-%lx %s \n ",
			      start, end - 1, name ? name : " ",
			      slot->start, slot->end - 1, slot->name);
		slot++;
		used++;
	}

	if (used >= MAX_EARLY_RES)
		panic(" Too many early reservations ");

	slot->start = start;
	slot->end = end;
	/* Copy at most sizeof(name) - 1 bytes of the label. */
	if (name)
		strncpy(slot->name, name, sizeof(slot->name) - 1);
}
2006-05-08 15:17:28 +02:00
2008-01-30 13:33:17 +01:00
/*
 * Log every recorded early reservation and pass it on to
 * reserve_bootmem_generic().
 */
void __init early_res_to_bootmem(void)
{
	struct early_res *res = early_res;
	int idx;

	for (idx = 0; idx < MAX_EARLY_RES && res->end; idx++, res++) {
		printk(KERN_INFO " early res: %d [%lx-%lx] %s \n ", idx,
		       res->start, res->end - 1, res->name);
		reserve_bootmem_generic(res->start, res->end - res->start);
	}
}
2006-05-08 15:17:28 +02:00
2008-01-30 13:33:17 +01:00
/* Check for already reserved areas */
2008-04-17 17:40:45 +02:00
static inline int
bad_addr ( unsigned long * addrp , unsigned long size , unsigned long align )
2008-01-30 13:33:17 +01:00
{
int i ;
unsigned long addr = * addrp , last ;
int changed = 0 ;
again :
last = addr + size ;
for ( i = 0 ; i < MAX_EARLY_RES & & early_res [ i ] . end ; i + + ) {
struct early_res * r = & early_res [ i ] ;
if ( last > = r - > start & & addr < r - > end ) {
2008-04-17 17:40:45 +02:00
* addrp = addr = round_up ( r - > end , align ) ;
2008-01-30 13:33:17 +01:00
changed = 1 ;
goto again ;
}
2007-02-13 13:26:19 +01:00
}
2008-01-30 13:33:17 +01:00
return changed ;
2008-01-30 13:30:12 +01:00
}
2005-04-16 15:20:36 -07:00
2006-04-07 19:49:27 +02:00
/*
* This function checks if any part of the range < start , end > is mapped
* with type .
*/
2007-05-02 19:27:11 +02:00
int
2006-04-07 19:49:24 +02:00
e820_any_mapped ( unsigned long start , unsigned long end , unsigned type )
2008-01-30 13:30:12 +01:00
{
2005-04-16 15:20:36 -07:00
int i ;
2008-01-30 13:30:12 +01:00
for ( i = 0 ; i < e820 . nr_map ; i + + ) {
struct e820entry * ei = & e820 . map [ i ] ;
if ( type & & ei - > type ! = type )
2005-04-16 15:20:36 -07:00
continue ;
2005-09-06 15:16:20 -07:00
if ( ei - > addr > = end | | ei - > addr + ei - > size < = start )
2008-01-30 13:30:12 +01:00
continue ;
return 1 ;
}
2005-04-16 15:20:36 -07:00
return 0 ;
}
2007-05-02 19:27:11 +02:00
EXPORT_SYMBOL_GPL ( e820_any_mapped ) ;
2005-04-16 15:20:36 -07:00
2006-09-19 08:15:22 -07:00
/*
* This function checks if the entire range < start , end > is mapped with type .
*
* Note : this function only works correct if the e820 table is sorted and
* not - overlapping , which is the case
*/
2008-01-30 13:30:12 +01:00
int __init e820_all_mapped ( unsigned long start , unsigned long end ,
unsigned type )
2006-09-19 08:15:22 -07:00
{
int i ;
2008-01-30 13:30:12 +01:00
2006-09-19 08:15:22 -07:00
for ( i = 0 ; i < e820 . nr_map ; i + + ) {
struct e820entry * ei = & e820 . map [ i ] ;
2008-01-30 13:30:12 +01:00
2006-09-19 08:15:22 -07:00
if ( type & & ei - > type ! = type )
continue ;
/* is the region (part) in overlap with the current region ?*/
if ( ei - > addr > = end | | ei - > addr + ei - > size < = start )
continue ;
/* if the region is at the beginning of <start,end> we move
* start to the end of the region since it ' s ok until there
*/
if ( ei - > addr < = start )
start = ei - > addr + ei - > size ;
2008-01-30 13:30:12 +01:00
/*
* if start is now at or beyond end , we ' re done , full
* coverage
*/
2006-09-19 08:15:22 -07:00
if ( start > = end )
2008-01-30 13:30:12 +01:00
return 1 ;
2006-09-19 08:15:22 -07:00
}
return 0 ;
}
2008-01-30 13:30:12 +01:00
/*
 * Find a free area of @size bytes, aligned to @align, inside an E820_RAM
 * entry and inside the range given by @start and @end.  Areas touching an
 * early reservation are skipped via bad_addr().
 *
 * Returns the start address of the area, or -1UL if none was found.
 */
unsigned long __init find_e820_area(unsigned long start, unsigned long end,
				    unsigned long size, unsigned long align)
{
	int n;

	for (n = 0; n < e820.nr_map; n++) {
		struct e820entry *ent = &e820.map[n];
		unsigned long cand, cand_end, ram_end;

		if (ent->type != E820_RAM)
			continue;

		ram_end = ent->addr + ent->size;

		/* First aligned address in this entry, not below start. */
		cand = round_up(ent->addr, align);
		if (cand < start)
			cand = round_up(start, align);
		if (cand > ram_end)
			continue;

		/*
		 * Keep stepping over early reservations as long as the
		 * candidate was moved and still fits into this entry.
		 */
		for (;;) {
			if (!bad_addr(&cand, size, align))
				break;
			if (cand + size > ram_end)
				break;
		}

		cand_end = cand + size;
		if (cand_end <= ram_end && cand_end <= end)
			return cand;
	}

	return -1UL;
}
2005-04-16 15:20:36 -07:00
/*
 * Find the highest page frame number we have available.
 *
 * end_pfn_map is raised to the highest active PFN if needed and capped at
 * MAXMEM >> PAGE_SHIFT.  The returned value is additionally limited by
 * end_user_pfn and by end_pfn_map.  The global end_pfn is not written
 * here; the result is only returned.
 */
unsigned long __init e820_end_of_ram(void)
{
	unsigned long top_pfn = find_max_pfn_with_active_regions();

	if (end_pfn_map < top_pfn)
		end_pfn_map = top_pfn;
	if (end_pfn_map > MAXMEM >> PAGE_SHIFT)
		end_pfn_map = MAXMEM >> PAGE_SHIFT;

	if (top_pfn > end_user_pfn)
		top_pfn = end_user_pfn;
	if (top_pfn > end_pfn_map)
		top_pfn = end_pfn_map;

	printk(KERN_INFO " end_pfn_map = %lu \n ", end_pfn_map);
	return top_pfn;
}
2005-08-26 18:34:10 -07:00
/*
2005-04-16 15:20:36 -07:00
* Mark e820 reserved areas as busy for the resource manager .
*/
2008-01-30 13:30:32 +01:00
void __init e820_reserve_resources ( struct resource * code_resource ,
struct resource * data_resource , struct resource * bss_resource )
2005-04-16 15:20:36 -07:00
{
int i ;
for ( i = 0 ; i < e820 . nr_map ; i + + ) {
struct resource * res ;
res = alloc_bootmem_low ( sizeof ( struct resource ) ) ;
switch ( e820 . map [ i ] . type ) {
case E820_RAM : res - > name = " System RAM " ; break ;
case E820_ACPI : res - > name = " ACPI Tables " ; break ;
case E820_NVS : res - > name = " ACPI Non-volatile Storage " ; break ;
default : res - > name = " reserved " ;
}
res - > start = e820 . map [ i ] . addr ;
res - > end = res - > start + e820 . map [ i ] . size - 1 ;
res - > flags = IORESOURCE_MEM | IORESOURCE_BUSY ;
request_resource ( & iomem_resource , res ) ;
if ( e820 . map [ i ] . type = = E820_RAM ) {
/*
2008-01-30 13:30:12 +01:00
* We don ' t know which RAM region contains kernel data ,
* so we try it repeatedly and let the resource manager
* test it .
2005-04-16 15:20:36 -07:00
*/
2008-01-30 13:30:32 +01:00
request_resource ( res , code_resource ) ;
request_resource ( res , data_resource ) ;
request_resource ( res , bss_resource ) ;
2005-06-25 14:58:04 -07:00
# ifdef CONFIG_KEXEC
2007-10-18 23:40:59 -07:00
if ( crashk_res . start ! = crashk_res . end )
request_resource ( res , & crashk_res ) ;
2005-06-25 14:58:04 -07:00
# endif
2005-04-16 15:20:36 -07:00
}
}
}
2006-09-25 23:32:46 -07:00
/*
 * Register as nosave (for software suspend and suspend to RAM) the page
 * ranges that do not correspond to e820 RAM: the gaps between entries and
 * the entries whose type is not E820_RAM.
 *
 * The e820 map must be sorted and free of overlaps, and its first entry
 * is assumed to be RAM.
 */
void __init e820_mark_nosave_regions(void)
{
	unsigned long prev_end;
	int n = 1;

	prev_end = round_down(e820.map[0].addr + e820.map[0].size, PAGE_SIZE);

	while (n < e820.nr_map) {
		struct e820entry *ent = &e820.map[n++];

		/* Gap between the previous entry and this one. */
		if (ent->addr > prev_end)
			register_nosave_region(PFN_DOWN(prev_end),
					       PFN_UP(ent->addr));

		prev_end = round_down(ent->addr + ent->size, PAGE_SIZE);

		/* The entry itself, unless it is RAM. */
		if (ent->type != E820_RAM)
			register_nosave_region(PFN_UP(ent->addr),
					       PFN_DOWN(prev_end));

		/* Stop once the end of memory has been reached. */
		if (prev_end >= (end_pfn << PAGE_SHIFT))
			break;
	}
}
2007-07-21 17:10:31 +02:00
/*
 * Compute the page-frame range of e820 entry @ei that lies inside
 * [start_pfn, end_pfn) and below end_user_pfn.
 *
 * *ei_startpfn and *ei_endpfn are always written; they describe a usable
 * range only when 1 is returned.  Returns 0 for entries smaller than a
 * page, non-RAM entries, and entries outside the requested range.
 *
 * Side effect: a non-RAM entry of at least one page that ends above
 * end_pfn_map raises end_pfn_map.
 */
static int __init e820_find_active_region(const struct e820entry *ei,
					  unsigned long start_pfn,
					  unsigned long end_pfn,
					  unsigned long *ei_startpfn,
					  unsigned long *ei_endpfn)
{
	unsigned long lo = round_up(ei->addr, PAGE_SIZE) >> PAGE_SHIFT;
	unsigned long hi = round_down(ei->addr + ei->size, PAGE_SIZE) >>
			   PAGE_SHIFT;
	int active = 0;

	/* Entry does not contain a single whole page. */
	if (lo >= hi)
		goto out;

	/* Non-RAM entries only contribute to end_pfn_map. */
	if (ei->type != E820_RAM) {
		if (hi > end_pfn_map)
			end_pfn_map = hi;
		goto out;
	}

	/* Entirely outside the requested range. */
	if (hi <= start_pfn || lo >= end_pfn)
		goto out;

	/* Clamp to the requested range. */
	if (lo < start_pfn)
		lo = start_pfn;
	if (hi > end_pfn)
		hi = end_pfn;

	/* Obey end_user_pfn to save on memmap. */
	if (lo >= end_user_pfn)
		goto out;
	if (hi > end_user_pfn)
		hi = end_user_pfn;

	active = 1;
out:
	*ei_startpfn = lo;
	*ei_endpfn = hi;
	return active;
}
2006-09-27 01:49:52 -07:00
/*
 * Walk the e820 map and call add_active_range() for node @nid on every
 * active region found between start_pfn and end_pfn.
 */
void __init
e820_register_active_regions(int nid, unsigned long start_pfn,
			     unsigned long end_pfn)
{
	unsigned long first_pfn, limit_pfn;
	int n;

	for (n = 0; n < e820.nr_map; n++) {
		if (!e820_find_active_region(&e820.map[n],
					     start_pfn, end_pfn,
					     &first_pfn, &limit_pfn))
			continue;
		add_active_range(nid, first_pfn, limit_pfn);
	}
}
2008-01-30 13:30:12 +01:00
/*
2005-04-16 15:20:36 -07:00
* Add a memory region to the kernel e820 map .
2008-01-30 13:30:12 +01:00
*/
2005-04-16 15:20:36 -07:00
void __init add_memory_region ( unsigned long start , unsigned long size , int type )
{
int x = e820 . nr_map ;
if ( x = = E820MAX ) {
printk ( KERN_ERR " Ooops! Too many entries in the memory map! \n " ) ;
return ;
}
e820 . map [ x ] . addr = start ;
e820 . map [ x ] . size = size ;
e820 . map [ x ] . type = type ;
e820 . nr_map + + ;
}
2007-07-21 17:11:29 +02:00
/*
 * Find the hole size (in bytes) in the memory range.
 * @start: starting address of the memory range to scan
 * @end: ending address of the memory range to scan
 *
 * The result is the length of the range minus the active regions, counted
 * in whole pages, that e820_find_active_region() reports inside it.
 */
unsigned long __init e820_hole_size(unsigned long start, unsigned long end)
{
	unsigned long first_pfn = start >> PAGE_SHIFT;
	unsigned long last_pfn = end >> PAGE_SHIFT;
	unsigned long ram_pages = 0;
	int n;

	for (n = 0; n < e820.nr_map; n++) {
		unsigned long lo, hi;

		if (!e820_find_active_region(&e820.map[n],
					     first_pfn, last_pfn,
					     &lo, &hi))
			continue;
		ram_pages += hi - lo;
	}

	return end - start - (ram_pages << PAGE_SHIFT);
}
2008-01-30 13:30:30 +01:00
/*
 * Log every entry of the e820 map: the prefix @who, the start and end
 * address, and a description of the entry type.
 */
static void __init e820_print_map(char *who)
{
	const struct e820entry *ent = e820.map;
	int n;

	for (n = 0; n < e820.nr_map; n++, ent++) {
		unsigned long long from = ent->addr;
		unsigned long long to = ent->addr + ent->size;

		printk(KERN_INFO " %s: %016Lx - %016Lx ", who, from, to);

		/* Finish the line with the type of the entry. */
		switch (ent->type) {
		case E820_RAM:
			printk(KERN_CONT " (usable) \n ");
			break;
		case E820_RESERVED:
			printk(KERN_CONT " (reserved) \n ");
			break;
		case E820_ACPI:
			printk(KERN_CONT " (ACPI data) \n ");
			break;
		case E820_NVS:
			printk(KERN_CONT " (ACPI NVS) \n ");
			break;
		default:
			printk(KERN_CONT " type %u \n ", ent->type);
			break;
		}
	}
}
/*
* Sanitize the BIOS e820 map .
*
2008-01-30 13:30:12 +01:00
* Some e820 responses include overlapping entries . The following
2005-04-16 15:20:36 -07:00
* replaces the original e820 map with a new one , removing overlaps .
*
*/
2008-01-30 13:30:12 +01:00
static int __init sanitize_e820_map ( struct e820entry * biosmap , char * pnr_map )
2005-04-16 15:20:36 -07:00
{
struct change_member {
struct e820entry * pbios ; /* pointer to original bios entry */
unsigned long long addr ; /* address for this change point */
} ;
static struct change_member change_point_list [ 2 * E820MAX ] __initdata ;
static struct change_member * change_point [ 2 * E820MAX ] __initdata ;
static struct e820entry * overlap_list [ E820MAX ] __initdata ;
static struct e820entry new_bios [ E820MAX ] __initdata ;
struct change_member * change_tmp ;
unsigned long current_type , last_type ;
unsigned long long last_addr ;
int chgidx , still_changing ;
int overlap_entries ;
int new_bios_entry ;
2005-05-01 08:58:52 -07:00
int old_nr , new_nr , chg_nr ;
2005-04-16 15:20:36 -07:00
int i ;
/*
2008-01-30 13:30:12 +01:00
Visually we ' re performing the following
( 1 , 2 , 3 , 4 = memory types ) . . .
2005-04-16 15:20:36 -07:00
Sample memory map ( w / overlaps ) :
____22__________________
______________________4_
____1111________________
_44_____________________
11111111 ________________
____________________33__
___________44___________
__________33333_________
______________22________
___________________2222_
_________111111111______
_____________________11_
_________________4______
Sanitized equivalent ( no overlap ) :
1 _______________________
_44_____________________
___1____________________
____22__________________
______11________________
_________1______________
__________3_____________
___________44___________
_____________33_________
_______________2________
________________1_______
_________________4______
___________________2____
____________________33__
______________________4_
*/
/* if there's only one memory region, don't bother */
if ( * pnr_map < 2 )
return - 1 ;
old_nr = * pnr_map ;
/* bail out if we find any unreasonable addresses in bios map */
2008-01-30 13:30:12 +01:00
for ( i = 0 ; i < old_nr ; i + + )
2005-04-16 15:20:36 -07:00
if ( biosmap [ i ] . addr + biosmap [ i ] . size < biosmap [ i ] . addr )
return - 1 ;
/* create pointers for initial change-point information (for sorting) */
2008-01-30 13:30:12 +01:00
for ( i = 0 ; i < 2 * old_nr ; i + + )
2005-04-16 15:20:36 -07:00
change_point [ i ] = & change_point_list [ i ] ;
2005-05-01 08:58:52 -07:00
/* record all known change-points (starting and ending addresses),
omitting those that are for empty memory regions */
2005-04-16 15:20:36 -07:00
chgidx = 0 ;
2008-01-30 13:30:12 +01:00
for ( i = 0 ; i < old_nr ; i + + ) {
2005-05-01 08:58:52 -07:00
if ( biosmap [ i ] . size ! = 0 ) {
change_point [ chgidx ] - > addr = biosmap [ i ] . addr ;
change_point [ chgidx + + ] - > pbios = & biosmap [ i ] ;
2008-01-30 13:30:12 +01:00
change_point [ chgidx ] - > addr = biosmap [ i ] . addr +
biosmap [ i ] . size ;
2005-05-01 08:58:52 -07:00
change_point [ chgidx + + ] - > pbios = & biosmap [ i ] ;
}
2005-04-16 15:20:36 -07:00
}
2005-05-01 08:58:52 -07:00
chg_nr = chgidx ;
2005-04-16 15:20:36 -07:00
/* sort change-point list by memory addresses (low -> high) */
still_changing = 1 ;
while ( still_changing ) {
still_changing = 0 ;
2008-01-30 13:30:12 +01:00
for ( i = 1 ; i < chg_nr ; i + + ) {
unsigned long long curaddr , lastaddr ;
unsigned long long curpbaddr , lastpbaddr ;
curaddr = change_point [ i ] - > addr ;
lastaddr = change_point [ i - 1 ] - > addr ;
curpbaddr = change_point [ i ] - > pbios - > addr ;
lastpbaddr = change_point [ i - 1 ] - > pbios - > addr ;
/*
* swap entries , when :
*
* curaddr > lastaddr or
* curaddr = = lastaddr and curaddr = = curpbaddr and
* lastaddr ! = lastpbaddr
*/
if ( curaddr < lastaddr | |
( curaddr = = lastaddr & & curaddr = = curpbaddr & &
lastaddr ! = lastpbaddr ) ) {
2005-04-16 15:20:36 -07:00
change_tmp = change_point [ i ] ;
change_point [ i ] = change_point [ i - 1 ] ;
change_point [ i - 1 ] = change_tmp ;
2008-01-30 13:30:12 +01:00
still_changing = 1 ;
2005-04-16 15:20:36 -07:00
}
}
}
/* create a new bios memory map, removing overlaps */
2008-01-30 13:30:12 +01:00
overlap_entries = 0 ; /* number of entries in the overlap table */
new_bios_entry = 0 ; /* index for creating new bios map entries */
2005-04-16 15:20:36 -07:00
last_type = 0 ; /* start with undefined memory type */
last_addr = 0 ; /* start with 0 as last starting address */
2008-01-30 13:30:12 +01:00
2005-04-16 15:20:36 -07:00
/* loop through change-points, determining affect on the new bios map */
2008-01-30 13:30:12 +01:00
for ( chgidx = 0 ; chgidx < chg_nr ; chgidx + + ) {
2005-04-16 15:20:36 -07:00
/* keep track of all overlapping bios entries */
2008-01-30 13:30:12 +01:00
if ( change_point [ chgidx ] - > addr = =
change_point [ chgidx ] - > pbios - > addr ) {
/*
* add map entry to overlap list ( > 1 entry
* implies an overlap )
*/
overlap_list [ overlap_entries + + ] =
change_point [ chgidx ] - > pbios ;
} else {
/*
* remove entry from list ( order independent ,
* so swap with last )
*/
for ( i = 0 ; i < overlap_entries ; i + + ) {
if ( overlap_list [ i ] = =
change_point [ chgidx ] - > pbios )
overlap_list [ i ] =
overlap_list [ overlap_entries - 1 ] ;
2005-04-16 15:20:36 -07:00
}
overlap_entries - - ;
}
2008-01-30 13:30:12 +01:00
/*
* if there are overlapping entries , decide which
* " type " to use ( larger value takes precedence - -
* 1 = usable , 2 , 3 , 4 , 4 + = unusable )
*/
2005-04-16 15:20:36 -07:00
current_type = 0 ;
2008-01-30 13:30:12 +01:00
for ( i = 0 ; i < overlap_entries ; i + + )
2005-04-16 15:20:36 -07:00
if ( overlap_list [ i ] - > type > current_type )
current_type = overlap_list [ i ] - > type ;
2008-01-30 13:30:12 +01:00
/*
* continue building up new bios map based on this
* information
*/
2005-04-16 15:20:36 -07:00
if ( current_type ! = last_type ) {
if ( last_type ! = 0 ) {
new_bios [ new_bios_entry ] . size =
change_point [ chgidx ] - > addr - last_addr ;
2008-01-30 13:30:12 +01:00
/*
* move forward only if the new size
* was non - zero
*/
2005-04-16 15:20:36 -07:00
if ( new_bios [ new_bios_entry ] . size ! = 0 )
2008-01-30 13:30:12 +01:00
/*
* no more space left for new
* bios entries ?
*/
2005-04-16 15:20:36 -07:00
if ( + + new_bios_entry > = E820MAX )
2008-01-30 13:30:12 +01:00
break ;
2005-04-16 15:20:36 -07:00
}
if ( current_type ! = 0 ) {
2008-01-30 13:30:12 +01:00
new_bios [ new_bios_entry ] . addr =
change_point [ chgidx ] - > addr ;
2005-04-16 15:20:36 -07:00
new_bios [ new_bios_entry ] . type = current_type ;
2008-01-30 13:30:12 +01:00
last_addr = change_point [ chgidx ] - > addr ;
2005-04-16 15:20:36 -07:00
}
last_type = current_type ;
}
}
2008-01-30 13:30:12 +01:00
/* retain count for new bios entries */
new_nr = new_bios_entry ;
2005-04-16 15:20:36 -07:00
/* copy new bios mapping into original location */
2008-01-30 13:30:12 +01:00
memcpy ( biosmap , new_bios , new_nr * sizeof ( struct e820entry ) ) ;
2005-04-16 15:20:36 -07:00
* pnr_map = new_nr ;
return 0 ;
}
/*
* Copy the BIOS e820 map into a safe place .
*
* Sanity - check it while we ' re at it . .
*
* If we ' re lucky and live on a modern system , the setup code
* will have given us a memory map that we can use to properly
* set up memory . If we aren ' t , we ' ll fake a memory map .
*/
2008-01-30 13:30:12 +01:00
static int __init copy_e820_map ( struct e820entry * biosmap , int nr_map )
2005-04-16 15:20:36 -07:00
{
/* Only one memory region (or negative)? Ignore it */
if ( nr_map < 2 )
return - 1 ;
do {
unsigned long start = biosmap - > addr ;
unsigned long size = biosmap - > size ;
unsigned long end = start + size ;
unsigned long type = biosmap - > type ;
/* Overflow in 64 bits? Ignore the memory map. */
if ( start > end )
return - 1 ;
add_memory_region ( start , size , type ) ;
2008-01-30 13:30:12 +01:00
} while ( biosmap + + , - - nr_map ) ;
2005-04-16 15:20:36 -07:00
return 0 ;
}
2008-01-30 13:30:30 +01:00
/*
 * Print @msg through early_printk() and then panic with the same text.
 *
 * The message is passed as an argument to a fixed "%s" format rather than
 * as the format string itself, so a '%' inside @msg is printed literally
 * instead of being interpreted as a conversion.
 */
static void early_panic(char *msg)
{
	early_printk("%s", msg);
	panic("%s", msg);
}
2005-04-16 15:20:36 -07:00
2008-01-30 13:31:11 +01:00
/*
 * Sanitize the BIOS-supplied E820 map from boot_params, copy it into the
 * kernel's e820 map and print the result.  If the map cannot be copied
 * the boot is stopped via early_panic().
 *
 * Returns the label used when printing the map.  We're not void only for
 * x86 32-bit compat.
 */
char * __init machine_specific_memory_setup(void)
{
	char *who = " BIOS-e820 ";
	int rc;

	sanitize_e820_map(boot_params.e820_map, &boot_params.e820_entries);

	rc = copy_e820_map(boot_params.e820_map, boot_params.e820_entries);
	if (rc < 0)
		early_panic(" Cannot find a valid memory map ");

	printk(KERN_INFO " BIOS-provided physical RAM map: \n ");
	e820_print_map(who);

	/* In case someone cares... */
	return who;
}
2006-09-26 10:52:32 +02:00
/*
 * Parse the value of the "mem" boot option: a memory size, which is
 * converted to a page frame number and stored in end_user_pfn.
 *
 * Returns 0 on success, -EINVAL when no value was given.
 */
static int __init parse_memopt(char *p)
{
	unsigned long limit;

	if (p == NULL)
		return -EINVAL;

	limit = memparse(p, &p);
	end_user_pfn = limit >> PAGE_SHIFT;

	return 0;
}
2006-09-26 10:52:32 +02:00
early_param ( " mem " , parse_memopt ) ;
2005-04-16 15:20:36 -07:00
2006-09-26 10:52:32 +02:00
static int userdef __initdata ;
2005-04-16 15:20:36 -07:00
2006-09-26 10:52:32 +02:00
static int __init parse_memmap_opt ( char * p )
2006-01-09 20:51:46 -08:00
{
2006-09-26 10:52:32 +02:00
char * oldp ;
2006-01-09 20:51:46 -08:00
unsigned long long start_at , mem_size ;
2006-09-26 10:52:32 +02:00
if ( ! strcmp ( p , " exactmap " ) ) {
# ifdef CONFIG_CRASH_DUMP
2008-01-30 13:30:12 +01:00
/*
* If we are doing a crash dump , we still need to know
* the real mem size before original memory map is
2006-09-26 10:52:32 +02:00
* reset .
*/
2006-11-14 16:57:46 +01:00
e820_register_active_regions ( 0 , 0 , - 1UL ) ;
2006-09-26 10:52:32 +02:00
saved_max_pfn = e820_end_of_ram ( ) ;
2006-11-14 16:57:46 +01:00
remove_all_active_ranges ( ) ;
2006-09-26 10:52:32 +02:00
# endif
end_pfn_map = 0 ;
e820 . nr_map = 0 ;
userdef = 1 ;
return 0 ;
}
oldp = p ;
mem_size = memparse ( p , & p ) ;
if ( p = = oldp )
return - EINVAL ;
2008-01-30 13:30:46 +01:00
userdef = 1 ;
2006-01-09 20:51:46 -08:00
if ( * p = = ' @ ' ) {
2006-09-26 10:52:32 +02:00
start_at = memparse ( p + 1 , & p ) ;
2006-01-09 20:51:46 -08:00
add_memory_region ( start_at , mem_size , E820_RAM ) ;
} else if ( * p = = ' # ' ) {
2006-09-26 10:52:32 +02:00
start_at = memparse ( p + 1 , & p ) ;
2006-01-09 20:51:46 -08:00
add_memory_region ( start_at , mem_size , E820_ACPI ) ;
} else if ( * p = = ' $ ' ) {
2006-09-26 10:52:32 +02:00
start_at = memparse ( p + 1 , & p ) ;
2006-01-09 20:51:46 -08:00
add_memory_region ( start_at , mem_size , E820_RESERVED ) ;
} else {
end_user_pfn = ( mem_size > > PAGE_SHIFT ) ;
}
2006-09-26 10:52:32 +02:00
return * p = = ' \0 ' ? 0 : - EINVAL ;
}
early_param ( " memmap " , parse_memmap_opt ) ;
2007-03-16 21:07:36 +01:00
/*
 * If the memory map was altered from the command line, sanitize it and
 * print the result.  An invalid user-supplied map stops the boot via
 * early_panic().
 */
void __init finish_e820_parsing(void)
{
	char nr;

	if (!userdef)
		return;

	nr = e820.nr_map;
	if (sanitize_e820_map(e820.map, &nr) < 0)
		early_panic(" Invalid user supplied memory map ");
	e820.nr_map = nr;

	printk(KERN_INFO " user-defined physical RAM map: \n ");
	e820_print_map(" user ");
}
2008-03-18 16:44:19 -07:00
/*
 * Change the type of the part of [start, start + size) that is currently
 * mapped as @old_type to @new_type.
 *
 * An entry lying entirely inside the range is retyped in place.  For an
 * entry that only partly overlaps the range, a new @new_type entry is
 * appended for the overlapping part; the original entry is left as it is,
 * so the map then contains overlapping entries.
 *
 * @old_type and @new_type must differ (BUG otherwise).
 */
void __init update_memory_range(u64 start, u64 size, unsigned old_type,
				unsigned new_type)
{
	int i;

	BUG_ON(old_type == new_type);

	/*
	 * add_memory_region() may grow e820.nr_map while we iterate; the
	 * appended entries carry new_type and are skipped by the type check.
	 */
	for (i = 0; i < e820.nr_map; i++) {
		struct e820entry *ei = &e820.map[i];
		u64 final_start, final_end;

		if (ei->type != old_type)
			continue;

		/*
		 * Totally covered?  Both ends of the entry must lie inside
		 * the range; comparing sizes alone would also match entries
		 * that extend past, or lie wholly beyond, start + size.
		 */
		if (ei->addr >= start &&
		    (ei->addr + ei->size) <= (start + size)) {
			ei->type = new_type;
			continue;
		}

		/* partially covered */
		final_start = max(start, ei->addr);
		final_end = min(start + size, ei->addr + ei->size);
		if (final_start >= final_end)
			continue;
		add_memory_region(final_start, final_end - final_start,
				  new_type);
	}
}
x86: disable the GART early, 64-bit
This affects K8 systems with 4G of RAM and memory hole remapping enabled,
or with more than 4G of RAM installed.
When kexec is used to boot a second kernel and the first kernel does not
include gart_shutdown, the second kernel may pick a different aperture
position than the first kernel. The second kernel may then use as RAM a
hole that is still in use by the GART set up by the first kernel. This
happens in particular when kexec'ing 2.6.24 with sparse mem enabled from
an older kernel (from RHEL 5 or SLES 10): the new kernel uses the aperture
of the GART (set up by the first kernel) for vmemmap, and once the new
kernel sets up a new GART, that position becomes real RAM again and the
_mapcount that was set is lost.
Bad page state in process 'swapper'
page:ffffe2000e600020 flags:0x0000000000000000 mapping:0000000000000000 mapcount:1 count:0
Trying to fix it up, but a reboot is needed
Backtrace:
Pid: 0, comm: swapper Not tainted 2.6.24-rc7-smp-gcdf71a10-dirty #13
Call Trace:
[<ffffffff8026401f>] bad_page+0x63/0x8d
[<ffffffff80264169>] __free_pages_ok+0x7c/0x2a5
[<ffffffff80ba75d1>] free_all_bootmem_core+0xd0/0x198
[<ffffffff80ba3a42>] numa_free_all_bootmem+0x3b/0x76
[<ffffffff80ba3461>] mem_init+0x3b/0x152
[<ffffffff80b959d3>] start_kernel+0x236/0x2c2
[<ffffffff80b9511a>] _sinittext+0x11a/0x121
and
[ffffe2000e600000-ffffe2000e7fffff] PMD ->ffff81001c200000 on node 0
phys addr is : 0x1c200000
RHEL 5.1 kernel -53 said:
PCI-DMA: aperture base @ 1c000000 size 65536 KB
new kernel said:
Mapping aperture over 65536 KB of RAM @ 3c000000
So we could try to disable that GART if possible.
According to Ingo
> hm, i'm wondering, instead of modifying the GART, why dont we simply
> _detect_ whatever GART settings we have inherited, and propagate that
> into our e820 maps? I.e. if there's inconsistency, then punch that out
> from the memory maps and just dont use that memory.
>
> that way it would not matter whether the GART settings came from a [old
> or crashing] Linux kernel that has not called gart_iommu_shutdown(), or
> whether it's a BIOS that has set up an aperture hole inconsistent with
> the memory map it passed. (or the memory map we _think_ i tried to pass
> us)
>
> it would also be more robust to only read and do a memory map quirk
> based on that, than actively trying to change the GART so early in the
> bootup. Later on we have to re-enable the GART _anyway_ and have to
> punch a hole for it.
>
> and as a bonus, we would have shored up our defenses against crappy
> BIOSes as well.
Add an e820 modification for inconsistent GART settings.
gart_fix_e820=off can be used to disable the e820 fix.
Signed-off-by: Yinghai Lu <yinghai.lu@sun.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
2008-01-30 13:33:09 +01:00
/*
 * Re-sanitize the kernel's e820 map after it has been modified and print
 * the new map.  If sanitize_e820_map() reports failure, the map and its
 * entry count are left as they were and nothing is printed.
 */
void __init update_e820(void)
{
	u8 nr_map = e820.nr_map;

	if (sanitize_e820_map(e820.map, &nr_map) == 0) {
		e820.nr_map = nr_map;
		printk(KERN_INFO " modified physical RAM map: \n ");
		e820_print_map(" modified ");
	}
}
2005-04-16 15:25:12 -07:00
unsigned long pci_mem_start = 0xaeedbabe ;
2006-06-26 13:59:44 +02:00
EXPORT_SYMBOL ( pci_mem_start ) ;
2005-04-16 15:25:12 -07:00
/*
* Search for the biggest gap in the low 32 bits of the e820
* memory space . We pass this space to PCI to assign MMIO resources
* for hotplug or unconfigured devices in .
* Hopefully the BIOS let enough space left .
*/
__init void e820_setup_gap ( void )
{
2005-09-09 00:57:14 +02:00
unsigned long gapstart , gapsize , round ;
2005-04-16 15:25:12 -07:00
unsigned long last ;
int i ;
int found = 0 ;
last = 0x100000000ull ;
gapstart = 0x10000000 ;
gapsize = 0x400000 ;
i = e820 . nr_map ;
while ( - - i > = 0 ) {
unsigned long long start = e820 . map [ i ] . addr ;
unsigned long long end = start + e820 . map [ i ] . size ;
/*
* Since " last " is at most 4 GB , we know we ' ll
* fit in 32 bits if this condition is true
*/
if ( last > end ) {
unsigned long gap = last - end ;
if ( gap > gapsize ) {
gapsize = gap ;
gapstart = end ;
found = 1 ;
}
}
if ( start < last )
last = start ;
}
if ( ! found ) {
gapstart = ( end_pfn < < PAGE_SHIFT ) + 1024 * 1024 ;
2008-01-30 13:30:12 +01:00
printk ( KERN_ERR " PCI: Warning: Cannot find a gap in the 32bit "
" address range \n "
KERN_ERR " PCI: Unassigned devices with 32bit resource "
" registers may break! \n " ) ;
2005-04-16 15:25:12 -07:00
}
/*
2005-09-09 00:57:14 +02:00
* See how much we want to round up : start off with
* rounding to the next 1 MB area .
2005-04-16 15:25:12 -07:00
*/
2005-09-09 00:57:14 +02:00
round = 0x100000 ;
while ( ( gapsize > > 4 ) > round )
round + = round ;
/* Fun with two's complement */
pci_mem_start = ( gapstart + round ) & - round ;
2005-04-16 15:25:12 -07:00
2008-01-30 13:30:12 +01:00
printk ( KERN_INFO
" Allocating PCI resources starting at %lx (gap: %lx:%lx) \n " ,
pci_mem_start , gapstart , gapsize ) ;
2005-04-16 15:25:12 -07:00
}
2007-10-21 16:41:55 -07:00
/*
 * Report the first E820_RAM entry at or after index @slot.
 *
 * On success *addr receives the start of the entry, *size its length
 * clipped at max_pfn << PAGE_SHIFT, and the index following the entry is
 * returned so that it can be passed in as the next @slot.  Returns -1 if
 * @slot is out of range, no RAM entry is left, or the entry starts above
 * max_pfn << PAGE_SHIFT.
 */
int __init arch_get_ram_range(int slot, u64 *addr, u64 *size)
{
	int i = slot;

	if (slot < 0 || slot >= e820.nr_map)
		return -1;

	/* Skip ahead to the next RAM entry. */
	while (i < e820.nr_map && e820.map[i].type != E820_RAM)
		i++;

	if (i == e820.nr_map)
		return -1;
	if (e820.map[i].addr > (max_pfn << PAGE_SHIFT))
		return -1;

	*addr = e820.map[i].addr;
	*size = min_t(u64, e820.map[i].size + e820.map[i].addr,
		      max_pfn << PAGE_SHIFT) - *addr;

	return i + 1;
}