2005-10-30 04:16:54 +03:00
/*
* linux / mm / memory_hotplug . c
*
* Copyright ( C )
*/
# include <linux/stddef.h>
# include <linux/mm.h>
# include <linux/swap.h>
# include <linux/interrupt.h>
# include <linux/pagemap.h>
# include <linux/bootmem.h>
# include <linux/compiler.h>
# include <linux/module.h>
# include <linux/pagevec.h>
# include <linux/slab.h>
# include <linux/sysctl.h>
# include <linux/cpu.h>
# include <linux/memory.h>
# include <linux/memory_hotplug.h>
# include <linux/highmem.h>
# include <linux/vmalloc.h>
2006-06-27 13:53:35 +04:00
# include <linux/ioport.h>
2005-10-30 04:16:54 +03:00
# include <asm/tlbflush.h>
/*
 * Prototype only: zonetable_add() is called from __add_zone() below, but its
 * definition is not in the visible part of this file.
 */
extern void zonetable_add ( struct zone * zone , int nid , int zid , unsigned long pfn ,
unsigned long size ) ;
2006-06-23 13:03:10 +04:00
static int __add_zone ( struct zone * zone , unsigned long phys_start_pfn )
2005-10-30 04:16:54 +03:00
{
struct pglist_data * pgdat = zone - > zone_pgdat ;
int nr_pages = PAGES_PER_SECTION ;
int nid = pgdat - > node_id ;
int zone_type ;
zone_type = zone - pgdat - > node_zones ;
2006-06-23 13:03:10 +04:00
if ( ! populated_zone ( zone ) ) {
int ret = 0 ;
ret = init_currently_empty_zone ( zone , phys_start_pfn , nr_pages ) ;
if ( ret < 0 )
return ret ;
}
2005-10-30 04:16:54 +03:00
memmap_init_zone ( nr_pages , nid , zone_type , phys_start_pfn ) ;
zonetable_add ( zone , nid , zone_type , phys_start_pfn , nr_pages ) ;
2006-06-23 13:03:10 +04:00
return 0 ;
2005-10-30 04:16:54 +03:00
}
2005-10-30 04:16:55 +03:00
/*
 * Prototype only: sparse_add_one_section() is called from __add_section()
 * below, but its definition is not in the visible part of this file.
 */
extern int sparse_add_one_section ( struct zone * zone , unsigned long start_pfn ,
int nr_pages ) ;
2005-10-30 04:16:54 +03:00
static int __add_section ( struct zone * zone , unsigned long phys_start_pfn )
{
int nr_pages = PAGES_PER_SECTION ;
int ret ;
2006-08-05 23:15:06 +04:00
if ( pfn_valid ( phys_start_pfn ) )
return - EEXIST ;
2005-10-30 04:16:55 +03:00
ret = sparse_add_one_section ( zone , phys_start_pfn , nr_pages ) ;
2005-10-30 04:16:54 +03:00
if ( ret < 0 )
return ret ;
2006-06-23 13:03:10 +04:00
ret = __add_zone ( zone , phys_start_pfn ) ;
if ( ret < 0 )
return ret ;
2005-10-30 04:16:54 +03:00
return register_new_memory ( __pfn_to_section ( phys_start_pfn ) ) ;
}
/*
* Reasonably generic function for adding memory . It is
* expected that archs that support memory hotplug will
* call this function after deciding the zone to which to
* add the new pages .
*/
int __add_pages ( struct zone * zone , unsigned long phys_start_pfn ,
unsigned long nr_pages )
{
unsigned long i ;
int err = 0 ;
2006-08-05 23:14:58 +04:00
int start_sec , end_sec ;
/* during initialize mem_map, align hot-added range to section */
start_sec = pfn_to_section_nr ( phys_start_pfn ) ;
end_sec = pfn_to_section_nr ( phys_start_pfn + nr_pages - 1 ) ;
2005-10-30 04:16:54 +03:00
2006-08-05 23:14:58 +04:00
for ( i = start_sec ; i < = end_sec ; i + + ) {
err = __add_section ( zone , i < < PFN_SECTION_SHIFT ) ;
2005-10-30 04:16:54 +03:00
2006-08-05 23:14:58 +04:00
/*
* EEXIST is finally dealed with by ioresource collision
* check . see add_memory ( ) = > register_memory_resource ( )
* Warning will be printed if there is collision .
2006-05-01 23:16:11 +04:00
*/
if ( err & & ( err ! = - EEXIST ) )
2005-10-30 04:16:54 +03:00
break ;
2006-08-05 23:14:58 +04:00
err = 0 ;
2005-10-30 04:16:54 +03:00
}
return err ;
}
2006-05-01 23:16:11 +04:00
EXPORT_SYMBOL_GPL ( __add_pages ) ;
2005-10-30 04:16:54 +03:00
static void grow_zone_span ( struct zone * zone ,
unsigned long start_pfn , unsigned long end_pfn )
{
unsigned long old_zone_end_pfn ;
zone_span_writelock ( zone ) ;
old_zone_end_pfn = zone - > zone_start_pfn + zone - > spanned_pages ;
if ( start_pfn < zone - > zone_start_pfn )
zone - > zone_start_pfn = start_pfn ;
2006-05-31 08:25:42 +04:00
zone - > spanned_pages = max ( old_zone_end_pfn , end_pfn ) -
zone - > zone_start_pfn ;
2005-10-30 04:16:54 +03:00
zone_span_writeunlock ( zone ) ;
}
static void grow_pgdat_span ( struct pglist_data * pgdat ,
unsigned long start_pfn , unsigned long end_pfn )
{
unsigned long old_pgdat_end_pfn =
pgdat - > node_start_pfn + pgdat - > node_spanned_pages ;
if ( start_pfn < pgdat - > node_start_pfn )
pgdat - > node_start_pfn = start_pfn ;
2006-05-31 08:25:42 +04:00
pgdat - > node_spanned_pages = max ( old_pgdat_end_pfn , end_pfn ) -
pgdat - > node_start_pfn ;
2005-10-30 04:16:54 +03:00
}
int online_pages ( unsigned long pfn , unsigned long nr_pages )
{
unsigned long i ;
unsigned long flags ;
unsigned long onlined_pages = 0 ;
2006-06-27 13:53:36 +04:00
struct resource res ;
u64 section_end ;
unsigned long start_pfn ;
2005-10-30 04:16:54 +03:00
struct zone * zone ;
2006-06-23 13:03:11 +04:00
int need_zonelists_rebuild = 0 ;
2005-10-30 04:16:54 +03:00
/*
* This doesn ' t need a lock to do pfn_to_page ( ) .
* The section can ' t be removed here because of the
* memory_block - > state_sem .
*/
zone = page_zone ( pfn_to_page ( pfn ) ) ;
pgdat_resize_lock ( zone - > zone_pgdat , & flags ) ;
grow_zone_span ( zone , pfn , pfn + nr_pages ) ;
grow_pgdat_span ( zone - > zone_pgdat , pfn , pfn + nr_pages ) ;
pgdat_resize_unlock ( zone - > zone_pgdat , & flags ) ;
2006-06-23 13:03:11 +04:00
/*
* If this zone is not populated , then it is not in zonelist .
* This means the page allocator ignores this zone .
* So , zonelist must be updated after online .
*/
if ( ! populated_zone ( zone ) )
need_zonelists_rebuild = 1 ;
2006-06-27 13:53:36 +04:00
res . start = ( u64 ) pfn < < PAGE_SHIFT ;
res . end = res . start + ( ( u64 ) nr_pages < < PAGE_SHIFT ) - 1 ;
res . flags = IORESOURCE_MEM ; /* we just need system ram */
section_end = res . end ;
2006-08-05 23:15:01 +04:00
while ( ( res . start < res . end ) & & ( find_next_system_ram ( & res ) > = 0 ) ) {
2006-06-27 13:53:36 +04:00
start_pfn = ( unsigned long ) ( res . start > > PAGE_SHIFT ) ;
nr_pages = ( unsigned long )
( ( res . end + 1 - res . start ) > > PAGE_SHIFT ) ;
if ( PageReserved ( pfn_to_page ( start_pfn ) ) ) {
/* this region's page is not onlined now */
for ( i = 0 ; i < nr_pages ; i + + ) {
struct page * page = pfn_to_page ( start_pfn + i ) ;
online_page ( page ) ;
onlined_pages + + ;
}
}
res . start = res . end + 1 ;
res . end = section_end ;
2005-10-30 04:16:54 +03:00
}
zone - > present_pages + = onlined_pages ;
2006-03-10 04:33:51 +03:00
zone - > zone_pgdat - > node_present_pages + = onlined_pages ;
2005-10-30 04:16:54 +03:00
2005-10-30 04:16:56 +03:00
setup_per_zone_pages_min ( ) ;
2006-06-23 13:03:11 +04:00
if ( need_zonelists_rebuild )
build_all_zonelists ( ) ;
2006-06-23 13:03:47 +04:00
vm_total_pages = nr_free_pagecache_pages ( ) ;
2005-10-30 04:16:54 +03:00
return 0 ;
}
2006-06-27 13:53:30 +04:00
[PATCH] pgdat allocation for new node add (call pgdat allocation)
Add node-hot-add support to add_memory().
node hotadd uses this sequence.
1. allocate pgdat.
2. refresh NODE_DATA()
3. call free_area_init_node() to initialize
4. create sysfs entry
5. add memory (old add_memory())
6. set node online
7. run kswapd for new node.
(8). update zonelist after pages are onlined. (This is already merged in -mm
because its update phase is different.)
Note:
To share as much common code as possible,
there are 2 changes from v2.
- The old add_memory(), which is defined by each arch,
is renamed to arch_add_memory(). The new add_memory() becomes
the common-code caller of the arch-dependent function.
- This patch changes add_memory()'s interface
From: add_memory(start, end)
TO : add_memory(nid, start, end).
This is because similar code for finding the node id from a
physical address was inside the old add_memory() on each arch.
In addition, acpi memory hotplug driver can find node id easier.
In v2, it must walk DSDT'S _CRS by matching physical address to
get the handle of its memory device, then get _PXM and node id.
Because input is just physical address.
However, in v3, the acpi driver can use handle to get _PXM and node id
for the new memory device. It can pass just node id to add_memory().
The fix for the interface of arch_add_memory() is in the next patch.
Signed-off-by: Yasunori Goto <y-goto@jp.fujitsu.com>
Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Dave Hansen <haveblue@us.ibm.com>
Cc: "Brown, Len" <len.brown@intel.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2006-06-27 13:53:34 +04:00
/*
 * Allocate and initialise the pgdat for node @nid, which is being
 * hot-added.  @start is the physical start address of the new memory.
 *
 * Returns the new pgdat, or NULL if arch_alloc_nodedata() fails.
 */
static pg_data_t *hotadd_new_pgdat(int nid, u64 start)
{
	unsigned long empty_sizes[MAX_NR_ZONES] = {0};
	unsigned long empty_holes[MAX_NR_ZONES] = {0};
	unsigned long first_pfn = start >> PAGE_SHIFT;
	struct pglist_data *node;

	node = arch_alloc_nodedata(nid);
	if (!node)
		return NULL;

	/* NODE_DATA(nid) can be used once this has run */
	arch_refresh_nodedata(nid, node);

	/*
	 * All zone sizes are zero: the node's zones start out empty because
	 * there are no present pages yet.
	 */
	free_area_init_node(nid, node, empty_sizes, first_pfn, empty_holes);

	return node;
}
/*
 * Undo hotadd_new_pgdat(): reset the node data entry for @nid to NULL and
 * give @pgdat back to the arch allocator.
 */
static void rollback_node_hotadd(int nid, pg_data_t *pgdat)
{
	arch_refresh_nodedata(nid, NULL);
	arch_free_nodedata(pgdat);
}
2006-06-27 13:53:35 +04:00
/* add this memory to iomem resource */
2006-08-05 23:15:06 +04:00
static struct resource * register_memory_resource ( u64 start , u64 size )
2006-06-27 13:53:35 +04:00
{
struct resource * res ;
res = kzalloc ( sizeof ( struct resource ) , GFP_KERNEL ) ;
BUG_ON ( ! res ) ;
res - > name = " System RAM " ;
res - > start = start ;
res - > end = start + size - 1 ;
res - > flags = IORESOURCE_MEM ;
if ( request_resource ( & iomem_resource , res ) < 0 ) {
printk ( " System RAM resource %llx - %llx cannot be added \n " ,
( unsigned long long ) res - > start , ( unsigned long long ) res - > end ) ;
kfree ( res ) ;
2006-08-05 23:15:06 +04:00
res = NULL ;
2006-06-27 13:53:35 +04:00
}
2006-08-05 23:15:06 +04:00
return res ;
}
/*
 * Counterpart of register_memory_resource(): release @res and free it.
 * A NULL @res is accepted and ignored.
 */
static void release_memory_resource(struct resource *res)
{
	if (res) {
		release_resource(res);
		kfree(res);
	}
}
2006-06-27 13:53:30 +04:00
/*
 * Hot-add the memory range [start, start + size) to node @nid.
 *
 * Sequence:
 *   1. register the range as a memory resource;
 *   2. if the node is not online yet, allocate and initialise its pgdat
 *      and start kswapd for it;
 *   3. call the architecture's arch_add_memory();
 *   4. mark the node online;
 *   5. for a newly created node, register its sysfs entry.
 *
 * Returns -EEXIST if the resource cannot be registered, -ENOMEM if the
 * pgdat for a new node cannot be allocated, or the error from
 * kswapd_run() / arch_add_memory().  On every failure after step 1 the
 * resource is released again, and a pgdat created here is rolled back.
 * On success the value returned is 0 (register_one_node() failing is a
 * BUG).
 */
int add_memory(int nid, u64 start, u64 size)
{
	pg_data_t *pgdat = NULL;
	int new_pgdat = 0;
	struct resource *res;
	int ret;

	res = register_memory_resource(start, size);
	if (!res)
		return -EEXIST;

	if (!node_online(nid)) {
		pgdat = hotadd_new_pgdat(nid, start);
		if (!pgdat) {
			/*
			 * Go through the error path so that the resource
			 * registered above is released; new_pgdat is still
			 * 0, so no pgdat rollback is attempted.
			 */
			ret = -ENOMEM;
			goto error;
		}
		new_pgdat = 1;
		ret = kswapd_run(nid);
		if (ret)
			goto error;
	}

	/* call arch's memory hotadd */
	ret = arch_add_memory(nid, start, size);
	if (ret < 0)
		goto error;

	/* we online the node here; we can't roll back from here on */
	node_set_online(nid);

	if (new_pgdat) {
		ret = register_one_node(nid);
		/*
		 * If the sysfs file of the new node can't be created, CPUs
		 * on the node can't be hot-added.  There is no way to roll
		 * back at this point, so reluctantly catch it with BUG_ON().
		 */
		BUG_ON(ret);
	}

	return ret;

error:
	/*
	 * Roll back the pgdat allocation and the resource registration.
	 * NOTE(review): a kswapd started above for a new node is not
	 * stopped on this path - confirm whether that needs handling.
	 */
	if (new_pgdat)
		rollback_node_hotadd(nid, pgdat);
	/* res is never NULL here */
	release_memory_resource(res);

	return ret;
}
EXPORT_SYMBOL_GPL(add_memory);