2006-06-30 12:55:32 +04:00
/*
* linux / mm / vmstat . c
*
* Manages VM statistics
* Copyright ( C ) 1991 , 1992 , 1993 , 1994 Linus Torvalds
2006-06-30 12:55:33 +04:00
*
* zoned VM statistics
* Copyright ( C ) 2006 Silicon Graphics , Inc . ,
* Christoph Lameter < christoph @ lameter . com >
2006-06-30 12:55:32 +04:00
*/
# include <linux/config.h>
# include <linux/mm.h>
2006-06-30 12:55:33 +04:00
# include <linux/module.h>
2006-06-30 12:55:32 +04:00
void __get_zone_counts ( unsigned long * active , unsigned long * inactive ,
unsigned long * free , struct pglist_data * pgdat )
{
struct zone * zones = pgdat - > node_zones ;
int i ;
* active = 0 ;
* inactive = 0 ;
* free = 0 ;
for ( i = 0 ; i < MAX_NR_ZONES ; i + + ) {
* active + = zones [ i ] . nr_active ;
* inactive + = zones [ i ] . nr_inactive ;
* free + = zones [ i ] . free_pages ;
}
}
/*
 * Sum the active, inactive and free page counts over all online nodes.
 *
 * Each node's totals are obtained with __get_zone_counts() and added to
 * the caller's counters, which are reset to zero first.
 */
void get_zone_counts(unsigned long *active,
			unsigned long *inactive, unsigned long *free)
{
	struct pglist_data *pgdat;

	*active = 0;
	*inactive = 0;
	*free = 0;

	for_each_online_pgdat(pgdat) {
		unsigned long node_active, node_inactive, node_free;

		__get_zone_counts(&node_active, &node_inactive, &node_free,
					pgdat);
		*active += node_active;
		*inactive += node_inactive;
		*free += node_free;
	}
}
2006-06-30 12:55:45 +04:00
# ifdef CONFIG_VM_EVENT_COUNTERS
/* Per-cpu VM event counters, initialised to zero. */
DEFINE_PER_CPU ( struct vm_event_state , vm_event_states ) = { { 0 } } ;
EXPORT_PER_CPU_SYMBOL ( vm_event_states ) ;
/*
 * Add up the per-cpu event counters of every CPU set in @cpumask.
 *
 * @ret must have room for NR_VM_EVENT_ITEMS counters; it is cleared
 * before the summation starts.
 */
static void sum_vm_events(unsigned long *ret, cpumask_t *cpumask)
{
	int cpu;
	int item;

	memset(ret, 0, NR_VM_EVENT_ITEMS * sizeof(unsigned long));

	for (cpu = first_cpu(*cpumask); cpu < NR_CPUS; ) {
		struct vm_event_state *cur = &per_cpu(vm_event_states, cpu);

		/*
		 * Step to the next CPU before summing so that its counters
		 * can be prefetched while the current ones are added up.
		 */
		cpu = next_cpu(cpu, *cpumask);
		if (cpu < NR_CPUS)
			prefetch(&per_cpu(vm_event_states, cpu));

		for (item = 0; item < NR_VM_EVENT_ITEMS; item++)
			ret[item] += cur->event[item];
	}
}
/*
 * Sum the vm event counters of all online CPUs into @ret.
 *
 * The totals are necessarily approximate: the counters may change
 * while this function runs and after it returns.
 */
void all_vm_events(unsigned long *ret)
{
	sum_vm_events(ret, &cpu_online_map);
}
2006-07-10 15:44:31 +04:00
EXPORT_SYMBOL_GPL ( all_vm_events ) ;
2006-06-30 12:55:45 +04:00
# ifdef CONFIG_HOTPLUG
/*
* Fold the foreign cpu events into our own .
*
* This is adding to the events on one processor
* but keeps the global counts constant .
*/
void vm_events_fold_cpu ( int cpu )
{
struct vm_event_state * fold_state = & per_cpu ( vm_event_states , cpu ) ;
int i ;
for ( i = 0 ; i < NR_VM_EVENT_ITEMS ; i + + ) {
count_vm_events ( i , fold_state - > event [ i ] ) ;
fold_state - > event [ i ] = 0 ;
}
}
# endif /* CONFIG_HOTPLUG */
# endif /* CONFIG_VM_EVENT_COUNTERS */
2006-06-30 12:55:33 +04:00
/*
 * Manage combined zone based / global counters
 *
 * vm_stat contains the global counters: an array of
 * NR_VM_ZONE_STAT_ITEMS atomic_long_t values.
 */
atomic_long_t vm_stat [ NR_VM_ZONE_STAT_ITEMS ] ;
EXPORT_SYMBOL ( vm_stat ) ;
# ifdef CONFIG_SMP
/*
 * Bound for the per-cpu differentials (stored as s8): once a differential
 * goes above STAT_THRESHOLD or below -STAT_THRESHOLD it is passed to
 * zone_page_state_add() and reset to zero.
 */
# define STAT_THRESHOLD 32
/*
* Determine pointer to currently valid differential byte given a zone and
* the item number .
*
* Preemption must be off
*/
static inline s8 * diff_pointer ( struct zone * zone , enum zone_stat_item item )
{
return & zone_pcp ( zone , smp_processor_id ( ) ) - > vm_stat_diff [ item ] ;
}
/*
 * Add @delta to the @item counter of @zone.
 *
 * The change is collected in the local differential; once the sum leaves
 * the range [-STAT_THRESHOLD, STAT_THRESHOLD] it is handed to
 * zone_page_state_add() and the differential starts again from zero.
 *
 * For use when we know that interrupts are disabled.
 */
void __mod_zone_page_state(struct zone *zone, enum zone_stat_item item,
				int delta)
{
	s8 *diff = diff_pointer(zone, item);
	long total = delta + *diff;

	if (unlikely(total > STAT_THRESHOLD || total < -STAT_THRESHOLD)) {
		zone_page_state_add(total, zone, item);
		*diff = 0;
		return;
	}

	*diff = total;
}
EXPORT_SYMBOL(__mod_zone_page_state);
/*
 * Add @delta to the @item counter of @zone when the interrupt state is
 * unknown: interrupts are saved and disabled around the update and
 * restored afterwards.
 */
void mod_zone_page_state(struct zone *zone, enum zone_stat_item item,
				int delta)
{
	unsigned long irq_flags;

	local_irq_save(irq_flags);
	__mod_zone_page_state(zone, item, delta);
	local_irq_restore(irq_flags);
}
EXPORT_SYMBOL(mod_zone_page_state);
/*
* Optimized increment and decrement functions .
*
* These are only for a single page and therefore can take a struct page *
* argument instead of struct zone * . This allows the inclusion of the code
* generated for page_zone ( page ) into the optimized functions .
*
* No overflow check is necessary and therefore the differential can be
* incremented or decremented in place which may allow the compilers to
* generate better code .
*
* The increment or decrement is known and therefore one boundary check can
* be omitted .
*
* Some processors have inc / dec instructions that are atomic vs an interrupt .
* However , the code must first determine the differential location in a zone
* based on the processor number and then inc / dec the counter . There is no
* guarantee without disabling preemption that the processor will not change
* in between and therefore the atomicity vs . interrupt cannot be exploited
* in a useful way here .
*/
2006-06-30 12:55:44 +04:00
/*
 * Increment the local differential for @item in @zone by one.
 *
 * When the differential rises above STAT_THRESHOLD its value is passed to
 * zone_page_state_add() and the differential is reset to zero.
 *
 * Relies on diff_pointer(), so preemption must be off.
 */
static void __inc_zone_state ( struct zone * zone , enum zone_stat_item item )
2006-06-30 12:55:33 +04:00
{
s8 * p = diff_pointer ( zone , item ) ;
( * p ) + + ;
if ( unlikely ( * p > STAT_THRESHOLD ) ) {
zone_page_state_add ( * p , zone , item ) ;
* p = 0 ;
}
}
2006-06-30 12:55:44 +04:00
/*
 * Increment the @item counter of the zone that @page belongs to.
 * Performs no interrupt handling of its own.
 */
void __inc_zone_page_state(struct page *page, enum zone_stat_item item)
{
	struct zone *zone = page_zone(page);

	__inc_zone_state(zone, item);
}
2006-06-30 12:55:33 +04:00
EXPORT_SYMBOL ( __inc_zone_page_state ) ;
/*
 * Decrement the @item counter of the zone that @page belongs to.
 *
 * The local differential is decremented; once it drops below
 * -STAT_THRESHOLD it is passed to zone_page_state_add() and reset to zero.
 * Performs no interrupt handling of its own.
 */
void __dec_zone_page_state(struct page *page, enum zone_stat_item item)
{
	struct zone *zone = page_zone(page);
	s8 *diff = diff_pointer(zone, item);

	if (unlikely(--(*diff) < -STAT_THRESHOLD)) {
		zone_page_state_add(*diff, zone, item);
		*diff = 0;
	}
}
EXPORT_SYMBOL(__dec_zone_page_state);
2006-06-30 12:55:44 +04:00
/*
 * Increment the @item counter of @zone with interrupts disabled for the
 * duration of the update; the previous interrupt state is restored after.
 */
void inc_zone_state(struct zone *zone, enum zone_stat_item item)
{
	unsigned long irq_flags;

	local_irq_save(irq_flags);
	__inc_zone_state(zone, item);
	local_irq_restore(irq_flags);
}
2006-06-30 12:55:33 +04:00
/*
 * Increment the @item counter of the zone that @page belongs to.
 *
 * The zone is looked up first; the counter update itself runs with
 * interrupts disabled and the previous interrupt state is restored after.
 */
void inc_zone_page_state ( struct page * page , enum zone_stat_item item )
{
unsigned long flags ;
struct zone * zone ;
zone = page_zone ( page ) ;
local_irq_save ( flags ) ;
2006-06-30 12:55:44 +04:00
__inc_zone_state ( zone , item ) ;
2006-06-30 12:55:33 +04:00
local_irq_restore ( flags ) ;
}
EXPORT_SYMBOL ( inc_zone_page_state ) ;
/*
 * Decrement the @item counter of the zone that @page belongs to.
 *
 * Runs with interrupts disabled: the local differential is decremented
 * and, once it drops below -STAT_THRESHOLD, passed to
 * zone_page_state_add() and reset to zero.
 */
void dec_zone_page_state(struct page *page, enum zone_stat_item item)
{
	struct zone *zone = page_zone(page);
	unsigned long irq_flags;
	s8 *diff;

	local_irq_save(irq_flags);
	diff = diff_pointer(zone, item);
	if (unlikely(--(*diff) < -STAT_THRESHOLD)) {
		zone_page_state_add(*diff, zone, item);
		*diff = 0;
	}
	local_irq_restore(irq_flags);
}
EXPORT_SYMBOL(dec_zone_page_state);
/*
 * Push the pending differentials of @cpu into the zone counters.
 *
 * For every zone, each non-zero differential of @cpu is passed to
 * zone_page_state_add() and cleared, with interrupts disabled around
 * each individual transfer.
 */
void refresh_cpu_vm_stats(int cpu)
{
	struct zone *zone;

	for_each_zone(zone) {
		struct per_cpu_pageset *pcp = zone_pcp(zone, cpu);
		unsigned long flags;
		int item;

		for (item = 0; item < NR_VM_ZONE_STAT_ITEMS; item++) {
			if (!pcp->vm_stat_diff[item])
				continue;

			local_irq_save(flags);
			zone_page_state_add(pcp->vm_stat_diff[item],
						zone, item);
			pcp->vm_stat_diff[item] = 0;
			local_irq_restore(flags);
		}
	}
}
/*
 * Callback form of refresh_cpu_vm_stats() for the processor it runs on.
 * @dummy is ignored.
 */
static void __refresh_cpu_vm_stats(void *dummy)
{
	int this_cpu = smp_processor_id();

	refresh_cpu_vm_stats(this_cpu);
}
/*
 * Consolidate all counters by running __refresh_cpu_vm_stats() through
 * on_each_cpu().
 *
 * The counters are more accurate afterwards, but they remain inexact if
 * other processes are allowed to run concurrently.
 */
void refresh_vm_stats(void)
{
	on_each_cpu(__refresh_cpu_vm_stats, NULL, 0, 1);
}
EXPORT_SYMBOL(refresh_vm_stats);
# endif
2006-06-30 12:55:44 +04:00
# ifdef CONFIG_NUMA
/*
* zonelist = the list of zones passed to the allocator
* z = the zone from which the allocation occurred .
*
* Must be called with interrupts disabled .
*/
void zone_statistics ( struct zonelist * zonelist , struct zone * z )
{
if ( z - > zone_pgdat = = zonelist - > zones [ 0 ] - > zone_pgdat ) {
__inc_zone_state ( z , NUMA_HIT ) ;
} else {
__inc_zone_state ( z , NUMA_MISS ) ;
__inc_zone_state ( zonelist - > zones [ 0 ] , NUMA_FOREIGN ) ;
}
if ( z - > zone_pgdat = = NODE_DATA ( numa_node_id ( ) ) )
__inc_zone_state ( z , NUMA_LOCAL ) ;
else
__inc_zone_state ( z , NUMA_OTHER ) ;
}
# endif
2006-06-30 12:55:32 +04:00
# ifdef CONFIG_PROC_FS
# include <linux/seq_file.h>
/*
 * seq_file start callback: return the online node at index *pos,
 * counting from the first online node, or NULL when *pos lies past
 * the last one.
 */
static void *frag_start(struct seq_file *m, loff_t *pos)
{
	pg_data_t *pgdat = first_online_pgdat();
	loff_t remaining = *pos;

	while (pgdat && remaining) {
		--remaining;
		pgdat = next_online_pgdat(pgdat);
	}

	return pgdat;
}
/*
 * seq_file next callback: advance *pos and return the online node that
 * follows the one passed in @arg.
 */
static void *frag_next(struct seq_file *m, void *arg, loff_t *pos)
{
	pg_data_t *current_node = arg;

	++*pos;
	return next_online_pgdat(current_node);
}
/* seq_file stop callback: intentionally empty, nothing is done here. */
static void frag_stop(struct seq_file *m, void *arg)
{
}
/*
 * seq_file show callback: walk the free areas of each zone of the node
 * passed in @arg.
 *
 * For every zone accepted by populated_zone() one line is printed with the
 * node id, the zone name and the nr_free count of each order below
 * MAX_ORDER.  Always returns 0.
 */
static int frag_show ( struct seq_file * m , void * arg )
{
pg_data_t * pgdat = ( pg_data_t * ) arg ;
struct zone * zone ;
struct zone * node_zones = pgdat - > node_zones ;
unsigned long flags ;
int order ;
for ( zone = node_zones ; zone - node_zones < MAX_NR_ZONES ; + + zone ) {
if ( ! populated_zone ( zone ) )
continue ;
/* zone->lock is held while the free_area counters are read */
spin_lock_irqsave ( & zone - > lock , flags ) ;
seq_printf ( m , " Node %d, zone %8s " , pgdat - > node_id , zone - > name ) ;
for ( order = 0 ; order < MAX_ORDER ; + + order )
seq_printf ( m , " %6lu " , zone - > free_area [ order ] . nr_free ) ;
spin_unlock_irqrestore ( & zone - > lock , flags ) ;
seq_putc ( m , ' \n ' ) ;
}
return 0 ;
}
/*
 * seq_file operations for the fragmentation report: iterates over the
 * online nodes and prints the per-zone free area counts of each.
 */
struct seq_operations fragmentation_op = {
	.start	= frag_start,
	.next	= frag_next,
	.stop	= frag_stop,
	.show	= frag_show,
};
/*
 * Names printed for the counters.
 *
 * The table is indexed in parallel with the counter values: zoneinfo_show()
 * labels zone_page_state(zone, i) with vmstat_text[i], and vmstat_show()
 * labels entry N of the buffer built by vmstat_start() with vmstat_text[N]
 * (zone counters first, then the VM event counters).  The order of the
 * entries therefore has to match the order of the counters.
 */
static char * vmstat_text [ ] = {
2006-06-30 12:55:33 +04:00
/* Zoned VM counters */
2006-06-30 12:55:36 +04:00
" nr_anon_pages " ,
2006-06-30 12:55:34 +04:00
" nr_mapped " ,
2006-06-30 12:55:35 +04:00
" nr_file_pages " ,
2006-06-30 12:55:38 +04:00
" nr_slab " ,
2006-06-30 12:55:38 +04:00
" nr_page_table_pages " ,
2006-06-30 12:55:39 +04:00
" nr_dirty " ,
2006-06-30 12:55:40 +04:00
" nr_writeback " ,
2006-06-30 12:55:32 +04:00
" nr_unstable " ,
2006-06-30 12:55:41 +04:00
" nr_bounce " ,
2006-06-30 12:55:32 +04:00
2006-06-30 12:55:44 +04:00
# ifdef CONFIG_NUMA
/* NUMA allocation counters, only present with CONFIG_NUMA */
" numa_hit " ,
" numa_miss " ,
" numa_foreign " ,
" numa_interleave " ,
" numa_local " ,
" numa_other " ,
# endif
2006-06-30 12:55:45 +04:00
# ifdef CONFIG_VM_EVENT_COUNTERS
/* VM event counters, only present with CONFIG_VM_EVENT_COUNTERS */
2006-06-30 12:55:32 +04:00
" pgpgin " ,
" pgpgout " ,
" pswpin " ,
" pswpout " ,
" pgalloc_dma " ,
2006-06-30 12:55:45 +04:00
" pgalloc_dma32 " ,
" pgalloc_normal " ,
" pgalloc_high " ,
2006-06-30 12:55:32 +04:00
" pgfree " ,
" pgactivate " ,
" pgdeactivate " ,
" pgfault " ,
" pgmajfault " ,
" pgrefill_dma " ,
2006-06-30 12:55:45 +04:00
" pgrefill_dma32 " ,
" pgrefill_normal " ,
" pgrefill_high " ,
2006-06-30 12:55:32 +04:00
" pgsteal_dma " ,
2006-06-30 12:55:45 +04:00
" pgsteal_dma32 " ,
" pgsteal_normal " ,
" pgsteal_high " ,
2006-06-30 12:55:32 +04:00
" pgscan_kswapd_dma " ,
2006-06-30 12:55:45 +04:00
" pgscan_kswapd_dma32 " ,
" pgscan_kswapd_normal " ,
" pgscan_kswapd_high " ,
2006-06-30 12:55:32 +04:00
" pgscan_direct_dma " ,
2006-06-30 12:55:45 +04:00
" pgscan_direct_dma32 " ,
" pgscan_direct_normal " ,
" pgscan_direct_high " ,
2006-06-30 12:55:32 +04:00
" pginodesteal " ,
" slabs_scanned " ,
" kswapd_steal " ,
" kswapd_inodesteal " ,
" pageoutrun " ,
" allocstall " ,
" pgrotated " ,
2006-06-30 12:55:45 +04:00
# endif
2006-06-30 12:55:32 +04:00
} ;
/*
 * Output information about zones in @ pgdat .
 *
 * seq_file show callback; @arg is the node (pg_data_t) to report on.  For
 * every zone accepted by populated_zone() it prints, while holding
 * zone->lock: the page counts and watermarks, one line per zone stat item,
 * the lowmem_reserve array, the per-cpu pagesets of the online CPUs, and
 * the reclaim state together with the start pfn.  Always returns 0.
 */
static int zoneinfo_show ( struct seq_file * m , void * arg )
{
pg_data_t * pgdat = arg ;
struct zone * zone ;
struct zone * node_zones = pgdat - > node_zones ;
unsigned long flags ;
for ( zone = node_zones ; zone - node_zones < MAX_NR_ZONES ; zone + + ) {
int i ;
if ( ! populated_zone ( zone ) )
continue ;
spin_lock_irqsave ( & zone - > lock , flags ) ;
seq_printf ( m , " Node %d, zone %8s " , pgdat - > node_id , zone - > name ) ;
seq_printf ( m ,
" \n pages free %lu "
" \n min %lu "
" \n low %lu "
" \n high %lu "
" \n active %lu "
" \n inactive %lu "
" \n scanned %lu (a: %lu i: %lu) "
" \n spanned %lu "
" \n present %lu " ,
zone - > free_pages ,
zone - > pages_min ,
zone - > pages_low ,
zone - > pages_high ,
zone - > nr_active ,
zone - > nr_inactive ,
zone - > pages_scanned ,
zone - > nr_scan_active , zone - > nr_scan_inactive ,
zone - > spanned_pages ,
zone - > present_pages ) ;
2006-06-30 12:55:33 +04:00
/* one line per zone stat item, labelled from vmstat_text */
for ( i = 0 ; i < NR_VM_ZONE_STAT_ITEMS ; i + + )
seq_printf ( m , " \n %-12s %lu " , vmstat_text [ i ] ,
zone_page_state ( zone , i ) ) ;
2006-06-30 12:55:32 +04:00
/* lowmem_reserve entries as a comma separated list in parentheses */
seq_printf ( m ,
" \n protection: (%lu " ,
zone - > lowmem_reserve [ 0 ] ) ;
for ( i = 1 ; i < ARRAY_SIZE ( zone - > lowmem_reserve ) ; i + + )
seq_printf ( m , " , %lu " , zone - > lowmem_reserve [ i ] ) ;
seq_printf ( m ,
" ) "
" \n pagesets " ) ;
for_each_online_cpu ( i ) {
struct per_cpu_pageset * pageset ;
int j ;
pageset = zone_pcp ( zone , i ) ;
/* look for the first pcp entry with a non-zero count */
for ( j = 0 ; j < ARRAY_SIZE ( pageset - > pcp ) ; j + + ) {
if ( pageset - > pcp [ j ] . count )
break ;
}
/* every count is zero: print nothing for this cpu */
if ( j = = ARRAY_SIZE ( pageset - > pcp ) )
continue ;
for ( j = 0 ; j < ARRAY_SIZE ( pageset - > pcp ) ; j + + ) {
seq_printf ( m ,
" \n cpu: %i pcp: %i "
" \n count: %i "
" \n high: %i "
" \n batch: %i " ,
i , j ,
pageset - > pcp [ j ] . count ,
pageset - > pcp [ j ] . high ,
pageset - > pcp [ j ] . batch ) ;
}
}
seq_printf ( m ,
" \n all_unreclaimable: %u "
" \n prev_priority: %i "
" \n temp_priority: %i "
" \n start_pfn: %lu " ,
zone - > all_unreclaimable ,
zone - > prev_priority ,
zone - > temp_priority ,
zone - > zone_start_pfn ) ;
spin_unlock_irqrestore ( & zone - > lock , flags ) ;
seq_putc ( m , ' \n ' ) ;
}
return 0 ;
}
/*
 * seq_file operations for the zone information report.  Iteration is
 * shared with the fragmentation report (frag_start/frag_next/frag_stop);
 * only the show callback differs.
 */
struct seq_operations zoneinfo_op = {
	.start	= frag_start,
	.next	= frag_next,
	.stop	= frag_stop,
	.show	= zoneinfo_show,
};
/*
 * seq_file start callback for the vmstat report.
 *
 * Allocates a buffer of unsigned longs, stores it in m->private and fills
 * it with a snapshot of the counters: the NR_VM_ZONE_STAT_ITEMS global
 * page states first, followed (with CONFIG_VM_EVENT_COUNTERS) by the VM
 * event counters.  The buffer is released by vmstat_stop().
 *
 * Returns a pointer to entry *pos of the buffer, NULL when *pos is at or
 * past the end of vmstat_text, or ERR_PTR(-ENOMEM) if the allocation fails.
 */
static void * vmstat_start ( struct seq_file * m , loff_t * pos )
{
2006-06-30 12:55:33 +04:00
unsigned long * v ;
2006-06-30 12:55:45 +04:00
# ifdef CONFIG_VM_EVENT_COUNTERS
unsigned long * e ;
# endif
2006-06-30 12:55:33 +04:00
int i ;
2006-06-30 12:55:32 +04:00
if ( * pos > = ARRAY_SIZE ( vmstat_text ) )
return NULL ;
2006-06-30 12:55:45 +04:00
# ifdef CONFIG_VM_EVENT_COUNTERS
2006-06-30 12:55:33 +04:00
v = kmalloc ( NR_VM_ZONE_STAT_ITEMS * sizeof ( unsigned long )
2006-06-30 12:55:45 +04:00
+ sizeof ( struct vm_event_state ) , GFP_KERNEL ) ;
# else
v = kmalloc ( NR_VM_ZONE_STAT_ITEMS * sizeof ( unsigned long ) ,
GFP_KERNEL ) ;
# endif
2006-06-30 12:55:33 +04:00
/* stored before the NULL check, so m->private is NULL on failure */
m - > private = v ;
if ( ! v )
2006-06-30 12:55:32 +04:00
return ERR_PTR ( - ENOMEM ) ;
2006-06-30 12:55:33 +04:00
for ( i = 0 ; i < NR_VM_ZONE_STAT_ITEMS ; i + + )
v [ i ] = global_page_state ( i ) ;
2006-06-30 12:55:45 +04:00
# ifdef CONFIG_VM_EVENT_COUNTERS
/* the event counters are placed right after the zone stat items */
e = v + NR_VM_ZONE_STAT_ITEMS ;
all_vm_events ( e ) ;
e [ PGPGIN ] / = 2 ; /* sectors -> kbytes */
e [ PGPGOUT ] / = 2 ;
# endif
2006-06-30 12:55:33 +04:00
return v + * pos ;
2006-06-30 12:55:32 +04:00
}
/*
 * seq_file next callback for the vmstat report: advance *pos and return
 * the matching entry of the buffer in m->private, or NULL once *pos has
 * reached the end of vmstat_text.
 */
static void *vmstat_next(struct seq_file *m, void *arg, loff_t *pos)
{
	unsigned long *values = m->private;
	loff_t idx = ++*pos;

	if (idx >= ARRAY_SIZE(vmstat_text))
		return NULL;

	return values + idx;
}
/*
 * seq_file show callback for the vmstat report.
 *
 * @arg points at one entry of the buffer in m->private; its index within
 * that buffer selects the name from vmstat_text that is printed together
 * with the value.  Always returns 0.
 */
static int vmstat_show ( struct seq_file * m , void * arg )
{
unsigned long * l = arg ;
/* index of this entry within the m->private buffer */
unsigned long off = l - ( unsigned long * ) m - > private ;
seq_printf ( m , " %s %lu \n " , vmstat_text [ off ] , * l ) ;
return 0 ;
}
static void vmstat_stop ( struct seq_file * m , void * arg )
{
kfree ( m - > private ) ;
m - > private = NULL ;
}
/* seq_file operations for the vmstat report. */
struct seq_operations vmstat_op = {
	.start	= vmstat_start,
	.next	= vmstat_next,
	.stop	= vmstat_stop,
	.show	= vmstat_show,
};
# endif /* CONFIG_PROC_FS */