2009-09-22 04:02:47 +04:00
#!/usr/bin/perl
# This is a POC (proof of concept or piece of crap, take your pick) for reading the
# text representation of trace output related to page allocation. It makes an attempt
# to extract some high-level information on what is going on. The accuracy of the parser
# may vary considerably
#
# Example usage: trace-pagealloc-postprocess.pl < /sys/kernel/debug/tracing/trace_pipe
# other options
# --prepend-parent Report on the parent proc and PID
# --read-procstat If the trace lacks process info, get it from /proc
# --ignore-pid Aggregate processes of the same name together
#
# Copyright (c) IBM Corporation 2009
# Author: Mel Gorman <mel@csn.ul.ie>
use strict ;
use Getopt::Long ;
# Tracepoint events
use constant MM_PAGE_ALLOC = > 1 ;
2012-01-11 03:07:09 +04:00
use constant MM_PAGE_FREE = > 2 ;
use constant MM_PAGE_FREE_BATCHED = > 3 ;
2009-09-22 04:02:47 +04:00
use constant MM_PAGE_PCPU_DRAIN = > 4 ;
use constant MM_PAGE_ALLOC_ZONE_LOCKED = > 5 ;
use constant MM_PAGE_ALLOC_EXTFRAG = > 6 ;
use constant EVENT_UNKNOWN = > 7 ;
# Constants used to track state
use constant STATE_PCPU_PAGES_DRAINED = > 8 ;
use constant STATE_PCPU_PAGES_REFILLED = > 9 ;
# High-level events extrapolated from tracepoints
use constant HIGH_PCPU_DRAINS = > 10 ;
use constant HIGH_PCPU_REFILLS = > 11 ;
use constant HIGH_EXT_FRAGMENT = > 12 ;
use constant HIGH_EXT_FRAGMENT_SEVERE = > 13 ;
use constant HIGH_EXT_FRAGMENT_MODERATE = > 14 ;
use constant HIGH_EXT_FRAGMENT_CHANGED = > 15 ;
my % perprocesspid ;
my % perprocess ;
my $ opt_ignorepid ;
my $ opt_read_procstat ;
my $ opt_prepend_parent ;
# Catch sigint and exit on request
my $ sigint_report = 0 ;
my $ sigint_exit = 0 ;
my $ sigint_pending = 0 ;
my $ sigint_received = 0 ;
sub sigint_handler {
my $ current_time = time ;
if ( $ current_time - 2 > $ sigint_received ) {
print "SIGINT received, report pending. Hit ctrl-c again to exit\n" ;
$ sigint_report = 1 ;
} else {
if ( ! $ sigint_exit ) {
print "Second SIGINT received quickly, exiting\n" ;
}
$ sigint_exit + + ;
}
if ( $ sigint_exit > 3 ) {
print "Many SIGINTs received, exiting now without report\n" ;
exit ;
}
$ sigint_received = $ current_time ;
$ sigint_pending = 1 ;
}
$ SIG { INT } = "sigint_handler" ;
# Parse command line options
GetOptions (
'ignore-pid' = > \ $ opt_ignorepid ,
'read-procstat' = > \ $ opt_read_procstat ,
'prepend-parent' = > \ $ opt_prepend_parent ,
) ;
# Defaults for dynamically discovered regex's
my $ regex_fragdetails_default = 'page=([0-9a-f]*) pfn=([0-9]*) alloc_order=([-0-9]*) fallback_order=([-0-9]*) pageblock_order=([-0-9]*) alloc_migratetype=([-0-9]*) fallback_migratetype=([-0-9]*) fragmenting=([-0-9]) change_ownership=([-0-9])' ;
# Dyanically discovered regex
my $ regex_fragdetails ;
# Static regex used. Specified like this for readability and for use with /o
# (process_pid) (cpus ) ( time ) (tpoint ) (details)
my $ regex_traceevent = '\s*([a-zA-Z0-9-]*)\s*(\[[0-9]*\])\s*([0-9.]*):\s*([a-zA-Z_]*):\s*(.*)' ;
my $ regex_statname = '[-0-9]*\s\((.*)\).*' ;
my $ regex_statppid = '[-0-9]*\s\(.*\)\s[A-Za-z]\s([0-9]*).*' ;
sub generate_traceevent_regex {
my $ event = shift ;
my $ default = shift ;
my $ regex ;
# Read the event format or use the default
if ( ! open ( FORMAT , "/sys/kernel/debug/tracing/events/$event/format" ) ) {
$ regex = $ default ;
} else {
my $ line ;
while ( ! eof ( FORMAT ) ) {
$ line = <FORMAT> ;
if ( $ line =~ /^print fmt:\s"(.*)",.*/ ) {
$ regex = $ 1 ;
$ regex =~ s/%p/\([0-9a-f]*\)/g ;
$ regex =~ s/%d/\([-0-9]*\)/g ;
$ regex =~ s/%lu/\([0-9]*\)/g ;
}
}
}
# Verify fields are in the right order
my $ tuple ;
foreach $ tuple ( split /\s/ , $ regex ) {
my ( $ key , $ value ) = split ( /=/ , $ tuple ) ;
my $ expected = shift ;
if ( $ key ne $ expected ) {
print ( "WARNING: Format not as expected '$key' != '$expected'" ) ;
$ regex =~ s/$key=\((.*)\)/$key=$1/ ;
}
}
if ( defined shift ) {
die ( "Fewer fields than expected in format" ) ;
}
return $ regex ;
}
$ regex_fragdetails = generate_traceevent_regex ( "kmem/mm_page_alloc_extfrag" ,
$ regex_fragdetails_default ,
"page" , "pfn" ,
"alloc_order" , "fallback_order" , "pageblock_order" ,
"alloc_migratetype" , "fallback_migratetype" ,
"fragmenting" , "change_ownership" ) ;
sub read_statline ($) {
my $ pid = $ _ [ 0 ] ;
my $ statline ;
if ( open ( STAT , "/proc/$pid/stat" ) ) {
$ statline = <STAT> ;
close ( STAT ) ;
}
if ( $ statline eq '' ) {
$ statline = "-1 (UNKNOWN_PROCESS_NAME) R 0" ;
}
return $ statline ;
}
sub guess_process_pid ($$) {
my $ pid = $ _ [ 0 ] ;
my $ statline = $ _ [ 1 ] ;
if ( $ pid == 0 ) {
return "swapper-0" ;
}
if ( $ statline !~ /$regex_statname/o ) {
die ( "Failed to math stat line for process name :: $statline" ) ;
}
return "$1-$pid" ;
}
sub parent_info ($$) {
my $ pid = $ _ [ 0 ] ;
my $ statline = $ _ [ 1 ] ;
my $ ppid ;
if ( $ pid == 0 ) {
return "NOPARENT-0" ;
}
if ( $ statline !~ /$regex_statppid/o ) {
die ( "Failed to match stat line process ppid:: $statline" ) ;
}
# Read the ppid stat line
$ ppid = $ 1 ;
return guess_process_pid ( $ ppid , read_statline ( $ ppid ) ) ;
}
sub process_events {
my $ traceevent ;
my $ process_pid ;
my $ cpus ;
my $ timestamp ;
my $ tracepoint ;
my $ details ;
my $ statline ;
# Read each line of the event log
EVENT_PROCESS:
while ( $ traceevent = <STDIN> ) {
if ( $ traceevent =~ /$regex_traceevent/o ) {
$ process_pid = $ 1 ;
$ tracepoint = $ 4 ;
if ( $ opt_read_procstat || $ opt_prepend_parent ) {
$ process_pid =~ /(.*)-([0-9]*)$/ ;
my $ process = $ 1 ;
my $ pid = $ 2 ;
$ statline = read_statline ( $ pid ) ;
if ( $ opt_read_procstat && $ process eq '' ) {
$ process_pid = guess_process_pid ( $ pid , $ statline ) ;
}
if ( $ opt_prepend_parent ) {
$ process_pid = parent_info ( $ pid , $ statline ) . " :: $process_pid" ;
}
}
# Unnecessary in this script. Uncomment if required
# $cpus = $2;
# $timestamp = $3;
} else {
next ;
}
# Perl Switch() sucks majorly
if ( $ tracepoint eq "mm_page_alloc" ) {
$ perprocesspid { $ process_pid } - > { MM_PAGE_ALLOC } + + ;
2012-01-11 03:07:09 +04:00
} elsif ( $ tracepoint eq "mm_page_free" ) {
$ perprocesspid { $ process_pid } - > { MM_PAGE_FREE } + +
} elsif ( $ tracepoint eq "mm_page_free_batched" ) {
$ perprocesspid { $ process_pid } - > { MM_PAGE_FREE_BATCHED } + + ;
2009-09-22 04:02:47 +04:00
} elsif ( $ tracepoint eq "mm_page_pcpu_drain" ) {
$ perprocesspid { $ process_pid } - > { MM_PAGE_PCPU_DRAIN } + + ;
$ perprocesspid { $ process_pid } - > { STATE_PCPU_PAGES_DRAINED } + + ;
} elsif ( $ tracepoint eq "mm_page_alloc_zone_locked" ) {
$ perprocesspid { $ process_pid } - > { MM_PAGE_ALLOC_ZONE_LOCKED } + + ;
$ perprocesspid { $ process_pid } - > { STATE_PCPU_PAGES_REFILLED } + + ;
} elsif ( $ tracepoint eq "mm_page_alloc_extfrag" ) {
# Extract the details of the event now
$ details = $ 5 ;
my ( $ page , $ pfn ) ;
my ( $ alloc_order , $ fallback_order , $ pageblock_order ) ;
my ( $ alloc_migratetype , $ fallback_migratetype ) ;
my ( $ fragmenting , $ change_ownership ) ;
if ( $ details !~ /$regex_fragdetails/o ) {
print "WARNING: Failed to parse mm_page_alloc_extfrag as expected\n" ;
next ;
}
$ perprocesspid { $ process_pid } - > { MM_PAGE_ALLOC_EXTFRAG } + + ;
$ page = $ 1 ;
$ pfn = $ 2 ;
$ alloc_order = $ 3 ;
$ fallback_order = $ 4 ;
$ pageblock_order = $ 5 ;
$ alloc_migratetype = $ 6 ;
$ fallback_migratetype = $ 7 ;
$ fragmenting = $ 8 ;
$ change_ownership = $ 9 ;
if ( $ fragmenting ) {
$ perprocesspid { $ process_pid } - > { HIGH_EXT_FRAG } + + ;
if ( $ fallback_order <= 3 ) {
$ perprocesspid { $ process_pid } - > { HIGH_EXT_FRAGMENT_SEVERE } + + ;
} else {
$ perprocesspid { $ process_pid } - > { HIGH_EXT_FRAGMENT_MODERATE } + + ;
}
}
if ( $ change_ownership ) {
$ perprocesspid { $ process_pid } - > { HIGH_EXT_FRAGMENT_CHANGED } + + ;
}
} else {
$ perprocesspid { $ process_pid } - > { EVENT_UNKNOWN } + + ;
}
# Catch a full pcpu drain event
if ( $ perprocesspid { $ process_pid } - > { STATE_PCPU_PAGES_DRAINED } &&
$ tracepoint ne "mm_page_pcpu_drain" ) {
$ perprocesspid { $ process_pid } - > { HIGH_PCPU_DRAINS } + + ;
$ perprocesspid { $ process_pid } - > { STATE_PCPU_PAGES_DRAINED } = 0 ;
}
# Catch a full pcpu refill event
if ( $ perprocesspid { $ process_pid } - > { STATE_PCPU_PAGES_REFILLED } &&
$ tracepoint ne "mm_page_alloc_zone_locked" ) {
$ perprocesspid { $ process_pid } - > { HIGH_PCPU_REFILLS } + + ;
$ perprocesspid { $ process_pid } - > { STATE_PCPU_PAGES_REFILLED } = 0 ;
}
if ( $ sigint_pending ) {
last EVENT_PROCESS ;
}
}
}
sub dump_stats {
my $ hashref = shift ;
my % stats = %$ hashref ;
# Dump per-process stats
my $ process_pid ;
my $ max_strlen = 0 ;
# Get the maximum process name
foreach $ process_pid ( keys % perprocesspid ) {
my $ len = length ( $ process_pid ) ;
if ( $ len > $ max_strlen ) {
$ max_strlen = $ len ;
}
}
$ max_strlen += 2 ;
printf ( "\n" ) ;
printf ( "%-" . $ max_strlen . "s %8s %10s %8s %8s %8s %8s %8s %8s %8s %8s %8s %8s %8s\n" ,
"Process" , "Pages" , "Pages" , "Pages" , "Pages" , "PCPU" , "PCPU" , "PCPU" , "Fragment" , "Fragment" , "MigType" , "Fragment" , "Fragment" , "Unknown" ) ;
printf ( "%-" . $ max_strlen . "s %8s %10s %8s %8s %8s %8s %8s %8s %8s %8s %8s %8s %8s\n" ,
"details" , "allocd" , "allocd" , "freed" , "freed" , "pages" , "drains" , "refills" , "Fallback" , "Causing" , "Changed" , "Severe" , "Moderate" , "" ) ;
printf ( "%-" . $ max_strlen . "s %8s %10s %8s %8s %8s %8s %8s %8s %8s %8s %8s %8s %8s\n" ,
"" , "" , "under lock" , "direct" , "pagevec" , "drain" , "" , "" , "" , "" , "" , "" , "" , "" ) ;
foreach $ process_pid ( keys % stats ) {
# Dump final aggregates
if ( $ stats { $ process_pid } - > { STATE_PCPU_PAGES_DRAINED } ) {
$ stats { $ process_pid } - > { HIGH_PCPU_DRAINS } + + ;
$ stats { $ process_pid } - > { STATE_PCPU_PAGES_DRAINED } = 0 ;
}
if ( $ stats { $ process_pid } - > { STATE_PCPU_PAGES_REFILLED } ) {
$ stats { $ process_pid } - > { HIGH_PCPU_REFILLS } + + ;
$ stats { $ process_pid } - > { STATE_PCPU_PAGES_REFILLED } = 0 ;
}
printf ( "%-" . $ max_strlen . "s %8d %10d %8d %8d %8d %8d %8d %8d %8d %8d %8d %8d %8d\n" ,
$ process_pid ,
$ stats { $ process_pid } - > { MM_PAGE_ALLOC } ,
$ stats { $ process_pid } - > { MM_PAGE_ALLOC_ZONE_LOCKED } ,
2012-01-11 03:07:09 +04:00
$ stats { $ process_pid } - > { MM_PAGE_FREE } ,
$ stats { $ process_pid } - > { MM_PAGE_FREE_BATCHED } ,
2009-09-22 04:02:47 +04:00
$ stats { $ process_pid } - > { MM_PAGE_PCPU_DRAIN } ,
$ stats { $ process_pid } - > { HIGH_PCPU_DRAINS } ,
$ stats { $ process_pid } - > { HIGH_PCPU_REFILLS } ,
$ stats { $ process_pid } - > { MM_PAGE_ALLOC_EXTFRAG } ,
$ stats { $ process_pid } - > { HIGH_EXT_FRAG } ,
$ stats { $ process_pid } - > { HIGH_EXT_FRAGMENT_CHANGED } ,
$ stats { $ process_pid } - > { HIGH_EXT_FRAGMENT_SEVERE } ,
$ stats { $ process_pid } - > { HIGH_EXT_FRAGMENT_MODERATE } ,
$ stats { $ process_pid } - > { EVENT_UNKNOWN } ) ;
}
}
sub aggregate_perprocesspid () {
my $ process_pid ;
my $ process ;
undef % perprocess ;
foreach $ process_pid ( keys % perprocesspid ) {
$ process = $ process_pid ;
$ process =~ s/-([0-9])*$// ;
if ( $ process eq '' ) {
$ process = "NO_PROCESS_NAME" ;
}
$ perprocess { $ process } - > { MM_PAGE_ALLOC } += $ perprocesspid { $ process_pid } - > { MM_PAGE_ALLOC } ;
$ perprocess { $ process } - > { MM_PAGE_ALLOC_ZONE_LOCKED } += $ perprocesspid { $ process_pid } - > { MM_PAGE_ALLOC_ZONE_LOCKED } ;
2012-01-11 03:07:09 +04:00
$ perprocess { $ process } - > { MM_PAGE_FREE } += $ perprocesspid { $ process_pid } - > { MM_PAGE_FREE } ;
$ perprocess { $ process } - > { MM_PAGE_FREE_BATCHED } += $ perprocesspid { $ process_pid } - > { MM_PAGE_FREE_BATCHED } ;
2009-09-22 04:02:47 +04:00
$ perprocess { $ process } - > { MM_PAGE_PCPU_DRAIN } += $ perprocesspid { $ process_pid } - > { MM_PAGE_PCPU_DRAIN } ;
$ perprocess { $ process } - > { HIGH_PCPU_DRAINS } += $ perprocesspid { $ process_pid } - > { HIGH_PCPU_DRAINS } ;
$ perprocess { $ process } - > { HIGH_PCPU_REFILLS } += $ perprocesspid { $ process_pid } - > { HIGH_PCPU_REFILLS } ;
$ perprocess { $ process } - > { MM_PAGE_ALLOC_EXTFRAG } += $ perprocesspid { $ process_pid } - > { MM_PAGE_ALLOC_EXTFRAG } ;
$ perprocess { $ process } - > { HIGH_EXT_FRAG } += $ perprocesspid { $ process_pid } - > { HIGH_EXT_FRAG } ;
$ perprocess { $ process } - > { HIGH_EXT_FRAGMENT_CHANGED } += $ perprocesspid { $ process_pid } - > { HIGH_EXT_FRAGMENT_CHANGED } ;
$ perprocess { $ process } - > { HIGH_EXT_FRAGMENT_SEVERE } += $ perprocesspid { $ process_pid } - > { HIGH_EXT_FRAGMENT_SEVERE } ;
$ perprocess { $ process } - > { HIGH_EXT_FRAGMENT_MODERATE } += $ perprocesspid { $ process_pid } - > { HIGH_EXT_FRAGMENT_MODERATE } ;
$ perprocess { $ process } - > { EVENT_UNKNOWN } += $ perprocesspid { $ process_pid } - > { EVENT_UNKNOWN } ;
}
}
sub report () {
if ( ! $ opt_ignorepid ) {
dump_stats ( \ % perprocesspid ) ;
} else {
aggregate_perprocesspid ( ) ;
dump_stats ( \ % perprocess ) ;
}
}
# Process events or signals until neither is available
sub signal_loop () {
my $ sigint_processed ;
do {
$ sigint_processed = 0 ;
process_events ( ) ;
# Handle pending signals if any
if ( $ sigint_pending ) {
my $ current_time = time ;
if ( $ sigint_exit ) {
print "Received exit signal\n" ;
$ sigint_pending = 0 ;
}
if ( $ sigint_report ) {
if ( $ current_time >= $ sigint_received + 2 ) {
report ( ) ;
$ sigint_report = 0 ;
$ sigint_pending = 0 ;
$ sigint_processed = 1 ;
}
}
}
} while ( $ sigint_pending || $ sigint_processed ) ;
}
signal_loop ( ) ;
report ( ) ;