2009-04-20 17:00:56 +04:00
/*
* kerneltop . c : show top kernel functions - performance counters showcase
Build with :
2009-04-29 14:47:26 +04:00
make - C Documentation / perf_counter /
2009-04-20 17:00:56 +04:00
Sample output :
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2009-05-26 17:25:34 +04:00
KernelTop : 2669 irqs / sec [ cache - misses / cache - refs ] , ( all , cpu : 2 )
2009-04-20 17:00:56 +04:00
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
weight RIP kernel function
______ ________________ _______________
35.20 - ffffffff804ce74b : skb_copy_and_csum_dev
33.00 - ffffffff804cb740 : sock_alloc_send_skb
31.26 - ffffffff804ce808 : skb_push
22.43 - ffffffff80510004 : tcp_established_options
19.00 - ffffffff8027d250 : find_get_page
15.76 - ffffffff804e4fc9 : eth_type_trans
15.20 - ffffffff804d8baa : dst_release
14.86 - ffffffff804cf5d8 : skb_release_head_state
14.00 - ffffffff802217d5 : read_hpet
12.00 - ffffffff804ffb7f : __ip_local_out
11.97 - ffffffff804fc0c8 : ip_local_deliver_finish
8.54 - ffffffff805001a3 : ip_queue_xmit
*/
/*
* Copyright ( C ) 2008 , Red Hat Inc , Ingo Molnar < mingo @ redhat . com >
*
* Improvements and fixes by :
*
* Arjan van de Ven < arjan @ linux . intel . com >
* Yanmin Zhang < yanmin . zhang @ intel . com >
* Wu Fengguang < fengguang . wu @ intel . com >
* Mike Galbraith < efault @ gmx . de >
* Paul Mackerras < paulus @ samba . org >
*
* Released under the GPL v2 . ( and only v2 , not any later version )
*/
2009-05-23 20:28:58 +04:00
# include "perf.h"
2009-05-27 11:10:38 +04:00
# include "builtin.h"
2009-05-28 21:55:41 +04:00
# include "util/symbol.h"
2009-04-27 10:02:14 +04:00
# include "util/util.h"
2009-05-28 21:55:41 +04:00
# include "util/rbtree.h"
2009-05-26 11:17:18 +04:00
# include "util/parse-options.h"
# include "util/parse-events.h"
2009-04-20 17:00:56 +04:00
# include <assert.h>
# include <fcntl.h>
2009-05-26 11:17:18 +04:00
2009-04-20 17:00:56 +04:00
# include <stdio.h>
2009-05-26 11:17:18 +04:00
2009-04-20 17:00:56 +04:00
# include <errno.h>
# include <time.h>
# include <sched.h>
# include <pthread.h>
# include <sys/syscall.h>
# include <sys/ioctl.h>
# include <sys/poll.h>
# include <sys/prctl.h>
# include <sys/wait.h>
# include <sys/uio.h>
# include <sys/mman.h>
# include <linux/unistd.h>
# include <linux/types.h>
static int system_wide = 0 ;
2009-05-26 11:17:18 +04:00
static __u64 default_event_id [ MAX_COUNTERS ] = {
2009-04-20 17:00:56 +04:00
EID ( PERF_TYPE_SOFTWARE , PERF_COUNT_TASK_CLOCK ) ,
EID ( PERF_TYPE_SOFTWARE , PERF_COUNT_CONTEXT_SWITCHES ) ,
EID ( PERF_TYPE_SOFTWARE , PERF_COUNT_CPU_MIGRATIONS ) ,
EID ( PERF_TYPE_SOFTWARE , PERF_COUNT_PAGE_FAULTS ) ,
EID ( PERF_TYPE_HARDWARE , PERF_COUNT_CPU_CYCLES ) ,
EID ( PERF_TYPE_HARDWARE , PERF_COUNT_INSTRUCTIONS ) ,
EID ( PERF_TYPE_HARDWARE , PERF_COUNT_CACHE_REFERENCES ) ,
EID ( PERF_TYPE_HARDWARE , PERF_COUNT_CACHE_MISSES ) ,
} ;
static int default_interval = 100000 ;
static int event_count [ MAX_COUNTERS ] ;
static int fd [ MAX_NR_CPUS ] [ MAX_COUNTERS ] ;
static __u64 count_filter = 100 ;
2009-05-26 11:17:18 +04:00
static int target_pid = - 1 ;
2009-04-20 17:00:56 +04:00
static int profile_cpu = - 1 ;
static int nr_cpus = 0 ;
static unsigned int realtime_prio = 0 ;
static int group = 0 ;
static unsigned int page_size ;
static unsigned int mmap_pages = 16 ;
static int use_mmap = 0 ;
static int use_munmap = 0 ;
2009-05-15 17:19:29 +04:00
static int freq = 0 ;
2009-04-20 17:00:56 +04:00
static char * sym_filter ;
static unsigned long filter_start ;
static unsigned long filter_end ;
static int delay_secs = 2 ;
static int zero ;
static int dump_symtab ;
2009-04-20 17:37:32 +04:00
/* Table of default sample counts; nothing in the visible code reads it. */
static const unsigned int default_count[] = {
	[0] = 1000000,
	[1] = 1000000,
	[2] =   10000,
	[3] =   10000,
	[4] = 1000000,
	[5] =   10000,
};
/*
* Symbols
*/
/*
 * Inclusive address window: process_event() counts any ip outside
 * [min_ip, max_ip] as a userspace event.  parse_symbols() narrows the
 * window to the first and last kernel symbol start.
 */
static uint64_t min_ip = 0;
static uint64_t max_ip = ~(uint64_t)0;
struct sym_entry {
2009-05-28 21:55:41 +04:00
struct rb_node rb_node ;
struct list_head node ;
2009-04-20 17:00:56 +04:00
unsigned long count [ MAX_COUNTERS ] ;
int skip ;
} ;
struct sym_entry	*sym_filter_entry;

/* Kernel symbol table; created by parse_symbols(), queried by record_ip(). */
struct dso		*kernel_dso;

/*
 * List of symbols that currently carry hits: record_ip() links a symbol
 * in, print_sym_table() unlinks it once its first counter has dropped
 * back to zero.
 */
static LIST_HEAD(active_symbols);
2009-04-20 17:00:56 +04:00
/*
* Ordering weight : count - 1 * count - 2 * . . . / count - n
*/
static double sym_weight ( const struct sym_entry * sym )
{
double weight ;
int counter ;
weight = sym - > count [ 0 ] ;
for ( counter = 1 ; counter < nr_counters - 1 ; counter + + )
weight * = sym - > count [ counter ] ;
weight / = ( sym - > count [ counter ] + 1 ) ;
return weight ;
}
/*
 * Event totals accumulated by process_event()/record_ip() since the last
 * refresh; print_sym_table() reads and then clears both.
 */
static long			events;
static long			userspace_events;

/*
 * ANSI control sequences "ESC [ H" (cursor home) and "ESC [ 2 J" (erase
 * display).  The ESC bytes are spelled as octal escapes so they survive
 * editors and tools that drop raw control characters.
 */
static const char		CONSOLE_CLEAR[] = "\033[H\033[2J";
2009-05-28 21:55:41 +04:00
static void list_insert_active_sym ( struct sym_entry * syme )
{
list_add ( & syme - > node , & active_symbols ) ;
}
static void rb_insert_active_sym ( struct rb_root * tree , struct sym_entry * se )
{
struct rb_node * * p = & tree - > rb_node ;
struct rb_node * parent = NULL ;
struct sym_entry * iter ;
while ( * p ! = NULL ) {
parent = * p ;
iter = rb_entry ( parent , struct sym_entry , rb_node ) ;
if ( sym_weight ( se ) > sym_weight ( iter ) )
p = & ( * p ) - > rb_left ;
else
p = & ( * p ) - > rb_right ;
}
rb_link_node ( & se - > rb_node , parent , p ) ;
rb_insert_color ( & se - > rb_node , tree ) ;
}
2009-04-20 17:00:56 +04:00
static void print_sym_table ( void )
{
2009-05-28 21:55:41 +04:00
int printed , j ;
2009-04-20 17:00:56 +04:00
int counter ;
float events_per_sec = events / delay_secs ;
float kevents_per_sec = ( events - userspace_events ) / delay_secs ;
float sum_kevents = 0.0 ;
2009-05-28 21:55:41 +04:00
struct sym_entry * syme , * n ;
struct rb_root tmp = RB_ROOT ;
struct rb_node * nd ;
2009-04-20 17:00:56 +04:00
events = userspace_events = 0 ;
2009-05-28 21:55:41 +04:00
/* Sort the active symbols */
list_for_each_entry_safe ( syme , n , & active_symbols , node ) {
if ( syme - > count [ 0 ] ! = 0 ) {
rb_insert_active_sym ( & tmp , syme ) ;
sum_kevents + = syme - > count [ 0 ] ;
perf top: Reduce display overhead
Iterate over the symbol table once per display interval, and
copy/sort/tally/decay only those symbols which are active.
Before:
top - 10:14:53 up 4:08, 17 users, load average: 1.17, 1.53, 1.49
Tasks: 273 total, 5 running, 268 sleeping, 0 stopped, 0 zombie
Cpu(s): 6.9%us, 38.2%sy, 0.0%ni, 19.9%id, 0.0%wa, 0.0%hi, 35.0%si, 0.0%st
PID USER PR NI VIRT RES SHR S %CPU %MEM TIME+ P COMMAND
28504 root 20 0 1044 260 164 S 58 0.0 0:04.19 2 netserver
28499 root 20 0 1040 412 316 R 51 0.0 0:04.15 0 netperf
28500 root 20 0 1040 408 316 R 50 0.0 0:04.14 1 netperf
28503 root 20 0 1044 260 164 S 50 0.0 0:04.01 1 netserver
28501 root 20 0 1044 260 164 S 49 0.0 0:03.99 0 netserver
28502 root 20 0 1040 412 316 S 43 0.0 0:03.96 2 netperf
28468 root 20 0 1892m 325m 972 S 16 10.8 0:10.50 3 perf
28467 root 20 0 1892m 325m 972 R 2 10.8 0:00.72 3 perf
After:
top - 10:16:30 up 4:10, 17 users, load average: 2.27, 1.88, 1.62
Tasks: 273 total, 6 running, 267 sleeping, 0 stopped, 0 zombie
Cpu(s): 2.5%us, 39.7%sy, 0.0%ni, 24.6%id, 0.0%wa, 0.0%hi, 33.3%si, 0.0%st
PID USER PR NI VIRT RES SHR S %CPU %MEM TIME+ P COMMAND
28590 root 20 0 1040 412 316 S 54 0.0 0:07.85 2 netperf
28589 root 20 0 1044 260 164 R 54 0.0 0:07.84 0 netserver
28588 root 20 0 1040 412 316 R 50 0.0 0:07.89 1 netperf
28591 root 20 0 1044 256 164 S 50 0.0 0:07.82 1 netserver
28587 root 20 0 1040 408 316 R 47 0.0 0:07.61 0 netperf
28592 root 20 0 1044 260 164 R 47 0.0 0:07.85 2 netserver
28378 root 20 0 8732 1300 860 R 2 0.0 0:01.81 3 top
28577 root 20 0 1892m 165m 972 R 2 5.5 0:00.48 3 perf
28578 root 20 0 1892m 165m 972 S 2 5.5 0:00.04 3 perf
[ Impact: optimization ]
Signed-off-by: Mike Galbraith <efault@gmx.de>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-05-25 11:57:56 +04:00
for ( j = 0 ; j < nr_counters ; j + + )
2009-05-28 21:55:41 +04:00
syme - > count [ j ] = zero ? 0 : syme - > count [ j ] * 7 / 8 ;
} else
list_del_init ( & syme - > node ) ;
perf top: Reduce display overhead
Iterate over the symbol table once per display interval, and
copy/sort/tally/decay only those symbols which are active.
Before:
top - 10:14:53 up 4:08, 17 users, load average: 1.17, 1.53, 1.49
Tasks: 273 total, 5 running, 268 sleeping, 0 stopped, 0 zombie
Cpu(s): 6.9%us, 38.2%sy, 0.0%ni, 19.9%id, 0.0%wa, 0.0%hi, 35.0%si, 0.0%st
PID USER PR NI VIRT RES SHR S %CPU %MEM TIME+ P COMMAND
28504 root 20 0 1044 260 164 S 58 0.0 0:04.19 2 netserver
28499 root 20 0 1040 412 316 R 51 0.0 0:04.15 0 netperf
28500 root 20 0 1040 408 316 R 50 0.0 0:04.14 1 netperf
28503 root 20 0 1044 260 164 S 50 0.0 0:04.01 1 netserver
28501 root 20 0 1044 260 164 S 49 0.0 0:03.99 0 netserver
28502 root 20 0 1040 412 316 S 43 0.0 0:03.96 2 netperf
28468 root 20 0 1892m 325m 972 S 16 10.8 0:10.50 3 perf
28467 root 20 0 1892m 325m 972 R 2 10.8 0:00.72 3 perf
After:
top - 10:16:30 up 4:10, 17 users, load average: 2.27, 1.88, 1.62
Tasks: 273 total, 6 running, 267 sleeping, 0 stopped, 0 zombie
Cpu(s): 2.5%us, 39.7%sy, 0.0%ni, 24.6%id, 0.0%wa, 0.0%hi, 33.3%si, 0.0%st
PID USER PR NI VIRT RES SHR S %CPU %MEM TIME+ P COMMAND
28590 root 20 0 1040 412 316 S 54 0.0 0:07.85 2 netperf
28589 root 20 0 1044 260 164 R 54 0.0 0:07.84 0 netserver
28588 root 20 0 1040 412 316 R 50 0.0 0:07.89 1 netperf
28591 root 20 0 1044 256 164 S 50 0.0 0:07.82 1 netserver
28587 root 20 0 1040 408 316 R 47 0.0 0:07.61 0 netperf
28592 root 20 0 1044 260 164 R 47 0.0 0:07.85 2 netserver
28378 root 20 0 8732 1300 860 R 2 0.0 0:01.81 3 top
28577 root 20 0 1892m 165m 972 R 2 5.5 0:00.48 3 perf
28578 root 20 0 1892m 165m 972 S 2 5.5 0:00.04 3 perf
[ Impact: optimization ]
Signed-off-by: Mike Galbraith <efault@gmx.de>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-05-25 11:57:56 +04:00
}
2009-04-20 17:00:56 +04:00
write ( 1 , CONSOLE_CLEAR , strlen ( CONSOLE_CLEAR ) ) ;
printf (
" ------------------------------------------------------------------------------ \n " ) ;
2009-05-26 17:25:34 +04:00
printf ( " KernelTop:%8.0f irqs/sec kernel:%4.1f%% [ " ,
2009-04-20 17:00:56 +04:00
events_per_sec ,
2009-05-26 17:25:34 +04:00
100.0 - ( 100.0 * ( ( events_per_sec - kevents_per_sec ) / events_per_sec ) ) ) ;
2009-04-20 17:00:56 +04:00
if ( nr_counters = = 1 )
printf ( " %d " , event_count [ 0 ] ) ;
for ( counter = 0 ; counter < nr_counters ; counter + + ) {
if ( counter )
printf ( " / " ) ;
printf ( " %s " , event_name ( counter ) ) ;
}
printf ( " ], " ) ;
2009-05-26 11:17:18 +04:00
if ( target_pid ! = - 1 )
printf ( " (target_pid: %d " , target_pid ) ;
2009-04-20 17:00:56 +04:00
else
printf ( " (all " ) ;
if ( profile_cpu ! = - 1 )
printf ( " , cpu: %d) \n " , profile_cpu ) ;
else {
2009-05-26 11:17:18 +04:00
if ( target_pid ! = - 1 )
2009-04-20 17:00:56 +04:00
printf ( " ) \n " ) ;
else
printf ( " , %d CPUs) \n " , nr_cpus ) ;
}
printf ( " ------------------------------------------------------------------------------ \n \n " ) ;
if ( nr_counters = = 1 )
printf ( " events pcnt " ) ;
else
printf ( " weight events pcnt " ) ;
printf ( " RIP kernel function \n "
" ______ ______ _____ ________________ _______________ \n \n "
) ;
2009-05-28 21:55:41 +04:00
for ( nd = rb_first ( & tmp ) ; nd ; nd = rb_next ( nd ) ) {
struct sym_entry * syme = rb_entry ( nd , struct sym_entry , rb_node ) ;
struct symbol * sym = ( struct symbol * ) ( syme + 1 ) ;
2009-04-20 17:00:56 +04:00
float pcnt ;
perf top: Reduce display overhead
Iterate over the symbol table once per display interval, and
copy/sort/tally/decay only those symbols which are active.
Before:
top - 10:14:53 up 4:08, 17 users, load average: 1.17, 1.53, 1.49
Tasks: 273 total, 5 running, 268 sleeping, 0 stopped, 0 zombie
Cpu(s): 6.9%us, 38.2%sy, 0.0%ni, 19.9%id, 0.0%wa, 0.0%hi, 35.0%si, 0.0%st
PID USER PR NI VIRT RES SHR S %CPU %MEM TIME+ P COMMAND
28504 root 20 0 1044 260 164 S 58 0.0 0:04.19 2 netserver
28499 root 20 0 1040 412 316 R 51 0.0 0:04.15 0 netperf
28500 root 20 0 1040 408 316 R 50 0.0 0:04.14 1 netperf
28503 root 20 0 1044 260 164 S 50 0.0 0:04.01 1 netserver
28501 root 20 0 1044 260 164 S 49 0.0 0:03.99 0 netserver
28502 root 20 0 1040 412 316 S 43 0.0 0:03.96 2 netperf
28468 root 20 0 1892m 325m 972 S 16 10.8 0:10.50 3 perf
28467 root 20 0 1892m 325m 972 R 2 10.8 0:00.72 3 perf
After:
top - 10:16:30 up 4:10, 17 users, load average: 2.27, 1.88, 1.62
Tasks: 273 total, 6 running, 267 sleeping, 0 stopped, 0 zombie
Cpu(s): 2.5%us, 39.7%sy, 0.0%ni, 24.6%id, 0.0%wa, 0.0%hi, 33.3%si, 0.0%st
PID USER PR NI VIRT RES SHR S %CPU %MEM TIME+ P COMMAND
28590 root 20 0 1040 412 316 S 54 0.0 0:07.85 2 netperf
28589 root 20 0 1044 260 164 R 54 0.0 0:07.84 0 netserver
28588 root 20 0 1040 412 316 R 50 0.0 0:07.89 1 netperf
28591 root 20 0 1044 256 164 S 50 0.0 0:07.82 1 netserver
28587 root 20 0 1040 408 316 R 47 0.0 0:07.61 0 netperf
28592 root 20 0 1044 260 164 R 47 0.0 0:07.85 2 netserver
28378 root 20 0 8732 1300 860 R 2 0.0 0:01.81 3 top
28577 root 20 0 1892m 165m 972 R 2 5.5 0:00.48 3 perf
28578 root 20 0 1892m 165m 972 S 2 5.5 0:00.04 3 perf
[ Impact: optimization ]
Signed-off-by: Mike Galbraith <efault@gmx.de>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-05-25 11:57:56 +04:00
2009-05-28 21:55:41 +04:00
if ( + + printed > 18 | | syme - > count [ 0 ] < count_filter )
perf top: Reduce display overhead
Iterate over the symbol table once per display interval, and
copy/sort/tally/decay only those symbols which are active.
Before:
top - 10:14:53 up 4:08, 17 users, load average: 1.17, 1.53, 1.49
Tasks: 273 total, 5 running, 268 sleeping, 0 stopped, 0 zombie
Cpu(s): 6.9%us, 38.2%sy, 0.0%ni, 19.9%id, 0.0%wa, 0.0%hi, 35.0%si, 0.0%st
PID USER PR NI VIRT RES SHR S %CPU %MEM TIME+ P COMMAND
28504 root 20 0 1044 260 164 S 58 0.0 0:04.19 2 netserver
28499 root 20 0 1040 412 316 R 51 0.0 0:04.15 0 netperf
28500 root 20 0 1040 408 316 R 50 0.0 0:04.14 1 netperf
28503 root 20 0 1044 260 164 S 50 0.0 0:04.01 1 netserver
28501 root 20 0 1044 260 164 S 49 0.0 0:03.99 0 netserver
28502 root 20 0 1040 412 316 S 43 0.0 0:03.96 2 netperf
28468 root 20 0 1892m 325m 972 S 16 10.8 0:10.50 3 perf
28467 root 20 0 1892m 325m 972 R 2 10.8 0:00.72 3 perf
After:
top - 10:16:30 up 4:10, 17 users, load average: 2.27, 1.88, 1.62
Tasks: 273 total, 6 running, 267 sleeping, 0 stopped, 0 zombie
Cpu(s): 2.5%us, 39.7%sy, 0.0%ni, 24.6%id, 0.0%wa, 0.0%hi, 33.3%si, 0.0%st
PID USER PR NI VIRT RES SHR S %CPU %MEM TIME+ P COMMAND
28590 root 20 0 1040 412 316 S 54 0.0 0:07.85 2 netperf
28589 root 20 0 1044 260 164 R 54 0.0 0:07.84 0 netserver
28588 root 20 0 1040 412 316 R 50 0.0 0:07.89 1 netperf
28591 root 20 0 1044 256 164 S 50 0.0 0:07.82 1 netserver
28587 root 20 0 1040 408 316 R 47 0.0 0:07.61 0 netperf
28592 root 20 0 1044 260 164 R 47 0.0 0:07.85 2 netserver
28378 root 20 0 8732 1300 860 R 2 0.0 0:01.81 3 top
28577 root 20 0 1892m 165m 972 R 2 5.5 0:00.48 3 perf
28578 root 20 0 1892m 165m 972 S 2 5.5 0:00.04 3 perf
[ Impact: optimization ]
Signed-off-by: Mike Galbraith <efault@gmx.de>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-05-25 11:57:56 +04:00
break ;
2009-05-28 21:55:41 +04:00
pcnt = 100.0 - ( 100.0 * ( ( sum_kevents - syme - > count [ 0 ] ) /
sum_kevents ) ) ;
perf top: Reduce display overhead
Iterate over the symbol table once per display interval, and
copy/sort/tally/decay only those symbols which are active.
Before:
top - 10:14:53 up 4:08, 17 users, load average: 1.17, 1.53, 1.49
Tasks: 273 total, 5 running, 268 sleeping, 0 stopped, 0 zombie
Cpu(s): 6.9%us, 38.2%sy, 0.0%ni, 19.9%id, 0.0%wa, 0.0%hi, 35.0%si, 0.0%st
PID USER PR NI VIRT RES SHR S %CPU %MEM TIME+ P COMMAND
28504 root 20 0 1044 260 164 S 58 0.0 0:04.19 2 netserver
28499 root 20 0 1040 412 316 R 51 0.0 0:04.15 0 netperf
28500 root 20 0 1040 408 316 R 50 0.0 0:04.14 1 netperf
28503 root 20 0 1044 260 164 S 50 0.0 0:04.01 1 netserver
28501 root 20 0 1044 260 164 S 49 0.0 0:03.99 0 netserver
28502 root 20 0 1040 412 316 S 43 0.0 0:03.96 2 netperf
28468 root 20 0 1892m 325m 972 S 16 10.8 0:10.50 3 perf
28467 root 20 0 1892m 325m 972 R 2 10.8 0:00.72 3 perf
After:
top - 10:16:30 up 4:10, 17 users, load average: 2.27, 1.88, 1.62
Tasks: 273 total, 6 running, 267 sleeping, 0 stopped, 0 zombie
Cpu(s): 2.5%us, 39.7%sy, 0.0%ni, 24.6%id, 0.0%wa, 0.0%hi, 33.3%si, 0.0%st
PID USER PR NI VIRT RES SHR S %CPU %MEM TIME+ P COMMAND
28590 root 20 0 1040 412 316 S 54 0.0 0:07.85 2 netperf
28589 root 20 0 1044 260 164 R 54 0.0 0:07.84 0 netserver
28588 root 20 0 1040 412 316 R 50 0.0 0:07.89 1 netperf
28591 root 20 0 1044 256 164 S 50 0.0 0:07.82 1 netserver
28587 root 20 0 1040 408 316 R 47 0.0 0:07.61 0 netperf
28592 root 20 0 1044 260 164 R 47 0.0 0:07.85 2 netserver
28378 root 20 0 8732 1300 860 R 2 0.0 0:01.81 3 top
28577 root 20 0 1892m 165m 972 R 2 5.5 0:00.48 3 perf
28578 root 20 0 1892m 165m 972 S 2 5.5 0:00.04 3 perf
[ Impact: optimization ]
Signed-off-by: Mike Galbraith <efault@gmx.de>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-05-25 11:57:56 +04:00
if ( nr_counters = = 1 )
printf ( " %19.2f - %4.1f%% - %016llx : %s \n " ,
2009-05-28 21:55:41 +04:00
sym_weight ( syme ) ,
pcnt , sym - > start , sym - > name ) ;
perf top: Reduce display overhead
Iterate over the symbol table once per display interval, and
copy/sort/tally/decay only those symbols which are active.
Before:
top - 10:14:53 up 4:08, 17 users, load average: 1.17, 1.53, 1.49
Tasks: 273 total, 5 running, 268 sleeping, 0 stopped, 0 zombie
Cpu(s): 6.9%us, 38.2%sy, 0.0%ni, 19.9%id, 0.0%wa, 0.0%hi, 35.0%si, 0.0%st
PID USER PR NI VIRT RES SHR S %CPU %MEM TIME+ P COMMAND
28504 root 20 0 1044 260 164 S 58 0.0 0:04.19 2 netserver
28499 root 20 0 1040 412 316 R 51 0.0 0:04.15 0 netperf
28500 root 20 0 1040 408 316 R 50 0.0 0:04.14 1 netperf
28503 root 20 0 1044 260 164 S 50 0.0 0:04.01 1 netserver
28501 root 20 0 1044 260 164 S 49 0.0 0:03.99 0 netserver
28502 root 20 0 1040 412 316 S 43 0.0 0:03.96 2 netperf
28468 root 20 0 1892m 325m 972 S 16 10.8 0:10.50 3 perf
28467 root 20 0 1892m 325m 972 R 2 10.8 0:00.72 3 perf
After:
top - 10:16:30 up 4:10, 17 users, load average: 2.27, 1.88, 1.62
Tasks: 273 total, 6 running, 267 sleeping, 0 stopped, 0 zombie
Cpu(s): 2.5%us, 39.7%sy, 0.0%ni, 24.6%id, 0.0%wa, 0.0%hi, 33.3%si, 0.0%st
PID USER PR NI VIRT RES SHR S %CPU %MEM TIME+ P COMMAND
28590 root 20 0 1040 412 316 S 54 0.0 0:07.85 2 netperf
28589 root 20 0 1044 260 164 R 54 0.0 0:07.84 0 netserver
28588 root 20 0 1040 412 316 R 50 0.0 0:07.89 1 netperf
28591 root 20 0 1044 256 164 S 50 0.0 0:07.82 1 netserver
28587 root 20 0 1040 408 316 R 47 0.0 0:07.61 0 netperf
28592 root 20 0 1044 260 164 R 47 0.0 0:07.85 2 netserver
28378 root 20 0 8732 1300 860 R 2 0.0 0:01.81 3 top
28577 root 20 0 1892m 165m 972 R 2 5.5 0:00.48 3 perf
28578 root 20 0 1892m 165m 972 S 2 5.5 0:00.04 3 perf
[ Impact: optimization ]
Signed-off-by: Mike Galbraith <efault@gmx.de>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-05-25 11:57:56 +04:00
else
printf ( " %8.1f %10ld - %4.1f%% - %016llx : %s \n " ,
2009-05-28 21:55:41 +04:00
sym_weight ( syme ) , syme - > count [ 0 ] ,
pcnt , sym - > start , sym - > name ) ;
2009-04-20 17:00:56 +04:00
}
{
struct pollfd stdin_poll = { . fd = 0 , . events = POLLIN } ;
if ( poll ( & stdin_poll , 1 , 0 ) = = 1 ) {
printf ( " key pressed - exiting. \n " ) ;
exit ( 0 ) ;
}
}
}
/*
 * Thread body started by __cmd_top(): announce the refresh period, then
 * redraw the table after every sleep that ran to completion.  The loop
 * ends (and the thread returns NULL) as soon as sleep() reports a
 * non-zero remainder.  @arg is unused.
 */
static void *display_thread(void *arg)
{
	printf(" KernelTop refresh period: %d seconds \n ", delay_secs);

	for (;;) {
		if (sleep(delay_secs) != 0)
			break;

		print_sym_table();
	}

	return NULL;
}
2009-05-28 21:55:41 +04:00
static int symbol_filter ( struct dso * self , struct symbol * sym )
2009-04-20 17:00:56 +04:00
{
2009-05-28 21:55:41 +04:00
static int filter_match ;
struct sym_entry * syme ;
const char * name = sym - > name ;
if ( ! strcmp ( name , " _text " ) | |
! strcmp ( name , " _etext " ) | |
! strcmp ( name , " _sinittext " ) | |
! strncmp ( " init_module " , name , 11 ) | |
! strncmp ( " cleanup_module " , name , 14 ) | |
strstr ( name , " _text_start " ) | |
strstr ( name , " _text_end " ) )
2009-04-20 17:00:56 +04:00
return 1 ;
2009-05-28 21:55:41 +04:00
syme = dso__sym_priv ( self , sym ) ;
2009-04-20 17:00:56 +04:00
/* Tag events to be skipped. */
2009-05-28 21:55:41 +04:00
if ( ! strcmp ( " default_idle " , name ) | |
! strcmp ( " cpu_idle " , name ) | |
! strcmp ( " enter_idle " , name ) | |
! strcmp ( " exit_idle " , name ) | |
! strcmp ( " mwait_idle " , name ) )
syme - > skip = 1 ;
2009-04-20 17:00:56 +04:00
if ( filter_match = = 1 ) {
2009-05-28 21:55:41 +04:00
filter_end = sym - > start ;
2009-04-20 17:00:56 +04:00
filter_match = - 1 ;
if ( filter_end - filter_start > 10000 ) {
2009-05-28 21:55:41 +04:00
fprintf ( stderr ,
" hm, too large filter symbol <%s> - skipping. \n " ,
2009-04-20 17:00:56 +04:00
sym_filter ) ;
2009-05-28 21:55:41 +04:00
fprintf ( stderr , " symbol filter start: %016lx \n " ,
filter_start ) ;
fprintf ( stderr , " end: %016lx \n " ,
filter_end ) ;
2009-04-20 17:00:56 +04:00
filter_end = filter_start = 0 ;
sym_filter = NULL ;
sleep ( 1 ) ;
}
}
2009-05-28 21:55:41 +04:00
if ( filter_match = = 0 & & sym_filter & & ! strcmp ( name , sym_filter ) ) {
2009-04-20 17:00:56 +04:00
filter_match = 1 ;
2009-05-28 21:55:41 +04:00
filter_start = sym - > start ;
2009-04-20 17:00:56 +04:00
}
2009-05-28 21:55:41 +04:00
2009-04-20 17:00:56 +04:00
return 0 ;
}
2009-05-28 21:55:41 +04:00
static int parse_symbols ( void )
2009-04-20 17:00:56 +04:00
{
2009-05-28 21:55:41 +04:00
struct rb_node * node ;
struct symbol * sym ;
2009-04-20 17:00:56 +04:00
2009-05-28 21:55:41 +04:00
kernel_dso = dso__new ( " [kernel] " , sizeof ( struct sym_entry ) ) ;
if ( kernel_dso = = NULL )
return - 1 ;
2009-04-20 17:00:56 +04:00
2009-05-28 21:55:41 +04:00
if ( dso__load_kernel ( kernel_dso , NULL , symbol_filter ) ! = 0 )
goto out_delete_dso ;
2009-04-20 17:00:56 +04:00
2009-05-28 21:55:41 +04:00
node = rb_first ( & kernel_dso - > syms ) ;
sym = rb_entry ( node , struct symbol , rb_node ) ;
min_ip = sym - > start ;
2009-04-20 17:00:56 +04:00
2009-05-28 21:55:41 +04:00
node = rb_last ( & kernel_dso - > syms ) ;
sym = rb_entry ( node , struct symbol , rb_node ) ;
max_ip = sym - > start ;
2009-04-20 17:00:56 +04:00
2009-05-28 21:55:41 +04:00
if ( dump_symtab )
2009-05-29 08:46:46 +04:00
dso__fprintf ( kernel_dso , stderr ) ;
2009-04-20 17:00:56 +04:00
2009-05-28 21:55:41 +04:00
return 0 ;
2009-04-20 17:00:56 +04:00
2009-05-28 21:55:41 +04:00
out_delete_dso :
dso__delete ( kernel_dso ) ;
kernel_dso = NULL ;
return - 1 ;
2009-04-20 17:00:56 +04:00
}
# define TRACE_COUNT 3
/*
* Binary search in the histogram table and record the hit :
*/
static void record_ip ( uint64_t ip , int counter )
{
2009-05-28 21:55:41 +04:00
struct symbol * sym = dso__find_symbol ( kernel_dso , ip ) ;
2009-04-20 17:00:56 +04:00
2009-05-28 21:55:41 +04:00
if ( sym ! = NULL ) {
struct sym_entry * syme = dso__sym_priv ( kernel_dso , sym ) ;
2009-04-20 17:00:56 +04:00
2009-05-28 21:55:41 +04:00
if ( ! syme - > skip ) {
syme - > count [ counter ] + + ;
if ( list_empty ( & syme - > node ) | | ! syme - > node . next )
list_insert_active_sym ( syme ) ;
return ;
2009-04-20 17:00:56 +04:00
}
}
2009-05-28 21:55:41 +04:00
events - - ;
2009-04-20 17:00:56 +04:00
}
static void process_event ( uint64_t ip , int counter )
{
events + + ;
if ( ip < min_ip | | ip > max_ip ) {
userspace_events + + ;
return ;
}
record_ip ( ip , counter ) ;
}
/* Reader-side state of one counter's mmap()ed buffer (see __cmd_top()). */
struct mmap_data {
	int		counter;	/* counter index passed on to process_event() */
	void		*base;		/* start of the mapping; data begins one page in */
	unsigned int	mask;		/* data area size in bytes minus one */
	unsigned int	prev;		/* position mmap_read() stopped at last time */
};
static unsigned int mmap_read_head ( struct mmap_data * md )
{
struct perf_counter_mmap_page * pc = md - > base ;
int head ;
head = pc - > data_head ;
rmb ( ) ;
return head ;
}
struct timeval last_read , this_read ;
static void mmap_read ( struct mmap_data * md )
{
unsigned int head = mmap_read_head ( md ) ;
unsigned int old = md - > prev ;
unsigned char * data = md - > base + page_size ;
int diff ;
gettimeofday ( & this_read , NULL ) ;
/*
* If we ' re further behind than half the buffer , there ' s a chance
* the writer will bite our tail and screw up the events under us .
*
* If we somehow ended up ahead of the head , we got messed up .
*
* In either case , truncate and restart at head .
*/
diff = head - old ;
if ( diff > md - > mask / 2 | | diff < 0 ) {
struct timeval iv ;
unsigned long msecs ;
timersub ( & this_read , & last_read , & iv ) ;
msecs = iv . tv_sec * 1000 + iv . tv_usec / 1000 ;
fprintf ( stderr , " WARNING: failed to keep up with mmap data. "
" Last read %lu msecs ago. \n " , msecs ) ;
/*
* head points to a known good entry , start there .
*/
old = head ;
}
last_read = this_read ;
for ( ; old ! = head ; ) {
struct ip_event {
struct perf_event_header header ;
__u64 ip ;
2009-05-26 11:17:18 +04:00
__u32 pid , target_pid ;
2009-04-20 17:00:56 +04:00
} ;
struct mmap_event {
struct perf_event_header header ;
2009-05-26 11:17:18 +04:00
__u32 pid , target_pid ;
2009-04-20 17:00:56 +04:00
__u64 start ;
__u64 len ;
__u64 pgoff ;
char filename [ PATH_MAX ] ;
} ;
typedef union event_union {
struct perf_event_header header ;
struct ip_event ip ;
struct mmap_event mmap ;
} event_t ;
event_t * event = ( event_t * ) & data [ old & md - > mask ] ;
event_t event_copy ;
2009-04-20 17:22:22 +04:00
size_t size = event - > header . size ;
2009-04-20 17:00:56 +04:00
/*
* Event straddles the mmap boundary - - header should always
* be inside due to u64 alignment of output .
*/
if ( ( old & md - > mask ) + size ! = ( ( old + size ) & md - > mask ) ) {
unsigned int offset = old ;
unsigned int len = min ( sizeof ( * event ) , size ) , cpy ;
void * dst = & event_copy ;
do {
cpy = min ( md - > mask + 1 - ( offset & md - > mask ) , len ) ;
memcpy ( dst , & data [ offset & md - > mask ] , cpy ) ;
offset + = cpy ;
dst + = cpy ;
len - = cpy ;
} while ( len ) ;
event = & event_copy ;
}
old + = size ;
if ( event - > header . misc & PERF_EVENT_MISC_OVERFLOW ) {
if ( event - > header . type & PERF_RECORD_IP )
process_event ( event - > ip . ip , md - > counter ) ;
} else {
switch ( event - > header . type ) {
case PERF_EVENT_MMAP :
case PERF_EVENT_MUNMAP :
printf ( " %s: %Lu %Lu %Lu %s \n " ,
event - > header . type = = PERF_EVENT_MMAP
? " mmap " : " munmap " ,
event - > mmap . start ,
event - > mmap . len ,
event - > mmap . pgoff ,
event - > mmap . filename ) ;
break ;
}
}
}
md - > prev = old ;
}
2009-05-24 10:35:49 +04:00
static struct pollfd event_array [ MAX_NR_CPUS * MAX_COUNTERS ] ;
static struct mmap_data mmap_array [ MAX_NR_CPUS ] [ MAX_COUNTERS ] ;
2009-05-26 11:17:18 +04:00
static int __cmd_top ( void )
2009-04-20 17:00:56 +04:00
{
struct perf_counter_hw_event hw_event ;
pthread_t thread ;
int i , counter , group_fd , nr_poll = 0 ;
unsigned int cpu ;
int ret ;
for ( i = 0 ; i < nr_cpus ; i + + ) {
group_fd = - 1 ;
for ( counter = 0 ; counter < nr_counters ; counter + + ) {
cpu = profile_cpu ;
2009-05-26 11:17:18 +04:00
if ( target_pid = = - 1 & & profile_cpu = = - 1 )
2009-04-20 17:00:56 +04:00
cpu = i ;
memset ( & hw_event , 0 , sizeof ( hw_event ) ) ;
hw_event . config = event_id [ counter ] ;
hw_event . irq_period = event_count [ counter ] ;
hw_event . record_type = PERF_RECORD_IP | PERF_RECORD_TID ;
2009-05-26 17:25:34 +04:00
hw_event . nmi = 1 ;
2009-04-20 17:00:56 +04:00
hw_event . mmap = use_mmap ;
hw_event . munmap = use_munmap ;
2009-05-15 17:19:29 +04:00
hw_event . freq = freq ;
2009-04-20 17:00:56 +04:00
2009-05-26 11:17:18 +04:00
fd [ i ] [ counter ] = sys_perf_counter_open ( & hw_event , target_pid , cpu , group_fd , 0 ) ;
2009-04-20 17:00:56 +04:00
if ( fd [ i ] [ counter ] < 0 ) {
int err = errno ;
printf ( " kerneltop error: syscall returned with %d (%s) \n " ,
fd [ i ] [ counter ] , strerror ( err ) ) ;
if ( err = = EPERM )
printf ( " Are you root? \n " ) ;
exit ( - 1 ) ;
}
assert ( fd [ i ] [ counter ] > = 0 ) ;
fcntl ( fd [ i ] [ counter ] , F_SETFL , O_NONBLOCK ) ;
/*
* First counter acts as the group leader :
*/
if ( group & & group_fd = = - 1 )
group_fd = fd [ i ] [ counter ] ;
event_array [ nr_poll ] . fd = fd [ i ] [ counter ] ;
event_array [ nr_poll ] . events = POLLIN ;
nr_poll + + ;
mmap_array [ i ] [ counter ] . counter = counter ;
mmap_array [ i ] [ counter ] . prev = 0 ;
mmap_array [ i ] [ counter ] . mask = mmap_pages * page_size - 1 ;
mmap_array [ i ] [ counter ] . base = mmap ( NULL , ( mmap_pages + 1 ) * page_size ,
PROT_READ , MAP_SHARED , fd [ i ] [ counter ] , 0 ) ;
if ( mmap_array [ i ] [ counter ] . base = = MAP_FAILED ) {
printf ( " kerneltop error: failed to mmap with %d (%s) \n " ,
errno , strerror ( errno ) ) ;
exit ( - 1 ) ;
}
}
}
if ( pthread_create ( & thread , NULL , display_thread , NULL ) ) {
printf ( " Could not create display thread. \n " ) ;
exit ( - 1 ) ;
}
if ( realtime_prio ) {
struct sched_param param ;
param . sched_priority = realtime_prio ;
if ( sched_setscheduler ( 0 , SCHED_FIFO , & param ) ) {
printf ( " Could not set realtime priority. \n " ) ;
exit ( - 1 ) ;
}
}
while ( 1 ) {
int hits = events ;
for ( i = 0 ; i < nr_cpus ; i + + ) {
for ( counter = 0 ; counter < nr_counters ; counter + + )
mmap_read ( & mmap_array [ i ] [ counter ] ) ;
}
if ( hits = = events )
ret = poll ( event_array , nr_poll , 100 ) ;
}
return 0 ;
}
2009-05-26 11:17:18 +04:00
/* NULL-terminated usage lines; cmd_top() passes them to parse_options() and usage_with_options(). */
static const char * const top_usage [ ] = {
" perf top [<options>] " ,
NULL
} ;
/* Help text of the -e/--event option; cmd_top() fills it through create_events_help(). */
static char events_help_msg [ EVENTS_HELP_MAX ] ;
static const struct option options [ ] = {
OPT_CALLBACK ( ' e ' , " event " , NULL , " event " ,
events_help_msg , parse_events ) ,
OPT_INTEGER ( ' c ' , " count " , & default_interval ,
" event period to sample " ) ,
OPT_INTEGER ( ' p ' , " pid " , & target_pid ,
" profile events on existing pid " ) ,
OPT_BOOLEAN ( ' a ' , " all-cpus " , & system_wide ,
" system-wide collection from all CPUs " ) ,
OPT_INTEGER ( ' C ' , " CPU " , & profile_cpu ,
" CPU to profile on " ) ,
OPT_INTEGER ( ' m ' , " mmap-pages " , & mmap_pages ,
" number of mmap data pages " ) ,
OPT_INTEGER ( ' r ' , " realtime " , & realtime_prio ,
" collect data with this RT SCHED_FIFO priority " ) ,
2009-05-26 17:25:34 +04:00
OPT_INTEGER ( ' d ' , " delay " , & delay_secs ,
2009-05-26 11:17:18 +04:00
" number of seconds to delay between refreshes " ) ,
OPT_BOOLEAN ( ' D ' , " dump-symtab " , & dump_symtab ,
" dump the symbol table used for profiling " ) ,
OPT_INTEGER ( ' f ' , " --count-filter " , & count_filter ,
" only display functions with more events than this " ) ,
OPT_BOOLEAN ( ' g ' , " group " , & group ,
" put the counters into a counter group " ) ,
OPT_STRING ( ' s ' , " sym-filter " , & sym_filter , " pattern " ,
" only display symbols matchig this pattern " ) ,
OPT_BOOLEAN ( ' z ' , " zero " , & group ,
" zero history across updates " ) ,
OPT_BOOLEAN ( ' M ' , " use-mmap " , & use_mmap ,
" track mmap events " ) ,
OPT_BOOLEAN ( ' U ' , " use-munmap " , & use_munmap ,
" track munmap events " ) ,
OPT_INTEGER ( ' F ' , " --freq " , & freq ,
" profile at this frequency " ) ,
OPT_END ( )
} ;
int cmd_top ( int argc , const char * * argv , const char * prefix )
{
int counter ;
page_size = sysconf ( _SC_PAGE_SIZE ) ;
create_events_help ( events_help_msg ) ;
memcpy ( event_id , default_event_id , sizeof ( default_event_id ) ) ;
argc = parse_options ( argc , argv , options , top_usage , 0 ) ;
if ( argc )
usage_with_options ( top_usage , options ) ;
if ( freq ) {
default_interval = freq ;
freq = 1 ;
}
/* CPU and PID are mutually exclusive */
if ( target_pid ! = - 1 & & profile_cpu ! = - 1 ) {
printf ( " WARNING: PID switch overriding CPU \n " ) ;
sleep ( 1 ) ;
profile_cpu = - 1 ;
}
if ( ! nr_counters ) {
nr_counters = 1 ;
event_id [ 0 ] = 0 ;
}
for ( counter = 0 ; counter < nr_counters ; counter + + ) {
if ( event_count [ counter ] )
continue ;
event_count [ counter ] = default_interval ;
}
nr_cpus = sysconf ( _SC_NPROCESSORS_ONLN ) ;
assert ( nr_cpus < = MAX_NR_CPUS ) ;
assert ( nr_cpus > = 0 ) ;
if ( target_pid ! = - 1 | | profile_cpu ! = - 1 )
nr_cpus = 1 ;
parse_symbols ( ) ;
return __cmd_top ( ) ;
}