/*
 * Performance counter x86 architecture code
 *
 * Copyright (C) 2008 Thomas Gleixner <tglx@linutronix.de>
 * Copyright (C) 2008 Red Hat, Inc., Ingo Molnar
 *
 * For licencing details see kernel-base/COPYING
 */

#include <linux/perf_counter.h>
#include <linux/capability.h>
#include <linux/notifier.h>
#include <linux/hardirq.h>
#include <linux/kprobes.h>
#include <linux/module.h>
#include <linux/kdebug.h>
#include <linux/sched.h>

#include <asm/perf_counter.h>
#include <asm/apic.h>

static bool perf_counters_initialized __read_mostly;

/*
 * Number of (generic) HW counters:
 */
static int nr_counters_generic __read_mostly;
static u64 perf_counter_mask __read_mostly;
static u64 counter_value_mask __read_mostly;

static int nr_counters_fixed __read_mostly;

struct cpu_hw_counters {
	struct perf_counter	*counters[X86_PMC_IDX_MAX];
	unsigned long		used[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
	unsigned long		interrupts;
	u64			global_enable;
};

/*
 * Intel PerfMon v3. Used on Core2 and later.
 */
static DEFINE_PER_CPU(struct cpu_hw_counters, cpu_hw_counters);

static const int intel_perfmon_event_map[] =
{
	[PERF_COUNT_CPU_CYCLES]			= 0x003c,
	[PERF_COUNT_INSTRUCTIONS]		= 0x00c0,
	[PERF_COUNT_CACHE_REFERENCES]		= 0x4f2e,
	[PERF_COUNT_CACHE_MISSES]		= 0x412e,
	[PERF_COUNT_BRANCH_INSTRUCTIONS]	= 0x00c4,
	[PERF_COUNT_BRANCH_MISSES]		= 0x00c5,
	[PERF_COUNT_BUS_CYCLES]			= 0x013c,
};

static const int max_intel_perfmon_events = ARRAY_SIZE(intel_perfmon_event_map);

/*
 * Propagate counter elapsed time into the generic counter.
 * Can only be executed on the CPU where the counter is active.
 */
static void
x86_perf_counter_update(struct perf_counter *counter,
			struct hw_perf_counter *hwc, int idx)
{
	u64 prev_raw_count, new_raw_count, delta;

	/*
	 * Careful: an NMI might modify the previous counter value.
	 *
	 * Our tactic to handle this is to first atomically read and
	 * exchange a new raw count - then add that new-prev delta
	 * count to the generic counter atomically:
	 */
again:
	prev_raw_count = atomic64_read(&hwc->prev_count);
	rdmsrl(hwc->counter_base + idx, new_raw_count);

	if (atomic64_cmpxchg(&hwc->prev_count, prev_raw_count,
					new_raw_count) != prev_raw_count)
		goto again;

	/*
	 * Now we have the new raw value and have updated the prev
	 * timestamp already. We can now calculate the elapsed delta
	 * (counter-)time and add that to the generic counter.
	 *
	 * Careful, not all hw sign-extends above the physical width
	 * of the count, so we do that by clipping the delta to 32 bits:
	 */
	delta = (u64)(u32)((s32)new_raw_count - (s32)prev_raw_count);

	atomic64_add(delta, &counter->count);
	atomic64_sub(delta, &hwc->period_left);
}
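
/*
 * Example of the 32-bit clipping above: if the low 32 bits of the
 * previous raw count were 0xffffff00 and the new read shows 0x00000100,
 * then (s32)new - (s32)prev = 0x200 (512 events), which is the correct
 * delta across the wrap as long as fewer than 2^31 events elapsed
 * between two updates.
 */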

/*
 * Setup the hardware configuration for a given hw_event_type
 */
static int __hw_perf_counter_init(struct perf_counter *counter)
{
	struct perf_counter_hw_event *hw_event = &counter->hw_event;
	struct hw_perf_counter *hwc = &counter->hw;

	if (unlikely(!perf_counters_initialized))
		return -EINVAL;

	/*
	 * Count user events, and generate PMC IRQs:
	 * (keep 'enabled' bit clear for now)
	 */
	hwc->config = ARCH_PERFMON_EVENTSEL_USR | ARCH_PERFMON_EVENTSEL_INT;

	/*
	 * If privileged enough, count OS events too, and allow
	 * NMI events as well:
	 */
	hwc->nmi = 0;
	if (capable(CAP_SYS_ADMIN)) {
		hwc->config |= ARCH_PERFMON_EVENTSEL_OS;
		if (hw_event->nmi)
			hwc->nmi = 1;
	}

	hwc->irq_period = hw_event->irq_period;
	/*
	 * Intel PMCs cannot be accessed sanely above 32 bit width,
	 * so we install an artificial 1<<31 period regardless of
	 * the generic counter period:
	 */
	if ((s64)hwc->irq_period <= 0 || hwc->irq_period > 0x7FFFFFFF)
		hwc->irq_period = 0x7FFFFFFF;

	atomic64_set(&hwc->period_left, hwc->irq_period);

	/*
	 * Raw event type provides the config in the event structure
	 */
	if (hw_event->raw) {
		hwc->config |= hw_event->type;
	} else {
		if (hw_event->type >= max_intel_perfmon_events)
			return -EINVAL;
		/*
		 * The generic map:
		 */
		hwc->config |= intel_perfmon_event_map[hw_event->type];
	}
	counter->wakeup_pending = 0;

	return 0;
}
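
/*
 * As an example, a non-raw request for PERF_COUNT_CPU_CYCLES ends up
 * with:
 *
 *	hwc->config = 0x003c | ARCH_PERFMON_EVENTSEL_USR |
 *		      ARCH_PERFMON_EVENTSEL_INT;
 *
 * plus ARCH_PERFMON_EVENTSEL_OS for CAP_SYS_ADMIN callers. The enable
 * bit stays clear here and is set later by __pmc_generic_enable().
 */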

u64 hw_perf_save_disable(void)
{
	u64 ctrl;

	if (unlikely(!perf_counters_initialized))
		return 0;

	rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, ctrl);
	wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0);

	return ctrl;
}
EXPORT_SYMBOL_GPL(hw_perf_save_disable);

void hw_perf_restore(u64 ctrl)
{
	if (unlikely(!perf_counters_initialized))
		return;

	wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, ctrl);
}
EXPORT_SYMBOL_GPL(hw_perf_restore);
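
/*
 * Callers typically bracket PMU reprogramming with this pair:
 *
 *	u64 ctrl = hw_perf_save_disable();
 *	... reprogram counters ...
 *	hw_perf_restore(ctrl);
 *
 * The saved value is just the previous MSR_CORE_PERF_GLOBAL_CTRL
 * contents, so restore puts back whatever enable mask was active.
 */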

static inline void
__pmc_fixed_disable(struct perf_counter *counter,
		    struct hw_perf_counter *hwc, unsigned int __idx)
{
	int idx = __idx - X86_PMC_IDX_FIXED;
	u64 ctrl_val, mask;
	int err;

	mask = 0xfULL << (idx * 4);

	rdmsrl(hwc->config_base, ctrl_val);
	ctrl_val &= ~mask;
	err = checking_wrmsrl(hwc->config_base, ctrl_val);
}

static inline void
__pmc_generic_disable(struct perf_counter *counter,
		      struct hw_perf_counter *hwc, unsigned int idx)
{
	if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL))
		__pmc_fixed_disable(counter, hwc, idx);
	else
		wrmsr_safe(hwc->config_base + idx, hwc->config, 0);
}

static DEFINE_PER_CPU(u64, prev_left[X86_PMC_IDX_MAX]);

/*
 * Set the next IRQ period, based on the hwc->period_left value.
 * To be called with the counter disabled in hw:
 */
static void
__hw_perf_counter_set_period(struct perf_counter *counter,
			     struct hw_perf_counter *hwc, int idx)
{
	s64 left = atomic64_read(&hwc->period_left);
	s32 period = hwc->irq_period;
	int err;

	/*
	 * If we are way outside a reasonable range then just skip forward:
	 */
	if (unlikely(left <= -period)) {
		left = period;
		atomic64_set(&hwc->period_left, left);
	}

	if (unlikely(left <= 0)) {
		left += period;
		atomic64_set(&hwc->period_left, left);
	}

	per_cpu(prev_left[idx], smp_processor_id()) = left;

	/*
	 * The hw counter starts counting from this counter offset,
	 * mark it to be able to extract future deltas:
	 */
	atomic64_set(&hwc->prev_count, (u64)-left);

	err = checking_wrmsrl(hwc->counter_base + idx,
			      (u64)(-left) & counter_value_mask);
}
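
/*
 * Writing (u64)(-left) & counter_value_mask arms the counter to
 * overflow after exactly 'left' increments. For example, with a 40-bit
 * counter and left == 0x7fffffff the hardware starts at 0xff80000001
 * and raises the PMI once it wraps, 0x7fffffff events later.
 */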

static inline void
__pmc_fixed_enable(struct perf_counter *counter,
		   struct hw_perf_counter *hwc, unsigned int __idx)
{
	int idx = __idx - X86_PMC_IDX_FIXED;
	u64 ctrl_val, bits, mask;
	int err;

	/*
	 * Enable IRQ generation (0x8) and ring-3 counting (0x2),
	 * and enable ring-0 counting if allowed:
	 */
	bits = 0x8ULL | 0x2ULL;
	if (hwc->config & ARCH_PERFMON_EVENTSEL_OS)
		bits |= 0x1;
	bits <<= (idx * 4);
	mask = 0xfULL << (idx * 4);

	rdmsrl(hwc->config_base, ctrl_val);
	ctrl_val &= ~mask;
	ctrl_val |= bits;
	err = checking_wrmsrl(hwc->config_base, ctrl_val);
}
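
/*
 * Each fixed counter owns a 4-bit field in MSR_ARCH_PERFMON_FIXED_CTR_CTRL;
 * for fixed counter 1 (__idx == X86_PMC_IDX_FIXED + 1) the bits above are
 * shifted into bits 7:4, and the read-modify-write with 'mask' leaves the
 * other counters' fields untouched.
 */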

static void
__pmc_generic_enable(struct perf_counter *counter,
		     struct hw_perf_counter *hwc, int idx)
{
	if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL))
		__pmc_fixed_enable(counter, hwc, idx);
	else
		wrmsr(hwc->config_base + idx,
		      hwc->config | ARCH_PERFMON_EVENTSEL0_ENABLE, 0);
}

static int
fixed_mode_idx(struct perf_counter *counter, struct hw_perf_counter *hwc)
{
	unsigned int event;

	if (unlikely(hwc->nmi))
		return -1;

	event = hwc->config & ARCH_PERFMON_EVENT_MASK;

	if (unlikely(event == intel_perfmon_event_map[PERF_COUNT_INSTRUCTIONS]))
		return X86_PMC_IDX_FIXED_INSTRUCTIONS;
	if (unlikely(event == intel_perfmon_event_map[PERF_COUNT_CPU_CYCLES]))
		return X86_PMC_IDX_FIXED_CPU_CYCLES;
	if (unlikely(event == intel_perfmon_event_map[PERF_COUNT_BUS_CYCLES]))
		return X86_PMC_IDX_FIXED_BUS_CYCLES;

	return -1;
}
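
/*
 * Only the three architectural events with dedicated fixed-function
 * counters qualify here; e.g. a counter configured for event 0x00c0
 * (instructions retired) is steered to X86_PMC_IDX_FIXED_INSTRUCTIONS,
 * while cache and branch events always go to a generic PMC.
 */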

/*
 * Find a PMC slot for the freshly enabled / scheduled in counter:
 */
static int pmc_generic_enable(struct perf_counter *counter)
{
	struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
	struct hw_perf_counter *hwc = &counter->hw;
	int idx;

	idx = fixed_mode_idx(counter, hwc);
	if (idx >= 0) {
		/*
		 * Try to get the fixed counter, if that is already taken
		 * then try to get a generic counter:
		 */
		if (test_and_set_bit(idx, cpuc->used))
			goto try_generic;

		hwc->config_base = MSR_ARCH_PERFMON_FIXED_CTR_CTRL;
		/*
		 * We set it so that counter_base + idx in wrmsr/rdmsr maps to
		 * MSR_ARCH_PERFMON_FIXED_CTR0 ... CTR2:
		 */
		hwc->counter_base =
			MSR_ARCH_PERFMON_FIXED_CTR0 - X86_PMC_IDX_FIXED;
		hwc->idx = idx;
	} else {
		idx = hwc->idx;
		/* Try to get the previous generic counter again */
		if (test_and_set_bit(idx, cpuc->used)) {
try_generic:
			idx = find_first_zero_bit(cpuc->used, nr_counters_generic);
			if (idx == nr_counters_generic)
				return -EAGAIN;

			set_bit(idx, cpuc->used);
			hwc->idx = idx;
		}
		hwc->config_base = MSR_ARCH_PERFMON_EVENTSEL0;
		hwc->counter_base = MSR_ARCH_PERFMON_PERFCTR0;
	}

	perf_counters_lapic_init(hwc->nmi);

	__pmc_generic_disable(counter, hwc, idx);

	cpuc->counters[idx] = counter;
	/*
	 * Make it visible before enabling the hw:
	 */
	smp_wmb();

	__hw_perf_counter_set_period(counter, hwc, idx);
	__pmc_generic_enable(counter, hwc, idx);

	return 0;
}

void perf_counter_print_debug(void)
{
	u64 ctrl, status, overflow, pmc_ctrl, pmc_count, prev_left, fixed;
	struct cpu_hw_counters *cpuc;
	int cpu, idx;

	if (!nr_counters_generic)
		return;

	local_irq_disable();

	cpu = smp_processor_id();
	cpuc = &per_cpu(cpu_hw_counters, cpu);

	rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, ctrl);
	rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status);
	rdmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, overflow);
	rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR_CTRL, fixed);

	printk(KERN_INFO "\n");
	printk(KERN_INFO "CPU#%d: ctrl:     %016llx\n", cpu, ctrl);
	printk(KERN_INFO "CPU#%d: status:   %016llx\n", cpu, status);
	printk(KERN_INFO "CPU#%d: overflow: %016llx\n", cpu, overflow);
	printk(KERN_INFO "CPU#%d: fixed:    %016llx\n", cpu, fixed);
	printk(KERN_INFO "CPU#%d: used:     %016llx\n", cpu, *(u64 *)cpuc->used);

	for (idx = 0; idx < nr_counters_generic; idx++) {
		rdmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + idx, pmc_ctrl);
		rdmsrl(MSR_ARCH_PERFMON_PERFCTR0 + idx, pmc_count);

		prev_left = per_cpu(prev_left[idx], cpu);

		printk(KERN_INFO "CPU#%d: gen-PMC%d ctrl:  %016llx\n",
			cpu, idx, pmc_ctrl);
		printk(KERN_INFO "CPU#%d: gen-PMC%d count: %016llx\n",
			cpu, idx, pmc_count);
		printk(KERN_INFO "CPU#%d: gen-PMC%d left:  %016llx\n",
			cpu, idx, prev_left);
	}
	for (idx = 0; idx < nr_counters_fixed; idx++) {
		rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, pmc_count);

		printk(KERN_INFO "CPU#%d: fixed-PMC%d count: %016llx\n",
			cpu, idx, pmc_count);
	}
	local_irq_enable();
}

static void pmc_generic_disable(struct perf_counter *counter)
{
	struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
	struct hw_perf_counter *hwc = &counter->hw;
	unsigned int idx = hwc->idx;

	__pmc_generic_disable(counter, hwc, idx);

	clear_bit(idx, cpuc->used);
	cpuc->counters[idx] = NULL;
	/*
	 * Make sure the cleared pointer becomes visible before we
	 * (potentially) free the counter:
	 */
	smp_wmb();

	/*
	 * Drain the remaining delta count out of a counter
	 * that we are disabling:
	 */
	x86_perf_counter_update(counter, hwc, idx);
}

static void perf_store_irq_data(struct perf_counter *counter, u64 data)
{
	struct perf_data *irqdata = counter->irqdata;

	if (irqdata->len > PERF_DATA_BUFLEN - sizeof(u64)) {
		irqdata->overrun++;
	} else {
		u64 *p = (u64 *) &irqdata->data[irqdata->len];

		*p = data;
		irqdata->len += sizeof(u64);
	}
}

/*
 * Save and restart an expired counter. Called by NMI contexts,
 * so it has to be careful about preempting normal counter ops:
 */
static void perf_save_and_restart(struct perf_counter *counter)
{
	struct hw_perf_counter *hwc = &counter->hw;
	int idx = hwc->idx;

	x86_perf_counter_update(counter, hwc, idx);
	__hw_perf_counter_set_period(counter, hwc, idx);

	if (counter->state == PERF_COUNTER_STATE_ACTIVE)
		__pmc_generic_enable(counter, hwc, idx);
}

static void
perf_handle_group(struct perf_counter *sibling, u64 *status, u64 *overflown)
{
	struct perf_counter *counter, *group_leader = sibling->group_leader;

	/*
	 * Store sibling timestamps (if any):
	 */
	list_for_each_entry(counter, &group_leader->sibling_list, list_entry) {

		x86_perf_counter_update(counter, &counter->hw, counter->hw.idx);
		perf_store_irq_data(sibling, counter->hw_event.type);
		perf_store_irq_data(sibling, atomic64_read(&counter->count));
	}
}

/*
 * Maximum interrupt frequency of 100KHz per CPU
 */
#define PERFMON_MAX_INTERRUPTS (100000/HZ)
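
/*
 * The per-CPU interrupt count is cleared in perf_counter_unthrottle(),
 * which is expected to run HZ times a second; with HZ=1000 this caps a
 * CPU at 100 overflow interrupts between two resets, after which
 * __smp_perf_counter_interrupt() stops re-enabling the counters.
 */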

/*
 * This handler is triggered by the local APIC, so the APIC IRQ handling
 * rules apply:
 */
static void __smp_perf_counter_interrupt(struct pt_regs *regs, int nmi)
{
	int bit, cpu = smp_processor_id();
	u64 ack, status;
	struct cpu_hw_counters *cpuc = &per_cpu(cpu_hw_counters, cpu);

	rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, cpuc->global_enable);

	/* Disable counters globally */
	wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0);
	ack_APIC_irq();

	rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status);
	if (!status)
		goto out;

again:
	inc_irq_stat(apic_perf_irqs);
	ack = status;
	for_each_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) {
		struct perf_counter *counter = cpuc->counters[bit];

		clear_bit(bit, (unsigned long *)&status);
		if (!counter)
			continue;

		perf_save_and_restart(counter);

		switch (counter->hw_event.record_type) {
		case PERF_RECORD_SIMPLE:
			continue;
		case PERF_RECORD_IRQ:
			perf_store_irq_data(counter, instruction_pointer(regs));
			break;
		case PERF_RECORD_GROUP:
			perf_handle_group(counter, &status, &ack);
			break;
		}
		/*
		 * From NMI context we cannot call into the scheduler to
		 * do a task wakeup - but we mark these counters as
		 * wakeup_pending and initiate a wakeup callback:
		 */
		if (nmi) {
			counter->wakeup_pending = 1;
			set_tsk_thread_flag(current, TIF_PERF_COUNTERS);
		} else {
			wake_up(&counter->waitq);
		}
	}

	wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, ack);

	/*
	 * Repeat if there is more work to be done:
	 */
	rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status);
	if (status)
		goto again;
out:
	/*
	 * Restore - do not reenable when global enable is off or throttled:
	 */
	if (++cpuc->interrupts < PERFMON_MAX_INTERRUPTS)
		wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, cpuc->global_enable);
}

void perf_counter_unthrottle(void)
{
	struct cpu_hw_counters *cpuc;
	u64 global_enable;

	if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
		return;

	if (unlikely(!perf_counters_initialized))
		return;

	cpuc = &per_cpu(cpu_hw_counters, smp_processor_id());
	if (cpuc->interrupts >= PERFMON_MAX_INTERRUPTS) {
		if (printk_ratelimit())
			printk(KERN_WARNING "PERFMON: max interrupts exceeded!\n");
		wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, cpuc->global_enable);
	}
	rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, global_enable);
	if (unlikely(cpuc->global_enable && !global_enable))
		wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, cpuc->global_enable);
	cpuc->interrupts = 0;
}

void smp_perf_counter_interrupt(struct pt_regs *regs)
{
	irq_enter();
	apic_write(APIC_LVTPC, LOCAL_PERF_VECTOR);
	__smp_perf_counter_interrupt(regs, 0);

	irq_exit();
}

/*
 * This handler is triggered by NMI contexts:
 */
void perf_counter_notify(struct pt_regs *regs)
{
	struct cpu_hw_counters *cpuc;
	unsigned long flags;
	int bit, cpu;

	local_irq_save(flags);
	cpu = smp_processor_id();
	cpuc = &per_cpu(cpu_hw_counters, cpu);

	for_each_bit(bit, cpuc->used, X86_PMC_IDX_MAX) {
		struct perf_counter *counter = cpuc->counters[bit];

		if (!counter)
			continue;

		if (counter->wakeup_pending) {
			counter->wakeup_pending = 0;
			wake_up(&counter->waitq);
		}
	}

	local_irq_restore(flags);
}

void perf_counters_lapic_init(int nmi)
{
	u32 apic_val;

	if (!perf_counters_initialized)
		return;
	/*
	 * Enable the performance counter vector in the APIC LVT:
	 */
	apic_val = apic_read(APIC_LVTERR);

	apic_write(APIC_LVTERR, apic_val | APIC_LVT_MASKED);
	if (nmi)
		apic_write(APIC_LVTPC, APIC_DM_NMI);
	else
		apic_write(APIC_LVTPC, LOCAL_PERF_VECTOR);
	apic_write(APIC_LVTERR, apic_val);
}

static int __kprobes
perf_counter_nmi_handler(struct notifier_block *self,
			 unsigned long cmd, void *__args)
{
	struct die_args *args = __args;
	struct pt_regs *regs;

	if (likely(cmd != DIE_NMI_IPI))
		return NOTIFY_DONE;

	regs = args->regs;

	apic_write(APIC_LVTPC, APIC_DM_NMI);
	__smp_perf_counter_interrupt(regs, 1);

	return NOTIFY_STOP;
}

static __read_mostly struct notifier_block perf_counter_nmi_notifier = {
	.notifier_call		= perf_counter_nmi_handler,
	.next			= NULL,
	.priority		= 1
};

void __init init_hw_perf_counters(void)
{
	union cpuid10_eax eax;
	unsigned int ebx;
	unsigned int unused;
	union cpuid10_edx edx;

	if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
		return;

	/*
	 * Check whether the Architectural PerfMon supports
	 * Branch Misses Retired Event or not.
	 */
	cpuid(10, &eax.full, &ebx, &unused, &edx.full);
	if (eax.split.mask_length <= ARCH_PERFMON_BRANCH_MISSES_RETIRED)
		return;

	printk(KERN_INFO "Intel Performance Monitoring support detected.\n");

	printk(KERN_INFO "... version:      %d\n", eax.split.version_id);
	printk(KERN_INFO "... num counters: %d\n", eax.split.num_counters);

	nr_counters_generic = eax.split.num_counters;
	if (nr_counters_generic > X86_PMC_MAX_GENERIC) {
		nr_counters_generic = X86_PMC_MAX_GENERIC;
		WARN(1, KERN_ERR "hw perf counters %d > max(%d), clipping!",
			nr_counters_generic, X86_PMC_MAX_GENERIC);
	}
	perf_counter_mask = (1 << nr_counters_generic) - 1;
	perf_max_counters = nr_counters_generic;

	printk(KERN_INFO "... bit width:    %d\n", eax.split.bit_width);
	counter_value_mask = (1ULL << eax.split.bit_width) - 1;
	printk(KERN_INFO "... value mask:   %016Lx\n", counter_value_mask);

	printk(KERN_INFO "... mask length:  %d\n", eax.split.mask_length);

	nr_counters_fixed = edx.split.num_counters_fixed;
	if (nr_counters_fixed > X86_PMC_MAX_FIXED) {
		nr_counters_fixed = X86_PMC_MAX_FIXED;
		WARN(1, KERN_ERR "hw perf counters fixed %d > max(%d), clipping!",
			nr_counters_fixed, X86_PMC_MAX_FIXED);
	}
	printk(KERN_INFO "... fixed counters: %d\n", nr_counters_fixed);

	perf_counter_mask |= ((1LL << nr_counters_fixed)-1) << X86_PMC_IDX_FIXED;

	printk(KERN_INFO "... counter mask: %016Lx\n", perf_counter_mask);
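
	/*
	 * Example: a CPU reporting 4 generic and 3 fixed counters (with
	 * X86_PMC_IDX_FIXED == 32) ends up with perf_counter_mask ==
	 * 0x000000070000000f - the low bits cover the generic PMCs and
	 * bits 34:32 the fixed ones.
	 */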

	perf_counters_initialized = true;

	perf_counters_lapic_init(0);
	register_die_notifier(&perf_counter_nmi_notifier);
}

static void pmc_generic_read(struct perf_counter *counter)
{
	x86_perf_counter_update(counter, &counter->hw, counter->hw.idx);
}

static const struct hw_perf_counter_ops x86_perf_counter_ops = {
	.enable		= pmc_generic_enable,
	.disable	= pmc_generic_disable,
	.read		= pmc_generic_read,
};

const struct hw_perf_counter_ops *
hw_perf_counter_init(struct perf_counter *counter)
{
	int err;

	err = __hw_perf_counter_init(counter);
	if (err)
		return NULL;

	return &x86_perf_counter_ops;
}