/*
 * Performance events - AMD IBS
 *
 * Copyright (C) 2011 Advanced Micro Devices, Inc., Robert Richter
 *
 * For licencing details see kernel-base/COPYING
 */

#include <linux/perf_event.h>
#include <linux/module.h>
#include <linux/pci.h>
#include <linux/ptrace.h>

#include <asm/apic.h>

static u32 ibs_caps;
#if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_AMD)

#include <linux/kprobes.h>
#include <linux/hardirq.h>

#include <asm/nmi.h>

#define IBS_FETCH_CONFIG_MASK	(IBS_FETCH_RAND_EN | IBS_FETCH_MAX_CNT)
#define IBS_OP_CONFIG_MASK	IBS_OP_MAX_CNT

enum ibs_states {
	IBS_ENABLED	= 0,
	IBS_STARTED	= 1,
	IBS_STOPPING	= 2,

	IBS_MAX_STATES,
};

struct cpu_perf_ibs {
	struct perf_event	*event;
	unsigned long		state[BITS_TO_LONGS(IBS_MAX_STATES)];
};

struct perf_ibs {
	struct pmu			pmu;
	unsigned int			msr;
	u64				config_mask;
	u64				cnt_mask;
	u64				enable_mask;
	u64				valid_mask;
	u64				max_period;
	unsigned long			offset_mask[1];
	int				offset_max;
	struct cpu_perf_ibs __percpu	*pcpu;
	u64				(*get_count)(u64 config);
};

struct perf_ibs_data {
	u32		size;
	union {
		u32	data[0];	/* data buffer starts here */
		u32	caps;
	};
	u64		regs[MSR_AMD64_IBS_REG_COUNT_MAX];
};

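/*
 * Compute the next hardware period from the remaining software sample
 * period: clamp the value into [min, max] and return nonzero if a
 * software overflow has to be reported. The clamped period is handed
 * back via *hw_period for the caller to program into the IBS max count
 * field.
 */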
static int
perf_event_set_period(struct hw_perf_event *hwc, u64 min, u64 max, u64 *hw_period)
{
	s64 left = local64_read(&hwc->period_left);
	s64 period = hwc->sample_period;
	int overflow = 0;

	/*
	 * If we are way outside a reasonable range then just skip forward:
	 */
	if (unlikely(left <= -period)) {
		left = period;
		local64_set(&hwc->period_left, left);
		hwc->last_period = period;
		overflow = 1;
	}

	if (unlikely(left < (s64)min)) {
		left += period;
		local64_set(&hwc->period_left, left);
		hwc->last_period = period;
		overflow = 1;
	}

	/*
	 * If the hw period that triggers the sw overflow is too short
	 * we might hit the irq handler. This biases the results.
	 * Thus we shorten the next-to-last period and set the last
	 * period to the max period.
	 */
	if (left > max) {
		left -= max;
		if (left > max)
			left = max;
		else if (left < min)
			left = min;
	}

	*hw_period = (u64)left;

	return overflow;
}

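/*
 * Lock-less variant of the usual counter update: the cmpxchg on
 * prev_count detects a racing NMI. On failure we return 0 and the
 * caller re-reads the hardware value and retries (see
 * perf_ibs_event_update() below).
 */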
static  int
perf_event_try_update(struct perf_event *event, u64 new_raw_count, int width)
{
	struct hw_perf_event *hwc = &event->hw;
	int shift = 64 - width;
	u64 prev_raw_count;
	u64 delta;

	/*
	 * Careful: an NMI might modify the previous event value.
	 *
	 * Our tactic to handle this is to first atomically read and
	 * exchange a new raw count - then add that new-prev delta
	 * count to the generic event atomically:
	 */
	prev_raw_count = local64_read(&hwc->prev_count);
	if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
			new_raw_count) != prev_raw_count)
		return 0;

	/*
	 * Now we have the new raw value and have updated the prev
	 * timestamp already. We can now calculate the elapsed delta
	 * (event-)time and add that to the generic event.
	 *
	 * Careful, not all hw sign-extends above the physical width
	 * of the count.
	 */
	delta = (new_raw_count << shift) - (prev_raw_count << shift);
	delta >>= shift;

	local64_add(delta, &event->count);
	local64_sub(delta, &hwc->period_left);

	return 1;
}

static struct perf_ibs perf_ibs_fetch;
static struct perf_ibs perf_ibs_op;

static struct perf_ibs *get_ibs_pmu(int type)
{
	if (perf_ibs_fetch.pmu.type == type)
		return &perf_ibs_fetch;
	if (perf_ibs_op.pmu.type == type)
		return &perf_ibs_op;
	return NULL;
}

/*
 * Use IBS for precise event sampling:
 *
 *  perf record -a -e cpu-cycles:p ...    # use ibs op counting cycle count
 *  perf record -a -e r076:p ...          # same as -e cpu-cycles:p
 *  perf record -a -e r0C1:p ...          # use ibs op counting micro-ops
 *
 * IbsOpCntCtl (bit 19) of IBS Execution Control Register (IbsOpCtl,
 * MSRC001_1033) is used to select either cycle or micro-ops counting
 * mode.
 *
 * The rip of IBS samples has skid 0. Thus, IBS supports precise levels
 * 1 and 2 and the PERF_EFLAGS_EXACT is set. In rare cases the rip is
 * invalid when IBS was not able to record the rip correctly. We clear
 * PERF_EFLAGS_EXACT and take the rip from pt_regs then.
 */
static int perf_ibs_precise_event(struct perf_event *event, u64 *config)
{
	switch (event->attr.precise_ip) {
	case 0:
		return -ENOENT;
	case 1:
	case 2:
		break;
	default:
		return -EOPNOTSUPP;
	}

	switch (event->attr.type) {
	case PERF_TYPE_HARDWARE:
		switch (event->attr.config) {
		case PERF_COUNT_HW_CPU_CYCLES:
			*config = 0;
			return 0;
		}
		break;
	case PERF_TYPE_RAW:
		switch (event->attr.config) {
		case 0x0076:
			*config = 0;
			return 0;
		case 0x00C1:
			*config = IBS_OP_CNT_CTL;
			return 0;
		}
		break;
	default:
		return -ENOENT;
	}

	return -EOPNOTSUPP;
}

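/*
 * Event init: map the event onto the fetch or op PMU and validate its
 * attributes. Events opened directly on the dynamic ibs_fetch/ibs_op
 * PMU types pass attr.config through unchanged; precise cycle and
 * micro-op events are translated by perf_ibs_precise_event() above.
 * The sample period ends up in the max count field, whose lower 4 bits
 * are not implemented in hardware.
 */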
static int perf_ibs_init(struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;
	struct perf_ibs *perf_ibs;
	u64 max_cnt, config;
	int ret;

	perf_ibs = get_ibs_pmu(event->attr.type);
	if (perf_ibs) {
		config = event->attr.config;
	} else {
		perf_ibs = &perf_ibs_op;
		ret = perf_ibs_precise_event(event, &config);
		if (ret)
			return ret;
	}

	if (event->pmu != &perf_ibs->pmu)
		return -ENOENT;

	if (config & ~perf_ibs->config_mask)
		return -EINVAL;

	if (hwc->sample_period) {
		if (config & perf_ibs->cnt_mask)
			/* raw max_cnt may not be set */
			return -EINVAL;
		if (!event->attr.sample_freq && hwc->sample_period & 0x0f)
			/*
			 * The lower 4 bits cannot be set in the IBS max
			 * count, but we allow it here in case the sample
			 * period gets adjusted later to set a frequency.
			 */
			return -EINVAL;
		hwc->sample_period &= ~0x0FULL;
		if (!hwc->sample_period)
			hwc->sample_period = 0x10;
	} else {
		max_cnt = config & perf_ibs->cnt_mask;
		config &= ~perf_ibs->cnt_mask;
		event->attr.sample_period = max_cnt << 4;
		hwc->sample_period = event->attr.sample_period;
	}

	if (!hwc->sample_period)
		return -EINVAL;

	/*
	 * If we modify hwc->sample_period, we also need to update
	 * hwc->last_period and hwc->period_left.
	 */
	hwc->last_period = hwc->sample_period;
	local64_set(&hwc->period_left, hwc->sample_period);

	hwc->config_base = perf_ibs->msr;
	hwc->config = config;

	return 0;
}

static int perf_ibs_set_period(struct perf_ibs *perf_ibs,
			       struct hw_perf_event *hwc, u64 *period)
{
	int overflow;

	/* ignore lower 4 bits in min count: */
	overflow = perf_event_set_period(hwc, 1<<4, perf_ibs->max_period, period);
	local64_set(&hwc->prev_count, 0);

	return overflow;
}

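/*
 * The current fetch count lives in bits 31:16 of IbsFetchCtl; shifting
 * it right by 12 (down 16, back up 4) reconstructs the absolute count,
 * i.e. the hardware keeps the field in units of 16 with the low 4
 * counter bits not stored.
 */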
static u64 get_ibs_fetch_count(u64 config)
{
	return (config & IBS_FETCH_CNT) >> 12;
}

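/*
 * The op count is reconstructed from IbsOpCtl: if the valid bit is set
 * the counter has rolled over at the programmed maximum, so IbsOpMaxCnt
 * (times 16) is accounted. On CPUs with the RDWROPCNT capability the
 * current counter value (IbsOpCurCnt, in the upper half of the MSR) is
 * readable as well and is added on top.
 */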
static u64 get_ibs_op_count(u64 config)
{
	u64 count = 0;

	if (config & IBS_OP_VAL)
		count += (config & IBS_OP_MAX_CNT) << 4; /* cnt rolled over */

	if (ibs_caps & IBS_CAPS_RDWROPCNT)
		count += (config & IBS_OP_CUR_CNT) >> 32;

	return count;
}

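/*
 * Fold the current hardware count into the event. If a concurrent NMI
 * wins the race in perf_event_try_update(), re-read the control MSR
 * and retry with the fresh value.
 */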
static void
perf_ibs_event_update(struct perf_ibs *perf_ibs, struct perf_event *event,
		      u64 *config)
{
	u64 count = perf_ibs->get_count(*config);

	/*
	 * Set width to 64 since we do not overflow on max width but
	 * instead on max count. In perf_ibs_set_period() we clear
	 * prev count manually on overflow.
	 */
	while (!perf_event_try_update(event, count, 64)) {
		rdmsrl(event->hw.config_base, *config);
		count = perf_ibs->get_count(*config);
	}
}

static inline void perf_ibs_enable_event(struct perf_ibs *perf_ibs,
					 struct hw_perf_event *hwc, u64 config)
{
	wrmsrl(hwc->config_base, hwc->config | config | perf_ibs->enable_mask);
}

/*
 * Erratum #420 Instruction-Based Sampling Engine May Generate
 * Interrupt that Cannot Be Cleared:
 *
 * Must clear counter mask first, then clear the enable bit. See
 * Revision Guide for AMD Family 10h Processors, Publication #41322.
 */
static inline void perf_ibs_disable_event(struct perf_ibs *perf_ibs,
					  struct hw_perf_event *hwc, u64 config)
{
	config &= ~perf_ibs->cnt_mask;
	wrmsrl(hwc->config_base, config);
	config &= ~perf_ibs->enable_mask;
	wrmsrl(hwc->config_base, config);
}

/*
 * We cannot restore the ibs pmu state, so we always need to update the
 * event while stopping it and then reset the state when starting
 * again. Thus, we ignore the PERF_EF_RELOAD and PERF_EF_UPDATE flags
 * in perf_ibs_start()/perf_ibs_stop() and instead always do it.
 */
static void perf_ibs_start(struct perf_event *event, int flags)
{
	struct hw_perf_event *hwc = &event->hw;
	struct perf_ibs *perf_ibs = container_of(event->pmu, struct perf_ibs, pmu);
	struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);
	u64 period;

	if (WARN_ON_ONCE(!(hwc->state & PERF_HES_STOPPED)))
		return;

	WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));
	hwc->state = 0;

	perf_ibs_set_period(perf_ibs, hwc, &period);
	set_bit(IBS_STARTED, pcpu->state);
	perf_ibs_enable_event(perf_ibs, hwc, period >> 4);

	perf_event_update_userpage(event);
}

static void perf_ibs_stop(struct perf_event *event, int flags)
{
	struct hw_perf_event *hwc = &event->hw;
	struct perf_ibs *perf_ibs = container_of(event->pmu, struct perf_ibs, pmu);
	struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);
	u64 config;
	int stopping;

	stopping = test_and_clear_bit(IBS_STARTED, pcpu->state);

	if (!stopping && (hwc->state & PERF_HES_UPTODATE))
		return;

	rdmsrl(hwc->config_base, config);

	if (stopping) {
		set_bit(IBS_STOPPING, pcpu->state);
		perf_ibs_disable_event(perf_ibs, hwc, config);
		WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
		hwc->state |= PERF_HES_STOPPED;
	}

	if (hwc->state & PERF_HES_UPTODATE)
		return;

	/*
	 * Clear the valid bit so that rollovers are not counted on
	 * update; rollovers are only accounted for in the irq handler.
	 */
	config &= ~perf_ibs->valid_mask;

	perf_ibs_event_update(perf_ibs, event, &config);
	hwc->state |= PERF_HES_UPTODATE;
}

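/*
 * Only one IBS event of each flavour can be active per CPU; the
 * IBS_ENABLED bit in the per-CPU state acts as the reservation.
 */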
static int perf_ibs_add(struct perf_event *event, int flags)
{
	struct perf_ibs *perf_ibs = container_of(event->pmu, struct perf_ibs, pmu);
	struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);

	if (test_and_set_bit(IBS_ENABLED, pcpu->state))
		return -ENOSPC;

	event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED;

	pcpu->event = event;

	if (flags & PERF_EF_START)
		perf_ibs_start(event, PERF_EF_RELOAD);

	return 0;
}

static void perf_ibs_del(struct perf_event *event, int flags)
{
	struct perf_ibs *perf_ibs = container_of(event->pmu, struct perf_ibs, pmu);
	struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);

	if (!test_and_clear_bit(IBS_ENABLED, pcpu->state))
		return;

	perf_ibs_stop(event, PERF_EF_UPDATE);

	pcpu->event = NULL;

	perf_event_update_userpage(event);
}

static void perf_ibs_read(struct perf_event *event) { }

static struct perf_ibs perf_ibs_fetch = {
	.pmu = {
		.task_ctx_nr	= perf_invalid_context,

		.event_init	= perf_ibs_init,
		.add		= perf_ibs_add,
		.del		= perf_ibs_del,
		.start		= perf_ibs_start,
		.stop		= perf_ibs_stop,
		.read		= perf_ibs_read,
	},
	.msr			= MSR_AMD64_IBSFETCHCTL,
	.config_mask		= IBS_FETCH_CONFIG_MASK,
	.cnt_mask		= IBS_FETCH_MAX_CNT,
	.enable_mask		= IBS_FETCH_ENABLE,
	.valid_mask		= IBS_FETCH_VAL,
	.max_period		= IBS_FETCH_MAX_CNT << 4,
	.offset_mask		= { MSR_AMD64_IBSFETCH_REG_MASK },
	.offset_max		= MSR_AMD64_IBSFETCH_REG_COUNT,

	.get_count		= get_ibs_fetch_count,
};

static struct perf_ibs perf_ibs_op = {
	.pmu = {
		.task_ctx_nr	= perf_invalid_context,

		.event_init	= perf_ibs_init,
		.add		= perf_ibs_add,
		.del		= perf_ibs_del,
		.start		= perf_ibs_start,
		.stop		= perf_ibs_stop,
		.read		= perf_ibs_read,
	},
	.msr			= MSR_AMD64_IBSOPCTL,
	.config_mask		= IBS_OP_CONFIG_MASK,
	.cnt_mask		= IBS_OP_MAX_CNT,
	.enable_mask		= IBS_OP_ENABLE,
	.valid_mask		= IBS_OP_VAL,
	.max_period		= IBS_OP_MAX_CNT << 4,
	.offset_mask		= { MSR_AMD64_IBSOP_REG_MASK },
	.offset_max		= MSR_AMD64_IBSOP_REG_COUNT,

	.get_count		= get_ibs_op_count,
};

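/*
 * Per-unit NMI handling: discard spurious interrupts, check the valid
 * bit in the control MSR, fold the count into the event, program the
 * next period, collect the IBS sample registers (all of them when
 * PERF_SAMPLE_RAW is requested), push the sample to perf and re-enable
 * IBS unless the event got throttled.
 */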
static int perf_ibs_handle_irq(struct perf_ibs *perf_ibs, struct pt_regs *iregs)
{
	struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);
	struct perf_event *event = pcpu->event;
	struct hw_perf_event *hwc = &event->hw;
	struct perf_sample_data data;
	struct perf_raw_record raw;
	struct pt_regs regs;
	struct perf_ibs_data ibs_data;
	int offset, size, check_rip, offset_max, throttle = 0;
	unsigned int msr;
	u64 *buf, *config, period;

	if (!test_bit(IBS_STARTED, pcpu->state)) {
		/*
		 * Catch spurious interrupts after stopping IBS: After
		 * disabling IBS there could still be incoming NMIs
		 * with samples that even have the valid bit cleared.
		 * Mark all these NMIs as handled.
		 */
		return test_and_clear_bit(IBS_STOPPING, pcpu->state) ? 1 : 0;
	}

	msr = hwc->config_base;
	buf = ibs_data.regs;
	rdmsrl(msr, *buf);
	if (!(*buf++ & perf_ibs->valid_mask))
		return 0;

	config = &ibs_data.regs[0];
	perf_ibs_event_update(perf_ibs, event, config);
	perf_sample_data_init(&data, 0, hwc->last_period);
	if (!perf_ibs_set_period(perf_ibs, hwc, &period))
		goto out;	/* no sw counter overflow */

	ibs_data.caps = ibs_caps;
	size = 1;
	offset = 1;
	check_rip = (perf_ibs == &perf_ibs_op && (ibs_caps & IBS_CAPS_RIPINVALIDCHK));
	if (event->attr.sample_type & PERF_SAMPLE_RAW)
		offset_max = perf_ibs->offset_max;
	else if (check_rip)
		offset_max = 2;
	else
		offset_max = 1;
	do {
		rdmsrl(msr + offset, *buf++);
		size++;
		offset = find_next_bit(perf_ibs->offset_mask,
				       perf_ibs->offset_max,
				       offset + 1);
	} while (offset < offset_max);
	ibs_data.size = sizeof(u64) * size;

	regs = *iregs;
	if (check_rip && (ibs_data.regs[2] & IBS_RIP_INVALID)) {
		regs.flags &= ~PERF_EFLAGS_EXACT;
	} else {
		instruction_pointer_set(&regs, ibs_data.regs[1]);
		regs.flags |= PERF_EFLAGS_EXACT;
	}

	if (event->attr.sample_type & PERF_SAMPLE_RAW) {
		raw.size = sizeof(u32) + ibs_data.size;
		raw.data = ibs_data.data;
		data.raw = &raw;
	}

	throttle = perf_event_overflow(event, &data, &regs);
out:
	if (throttle)
		perf_ibs_disable_event(perf_ibs, hwc, *config);
	else
		perf_ibs_enable_event(perf_ibs, hwc, period >> 4);

	perf_event_update_userpage(event);

	return 1;
}

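/*
 * Both IBS units deliver their samples via the same NMI; try both
 * per-unit handlers and report the NMI as handled if either of them
 * claimed it.
 */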
static int __kprobes
perf_ibs_nmi_handler(unsigned int cmd, struct pt_regs *regs)
{
	int handled = 0;

	handled += perf_ibs_handle_irq(&perf_ibs_fetch, regs);
	handled += perf_ibs_handle_irq(&perf_ibs_op, regs);

	if (handled)
		inc_irq_stat(apic_perf_irqs);

	return handled;
}

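/* Allocate the per-CPU state and register one IBS PMU with the perf core. */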
static __init int perf_ibs_pmu_init(struct perf_ibs *perf_ibs, char *name)
{
	struct cpu_perf_ibs __percpu *pcpu;
	int ret;

	pcpu = alloc_percpu(struct cpu_perf_ibs);
	if (!pcpu)
		return -ENOMEM;

	perf_ibs->pcpu = pcpu;

	ret = perf_pmu_register(&perf_ibs->pmu, name, -1);
	if (ret) {
		perf_ibs->pcpu = NULL;
		free_percpu(pcpu);
	}

	return ret;
}

static __init int perf_event_ibs_init(void)
{
	if (!ibs_caps)
		return -ENODEV;	/* ibs not supported by the cpu */

	perf_ibs_pmu_init(&perf_ibs_fetch, "ibs_fetch");

	if (ibs_caps & IBS_CAPS_OPCNT)
		perf_ibs_op.config_mask |= IBS_OP_CNT_CTL;
	perf_ibs_pmu_init(&perf_ibs_op, "ibs_op");

	register_nmi_handler(NMI_LOCAL, perf_ibs_nmi_handler, 0, "perf_ibs");
	printk(KERN_INFO "perf: AMD IBS detected (0x%08x)\n", ibs_caps);

	return 0;
}

#else /* defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_AMD) */

static __init int perf_event_ibs_init(void) { return 0; }

#endif

/* IBS - apic initialization, for perf and oprofile */

static __init u32 __get_ibs_caps(void)
{
	u32 caps;
	unsigned int max_level;

	if (!boot_cpu_has(X86_FEATURE_IBS))
		return 0;

	/* check IBS cpuid feature flags */
	max_level = cpuid_eax(0x80000000);
	if (max_level < IBS_CPUID_FEATURES)
		return IBS_CAPS_DEFAULT;

	caps = cpuid_eax(IBS_CPUID_FEATURES);
	if (!(caps & IBS_CAPS_AVAIL))
		/* cpuid flags not valid */
		return IBS_CAPS_DEFAULT;

	return caps;
}

u32 get_ibs_caps(void)
{
	return ibs_caps;
}

EXPORT_SYMBOL(get_ibs_caps);

static inline int get_eilvt(int offset)
{
	return !setup_APIC_eilvt(offset, 0, APIC_EILVT_MSG_NMI, 1);
}

static inline int put_eilvt(int offset)
{
	return !setup_APIC_eilvt(offset, 0, 0, 1);
}

/*
 * Check and reserve APIC extended interrupt LVT offset for IBS if available.
 */
static inline int ibs_eilvt_valid(void)
{
	int offset;
	u64 val;
	int valid = 0;

	preempt_disable();

	rdmsrl(MSR_AMD64_IBSCTL, val);
	offset = val & IBSCTL_LVT_OFFSET_MASK;

	if (!(val & IBSCTL_LVT_OFFSET_VALID)) {
		pr_err(FW_BUG "cpu %d, invalid IBS interrupt offset %d (MSR%08X=0x%016llx)\n",
		       smp_processor_id(), offset, MSR_AMD64_IBSCTL, val);
		goto out;
	}

	if (!get_eilvt(offset)) {
		pr_err(FW_BUG "cpu %d, IBS interrupt offset %d not available (MSR%08X=0x%016llx)\n",
		       smp_processor_id(), offset, MSR_AMD64_IBSCTL, val);
		goto out;
	}

	valid = 1;
out:
	preempt_enable();

	return valid;
}

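/*
 * Write the chosen extended LVT offset into the IBSCTL register of
 * each node's northbridge PCI device so that every node routes the IBS
 * interrupt through the same LVT entry.
 */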
static int setup_ibs_ctl(int ibs_eilvt_off)
{
	struct pci_dev *cpu_cfg;
	int nodes;
	u32 value = 0;

	nodes = 0;
	cpu_cfg = NULL;
	do {
		cpu_cfg = pci_get_device(PCI_VENDOR_ID_AMD,
					 PCI_DEVICE_ID_AMD_10H_NB_MISC,
					 cpu_cfg);
		if (!cpu_cfg)
			break;
		++nodes;
		pci_write_config_dword(cpu_cfg, IBSCTL, ibs_eilvt_off
				       | IBSCTL_LVT_OFFSET_VALID);
		pci_read_config_dword(cpu_cfg, IBSCTL, &value);
		if (value != (ibs_eilvt_off | IBSCTL_LVT_OFFSET_VALID)) {
			pci_dev_put(cpu_cfg);
			printk(KERN_DEBUG "Failed to setup IBS LVT offset, "
			       "IBSCTL = 0x%08x\n", value);
			return -EINVAL;
		}
	} while (1);

	if (!nodes) {
		printk(KERN_DEBUG "No CPU node configured for IBS\n");
		return -ENODEV;
	}

	return 0;
}

/*
 * This runs only on the current cpu. We try to find an LVT offset and
 * setup the local APIC. For this we must disable preemption. On
 * success we initialize all nodes with this offset. This then updates
 * the offset in the per-node IBS_CTL msr. The per-core APIC setup of
 * the IBS interrupt vector is handled by perf_ibs_cpu_notifier, which
 * uses the new offset.
 */
static int force_ibs_eilvt_setup(void)
{
	int offset;
	int ret;

	preempt_disable();
	/* find the next free available EILVT entry, skip offset 0 */
	for (offset = 1; offset < APIC_EILVT_NR_MAX; offset++) {
		if (get_eilvt(offset))
			break;
	}
	preempt_enable();

	if (offset == APIC_EILVT_NR_MAX) {
		printk(KERN_DEBUG "No EILVT entry available\n");
		return -EBUSY;
	}

	ret = setup_ibs_ctl(offset);
	if (ret)
		goto out;

	if (!ibs_eilvt_valid()) {
		ret = -EFAULT;
		goto out;
	}

	pr_info("IBS: LVT offset %d assigned\n", offset);

	return 0;
out:
	preempt_disable();
	put_eilvt(offset);
	preempt_enable();

	return ret;
}

static inline int get_ibs_lvt_offset(void)
{
	u64 val;

	rdmsrl(MSR_AMD64_IBSCTL, val);
	if (!(val & IBSCTL_LVT_OFFSET_VALID))
		return -EINVAL;

	return val & IBSCTL_LVT_OFFSET_MASK;
}

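/*
 * Per-CPU APIC setup: point the extended LVT entry at the offset read
 * from MSR_AMD64_IBSCTL and configure it for NMI delivery.
 */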
static void setup_APIC_ibs(void *dummy)
{
	int offset;

	offset = get_ibs_lvt_offset();
	if (offset < 0)
		goto failed;

	if (!setup_APIC_eilvt(offset, 0, APIC_EILVT_MSG_NMI, 0))
		return;
failed:
	pr_warn("perf: IBS APIC setup failed on cpu #%d\n",
		smp_processor_id());
}

static void clear_APIC_ibs(void *dummy)
{
	int offset;

	offset = get_ibs_lvt_offset();
	if (offset >= 0)
		setup_APIC_eilvt(offset, 0, APIC_EILVT_MSG_FIX, 1);
}

static int __cpuinit
perf_ibs_cpu_notifier(struct notifier_block *self, unsigned long action, void *hcpu)
{
	switch (action & ~CPU_TASKS_FROZEN) {
	case CPU_STARTING:
		setup_APIC_ibs(NULL);
		break;
	case CPU_DYING:
		clear_APIC_ibs(NULL);
		break;
	default:
		break;
	}

	return NOTIFY_OK;
}

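/*
 * Late init: query the IBS capabilities, make sure a valid extended
 * LVT offset is in place (forcing the assignment on family 10h),
 * program the local APIC on all online CPUs and finally register the
 * IBS PMUs and the NMI handler.
 */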
static __init int amd_ibs_init(void)
{
	u32 caps;
	int ret = -EINVAL;

	caps = __get_ibs_caps();
	if (!caps)
		return -ENODEV;	/* ibs not supported by the cpu */

	/*
	 * Force LVT offset assignment for family 10h: The offsets are
	 * not assigned by the BIOS for this family, so the OS is
	 * responsible for doing it. If the OS assignment fails, fall
	 * back to BIOS settings and try to set it up that way.
	 */
	if (boot_cpu_data.x86 == 0x10)
		force_ibs_eilvt_setup();

	if (!ibs_eilvt_valid())
		goto out;

	get_online_cpus();
	ibs_caps = caps;
	/* make ibs_caps visible to other cpus: */
	smp_mb();
	perf_cpu_notifier(perf_ibs_cpu_notifier);
	smp_call_function(setup_APIC_ibs, NULL, 1);
	put_online_cpus();

	ret = perf_event_ibs_init();
out:
	if (ret)
		pr_err("Failed to setup IBS, %d\n", ret);
	return ret;
}

/* Since we need the pci subsystem to init ibs we can't do this earlier: */
device_initcall(amd_ibs_init);