2010-03-11 19:54:39 +03:00
/*
2011-03-17 16:24:16 -03:00
* Netburst Performance Events ( P4 , old Xeon )
2010-03-11 19:54:39 +03:00
*
* Copyright ( C ) 2010 Parallels , Inc . , Cyrill Gorcunov < gorcunov @ openvz . org >
* Copyright ( C ) 2010 Intel Corporation , Lin Ming < ming . m . lin @ intel . com >
*
* For licencing details see kernel - base / COPYING
*/
2011-08-30 20:41:05 -03:00
# include <linux/perf_event.h>
2010-03-11 19:54:39 +03:00
# include <asm/perf_event_p4.h>
2011-08-30 20:41:05 -03:00
# include <asm/hardirq.h>
# include <asm/apic.h>
2016-02-10 10:55:23 +01:00
# include "../perf_event.h"
2010-03-11 19:54:39 +03:00
2010-03-24 12:09:26 +08:00
/* Number of counter slots kept per HT thread in struct p4_event_bind */
#define P4_CNTR_LIMIT		3

/*
 * Hardware resources a single Netburst event can be bound to.
 *
 * The two-element arrays are indexed by HT thread (0 or 1) and are
 * used with HT enabled cpus.
 */
struct p4_event_bind {
	/* Event code and ESCR selector */
	unsigned int opcode;
	/* ESCR MSR for this event, one per HT thread */
	unsigned int escr_msr[2];
	/* valid ESCR EventMask bits */
	unsigned int escr_emask;
	/* event is shared across threads */
	unsigned int shared;
	/* counter index (offset) per HT thread, -1 on absence */
	signed char cntr[2][P4_CNTR_LIMIT];
};
2010-07-05 10:09:29 +08:00
/*
 * Register values tied to one PEBS metric.  Per the p4_pebs_bind_map
 * comment these are the values destined for MSR_IA32_PEBS_ENABLE and
 * MSR_P4_PEBS_MATRIX_VERT.
 */
struct p4_pebs_bind {
	/* PEBS enable bits (P4_GEN_PEBS_BIND adds P4_PEBS_ENABLE_UOP_TAG) */
	unsigned int metric_pebs;
	/* PEBS matrix vertical bits */
	unsigned int metric_vert;
};
2010-07-05 10:09:29 +08:00
/*
 * Build one designated initializer for a struct p4_pebs_bind array,
 * placed at index P4_PEBS_METRIC__<name>.
 *
 * @name: metric suffix, pasted onto P4_PEBS_METRIC__
 * @pebs: value stored in .metric_pebs; P4_PEBS_ENABLE_UOP_TAG is always
 *        OR-ed in as well
 * @vert: value stored in .metric_vert
 *
 * @pebs and @vert are parenthesized so that an expression argument
 * (e.g. a conditional) cannot swallow the OR with the UOP tag.
 */
#define P4_GEN_PEBS_BIND(name, pebs, vert)			\
	[P4_PEBS_METRIC__##name] = {				\
		.metric_pebs = (pebs) | P4_PEBS_ENABLE_UOP_TAG,	\
		.metric_vert = (vert),				\
	}
2010-07-05 10:09:29 +08:00
/*
* note we have P4_PEBS_ENABLE_UOP_TAG always set here
*
* it ' s needed for mapping P4_PEBS_CONFIG_METRIC_MASK bits of
* event configuration to find out which values are to be
* written into MSR_IA32_PEBS_ENABLE and MSR_P4_PEBS_MATRIX_VERT
2021-03-18 15:28:01 +01:00
* registers
2010-07-05 10:09:29 +08:00
*/
static struct p4_pebs_bind p4_pebs_bind_map [ ] = {
P4_GEN_PEBS_BIND ( 1 stl_cache_load_miss_retired , 0x0000001 , 0x0000001 ) ,
P4_GEN_PEBS_BIND ( 2 ndl_cache_load_miss_retired , 0x0000002 , 0x0000001 ) ,
P4_GEN_PEBS_BIND ( dtlb_load_miss_retired , 0x0000004 , 0x0000001 ) ,
P4_GEN_PEBS_BIND ( dtlb_store_miss_retired , 0x0000004 , 0x0000002 ) ,
P4_GEN_PEBS_BIND ( dtlb_all_miss_retired , 0x0000004 , 0x0000003 ) ,
P4_GEN_PEBS_BIND ( tagged_mispred_branch , 0x0018000 , 0x0000010 ) ,
P4_GEN_PEBS_BIND ( mob_load_replay_retired , 0x0000200 , 0x0000001 ) ,
P4_GEN_PEBS_BIND ( split_load_retired , 0x0000400 , 0x0000001 ) ,
P4_GEN_PEBS_BIND ( split_store_retired , 0x0000400 , 0x0000002 ) ,
2010-03-24 12:09:26 +08:00
} ;
/*
 * Resource bindings for the Netburst events, indexed by P4_EVENT_*
 * (p4_config_get_bind() looks an entry up by the event index unpacked
 * from a config word).
 *
 * Each entry provides:
 *   .opcode     - event code and ESCR selector, built with P4_OPCODE()
 *   .escr_msr   - ESCR MSR to use, one per HT thread (index 0 and 1)
 *   .escr_emask - the EventMask bits which are valid for the event
 *   .shared     - set when the event is shared across threads
 *   .cntr       - counter indices per HT thread, -1 marks an absent slot
 *
 * Note that we don't use CCCR1 here, there is an exception for
 * P4_BSQ_ALLOCATION but we just have no workaround.
 *
 * Consider this binding as resources which a particular event may
 * borrow: apart from the set of valid EventMask bits it doesn't hold
 * the EventMask selection, Tags and friends -- they are left to a caller.
 */
static struct p4_event_bind p4_event_bind_map [ ] = {
[ P4_EVENT_TC_DELIVER_MODE ] = {
. opcode = P4_OPCODE ( P4_EVENT_TC_DELIVER_MODE ) ,
. escr_msr = { MSR_P4_TC_ESCR0 , MSR_P4_TC_ESCR1 } ,
2010-08-25 22:23:34 +04:00
. escr_emask =
P4_ESCR_EMASK_BIT ( P4_EVENT_TC_DELIVER_MODE , DD ) |
P4_ESCR_EMASK_BIT ( P4_EVENT_TC_DELIVER_MODE , DB ) |
P4_ESCR_EMASK_BIT ( P4_EVENT_TC_DELIVER_MODE , DI ) |
P4_ESCR_EMASK_BIT ( P4_EVENT_TC_DELIVER_MODE , BD ) |
P4_ESCR_EMASK_BIT ( P4_EVENT_TC_DELIVER_MODE , BB ) |
P4_ESCR_EMASK_BIT ( P4_EVENT_TC_DELIVER_MODE , BI ) |
P4_ESCR_EMASK_BIT ( P4_EVENT_TC_DELIVER_MODE , ID ) ,
. shared = 1 ,
2010-03-24 12:09:26 +08:00
. cntr = { { 4 , 5 , - 1 } , { 6 , 7 , - 1 } } ,
} ,
[ P4_EVENT_BPU_FETCH_REQUEST ] = {
. opcode = P4_OPCODE ( P4_EVENT_BPU_FETCH_REQUEST ) ,
. escr_msr = { MSR_P4_BPU_ESCR0 , MSR_P4_BPU_ESCR1 } ,
2010-08-25 22:23:34 +04:00
. escr_emask =
P4_ESCR_EMASK_BIT ( P4_EVENT_BPU_FETCH_REQUEST , TCMISS ) ,
2010-03-24 12:09:26 +08:00
. cntr = { { 0 , - 1 , - 1 } , { 2 , - 1 , - 1 } } ,
} ,
[ P4_EVENT_ITLB_REFERENCE ] = {
. opcode = P4_OPCODE ( P4_EVENT_ITLB_REFERENCE ) ,
. escr_msr = { MSR_P4_ITLB_ESCR0 , MSR_P4_ITLB_ESCR1 } ,
2010-08-25 22:23:34 +04:00
. escr_emask =
P4_ESCR_EMASK_BIT ( P4_EVENT_ITLB_REFERENCE , HIT ) |
P4_ESCR_EMASK_BIT ( P4_EVENT_ITLB_REFERENCE , MISS ) |
P4_ESCR_EMASK_BIT ( P4_EVENT_ITLB_REFERENCE , HIT_UK ) ,
2010-03-24 12:09:26 +08:00
. cntr = { { 0 , - 1 , - 1 } , { 2 , - 1 , - 1 } } ,
} ,
[ P4_EVENT_MEMORY_CANCEL ] = {
. opcode = P4_OPCODE ( P4_EVENT_MEMORY_CANCEL ) ,
. escr_msr = { MSR_P4_DAC_ESCR0 , MSR_P4_DAC_ESCR1 } ,
2010-08-25 22:23:34 +04:00
. escr_emask =
P4_ESCR_EMASK_BIT ( P4_EVENT_MEMORY_CANCEL , ST_RB_FULL ) |
P4_ESCR_EMASK_BIT ( P4_EVENT_MEMORY_CANCEL , 64 K_CONF ) ,
2010-03-24 12:09:26 +08:00
. cntr = { { 8 , 9 , - 1 } , { 10 , 11 , - 1 } } ,
} ,
[ P4_EVENT_MEMORY_COMPLETE ] = {
. opcode = P4_OPCODE ( P4_EVENT_MEMORY_COMPLETE ) ,
. escr_msr = { MSR_P4_SAAT_ESCR0 , MSR_P4_SAAT_ESCR1 } ,
2010-08-25 22:23:34 +04:00
. escr_emask =
P4_ESCR_EMASK_BIT ( P4_EVENT_MEMORY_COMPLETE , LSC ) |
P4_ESCR_EMASK_BIT ( P4_EVENT_MEMORY_COMPLETE , SSC ) ,
2010-03-24 12:09:26 +08:00
. cntr = { { 8 , 9 , - 1 } , { 10 , 11 , - 1 } } ,
} ,
[ P4_EVENT_LOAD_PORT_REPLAY ] = {
. opcode = P4_OPCODE ( P4_EVENT_LOAD_PORT_REPLAY ) ,
. escr_msr = { MSR_P4_SAAT_ESCR0 , MSR_P4_SAAT_ESCR1 } ,
2010-08-25 22:23:34 +04:00
. escr_emask =
P4_ESCR_EMASK_BIT ( P4_EVENT_LOAD_PORT_REPLAY , SPLIT_LD ) ,
2010-03-24 12:09:26 +08:00
. cntr = { { 8 , 9 , - 1 } , { 10 , 11 , - 1 } } ,
} ,
[ P4_EVENT_STORE_PORT_REPLAY ] = {
. opcode = P4_OPCODE ( P4_EVENT_STORE_PORT_REPLAY ) ,
. escr_msr = { MSR_P4_SAAT_ESCR0 , MSR_P4_SAAT_ESCR1 } ,
2010-08-25 22:23:34 +04:00
. escr_emask =
P4_ESCR_EMASK_BIT ( P4_EVENT_STORE_PORT_REPLAY , SPLIT_ST ) ,
2010-03-24 12:09:26 +08:00
. cntr = { { 8 , 9 , - 1 } , { 10 , 11 , - 1 } } ,
} ,
[ P4_EVENT_MOB_LOAD_REPLAY ] = {
. opcode = P4_OPCODE ( P4_EVENT_MOB_LOAD_REPLAY ) ,
. escr_msr = { MSR_P4_MOB_ESCR0 , MSR_P4_MOB_ESCR1 } ,
2010-08-25 22:23:34 +04:00
. escr_emask =
P4_ESCR_EMASK_BIT ( P4_EVENT_MOB_LOAD_REPLAY , NO_STA ) |
P4_ESCR_EMASK_BIT ( P4_EVENT_MOB_LOAD_REPLAY , NO_STD ) |
P4_ESCR_EMASK_BIT ( P4_EVENT_MOB_LOAD_REPLAY , PARTIAL_DATA ) |
P4_ESCR_EMASK_BIT ( P4_EVENT_MOB_LOAD_REPLAY , UNALGN_ADDR ) ,
2010-03-24 12:09:26 +08:00
. cntr = { { 0 , - 1 , - 1 } , { 2 , - 1 , - 1 } } ,
} ,
[ P4_EVENT_PAGE_WALK_TYPE ] = {
. opcode = P4_OPCODE ( P4_EVENT_PAGE_WALK_TYPE ) ,
. escr_msr = { MSR_P4_PMH_ESCR0 , MSR_P4_PMH_ESCR1 } ,
2010-08-25 22:23:34 +04:00
. escr_emask =
P4_ESCR_EMASK_BIT ( P4_EVENT_PAGE_WALK_TYPE , DTMISS ) |
P4_ESCR_EMASK_BIT ( P4_EVENT_PAGE_WALK_TYPE , ITMISS ) ,
. shared = 1 ,
2010-03-24 12:09:26 +08:00
. cntr = { { 0 , - 1 , - 1 } , { 2 , - 1 , - 1 } } ,
} ,
[ P4_EVENT_BSQ_CACHE_REFERENCE ] = {
. opcode = P4_OPCODE ( P4_EVENT_BSQ_CACHE_REFERENCE ) ,
. escr_msr = { MSR_P4_BSU_ESCR0 , MSR_P4_BSU_ESCR1 } ,
2010-08-25 22:23:34 +04:00
. escr_emask =
P4_ESCR_EMASK_BIT ( P4_EVENT_BSQ_CACHE_REFERENCE , RD_2ndL_HITS ) |
P4_ESCR_EMASK_BIT ( P4_EVENT_BSQ_CACHE_REFERENCE , RD_2ndL_HITE ) |
P4_ESCR_EMASK_BIT ( P4_EVENT_BSQ_CACHE_REFERENCE , RD_2ndL_HITM ) |
P4_ESCR_EMASK_BIT ( P4_EVENT_BSQ_CACHE_REFERENCE , RD_3rdL_HITS ) |
P4_ESCR_EMASK_BIT ( P4_EVENT_BSQ_CACHE_REFERENCE , RD_3rdL_HITE ) |
P4_ESCR_EMASK_BIT ( P4_EVENT_BSQ_CACHE_REFERENCE , RD_3rdL_HITM ) |
P4_ESCR_EMASK_BIT ( P4_EVENT_BSQ_CACHE_REFERENCE , RD_2ndL_MISS ) |
P4_ESCR_EMASK_BIT ( P4_EVENT_BSQ_CACHE_REFERENCE , RD_3rdL_MISS ) |
P4_ESCR_EMASK_BIT ( P4_EVENT_BSQ_CACHE_REFERENCE , WR_2ndL_MISS ) ,
2010-03-24 12:09:26 +08:00
. cntr = { { 0 , - 1 , - 1 } , { 2 , - 1 , - 1 } } ,
} ,
[ P4_EVENT_IOQ_ALLOCATION ] = {
. opcode = P4_OPCODE ( P4_EVENT_IOQ_ALLOCATION ) ,
. escr_msr = { MSR_P4_FSB_ESCR0 , MSR_P4_FSB_ESCR1 } ,
2010-08-25 22:23:34 +04:00
. escr_emask =
P4_ESCR_EMASK_BIT ( P4_EVENT_IOQ_ALLOCATION , DEFAULT ) |
P4_ESCR_EMASK_BIT ( P4_EVENT_IOQ_ALLOCATION , ALL_READ ) |
P4_ESCR_EMASK_BIT ( P4_EVENT_IOQ_ALLOCATION , ALL_WRITE ) |
P4_ESCR_EMASK_BIT ( P4_EVENT_IOQ_ALLOCATION , MEM_UC ) |
P4_ESCR_EMASK_BIT ( P4_EVENT_IOQ_ALLOCATION , MEM_WC ) |
P4_ESCR_EMASK_BIT ( P4_EVENT_IOQ_ALLOCATION , MEM_WT ) |
P4_ESCR_EMASK_BIT ( P4_EVENT_IOQ_ALLOCATION , MEM_WP ) |
P4_ESCR_EMASK_BIT ( P4_EVENT_IOQ_ALLOCATION , MEM_WB ) |
P4_ESCR_EMASK_BIT ( P4_EVENT_IOQ_ALLOCATION , OWN ) |
P4_ESCR_EMASK_BIT ( P4_EVENT_IOQ_ALLOCATION , OTHER ) |
P4_ESCR_EMASK_BIT ( P4_EVENT_IOQ_ALLOCATION , PREFETCH ) ,
2010-03-24 12:09:26 +08:00
. cntr = { { 0 , - 1 , - 1 } , { 2 , - 1 , - 1 } } ,
} ,
[ P4_EVENT_IOQ_ACTIVE_ENTRIES ] = { /* shared ESCR: both threads use MSR_P4_FSB_ESCR1 */
. opcode = P4_OPCODE ( P4_EVENT_IOQ_ACTIVE_ENTRIES ) ,
. escr_msr = { MSR_P4_FSB_ESCR1 , MSR_P4_FSB_ESCR1 } ,
2010-08-25 22:23:34 +04:00
. escr_emask =
P4_ESCR_EMASK_BIT ( P4_EVENT_IOQ_ACTIVE_ENTRIES , DEFAULT ) |
P4_ESCR_EMASK_BIT ( P4_EVENT_IOQ_ACTIVE_ENTRIES , ALL_READ ) |
P4_ESCR_EMASK_BIT ( P4_EVENT_IOQ_ACTIVE_ENTRIES , ALL_WRITE ) |
P4_ESCR_EMASK_BIT ( P4_EVENT_IOQ_ACTIVE_ENTRIES , MEM_UC ) |
P4_ESCR_EMASK_BIT ( P4_EVENT_IOQ_ACTIVE_ENTRIES , MEM_WC ) |
P4_ESCR_EMASK_BIT ( P4_EVENT_IOQ_ACTIVE_ENTRIES , MEM_WT ) |
P4_ESCR_EMASK_BIT ( P4_EVENT_IOQ_ACTIVE_ENTRIES , MEM_WP ) |
P4_ESCR_EMASK_BIT ( P4_EVENT_IOQ_ACTIVE_ENTRIES , MEM_WB ) |
P4_ESCR_EMASK_BIT ( P4_EVENT_IOQ_ACTIVE_ENTRIES , OWN ) |
P4_ESCR_EMASK_BIT ( P4_EVENT_IOQ_ACTIVE_ENTRIES , OTHER ) |
P4_ESCR_EMASK_BIT ( P4_EVENT_IOQ_ACTIVE_ENTRIES , PREFETCH ) ,
2010-03-24 12:09:26 +08:00
. cntr = { { 2 , - 1 , - 1 } , { 3 , - 1 , - 1 } } ,
} ,
[ P4_EVENT_FSB_DATA_ACTIVITY ] = {
. opcode = P4_OPCODE ( P4_EVENT_FSB_DATA_ACTIVITY ) ,
. escr_msr = { MSR_P4_FSB_ESCR0 , MSR_P4_FSB_ESCR1 } ,
2010-08-25 22:23:34 +04:00
. escr_emask =
P4_ESCR_EMASK_BIT ( P4_EVENT_FSB_DATA_ACTIVITY , DRDY_DRV ) |
P4_ESCR_EMASK_BIT ( P4_EVENT_FSB_DATA_ACTIVITY , DRDY_OWN ) |
P4_ESCR_EMASK_BIT ( P4_EVENT_FSB_DATA_ACTIVITY , DRDY_OTHER ) |
P4_ESCR_EMASK_BIT ( P4_EVENT_FSB_DATA_ACTIVITY , DBSY_DRV ) |
P4_ESCR_EMASK_BIT ( P4_EVENT_FSB_DATA_ACTIVITY , DBSY_OWN ) |
P4_ESCR_EMASK_BIT ( P4_EVENT_FSB_DATA_ACTIVITY , DBSY_OTHER ) ,
. shared = 1 ,
2010-03-24 12:09:26 +08:00
. cntr = { { 0 , - 1 , - 1 } , { 2 , - 1 , - 1 } } ,
} ,
[ P4_EVENT_BSQ_ALLOCATION ] = { /* shared ESCR (MSR_P4_BSU_ESCR0 for both threads), broken CCCR1 */
. opcode = P4_OPCODE ( P4_EVENT_BSQ_ALLOCATION ) ,
. escr_msr = { MSR_P4_BSU_ESCR0 , MSR_P4_BSU_ESCR0 } ,
2010-08-25 22:23:34 +04:00
. escr_emask =
P4_ESCR_EMASK_BIT ( P4_EVENT_BSQ_ALLOCATION , REQ_TYPE0 ) |
P4_ESCR_EMASK_BIT ( P4_EVENT_BSQ_ALLOCATION , REQ_TYPE1 ) |
P4_ESCR_EMASK_BIT ( P4_EVENT_BSQ_ALLOCATION , REQ_LEN0 ) |
P4_ESCR_EMASK_BIT ( P4_EVENT_BSQ_ALLOCATION , REQ_LEN1 ) |
P4_ESCR_EMASK_BIT ( P4_EVENT_BSQ_ALLOCATION , REQ_IO_TYPE ) |
P4_ESCR_EMASK_BIT ( P4_EVENT_BSQ_ALLOCATION , REQ_LOCK_TYPE ) |
P4_ESCR_EMASK_BIT ( P4_EVENT_BSQ_ALLOCATION , REQ_CACHE_TYPE ) |
P4_ESCR_EMASK_BIT ( P4_EVENT_BSQ_ALLOCATION , REQ_SPLIT_TYPE ) |
P4_ESCR_EMASK_BIT ( P4_EVENT_BSQ_ALLOCATION , REQ_DEM_TYPE ) |
P4_ESCR_EMASK_BIT ( P4_EVENT_BSQ_ALLOCATION , REQ_ORD_TYPE ) |
P4_ESCR_EMASK_BIT ( P4_EVENT_BSQ_ALLOCATION , MEM_TYPE0 ) |
P4_ESCR_EMASK_BIT ( P4_EVENT_BSQ_ALLOCATION , MEM_TYPE1 ) |
P4_ESCR_EMASK_BIT ( P4_EVENT_BSQ_ALLOCATION , MEM_TYPE2 ) ,
2010-03-24 12:09:26 +08:00
. cntr = { { 0 , - 1 , - 1 } , { 1 , - 1 , - 1 } } ,
} ,
[ P4_EVENT_BSQ_ACTIVE_ENTRIES ] = { /* shared ESCR: both threads use MSR_P4_BSU_ESCR1 */
. opcode = P4_OPCODE ( P4_EVENT_BSQ_ACTIVE_ENTRIES ) ,
. escr_msr = { MSR_P4_BSU_ESCR1 , MSR_P4_BSU_ESCR1 } ,
2010-08-25 22:23:34 +04:00
. escr_emask =
P4_ESCR_EMASK_BIT ( P4_EVENT_BSQ_ACTIVE_ENTRIES , REQ_TYPE0 ) |
P4_ESCR_EMASK_BIT ( P4_EVENT_BSQ_ACTIVE_ENTRIES , REQ_TYPE1 ) |
P4_ESCR_EMASK_BIT ( P4_EVENT_BSQ_ACTIVE_ENTRIES , REQ_LEN0 ) |
P4_ESCR_EMASK_BIT ( P4_EVENT_BSQ_ACTIVE_ENTRIES , REQ_LEN1 ) |
P4_ESCR_EMASK_BIT ( P4_EVENT_BSQ_ACTIVE_ENTRIES , REQ_IO_TYPE ) |
P4_ESCR_EMASK_BIT ( P4_EVENT_BSQ_ACTIVE_ENTRIES , REQ_LOCK_TYPE ) |
P4_ESCR_EMASK_BIT ( P4_EVENT_BSQ_ACTIVE_ENTRIES , REQ_CACHE_TYPE ) |
P4_ESCR_EMASK_BIT ( P4_EVENT_BSQ_ACTIVE_ENTRIES , REQ_SPLIT_TYPE ) |
P4_ESCR_EMASK_BIT ( P4_EVENT_BSQ_ACTIVE_ENTRIES , REQ_DEM_TYPE ) |
P4_ESCR_EMASK_BIT ( P4_EVENT_BSQ_ACTIVE_ENTRIES , REQ_ORD_TYPE ) |
P4_ESCR_EMASK_BIT ( P4_EVENT_BSQ_ACTIVE_ENTRIES , MEM_TYPE0 ) |
P4_ESCR_EMASK_BIT ( P4_EVENT_BSQ_ACTIVE_ENTRIES , MEM_TYPE1 ) |
P4_ESCR_EMASK_BIT ( P4_EVENT_BSQ_ACTIVE_ENTRIES , MEM_TYPE2 ) ,
2010-03-24 12:09:26 +08:00
. cntr = { { 2 , - 1 , - 1 } , { 3 , - 1 , - 1 } } ,
} ,
[ P4_EVENT_SSE_INPUT_ASSIST ] = {
. opcode = P4_OPCODE ( P4_EVENT_SSE_INPUT_ASSIST ) ,
. escr_msr = { MSR_P4_FIRM_ESCR0 , MSR_P4_FIRM_ESCR1 } ,
2010-08-25 22:23:34 +04:00
. escr_emask =
P4_ESCR_EMASK_BIT ( P4_EVENT_SSE_INPUT_ASSIST , ALL ) ,
. shared = 1 ,
2010-03-24 12:09:26 +08:00
. cntr = { { 8 , 9 , - 1 } , { 10 , 11 , - 1 } } ,
} ,
[ P4_EVENT_PACKED_SP_UOP ] = {
. opcode = P4_OPCODE ( P4_EVENT_PACKED_SP_UOP ) ,
. escr_msr = { MSR_P4_FIRM_ESCR0 , MSR_P4_FIRM_ESCR1 } ,
2010-08-25 22:23:34 +04:00
. escr_emask =
P4_ESCR_EMASK_BIT ( P4_EVENT_PACKED_SP_UOP , ALL ) ,
. shared = 1 ,
2010-03-24 12:09:26 +08:00
. cntr = { { 8 , 9 , - 1 } , { 10 , 11 , - 1 } } ,
} ,
[ P4_EVENT_PACKED_DP_UOP ] = {
. opcode = P4_OPCODE ( P4_EVENT_PACKED_DP_UOP ) ,
. escr_msr = { MSR_P4_FIRM_ESCR0 , MSR_P4_FIRM_ESCR1 } ,
2010-08-25 22:23:34 +04:00
. escr_emask =
P4_ESCR_EMASK_BIT ( P4_EVENT_PACKED_DP_UOP , ALL ) ,
. shared = 1 ,
2010-03-24 12:09:26 +08:00
. cntr = { { 8 , 9 , - 1 } , { 10 , 11 , - 1 } } ,
} ,
[ P4_EVENT_SCALAR_SP_UOP ] = {
. opcode = P4_OPCODE ( P4_EVENT_SCALAR_SP_UOP ) ,
. escr_msr = { MSR_P4_FIRM_ESCR0 , MSR_P4_FIRM_ESCR1 } ,
2010-08-25 22:23:34 +04:00
. escr_emask =
P4_ESCR_EMASK_BIT ( P4_EVENT_SCALAR_SP_UOP , ALL ) ,
. shared = 1 ,
2010-03-24 12:09:26 +08:00
. cntr = { { 8 , 9 , - 1 } , { 10 , 11 , - 1 } } ,
} ,
[ P4_EVENT_SCALAR_DP_UOP ] = {
. opcode = P4_OPCODE ( P4_EVENT_SCALAR_DP_UOP ) ,
. escr_msr = { MSR_P4_FIRM_ESCR0 , MSR_P4_FIRM_ESCR1 } ,
2010-08-25 22:23:34 +04:00
. escr_emask =
P4_ESCR_EMASK_BIT ( P4_EVENT_SCALAR_DP_UOP , ALL ) ,
. shared = 1 ,
2010-03-24 12:09:26 +08:00
. cntr = { { 8 , 9 , - 1 } , { 10 , 11 , - 1 } } ,
} ,
[ P4_EVENT_64BIT_MMX_UOP ] = {
. opcode = P4_OPCODE ( P4_EVENT_64BIT_MMX_UOP ) ,
. escr_msr = { MSR_P4_FIRM_ESCR0 , MSR_P4_FIRM_ESCR1 } ,
2010-08-25 22:23:34 +04:00
. escr_emask =
P4_ESCR_EMASK_BIT ( P4_EVENT_64BIT_MMX_UOP , ALL ) ,
. shared = 1 ,
2010-03-24 12:09:26 +08:00
. cntr = { { 8 , 9 , - 1 } , { 10 , 11 , - 1 } } ,
} ,
[ P4_EVENT_128BIT_MMX_UOP ] = {
. opcode = P4_OPCODE ( P4_EVENT_128BIT_MMX_UOP ) ,
. escr_msr = { MSR_P4_FIRM_ESCR0 , MSR_P4_FIRM_ESCR1 } ,
2010-08-25 22:23:34 +04:00
. escr_emask =
P4_ESCR_EMASK_BIT ( P4_EVENT_128BIT_MMX_UOP , ALL ) ,
. shared = 1 ,
2010-03-24 12:09:26 +08:00
. cntr = { { 8 , 9 , - 1 } , { 10 , 11 , - 1 } } ,
} ,
[ P4_EVENT_X87_FP_UOP ] = {
. opcode = P4_OPCODE ( P4_EVENT_X87_FP_UOP ) ,
. escr_msr = { MSR_P4_FIRM_ESCR0 , MSR_P4_FIRM_ESCR1 } ,
2010-08-25 22:23:34 +04:00
. escr_emask =
P4_ESCR_EMASK_BIT ( P4_EVENT_X87_FP_UOP , ALL ) ,
. shared = 1 ,
2010-03-24 12:09:26 +08:00
. cntr = { { 8 , 9 , - 1 } , { 10 , 11 , - 1 } } ,
} ,
[ P4_EVENT_TC_MISC ] = {
. opcode = P4_OPCODE ( P4_EVENT_TC_MISC ) ,
. escr_msr = { MSR_P4_TC_ESCR0 , MSR_P4_TC_ESCR1 } ,
2010-08-25 22:23:34 +04:00
. escr_emask =
P4_ESCR_EMASK_BIT ( P4_EVENT_TC_MISC , FLUSH ) ,
2010-03-24 12:09:26 +08:00
. cntr = { { 4 , 5 , - 1 } , { 6 , 7 , - 1 } } ,
} ,
[ P4_EVENT_GLOBAL_POWER_EVENTS ] = {
. opcode = P4_OPCODE ( P4_EVENT_GLOBAL_POWER_EVENTS ) ,
. escr_msr = { MSR_P4_FSB_ESCR0 , MSR_P4_FSB_ESCR1 } ,
2010-08-25 22:23:34 +04:00
. escr_emask =
P4_ESCR_EMASK_BIT ( P4_EVENT_GLOBAL_POWER_EVENTS , RUNNING ) ,
2010-03-24 12:09:26 +08:00
. cntr = { { 0 , - 1 , - 1 } , { 2 , - 1 , - 1 } } ,
} ,
[ P4_EVENT_TC_MS_XFER ] = {
. opcode = P4_OPCODE ( P4_EVENT_TC_MS_XFER ) ,
. escr_msr = { MSR_P4_MS_ESCR0 , MSR_P4_MS_ESCR1 } ,
2010-08-25 22:23:34 +04:00
. escr_emask =
P4_ESCR_EMASK_BIT ( P4_EVENT_TC_MS_XFER , CISC ) ,
2010-03-24 12:09:26 +08:00
. cntr = { { 4 , 5 , - 1 } , { 6 , 7 , - 1 } } ,
} ,
[ P4_EVENT_UOP_QUEUE_WRITES ] = {
. opcode = P4_OPCODE ( P4_EVENT_UOP_QUEUE_WRITES ) ,
. escr_msr = { MSR_P4_MS_ESCR0 , MSR_P4_MS_ESCR1 } ,
2010-08-25 22:23:34 +04:00
. escr_emask =
P4_ESCR_EMASK_BIT ( P4_EVENT_UOP_QUEUE_WRITES , FROM_TC_BUILD ) |
P4_ESCR_EMASK_BIT ( P4_EVENT_UOP_QUEUE_WRITES , FROM_TC_DELIVER ) |
P4_ESCR_EMASK_BIT ( P4_EVENT_UOP_QUEUE_WRITES , FROM_ROM ) ,
2010-03-24 12:09:26 +08:00
. cntr = { { 4 , 5 , - 1 } , { 6 , 7 , - 1 } } ,
} ,
[ P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE ] = {
. opcode = P4_OPCODE ( P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE ) ,
/* NOTE(review): both threads list MSR_P4_TBPU_ESCR0, unlike RETIRED_BRANCH_TYPE below; not marked "shared ESCR" -- confirm intended */
. escr_msr = { MSR_P4_TBPU_ESCR0 , MSR_P4_TBPU_ESCR0 } ,
2010-08-25 22:23:34 +04:00
. escr_emask =
P4_ESCR_EMASK_BIT ( P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE , CONDITIONAL ) |
P4_ESCR_EMASK_BIT ( P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE , CALL ) |
P4_ESCR_EMASK_BIT ( P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE , RETURN ) |
P4_ESCR_EMASK_BIT ( P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE , INDIRECT ) ,
2010-03-24 12:09:26 +08:00
. cntr = { { 4 , 5 , - 1 } , { 6 , 7 , - 1 } } ,
} ,
[ P4_EVENT_RETIRED_BRANCH_TYPE ] = {
. opcode = P4_OPCODE ( P4_EVENT_RETIRED_BRANCH_TYPE ) ,
. escr_msr = { MSR_P4_TBPU_ESCR0 , MSR_P4_TBPU_ESCR1 } ,
2010-08-25 22:23:34 +04:00
. escr_emask =
P4_ESCR_EMASK_BIT ( P4_EVENT_RETIRED_BRANCH_TYPE , CONDITIONAL ) |
P4_ESCR_EMASK_BIT ( P4_EVENT_RETIRED_BRANCH_TYPE , CALL ) |
P4_ESCR_EMASK_BIT ( P4_EVENT_RETIRED_BRANCH_TYPE , RETURN ) |
P4_ESCR_EMASK_BIT ( P4_EVENT_RETIRED_BRANCH_TYPE , INDIRECT ) ,
2010-03-24 12:09:26 +08:00
. cntr = { { 4 , 5 , - 1 } , { 6 , 7 , - 1 } } ,
} ,
[ P4_EVENT_RESOURCE_STALL ] = {
. opcode = P4_OPCODE ( P4_EVENT_RESOURCE_STALL ) ,
. escr_msr = { MSR_P4_ALF_ESCR0 , MSR_P4_ALF_ESCR1 } ,
2010-08-25 22:23:34 +04:00
. escr_emask =
P4_ESCR_EMASK_BIT ( P4_EVENT_RESOURCE_STALL , SBFULL ) ,
2010-03-24 12:09:26 +08:00
. cntr = { { 12 , 13 , 16 } , { 14 , 15 , 17 } } ,
} ,
[ P4_EVENT_WC_BUFFER ] = {
. opcode = P4_OPCODE ( P4_EVENT_WC_BUFFER ) ,
. escr_msr = { MSR_P4_DAC_ESCR0 , MSR_P4_DAC_ESCR1 } ,
2010-08-25 22:23:34 +04:00
. escr_emask =
P4_ESCR_EMASK_BIT ( P4_EVENT_WC_BUFFER , WCB_EVICTS ) |
P4_ESCR_EMASK_BIT ( P4_EVENT_WC_BUFFER , WCB_FULL_EVICTS ) ,
. shared = 1 ,
2010-03-24 12:09:26 +08:00
. cntr = { { 8 , 9 , - 1 } , { 10 , 11 , - 1 } } ,
} ,
[ P4_EVENT_B2B_CYCLES ] = {
. opcode = P4_OPCODE ( P4_EVENT_B2B_CYCLES ) ,
. escr_msr = { MSR_P4_FSB_ESCR0 , MSR_P4_FSB_ESCR1 } ,
2010-08-25 22:23:34 +04:00
. escr_emask = 0 , /* no EventMask bits listed as valid */
2010-03-24 12:09:26 +08:00
. cntr = { { 0 , - 1 , - 1 } , { 2 , - 1 , - 1 } } ,
} ,
[ P4_EVENT_BNR ] = {
. opcode = P4_OPCODE ( P4_EVENT_BNR ) ,
. escr_msr = { MSR_P4_FSB_ESCR0 , MSR_P4_FSB_ESCR1 } ,
2010-08-25 22:23:34 +04:00
. escr_emask = 0 , /* no EventMask bits listed as valid */
2010-03-24 12:09:26 +08:00
. cntr = { { 0 , - 1 , - 1 } , { 2 , - 1 , - 1 } } ,
} ,
[ P4_EVENT_SNOOP ] = {
. opcode = P4_OPCODE ( P4_EVENT_SNOOP ) ,
. escr_msr = { MSR_P4_FSB_ESCR0 , MSR_P4_FSB_ESCR1 } ,
2010-08-25 22:23:34 +04:00
. escr_emask = 0 , /* no EventMask bits listed as valid */
2010-03-24 12:09:26 +08:00
. cntr = { { 0 , - 1 , - 1 } , { 2 , - 1 , - 1 } } ,
} ,
[ P4_EVENT_RESPONSE ] = {
. opcode = P4_OPCODE ( P4_EVENT_RESPONSE ) ,
. escr_msr = { MSR_P4_FSB_ESCR0 , MSR_P4_FSB_ESCR1 } ,
2010-08-25 22:23:34 +04:00
. escr_emask = 0 , /* no EventMask bits listed as valid */
2010-03-24 12:09:26 +08:00
. cntr = { { 0 , - 1 , - 1 } , { 2 , - 1 , - 1 } } ,
} ,
[ P4_EVENT_FRONT_END_EVENT ] = {
. opcode = P4_OPCODE ( P4_EVENT_FRONT_END_EVENT ) ,
. escr_msr = { MSR_P4_CRU_ESCR2 , MSR_P4_CRU_ESCR3 } ,
2010-08-25 22:23:34 +04:00
. escr_emask =
P4_ESCR_EMASK_BIT ( P4_EVENT_FRONT_END_EVENT , NBOGUS ) |
P4_ESCR_EMASK_BIT ( P4_EVENT_FRONT_END_EVENT , BOGUS ) ,
2010-03-24 12:09:26 +08:00
. cntr = { { 12 , 13 , 16 } , { 14 , 15 , 17 } } ,
} ,
[ P4_EVENT_EXECUTION_EVENT ] = {
. opcode = P4_OPCODE ( P4_EVENT_EXECUTION_EVENT ) ,
. escr_msr = { MSR_P4_CRU_ESCR2 , MSR_P4_CRU_ESCR3 } ,
2010-08-25 22:23:34 +04:00
. escr_emask =
P4_ESCR_EMASK_BIT ( P4_EVENT_EXECUTION_EVENT , NBOGUS0 ) |
P4_ESCR_EMASK_BIT ( P4_EVENT_EXECUTION_EVENT , NBOGUS1 ) |
P4_ESCR_EMASK_BIT ( P4_EVENT_EXECUTION_EVENT , NBOGUS2 ) |
P4_ESCR_EMASK_BIT ( P4_EVENT_EXECUTION_EVENT , NBOGUS3 ) |
P4_ESCR_EMASK_BIT ( P4_EVENT_EXECUTION_EVENT , BOGUS0 ) |
P4_ESCR_EMASK_BIT ( P4_EVENT_EXECUTION_EVENT , BOGUS1 ) |
P4_ESCR_EMASK_BIT ( P4_EVENT_EXECUTION_EVENT , BOGUS2 ) |
P4_ESCR_EMASK_BIT ( P4_EVENT_EXECUTION_EVENT , BOGUS3 ) ,
2010-03-24 12:09:26 +08:00
. cntr = { { 12 , 13 , 16 } , { 14 , 15 , 17 } } ,
} ,
[ P4_EVENT_REPLAY_EVENT ] = {
. opcode = P4_OPCODE ( P4_EVENT_REPLAY_EVENT ) ,
. escr_msr = { MSR_P4_CRU_ESCR2 , MSR_P4_CRU_ESCR3 } ,
2010-08-25 22:23:34 +04:00
. escr_emask =
P4_ESCR_EMASK_BIT ( P4_EVENT_REPLAY_EVENT , NBOGUS ) |
P4_ESCR_EMASK_BIT ( P4_EVENT_REPLAY_EVENT , BOGUS ) ,
2010-03-24 12:09:26 +08:00
. cntr = { { 12 , 13 , 16 } , { 14 , 15 , 17 } } ,
} ,
[ P4_EVENT_INSTR_RETIRED ] = {
. opcode = P4_OPCODE ( P4_EVENT_INSTR_RETIRED ) ,
. escr_msr = { MSR_P4_CRU_ESCR0 , MSR_P4_CRU_ESCR1 } ,
2010-08-25 22:23:34 +04:00
. escr_emask =
P4_ESCR_EMASK_BIT ( P4_EVENT_INSTR_RETIRED , NBOGUSNTAG ) |
P4_ESCR_EMASK_BIT ( P4_EVENT_INSTR_RETIRED , NBOGUSTAG ) |
P4_ESCR_EMASK_BIT ( P4_EVENT_INSTR_RETIRED , BOGUSNTAG ) |
P4_ESCR_EMASK_BIT ( P4_EVENT_INSTR_RETIRED , BOGUSTAG ) ,
2010-03-24 12:09:26 +08:00
. cntr = { { 12 , 13 , 16 } , { 14 , 15 , 17 } } ,
} ,
[ P4_EVENT_UOPS_RETIRED ] = {
. opcode = P4_OPCODE ( P4_EVENT_UOPS_RETIRED ) ,
. escr_msr = { MSR_P4_CRU_ESCR0 , MSR_P4_CRU_ESCR1 } ,
2010-08-25 22:23:34 +04:00
. escr_emask =
P4_ESCR_EMASK_BIT ( P4_EVENT_UOPS_RETIRED , NBOGUS ) |
P4_ESCR_EMASK_BIT ( P4_EVENT_UOPS_RETIRED , BOGUS ) ,
2010-03-24 12:09:26 +08:00
. cntr = { { 12 , 13 , 16 } , { 14 , 15 , 17 } } ,
} ,
[ P4_EVENT_UOP_TYPE ] = {
. opcode = P4_OPCODE ( P4_EVENT_UOP_TYPE ) ,
. escr_msr = { MSR_P4_RAT_ESCR0 , MSR_P4_RAT_ESCR1 } ,
2010-08-25 22:23:34 +04:00
. escr_emask =
P4_ESCR_EMASK_BIT ( P4_EVENT_UOP_TYPE , TAGLOADS ) |
P4_ESCR_EMASK_BIT ( P4_EVENT_UOP_TYPE , TAGSTORES ) ,
2010-03-24 12:09:26 +08:00
. cntr = { { 12 , 13 , 16 } , { 14 , 15 , 17 } } ,
} ,
[ P4_EVENT_BRANCH_RETIRED ] = {
. opcode = P4_OPCODE ( P4_EVENT_BRANCH_RETIRED ) ,
. escr_msr = { MSR_P4_CRU_ESCR2 , MSR_P4_CRU_ESCR3 } ,
2010-08-25 22:23:34 +04:00
. escr_emask =
P4_ESCR_EMASK_BIT ( P4_EVENT_BRANCH_RETIRED , MMNP ) |
P4_ESCR_EMASK_BIT ( P4_EVENT_BRANCH_RETIRED , MMNM ) |
P4_ESCR_EMASK_BIT ( P4_EVENT_BRANCH_RETIRED , MMTP ) |
P4_ESCR_EMASK_BIT ( P4_EVENT_BRANCH_RETIRED , MMTM ) ,
2010-03-24 12:09:26 +08:00
. cntr = { { 12 , 13 , 16 } , { 14 , 15 , 17 } } ,
} ,
[ P4_EVENT_MISPRED_BRANCH_RETIRED ] = {
. opcode = P4_OPCODE ( P4_EVENT_MISPRED_BRANCH_RETIRED ) ,
. escr_msr = { MSR_P4_CRU_ESCR0 , MSR_P4_CRU_ESCR1 } ,
2010-08-25 22:23:34 +04:00
. escr_emask =
2011-03-28 00:46:11 +04:00
P4_ESCR_EMASK_BIT ( P4_EVENT_MISPRED_BRANCH_RETIRED , NBOGUS ) ,
2010-03-24 12:09:26 +08:00
. cntr = { { 12 , 13 , 16 } , { 14 , 15 , 17 } } ,
} ,
[ P4_EVENT_X87_ASSIST ] = {
. opcode = P4_OPCODE ( P4_EVENT_X87_ASSIST ) ,
. escr_msr = { MSR_P4_CRU_ESCR2 , MSR_P4_CRU_ESCR3 } ,
2010-08-25 22:23:34 +04:00
. escr_emask =
P4_ESCR_EMASK_BIT ( P4_EVENT_X87_ASSIST , FPSU ) |
P4_ESCR_EMASK_BIT ( P4_EVENT_X87_ASSIST , FPSO ) |
P4_ESCR_EMASK_BIT ( P4_EVENT_X87_ASSIST , POAO ) |
P4_ESCR_EMASK_BIT ( P4_EVENT_X87_ASSIST , POAU ) |
P4_ESCR_EMASK_BIT ( P4_EVENT_X87_ASSIST , PREA ) ,
2010-03-24 12:09:26 +08:00
. cntr = { { 12 , 13 , 16 } , { 14 , 15 , 17 } } ,
} ,
[ P4_EVENT_MACHINE_CLEAR ] = {
. opcode = P4_OPCODE ( P4_EVENT_MACHINE_CLEAR ) ,
. escr_msr = { MSR_P4_CRU_ESCR2 , MSR_P4_CRU_ESCR3 } ,
2010-08-25 22:23:34 +04:00
. escr_emask =
P4_ESCR_EMASK_BIT ( P4_EVENT_MACHINE_CLEAR , CLEAR ) |
P4_ESCR_EMASK_BIT ( P4_EVENT_MACHINE_CLEAR , MOCLEAR ) |
P4_ESCR_EMASK_BIT ( P4_EVENT_MACHINE_CLEAR , SMCLEAR ) ,
2010-03-24 12:09:26 +08:00
. cntr = { { 12 , 13 , 16 } , { 14 , 15 , 17 } } ,
} ,
/* p4_event_match_cpu_model() rejects this event unless the cpu model is 3, 4 or 6 */
[ P4_EVENT_INSTR_COMPLETED ] = {
. opcode = P4_OPCODE ( P4_EVENT_INSTR_COMPLETED ) ,
. escr_msr = { MSR_P4_CRU_ESCR0 , MSR_P4_CRU_ESCR1 } ,
2010-08-25 22:23:34 +04:00
. escr_emask =
P4_ESCR_EMASK_BIT ( P4_EVENT_INSTR_COMPLETED , NBOGUS ) |
P4_ESCR_EMASK_BIT ( P4_EVENT_INSTR_COMPLETED , BOGUS ) ,
2010-03-24 12:09:26 +08:00
. cntr = { { 12 , 13 , 16 } , { 14 , 15 , 17 } } ,
} ,
} ;
2010-03-11 19:54:39 +03:00
2010-07-05 10:09:29 +08:00
/*
 * Compose the config word for one cache event.
 *
 * @event:  P4_EVENT_* name; it is handed to P4_ESCR_EVENT(),
 *          P4_ESCR_EMASK_BIT() and P4_OPCODE(), so it must be a bare
 *          identifier and cannot be parenthesized here
 * @bit:    EventMask bit name for @event, passed to P4_ESCR_EMASK_BIT()
 * @metric: value OR-ed into the CCCR part next to the ESCR selector
 *          derived from the event opcode
 *
 * The ESCR part goes through p4_config_pack_escr() and the CCCR part
 * through p4_config_pack_cccr(); the two are OR-ed together.  @metric
 * and the whole expansion are parenthesized so the macro behaves as a
 * single operand in any surrounding expression.
 */
#define P4_GEN_CACHE_EVENT(event, bit, metric)				  \
	(p4_config_pack_escr(P4_ESCR_EVENT(event)			| \
			     P4_ESCR_EMASK_BIT(event, bit))		| \
	 p4_config_pack_cccr((metric)					| \
			     P4_CCCR_ESEL(P4_OPCODE_ESEL(P4_OPCODE(event)))))
2010-03-18 18:33:12 +08:00
2010-03-29 13:09:53 +02:00
static __initconst const u64 p4_hw_cache_event_ids
2010-03-18 18:33:12 +08:00
[ PERF_COUNT_HW_CACHE_MAX ]
[ PERF_COUNT_HW_CACHE_OP_MAX ]
[ PERF_COUNT_HW_CACHE_RESULT_MAX ] =
{
[ C ( L1D ) ] = {
[ C ( OP_READ ) ] = {
[ C ( RESULT_ACCESS ) ] = 0x0 ,
2010-03-24 12:09:26 +08:00
[ C ( RESULT_MISS ) ] = P4_GEN_CACHE_EVENT ( P4_EVENT_REPLAY_EVENT , NBOGUS ,
2010-07-05 10:09:29 +08:00
P4_PEBS_METRIC__1stl_cache_load_miss_retired ) ,
2010-03-18 18:33:12 +08:00
} ,
} ,
[ C ( LL ) ] = {
[ C ( OP_READ ) ] = {
[ C ( RESULT_ACCESS ) ] = 0x0 ,
2010-03-24 12:09:26 +08:00
[ C ( RESULT_MISS ) ] = P4_GEN_CACHE_EVENT ( P4_EVENT_REPLAY_EVENT , NBOGUS ,
2010-07-05 10:09:29 +08:00
P4_PEBS_METRIC__2ndl_cache_load_miss_retired ) ,
2010-03-18 18:33:12 +08:00
} ,
2010-03-24 12:09:26 +08:00
} ,
2010-03-18 18:33:12 +08:00
[ C ( DTLB ) ] = {
[ C ( OP_READ ) ] = {
[ C ( RESULT_ACCESS ) ] = 0x0 ,
2010-03-24 12:09:26 +08:00
[ C ( RESULT_MISS ) ] = P4_GEN_CACHE_EVENT ( P4_EVENT_REPLAY_EVENT , NBOGUS ,
2010-07-05 10:09:29 +08:00
P4_PEBS_METRIC__dtlb_load_miss_retired ) ,
2010-03-18 18:33:12 +08:00
} ,
[ C ( OP_WRITE ) ] = {
[ C ( RESULT_ACCESS ) ] = 0x0 ,
2010-03-24 12:09:26 +08:00
[ C ( RESULT_MISS ) ] = P4_GEN_CACHE_EVENT ( P4_EVENT_REPLAY_EVENT , NBOGUS ,
2010-07-05 10:09:29 +08:00
P4_PEBS_METRIC__dtlb_store_miss_retired ) ,
2010-03-18 18:33:12 +08:00
} ,
} ,
[ C ( ITLB ) ] = {
[ C ( OP_READ ) ] = {
2010-03-24 12:09:26 +08:00
[ C ( RESULT_ACCESS ) ] = P4_GEN_CACHE_EVENT ( P4_EVENT_ITLB_REFERENCE , HIT ,
2010-07-05 10:09:29 +08:00
P4_PEBS_METRIC__none ) ,
2010-03-24 12:09:26 +08:00
[ C ( RESULT_MISS ) ] = P4_GEN_CACHE_EVENT ( P4_EVENT_ITLB_REFERENCE , MISS ,
2010-07-05 10:09:29 +08:00
P4_PEBS_METRIC__none ) ,
2010-03-18 18:33:12 +08:00
} ,
[ C ( OP_WRITE ) ] = {
[ C ( RESULT_ACCESS ) ] = - 1 ,
[ C ( RESULT_MISS ) ] = - 1 ,
} ,
[ C ( OP_PREFETCH ) ] = {
[ C ( RESULT_ACCESS ) ] = - 1 ,
[ C ( RESULT_MISS ) ] = - 1 ,
} ,
} ,
2011-04-22 23:37:06 +02:00
[ C ( NODE ) ] = {
[ C ( OP_READ ) ] = {
[ C ( RESULT_ACCESS ) ] = - 1 ,
[ C ( RESULT_MISS ) ] = - 1 ,
} ,
[ C ( OP_WRITE ) ] = {
[ C ( RESULT_ACCESS ) ] = - 1 ,
[ C ( RESULT_MISS ) ] = - 1 ,
} ,
[ C ( OP_PREFETCH ) ] = {
[ C ( RESULT_ACCESS ) ] = - 1 ,
[ C ( RESULT_MISS ) ] = - 1 ,
} ,
} ,
2010-03-18 18:33:12 +08:00
} ;
2011-07-09 00:17:12 +04:00
/*
2011-07-21 20:06:25 +04:00
* Because of Netburst being quite restricted in how many
* identical events may run simultaneously , we introduce event aliases ,
* ie the different events which have the same functionality but
* utilize non - intersected resources ( ESCR / CCCR / counter registers ) .
2011-07-09 00:17:12 +04:00
*
2011-07-21 20:06:25 +04:00
* This allow us to relax restrictions a bit and run two or more
* identical events together .
2011-07-09 00:17:12 +04:00
*
2011-07-21 20:06:25 +04:00
* Never set any custom internal bits such as P4_CONFIG_HT ,
* P4_CONFIG_ALIASABLE or bits for P4_PEBS_METRIC , they are
* either up to date automatically or not applicable at all .
2011-07-09 00:17:12 +04:00
*/
2017-08-10 16:57:09 +01:00
static struct p4_event_alias {
2011-07-21 20:06:25 +04:00
u64 original ;
u64 alternative ;
2011-07-09 00:17:12 +04:00
} p4_event_aliases [ ] = {
{
/*
2011-07-21 20:06:25 +04:00
* Non - halted cycles can be substituted with non - sleeping cycles ( see
* Intel SDM Vol3b for details ) . We need this alias to be able
* to run nmi - watchdog and ' perf top ' ( or any other user space tool
* which is interested in running PERF_COUNT_HW_CPU_CYCLES )
* simultaneously .
2011-07-09 00:17:12 +04:00
*/
2011-07-21 20:06:25 +04:00
. original =
2011-07-09 00:17:12 +04:00
p4_config_pack_escr ( P4_ESCR_EVENT ( P4_EVENT_GLOBAL_POWER_EVENTS ) |
P4_ESCR_EMASK_BIT ( P4_EVENT_GLOBAL_POWER_EVENTS , RUNNING ) ) ,
2011-07-21 20:06:25 +04:00
. alternative =
2011-07-09 00:17:12 +04:00
p4_config_pack_escr ( P4_ESCR_EVENT ( P4_EVENT_EXECUTION_EVENT ) |
P4_ESCR_EMASK_BIT ( P4_EVENT_EXECUTION_EVENT , NBOGUS0 ) |
P4_ESCR_EMASK_BIT ( P4_EVENT_EXECUTION_EVENT , NBOGUS1 ) |
P4_ESCR_EMASK_BIT ( P4_EVENT_EXECUTION_EVENT , NBOGUS2 ) |
P4_ESCR_EMASK_BIT ( P4_EVENT_EXECUTION_EVENT , NBOGUS3 ) |
P4_ESCR_EMASK_BIT ( P4_EVENT_EXECUTION_EVENT , BOGUS0 ) |
P4_ESCR_EMASK_BIT ( P4_EVENT_EXECUTION_EVENT , BOGUS1 ) |
P4_ESCR_EMASK_BIT ( P4_EVENT_EXECUTION_EVENT , BOGUS2 ) |
P4_ESCR_EMASK_BIT ( P4_EVENT_EXECUTION_EVENT , BOGUS3 ) ) |
p4_config_pack_cccr ( P4_CCCR_THRESHOLD ( 15 ) | P4_CCCR_COMPLEMENT |
P4_CCCR_COMPARE ) ,
} ,
} ;
/*
 * Find the counterpart of @config in p4_event_aliases[].
 *
 * The lookup works in both directions: an "original" config yields its
 * "alternative" and vice versa.  Only the bits selected by
 * P4_CONFIG_EVENT_ALIAS_MASK take part in the comparison; the bits
 * selected by P4_CONFIG_EVENT_ALIAS_IMMUTABLE_BITS are carried over
 * from @config into the result unchanged.
 *
 * Returns the alias config, or 0 when @config is not marked with
 * P4_CONFIG_ALIASABLE or has no entry in the table.
 */
static u64 p4_get_alias_event(u64 config)
{
	u64 kept_bits;
	u64 key;
	int idx;

	/*
	 * Only an event carrying the special mark is allowed, so that we
	 * can be sure it didn't come in as a malformed RAW event.
	 */
	if (!(config & P4_CONFIG_ALIASABLE))
		return 0;

	key = config & P4_CONFIG_EVENT_ALIAS_MASK;
	kept_bits = config & P4_CONFIG_EVENT_ALIAS_IMMUTABLE_BITS;

	for (idx = 0; idx < ARRAY_SIZE(p4_event_aliases); idx++) {
		const struct p4_event_alias *alias = &p4_event_aliases[idx];

		if (key == alias->original)
			return alias->alternative | kept_bits;
		if (key == alias->alternative)
			return alias->original | kept_bits;
	}

	/* no alias known for this event */
	return 0;
}
2010-03-24 12:09:26 +08:00
static u64 p4_general_events [ PERF_COUNT_HW_MAX ] = {
/* non-halted CPU clocks */
[ PERF_COUNT_HW_CPU_CYCLES ] =
p4_config_pack_escr ( P4_ESCR_EVENT ( P4_EVENT_GLOBAL_POWER_EVENTS ) |
2011-07-09 00:17:12 +04:00
P4_ESCR_EMASK_BIT ( P4_EVENT_GLOBAL_POWER_EVENTS , RUNNING ) ) |
P4_CONFIG_ALIASABLE ,
2010-03-24 12:09:26 +08:00
/*
* retired instructions
* in a sake of simplicity we don ' t use the FSB tagging
*/
[ PERF_COUNT_HW_INSTRUCTIONS ] =
p4_config_pack_escr ( P4_ESCR_EVENT ( P4_EVENT_INSTR_RETIRED ) |
P4_ESCR_EMASK_BIT ( P4_EVENT_INSTR_RETIRED , NBOGUSNTAG ) |
P4_ESCR_EMASK_BIT ( P4_EVENT_INSTR_RETIRED , BOGUSNTAG ) ) ,
/* cache hits */
[ PERF_COUNT_HW_CACHE_REFERENCES ] =
p4_config_pack_escr ( P4_ESCR_EVENT ( P4_EVENT_BSQ_CACHE_REFERENCE ) |
P4_ESCR_EMASK_BIT ( P4_EVENT_BSQ_CACHE_REFERENCE , RD_2ndL_HITS ) |
P4_ESCR_EMASK_BIT ( P4_EVENT_BSQ_CACHE_REFERENCE , RD_2ndL_HITE ) |
P4_ESCR_EMASK_BIT ( P4_EVENT_BSQ_CACHE_REFERENCE , RD_2ndL_HITM ) |
P4_ESCR_EMASK_BIT ( P4_EVENT_BSQ_CACHE_REFERENCE , RD_3rdL_HITS ) |
P4_ESCR_EMASK_BIT ( P4_EVENT_BSQ_CACHE_REFERENCE , RD_3rdL_HITE ) |
P4_ESCR_EMASK_BIT ( P4_EVENT_BSQ_CACHE_REFERENCE , RD_3rdL_HITM ) ) ,
/* cache misses */
[ PERF_COUNT_HW_CACHE_MISSES ] =
p4_config_pack_escr ( P4_ESCR_EVENT ( P4_EVENT_BSQ_CACHE_REFERENCE ) |
P4_ESCR_EMASK_BIT ( P4_EVENT_BSQ_CACHE_REFERENCE , RD_2ndL_MISS ) |
P4_ESCR_EMASK_BIT ( P4_EVENT_BSQ_CACHE_REFERENCE , RD_3rdL_MISS ) |
P4_ESCR_EMASK_BIT ( P4_EVENT_BSQ_CACHE_REFERENCE , WR_2ndL_MISS ) ) ,
/* branch instructions retired */
[ PERF_COUNT_HW_BRANCH_INSTRUCTIONS ] =
p4_config_pack_escr ( P4_ESCR_EVENT ( P4_EVENT_RETIRED_BRANCH_TYPE ) |
P4_ESCR_EMASK_BIT ( P4_EVENT_RETIRED_BRANCH_TYPE , CONDITIONAL ) |
P4_ESCR_EMASK_BIT ( P4_EVENT_RETIRED_BRANCH_TYPE , CALL ) |
P4_ESCR_EMASK_BIT ( P4_EVENT_RETIRED_BRANCH_TYPE , RETURN ) |
P4_ESCR_EMASK_BIT ( P4_EVENT_RETIRED_BRANCH_TYPE , INDIRECT ) ) ,
/* mispredicted branches retired */
[ PERF_COUNT_HW_BRANCH_MISSES ] =
p4_config_pack_escr ( P4_ESCR_EVENT ( P4_EVENT_MISPRED_BRANCH_RETIRED ) |
P4_ESCR_EMASK_BIT ( P4_EVENT_MISPRED_BRANCH_RETIRED , NBOGUS ) ) ,
/* bus ready clocks (cpu is driving #DRDY_DRV\#DRDY_OWN): */
[ PERF_COUNT_HW_BUS_CYCLES ] =
p4_config_pack_escr ( P4_ESCR_EVENT ( P4_EVENT_FSB_DATA_ACTIVITY ) |
P4_ESCR_EMASK_BIT ( P4_EVENT_FSB_DATA_ACTIVITY , DRDY_DRV ) |
P4_ESCR_EMASK_BIT ( P4_EVENT_FSB_DATA_ACTIVITY , DRDY_OWN ) ) |
p4_config_pack_cccr ( P4_CCCR_EDGE | P4_CCCR_COMPARE ) ,
2010-03-11 19:54:39 +03:00
} ;
2010-03-24 12:09:26 +08:00
/*
 * Look up the event binding selected by the event index packed in @config.
 *
 * Returns a pointer into p4_event_bind_map, or NULL when the unpacked
 * index lies outside of that table.
 */
static struct p4_event_bind *p4_config_get_bind(u64 config)
{
	const unsigned int event_idx = p4_config_unpack_event(config);

	if (event_idx >= ARRAY_SIZE(p4_event_bind_map))
		return NULL;

	return &p4_event_bind_map[event_idx];
}
2010-03-11 19:54:39 +03:00
static u64 p4_pmu_event_map ( int hw_event )
{
2010-03-24 12:09:26 +08:00
struct p4_event_bind * bind ;
unsigned int esel ;
2010-03-11 19:54:39 +03:00
u64 config ;
2010-03-24 12:09:26 +08:00
config = p4_general_events [ hw_event ] ;
bind = p4_config_get_bind ( config ) ;
esel = P4_OPCODE_ESEL ( bind - > opcode ) ;
config | = p4_config_pack_cccr ( P4_CCCR_ESEL ( esel ) ) ;
2010-03-11 19:54:39 +03:00
return config ;
}
2010-08-25 22:23:34 +04:00
/*
 * Check cpu model specifics: tell whether the event with index @event_idx
 * may be used on the boot CPU's model.
 *
 * For info
 * - IQ_ESCR0, IQ_ESCR1 only for models 1 and 2 (not enforced here)
 */
static bool p4_event_match_cpu_model(unsigned int event_idx)
{
	/* every event but INSTR_COMPLETED is accepted on any model */
	if (event_idx != P4_EVENT_INSTR_COMPLETED)
		return true;

	/* INSTR_COMPLETED event only exist for model 3, 4, 6 (Prescott) */
	switch (boot_cpu_data.x86_model) {
	case 3:
	case 4:
	case 6:
		return true;
	default:
		return false;
	}
}
2010-07-05 10:09:29 +08:00
static int p4_validate_raw_event ( struct perf_event * event )
{
2010-08-25 22:23:34 +04:00
unsigned int v , emask ;
2010-07-05 10:09:29 +08:00
2010-08-25 22:23:34 +04:00
/* User data may have out-of-bound event index */
2010-07-05 10:09:29 +08:00
v = p4_config_unpack_event ( event - > attr . config ) ;
2010-08-25 22:23:34 +04:00
if ( v > = ARRAY_SIZE ( p4_event_bind_map ) )
return - EINVAL ;
/* It may be unsupported: */
if ( ! p4_event_match_cpu_model ( v ) )
2010-07-05 10:09:29 +08:00
return - EINVAL ;
2010-08-25 22:23:34 +04:00
/*
* NOTE : P4_CCCR_THREAD_ANY has not the same meaning as
* in Architectural Performance Monitoring , it means not
* on _which_ logical cpu to count but rather _when_ , ie it
* depends on logical cpu state - - count event if one cpu active ,
* none , both or any , so we just allow user to pass any value
* desired .
*
* In turn we always set Tx_OS / Tx_USR bits bound to logical
* cpu without their propagation to another cpu
*/
/*
2011-03-17 16:24:16 -03:00
* if an event is shared across the logical threads
2010-08-25 22:23:34 +04:00
* the user needs special permissions to be able to use it
*/
2011-01-25 17:32:01 +02:00
if ( p4_ht_active ( ) & & p4_event_bind_map [ v ] . shared ) {
perf_event: Add support for LSM and SELinux checks
In current mainline, the degree of access to perf_event_open(2) system
call depends on the perf_event_paranoid sysctl. This has a number of
limitations:
1. The sysctl is only a single value. Many types of accesses are controlled
based on the single value thus making the control very limited and
coarse grained.
2. The sysctl is global, so if the sysctl is changed, then that means
all processes get access to perf_event_open(2) opening the door to
security issues.
This patch adds LSM and SELinux access checking which will be used in
Android to access perf_event_open(2) for the purposes of attaching BPF
programs to tracepoints, perf profiling and other operations from
userspace. These operations are intended for production systems.
5 new LSM hooks are added:
1. perf_event_open: This controls access during the perf_event_open(2)
syscall itself. The hook is called from all the places that the
perf_event_paranoid sysctl is checked to keep it consistent with the
systctl. The hook gets passed a 'type' argument which controls CPU,
kernel and tracepoint accesses (in this context, CPU, kernel and
tracepoint have the same semantics as the perf_event_paranoid sysctl).
Additionally, I added an 'open' type which is similar to
perf_event_paranoid sysctl == 3 patch carried in Android and several other
distros but was rejected in mainline [1] in 2016.
2. perf_event_alloc: This allocates a new security object for the event
which stores the current SID within the event. It will be useful when
the perf event's FD is passed through IPC to another process which may
try to read the FD. Appropriate security checks will limit access.
3. perf_event_free: Called when the event is closed.
4. perf_event_read: Called from the read(2) and mmap(2) syscalls for the event.
5. perf_event_write: Called from the ioctl(2) syscalls for the event.
[1] https://lwn.net/Articles/696240/
Since Peter had suggest LSM hooks in 2016 [1], I am adding his
Suggested-by tag below.
To use this patch, we set the perf_event_paranoid sysctl to -1 and then
apply selinux checking as appropriate (default deny everything, and then
add policy rules to give access to domains that need it). In the future
we can remove the perf_event_paranoid sysctl altogether.
Suggested-by: Peter Zijlstra <peterz@infradead.org>
Co-developed-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: James Morris <jmorris@namei.org>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: rostedt@goodmis.org
Cc: Yonghong Song <yhs@fb.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: jeffv@google.com
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: primiano@google.com
Cc: Song Liu <songliubraving@fb.com>
Cc: rsavitski@google.com
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Matthew Garrett <matthewgarrett@google.com>
Link: https://lkml.kernel.org/r/20191014170308.70668-1-joel@joelfernandes.org
2019-10-14 13:03:08 -04:00
v = perf_allow_cpu ( & event - > attr ) ;
if ( v )
return v ;
2010-07-05 10:09:29 +08:00
}
2010-08-25 22:23:34 +04:00
/* ESCR EventMask bits may be invalid */
emask = p4_config_unpack_escr ( event - > attr . config ) & P4_ESCR_EVENTMASK_MASK ;
if ( emask & ~ p4_event_bind_map [ v ] . escr_emask )
return - EINVAL ;
2010-07-05 10:09:29 +08:00
/*
2010-08-25 22:23:34 +04:00
* it may have some invalid PEBS bits
2010-07-05 10:09:29 +08:00
*/
2010-08-25 22:23:34 +04:00
if ( p4_config_pebs_has ( event - > attr . config , P4_PEBS_CONFIG_ENABLE ) )
2010-07-05 10:09:29 +08:00
return - EINVAL ;
2010-08-25 22:23:34 +04:00
2010-07-05 10:09:29 +08:00
v = p4_config_unpack_metric ( event - > attr . config ) ;
2010-08-25 22:23:34 +04:00
if ( v > = ARRAY_SIZE ( p4_pebs_bind_map ) )
2010-07-05 10:09:29 +08:00
return - EINVAL ;
return 0 ;
}
2010-03-30 17:00:06 +02:00
static int p4_hw_config ( struct perf_event * event )
2010-03-11 19:54:39 +03:00
{
2010-05-08 15:25:52 +04:00
int cpu = get_cpu ( ) ;
int rc = 0 ;
2010-03-24 12:09:26 +08:00
u32 escr , cccr ;
2010-03-11 19:54:39 +03:00
/*
* the reason we use cpu that early is that : if we get scheduled
* first time on the same cpu - - we will not need swap thread
* specific flags in config ( and will save some cpu cycles )
*/
2010-03-24 12:09:26 +08:00
cccr = p4_default_cccr_conf ( cpu ) ;
2010-03-30 17:00:06 +02:00
escr = p4_default_escr_conf ( cpu , event - > attr . exclude_kernel ,
event - > attr . exclude_user ) ;
event - > hw . config = p4_config_pack_escr ( escr ) |
p4_config_pack_cccr ( cccr ) ;
2010-03-11 19:54:39 +03:00
2010-03-18 18:33:12 +08:00
if ( p4_ht_active ( ) & & p4_ht_thread ( cpu ) )
2010-03-30 17:00:06 +02:00
event - > hw . config = p4_set_ht_bit ( event - > hw . config ) ;
2010-05-08 15:39:52 +04:00
if ( event - > attr . type = = PERF_TYPE_RAW ) {
2011-01-25 17:32:01 +02:00
struct p4_event_bind * bind ;
unsigned int esel ;
2010-08-25 22:23:34 +04:00
/*
* Clear bits we reserve to be managed by kernel itself
* and never allowed from a user space
*/
2016-05-17 17:40:15 +01:00
event - > attr . config & = P4_CONFIG_MASK ;
2010-08-25 22:23:34 +04:00
2010-07-05 10:09:29 +08:00
rc = p4_validate_raw_event ( event ) ;
if ( rc )
2010-05-08 15:25:54 +04:00
goto out ;
2010-05-08 15:39:52 +04:00
/*
2010-07-05 10:09:29 +08:00
* Note that for RAW events we allow user to use P4_CCCR_RESERVED
* bits since we keep additional info here ( for cache events and etc )
2010-05-08 15:39:52 +04:00
*/
2010-08-25 22:23:34 +04:00
event - > hw . config | = event - > attr . config ;
2011-01-25 17:32:01 +02:00
bind = p4_config_get_bind ( event - > attr . config ) ;
if ( ! bind ) {
rc = - EINVAL ;
goto out ;
}
esel = P4_OPCODE_ESEL ( bind - > opcode ) ;
event - > hw . config | = p4_config_pack_cccr ( P4_CCCR_ESEL ( esel ) ) ;
2010-05-08 15:39:52 +04:00
}
2010-03-18 18:33:12 +08:00
2010-05-08 15:25:52 +04:00
rc = x86_setup_perfctr ( event ) ;
2010-05-08 15:25:54 +04:00
out :
2010-05-08 15:25:52 +04:00
put_cpu ( ) ;
return rc ;
2010-03-11 19:54:39 +03:00
}
2010-05-17 16:13:04 +08:00
static inline int p4_pmu_clear_cccr_ovf ( struct hw_perf_event * hwc )
2010-03-11 19:54:39 +03:00
{
2011-01-07 21:42:06 +03:00
u64 v ;
2010-03-11 19:54:39 +03:00
2011-01-07 21:42:06 +03:00
/* an official way for overflow indication */
2011-02-02 17:40:59 +01:00
rdmsrl ( hwc - > config_base , v ) ;
2011-01-07 21:42:06 +03:00
if ( v & P4_CCCR_OVF ) {
2011-02-02 17:40:59 +01:00
wrmsrl ( hwc - > config_base , v & ~ P4_CCCR_OVF ) ;
2011-01-07 21:42:06 +03:00
return 1 ;
2010-03-11 19:54:39 +03:00
}
2010-05-17 16:13:04 +08:00
2011-02-16 14:08:02 +03:00
/*
* In some circumstances the overflow might issue an NMI but did
* not set P4_CCCR_OVF bit . Because a counter holds a negative value
* we simply check for high bit being set , if it ' s cleared it means
* the counter has reached zero value and continued counting before
* real NMI signal was received :
*/
2011-03-24 23:36:25 +03:00
rdmsrl ( hwc - > event_base , v ) ;
2011-02-16 14:08:02 +03:00
if ( ! ( v & ARCH_P4_UNFLAGGED_BIT ) )
2011-01-07 21:42:06 +03:00
return 1 ;
return 0 ;
2010-03-11 19:54:39 +03:00
}
2010-07-05 10:09:29 +08:00
/*
 * Intentionally a no-op for now.
 *
 * FIXME
 *
 * Two threads are still allowed to set up the same cache events, so the
 * metrics can't simply be cleared until we know nobody depends on them;
 * some kind of counter for "ReplayEvent" users is needed.
 *
 * RAW events make it more complex: if a user (for some reason) passes a
 * cache event metric together with an improper event opcode, that is fine
 * from the hardware point of view but complete nonsense with respect to
 * the "meaning" of such an action.
 *
 * So at the moment the metrics are left turned on forever -- it's ok for
 * now but needs to be revisited!  What is being left out is:
 *
 *	(void)wrmsrl_safe(MSR_IA32_PEBS_ENABLE, 0);
 *	(void)wrmsrl_safe(MSR_P4_PEBS_MATRIX_VERT, 0);
 */
static void p4_pmu_disable_pebs(void)
{
}
2010-03-11 19:54:39 +03:00
static inline void p4_pmu_disable_event ( struct perf_event * event )
{
struct hw_perf_event * hwc = & event - > hw ;
/*
* If event gets disabled while counter is in overflowed
* state we need to clear P4_CCCR_OVF , otherwise interrupt get
* asserted again and again
*/
2012-06-07 13:32:04 -07:00
( void ) wrmsrl_safe ( hwc - > config_base ,
2013-04-24 09:26:30 +02:00
p4_config_unpack_cccr ( hwc - > config ) & ~ P4_CCCR_ENABLE & ~ P4_CCCR_OVF & ~ P4_CCCR_RESERVED ) ;
2010-03-11 19:54:39 +03:00
}
static void p4_pmu_disable_all ( void )
{
x86: Replace __get_cpu_var uses
__get_cpu_var() is used for multiple purposes in the kernel source. One of
them is address calculation via the form &__get_cpu_var(x). This calculates
the address for the instance of the percpu variable of the current processor
based on an offset.
Other use cases are for storing and retrieving data from the current
processors percpu area. __get_cpu_var() can be used as an lvalue when
writing data or on the right side of an assignment.
__get_cpu_var() is defined as :
#define __get_cpu_var(var) (*this_cpu_ptr(&(var)))
__get_cpu_var() always only does an address determination. However, store
and retrieve operations could use a segment prefix (or global register on
other platforms) to avoid the address calculation.
this_cpu_write() and this_cpu_read() can directly take an offset into a
percpu area and use optimized assembly code to read and write per cpu
variables.
This patch converts __get_cpu_var into either an explicit address
calculation using this_cpu_ptr() or into a use of this_cpu operations that
use the offset. Thereby address calculations are avoided and less registers
are used when code is generated.
Transformations done to __get_cpu_var()
1. Determine the address of the percpu instance of the current processor.
DEFINE_PER_CPU(int, y);
int *x = &__get_cpu_var(y);
Converts to
int *x = this_cpu_ptr(&y);
2. Same as #1 but this time an array structure is involved.
DEFINE_PER_CPU(int, y[20]);
int *x = __get_cpu_var(y);
Converts to
int *x = this_cpu_ptr(y);
3. Retrieve the content of the current processors instance of a per cpu
variable.
DEFINE_PER_CPU(int, y);
int x = __get_cpu_var(y)
Converts to
int x = __this_cpu_read(y);
4. Retrieve the content of a percpu struct
DEFINE_PER_CPU(struct mystruct, y);
struct mystruct x = __get_cpu_var(y);
Converts to
memcpy(&x, this_cpu_ptr(&y), sizeof(x));
5. Assignment to a per cpu variable
DEFINE_PER_CPU(int, y)
__get_cpu_var(y) = x;
Converts to
__this_cpu_write(y, x);
6. Increment/Decrement etc of a per cpu variable
DEFINE_PER_CPU(int, y);
__get_cpu_var(y)++
Converts to
__this_cpu_inc(y)
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: x86@kernel.org
Acked-by: H. Peter Anvin <hpa@linux.intel.com>
Acked-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Christoph Lameter <cl@linux.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
2014-08-17 12:30:40 -05:00
struct cpu_hw_events * cpuc = this_cpu_ptr ( & cpu_hw_events ) ;
2010-03-11 19:54:39 +03:00
int idx ;
2010-03-29 18:36:50 +02:00
for ( idx = 0 ; idx < x86_pmu . num_counters ; idx + + ) {
2010-03-11 19:54:39 +03:00
struct perf_event * event = cpuc - > events [ idx ] ;
if ( ! test_bit ( idx , cpuc - > active_mask ) )
continue ;
p4_pmu_disable_event ( event ) ;
}
2010-07-05 10:09:29 +08:00
p4_pmu_disable_pebs ( ) ;
}
/*
 * Program the PEBS related MSRs for the metric selected in @config.
 *
 * The configuration must be valid: the unpacked metric is used as an
 * index into p4_pebs_bind_map without any further check.  Nothing is
 * written when the metric is P4_PEBS_METRIC__none.
 */
static void p4_pmu_enable_pebs(u64 config)
{
	const struct p4_pebs_bind *pebs;
	unsigned int metric;

	/* every metric id has to fit into the config's metric field */
	BUILD_BUG_ON(P4_PEBS_METRIC__max > P4_PEBS_CONFIG_METRIC_MASK);

	metric = p4_config_unpack_metric(config);
	if (metric == P4_PEBS_METRIC__none)
		return;

	pebs = &p4_pebs_bind_map[metric];

	/* the results of the MSR writes are deliberately ignored */
	(void)wrmsrl_safe(MSR_IA32_PEBS_ENABLE, (u64)pebs->metric_pebs);
	(void)wrmsrl_safe(MSR_P4_PEBS_MATRIX_VERT, (u64)pebs->metric_vert);
}
2021-04-14 07:36:29 -07:00
static void __p4_pmu_enable_event ( struct perf_event * event )
2010-03-11 19:54:39 +03:00
{
struct hw_perf_event * hwc = & event - > hw ;
int thread = p4_ht_config_thread ( hwc - > config ) ;
u64 escr_conf = p4_config_unpack_escr ( p4_clear_ht_bit ( hwc - > config ) ) ;
2010-03-24 12:09:26 +08:00
unsigned int idx = p4_config_unpack_event ( hwc - > config ) ;
struct p4_event_bind * bind ;
u64 escr_addr , cccr ;
2010-03-11 19:54:39 +03:00
2010-03-24 12:09:26 +08:00
bind = & p4_event_bind_map [ idx ] ;
2013-04-24 09:26:30 +02:00
escr_addr = bind - > escr_msr [ thread ] ;
2010-03-11 19:54:39 +03:00
/*
* - we dont support cascaded counters yet
* - and counter 1 is broken ( erratum )
*/
WARN_ON_ONCE ( p4_is_event_cascaded ( hwc - > config ) ) ;
WARN_ON_ONCE ( hwc - > idx = = 1 ) ;
2010-03-24 12:09:26 +08:00
/* we need a real Event value */
escr_conf & = ~ P4_ESCR_EVENT_MASK ;
escr_conf | = P4_ESCR_EVENT ( P4_OPCODE_EVNT ( bind - > opcode ) ) ;
cccr = p4_config_unpack_cccr ( hwc - > config ) ;
/*
2010-07-05 10:09:29 +08:00
* it could be Cache event so we need to write metrics
* into additional MSRs
2010-03-24 12:09:26 +08:00
*/
2010-07-05 10:09:29 +08:00
p4_pmu_enable_pebs ( hwc - > config ) ;
2010-03-24 12:09:26 +08:00
2012-06-07 13:32:04 -07:00
( void ) wrmsrl_safe ( escr_addr , escr_conf ) ;
( void ) wrmsrl_safe ( hwc - > config_base ,
2010-03-24 12:09:26 +08:00
( cccr & ~ P4_CCCR_RESERVED ) | P4_CCCR_ENABLE ) ;
2010-03-11 19:54:39 +03:00
}
2021-04-14 07:36:29 -07:00
static DEFINE_PER_CPU ( unsigned long [ BITS_TO_LONGS ( X86_PMC_IDX_MAX ) ] , p4_running ) ;
static void p4_pmu_enable_event ( struct perf_event * event )
{
int idx = event - > hw . idx ;
__set_bit ( idx , per_cpu ( p4_running , smp_processor_id ( ) ) ) ;
__p4_pmu_enable_event ( event ) ;
}
2010-03-26 14:08:44 +01:00
static void p4_pmu_enable_all ( int added )
2010-03-11 19:54:39 +03:00
{
x86: Replace __get_cpu_var uses
__get_cpu_var() is used for multiple purposes in the kernel source. One of
them is address calculation via the form &__get_cpu_var(x). This calculates
the address for the instance of the percpu variable of the current processor
based on an offset.
Other use cases are for storing and retrieving data from the current
processors percpu area. __get_cpu_var() can be used as an lvalue when
writing data or on the right side of an assignment.
__get_cpu_var() is defined as :
#define __get_cpu_var(var) (*this_cpu_ptr(&(var)))
__get_cpu_var() always only does an address determination. However, store
and retrieve operations could use a segment prefix (or global register on
other platforms) to avoid the address calculation.
this_cpu_write() and this_cpu_read() can directly take an offset into a
percpu area and use optimized assembly code to read and write per cpu
variables.
This patch converts __get_cpu_var into either an explicit address
calculation using this_cpu_ptr() or into a use of this_cpu operations that
use the offset. Thereby address calculations are avoided and less registers
are used when code is generated.
Transformations done to __get_cpu_var()
1. Determine the address of the percpu instance of the current processor.
DEFINE_PER_CPU(int, y);
int *x = &__get_cpu_var(y);
Converts to
int *x = this_cpu_ptr(&y);
2. Same as #1 but this time an array structure is involved.
DEFINE_PER_CPU(int, y[20]);
int *x = __get_cpu_var(y);
Converts to
int *x = this_cpu_ptr(y);
3. Retrieve the content of the current processors instance of a per cpu
variable.
DEFINE_PER_CPU(int, y);
int x = __get_cpu_var(y)
Converts to
int x = __this_cpu_read(y);
4. Retrieve the content of a percpu struct
DEFINE_PER_CPU(struct mystruct, y);
struct mystruct x = __get_cpu_var(y);
Converts to
memcpy(&x, this_cpu_ptr(&y), sizeof(x));
5. Assignment to a per cpu variable
DEFINE_PER_CPU(int, y)
__get_cpu_var(y) = x;
Converts to
__this_cpu_write(y, x);
6. Increment/Decrement etc of a per cpu variable
DEFINE_PER_CPU(int, y);
__get_cpu_var(y)++
Converts to
__this_cpu_inc(y)
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: x86@kernel.org
Acked-by: H. Peter Anvin <hpa@linux.intel.com>
Acked-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Christoph Lameter <cl@linux.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
2014-08-17 12:30:40 -05:00
struct cpu_hw_events * cpuc = this_cpu_ptr ( & cpu_hw_events ) ;
2010-03-11 19:54:39 +03:00
int idx ;
2010-03-29 18:36:50 +02:00
for ( idx = 0 ; idx < x86_pmu . num_counters ; idx + + ) {
2010-03-11 19:54:39 +03:00
struct perf_event * event = cpuc - > events [ idx ] ;
if ( ! test_bit ( idx , cpuc - > active_mask ) )
continue ;
2021-04-14 07:36:29 -07:00
__p4_pmu_enable_event ( event ) ;
2010-03-11 19:54:39 +03:00
}
}
2022-05-20 15:38:43 +02:00
static int p4_pmu_set_period ( struct perf_event * event )
{
struct hw_perf_event * hwc = & event - > hw ;
s64 left = this_cpu_read ( pmc_prev_left [ hwc - > idx ] ) ;
int ret ;
ret = x86_perf_event_set_period ( event ) ;
if ( hwc - > event_base ) {
/*
* This handles erratum N15 in intel doc 249199 - 02 9 ,
* the counter may not be updated correctly on write
* so we need a second write operation to do the trick
* ( the official workaround didn ' t work )
*
* the former idea is taken from OProfile code
*/
wrmsrl ( hwc - > event_base , ( u64 ) ( - left ) & x86_pmu . cntval_mask ) ;
}
return ret ;
}
2010-03-11 19:54:39 +03:00
static int p4_pmu_handle_irq ( struct pt_regs * regs )
{
struct perf_sample_data data ;
struct cpu_hw_events * cpuc ;
struct perf_event * event ;
struct hw_perf_event * hwc ;
int idx , handled = 0 ;
u64 val ;
x86: Replace __get_cpu_var uses
__get_cpu_var() is used for multiple purposes in the kernel source. One of
them is address calculation via the form &__get_cpu_var(x). This calculates
the address for the instance of the percpu variable of the current processor
based on an offset.
Other use cases are for storing and retrieving data from the current
processors percpu area. __get_cpu_var() can be used as an lvalue when
writing data or on the right side of an assignment.
__get_cpu_var() is defined as :
#define __get_cpu_var(var) (*this_cpu_ptr(&(var)))
__get_cpu_var() always only does an address determination. However, store
and retrieve operations could use a segment prefix (or global register on
other platforms) to avoid the address calculation.
this_cpu_write() and this_cpu_read() can directly take an offset into a
percpu area and use optimized assembly code to read and write per cpu
variables.
This patch converts __get_cpu_var into either an explicit address
calculation using this_cpu_ptr() or into a use of this_cpu operations that
use the offset. Thereby address calculations are avoided and less registers
are used when code is generated.
Transformations done to __get_cpu_var()
1. Determine the address of the percpu instance of the current processor.
DEFINE_PER_CPU(int, y);
int *x = &__get_cpu_var(y);
Converts to
int *x = this_cpu_ptr(&y);
2. Same as #1 but this time an array structure is involved.
DEFINE_PER_CPU(int, y[20]);
int *x = __get_cpu_var(y);
Converts to
int *x = this_cpu_ptr(y);
3. Retrieve the content of the current processors instance of a per cpu
variable.
DEFINE_PER_CPU(int, y);
int x = __get_cpu_var(y)
Converts to
int x = __this_cpu_read(y);
4. Retrieve the content of a percpu struct
DEFINE_PER_CPU(struct mystruct, y);
struct mystruct x = __get_cpu_var(y);
Converts to
memcpy(&x, this_cpu_ptr(&y), sizeof(x));
5. Assignment to a per cpu variable
DEFINE_PER_CPU(int, y)
__get_cpu_var(y) = x;
Converts to
__this_cpu_write(y, x);
6. Increment/Decrement etc of a per cpu variable
DEFINE_PER_CPU(int, y);
__get_cpu_var(y)++
Converts to
__this_cpu_inc(y)
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: x86@kernel.org
Acked-by: H. Peter Anvin <hpa@linux.intel.com>
Acked-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Christoph Lameter <cl@linux.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
2014-08-17 12:30:40 -05:00
cpuc = this_cpu_ptr ( & cpu_hw_events ) ;
2010-03-11 19:54:39 +03:00
2010-03-29 18:36:50 +02:00
for ( idx = 0 ; idx < x86_pmu . num_counters ; idx + + ) {
2010-08-05 19:09:17 +04:00
int overflow ;
2010-03-11 19:54:39 +03:00
2010-09-29 23:01:38 -04:00
if ( ! test_bit ( idx , cpuc - > active_mask ) ) {
/* catch in-flight IRQs */
2021-04-14 07:36:29 -07:00
if ( __test_and_clear_bit ( idx , per_cpu ( p4_running , smp_processor_id ( ) ) ) )
2010-09-29 23:01:38 -04:00
handled + + ;
2010-03-11 19:54:39 +03:00
continue ;
2010-09-29 23:01:38 -04:00
}
2010-03-11 19:54:39 +03:00
event = cpuc - > events [ idx ] ;
hwc = & event - > hw ;
WARN_ON_ONCE ( hwc - > idx ! = idx ) ;
2010-05-17 16:13:04 +08:00
/* it might be unflagged overflow */
2010-08-05 19:09:17 +04:00
overflow = p4_pmu_clear_cccr_ovf ( hwc ) ;
2010-03-11 19:54:39 +03:00
val = x86_perf_event_update ( event ) ;
2010-08-05 19:09:17 +04:00
if ( ! overflow & & ( val & ( 1ULL < < ( x86_pmu . cntval_bits - 1 ) ) ) )
2010-03-11 19:54:39 +03:00
continue ;
2010-08-05 19:09:17 +04:00
handled + = overflow ;
2010-05-17 16:13:04 +08:00
/* event overflow for sure */
2012-04-02 20:19:08 +02:00
perf_sample_data_init ( & data , 0 , hwc - > last_period ) ;
2010-03-11 19:54:39 +03:00
2022-05-20 15:38:43 +02:00
if ( ! static_call ( x86_pmu_set_period ) ( event ) )
2010-03-11 19:54:39 +03:00
continue ;
2012-04-02 20:19:08 +02:00
2011-06-27 14:41:57 +02:00
if ( perf_event_overflow ( event , & data , regs ) )
2011-04-21 11:03:21 -04:00
x86_pmu_stop ( event , 0 ) ;
2010-03-11 19:54:39 +03:00
}
2011-04-27 06:32:33 -04:00
if ( handled )
2010-03-11 19:54:39 +03:00
inc_irq_stat ( apic_perf_irqs ) ;
2011-04-27 06:32:33 -04:00
/*
* When dealing with the unmasking of the LVTPC on P4 perf hw , it has
* been observed that the OVF bit flag has to be cleared first _before_
* the LVTPC can be unmasked .
*
* The reason is the NMI line will continue to be asserted while the OVF
* bit is set . This causes a second NMI to generate if the LVTPC is
* unmasked before the OVF bit is cleared , leading to unknown NMI
* messages .
*/
apic_write ( APIC_LVTPC , APIC_DM_NMI ) ;
2010-03-11 19:54:39 +03:00
2010-09-02 15:07:49 -04:00
return handled ;
2010-03-11 19:54:39 +03:00
}
/*
* swap thread specific fields according to a thread
* we are going to run on
*/
static void p4_pmu_swap_config_ts ( struct hw_perf_event * hwc , int cpu )
{
u32 escr , cccr ;
/*
* we either lucky and continue on same cpu or no HT support
*/
if ( ! p4_should_swap_ts ( hwc - > config , cpu ) )
return ;
/*
* the event is migrated from an another logical
* cpu , so we need to swap thread specific flags
*/
escr = p4_config_unpack_escr ( hwc - > config ) ;
cccr = p4_config_unpack_cccr ( hwc - > config ) ;
if ( p4_ht_thread ( cpu ) ) {
cccr & = ~ P4_CCCR_OVF_PMI_T0 ;
cccr | = P4_CCCR_OVF_PMI_T1 ;
2010-03-24 12:09:26 +08:00
if ( escr & P4_ESCR_T0_OS ) {
escr & = ~ P4_ESCR_T0_OS ;
escr | = P4_ESCR_T1_OS ;
2010-03-11 19:54:39 +03:00
}
2010-03-24 12:09:26 +08:00
if ( escr & P4_ESCR_T0_USR ) {
escr & = ~ P4_ESCR_T0_USR ;
escr | = P4_ESCR_T1_USR ;
2010-03-11 19:54:39 +03:00
}
hwc - > config = p4_config_pack_escr ( escr ) ;
hwc - > config | = p4_config_pack_cccr ( cccr ) ;
hwc - > config | = P4_CONFIG_HT ;
} else {
cccr & = ~ P4_CCCR_OVF_PMI_T1 ;
cccr | = P4_CCCR_OVF_PMI_T0 ;
2010-03-24 12:09:26 +08:00
if ( escr & P4_ESCR_T1_OS ) {
escr & = ~ P4_ESCR_T1_OS ;
escr | = P4_ESCR_T0_OS ;
2010-03-11 19:54:39 +03:00
}
2010-03-24 12:09:26 +08:00
if ( escr & P4_ESCR_T1_USR ) {
escr & = ~ P4_ESCR_T1_USR ;
escr | = P4_ESCR_T0_USR ;
2010-03-11 19:54:39 +03:00
}
hwc - > config = p4_config_pack_escr ( escr ) ;
hwc - > config | = p4_config_pack_cccr ( cccr ) ;
hwc - > config & = ~ P4_CONFIG_HT ;
}
}
2010-05-12 21:42:42 +04:00
/*
 * ESCR address hashing is tricky: the ESCR MSRs are not laid out
 * sequentially, but all of them live in the address window
 * [P4_ESCR_MSR_BASE, P4_ESCR_MSR_MAX], which starts at MSR_P4_BSU_ESCR0
 * (0x03a0).
 *
 * So a direct-mapped table indexed by (msr - P4_ESCR_MSR_BASE) is used;
 * every valid slot stores its own MSR address, the remaining slots stay
 * zero.  With 46 entries in 66 slots the table is ~70% filled.
 *
 * The macro parameters are parenthesized so that an expression argument
 * is evaluated as a whole.
 */
#define P4_ESCR_MSR_BASE		0x000003a0
#define P4_ESCR_MSR_MAX			0x000003e1
#define P4_ESCR_MSR_TABLE_SIZE		(P4_ESCR_MSR_MAX - P4_ESCR_MSR_BASE + 1)
#define P4_ESCR_MSR_IDX(msr)		((msr) - P4_ESCR_MSR_BASE)
#define P4_ESCR_MSR_TABLE_ENTRY(msr)	[P4_ESCR_MSR_IDX(msr)] = (msr)

static const unsigned int p4_escr_table[P4_ESCR_MSR_TABLE_SIZE] = {
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_ALF_ESCR0),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_ALF_ESCR1),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_BPU_ESCR0),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_BPU_ESCR1),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_BSU_ESCR0),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_BSU_ESCR1),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_CRU_ESCR0),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_CRU_ESCR1),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_CRU_ESCR2),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_CRU_ESCR3),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_CRU_ESCR4),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_CRU_ESCR5),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_DAC_ESCR0),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_DAC_ESCR1),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_FIRM_ESCR0),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_FIRM_ESCR1),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_FLAME_ESCR0),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_FLAME_ESCR1),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_FSB_ESCR0),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_FSB_ESCR1),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_IQ_ESCR0),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_IQ_ESCR1),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_IS_ESCR0),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_IS_ESCR1),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_ITLB_ESCR0),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_ITLB_ESCR1),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_IX_ESCR0),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_IX_ESCR1),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_MOB_ESCR0),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_MOB_ESCR1),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_MS_ESCR0),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_MS_ESCR1),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_PMH_ESCR0),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_PMH_ESCR1),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_RAT_ESCR0),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_RAT_ESCR1),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_SAAT_ESCR0),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_SAAT_ESCR1),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_SSU_ESCR0),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_SSU_ESCR1),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_TBPU_ESCR0),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_TBPU_ESCR1),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_TC_ESCR0),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_TC_ESCR1),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_U2L_ESCR0),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_U2L_ESCR1),
};
static int p4_get_escr_idx ( unsigned int addr )
{
2010-05-12 21:42:42 +04:00
unsigned int idx = P4_ESCR_MSR_IDX ( addr ) ;
2010-03-11 19:54:39 +03:00
2010-05-19 01:19:17 +04:00
if ( unlikely ( idx > = P4_ESCR_MSR_TABLE_SIZE | |
! p4_escr_table [ idx ] | |
p4_escr_table [ idx ] ! = addr ) ) {
2010-05-12 21:42:42 +04:00
WARN_ONCE ( 1 , " P4 PMU: Wrong address passed: %x \n " , addr ) ;
return - 1 ;
2010-03-11 19:54:39 +03:00
}
2010-05-12 21:42:42 +04:00
return idx ;
2010-03-11 19:54:39 +03:00
}
2010-03-24 12:09:26 +08:00
static int p4_next_cntr ( int thread , unsigned long * used_mask ,
struct p4_event_bind * bind )
{
2010-05-14 23:08:15 +04:00
int i , j ;
2010-03-24 12:09:26 +08:00
for ( i = 0 ; i < P4_CNTR_LIMIT ; i + + ) {
2010-05-14 23:08:15 +04:00
j = bind - > cntr [ thread ] [ i ] ;
if ( j ! = - 1 & & ! test_bit ( j , used_mask ) )
2010-03-24 12:09:26 +08:00
return j ;
}
return - 1 ;
}
2010-03-11 19:54:39 +03:00
static int p4_pmu_schedule_events ( struct cpu_hw_events * cpuc , int n , int * assign )
{
unsigned long used_mask [ BITS_TO_LONGS ( X86_PMC_IDX_MAX ) ] ;
2010-05-12 21:42:42 +04:00
unsigned long escr_mask [ BITS_TO_LONGS ( P4_ESCR_MSR_TABLE_SIZE ) ] ;
2010-05-19 01:19:18 +04:00
int cpu = smp_processor_id ( ) ;
2010-03-24 12:09:26 +08:00
struct hw_perf_event * hwc ;
struct p4_event_bind * bind ;
unsigned int i , thread , num ;
int cntr_idx , escr_idx ;
2011-07-09 00:17:12 +04:00
u64 config_alias ;
int pass ;
2010-03-11 19:54:39 +03:00
bitmap_zero ( used_mask , X86_PMC_IDX_MAX ) ;
2010-05-12 21:42:42 +04:00
bitmap_zero ( escr_mask , P4_ESCR_MSR_TABLE_SIZE ) ;
2010-03-11 19:54:39 +03:00
for ( i = 0 , num = n ; i < n ; i + + , num - - ) {
2010-03-24 12:09:26 +08:00
2010-03-11 19:54:39 +03:00
hwc = & cpuc - > event_list [ i ] - > hw ;
thread = p4_ht_thread ( cpu ) ;
2011-07-09 00:17:12 +04:00
pass = 0 ;
again :
/*
2011-07-21 20:06:25 +04:00
* It ' s possible to hit a circular lock
* between original and alternative events
* if both are scheduled already .
2011-07-09 00:17:12 +04:00
*/
if ( pass > 2 )
goto done ;
2010-03-24 12:09:26 +08:00
bind = p4_config_get_bind ( hwc - > config ) ;
escr_idx = p4_get_escr_idx ( bind - > escr_msr [ thread ] ) ;
2010-05-12 21:42:42 +04:00
if ( unlikely ( escr_idx = = - 1 ) )
goto done ;
2010-03-11 19:54:39 +03:00
if ( hwc - > idx ! = - 1 & & ! p4_should_swap_ts ( hwc - > config , cpu ) ) {
2010-03-24 12:09:26 +08:00
cntr_idx = hwc - > idx ;
2010-03-11 19:54:39 +03:00
if ( assign )
assign [ i ] = hwc - > idx ;
goto reserve ;
}
2010-03-24 12:09:26 +08:00
cntr_idx = p4_next_cntr ( thread , used_mask , bind ) ;
2011-07-09 00:17:12 +04:00
if ( cntr_idx = = - 1 | | test_bit ( escr_idx , escr_mask ) ) {
/*
2011-07-21 20:06:25 +04:00
* Check whether an event alias is still available .
2011-07-09 00:17:12 +04:00
*/
config_alias = p4_get_alias_event ( hwc - > config ) ;
if ( ! config_alias )
goto done ;
hwc - > config = config_alias ;
pass + + ;
goto again ;
}
perf/x86/p4: Fix counter corruption when using lots of perf groups
On a P4 box stressing perf with:
./perf record -o perf.data ./perf stat -v ./perf bench all
it was noticed that a slew of unknown NMIs would pop out rather quickly.
Painfully debugging this ancient platform, led me to notice cross cpu counter
corruption.
The P4 machine is special in that it has 18 counters, half are used for cpu0
and the other half is for cpu1 (or all 18 if hyperthreading is disabled). But
the splitting of the counters has to be actively managed by the software.
In this particular bug, one of the cpu0 specific counters was being used by
cpu1 and caused all sorts of random unknown nmis.
I am not entirely sure on the corruption path, but what happens is:
o perf schedules a group with p4_pmu_schedule_events()
o inside p4_pmu_schedule_events(), it notices an hwc pointer is being reused
but for a different cpu, so it 'swaps' the config bits and returns the
updated 'assign' array with a _new_ index.
o perf schedules another group with p4_pmu_schedule_events()
o inside p4_pmu_schedule_events(), it notices an hwc pointer is being reused
(the same one as above) but for the _same_ cpu [BUG!!], so it updates the
'assign' array to use the _old_ (wrong cpu) index because the _new_ index is in
an earlier part of the 'assign' array (and hasn't been committed yet).
o perf commits the transaction using the wrong index and corrupts the other cpu
The [BUG!!] is because the 'hwc->config' is updated but not the 'hwc->idx'. So
the check for 'p4_should_swap_ts()' is correct the first time around but
incorrect the second time around (because hwc->config was updated in between).
I think the spirit of perf was to not modify anything until all the
transactions had a chance to 'test' if they would succeed, and if so, commit
atomically. However, P4 breaks this spirit by touching the hwc->config
element.
So my fix is to continue the un-perf like breakage, by assigning hwc->idx to -1
on swap to tell follow up group scheduling to find a new index.
Of course if the transaction fails rolling this back will be difficult, but
that is not different than how the current code works. :-) And I wasn't sure
how much effort to cleanup the code I should do for a platform that is almost
10 years old by now.
Hence the lazy fix.
Signed-off-by: Don Zickus <dzickus@redhat.com>
Acked-by: Cyrill Gorcunov <gorcunov@openvz.org>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1391024270-19469-1-git-send-email-dzickus@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
2014-01-29 14:37:50 -05:00
/*
* Perf does test runs to see if a whole group can be assigned
2018-12-03 10:47:34 +01:00
* together successfully . There can be multiple rounds of this .
perf/x86/p4: Fix counter corruption when using lots of perf groups
On a P4 box stressing perf with:
./perf record -o perf.data ./perf stat -v ./perf bench all
it was noticed that a slew of unknown NMIs would pop out rather quickly.
Painfully debugging this ancient platform, led me to notice cross cpu counter
corruption.
The P4 machine is special in that it has 18 counters, half are used for cpu0
and the other half is for cpu1 (or all 18 if hyperthreading is disabled). But
the splitting of the counters has to be actively managed by the software.
In this particular bug, one of the cpu0 specific counters was being used by
cpu1 and caused all sorts of random unknown nmis.
I am not entirely sure on the corruption path, but what happens is:
o perf schedules a group with p4_pmu_schedule_events()
o inside p4_pmu_schedule_events(), it notices an hwc pointer is being reused
but for a different cpu, so it 'swaps' the config bits and returns the
updated 'assign' array with a _new_ index.
o perf schedules another group with p4_pmu_schedule_events()
o inside p4_pmu_schedule_events(), it notices an hwc pointer is being reused
(the same one as above) but for the _same_ cpu [BUG!!], so it updates the
'assign' array to use the _old_ (wrong cpu) index because the _new_ index is in
an earlier part of the 'assign' array (and hasn't been committed yet).
o perf commits the transaction using the wrong index and corrupts the other cpu
The [BUG!!] is because the 'hwc->config' is updated but not the 'hwc->idx'. So
the check for 'p4_should_swap_ts()' is correct the first time around but
incorrect the second time around (because hwc->config was updated in between).
I think the spirit of perf was to not modify anything until all the
transactions had a chance to 'test' if they would succeed, and if so, commit
atomically. However, P4 breaks this spirit by touching the hwc->config
element.
So my fix is to continue the un-perf like breakage, by assigning hwc->idx to -1
on swap to tell follow up group scheduling to find a new index.
Of course if the transaction fails rolling this back will be difficult, but
that is not different than how the current code works. :-) And I wasn't sure
how much effort to cleanup the code I should do for a platform that is almost
10 years old by now.
Hence the lazy fix.
Signed-off-by: Don Zickus <dzickus@redhat.com>
Acked-by: Cyrill Gorcunov <gorcunov@openvz.org>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1391024270-19469-1-git-send-email-dzickus@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
2014-01-29 14:37:50 -05:00
* Unfortunately , p4_pmu_swap_config_ts touches the hwc - > config
* bits , such that the next round of group assignments will
* cause the above p4_should_swap_ts to pass instead of fail .
* This leads to counters exclusive to thread0 being used by
* thread1 .
*
* Solve this with a cheap hack , reset the idx back to - 1 to
* force a new lookup ( p4_next_cntr ) to get the right counter
* for the right thread .
*
* This probably doesn ' t comply with the general spirit of how
* perf wants to work , but P4 is special . : - (
*/
if ( p4_should_swap_ts ( hwc - > config , cpu ) )
hwc - > idx = - 1 ;
2010-03-11 19:54:39 +03:00
p4_pmu_swap_config_ts ( hwc , cpu ) ;
2010-03-24 12:09:26 +08:00
if ( assign )
assign [ i ] = cntr_idx ;
2010-03-11 19:54:39 +03:00
reserve :
2010-03-24 12:09:26 +08:00
set_bit ( cntr_idx , used_mask ) ;
2010-03-11 19:54:39 +03:00
set_bit ( escr_idx , escr_mask ) ;
}
done :
2011-11-09 17:56:37 +01:00
return num ? - EINVAL : 0 ;
2010-03-11 19:54:39 +03:00
}
2012-03-27 16:50:42 +02:00
/*
 * Format descriptors for the three fields of the event config word.
 * Each PMU_FORMAT_ATTR() presumably defines the format_attr_<name>
 * object referenced in the table below -- verify against the macro.
 *
 * NOTE(review): the string literals are reproduced exactly as they
 * appear in the source; the padding spaces inside the quotes look like
 * an extraction artifact -- confirm against the upstream file.
 */
PMU_FORMAT_ATTR(cccr,	" config:0-31 ");
PMU_FORMAT_ATTR(escr,	" config:32-62 ");
PMU_FORMAT_ATTR(ht,	" config:63 ");

/* NULL-terminated attribute list hooked into p4_pmu.format_attrs. */
static struct attribute *intel_p4_formats_attr[] = {
	&format_attr_cccr.attr,
	&format_attr_escr.attr,
	&format_attr_ht.attr,
	NULL,
};
2010-03-29 13:09:53 +02:00
static __initconst const struct x86_pmu p4_pmu = {
2010-03-11 19:54:39 +03:00
. name = " Netburst P4/Xeon " ,
. handle_irq = p4_pmu_handle_irq ,
. disable_all = p4_pmu_disable_all ,
. enable_all = p4_pmu_enable_all ,
. enable = p4_pmu_enable_event ,
. disable = p4_pmu_disable_event ,
2022-05-20 15:38:43 +02:00
. set_period = p4_pmu_set_period ,
2010-03-11 19:54:39 +03:00
. eventsel = MSR_P4_BPU_CCCR0 ,
. perfctr = MSR_P4_BPU_PERFCTR0 ,
. event_map = p4_pmu_event_map ,
2010-03-24 12:09:26 +08:00
. max_events = ARRAY_SIZE ( p4_general_events ) ,
2010-03-11 19:54:39 +03:00
. get_event_constraints = x86_get_event_constraints ,
/*
* IF HT disabled we may need to use all
2021-03-21 22:28:53 +01:00
* ARCH_P4_MAX_CCCR counters simultaneously
2010-03-11 19:54:39 +03:00
* though leave it restricted at moment assuming
* HT is on
*/
2010-03-29 18:36:50 +02:00
. num_counters = ARCH_P4_MAX_CCCR ,
2010-03-11 19:54:39 +03:00
. apic = 1 ,
2011-01-07 21:42:06 +03:00
. cntval_bits = ARCH_P4_CNTRVAL_BITS ,
. cntval_mask = ARCH_P4_CNTRVAL_MASK ,
. max_period = ( 1ULL < < ( ARCH_P4_CNTRVAL_BITS - 1 ) ) - 1 ,
2010-03-11 19:54:39 +03:00
. hw_config = p4_hw_config ,
. schedule_events = p4_pmu_schedule_events ,
2012-03-27 16:50:42 +02:00
. format_attrs = intel_p4_formats_attr ,
2010-03-11 19:54:39 +03:00
} ;
2011-08-30 20:41:05 -03:00
__init int p4_pmu_init ( void )
2010-03-11 19:54:39 +03:00
{
unsigned int low , high ;
perf/x86/p4: Block PMIs on init to prevent a stream of unkown NMIs
A bunch of unknown NMIs have popped up on a Pentium4 recently when booting
into a kdump kernel. This was exposed because the watchdog timer went
from 60 seconds down to 10 seconds (increasing the ability to reproduce
this problem).
What is happening is on boot up of the second kernel (the kdump one),
the previous nmi_watchdogs were enabled on thread 0 and thread 1. The
second kernel only initializes one cpu but the perf counter on thread 1
still counts.
Normally in a kdump scenario, the other cpus are blocking in an NMI loop,
but more importantly their local apics have the performance counters disabled
(iow LVTPC is masked). So any counters that fire are masked and never get
through to the second kernel.
However, on a P4 the local apic is shared by both threads and thread1's PMI
(despite being configured to only interrupt thread1) will generate an NMI on
thread0. Because thread0 knows nothing about this NMI, it is seen as an
unknown NMI.
This would be fine because it is a kdump kernel, strange things happen
what is the big deal about a single unknown NMI.
Unfortunately, the P4 comes with another quirk: clearing the overflow bit
to prevent a stream of NMIs. This is the problem.
The kdump kernel can not execute because of the endless NMIs that happen.
To solve this, I instrumented the p4 perf init code, to walk all the counters
and zero them out (just like a normal reset would).
Now when the counters go off, they do not generate anything and no unknown
NMIs are seen.
I tested this on a P4 we have in our lab. After two or three crashes, I could
normally reproduce the problem. Now after 10 crashes, everything continues
to boot correctly.
Signed-off-by: Don Zickus <dzickus@redhat.com>
Cc: Dave Young <dyoung@redhat.com>
Cc: Vivek Goyal <vgoyal@redhat.com>
Cc: Cyrill Gorcunov <gorcunov@openvz.org>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20140120154115.GZ25953@redhat.com
[ Fixed a stylistic detail. ]
Signed-off-by: Ingo Molnar <mingo@kernel.org>
2014-02-09 13:20:18 +01:00
int i , reg ;
2010-03-11 19:54:39 +03:00
2011-04-22 10:08:52 -07:00
/* If we get stripped -- indexing fails */
2012-06-20 20:46:33 +02:00
BUILD_BUG_ON ( ARCH_P4_MAX_CCCR > INTEL_PMC_MAX_GENERIC ) ;
2010-03-11 19:54:39 +03:00
rdmsr ( MSR_IA32_MISC_ENABLE , low , high ) ;
if ( ! ( low & ( 1 < < 7 ) ) ) {
pr_cont ( " unsupported Netburst CPU model %d " ,
boot_cpu_data . x86_model ) ;
return - ENODEV ;
}
2010-03-18 18:33:12 +08:00
memcpy ( hw_cache_event_ids , p4_hw_cache_event_ids ,
2010-03-24 12:09:26 +08:00
sizeof ( hw_cache_event_ids ) ) ;
2010-03-18 18:33:12 +08:00
2010-03-11 19:54:39 +03:00
pr_cont ( " Netburst events, " ) ;
x86_pmu = p4_pmu ;
perf/x86/p4: Block PMIs on init to prevent a stream of unkown NMIs
A bunch of unknown NMIs have popped up on a Pentium4 recently when booting
into a kdump kernel. This was exposed because the watchdog timer went
from 60 seconds down to 10 seconds (increasing the ability to reproduce
this problem).
What is happening is on boot up of the second kernel (the kdump one),
the previous nmi_watchdogs were enabled on thread 0 and thread 1. The
second kernel only initializes one cpu but the perf counter on thread 1
still counts.
Normally in a kdump scenario, the other cpus are blocking in an NMI loop,
but more importantly their local apics have the performance counters disabled
(iow LVTPC is masked). So any counters that fire are masked and never get
through to the second kernel.
However, on a P4 the local apic is shared by both threads and thread1's PMI
(despite being configured to only interrupt thread1) will generate an NMI on
thread0. Because thread0 knows nothing about this NMI, it is seen as an
unknown NMI.
This would be fine because it is a kdump kernel, strange things happen
what is the big deal about a single unknown NMI.
Unfortunately, the P4 comes with another quirk: clearing the overflow bit
to prevent a stream of NMIs. This is the problem.
The kdump kernel can not execute because of the endless NMIs that happen.
To solve this, I instrumented the p4 perf init code, to walk all the counters
and zero them out (just like a normal reset would).
Now when the counters go off, they do not generate anything and no unknown
NMIs are seen.
I tested this on a P4 we have in our lab. After two or three crashes, I could
normally reproduce the problem. Now after 10 crashes, everything continues
to boot correctly.
Signed-off-by: Don Zickus <dzickus@redhat.com>
Cc: Dave Young <dyoung@redhat.com>
Cc: Vivek Goyal <vgoyal@redhat.com>
Cc: Cyrill Gorcunov <gorcunov@openvz.org>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20140120154115.GZ25953@redhat.com
[ Fixed a stylistic detail. ]
Signed-off-by: Ingo Molnar <mingo@kernel.org>
2014-02-09 13:20:18 +01:00
/*
* Even though the counters are configured to interrupt a particular
* logical processor when an overflow happens , testing has shown that
* on kdump kernels ( which uses a single cpu ) , thread1 ' s counter
* continues to run and will report an NMI on thread0 . Due to the
* overflow bug , this leads to a stream of unknown NMIs .
*
* Solve this by zero ' ing out the registers to mimic a reset .
*/
for ( i = 0 ; i < x86_pmu . num_counters ; i + + ) {
reg = x86_pmu_config_addr ( i ) ;
wrmsrl_safe ( reg , 0ULL ) ;
}
2010-03-11 19:54:39 +03:00
return 0 ;
}