Merge branches 'perf/powerpc' and 'perf/bench' into perf/core
Merge reason: Both 'perf bench' and the pending PowerPC changes are now ready for the next merge window. Signed-off-by: Ingo Molnar <mingo@elte.hu>
This commit is contained in:
commit
0ffa798d94
@ -46,7 +46,7 @@ config DEBUG_STACK_USAGE
|
||||
|
||||
config HCALL_STATS
|
||||
bool "Hypervisor call instrumentation"
|
||||
depends on PPC_PSERIES && DEBUG_FS
|
||||
depends on PPC_PSERIES && DEBUG_FS && TRACEPOINTS
|
||||
help
|
||||
Adds code to keep track of the number of hypervisor calls made and
|
||||
the amount of time spent in hypervisor calls. Wall time spent in
|
||||
|
@ -1683,7 +1683,7 @@ CONFIG_HAVE_ARCH_KGDB=y
|
||||
CONFIG_DEBUG_STACKOVERFLOW=y
|
||||
# CONFIG_DEBUG_STACK_USAGE is not set
|
||||
# CONFIG_DEBUG_PAGEALLOC is not set
|
||||
CONFIG_HCALL_STATS=y
|
||||
# CONFIG_HCALL_STATS is not set
|
||||
# CONFIG_CODE_PATCHING_SELFTEST is not set
|
||||
# CONFIG_FTR_FIXUP_SELFTEST is not set
|
||||
# CONFIG_MSI_BITMAP_SELFTEST is not set
|
||||
|
@ -19,6 +19,7 @@
|
||||
#define _ASM_POWERPC_EMULATED_OPS_H
|
||||
|
||||
#include <asm/atomic.h>
|
||||
#include <linux/perf_event.h>
|
||||
|
||||
|
||||
#ifdef CONFIG_PPC_EMULATED_STATS
|
||||
@ -57,7 +58,7 @@ extern u32 ppc_warn_emulated;
|
||||
|
||||
extern void ppc_warn_emulated_print(const char *type);
|
||||
|
||||
#define PPC_WARN_EMULATED(type) \
|
||||
#define __PPC_WARN_EMULATED(type) \
|
||||
do { \
|
||||
atomic_inc(&ppc_emulated.type.val); \
|
||||
if (ppc_warn_emulated) \
|
||||
@ -66,8 +67,22 @@ extern void ppc_warn_emulated_print(const char *type);
|
||||
|
||||
#else /* !CONFIG_PPC_EMULATED_STATS */
|
||||
|
||||
#define PPC_WARN_EMULATED(type) do { } while (0)
|
||||
#define __PPC_WARN_EMULATED(type) do { } while (0)
|
||||
|
||||
#endif /* !CONFIG_PPC_EMULATED_STATS */
|
||||
|
||||
#define PPC_WARN_EMULATED(type, regs) \
|
||||
do { \
|
||||
perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, \
|
||||
1, 0, regs, 0); \
|
||||
__PPC_WARN_EMULATED(type); \
|
||||
} while (0)
|
||||
|
||||
#define PPC_WARN_ALIGNMENT(type, regs) \
|
||||
do { \
|
||||
perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, \
|
||||
1, 0, regs, regs->dar); \
|
||||
__PPC_WARN_EMULATED(type); \
|
||||
} while (0)
|
||||
|
||||
#endif /* _ASM_POWERPC_EMULATED_OPS_H */
|
||||
|
@ -274,6 +274,8 @@ struct hcall_stats {
|
||||
unsigned long num_calls; /* number of calls (on this CPU) */
|
||||
unsigned long tb_total; /* total wall time (mftb) of calls. */
|
||||
unsigned long purr_total; /* total cpu time (PURR) of calls. */
|
||||
unsigned long tb_start;
|
||||
unsigned long purr_start;
|
||||
};
|
||||
#define HCALL_STAT_ARRAY_SIZE ((MAX_HCALL_OPCODE >> 2) + 1)
|
||||
|
||||
|
@ -489,6 +489,8 @@
|
||||
#define SPRN_MMCR1 798
|
||||
#define SPRN_MMCRA 0x312
|
||||
#define MMCRA_SDSYNC 0x80000000UL /* SDAR synced with SIAR */
|
||||
#define MMCRA_SDAR_DCACHE_MISS 0x40000000UL
|
||||
#define MMCRA_SDAR_ERAT_MISS 0x20000000UL
|
||||
#define MMCRA_SIHV 0x10000000UL /* state of MSR HV when SIAR set */
|
||||
#define MMCRA_SIPR 0x08000000UL /* state of MSR PR when SIAR set */
|
||||
#define MMCRA_SLOT 0x07000000UL /* SLOT bits (37-39) */
|
||||
|
133
arch/powerpc/include/asm/trace.h
Normal file
133
arch/powerpc/include/asm/trace.h
Normal file
@ -0,0 +1,133 @@
|
||||
#undef TRACE_SYSTEM
|
||||
#define TRACE_SYSTEM powerpc
|
||||
|
||||
#if !defined(_TRACE_POWERPC_H) || defined(TRACE_HEADER_MULTI_READ)
|
||||
#define _TRACE_POWERPC_H
|
||||
|
||||
#include <linux/tracepoint.h>
|
||||
|
||||
struct pt_regs;
|
||||
|
||||
TRACE_EVENT(irq_entry,
|
||||
|
||||
TP_PROTO(struct pt_regs *regs),
|
||||
|
||||
TP_ARGS(regs),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
__field(struct pt_regs *, regs)
|
||||
),
|
||||
|
||||
TP_fast_assign(
|
||||
__entry->regs = regs;
|
||||
),
|
||||
|
||||
TP_printk("pt_regs=%p", __entry->regs)
|
||||
);
|
||||
|
||||
TRACE_EVENT(irq_exit,
|
||||
|
||||
TP_PROTO(struct pt_regs *regs),
|
||||
|
||||
TP_ARGS(regs),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
__field(struct pt_regs *, regs)
|
||||
),
|
||||
|
||||
TP_fast_assign(
|
||||
__entry->regs = regs;
|
||||
),
|
||||
|
||||
TP_printk("pt_regs=%p", __entry->regs)
|
||||
);
|
||||
|
||||
TRACE_EVENT(timer_interrupt_entry,
|
||||
|
||||
TP_PROTO(struct pt_regs *regs),
|
||||
|
||||
TP_ARGS(regs),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
__field(struct pt_regs *, regs)
|
||||
),
|
||||
|
||||
TP_fast_assign(
|
||||
__entry->regs = regs;
|
||||
),
|
||||
|
||||
TP_printk("pt_regs=%p", __entry->regs)
|
||||
);
|
||||
|
||||
TRACE_EVENT(timer_interrupt_exit,
|
||||
|
||||
TP_PROTO(struct pt_regs *regs),
|
||||
|
||||
TP_ARGS(regs),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
__field(struct pt_regs *, regs)
|
||||
),
|
||||
|
||||
TP_fast_assign(
|
||||
__entry->regs = regs;
|
||||
),
|
||||
|
||||
TP_printk("pt_regs=%p", __entry->regs)
|
||||
);
|
||||
|
||||
#ifdef CONFIG_PPC_PSERIES
|
||||
extern void hcall_tracepoint_regfunc(void);
|
||||
extern void hcall_tracepoint_unregfunc(void);
|
||||
|
||||
TRACE_EVENT_FN(hcall_entry,
|
||||
|
||||
TP_PROTO(unsigned long opcode, unsigned long *args),
|
||||
|
||||
TP_ARGS(opcode, args),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
__field(unsigned long, opcode)
|
||||
),
|
||||
|
||||
TP_fast_assign(
|
||||
__entry->opcode = opcode;
|
||||
),
|
||||
|
||||
TP_printk("opcode=%lu", __entry->opcode),
|
||||
|
||||
hcall_tracepoint_regfunc, hcall_tracepoint_unregfunc
|
||||
);
|
||||
|
||||
TRACE_EVENT_FN(hcall_exit,
|
||||
|
||||
TP_PROTO(unsigned long opcode, unsigned long retval,
|
||||
unsigned long *retbuf),
|
||||
|
||||
TP_ARGS(opcode, retval, retbuf),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
__field(unsigned long, opcode)
|
||||
__field(unsigned long, retval)
|
||||
),
|
||||
|
||||
TP_fast_assign(
|
||||
__entry->opcode = opcode;
|
||||
__entry->retval = retval;
|
||||
),
|
||||
|
||||
TP_printk("opcode=%lu retval=%lu", __entry->opcode, __entry->retval),
|
||||
|
||||
hcall_tracepoint_regfunc, hcall_tracepoint_unregfunc
|
||||
);
|
||||
#endif
|
||||
|
||||
#endif /* _TRACE_POWERPC_H */
|
||||
|
||||
#undef TRACE_INCLUDE_PATH
|
||||
#undef TRACE_INCLUDE_FILE
|
||||
|
||||
#define TRACE_INCLUDE_PATH asm
|
||||
#define TRACE_INCLUDE_FILE trace
|
||||
|
||||
#include <trace/define_trace.h>
|
@ -732,7 +732,7 @@ int fix_alignment(struct pt_regs *regs)
|
||||
|
||||
#ifdef CONFIG_SPE
|
||||
if ((instr >> 26) == 0x4) {
|
||||
PPC_WARN_EMULATED(spe);
|
||||
PPC_WARN_ALIGNMENT(spe, regs);
|
||||
return emulate_spe(regs, reg, instr);
|
||||
}
|
||||
#endif
|
||||
@ -786,7 +786,7 @@ int fix_alignment(struct pt_regs *regs)
|
||||
flags |= SPLT;
|
||||
nb = 8;
|
||||
}
|
||||
PPC_WARN_EMULATED(vsx);
|
||||
PPC_WARN_ALIGNMENT(vsx, regs);
|
||||
return emulate_vsx(addr, reg, areg, regs, flags, nb);
|
||||
}
|
||||
#endif
|
||||
@ -794,7 +794,7 @@ int fix_alignment(struct pt_regs *regs)
|
||||
* the exception of DCBZ which is handled as a special case here
|
||||
*/
|
||||
if (instr == DCBZ) {
|
||||
PPC_WARN_EMULATED(dcbz);
|
||||
PPC_WARN_ALIGNMENT(dcbz, regs);
|
||||
return emulate_dcbz(regs, addr);
|
||||
}
|
||||
if (unlikely(nb == 0))
|
||||
@ -804,7 +804,7 @@ int fix_alignment(struct pt_regs *regs)
|
||||
* function
|
||||
*/
|
||||
if (flags & M) {
|
||||
PPC_WARN_EMULATED(multiple);
|
||||
PPC_WARN_ALIGNMENT(multiple, regs);
|
||||
return emulate_multiple(regs, addr, reg, nb,
|
||||
flags, instr, swiz);
|
||||
}
|
||||
@ -825,11 +825,11 @@ int fix_alignment(struct pt_regs *regs)
|
||||
|
||||
/* Special case for 16-byte FP loads and stores */
|
||||
if (nb == 16) {
|
||||
PPC_WARN_EMULATED(fp_pair);
|
||||
PPC_WARN_ALIGNMENT(fp_pair, regs);
|
||||
return emulate_fp_pair(addr, reg, flags);
|
||||
}
|
||||
|
||||
PPC_WARN_EMULATED(unaligned);
|
||||
PPC_WARN_ALIGNMENT(unaligned, regs);
|
||||
|
||||
/* If we are loading, get the data from user space, else
|
||||
* get it from register values
|
||||
|
@ -551,7 +551,7 @@ restore:
|
||||
BEGIN_FW_FTR_SECTION
|
||||
ld r5,SOFTE(r1)
|
||||
FW_FTR_SECTION_ELSE
|
||||
b iseries_check_pending_irqs
|
||||
b .Liseries_check_pending_irqs
|
||||
ALT_FW_FTR_SECTION_END_IFCLR(FW_FEATURE_ISERIES)
|
||||
2:
|
||||
TRACE_AND_RESTORE_IRQ(r5);
|
||||
@ -623,7 +623,7 @@ ALT_FW_FTR_SECTION_END_IFCLR(FW_FEATURE_ISERIES)
|
||||
|
||||
#endif /* CONFIG_PPC_BOOK3E */
|
||||
|
||||
iseries_check_pending_irqs:
|
||||
.Liseries_check_pending_irqs:
|
||||
#ifdef CONFIG_PPC_ISERIES
|
||||
ld r5,SOFTE(r1)
|
||||
cmpdi 0,r5,0
|
||||
|
@ -185,12 +185,15 @@ END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE)
|
||||
* prolog code of the PerformanceMonitor one. A little
|
||||
* trickery is thus necessary
|
||||
*/
|
||||
performance_monitor_pSeries_1:
|
||||
. = 0xf00
|
||||
b performance_monitor_pSeries
|
||||
|
||||
altivec_unavailable_pSeries_1:
|
||||
. = 0xf20
|
||||
b altivec_unavailable_pSeries
|
||||
|
||||
vsx_unavailable_pSeries_1:
|
||||
. = 0xf40
|
||||
b vsx_unavailable_pSeries
|
||||
|
||||
|
@ -70,6 +70,8 @@
|
||||
#include <asm/firmware.h>
|
||||
#include <asm/lv1call.h>
|
||||
#endif
|
||||
#define CREATE_TRACE_POINTS
|
||||
#include <asm/trace.h>
|
||||
|
||||
int __irq_offset_value;
|
||||
static int ppc_spurious_interrupts;
|
||||
@ -325,6 +327,8 @@ void do_IRQ(struct pt_regs *regs)
|
||||
struct pt_regs *old_regs = set_irq_regs(regs);
|
||||
unsigned int irq;
|
||||
|
||||
trace_irq_entry(regs);
|
||||
|
||||
irq_enter();
|
||||
|
||||
check_stack_overflow();
|
||||
@ -348,6 +352,8 @@ void do_IRQ(struct pt_regs *regs)
|
||||
timer_interrupt(regs);
|
||||
}
|
||||
#endif
|
||||
|
||||
trace_irq_exit(regs);
|
||||
}
|
||||
|
||||
void __init init_IRQ(void)
|
||||
|
@ -1165,7 +1165,7 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
|
||||
*/
|
||||
if (record) {
|
||||
struct perf_sample_data data = {
|
||||
.addr = 0,
|
||||
.addr = ~0ULL,
|
||||
.period = event->hw.last_period,
|
||||
};
|
||||
|
||||
|
@ -72,10 +72,6 @@
|
||||
#define MMCR1_PMCSEL_SH(n) (MMCR1_PMC1SEL_SH - (n) * 8)
|
||||
#define MMCR1_PMCSEL_MSK 0x7f
|
||||
|
||||
/*
|
||||
* Bits in MMCRA
|
||||
*/
|
||||
|
||||
/*
|
||||
* Layout of constraint bits:
|
||||
* 6666555555555544444444443333333333222222222211111111110000000000
|
||||
|
@ -72,10 +72,6 @@
|
||||
#define MMCR1_PMCSEL_SH(n) (MMCR1_PMC1SEL_SH - (n) * 8)
|
||||
#define MMCR1_PMCSEL_MSK 0x7f
|
||||
|
||||
/*
|
||||
* Bits in MMCRA
|
||||
*/
|
||||
|
||||
/*
|
||||
* Layout of constraint bits:
|
||||
* 6666555555555544444444443333333333222222222211111111110000000000
|
||||
@ -390,7 +386,7 @@ static int power5_compute_mmcr(u64 event[], int n_ev,
|
||||
unsigned int hwc[], unsigned long mmcr[])
|
||||
{
|
||||
unsigned long mmcr1 = 0;
|
||||
unsigned long mmcra = 0;
|
||||
unsigned long mmcra = MMCRA_SDAR_DCACHE_MISS | MMCRA_SDAR_ERAT_MISS;
|
||||
unsigned int pmc, unit, byte, psel;
|
||||
unsigned int ttm, grp;
|
||||
int i, isbus, bit, grsel;
|
||||
|
@ -178,7 +178,7 @@ static int p6_compute_mmcr(u64 event[], int n_ev,
|
||||
unsigned int hwc[], unsigned long mmcr[])
|
||||
{
|
||||
unsigned long mmcr1 = 0;
|
||||
unsigned long mmcra = 0;
|
||||
unsigned long mmcra = MMCRA_SDAR_DCACHE_MISS | MMCRA_SDAR_ERAT_MISS;
|
||||
int i;
|
||||
unsigned int pmc, ev, b, u, s, psel;
|
||||
unsigned int ttmset = 0;
|
||||
|
@ -50,10 +50,6 @@
|
||||
#define MMCR1_PMCSEL_SH(n) (MMCR1_PMC1SEL_SH - (n) * 8)
|
||||
#define MMCR1_PMCSEL_MSK 0xff
|
||||
|
||||
/*
|
||||
* Bits in MMCRA
|
||||
*/
|
||||
|
||||
/*
|
||||
* Layout of constraint bits:
|
||||
* 6666555555555544444444443333333333222222222211111111110000000000
|
||||
@ -230,7 +226,7 @@ static int power7_compute_mmcr(u64 event[], int n_ev,
|
||||
unsigned int hwc[], unsigned long mmcr[])
|
||||
{
|
||||
unsigned long mmcr1 = 0;
|
||||
unsigned long mmcra = 0;
|
||||
unsigned long mmcra = MMCRA_SDAR_DCACHE_MISS | MMCRA_SDAR_ERAT_MISS;
|
||||
unsigned int pmc, unit, combine, l2sel, psel;
|
||||
unsigned int pmc_inuse = 0;
|
||||
int i;
|
||||
|
@ -83,10 +83,6 @@ static short mmcr1_adder_bits[8] = {
|
||||
MMCR1_PMC8_ADDER_SEL_SH
|
||||
};
|
||||
|
||||
/*
|
||||
* Bits in MMCRA
|
||||
*/
|
||||
|
||||
/*
|
||||
* Layout of constraint bits:
|
||||
* 6666555555555544444444443333333333222222222211111111110000000000
|
||||
|
@ -660,6 +660,7 @@ late_initcall(check_cache_coherency);
|
||||
|
||||
#ifdef CONFIG_DEBUG_FS
|
||||
struct dentry *powerpc_debugfs_root;
|
||||
EXPORT_SYMBOL(powerpc_debugfs_root);
|
||||
|
||||
static int powerpc_debugfs_init(void)
|
||||
{
|
||||
|
@ -54,6 +54,7 @@
|
||||
#include <linux/irq.h>
|
||||
#include <linux/delay.h>
|
||||
#include <linux/perf_event.h>
|
||||
#include <asm/trace.h>
|
||||
|
||||
#include <asm/io.h>
|
||||
#include <asm/processor.h>
|
||||
@ -571,6 +572,8 @@ void timer_interrupt(struct pt_regs * regs)
|
||||
struct clock_event_device *evt = &decrementer->event;
|
||||
u64 now;
|
||||
|
||||
trace_timer_interrupt_entry(regs);
|
||||
|
||||
/* Ensure a positive value is written to the decrementer, or else
|
||||
* some CPUs will continue to take decrementer exceptions */
|
||||
set_dec(DECREMENTER_MAX);
|
||||
@ -590,6 +593,7 @@ void timer_interrupt(struct pt_regs * regs)
|
||||
now = decrementer->next_tb - now;
|
||||
if (now <= DECREMENTER_MAX)
|
||||
set_dec((int)now);
|
||||
trace_timer_interrupt_exit(regs);
|
||||
return;
|
||||
}
|
||||
old_regs = set_irq_regs(regs);
|
||||
@ -620,6 +624,8 @@ void timer_interrupt(struct pt_regs * regs)
|
||||
|
||||
irq_exit();
|
||||
set_irq_regs(old_regs);
|
||||
|
||||
trace_timer_interrupt_exit(regs);
|
||||
}
|
||||
|
||||
void wakeup_decrementer(void)
|
||||
|
@ -759,7 +759,7 @@ static int emulate_instruction(struct pt_regs *regs)
|
||||
|
||||
/* Emulate the mfspr rD, PVR. */
|
||||
if ((instword & PPC_INST_MFSPR_PVR_MASK) == PPC_INST_MFSPR_PVR) {
|
||||
PPC_WARN_EMULATED(mfpvr);
|
||||
PPC_WARN_EMULATED(mfpvr, regs);
|
||||
rd = (instword >> 21) & 0x1f;
|
||||
regs->gpr[rd] = mfspr(SPRN_PVR);
|
||||
return 0;
|
||||
@ -767,7 +767,7 @@ static int emulate_instruction(struct pt_regs *regs)
|
||||
|
||||
/* Emulating the dcba insn is just a no-op. */
|
||||
if ((instword & PPC_INST_DCBA_MASK) == PPC_INST_DCBA) {
|
||||
PPC_WARN_EMULATED(dcba);
|
||||
PPC_WARN_EMULATED(dcba, regs);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -776,7 +776,7 @@ static int emulate_instruction(struct pt_regs *regs)
|
||||
int shift = (instword >> 21) & 0x1c;
|
||||
unsigned long msk = 0xf0000000UL >> shift;
|
||||
|
||||
PPC_WARN_EMULATED(mcrxr);
|
||||
PPC_WARN_EMULATED(mcrxr, regs);
|
||||
regs->ccr = (regs->ccr & ~msk) | ((regs->xer >> shift) & msk);
|
||||
regs->xer &= ~0xf0000000UL;
|
||||
return 0;
|
||||
@ -784,19 +784,19 @@ static int emulate_instruction(struct pt_regs *regs)
|
||||
|
||||
/* Emulate load/store string insn. */
|
||||
if ((instword & PPC_INST_STRING_GEN_MASK) == PPC_INST_STRING) {
|
||||
PPC_WARN_EMULATED(string);
|
||||
PPC_WARN_EMULATED(string, regs);
|
||||
return emulate_string_inst(regs, instword);
|
||||
}
|
||||
|
||||
/* Emulate the popcntb (Population Count Bytes) instruction. */
|
||||
if ((instword & PPC_INST_POPCNTB_MASK) == PPC_INST_POPCNTB) {
|
||||
PPC_WARN_EMULATED(popcntb);
|
||||
PPC_WARN_EMULATED(popcntb, regs);
|
||||
return emulate_popcntb_inst(regs, instword);
|
||||
}
|
||||
|
||||
/* Emulate isel (Integer Select) instruction */
|
||||
if ((instword & PPC_INST_ISEL_MASK) == PPC_INST_ISEL) {
|
||||
PPC_WARN_EMULATED(isel);
|
||||
PPC_WARN_EMULATED(isel, regs);
|
||||
return emulate_isel(regs, instword);
|
||||
}
|
||||
|
||||
@ -995,7 +995,7 @@ void SoftwareEmulation(struct pt_regs *regs)
|
||||
#ifdef CONFIG_MATH_EMULATION
|
||||
errcode = do_mathemu(regs);
|
||||
if (errcode >= 0)
|
||||
PPC_WARN_EMULATED(math);
|
||||
PPC_WARN_EMULATED(math, regs);
|
||||
|
||||
switch (errcode) {
|
||||
case 0:
|
||||
@ -1018,7 +1018,7 @@ void SoftwareEmulation(struct pt_regs *regs)
|
||||
#elif defined(CONFIG_8XX_MINIMAL_FPEMU)
|
||||
errcode = Soft_emulate_8xx(regs);
|
||||
if (errcode >= 0)
|
||||
PPC_WARN_EMULATED(8xx);
|
||||
PPC_WARN_EMULATED(8xx, regs);
|
||||
|
||||
switch (errcode) {
|
||||
case 0:
|
||||
@ -1129,7 +1129,7 @@ void altivec_assist_exception(struct pt_regs *regs)
|
||||
|
||||
flush_altivec_to_thread(current);
|
||||
|
||||
PPC_WARN_EMULATED(altivec);
|
||||
PPC_WARN_EMULATED(altivec, regs);
|
||||
err = emulate_altivec(regs);
|
||||
if (err == 0) {
|
||||
regs->nip += 4; /* skip emulated instruction */
|
||||
|
@ -26,11 +26,11 @@ BEGIN_FTR_SECTION
|
||||
srd r8,r5,r11
|
||||
|
||||
mtctr r8
|
||||
setup:
|
||||
.Lsetup:
|
||||
dcbt r9,r4
|
||||
dcbz r9,r3
|
||||
add r9,r9,r12
|
||||
bdnz setup
|
||||
bdnz .Lsetup
|
||||
END_FTR_SECTION_IFSET(CPU_FTR_CP_USE_DCBTZ)
|
||||
addi r3,r3,-8
|
||||
srdi r8,r5,7 /* page is copied in 128 byte strides */
|
||||
|
@ -14,68 +14,94 @@
|
||||
|
||||
#define STK_PARM(i) (48 + ((i)-3)*8)
|
||||
|
||||
#ifdef CONFIG_HCALL_STATS
|
||||
#ifdef CONFIG_TRACEPOINTS
|
||||
|
||||
.section ".toc","aw"
|
||||
|
||||
.globl hcall_tracepoint_refcount
|
||||
hcall_tracepoint_refcount:
|
||||
.llong 0
|
||||
|
||||
.section ".text"
|
||||
|
||||
/*
|
||||
* precall must preserve all registers. use unused STK_PARM()
|
||||
* areas to save snapshots and opcode.
|
||||
* areas to save snapshots and opcode. We branch around this
|
||||
* in early init (eg when populating the MMU hashtable) by using an
|
||||
* unconditional cpu feature.
|
||||
*/
|
||||
#define HCALL_INST_PRECALL \
|
||||
std r3,STK_PARM(r3)(r1); /* save opcode */ \
|
||||
mftb r0; /* get timebase and */ \
|
||||
std r0,STK_PARM(r5)(r1); /* save for later */ \
|
||||
#define HCALL_INST_PRECALL(FIRST_REG) \
|
||||
BEGIN_FTR_SECTION; \
|
||||
mfspr r0,SPRN_PURR; /* get PURR and */ \
|
||||
std r0,STK_PARM(r6)(r1); /* save for later */ \
|
||||
END_FTR_SECTION_IFSET(CPU_FTR_PURR);
|
||||
|
||||
b 1f; \
|
||||
END_FTR_SECTION(0, 1); \
|
||||
ld r12,hcall_tracepoint_refcount@toc(r2); \
|
||||
cmpdi r12,0; \
|
||||
beq+ 1f; \
|
||||
mflr r0; \
|
||||
std r3,STK_PARM(r3)(r1); \
|
||||
std r4,STK_PARM(r4)(r1); \
|
||||
std r5,STK_PARM(r5)(r1); \
|
||||
std r6,STK_PARM(r6)(r1); \
|
||||
std r7,STK_PARM(r7)(r1); \
|
||||
std r8,STK_PARM(r8)(r1); \
|
||||
std r9,STK_PARM(r9)(r1); \
|
||||
std r10,STK_PARM(r10)(r1); \
|
||||
std r0,16(r1); \
|
||||
addi r4,r1,STK_PARM(FIRST_REG); \
|
||||
stdu r1,-STACK_FRAME_OVERHEAD(r1); \
|
||||
bl .__trace_hcall_entry; \
|
||||
addi r1,r1,STACK_FRAME_OVERHEAD; \
|
||||
ld r0,16(r1); \
|
||||
ld r3,STK_PARM(r3)(r1); \
|
||||
ld r4,STK_PARM(r4)(r1); \
|
||||
ld r5,STK_PARM(r5)(r1); \
|
||||
ld r6,STK_PARM(r6)(r1); \
|
||||
ld r7,STK_PARM(r7)(r1); \
|
||||
ld r8,STK_PARM(r8)(r1); \
|
||||
ld r9,STK_PARM(r9)(r1); \
|
||||
ld r10,STK_PARM(r10)(r1); \
|
||||
mtlr r0; \
|
||||
1:
|
||||
|
||||
/*
|
||||
* postcall is performed immediately before function return which
|
||||
* allows liberal use of volatile registers. We branch around this
|
||||
* in early init (eg when populating the MMU hashtable) by using an
|
||||
* unconditional cpu feature.
|
||||
*/
|
||||
#define HCALL_INST_POSTCALL \
|
||||
#define __HCALL_INST_POSTCALL \
|
||||
BEGIN_FTR_SECTION; \
|
||||
b 1f; \
|
||||
END_FTR_SECTION(0, 1); \
|
||||
ld r4,STK_PARM(r3)(r1); /* validate opcode */ \
|
||||
cmpldi cr7,r4,MAX_HCALL_OPCODE; \
|
||||
bgt- cr7,1f; \
|
||||
\
|
||||
/* get time and PURR snapshots after hcall */ \
|
||||
mftb r7; /* timebase after */ \
|
||||
BEGIN_FTR_SECTION; \
|
||||
mfspr r8,SPRN_PURR; /* PURR after */ \
|
||||
ld r6,STK_PARM(r6)(r1); /* PURR before */ \
|
||||
subf r6,r6,r8; /* delta */ \
|
||||
END_FTR_SECTION_IFSET(CPU_FTR_PURR); \
|
||||
ld r5,STK_PARM(r5)(r1); /* timebase before */ \
|
||||
subf r5,r5,r7; /* time delta */ \
|
||||
\
|
||||
/* calculate address of stat structure r4 = opcode */ \
|
||||
srdi r4,r4,2; /* index into array */ \
|
||||
mulli r4,r4,HCALL_STAT_SIZE; \
|
||||
LOAD_REG_ADDR(r7, per_cpu__hcall_stats); \
|
||||
add r4,r4,r7; \
|
||||
ld r7,PACA_DATA_OFFSET(r13); /* per cpu offset */ \
|
||||
add r4,r4,r7; \
|
||||
\
|
||||
/* update stats */ \
|
||||
ld r7,HCALL_STAT_CALLS(r4); /* count */ \
|
||||
addi r7,r7,1; \
|
||||
std r7,HCALL_STAT_CALLS(r4); \
|
||||
ld r7,HCALL_STAT_TB(r4); /* timebase */ \
|
||||
add r7,r7,r5; \
|
||||
std r7,HCALL_STAT_TB(r4); \
|
||||
BEGIN_FTR_SECTION; \
|
||||
ld r7,HCALL_STAT_PURR(r4); /* PURR */ \
|
||||
add r7,r7,r6; \
|
||||
std r7,HCALL_STAT_PURR(r4); \
|
||||
END_FTR_SECTION_IFSET(CPU_FTR_PURR); \
|
||||
ld r12,hcall_tracepoint_refcount@toc(r2); \
|
||||
cmpdi r12,0; \
|
||||
beq+ 1f; \
|
||||
mflr r0; \
|
||||
ld r6,STK_PARM(r3)(r1); \
|
||||
std r3,STK_PARM(r3)(r1); \
|
||||
mr r4,r3; \
|
||||
mr r3,r6; \
|
||||
std r0,16(r1); \
|
||||
stdu r1,-STACK_FRAME_OVERHEAD(r1); \
|
||||
bl .__trace_hcall_exit; \
|
||||
addi r1,r1,STACK_FRAME_OVERHEAD; \
|
||||
ld r0,16(r1); \
|
||||
ld r3,STK_PARM(r3)(r1); \
|
||||
mtlr r0; \
|
||||
1:
|
||||
|
||||
#define HCALL_INST_POSTCALL_NORETS \
|
||||
li r5,0; \
|
||||
__HCALL_INST_POSTCALL
|
||||
|
||||
#define HCALL_INST_POSTCALL(BUFREG) \
|
||||
mr r5,BUFREG; \
|
||||
__HCALL_INST_POSTCALL
|
||||
|
||||
#else
|
||||
#define HCALL_INST_PRECALL
|
||||
#define HCALL_INST_POSTCALL
|
||||
#define HCALL_INST_PRECALL(FIRST_ARG)
|
||||
#define HCALL_INST_POSTCALL_NORETS
|
||||
#define HCALL_INST_POSTCALL(BUFREG)
|
||||
#endif
|
||||
|
||||
.text
|
||||
@ -86,11 +112,11 @@ _GLOBAL(plpar_hcall_norets)
|
||||
mfcr r0
|
||||
stw r0,8(r1)
|
||||
|
||||
HCALL_INST_PRECALL
|
||||
HCALL_INST_PRECALL(r4)
|
||||
|
||||
HVSC /* invoke the hypervisor */
|
||||
|
||||
HCALL_INST_POSTCALL
|
||||
HCALL_INST_POSTCALL_NORETS
|
||||
|
||||
lwz r0,8(r1)
|
||||
mtcrf 0xff,r0
|
||||
@ -102,7 +128,7 @@ _GLOBAL(plpar_hcall)
|
||||
mfcr r0
|
||||
stw r0,8(r1)
|
||||
|
||||
HCALL_INST_PRECALL
|
||||
HCALL_INST_PRECALL(r5)
|
||||
|
||||
std r4,STK_PARM(r4)(r1) /* Save ret buffer */
|
||||
|
||||
@ -121,7 +147,7 @@ _GLOBAL(plpar_hcall)
|
||||
std r6, 16(r12)
|
||||
std r7, 24(r12)
|
||||
|
||||
HCALL_INST_POSTCALL
|
||||
HCALL_INST_POSTCALL(r12)
|
||||
|
||||
lwz r0,8(r1)
|
||||
mtcrf 0xff,r0
|
||||
@ -168,7 +194,7 @@ _GLOBAL(plpar_hcall9)
|
||||
mfcr r0
|
||||
stw r0,8(r1)
|
||||
|
||||
HCALL_INST_PRECALL
|
||||
HCALL_INST_PRECALL(r5)
|
||||
|
||||
std r4,STK_PARM(r4)(r1) /* Save ret buffer */
|
||||
|
||||
@ -196,7 +222,7 @@ _GLOBAL(plpar_hcall9)
|
||||
std r11,56(r12)
|
||||
std r0, 64(r12)
|
||||
|
||||
HCALL_INST_POSTCALL
|
||||
HCALL_INST_POSTCALL(r12)
|
||||
|
||||
lwz r0,8(r1)
|
||||
mtcrf 0xff,r0
|
||||
|
@ -26,6 +26,7 @@
|
||||
#include <asm/hvcall.h>
|
||||
#include <asm/firmware.h>
|
||||
#include <asm/cputable.h>
|
||||
#include <asm/trace.h>
|
||||
|
||||
DEFINE_PER_CPU(struct hcall_stats[HCALL_STAT_ARRAY_SIZE], hcall_stats);
|
||||
|
||||
@ -100,6 +101,35 @@ static const struct file_operations hcall_inst_seq_fops = {
|
||||
#define HCALL_ROOT_DIR "hcall_inst"
|
||||
#define CPU_NAME_BUF_SIZE 32
|
||||
|
||||
|
||||
static void probe_hcall_entry(unsigned long opcode, unsigned long *args)
|
||||
{
|
||||
struct hcall_stats *h;
|
||||
|
||||
if (opcode > MAX_HCALL_OPCODE)
|
||||
return;
|
||||
|
||||
h = &get_cpu_var(hcall_stats)[opcode / 4];
|
||||
h->tb_start = mftb();
|
||||
h->purr_start = mfspr(SPRN_PURR);
|
||||
}
|
||||
|
||||
static void probe_hcall_exit(unsigned long opcode, unsigned long retval,
|
||||
unsigned long *retbuf)
|
||||
{
|
||||
struct hcall_stats *h;
|
||||
|
||||
if (opcode > MAX_HCALL_OPCODE)
|
||||
return;
|
||||
|
||||
h = &__get_cpu_var(hcall_stats)[opcode / 4];
|
||||
h->num_calls++;
|
||||
h->tb_total = mftb() - h->tb_start;
|
||||
h->purr_total = mfspr(SPRN_PURR) - h->purr_start;
|
||||
|
||||
put_cpu_var(hcall_stats);
|
||||
}
|
||||
|
||||
static int __init hcall_inst_init(void)
|
||||
{
|
||||
struct dentry *hcall_root;
|
||||
@ -110,6 +140,14 @@ static int __init hcall_inst_init(void)
|
||||
if (!firmware_has_feature(FW_FEATURE_LPAR))
|
||||
return 0;
|
||||
|
||||
if (register_trace_hcall_entry(probe_hcall_entry))
|
||||
return -EINVAL;
|
||||
|
||||
if (register_trace_hcall_exit(probe_hcall_exit)) {
|
||||
unregister_trace_hcall_entry(probe_hcall_entry);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
hcall_root = debugfs_create_dir(HCALL_ROOT_DIR, NULL);
|
||||
if (!hcall_root)
|
||||
return -ENOMEM;
|
||||
|
@ -39,6 +39,7 @@
|
||||
#include <asm/cputable.h>
|
||||
#include <asm/udbg.h>
|
||||
#include <asm/smp.h>
|
||||
#include <asm/trace.h>
|
||||
|
||||
#include "plpar_wrappers.h"
|
||||
#include "pseries.h"
|
||||
@ -661,3 +662,35 @@ void arch_free_page(struct page *page, int order)
|
||||
EXPORT_SYMBOL(arch_free_page);
|
||||
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_TRACEPOINTS
|
||||
/*
|
||||
* We optimise our hcall path by placing hcall_tracepoint_refcount
|
||||
* directly in the TOC so we can check if the hcall tracepoints are
|
||||
* enabled via a single load.
|
||||
*/
|
||||
|
||||
/* NB: reg/unreg are called while guarded with the tracepoints_mutex */
|
||||
extern long hcall_tracepoint_refcount;
|
||||
|
||||
void hcall_tracepoint_regfunc(void)
|
||||
{
|
||||
hcall_tracepoint_refcount++;
|
||||
}
|
||||
|
||||
void hcall_tracepoint_unregfunc(void)
|
||||
{
|
||||
hcall_tracepoint_refcount--;
|
||||
}
|
||||
|
||||
void __trace_hcall_entry(unsigned long opcode, unsigned long *args)
|
||||
{
|
||||
trace_hcall_entry(opcode, args);
|
||||
}
|
||||
|
||||
void __trace_hcall_exit(long opcode, unsigned long retval,
|
||||
unsigned long *retbuf)
|
||||
{
|
||||
trace_hcall_exit(opcode, retval, retbuf);
|
||||
}
|
||||
#endif
|
||||
|
@ -106,6 +106,8 @@ enum perf_sw_ids {
|
||||
PERF_COUNT_SW_CPU_MIGRATIONS = 4,
|
||||
PERF_COUNT_SW_PAGE_FAULTS_MIN = 5,
|
||||
PERF_COUNT_SW_PAGE_FAULTS_MAJ = 6,
|
||||
PERF_COUNT_SW_ALIGNMENT_FAULTS = 7,
|
||||
PERF_COUNT_SW_EMULATION_FAULTS = 8,
|
||||
|
||||
PERF_COUNT_SW_MAX, /* non-ABI */
|
||||
};
|
||||
|
@ -102,6 +102,8 @@ enum perf_sw_ids {
|
||||
PERF_COUNT_SW_CPU_MIGRATIONS = 4,
|
||||
PERF_COUNT_SW_PAGE_FAULTS_MIN = 5,
|
||||
PERF_COUNT_SW_PAGE_FAULTS_MAJ = 6,
|
||||
PERF_COUNT_SW_ALIGNMENT_FAULTS = 7,
|
||||
PERF_COUNT_SW_EMULATION_FAULTS = 8,
|
||||
|
||||
PERF_COUNT_SW_MAX, /* non-ABI */
|
||||
};
|
||||
|
@ -4274,6 +4274,8 @@ static const struct pmu *sw_perf_event_init(struct perf_event *event)
|
||||
case PERF_COUNT_SW_PAGE_FAULTS_MAJ:
|
||||
case PERF_COUNT_SW_CONTEXT_SWITCHES:
|
||||
case PERF_COUNT_SW_CPU_MIGRATIONS:
|
||||
case PERF_COUNT_SW_ALIGNMENT_FAULTS:
|
||||
case PERF_COUNT_SW_EMULATION_FAULTS:
|
||||
if (!event->parent) {
|
||||
atomic_inc(&perf_swevent_enabled[event_id]);
|
||||
event->destroy = sw_perf_event_destroy;
|
||||
|
120
tools/perf/Documentation/perf-bench.txt
Normal file
120
tools/perf/Documentation/perf-bench.txt
Normal file
@ -0,0 +1,120 @@
|
||||
perf-bench(1)
|
||||
============
|
||||
|
||||
NAME
|
||||
----
|
||||
perf-bench - General framework for benchmark suites
|
||||
|
||||
SYNOPSIS
|
||||
--------
|
||||
[verse]
|
||||
'perf bench' [<common options>] <subsystem> <suite> [<options>]
|
||||
|
||||
DESCRIPTION
|
||||
-----------
|
||||
This 'perf bench' command is a general framework for benchmark suites.
|
||||
|
||||
COMMON OPTIONS
|
||||
--------------
|
||||
-f::
|
||||
--format=::
|
||||
Specify format style.
|
||||
Current available format styles are,
|
||||
|
||||
'default'::
|
||||
Default style. This is mainly for human reading.
|
||||
---------------------
|
||||
% perf bench sched pipe # with no style specified
|
||||
(executing 1000000 pipe operations between two tasks)
|
||||
Total time:5.855 sec
|
||||
5.855061 usecs/op
|
||||
170792 ops/sec
|
||||
---------------------
|
||||
|
||||
'simple'::
|
||||
This simple style is friendly for automated
|
||||
processing by scripts.
|
||||
---------------------
|
||||
% perf bench --format=simple sched pipe # specified simple
|
||||
5.988
|
||||
---------------------
|
||||
|
||||
SUBSYSTEM
|
||||
---------
|
||||
|
||||
'sched'::
|
||||
Scheduler and IPC mechanisms.
|
||||
|
||||
SUITES FOR 'sched'
|
||||
~~~~~~~~~~~~~~~~~~
|
||||
*messaging*::
|
||||
Suite for evaluating performance of scheduler and IPC mechanisms.
|
||||
Based on hackbench by Rusty Russell.
|
||||
|
||||
Options of *messaging*
|
||||
^^^^^^^^^^^^^^^^^^^^^^
|
||||
-p::
|
||||
--pipe::
|
||||
Use pipe() instead of socketpair()
|
||||
|
||||
-t::
|
||||
--thread::
|
||||
Be multi-threaded instead of multi-process
|
||||
|
||||
-g::
|
||||
--group=::
|
||||
Specify number of groups
|
||||
|
||||
-l::
|
||||
--loop=::
|
||||
Specify number of loops
|
||||
|
||||
Example of *messaging*
|
||||
^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
---------------------
|
||||
% perf bench sched messaging # run with default
|
||||
options (20 sender and receiver processes per group)
|
||||
(10 groups == 400 processes run)
|
||||
|
||||
Total time:0.308 sec
|
||||
|
||||
% perf bench sched messaging -t -g 20 # be multi-thread, with 20 groups
|
||||
(20 sender and receiver threads per group)
|
||||
(20 groups == 800 threads run)
|
||||
|
||||
Total time:0.582 sec
|
||||
---------------------
|
||||
|
||||
*pipe*::
|
||||
Suite for pipe() system call.
|
||||
Based on pipe-test-1m.c by Ingo Molnar.
|
||||
|
||||
Options of *pipe*
|
||||
^^^^^^^^^^^^^^^^^
|
||||
-l::
|
||||
--loop=::
|
||||
Specify number of loops.
|
||||
|
||||
Example of *pipe*
|
||||
^^^^^^^^^^^^^^^^^
|
||||
|
||||
---------------------
|
||||
% perf bench sched pipe
|
||||
(executing 1000000 pipe operations between two tasks)
|
||||
|
||||
Total time:8.091 sec
|
||||
8.091833 usecs/op
|
||||
123581 ops/sec
|
||||
|
||||
% perf bench sched pipe -l 1000 # loop 1000
|
||||
(executing 1000 pipe operations between two tasks)
|
||||
|
||||
Total time:0.016 sec
|
||||
16.948000 usecs/op
|
||||
59004 ops/sec
|
||||
---------------------
|
||||
|
||||
SEE ALSO
|
||||
--------
|
||||
linkperf:perf[1]
|
@ -421,6 +421,13 @@ LIB_OBJS += util/hist.o
|
||||
LIB_OBJS += util/data_map.o
|
||||
|
||||
BUILTIN_OBJS += builtin-annotate.o
|
||||
|
||||
BUILTIN_OBJS += builtin-bench.o
|
||||
|
||||
# Benchmark modules
|
||||
BUILTIN_OBJS += bench/sched-messaging.o
|
||||
BUILTIN_OBJS += bench/sched-pipe.o
|
||||
|
||||
BUILTIN_OBJS += builtin-help.o
|
||||
BUILTIN_OBJS += builtin-sched.o
|
||||
BUILTIN_OBJS += builtin-list.o
|
||||
|
16
tools/perf/bench/bench.h
Normal file
16
tools/perf/bench/bench.h
Normal file
@ -0,0 +1,16 @@
|
||||
#ifndef BENCH_H
|
||||
#define BENCH_H
|
||||
|
||||
extern int bench_sched_messaging(int argc, const char **argv, const char *prefix);
|
||||
extern int bench_sched_pipe(int argc, const char **argv, const char *prefix);
|
||||
|
||||
#define BENCH_FORMAT_DEFAULT_STR "default"
|
||||
#define BENCH_FORMAT_DEFAULT 0
|
||||
#define BENCH_FORMAT_SIMPLE_STR "simple"
|
||||
#define BENCH_FORMAT_SIMPLE 1
|
||||
|
||||
#define BENCH_FORMAT_UNKNOWN -1
|
||||
|
||||
extern int bench_format;
|
||||
|
||||
#endif
|
336
tools/perf/bench/sched-messaging.c
Normal file
336
tools/perf/bench/sched-messaging.c
Normal file
@ -0,0 +1,336 @@
|
||||
/*
|
||||
*
|
||||
* sched-messaging.c
|
||||
*
|
||||
* messaging: Benchmark for scheduler and IPC mechanisms
|
||||
*
|
||||
* Based on hackbench by Rusty Russell <rusty@rustcorp.com.au>
|
||||
* Ported to perf by Hitoshi Mitake <mitake@dcl.info.waseda.ac.jp>
|
||||
*
|
||||
*/
|
||||
|
||||
#include "../perf.h"
|
||||
#include "../util/util.h"
|
||||
#include "../util/parse-options.h"
|
||||
#include "../builtin.h"
|
||||
#include "bench.h"
|
||||
|
||||
/* Test groups of 20 processes spraying to 20 receivers */
|
||||
#include <pthread.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <errno.h>
|
||||
#include <unistd.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/socket.h>
|
||||
#include <sys/wait.h>
|
||||
#include <sys/time.h>
|
||||
#include <sys/poll.h>
|
||||
#include <limits.h>
|
||||
|
||||
/* Size in bytes of every message a sender writes and a receiver reads. */
#define DATASIZE 100

static int use_pipes;			/* non-zero: pipe() instead of socketpair() */
static unsigned int loops = 100;	/* passes each sender makes over its fds */
static unsigned int thread_mode;	/* non-zero: workers are threads, not processes */
static unsigned int num_groups = 10;	/* number of sender/receiver groups */
|
||||
|
||||
/*
 * State shared by the senders of one group.  Allocated with room for
 * num_fds trailing descriptors after the fixed part.
 */
struct sender_context {
	unsigned int num_fds;	/* number of entries in out_fds */
	int ready_out;		/* one byte is written here to report readiness */
	int wakefd;		/* polled for the start signal */
	int out_fds[];		/* descriptors written to (C99 flexible array member) */
};
|
||||
|
||||
/* State handed to a single receiver worker. */
struct receiver_context {
	unsigned int num_packets;	/* messages to consume before returning */
	int in_fds[2];			/* [0] is read from; [1] is closed by process-mode receivers */
	int ready_out;			/* one byte is written here to report readiness */
	int wakefd;			/* polled for the start signal */
};
|
||||
|
||||
/* Print msg and the text for the current errno on stderr, then exit(1). */
static void barf(const char *msg)
{
	const char *reason = strerror(errno);

	fprintf(stderr, "%s (error: %s)\n", msg, reason);
	exit(1);
}
|
||||
|
||||
static void fdpair(int fds[2])
|
||||
{
|
||||
if (use_pipes) {
|
||||
if (pipe(fds) == 0)
|
||||
return;
|
||||
} else {
|
||||
if (socketpair(AF_UNIX, SOCK_STREAM, 0, fds) == 0)
|
||||
return;
|
||||
}
|
||||
|
||||
barf(use_pipes ? "pipe()" : "socketpair()");
|
||||
}
|
||||
|
||||
/*
 * Block until we're ready to go: report readiness with one byte on
 * ready_out, then wait until wakefd becomes readable.  Any failure ends
 * in barf().
 */
static void ready(int ready_out, int wakefd)
{
	/* Initialized so write() below never sends an indeterminate byte. */
	char dummy = 0;
	struct pollfd pollfd = { .fd = wakefd, .events = POLLIN };

	/* Tell them we're ready. */
	if (write(ready_out, &dummy, 1) != 1)
		barf("CLIENT: ready write");

	/* Wait for "GO" signal */
	if (poll(&pollfd, 1, -1) != 1)
		barf("poll");
}
|
||||
|
||||
/* Sender sprays loops messages down each file descriptor */
|
||||
static void *sender(struct sender_context *ctx)
|
||||
{
|
||||
char data[DATASIZE];
|
||||
unsigned int i, j;
|
||||
|
||||
ready(ctx->ready_out, ctx->wakefd);
|
||||
|
||||
/* Now pump to every receiver. */
|
||||
for (i = 0; i < loops; i++) {
|
||||
for (j = 0; j < ctx->num_fds; j++) {
|
||||
int ret, done = 0;
|
||||
|
||||
again:
|
||||
ret = write(ctx->out_fds[j], data + done,
|
||||
sizeof(data)-done);
|
||||
if (ret < 0)
|
||||
barf("SENDER: write");
|
||||
done += ret;
|
||||
if (done < DATASIZE)
|
||||
goto again;
|
||||
}
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
|
||||
/* One receiver per fd */
|
||||
static void *receiver(struct receiver_context* ctx)
|
||||
{
|
||||
unsigned int i;
|
||||
|
||||
if (!thread_mode)
|
||||
close(ctx->in_fds[1]);
|
||||
|
||||
/* Wait for start... */
|
||||
ready(ctx->ready_out, ctx->wakefd);
|
||||
|
||||
/* Receive them all */
|
||||
for (i = 0; i < ctx->num_packets; i++) {
|
||||
char data[DATASIZE];
|
||||
int ret, done = 0;
|
||||
|
||||
again:
|
||||
ret = read(ctx->in_fds[0], data + done, DATASIZE - done);
|
||||
if (ret < 0)
|
||||
barf("SERVER: read");
|
||||
done += ret;
|
||||
if (done < DATASIZE)
|
||||
goto again;
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static pthread_t create_worker(void *ctx, void *(*func)(void *))
|
||||
{
|
||||
pthread_attr_t attr;
|
||||
pthread_t childid;
|
||||
int err;
|
||||
|
||||
if (!thread_mode) {
|
||||
/* process mode */
|
||||
/* Fork the receiver. */
|
||||
switch (fork()) {
|
||||
case -1:
|
||||
barf("fork()");
|
||||
break;
|
||||
case 0:
|
||||
(*func) (ctx);
|
||||
exit(0);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
return (pthread_t)0;
|
||||
}
|
||||
|
||||
if (pthread_attr_init(&attr) != 0)
|
||||
barf("pthread_attr_init:");
|
||||
|
||||
#ifndef __ia64__
|
||||
if (pthread_attr_setstacksize(&attr, PTHREAD_STACK_MIN) != 0)
|
||||
barf("pthread_attr_setstacksize");
|
||||
#endif
|
||||
|
||||
err = pthread_create(&childid, &attr, func, ctx);
|
||||
if (err != 0) {
|
||||
fprintf(stderr, "pthread_create failed: %s (%d)\n",
|
||||
strerror(err), err);
|
||||
exit(-1);
|
||||
}
|
||||
return childid;
|
||||
}
|
||||
|
||||
static void reap_worker(pthread_t id)
|
||||
{
|
||||
int proc_status;
|
||||
void *thread_status;
|
||||
|
||||
if (!thread_mode) {
|
||||
/* process mode */
|
||||
wait(&proc_status);
|
||||
if (!WIFEXITED(proc_status))
|
||||
exit(1);
|
||||
} else {
|
||||
pthread_join(id, &thread_status);
|
||||
}
|
||||
}
|
||||
|
||||
/* One group of senders and receivers */
|
||||
static unsigned int group(pthread_t *pth,
|
||||
unsigned int num_fds,
|
||||
int ready_out,
|
||||
int wakefd)
|
||||
{
|
||||
unsigned int i;
|
||||
struct sender_context *snd_ctx = malloc(sizeof(struct sender_context)
|
||||
+ num_fds * sizeof(int));
|
||||
|
||||
if (!snd_ctx)
|
||||
barf("malloc()");
|
||||
|
||||
for (i = 0; i < num_fds; i++) {
|
||||
int fds[2];
|
||||
struct receiver_context *ctx = malloc(sizeof(*ctx));
|
||||
|
||||
if (!ctx)
|
||||
barf("malloc()");
|
||||
|
||||
|
||||
/* Create the pipe between client and server */
|
||||
fdpair(fds);
|
||||
|
||||
ctx->num_packets = num_fds * loops;
|
||||
ctx->in_fds[0] = fds[0];
|
||||
ctx->in_fds[1] = fds[1];
|
||||
ctx->ready_out = ready_out;
|
||||
ctx->wakefd = wakefd;
|
||||
|
||||
pth[i] = create_worker(ctx, (void *)receiver);
|
||||
|
||||
snd_ctx->out_fds[i] = fds[1];
|
||||
if (!thread_mode)
|
||||
close(fds[0]);
|
||||
}
|
||||
|
||||
/* Now we have all the fds, fork the senders */
|
||||
for (i = 0; i < num_fds; i++) {
|
||||
snd_ctx->ready_out = ready_out;
|
||||
snd_ctx->wakefd = wakefd;
|
||||
snd_ctx->num_fds = num_fds;
|
||||
|
||||
pth[num_fds+i] = create_worker(snd_ctx, (void *)sender);
|
||||
}
|
||||
|
||||
/* Close the fds we have left */
|
||||
if (!thread_mode)
|
||||
for (i = 0; i < num_fds; i++)
|
||||
close(snd_ctx->out_fds[i]);
|
||||
|
||||
/* Return number of children to reap */
|
||||
return num_fds * 2;
|
||||
}
|
||||
|
||||
static const struct option options[] = {
|
||||
OPT_BOOLEAN('p', "pipe", &use_pipes,
|
||||
"Use pipe() instead of socketpair()"),
|
||||
OPT_BOOLEAN('t', "thread", &thread_mode,
|
||||
"Be multi thread instead of multi process"),
|
||||
OPT_INTEGER('g', "group", &num_groups,
|
||||
"Specify number of groups"),
|
||||
OPT_INTEGER('l', "loop", &loops,
|
||||
"Specify number of loops"),
|
||||
OPT_END()
|
||||
};
|
||||
|
||||
static const char * const bench_sched_message_usage[] = {
|
||||
"perf bench sched messaging <options>",
|
||||
NULL
|
||||
};
|
||||
|
||||
int bench_sched_messaging(int argc, const char **argv,
|
||||
const char *prefix __used)
|
||||
{
|
||||
unsigned int i, total_children;
|
||||
struct timeval start, stop, diff;
|
||||
unsigned int num_fds = 20;
|
||||
int readyfds[2], wakefds[2];
|
||||
char dummy;
|
||||
pthread_t *pth_tab;
|
||||
|
||||
argc = parse_options(argc, argv, options,
|
||||
bench_sched_message_usage, 0);
|
||||
|
||||
pth_tab = malloc(num_fds * 2 * num_groups * sizeof(pthread_t));
|
||||
if (!pth_tab)
|
||||
barf("main:malloc()");
|
||||
|
||||
fdpair(readyfds);
|
||||
fdpair(wakefds);
|
||||
|
||||
total_children = 0;
|
||||
for (i = 0; i < num_groups; i++)
|
||||
total_children += group(pth_tab+total_children, num_fds,
|
||||
readyfds[1], wakefds[0]);
|
||||
|
||||
/* Wait for everyone to be ready */
|
||||
for (i = 0; i < total_children; i++)
|
||||
if (read(readyfds[0], &dummy, 1) != 1)
|
||||
barf("Reading for readyfds");
|
||||
|
||||
gettimeofday(&start, NULL);
|
||||
|
||||
/* Kick them off */
|
||||
if (write(wakefds[1], &dummy, 1) != 1)
|
||||
barf("Writing to start them");
|
||||
|
||||
/* Reap them all */
|
||||
for (i = 0; i < total_children; i++)
|
||||
reap_worker(pth_tab[i]);
|
||||
|
||||
gettimeofday(&stop, NULL);
|
||||
|
||||
timersub(&stop, &start, &diff);
|
||||
|
||||
switch (bench_format) {
|
||||
case BENCH_FORMAT_DEFAULT:
|
||||
printf("# %d sender and receiver %s per group\n",
|
||||
num_fds, thread_mode ? "threads" : "processes");
|
||||
printf("# %d groups == %d %s run\n\n",
|
||||
num_groups, num_groups * 2 * num_fds,
|
||||
thread_mode ? "threads" : "processes");
|
||||
printf(" %14s: %lu.%03lu [sec]\n", "Total time",
|
||||
diff.tv_sec, diff.tv_usec/1000);
|
||||
break;
|
||||
case BENCH_FORMAT_SIMPLE:
|
||||
printf("%lu.%03lu\n", diff.tv_sec, diff.tv_usec/1000);
|
||||
break;
|
||||
default:
|
||||
/* reaching here is something disaster */
|
||||
fprintf(stderr, "Unknown format:%d\n", bench_format);
|
||||
exit(1);
|
||||
break;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
124
tools/perf/bench/sched-pipe.c
Normal file
124
tools/perf/bench/sched-pipe.c
Normal file
@ -0,0 +1,124 @@
|
||||
/*
|
||||
*
|
||||
* sched-pipe.c
|
||||
*
|
||||
* pipe: Benchmark for pipe()
|
||||
*
|
||||
* Based on pipe-test-1m.c by Ingo Molnar <mingo@redhat.com>
|
||||
* http://people.redhat.com/mingo/cfs-scheduler/tools/pipe-test-1m.c
|
||||
* Ported to perf by Hitoshi Mitake <mitake@dcl.info.waseda.ac.jp>
|
||||
*
|
||||
*/
|
||||
|
||||
#include "../perf.h"
|
||||
#include "../util/util.h"
|
||||
#include "../util/parse-options.h"
|
||||
#include "../builtin.h"
|
||||
#include "bench.h"
|
||||
|
||||
#include <unistd.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <signal.h>
|
||||
#include <sys/wait.h>
|
||||
#include <linux/unistd.h>
|
||||
#include <string.h>
|
||||
#include <errno.h>
|
||||
#include <assert.h>
|
||||
#include <sys/time.h>
|
||||
#include <sys/types.h>
|
||||
|
||||
#define LOOPS_DEFAULT 1000000
|
||||
static int loops = LOOPS_DEFAULT;
|
||||
|
||||
static const struct option options[] = {
|
||||
OPT_INTEGER('l', "loop", &loops,
|
||||
"Specify number of loops"),
|
||||
OPT_END()
|
||||
};
|
||||
|
||||
static const char * const bench_sched_pipe_usage[] = {
|
||||
"perf bench sched pipe <options>",
|
||||
NULL
|
||||
};
|
||||
|
||||
int bench_sched_pipe(int argc, const char **argv,
|
||||
const char *prefix __used)
|
||||
{
|
||||
int pipe_1[2], pipe_2[2];
|
||||
int m = 0, i;
|
||||
struct timeval start, stop, diff;
|
||||
unsigned long long result_usec = 0;
|
||||
|
||||
/*
|
||||
* why does "ret" exist?
|
||||
* discarding returned value of read(), write()
|
||||
* causes error in building environment for perf
|
||||
*/
|
||||
int ret, wait_stat;
|
||||
pid_t pid, retpid;
|
||||
|
||||
argc = parse_options(argc, argv, options,
|
||||
bench_sched_pipe_usage, 0);
|
||||
|
||||
assert(!pipe(pipe_1));
|
||||
assert(!pipe(pipe_2));
|
||||
|
||||
pid = fork();
|
||||
assert(pid >= 0);
|
||||
|
||||
gettimeofday(&start, NULL);
|
||||
|
||||
if (!pid) {
|
||||
for (i = 0; i < loops; i++) {
|
||||
ret = read(pipe_1[0], &m, sizeof(int));
|
||||
ret = write(pipe_2[1], &m, sizeof(int));
|
||||
}
|
||||
} else {
|
||||
for (i = 0; i < loops; i++) {
|
||||
ret = write(pipe_1[1], &m, sizeof(int));
|
||||
ret = read(pipe_2[0], &m, sizeof(int));
|
||||
}
|
||||
}
|
||||
|
||||
gettimeofday(&stop, NULL);
|
||||
timersub(&stop, &start, &diff);
|
||||
|
||||
if (pid) {
|
||||
retpid = waitpid(pid, &wait_stat, 0);
|
||||
assert((retpid == pid) && WIFEXITED(wait_stat));
|
||||
return 0;
|
||||
}
|
||||
|
||||
switch (bench_format) {
|
||||
case BENCH_FORMAT_DEFAULT:
|
||||
printf("# Extecuted %d pipe operations between two tasks\n\n",
|
||||
loops);
|
||||
|
||||
result_usec = diff.tv_sec * 1000000;
|
||||
result_usec += diff.tv_usec;
|
||||
|
||||
printf(" %14s: %lu.%03lu [sec]\n\n", "Total time",
|
||||
diff.tv_sec, diff.tv_usec/1000);
|
||||
|
||||
printf(" %14lf usecs/op\n",
|
||||
(double)result_usec / (double)loops);
|
||||
printf(" %14d ops/sec\n",
|
||||
(int)((double)loops /
|
||||
((double)result_usec / (double)1000000)));
|
||||
break;
|
||||
|
||||
case BENCH_FORMAT_SIMPLE:
|
||||
printf("%lu.%03lu\n",
|
||||
diff.tv_sec, diff.tv_usec / 1000);
|
||||
break;
|
||||
|
||||
default:
|
||||
/* reaching here is something disaster */
|
||||
fprintf(stderr, "Unknown format:%d\n", bench_format);
|
||||
exit(1);
|
||||
break;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
183
tools/perf/builtin-bench.c
Normal file
183
tools/perf/builtin-bench.c
Normal file
@ -0,0 +1,183 @@
|
||||
/*
|
||||
*
|
||||
* builtin-bench.c
|
||||
*
|
||||
* General benchmarking subsystem provided by perf
|
||||
*
|
||||
* Copyright (C) 2009, Hitoshi Mitake <mitake@dcl.info.waseda.ac.jp>
|
||||
*
|
||||
*/
|
||||
|
||||
/*
|
||||
*
|
||||
* Available subsystem list:
|
||||
* sched ... scheduler and IPC mechanism
|
||||
*
|
||||
*/
|
||||
|
||||
#include "perf.h"
|
||||
#include "util/util.h"
|
||||
#include "util/parse-options.h"
|
||||
#include "builtin.h"
|
||||
#include "bench/bench.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
/* One runnable benchmark of a subsystem. */
struct bench_suite {
	const char *name;	/* compared with the suite argument on the command line */
	const char *summary;	/* description printed in suite listings */
	int (*fn)(int argc, const char **argv, const char *prefix);	/* entry point */
};
|
||||
|
||||
static struct bench_suite sched_suites[] = {
|
||||
{ "messaging",
|
||||
"Benchmark for scheduler and IPC mechanisms",
|
||||
bench_sched_messaging },
|
||||
{ "pipe",
|
||||
"Flood of communication over pipe() between two processes",
|
||||
bench_sched_pipe },
|
||||
{ NULL,
|
||||
NULL,
|
||||
NULL }
|
||||
};
|
||||
|
||||
/* A named collection of benchmark suites. */
struct bench_subsys {
	const char *name;		/* compared with the subsystem argument */
	const char *summary;		/* description printed in the usage text */
	struct bench_suite *suites;	/* table ended by an entry with a NULL name */
};
|
||||
|
||||
static struct bench_subsys subsystems[] = {
|
||||
{ "sched",
|
||||
"scheduler and IPC mechanism",
|
||||
sched_suites },
|
||||
{ NULL,
|
||||
NULL,
|
||||
NULL }
|
||||
};
|
||||
|
||||
static void dump_suites(int subsys_index)
|
||||
{
|
||||
int i;
|
||||
|
||||
printf("List of available suites for %s...\n\n",
|
||||
subsystems[subsys_index].name);
|
||||
|
||||
for (i = 0; subsystems[subsys_index].suites[i].name; i++)
|
||||
printf("\t%s: %s\n",
|
||||
subsystems[subsys_index].suites[i].name,
|
||||
subsystems[subsys_index].suites[i].summary);
|
||||
|
||||
printf("\n");
|
||||
return;
|
||||
}
|
||||
|
||||
static char *bench_format_str;
|
||||
int bench_format = BENCH_FORMAT_DEFAULT;
|
||||
|
||||
static const struct option bench_options[] = {
|
||||
OPT_STRING('f', "format", &bench_format_str, "default",
|
||||
"Specify format style"),
|
||||
OPT_END()
|
||||
};
|
||||
|
||||
static const char * const bench_usage[] = {
|
||||
"perf bench [<common options>] <subsystem> <suite> [<options>]",
|
||||
NULL
|
||||
};
|
||||
|
||||
static void print_usage(void)
|
||||
{
|
||||
int i;
|
||||
|
||||
printf("Usage: \n");
|
||||
for (i = 0; bench_usage[i]; i++)
|
||||
printf("\t%s\n", bench_usage[i]);
|
||||
printf("\n");
|
||||
|
||||
printf("List of available subsystems...\n\n");
|
||||
|
||||
for (i = 0; subsystems[i].name; i++)
|
||||
printf("\t%s: %s\n",
|
||||
subsystems[i].name, subsystems[i].summary);
|
||||
printf("\n");
|
||||
}
|
||||
|
||||
static int bench_str2int(char *str)
|
||||
{
|
||||
if (!str)
|
||||
return BENCH_FORMAT_DEFAULT;
|
||||
|
||||
if (!strcmp(str, BENCH_FORMAT_DEFAULT_STR))
|
||||
return BENCH_FORMAT_DEFAULT;
|
||||
else if (!strcmp(str, BENCH_FORMAT_SIMPLE_STR))
|
||||
return BENCH_FORMAT_SIMPLE;
|
||||
|
||||
return BENCH_FORMAT_UNKNOWN;
|
||||
}
|
||||
|
||||
/*
 * Entry point of "perf bench": parse the common options, then look up
 * the requested subsystem and suite and run it.
 *
 * Returns the suite's own status when one is run, 1 for an unknown
 * format, subsystem or suite, and 0 when only the usage text or a suite
 * list is printed.
 */
int cmd_bench(int argc, const char **argv, const char *prefix __used)
{
	int i, j, status = 0;

	if (argc < 2) {
		/* No subsystem specified. */
		print_usage();
		goto end;
	}

	argc = parse_options(argc, argv, bench_options, bench_usage,
			     PARSE_OPT_STOP_AT_NON_OPTION);

	bench_format = bench_str2int(bench_format_str);
	if (bench_format == BENCH_FORMAT_UNKNOWN) {
		printf("Unknown format descriptor:%s\n", bench_format_str);
		/* A failure, like an unknown subsystem or suite below. */
		status = 1;
		goto end;
	}

	if (argc < 1) {
		print_usage();
		goto end;
	}

	for (i = 0; subsystems[i].name; i++) {
		if (strcmp(subsystems[i].name, argv[0]))
			continue;

		if (argc < 2) {
			/* No suite specified. */
			dump_suites(i);
			goto end;
		}

		for (j = 0; subsystems[i].suites[j].name; j++) {
			if (strcmp(subsystems[i].suites[j].name, argv[1]))
				continue;

			if (bench_format == BENCH_FORMAT_DEFAULT)
				printf("# Running %s/%s benchmark...\n",
				       subsystems[i].name,
				       subsystems[i].suites[j].name);

			/*
			 * The suites fork(); flush first so that children do
			 * not inherit, and later emit again, output still
			 * sitting in the stdio buffer.
			 */
			fflush(stdout);

			status = subsystems[i].suites[j].fn(argc - 1,
							    argv + 1, prefix);
			goto end;
		}

		/* Not a suite name: accept a help request, reject anything else. */
		if (!strcmp(argv[1], "-h") || !strcmp(argv[1], "--help")) {
			dump_suites(i);
			goto end;
		}

		printf("Unknown suite:%s for %s\n", argv[1], argv[0]);
		status = 1;
		goto end;
	}

	printf("Unknown subsystem:%s\n", argv[0]);
	status = 1;

end:
	return status;
}
|
@ -15,6 +15,7 @@ extern int read_line_with_nul(char *buf, int size, FILE *file);
|
||||
extern int check_pager_config(const char *cmd);
|
||||
|
||||
extern int cmd_annotate(int argc, const char **argv, const char *prefix);
|
||||
extern int cmd_bench(int argc, const char **argv, const char *prefix);
|
||||
extern int cmd_help(int argc, const char **argv, const char *prefix);
|
||||
extern int cmd_sched(int argc, const char **argv, const char *prefix);
|
||||
extern int cmd_list(int argc, const char **argv, const char *prefix);
|
||||
|
@ -3,6 +3,7 @@
|
||||
# command name category [deprecated] [common]
|
||||
#
|
||||
perf-annotate mainporcelain common
|
||||
perf-bench mainporcelain common
|
||||
perf-list mainporcelain common
|
||||
perf-sched mainporcelain common
|
||||
perf-record mainporcelain common
|
||||
|
@ -137,6 +137,8 @@ enum sw_event_ids {
|
||||
PERF_COUNT_SW_CPU_MIGRATIONS = 4,
|
||||
PERF_COUNT_SW_PAGE_FAULTS_MIN = 5,
|
||||
PERF_COUNT_SW_PAGE_FAULTS_MAJ = 6,
|
||||
PERF_COUNT_SW_ALIGNMENT_FAULTS = 7,
|
||||
PERF_COUNT_SW_EMULATION_FAULTS = 8,
|
||||
};
|
||||
|
||||
Counters of the type PERF_TYPE_TRACEPOINT are available when the ftrace event
|
||||
|
@ -289,6 +289,7 @@ static void handle_internal_command(int argc, const char **argv)
|
||||
{ "list", cmd_list, 0 },
|
||||
{ "record", cmd_record, 0 },
|
||||
{ "report", cmd_report, 0 },
|
||||
{ "bench", cmd_bench, 0 },
|
||||
{ "stat", cmd_stat, 0 },
|
||||
{ "timechart", cmd_timechart, 0 },
|
||||
{ "top", cmd_top, 0 },
|
||||
|
@ -48,6 +48,8 @@ static struct event_symbol event_symbols[] = {
|
||||
{ CSW(PAGE_FAULTS_MAJ), "major-faults", "" },
|
||||
{ CSW(CONTEXT_SWITCHES), "context-switches", "cs" },
|
||||
{ CSW(CPU_MIGRATIONS), "cpu-migrations", "migrations" },
|
||||
{ CSW(ALIGNMENT_FAULTS), "alignment-faults", "" },
|
||||
{ CSW(EMULATION_FAULTS), "emulation-faults", "" },
|
||||
};
|
||||
|
||||
#define __PERF_EVENT_FIELD(config, name) \
|
||||
@ -76,6 +78,8 @@ static const char *sw_event_names[] = {
|
||||
"CPU-migrations",
|
||||
"minor-faults",
|
||||
"major-faults",
|
||||
"alignment-faults",
|
||||
"emulation-faults",
|
||||
};
|
||||
|
||||
#define MAX_ALIASES 8
|
||||
|
Loading…
Reference in New Issue
Block a user