powerpc: tracing: Add hypervisor call tracepoints
Add hcall_entry and hcall_exit tracepoints. This replaces the inline
assembly HCALL_STATS code and converts it to use the new tracepoints.

To keep the disabled case as quick as possible, we embed a status word
in the TOC so we can get at it with a single load. By doing so we keep
the overhead at a minimum.

Time taken for a null hcall:

No tracepoint code:    135.79 cycles
Disabled tracepoints:  137.95 cycles

For reference, before this patch enabling HCALL_STATS resulted in a
null hcall of 201.44 cycles!

Signed-off-by: Anton Blanchard <anton@samba.org>
Signed-off-by: Paul Mackerras <paulus@samba.org>
parent 6795b85c6a
commit c8cd093a6e
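The disabled-case numbers above come from gating the tracing path on a single
load of a status word (hcall_tracepoint_refcount) that the patch places in the
TOC. As a rough C sketch of that gate (the real gate is the PowerPC assembly in
the HCALL_INST_PRECALL/POSTCALL macros further down; the hcall_trace_entry and
hcall_trace_exit wrapper names here are illustrative only, not part of the patch):

#include <linux/compiler.h>

/*
 * Sketch only -- the real fast path is open-coded in assembly below.
 * hcall_tracepoint_refcount lives in the TOC, so the disabled case
 * costs one load plus a not-taken branch.
 */
extern long hcall_tracepoint_refcount;

void __trace_hcall_entry(unsigned long opcode);
void __trace_hcall_exit(long opcode, unsigned long retval);

/* hypothetical wrapper names, for illustration */
static inline void hcall_trace_entry(unsigned long opcode)
{
	if (unlikely(hcall_tracepoint_refcount))
		__trace_hcall_entry(opcode);
}

static inline void hcall_trace_exit(long opcode, unsigned long retval)
{
	if (unlikely(hcall_tracepoint_refcount))
		__trace_hcall_exit(opcode, retval);
}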
@@ -46,7 +46,7 @@ config DEBUG_STACK_USAGE
 
 config HCALL_STATS
 	bool "Hypervisor call instrumentation"
-	depends on PPC_PSERIES && DEBUG_FS
+	depends on PPC_PSERIES && DEBUG_FS && TRACEPOINTS
 	help
 	  Adds code to keep track of the number of hypervisor calls made and
 	  the amount of time spent in hypervisor calls.  Wall time spent in
@@ -274,6 +274,8 @@ struct hcall_stats {
 	unsigned long	num_calls;	/* number of calls (on this CPU) */
 	unsigned long	tb_total;	/* total wall time (mftb) of calls. */
 	unsigned long	purr_total;	/* total cpu time (PURR) of calls. */
+	unsigned long	tb_start;
+	unsigned long	purr_start;
 };
 #define HCALL_STAT_ARRAY_SIZE	((MAX_HCALL_OPCODE >> 2) + 1)
 
@@ -76,6 +76,51 @@ TRACE_EVENT(timer_interrupt_exit,
 	TP_printk("pt_regs=%p", __entry->regs)
 );
 
+#ifdef CONFIG_PPC_PSERIES
+extern void hcall_tracepoint_regfunc(void);
+extern void hcall_tracepoint_unregfunc(void);
+
+TRACE_EVENT_FN(hcall_entry,
+
+	TP_PROTO(unsigned long opcode),
+
+	TP_ARGS(opcode),
+
+	TP_STRUCT__entry(
+		__field(unsigned long, opcode)
+	),
+
+	TP_fast_assign(
+		__entry->opcode = opcode;
+	),
+
+	TP_printk("opcode=%lu", __entry->opcode),
+
+	hcall_tracepoint_regfunc, hcall_tracepoint_unregfunc
+);
+
+TRACE_EVENT_FN(hcall_exit,
+
+	TP_PROTO(unsigned long opcode, unsigned long retval),
+
+	TP_ARGS(opcode, retval),
+
+	TP_STRUCT__entry(
+		__field(unsigned long, opcode)
+		__field(unsigned long, retval)
+	),
+
+	TP_fast_assign(
+		__entry->opcode = opcode;
+		__entry->retval = retval;
+	),
+
+	TP_printk("opcode=%lu retval=%lu", __entry->opcode, __entry->retval),
+
+	hcall_tracepoint_regfunc, hcall_tracepoint_unregfunc
+);
+#endif
+
 #endif /* _TRACE_POWERPC_H */
 
 #undef TRACE_INCLUDE_PATH
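TRACE_EVENT_FN is the same as TRACE_EVENT except for the two extra arguments:
hcall_tracepoint_regfunc() and hcall_tracepoint_unregfunc() are called by the
tracing core when the first probe is attached and the last one removed, which
is what lets the hcall assembly use a simple refcount gate. A minimal consumer
would look much like what hvCall_inst.c does later in this patch; the probe and
init names below are illustrative only:

#include <linux/init.h>
#include <linux/errno.h>
#include <asm/trace.h>

/* illustrative probe; the real consumer in this patch is hvCall_inst.c */
static void my_hcall_entry_probe(unsigned long opcode)
{
	/* count or timestamp the hcall here */
}

static int __init my_hcall_trace_init(void)
{
	/*
	 * Attaching the first probe also runs hcall_tracepoint_regfunc(),
	 * which makes hcall_tracepoint_refcount non-zero and turns the
	 * assembly fast path into a call to __trace_hcall_entry().
	 */
	if (register_trace_hcall_entry(my_hcall_entry_probe))
		return -EINVAL;
	return 0;
}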
@@ -14,20 +14,54 @@
 
 #define STK_PARM(i)     (48 + ((i)-3)*8)
 
-#ifdef CONFIG_HCALL_STATS
+#ifdef CONFIG_TRACEPOINTS
+
+	.section	".toc","aw"
+
+	.globl hcall_tracepoint_refcount
+hcall_tracepoint_refcount:
+	.llong	0
+
+	.section	".text"
+
 /*
  * precall must preserve all registers.  use unused STK_PARM()
- * areas to save snapshots and opcode.
+ * areas to save snapshots and opcode. We branch around this
+ * in early init (eg when populating the MMU hashtable) by using an
+ * unconditional cpu feature.
  */
 #define HCALL_INST_PRECALL					\
-	std	r3,STK_PARM(r3)(r1);	/* save opcode */	\
-	mftb	r0;			/* get timebase and */	\
-	std	r0,STK_PARM(r5)(r1);	/* save for later */	\
 BEGIN_FTR_SECTION;						\
-	mfspr	r0,SPRN_PURR;		/* get PURR and */	\
-	std	r0,STK_PARM(r6)(r1);	/* save for later */	\
-END_FTR_SECTION_IFSET(CPU_FTR_PURR);
+	b	1f;						\
+END_FTR_SECTION(0, 1);						\
+	ld	r12,hcall_tracepoint_refcount@toc(r2);		\
+	cmpdi	r12,0;						\
+	beq+	1f;						\
+	mflr	r0;						\
+	std	r3,STK_PARM(r3)(r1);				\
+	std	r4,STK_PARM(r4)(r1);				\
+	std	r5,STK_PARM(r5)(r1);				\
+	std	r6,STK_PARM(r6)(r1);				\
+	std	r7,STK_PARM(r7)(r1);				\
+	std	r8,STK_PARM(r8)(r1);				\
+	std	r9,STK_PARM(r9)(r1);				\
+	std	r10,STK_PARM(r10)(r1);				\
+	std	r0,16(r1);					\
+	stdu	r1,-STACK_FRAME_OVERHEAD(r1);			\
+	bl	.__trace_hcall_entry;				\
+	addi	r1,r1,STACK_FRAME_OVERHEAD;			\
+	ld	r0,16(r1);					\
+	ld	r3,STK_PARM(r3)(r1);				\
+	ld	r4,STK_PARM(r4)(r1);				\
+	ld	r5,STK_PARM(r5)(r1);				\
+	ld	r6,STK_PARM(r6)(r1);				\
+	ld	r7,STK_PARM(r7)(r1);				\
+	ld	r8,STK_PARM(r8)(r1);				\
+	ld	r9,STK_PARM(r9)(r1);				\
+	ld	r10,STK_PARM(r10)(r1);				\
+	mtlr	r0;						\
+1:
 
 /*
  * postcall is performed immediately before function return which
  * allows liberal use of volatile registers.  We branch around this
@@ -38,40 +72,21 @@ END_FTR_SECTION_IFSET(CPU_FTR_PURR);
 BEGIN_FTR_SECTION;						\
 	b	1f;						\
 END_FTR_SECTION(0, 1);						\
-	ld	r4,STK_PARM(r3)(r1);	/* validate opcode */	\
-	cmpldi	cr7,r4,MAX_HCALL_OPCODE;			\
-	bgt-	cr7,1f;						\
-								\
-	/* get time and PURR snapshots after hcall */		\
-	mftb	r7;			/* timebase after */	\
-BEGIN_FTR_SECTION;						\
-	mfspr	r8,SPRN_PURR;		/* PURR after */	\
-	ld	r6,STK_PARM(r6)(r1);	/* PURR before */	\
-	subf	r6,r6,r8;		/* delta */		\
-END_FTR_SECTION_IFSET(CPU_FTR_PURR);				\
-	ld	r5,STK_PARM(r5)(r1);	/* timebase before */	\
-	subf	r5,r5,r7;		/* time delta */	\
-								\
-	/* calculate address of stat structure r4 = opcode */	\
-	srdi	r4,r4,2;		/* index into array */	\
-	mulli	r4,r4,HCALL_STAT_SIZE;				\
-	LOAD_REG_ADDR(r7, per_cpu__hcall_stats);		\
-	add	r4,r4,r7;					\
-	ld	r7,PACA_DATA_OFFSET(r13); /* per cpu offset */	\
-	add	r4,r4,r7;					\
-								\
-	/* update stats	*/					\
-	ld	r7,HCALL_STAT_CALLS(r4); /* count */		\
-	addi	r7,r7,1;					\
-	std	r7,HCALL_STAT_CALLS(r4);			\
-	ld	r7,HCALL_STAT_TB(r4);	/* timebase */		\
-	add	r7,r7,r5;					\
-	std	r7,HCALL_STAT_TB(r4);				\
-BEGIN_FTR_SECTION;						\
-	ld	r7,HCALL_STAT_PURR(r4);	/* PURR */		\
-	add	r7,r7,r6;					\
-	std	r7,HCALL_STAT_PURR(r4);				\
-END_FTR_SECTION_IFSET(CPU_FTR_PURR);				\
+	ld	r12,hcall_tracepoint_refcount@toc(r2);		\
+	cmpdi	r12,0;						\
+	beq+	1f;						\
+	mflr	r0;						\
+	ld	r6,STK_PARM(r3)(r1);				\
+	std	r3,STK_PARM(r3)(r1);				\
+	mr	r4,r3;						\
+	mr	r3,r6;						\
+	std	r0,16(r1);					\
+	stdu	r1,-STACK_FRAME_OVERHEAD(r1);			\
+	bl	.__trace_hcall_exit;				\
+	addi	r1,r1,STACK_FRAME_OVERHEAD;			\
+	ld	r0,16(r1);					\
+	ld	r3,STK_PARM(r3)(r1);				\
+	mtlr	r0;						\
 1:
 #else
 #define HCALL_INST_PRECALL
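For readers who do not speak PowerPC assembly, the two macros above reduce to
the following C-shaped logic (a sketch using the names from this patch; the
save/restore of r3-r10 and the link register, and the unconditional
feature-section branch used during early boot, are exactly what the extra
std/ld/mflr/mtlr instructions provide and are not modelled here):

extern long hcall_tracepoint_refcount;
void __trace_hcall_entry(unsigned long opcode);
void __trace_hcall_exit(long opcode, unsigned long retval);

/* sketch of HCALL_INST_PRECALL: runs just before the hypervisor call */
static inline void hcall_inst_precall(unsigned long opcode)
{
	if (hcall_tracepoint_refcount == 0)	/* the single TOC load */
		return;				/* tracing disabled: fall through */
	__trace_hcall_entry(opcode);		/* asm saves r3-r10 around this */
}

/* sketch of HCALL_INST_POSTCALL: runs just before the wrapper returns */
static inline void hcall_inst_postcall(unsigned long opcode, long retval)
{
	if (hcall_tracepoint_refcount == 0)
		return;
	__trace_hcall_exit(opcode, retval);	/* asm keeps the return value in r3 intact */
}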
@@ -26,6 +26,7 @@
 #include <asm/hvcall.h>
 #include <asm/firmware.h>
 #include <asm/cputable.h>
+#include <asm/trace.h>
 
 DEFINE_PER_CPU(struct hcall_stats[HCALL_STAT_ARRAY_SIZE], hcall_stats);
 
@@ -100,6 +101,34 @@ static const struct file_operations hcall_inst_seq_fops = {
 #define	HCALL_ROOT_DIR		"hcall_inst"
 #define CPU_NAME_BUF_SIZE	32
 
+
+static void probe_hcall_entry(unsigned long opcode)
+{
+	struct hcall_stats *h;
+
+	if (opcode > MAX_HCALL_OPCODE)
+		return;
+
+	h = &get_cpu_var(hcall_stats)[opcode / 4];
+	h->tb_start = mftb();
+	h->purr_start = mfspr(SPRN_PURR);
+}
+
+static void probe_hcall_exit(unsigned long opcode, unsigned long retval)
+{
+	struct hcall_stats *h;
+
+	if (opcode > MAX_HCALL_OPCODE)
+		return;
+
+	h = &__get_cpu_var(hcall_stats)[opcode / 4];
+	h->num_calls++;
+	h->tb_total = mftb() - h->tb_start;
+	h->purr_total = mfspr(SPRN_PURR) - h->purr_start;
+
+	put_cpu_var(hcall_stats);
+}
+
 static int __init hcall_inst_init(void)
 {
 	struct dentry *hcall_root;
@@ -110,6 +139,14 @@ static int __init hcall_inst_init(void)
 	if (!firmware_has_feature(FW_FEATURE_LPAR))
 		return 0;
 
+	if (register_trace_hcall_entry(probe_hcall_entry))
+		return -EINVAL;
+
+	if (register_trace_hcall_exit(probe_hcall_exit)) {
+		unregister_trace_hcall_entry(probe_hcall_entry);
+		return -EINVAL;
+	}
+
 	hcall_root = debugfs_create_dir(HCALL_ROOT_DIR, NULL);
 	if (!hcall_root)
 		return -ENOMEM;
@@ -39,6 +39,7 @@
 #include <asm/cputable.h>
 #include <asm/udbg.h>
 #include <asm/smp.h>
+#include <asm/trace.h>
 
 #include "plpar_wrappers.h"
 #include "pseries.h"
@@ -661,3 +662,34 @@ void arch_free_page(struct page *page, int order)
 EXPORT_SYMBOL(arch_free_page);
 
 #endif
+
+#ifdef CONFIG_TRACEPOINTS
+/*
+ * We optimise our hcall path by placing hcall_tracepoint_refcount
+ * directly in the TOC so we can check if the hcall tracepoints are
+ * enabled via a single load.
+ */
+
+/* NB: reg/unreg are called while guarded with the tracepoints_mutex */
+extern long hcall_tracepoint_refcount;
+
+void hcall_tracepoint_regfunc(void)
+{
+	hcall_tracepoint_refcount++;
+}
+
+void hcall_tracepoint_unregfunc(void)
+{
+	hcall_tracepoint_refcount--;
+}
+
+void __trace_hcall_entry(unsigned long opcode)
+{
+	trace_hcall_entry(opcode);
+}
+
+void __trace_hcall_exit(long opcode, unsigned long retval)
+{
+	trace_hcall_exit(opcode, retval);
+}
+#endif