perf events: Precalculate the header space for PERF_SAMPLE_ fields
PERF_SAMPLE_{CALLCHAIN,RAW} have variable lengths per sample, but the others can be precalculated, slightly reducing the per-sample cost. Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Frédéric Weisbecker <fweisbec@gmail.com> Cc: Ian Munsie <imunsie@au1.ibm.com> Cc: Mike Galbraith <efault@gmx.de> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Paul Mackerras <paulus@samba.org> Cc: Stephane Eranian <eranian@google.com> LKML-Reference: <new-submission> Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
This commit is contained in:
parent
068ffaa8bf
commit
c320c7b7d3
@ -758,6 +758,8 @@ struct perf_event {
|
|||||||
u64 shadow_ctx_time;
|
u64 shadow_ctx_time;
|
||||||
|
|
||||||
struct perf_event_attr attr;
|
struct perf_event_attr attr;
|
||||||
|
u16 header_size;
|
||||||
|
u16 read_size;
|
||||||
struct hw_perf_event hw;
|
struct hw_perf_event hw;
|
||||||
|
|
||||||
struct perf_event_context *ctx;
|
struct perf_event_context *ctx;
|
||||||
|
@ -312,9 +312,75 @@ list_add_event(struct perf_event *event, struct perf_event_context *ctx)
|
|||||||
ctx->nr_stat++;
|
ctx->nr_stat++;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Called at perf_event creation and when events are attached/detached from a
|
||||||
|
* group.
|
||||||
|
*/
|
||||||
|
static void perf_event__read_size(struct perf_event *event)
|
||||||
|
{
|
||||||
|
int entry = sizeof(u64); /* value */
|
||||||
|
int size = 0;
|
||||||
|
int nr = 1;
|
||||||
|
|
||||||
|
if (event->attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
|
||||||
|
size += sizeof(u64);
|
||||||
|
|
||||||
|
if (event->attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
|
||||||
|
size += sizeof(u64);
|
||||||
|
|
||||||
|
if (event->attr.read_format & PERF_FORMAT_ID)
|
||||||
|
entry += sizeof(u64);
|
||||||
|
|
||||||
|
if (event->attr.read_format & PERF_FORMAT_GROUP) {
|
||||||
|
nr += event->group_leader->nr_siblings;
|
||||||
|
size += sizeof(u64);
|
||||||
|
}
|
||||||
|
|
||||||
|
size += entry * nr;
|
||||||
|
event->read_size = size;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void perf_event__header_size(struct perf_event *event)
|
||||||
|
{
|
||||||
|
struct perf_sample_data *data;
|
||||||
|
u64 sample_type = event->attr.sample_type;
|
||||||
|
u16 size = 0;
|
||||||
|
|
||||||
|
perf_event__read_size(event);
|
||||||
|
|
||||||
|
if (sample_type & PERF_SAMPLE_IP)
|
||||||
|
size += sizeof(data->ip);
|
||||||
|
|
||||||
|
if (sample_type & PERF_SAMPLE_TID)
|
||||||
|
size += sizeof(data->tid_entry);
|
||||||
|
|
||||||
|
if (sample_type & PERF_SAMPLE_TIME)
|
||||||
|
size += sizeof(data->time);
|
||||||
|
|
||||||
|
if (sample_type & PERF_SAMPLE_ADDR)
|
||||||
|
size += sizeof(data->addr);
|
||||||
|
|
||||||
|
if (sample_type & PERF_SAMPLE_ID)
|
||||||
|
size += sizeof(data->id);
|
||||||
|
|
||||||
|
if (sample_type & PERF_SAMPLE_STREAM_ID)
|
||||||
|
size += sizeof(data->stream_id);
|
||||||
|
|
||||||
|
if (sample_type & PERF_SAMPLE_CPU)
|
||||||
|
size += sizeof(data->cpu_entry);
|
||||||
|
|
||||||
|
if (sample_type & PERF_SAMPLE_PERIOD)
|
||||||
|
size += sizeof(data->period);
|
||||||
|
|
||||||
|
if (sample_type & PERF_SAMPLE_READ)
|
||||||
|
size += event->read_size;
|
||||||
|
|
||||||
|
event->header_size = size;
|
||||||
|
}
|
||||||
|
|
||||||
static void perf_group_attach(struct perf_event *event)
|
static void perf_group_attach(struct perf_event *event)
|
||||||
{
|
{
|
||||||
struct perf_event *group_leader = event->group_leader;
|
struct perf_event *group_leader = event->group_leader, *pos;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* We can have double attach due to group movement in perf_event_open.
|
* We can have double attach due to group movement in perf_event_open.
|
||||||
@ -333,6 +399,11 @@ static void perf_group_attach(struct perf_event *event)
|
|||||||
|
|
||||||
list_add_tail(&event->group_entry, &group_leader->sibling_list);
|
list_add_tail(&event->group_entry, &group_leader->sibling_list);
|
||||||
group_leader->nr_siblings++;
|
group_leader->nr_siblings++;
|
||||||
|
|
||||||
|
perf_event__header_size(group_leader);
|
||||||
|
|
||||||
|
list_for_each_entry(pos, &group_leader->sibling_list, group_entry)
|
||||||
|
perf_event__header_size(pos);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -391,7 +462,7 @@ static void perf_group_detach(struct perf_event *event)
|
|||||||
if (event->group_leader != event) {
|
if (event->group_leader != event) {
|
||||||
list_del_init(&event->group_entry);
|
list_del_init(&event->group_entry);
|
||||||
event->group_leader->nr_siblings--;
|
event->group_leader->nr_siblings--;
|
||||||
return;
|
goto out;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!list_empty(&event->group_entry))
|
if (!list_empty(&event->group_entry))
|
||||||
@ -410,6 +481,12 @@ static void perf_group_detach(struct perf_event *event)
|
|||||||
/* Inherit group flags from the previous leader */
|
/* Inherit group flags from the previous leader */
|
||||||
sibling->group_flags = event->group_flags;
|
sibling->group_flags = event->group_flags;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
out:
|
||||||
|
perf_event__header_size(event->group_leader);
|
||||||
|
|
||||||
|
list_for_each_entry(tmp, &event->group_leader->sibling_list, group_entry)
|
||||||
|
perf_event__header_size(tmp);
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline int
|
static inline int
|
||||||
@ -2289,31 +2366,6 @@ static int perf_release(struct inode *inode, struct file *file)
|
|||||||
return perf_event_release_kernel(event);
|
return perf_event_release_kernel(event);
|
||||||
}
|
}
|
||||||
|
|
||||||
static int perf_event_read_size(struct perf_event *event)
|
|
||||||
{
|
|
||||||
int entry = sizeof(u64); /* value */
|
|
||||||
int size = 0;
|
|
||||||
int nr = 1;
|
|
||||||
|
|
||||||
if (event->attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
|
|
||||||
size += sizeof(u64);
|
|
||||||
|
|
||||||
if (event->attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
|
|
||||||
size += sizeof(u64);
|
|
||||||
|
|
||||||
if (event->attr.read_format & PERF_FORMAT_ID)
|
|
||||||
entry += sizeof(u64);
|
|
||||||
|
|
||||||
if (event->attr.read_format & PERF_FORMAT_GROUP) {
|
|
||||||
nr += event->group_leader->nr_siblings;
|
|
||||||
size += sizeof(u64);
|
|
||||||
}
|
|
||||||
|
|
||||||
size += entry * nr;
|
|
||||||
|
|
||||||
return size;
|
|
||||||
}
|
|
||||||
|
|
||||||
u64 perf_event_read_value(struct perf_event *event, u64 *enabled, u64 *running)
|
u64 perf_event_read_value(struct perf_event *event, u64 *enabled, u64 *running)
|
||||||
{
|
{
|
||||||
struct perf_event *child;
|
struct perf_event *child;
|
||||||
@ -2428,7 +2480,7 @@ perf_read_hw(struct perf_event *event, char __user *buf, size_t count)
|
|||||||
if (event->state == PERF_EVENT_STATE_ERROR)
|
if (event->state == PERF_EVENT_STATE_ERROR)
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
if (count < perf_event_read_size(event))
|
if (count < event->read_size)
|
||||||
return -ENOSPC;
|
return -ENOSPC;
|
||||||
|
|
||||||
WARN_ON_ONCE(event->ctx->parent_ctx);
|
WARN_ON_ONCE(event->ctx->parent_ctx);
|
||||||
@ -3606,59 +3658,34 @@ void perf_prepare_sample(struct perf_event_header *header,
|
|||||||
data->type = sample_type;
|
data->type = sample_type;
|
||||||
|
|
||||||
header->type = PERF_RECORD_SAMPLE;
|
header->type = PERF_RECORD_SAMPLE;
|
||||||
header->size = sizeof(*header);
|
header->size = sizeof(*header) + event->header_size;
|
||||||
|
|
||||||
header->misc = 0;
|
header->misc = 0;
|
||||||
header->misc |= perf_misc_flags(regs);
|
header->misc |= perf_misc_flags(regs);
|
||||||
|
|
||||||
if (sample_type & PERF_SAMPLE_IP) {
|
if (sample_type & PERF_SAMPLE_IP)
|
||||||
data->ip = perf_instruction_pointer(regs);
|
data->ip = perf_instruction_pointer(regs);
|
||||||
|
|
||||||
header->size += sizeof(data->ip);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (sample_type & PERF_SAMPLE_TID) {
|
if (sample_type & PERF_SAMPLE_TID) {
|
||||||
/* namespace issues */
|
/* namespace issues */
|
||||||
data->tid_entry.pid = perf_event_pid(event, current);
|
data->tid_entry.pid = perf_event_pid(event, current);
|
||||||
data->tid_entry.tid = perf_event_tid(event, current);
|
data->tid_entry.tid = perf_event_tid(event, current);
|
||||||
|
|
||||||
header->size += sizeof(data->tid_entry);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (sample_type & PERF_SAMPLE_TIME) {
|
if (sample_type & PERF_SAMPLE_TIME)
|
||||||
data->time = perf_clock();
|
data->time = perf_clock();
|
||||||
|
|
||||||
header->size += sizeof(data->time);
|
if (sample_type & PERF_SAMPLE_ID)
|
||||||
}
|
|
||||||
|
|
||||||
if (sample_type & PERF_SAMPLE_ADDR)
|
|
||||||
header->size += sizeof(data->addr);
|
|
||||||
|
|
||||||
if (sample_type & PERF_SAMPLE_ID) {
|
|
||||||
data->id = primary_event_id(event);
|
data->id = primary_event_id(event);
|
||||||
|
|
||||||
header->size += sizeof(data->id);
|
if (sample_type & PERF_SAMPLE_STREAM_ID)
|
||||||
}
|
|
||||||
|
|
||||||
if (sample_type & PERF_SAMPLE_STREAM_ID) {
|
|
||||||
data->stream_id = event->id;
|
data->stream_id = event->id;
|
||||||
|
|
||||||
header->size += sizeof(data->stream_id);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (sample_type & PERF_SAMPLE_CPU) {
|
if (sample_type & PERF_SAMPLE_CPU) {
|
||||||
data->cpu_entry.cpu = raw_smp_processor_id();
|
data->cpu_entry.cpu = raw_smp_processor_id();
|
||||||
data->cpu_entry.reserved = 0;
|
data->cpu_entry.reserved = 0;
|
||||||
|
|
||||||
header->size += sizeof(data->cpu_entry);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (sample_type & PERF_SAMPLE_PERIOD)
|
|
||||||
header->size += sizeof(data->period);
|
|
||||||
|
|
||||||
if (sample_type & PERF_SAMPLE_READ)
|
|
||||||
header->size += perf_event_read_size(event);
|
|
||||||
|
|
||||||
if (sample_type & PERF_SAMPLE_CALLCHAIN) {
|
if (sample_type & PERF_SAMPLE_CALLCHAIN) {
|
||||||
int size = 1;
|
int size = 1;
|
||||||
|
|
||||||
@ -3726,7 +3753,7 @@ perf_event_read_event(struct perf_event *event,
|
|||||||
.header = {
|
.header = {
|
||||||
.type = PERF_RECORD_READ,
|
.type = PERF_RECORD_READ,
|
||||||
.misc = 0,
|
.misc = 0,
|
||||||
.size = sizeof(read_event) + perf_event_read_size(event),
|
.size = sizeof(read_event) + event->read_size,
|
||||||
},
|
},
|
||||||
.pid = perf_event_pid(event, task),
|
.pid = perf_event_pid(event, task),
|
||||||
.tid = perf_event_tid(event, task),
|
.tid = perf_event_tid(event, task),
|
||||||
@ -5714,6 +5741,11 @@ SYSCALL_DEFINE5(perf_event_open,
|
|||||||
list_add_tail(&event->owner_entry, &current->perf_event_list);
|
list_add_tail(&event->owner_entry, &current->perf_event_list);
|
||||||
mutex_unlock(&current->perf_event_mutex);
|
mutex_unlock(&current->perf_event_mutex);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Precalculate sample_data sizes
|
||||||
|
*/
|
||||||
|
perf_event__header_size(event);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Drop the reference on the group_event after placing the
|
* Drop the reference on the group_event after placing the
|
||||||
* new event on the sibling_list. This ensures destruction
|
* new event on the sibling_list. This ensures destruction
|
||||||
|
Loading…
x
Reference in New Issue
Block a user