perf_events: Add fast-path to the rescheduling code

Implement correct fast-path scheduling, i.e., try to reuse each event's
previous counter assignment before falling back to the slow path.

Signed-off-by: Stephane Eranian <eranian@google.com>
[ split from larger patch ]
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <4b588464.1818d00a.4456.383b@mx.google.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
This commit is contained in:
Stephane Eranian 2010-01-21 17:39:01 +02:00 committed by Ingo Molnar
parent 1da53e0230
commit 8113070d66

View File

@ -1244,6 +1244,46 @@ static int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
constraints[i]); constraints[i]);
} }
/*
* fastpath, try to reuse previous register
*/
for (i = 0, num = n; i < n; i++, num--) {
hwc = &cpuc->event_list[i]->hw;
c = (unsigned long *)constraints[i];
/* never assigned */
if (hwc->idx == -1)
break;
/* constraint still honored */
if (!test_bit(hwc->idx, c))
break;
/* not already used */
if (test_bit(hwc->idx, used_mask))
break;
#if 0
pr_debug("CPU%d fast config=0x%llx idx=%d assign=%c\n",
smp_processor_id(),
hwc->config,
hwc->idx,
assign ? 'y' : 'n');
#endif
set_bit(hwc->idx, used_mask);
if (assign)
assign[i] = hwc->idx;
}
if (!num)
goto done;
/*
* begin slow path
*/
bitmap_zero(used_mask, X86_PMC_IDX_MAX);
/* /*
* weight = number of possible counters * weight = number of possible counters
* *
@ -1263,10 +1303,9 @@ static int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
if (x86_pmu.num_events_fixed) if (x86_pmu.num_events_fixed)
wmax++; wmax++;
num = n; for (w = 1, num = n; num && w <= wmax; w++) {
for (w = 1; num && w <= wmax; w++) {
/* for each event */ /* for each event */
for (i = 0; i < n; i++) { for (i = 0; num && i < n; i++) {
c = (unsigned long *)constraints[i]; c = (unsigned long *)constraints[i];
hwc = &cpuc->event_list[i]->hw; hwc = &cpuc->event_list[i]->hw;
@ -1274,28 +1313,6 @@ static int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
if (weight != w) if (weight != w)
continue; continue;
/*
* try to reuse previous assignment
*
* This is possible despite the fact that
* events or events order may have changed.
*
* What matters is the level of constraints
* of an event and this is constant for now.
*
* This is possible also because we always
* scan from most to least constrained. Thus,
* if a counter can be reused, it means no
* more constrained events needed it. And
* next events will either compete for it
* (which cannot be solved anyway) or they
* have fewer constraints, and they can use
* another counter.
*/
j = hwc->idx;
if (j != -1 && !test_bit(j, used_mask))
goto skip;
for_each_bit(j, c, X86_PMC_IDX_MAX) { for_each_bit(j, c, X86_PMC_IDX_MAX) {
if (!test_bit(j, used_mask)) if (!test_bit(j, used_mask))
break; break;
@ -1303,22 +1320,23 @@ static int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
if (j == X86_PMC_IDX_MAX) if (j == X86_PMC_IDX_MAX)
break; break;
skip:
set_bit(j, used_mask);
#if 0 #if 0
pr_debug("CPU%d config=0x%llx idx=%d assign=%c\n", pr_debug("CPU%d slow config=0x%llx idx=%d assign=%c\n",
smp_processor_id(), smp_processor_id(),
hwc->config, hwc->config,
j, j,
assign ? 'y' : 'n'); assign ? 'y' : 'n');
#endif #endif
set_bit(j, used_mask);
if (assign) if (assign)
assign[i] = j; assign[i] = j;
num--; num--;
} }
} }
done:
/* /*
* scheduling failed or is just a simulation, * scheduling failed or is just a simulation,
* free resources if necessary * free resources if necessary
@ -1357,7 +1375,7 @@ static int collect_events(struct cpu_hw_events *cpuc, struct perf_event *leader,
list_for_each_entry(event, &leader->sibling_list, group_entry) { list_for_each_entry(event, &leader->sibling_list, group_entry) {
if (!is_x86_event(event) || if (!is_x86_event(event) ||
event->state == PERF_EVENT_STATE_OFF) event->state <= PERF_EVENT_STATE_OFF)
continue; continue;
if (n >= max_count) if (n >= max_count)
@ -2184,6 +2202,8 @@ static void amd_get_event_constraints(struct cpu_hw_events *cpuc,
struct perf_event *event, struct perf_event *event,
u64 *idxmsk) u64 *idxmsk)
{ {
/* no constraints, means supports all generic counters */
bitmap_fill((unsigned long *)idxmsk, x86_pmu.num_events);
} }
static int x86_event_sched_in(struct perf_event *event, static int x86_event_sched_in(struct perf_event *event,
@ -2258,7 +2278,7 @@ int hw_perf_group_sched_in(struct perf_event *leader,
n1 = 1; n1 = 1;
list_for_each_entry(sub, &leader->sibling_list, group_entry) { list_for_each_entry(sub, &leader->sibling_list, group_entry) {
if (sub->state != PERF_EVENT_STATE_OFF) { if (sub->state > PERF_EVENT_STATE_OFF) {
ret = x86_event_sched_in(sub, cpuctx, cpu); ret = x86_event_sched_in(sub, cpuctx, cpu);
if (ret) if (ret)
goto undo; goto undo;
@ -2613,12 +2633,23 @@ static int validate_group(struct perf_event *event)
const struct pmu *hw_perf_event_init(struct perf_event *event) const struct pmu *hw_perf_event_init(struct perf_event *event)
{ {
const struct pmu *tmp;
int err; int err;
err = __hw_perf_event_init(event); err = __hw_perf_event_init(event);
if (!err) { if (!err) {
/*
* we temporarily connect event to its pmu
* such that validate_group() can classify
* it as an x86 event using is_x86_event()
*/
tmp = event->pmu;
event->pmu = &pmu;
if (event->group_leader != event) if (event->group_leader != event)
err = validate_group(event); err = validate_group(event);
event->pmu = tmp;
} }
if (err) { if (err) {
if (event->destroy) if (event->destroy)