perf/x86/amd/ibs: Fix pmu::stop() nesting
Patch5a50f52917
("perf/x86/ibs: Fix race with IBS_STARTING state") closed a big hole while opening another, smaller hole. Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com> Cc: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Jiri Olsa <jolsa@redhat.com> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Stephane Eranian <eranian@google.com> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Vince Weaver <vincent.weaver@maine.edu> Fixes:5a50f52917
("perf/x86/ibs: Fix race with IBS_STARTING state") Signed-off-by: Ingo Molnar <mingo@kernel.org>
This commit is contained in:
parent
201c2f85bd
commit
85dc600263
@ -28,10 +28,46 @@ static u32 ibs_caps;
|
||||
#define IBS_FETCH_CONFIG_MASK (IBS_FETCH_RAND_EN | IBS_FETCH_MAX_CNT)
|
||||
#define IBS_OP_CONFIG_MASK IBS_OP_MAX_CNT
|
||||
|
||||
|
||||
/*
|
||||
* IBS states:
|
||||
*
|
||||
* ENABLED; tracks the pmu::add(), pmu::del() state, when set the counter is taken
|
||||
* and any further add()s must fail.
|
||||
*
|
||||
* STARTED/STOPPING/STOPPED; deal with pmu::start(), pmu::stop() state but are
|
||||
* complicated by the fact that the IBS hardware can send late NMIs (ie. after
|
||||
* we've cleared the EN bit).
|
||||
*
|
||||
* In order to consume these late NMIs we have the STOPPED state, any NMI that
|
||||
* happens after we've cleared the EN state will clear this bit and report the
|
||||
* NMI handled (this is fundamentally racy in the face or multiple NMI sources,
|
||||
* someone else can consume our BIT and our NMI will go unhandled).
|
||||
*
|
||||
* And since we cannot set/clear this separate bit together with the EN bit,
|
||||
* there are races; if we cleared STARTED early, an NMI could land in
|
||||
* between clearing STARTED and clearing the EN bit (in fact multiple NMIs
|
||||
* could happen if the period is small enough), and consume our STOPPED bit
|
||||
* and trigger streams of unhandled NMIs.
|
||||
*
|
||||
* If, however, we clear STARTED late, an NMI can hit between clearing the
|
||||
* EN bit and clearing STARTED, still see STARTED set and process the event.
|
||||
* If this event will have the VALID bit clear, we bail properly, but this
|
||||
* is not a given. With VALID set we can end up calling pmu::stop() again
|
||||
* (the throttle logic) and trigger the WARNs in there.
|
||||
*
|
||||
* So what we do is set STOPPING before clearing EN to avoid the pmu::stop()
|
||||
* nesting, and clear STARTED late, so that we have a well defined state over
|
||||
* the clearing of the EN bit.
|
||||
*
|
||||
* XXX: we could probably be using !atomic bitops for all this.
|
||||
*/
|
||||
|
||||
enum ibs_states {
|
||||
IBS_ENABLED = 0,
|
||||
IBS_STARTED = 1,
|
||||
IBS_STOPPING = 2,
|
||||
IBS_STOPPED = 3,
|
||||
|
||||
IBS_MAX_STATES,
|
||||
};
|
||||
@ -377,11 +413,10 @@ static void perf_ibs_start(struct perf_event *event, int flags)
|
||||
|
||||
perf_ibs_set_period(perf_ibs, hwc, &period);
|
||||
/*
|
||||
* Set STARTED before enabling the hardware, such that
|
||||
* a subsequent NMI must observe it. Then clear STOPPING
|
||||
* such that we don't consume NMIs by accident.
|
||||
* Set STARTED before enabling the hardware, such that a subsequent NMI
|
||||
* must observe it.
|
||||
*/
|
||||
set_bit(IBS_STARTED, pcpu->state);
|
||||
set_bit(IBS_STARTED, pcpu->state);
|
||||
clear_bit(IBS_STOPPING, pcpu->state);
|
||||
perf_ibs_enable_event(perf_ibs, hwc, period >> 4);
|
||||
|
||||
@ -396,6 +431,9 @@ static void perf_ibs_stop(struct perf_event *event, int flags)
|
||||
u64 config;
|
||||
int stopping;
|
||||
|
||||
if (test_and_set_bit(IBS_STOPPING, pcpu->state))
|
||||
return;
|
||||
|
||||
stopping = test_bit(IBS_STARTED, pcpu->state);
|
||||
|
||||
if (!stopping && (hwc->state & PERF_HES_UPTODATE))
|
||||
@ -405,12 +443,12 @@ static void perf_ibs_stop(struct perf_event *event, int flags)
|
||||
|
||||
if (stopping) {
|
||||
/*
|
||||
* Set STOPPING before disabling the hardware, such that it
|
||||
* Set STOPPED before disabling the hardware, such that it
|
||||
* must be visible to NMIs the moment we clear the EN bit,
|
||||
* at which point we can generate an !VALID sample which
|
||||
* we need to consume.
|
||||
*/
|
||||
set_bit(IBS_STOPPING, pcpu->state);
|
||||
set_bit(IBS_STOPPED, pcpu->state);
|
||||
perf_ibs_disable_event(perf_ibs, hwc, config);
|
||||
/*
|
||||
* Clear STARTED after disabling the hardware; if it were
|
||||
@ -556,7 +594,7 @@ fail:
|
||||
* with samples that even have the valid bit cleared.
|
||||
* Mark all this NMIs as handled.
|
||||
*/
|
||||
if (test_and_clear_bit(IBS_STOPPING, pcpu->state))
|
||||
if (test_and_clear_bit(IBS_STOPPED, pcpu->state))
|
||||
return 1;
|
||||
|
||||
return 0;
|
||||
|
Loading…
Reference in New Issue
Block a user