drm/i915: Reset GPU immediately if submission is disabled

If submission is disabled by the backend for any reason, reset the GPU
immediately in the heartbeat code as the backend can't be reenabled
until the GPU is reset.

Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Reviewed-by: John Harrison <John.C.Harrison@Intel.com>
Signed-off-by: John Harrison <John.C.Harrison@Intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20210727002348.97202-10-matthew.brost@intel.com
This commit is contained in:
Matthew Brost 2021-07-26 17:23:24 -07:00 committed by John Harrison
parent eb5e7da736
commit c41ee2873e
6 changed files with 79 additions and 13 deletions

@ -70,12 +70,30 @@ static void show_heartbeat(const struct i915_request *rq,
{
struct drm_printer p = drm_debug_printer("heartbeat");
intel_engine_dump(engine, &p,
"%s heartbeat {seqno:%llx:%lld, prio:%d} not ticking\n",
engine->name,
rq->fence.context,
rq->fence.seqno,
rq->sched.attr.priority);
if (!rq) {
intel_engine_dump(engine, &p,
"%s heartbeat not ticking\n",
engine->name);
} else {
intel_engine_dump(engine, &p,
"%s heartbeat {seqno:%llx:%lld, prio:%d} not ticking\n",
engine->name,
rq->fence.context,
rq->fence.seqno,
rq->sched.attr.priority);
}
}
/*
 * Report a stopped heartbeat on @engine to the GT error handler.
 *
 * On CONFIG_DRM_I915_DEBUG_GEM builds the engine state is dumped first via
 * show_heartbeat(). @rq is only forwarded to that dump; the heartbeat worker
 * passes engine->heartbeat.systole here before checking it, so @rq may be
 * NULL (show_heartbeat() handles that case).
 */
static void
reset_engine(struct intel_engine_cs *engine, struct i915_request *rq)
{
	/* The verbose engine dump is only emitted on GEM debug builds. */
	if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
		show_heartbeat(rq, engine);

	/* Hand only this engine's mask to the error handler, with capture. */
	intel_gt_handle_error(engine->gt, engine->mask, I915_ERROR_CAPTURE,
			      "stopped heartbeat on %s", engine->name);
}
static void heartbeat(struct work_struct *wrk)
@ -102,6 +120,11 @@ static void heartbeat(struct work_struct *wrk)
if (intel_gt_is_wedged(engine->gt))
goto out;
if (i915_sched_engine_disabled(engine->sched_engine)) {
reset_engine(engine, engine->heartbeat.systole);
goto out;
}
if (engine->heartbeat.systole) {
long delay = READ_ONCE(engine->props.heartbeat_interval_ms);
@ -139,13 +162,7 @@ static void heartbeat(struct work_struct *wrk)
engine->sched_engine->schedule(rq, &attr);
local_bh_enable();
} else {
if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
show_heartbeat(rq, engine);
intel_gt_handle_error(engine->gt, engine->mask,
I915_ERROR_CAPTURE,
"stopped heartbeat on %s",
engine->name);
reset_engine(engine, rq);
}
rq->emitted_jiffies = jiffies;
@ -194,6 +211,25 @@ void intel_engine_park_heartbeat(struct intel_engine_cs *engine)
i915_request_put(fetch_and_zero(&engine->heartbeat.systole));
}
void intel_gt_unpark_heartbeats(struct intel_gt *gt)
{
struct intel_engine_cs *engine;
enum intel_engine_id id;
for_each_engine(engine, gt, id)
if (intel_engine_pm_is_awake(engine))
intel_engine_unpark_heartbeat(engine);
}
/*
 * Park the heartbeat of every engine on @gt, unconditionally (no check of
 * the engine's power state is made here).
 */
void intel_gt_park_heartbeats(struct intel_gt *gt)
{
	enum intel_engine_id id;
	struct intel_engine_cs *engine;

	for_each_engine(engine, gt, id)
		intel_engine_park_heartbeat(engine);
}
void intel_engine_init_heartbeat(struct intel_engine_cs *engine)
{
INIT_DELAYED_WORK(&engine->heartbeat.work, heartbeat);

@ -7,6 +7,7 @@
#define INTEL_ENGINE_HEARTBEAT_H
struct intel_engine_cs;
struct intel_gt;
void intel_engine_init_heartbeat(struct intel_engine_cs *engine);
@ -16,6 +17,9 @@ int intel_engine_set_heartbeat(struct intel_engine_cs *engine,
void intel_engine_park_heartbeat(struct intel_engine_cs *engine);
void intel_engine_unpark_heartbeat(struct intel_engine_cs *engine);
void intel_gt_park_heartbeats(struct intel_gt *gt);
void intel_gt_unpark_heartbeats(struct intel_gt *gt);
int intel_engine_pulse(struct intel_engine_cs *engine);
int intel_engine_flush_barriers(struct intel_engine_cs *engine);

@ -10,6 +10,7 @@
#include "gt/intel_breadcrumbs.h"
#include "gt/intel_context.h"
#include "gt/intel_engine_pm.h"
#include "gt/intel_engine_heartbeat.h"
#include "gt/intel_gt.h"
#include "gt/intel_gt_irq.h"
#include "gt/intel_gt_pm.h"
@ -603,6 +604,7 @@ void intel_guc_submission_reset_prepare(struct intel_guc *guc)
return;
}
intel_gt_park_heartbeats(guc_to_gt(guc));
disable_submission(guc);
guc->interrupts.disable(guc);
@ -890,6 +892,7 @@ void intel_guc_submission_reset_finish(struct intel_guc *guc)
atomic_set(&guc->outstanding_submission_g2h, 0);
enable_submission(guc);
intel_gt_unpark_heartbeats(guc_to_gt(guc));
}
/*
@ -1866,6 +1869,11 @@ static int guc_resume(struct intel_engine_cs *engine)
return 0;
}
/*
 * GuC implementation of the i915_sched_engine ->disabled() hook.
 *
 * Returns true when the scheduling engine's tasklet has no callback set.
 * NOTE(review): presumably disable_submission() clears the tasklet callback
 * and enable_submission() restores it - confirm against those helpers.
 */
static bool guc_sched_engine_disabled(struct i915_sched_engine *sched_engine)
{
return !sched_engine->tasklet.callback;
}
static void guc_set_default_submission(struct intel_engine_cs *engine)
{
engine->submit_request = guc_submit_request;
@ -2016,6 +2024,7 @@ int intel_guc_submission_setup(struct intel_engine_cs *engine)
return -ENOMEM;
guc->sched_engine->schedule = i915_schedule;
guc->sched_engine->disabled = guc_sched_engine_disabled;
guc->sched_engine->private_data = guc;
guc->sched_engine->destroy = guc_sched_engine_destroy;
tasklet_setup(&guc->sched_engine->tasklet,

@ -440,6 +440,11 @@ static void default_destroy(struct kref *kref)
kfree(sched_engine);
}
/*
 * Default ->disabled() hook installed by i915_sched_engine_create().
 *
 * Always returns false, i.e. never reports submission as disabled; backends
 * may replace the hook with their own check. @sched_engine is unused.
 */
static bool default_disabled(struct i915_sched_engine *sched_engine)
{
return false;
}
struct i915_sched_engine *
i915_sched_engine_create(unsigned int subclass)
{
@ -454,6 +459,7 @@ i915_sched_engine_create(unsigned int subclass)
sched_engine->queue = RB_ROOT_CACHED;
sched_engine->queue_priority_hint = INT_MIN;
sched_engine->destroy = default_destroy;
sched_engine->disabled = default_disabled;
INIT_LIST_HEAD(&sched_engine->requests);
INIT_LIST_HEAD(&sched_engine->hold);

@ -96,4 +96,10 @@ void i915_request_show_with_schedule(struct drm_printer *m,
const char *prefix,
int indent);
/*
 * Ask the backend, through its ->disabled() hook, whether it has disabled
 * submission on @sched_engine.
 *
 * The hook is called unconditionally, so it must be non-NULL;
 * i915_sched_engine_create() installs a default that returns false.
 */
static inline bool
i915_sched_engine_disabled(struct i915_sched_engine *sched_engine)
{
return sched_engine->disabled(sched_engine);
}
#endif /* _I915_SCHEDULER_H_ */

@ -168,6 +168,11 @@ struct i915_sched_engine {
*/
void (*destroy)(struct kref *kref);
/**
* @disabled: check if backend has disabled submission
*/
bool (*disabled)(struct i915_sched_engine *sched_engine);
/**
* @kick_backend: kick backend after a request's priority has changed
*/