drm/i915: Serialise concurrent calls to i915_gem_set_wedged()
Make i915_gem_set_wedged() and i915_gem_unset_wedged() behaviour more
consistent if called concurrently, and only do the wedging and reporting
once, curtailing any possible race where we start unwedging in the middle
of a wedge.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20190114210408.4561-2-chris@chris-wilson.co.uk
commit 18bb2bccb5
parent 204474a6b8
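In outline: both wedging and unwedging now run under a new per-device
wedge_mutex, and i915_gem_set_wedged() returns early if the I915_WEDGED
bit is already set, so only the first caller performs (and reports) the
wedge, and an unwedge can no longer begin while a wedge is still in
progress. A rough userspace sketch of that pattern follows; it is an
analogy only, and the toy_* names and pthread locking are illustrative
assumptions, not i915 code.

/*
 * Minimal sketch of mutex-serialised, flag-guarded wedging:
 * the mutex serialises the wedge/unwedge transitions, and an
 * early test of the wedged flag makes a second, concurrent
 * wedge a silent no-op, so wedging and reporting happen once.
 */
#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

struct toy_error {
	pthread_mutex_t wedge_mutex;	/* serialises wedging/unwedging */
	atomic_bool wedged;		/* stands in for the I915_WEDGED bit */
};

static void toy_set_wedged(struct toy_error *e)
{
	pthread_mutex_lock(&e->wedge_mutex);
	if (atomic_load(&e->wedged)) {
		/* Already wedged by another caller; report nothing. */
		pthread_mutex_unlock(&e->wedge_mutex);
		return;
	}

	printf("wedging: stopping submission\n");
	/* ... stop submission and cancel in-flight work here ... */

	/* Publish the wedged state only after the takeover is complete. */
	atomic_store(&e->wedged, true);
	pthread_mutex_unlock(&e->wedge_mutex);
}

static bool toy_unset_wedged(struct toy_error *e)
{
	bool ret = false;

	if (!atomic_load(&e->wedged))
		return true;	/* nothing to recover */

	pthread_mutex_lock(&e->wedge_mutex);

	/*
	 * ... wait for stale requests and restart submission here;
	 * on failure, fall through with ret still false ...
	 */
	atomic_store(&e->wedged, false);
	ret = true;

	pthread_mutex_unlock(&e->wedge_mutex);
	return ret;
}

int main(void)
{
	struct toy_error e = {
		.wedge_mutex = PTHREAD_MUTEX_INITIALIZER,
		.wedged = false,
	};

	toy_set_wedged(&e);	/* first caller wedges and reports */
	toy_set_wedged(&e);	/* second caller early-outs under the lock */
	printf("recovered: %s\n", toy_unset_wedged(&e) ? "yes" : "no");
	return 0;
}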
@@ -3187,10 +3187,15 @@ static void nop_submit_request(struct i915_request *request)
 
 void i915_gem_set_wedged(struct drm_i915_private *i915)
 {
+	struct i915_gpu_error *error = &i915->gpu_error;
 	struct intel_engine_cs *engine;
 	enum intel_engine_id id;
 
-	GEM_TRACE("start\n");
+	mutex_lock(&error->wedge_mutex);
+	if (test_bit(I915_WEDGED, &error->flags)) {
+		mutex_unlock(&error->wedge_mutex);
+		return;
+	}
 
 	if (GEM_SHOW_DEBUG() && !intel_engines_are_idle(i915)) {
 		struct drm_printer p = drm_debug_printer(__func__);
@@ -3199,8 +3204,7 @@ void i915_gem_set_wedged(struct drm_i915_private *i915)
 			intel_engine_dump(engine, &p, "%s\n", engine->name);
 	}
 
-	if (test_and_set_bit(I915_WEDGED, &i915->gpu_error.flags))
-		goto out;
+	GEM_TRACE("start\n");
 
 	/*
 	 * First, stop submission to hw, but do not yet complete requests by
@@ -3236,23 +3240,31 @@ void i915_gem_set_wedged(struct drm_i915_private *i915)
 		intel_engine_wakeup(engine);
 	}
 
-out:
+	smp_mb__before_atomic();
+	set_bit(I915_WEDGED, &error->flags);
+
 	GEM_TRACE("end\n");
+	mutex_unlock(&error->wedge_mutex);
 
-	wake_up_all(&i915->gpu_error.reset_queue);
+	wake_up_all(&error->reset_queue);
 }
 
 bool i915_gem_unset_wedged(struct drm_i915_private *i915)
 {
+	struct i915_gpu_error *error = &i915->gpu_error;
 	struct i915_timeline *tl;
+	bool ret = false;
 
 	lockdep_assert_held(&i915->drm.struct_mutex);
-	if (!test_bit(I915_WEDGED, &i915->gpu_error.flags))
+
+	if (!test_bit(I915_WEDGED, &error->flags))
 		return true;
 
 	if (!i915->gt.scratch) /* Never full initialised, recovery impossible */
 		return false;
 
+	mutex_lock(&error->wedge_mutex);
+
 	GEM_TRACE("start\n");
 
 	/*
@@ -3286,7 +3298,7 @@ bool i915_gem_unset_wedged(struct drm_i915_private *i915)
 		 */
 		if (dma_fence_default_wait(&rq->fence, true,
 					   MAX_SCHEDULE_TIMEOUT) < 0)
-			return false;
+			goto unlock;
 	}
 	i915_retire_requests(i915);
 	GEM_BUG_ON(i915->gt.active_requests);
@@ -3309,8 +3321,11 @@ bool i915_gem_unset_wedged(struct drm_i915_private *i915)
 
 	smp_mb__before_atomic(); /* complete takeover before enabling execbuf */
 	clear_bit(I915_WEDGED, &i915->gpu_error.flags);
+	ret = true;
+unlock:
+	mutex_unlock(&i915->gpu_error.wedge_mutex);
 
-	return true;
+	return ret;
 }
 
 static void
@@ -5706,6 +5721,7 @@ int i915_gem_init_early(struct drm_i915_private *dev_priv)
 			  i915_gem_idle_work_handler);
 	init_waitqueue_head(&dev_priv->gpu_error.wait_queue);
 	init_waitqueue_head(&dev_priv->gpu_error.reset_queue);
+	mutex_init(&dev_priv->gpu_error.wedge_mutex);
 
 	atomic_set(&dev_priv->mm.bsd_engine_dispatch_index, 0);
 
@@ -271,8 +271,8 @@ struct i915_gpu_error {
 #define I915_RESET_BACKOFF	0
 #define I915_RESET_HANDOFF	1
 #define I915_RESET_MODESET	2
+#define I915_RESET_ENGINE	3
 #define I915_WEDGED		(BITS_PER_LONG - 1)
-#define I915_RESET_ENGINE	(I915_WEDGED - I915_NUM_ENGINES)
 
 	/** Number of times an engine has been reset */
 	u32 reset_engine_count[I915_NUM_ENGINES];
@@ -283,6 +283,8 @@ struct i915_gpu_error {
 	/** Reason for the current *global* reset */
 	const char *reason;
 
+	struct mutex wedge_mutex; /* serialises wedging/unwedging */
+
 	/**
 	 * Waitqueue to signal when a hang is detected. Used to for waiters
 	 * to release the struct_mutex for the reset to procede.
@@ -188,6 +188,7 @@ struct drm_i915_private *mock_gem_device(void)
 
 	init_waitqueue_head(&i915->gpu_error.wait_queue);
 	init_waitqueue_head(&i915->gpu_error.reset_queue);
+	mutex_init(&i915->gpu_error.wedge_mutex);
 
 	i915->wq = alloc_ordered_workqueue("mock", 0);
 	if (!i915->wq)