diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index f522a366223d..309bb2f1a1e8 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -663,7 +663,7 @@ static int i915_gem_pageflip_info(struct seq_file *m, void *data) i915_gem_request_get_seqno(work->flip_queued_req), dev_priv->next_seqno, engine->get_seqno(engine), - i915_gem_request_completed(work->flip_queued_req, true)); + i915_gem_request_completed(work->flip_queued_req)); } else seq_printf(m, "Flip not associated with any ring\n"); seq_printf(m, "Flip queued on frame %d, (was ready on frame %d), now %d\n", diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 1fefa8c495f2..0ea69c5ecc8b 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -3287,24 +3287,27 @@ i915_seqno_passed(uint32_t seq1, uint32_t seq2) return (int32_t)(seq1 - seq2) >= 0; } -static inline bool i915_gem_request_started(struct drm_i915_gem_request *req, - bool lazy_coherency) +static inline bool i915_gem_request_started(const struct drm_i915_gem_request *req) { - if (!lazy_coherency && req->engine->irq_seqno_barrier) - req->engine->irq_seqno_barrier(req->engine); return i915_seqno_passed(req->engine->get_seqno(req->engine), req->previous_seqno); } -static inline bool i915_gem_request_completed(struct drm_i915_gem_request *req, - bool lazy_coherency) +static inline bool i915_gem_request_completed(const struct drm_i915_gem_request *req) { - if (!lazy_coherency && req->engine->irq_seqno_barrier) - req->engine->irq_seqno_barrier(req->engine); return i915_seqno_passed(req->engine->get_seqno(req->engine), req->seqno); } +bool __i915_spin_request(const struct drm_i915_gem_request *request, + int state, unsigned long timeout_us); +static inline bool i915_spin_request(const struct drm_i915_gem_request *request, + int state, unsigned long timeout_us) +{ + return (i915_gem_request_started(request) && + __i915_spin_request(request, state, timeout_us)); +} + int __must_check i915_gem_get_seqno(struct drm_i915_private *dev_priv, u32 *seqno); int __must_check i915_gem_set_seqno(struct drm_device *dev, u32 seqno); @@ -3983,6 +3986,8 @@ static inline void i915_trace_irq_get(struct intel_engine_cs *engine, static inline bool __i915_request_irq_complete(struct drm_i915_gem_request *req) { + struct intel_engine_cs *engine = req->engine; + /* Ensure our read of the seqno is coherent so that we * do not "miss an interrupt" (i.e. if this is the last * request and the seqno write from the GPU is not visible @@ -3994,7 +3999,10 @@ static inline bool __i915_request_irq_complete(struct drm_i915_gem_request *req) * but it is easier and safer to do it every time the waiter * is woken. */ - if (i915_gem_request_completed(req, false)) + if (engine->irq_seqno_barrier) + engine->irq_seqno_barrier(engine); + + if (i915_gem_request_completed(req)) return true; /* We need to check whether any gpu reset happened in between diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index c9814572e346..2aef737fa761 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1375,9 +1375,9 @@ static bool busywait_stop(unsigned long timeout, unsigned cpu) return this_cpu != cpu; } -static bool __i915_spin_request(struct drm_i915_gem_request *req, int state) +bool __i915_spin_request(const struct drm_i915_gem_request *req, + int state, unsigned long timeout_us) { - unsigned long timeout; unsigned cpu; /* When waiting for high frequency requests, e.g. during synchronous @@ -1390,19 +1390,15 @@ static bool __i915_spin_request(struct drm_i915_gem_request *req, int state) * takes to sleep on a request, on the order of a microsecond. */ - /* Only spin if we know the GPU is processing this request */ - if (!i915_gem_request_started(req, true)) - return false; - - timeout = local_clock_us(&cpu) + 5; + timeout_us += local_clock_us(&cpu); do { - if (i915_gem_request_completed(req, true)) + if (i915_gem_request_completed(req)) return true; if (signal_pending_state(state, current)) break; - if (busywait_stop(timeout, cpu)) + if (busywait_stop(timeout_us, cpu)) break; cpu_relax_lowlatency(); @@ -1445,7 +1441,7 @@ int __i915_wait_request(struct drm_i915_gem_request *req, if (list_empty(&req->list)) return 0; - if (i915_gem_request_completed(req, true)) + if (i915_gem_request_completed(req)) return 0; timeout_remain = MAX_SCHEDULE_TIMEOUT; @@ -1470,7 +1466,7 @@ int __i915_wait_request(struct drm_i915_gem_request *req, gen6_rps_boost(req->i915, rps, req->emitted_jiffies); /* Optimistic spin for the next ~jiffie before touching IRQs */ - if (__i915_spin_request(req, state)) + if (i915_spin_request(req, state, 5)) goto complete; set_current_state(state); @@ -1518,6 +1514,10 @@ wakeup: */ if (__i915_request_irq_complete(req)) break; + + /* Only spin if we know the GPU is processing this request */ + if (i915_spin_request(req, state, 2)) + break; } remove_wait_queue(&req->i915->gpu_error.wait_queue, &reset); @@ -3055,8 +3055,16 @@ i915_gem_find_active_request(struct intel_engine_cs *engine) { struct drm_i915_gem_request *request; + /* We are called by the error capture and reset at a random + * point in time. In particular, note that neither is crucially + * ordered with an interrupt. After a hang, the GPU is dead and we + * assume that no more writes can happen (we waited long enough for + * all writes that were in transaction to be flushed) - adding an + * extra delay for a recent interrupt is pointless. Hence, we do + * not need an engine->irq_seqno_barrier() before the seqno reads. + */ list_for_each_entry(request, &engine->request_list, list) { - if (i915_gem_request_completed(request, false)) + if (i915_gem_request_completed(request)) continue; return request; @@ -3188,7 +3196,7 @@ i915_gem_retire_requests_ring(struct intel_engine_cs *engine) struct drm_i915_gem_request, list); - if (!i915_gem_request_completed(request, true)) + if (!i915_gem_request_completed(request)) break; i915_gem_request_retire(request); @@ -3212,7 +3220,7 @@ i915_gem_retire_requests_ring(struct intel_engine_cs *engine) } if (unlikely(engine->trace_irq_req && - i915_gem_request_completed(engine->trace_irq_req, true))) { + i915_gem_request_completed(engine->trace_irq_req))) { engine->irq_put(engine); i915_gem_request_assign(&engine->trace_irq_req, NULL); } @@ -3310,7 +3318,7 @@ i915_gem_object_flush_active(struct drm_i915_gem_object *obj) if (req == NULL) continue; - if (i915_gem_request_completed(req, true)) + if (i915_gem_request_completed(req)) i915_gem_object_retire__read(obj, i); } @@ -3418,7 +3426,7 @@ __i915_gem_object_sync(struct drm_i915_gem_object *obj, if (to == from) return 0; - if (i915_gem_request_completed(from_req, true)) + if (i915_gem_request_completed(from_req)) return 0; if (!i915_semaphore_is_enabled(to_i915(obj->base.dev))) { diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 30c181a72202..88e899b46853 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -11628,7 +11628,7 @@ static bool __pageflip_stall_check_cs(struct drm_i915_private *dev_priv, vblank = intel_crtc_get_vblank_counter(intel_crtc); if (work->flip_ready_vblank == 0) { if (work->flip_queued_req && - !i915_gem_request_completed(work->flip_queued_req, true)) + !i915_gem_request_completed(work->flip_queued_req)) return false; work->flip_ready_vblank = vblank; diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index cfe850f8815a..82c2efdcdd89 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -7767,7 +7767,7 @@ static void __intel_rps_boost_work(struct work_struct *work) struct request_boost *boost = container_of(work, struct request_boost, work); struct drm_i915_gem_request *req = boost->req; - if (!i915_gem_request_completed(req, true)) + if (!i915_gem_request_completed(req)) gen6_rps_boost(req->i915, NULL, req->emitted_jiffies); i915_gem_request_unreference(req); @@ -7781,7 +7781,7 @@ void intel_queue_rps_boost_for_request(struct drm_i915_gem_request *req) if (req == NULL || INTEL_GEN(req->i915) < 6) return; - if (i915_gem_request_completed(req, true)) + if (i915_gem_request_completed(req)) return; boost = kmalloc(sizeof(*boost), GFP_ATOMIC);