diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
index 6b15e3335dd6..49ad02c3720f 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
@@ -68,10 +68,16 @@ static bool switch_to_kernel_context(struct intel_engine_cs *engine)
 
 	/* Check again on the next retirement. */
 	engine->wakeref_serial = engine->serial + 1;
-
 	i915_request_add_active_barriers(rq);
+
+	/* Install ourselves as a preemption barrier */
+	rq->sched.attr.priority = I915_PRIORITY_UNPREEMPTABLE;
 	__i915_request_commit(rq);
 
+	/* Release our exclusive hold on the engine */
+	__intel_wakeref_defer_park(&engine->wakeref);
+	__i915_request_queue(rq, NULL);
+
 	return false;
 }
 
diff --git a/drivers/gpu/drm/i915/i915_priolist_types.h b/drivers/gpu/drm/i915/i915_priolist_types.h
index b02dea17dcab..21037a2e2038 100644
--- a/drivers/gpu/drm/i915/i915_priolist_types.h
+++ b/drivers/gpu/drm/i915/i915_priolist_types.h
@@ -16,18 +16,6 @@ enum {
 	I915_PRIORITY_MIN = I915_CONTEXT_MIN_USER_PRIORITY - 1,
 	I915_PRIORITY_NORMAL = I915_CONTEXT_DEFAULT_PRIORITY,
 	I915_PRIORITY_MAX = I915_CONTEXT_MAX_USER_PRIORITY + 1,
-
-	/*
-	 * Requests containing performance queries must not be preempted by
-	 * another context. They get scheduled with their default priority and
-	 * once they reach the execlist ports we ensure that they stick on the
-	 * HW until finished by pretending that they have maximum priority,
-	 * i.e. nothing can have higher priority and force us to usurp the
-	 * active request.
-	 */
-	I915_PRIORITY_UNPREEMPTABLE = INT_MAX,
-
-	I915_PRIORITY_INVALID = INT_MIN
 };
 
 #define I915_USER_PRIORITY_SHIFT 2
@@ -39,6 +27,19 @@ enum {
 #define I915_PRIORITY_WAIT		((u8)BIT(0))
 #define I915_PRIORITY_NOSEMAPHORE	((u8)BIT(1))
 
+/* Smallest priority value that cannot be bumped. */
+#define I915_PRIORITY_INVALID	(INT_MIN | (u8)I915_PRIORITY_MASK)
+
+/*
+ * Requests containing performance queries must not be preempted by
+ * another context. They get scheduled with their default priority and
+ * once they reach the execlist ports we ensure that they stick on the
+ * HW until finished by pretending that they have maximum priority,
+ * i.e. nothing can have higher priority and force us to usurp the
+ * active request.
+ */
+#define I915_PRIORITY_UNPREEMPTABLE INT_MAX
+
 #define __NO_PREEMPTION (I915_PRIORITY_WAIT)
 
 struct i915_priolist {
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 43175bada09e..4703aab3ae21 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -1186,6 +1186,12 @@ struct i915_request *__i915_request_commit(struct i915_request *rq)
 	list_add(&ring->active_link, &rq->i915->gt.active_rings);
 	rq->emitted_jiffies = jiffies;
 
+	return prev;
+}
+
+void __i915_request_queue(struct i915_request *rq,
+			  const struct i915_sched_attr *attr)
+{
 	/*
 	 * Let the backend know a new request has arrived that may need
 	 * to adjust the existing execution schedule due to a high priority
@@ -1199,43 +1205,15 @@ struct i915_request *__i915_request_commit(struct i915_request *rq)
 	 */
 	local_bh_disable();
 	i915_sw_fence_commit(&rq->semaphore);
-	if (engine->schedule) {
-		struct i915_sched_attr attr = rq->gem_context->sched;
-
-		/*
-		 * Boost actual workloads past semaphores!
-		 *
-		 * With semaphores we spin on one engine waiting for another,
-		 * simply to reduce the latency of starting our work when
-		 * the signaler completes. However, if there is any other
-		 * work that we could be doing on this engine instead, that
-		 * is better utilisation and will reduce the overall duration
-		 * of the current work. To avoid PI boosting a semaphore
-		 * far in the distance past over useful work, we keep a history
-		 * of any semaphore use along our dependency chain.
-		 */
-		if (!(rq->sched.flags & I915_SCHED_HAS_SEMAPHORE_CHAIN))
-			attr.priority |= I915_PRIORITY_NOSEMAPHORE;
-
-		/*
-		 * Boost priorities to new clients (new request flows).
-		 *
-		 * Allow interactive/synchronous clients to jump ahead of
-		 * the bulk clients. (FQ_CODEL)
-		 */
-		if (list_empty(&rq->sched.signalers_list))
-			attr.priority |= I915_PRIORITY_WAIT;
-
-		engine->schedule(rq, &attr);
-	}
+	if (attr && rq->engine->schedule)
+		rq->engine->schedule(rq, attr);
 	i915_sw_fence_commit(&rq->submit);
 	local_bh_enable(); /* Kick the execlists tasklet if just scheduled */
-
-	return prev;
 }
 
 void i915_request_add(struct i915_request *rq)
 {
+	struct i915_sched_attr attr = rq->gem_context->sched;
 	struct i915_request *prev;
 
 	lockdep_assert_held(&rq->timeline->mutex);
@@ -1245,6 +1223,32 @@ void i915_request_add(struct i915_request *rq)
 
 	prev = __i915_request_commit(rq);
 
+	/*
+	 * Boost actual workloads past semaphores!
+	 *
+	 * With semaphores we spin on one engine waiting for another,
+	 * simply to reduce the latency of starting our work when
+	 * the signaler completes. However, if there is any other
+	 * work that we could be doing on this engine instead, that
+	 * is better utilisation and will reduce the overall duration
+	 * of the current work. To avoid PI boosting a semaphore
+	 * far in the distance past over useful work, we keep a history
+	 * of any semaphore use along our dependency chain.
+	 */
+	if (!(rq->sched.flags & I915_SCHED_HAS_SEMAPHORE_CHAIN))
+		attr.priority |= I915_PRIORITY_NOSEMAPHORE;
+
+	/*
+	 * Boost priorities to new clients (new request flows).
+	 *
+	 * Allow interactive/synchronous clients to jump ahead of
+	 * the bulk clients. (FQ_CODEL)
+	 */
+	if (list_empty(&rq->sched.signalers_list))
+		attr.priority |= I915_PRIORITY_WAIT;
+
+	__i915_request_queue(rq, &attr);
+
 	/*
 	 * In typical scenarios, we do not expect the previous request on
 	 * the timeline to be still tracked by timeline->last_request if it
diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
index 313df3c37158..fec1d5f17c94 100644
--- a/drivers/gpu/drm/i915/i915_request.h
+++ b/drivers/gpu/drm/i915/i915_request.h
@@ -251,6 +251,8 @@ struct i915_request * __must_check
 i915_request_create(struct intel_context *ce);
 
 struct i915_request *__i915_request_commit(struct i915_request *request);
+void __i915_request_queue(struct i915_request *rq,
+			  const struct i915_sched_attr *attr);
 
 void i915_request_retire_upto(struct i915_request *rq);
 
diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c
index 0bd452e851d8..7b84ebca2901 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.c
+++ b/drivers/gpu/drm/i915/i915_scheduler.c
@@ -349,8 +349,7 @@ void i915_schedule_bump_priority(struct i915_request *rq, unsigned int bump)
 	unsigned long flags;
 
 	GEM_BUG_ON(bump & ~I915_PRIORITY_MASK);
-
-	if (READ_ONCE(rq->sched.attr.priority) == I915_PRIORITY_INVALID)
+	if (READ_ONCE(rq->sched.attr.priority) & bump)
 		return;
 
 	spin_lock_irqsave(&schedule_lock, flags);
diff --git a/drivers/gpu/drm/i915/intel_wakeref.c b/drivers/gpu/drm/i915/intel_wakeref.c
index d4443e81c1c8..868cc78048d0 100644
--- a/drivers/gpu/drm/i915/intel_wakeref.c
+++ b/drivers/gpu/drm/i915/intel_wakeref.c
@@ -57,12 +57,10 @@ static void ____intel_wakeref_put_last(struct intel_wakeref *wf)
 	if (!atomic_dec_and_test(&wf->count))
 		goto unlock;
 
+	/* ops->put() must reschedule its own release on error/deferral */
 	if (likely(!wf->ops->put(wf))) {
 		rpm_put(wf);
 		wake_up_var(&wf->wakeref);
-	} else {
-		/* ops->put() must schedule its own release on deferral */
-		atomic_set_release(&wf->count, 1);
 	}
 
 unlock:
diff --git a/drivers/gpu/drm/i915/intel_wakeref.h b/drivers/gpu/drm/i915/intel_wakeref.h
index 535a3a12864b..5f0c972a80fb 100644
--- a/drivers/gpu/drm/i915/intel_wakeref.h
+++ b/drivers/gpu/drm/i915/intel_wakeref.h
@@ -163,6 +163,17 @@ intel_wakeref_is_active(const struct intel_wakeref *wf)
 	return READ_ONCE(wf->wakeref);
 }
 
+/**
+ * __intel_wakeref_defer_park: Defer the current park callback
+ * @wf: the wakeref
+ */
+static inline void
+__intel_wakeref_defer_park(struct intel_wakeref *wf)
+{
+	INTEL_WAKEREF_BUG_ON(atomic_read(&wf->count));
+	atomic_set_release(&wf->count, 1);
+}
+
 /**
  * intel_wakeref_wait_for_idle: Wait until the wakeref is idle
  * @wf: the wakeref
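
For review context, here is a minimal user-space model of the refcount handling this patch pushes into the backend. It is an illustration only, not driver code: every name below (fake_wakeref, fake_park, fake_put, fake_defer_park) is invented for the sketch. It shows the intended flow: the final put invokes the park callback, and that callback may defer parking by reinstating a count of 1 (the job __intel_wakeref_defer_park() now does for the barrier request queued from switch_to_kernel_context()), so the wakeref stays held until the deferred release runs and drops the count again.

/* Build with: cc -std=c11 model.c */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

struct fake_wakeref {
	atomic_int count;
	bool parked;
};

/* Hand the reference back, as __intel_wakeref_defer_park() does. */
static void fake_defer_park(struct fake_wakeref *wf)
{
	atomic_store_explicit(&wf->count, 1, memory_order_release);
}

/* Stand-in for the park callback (ops->put): true means "deferred". */
static bool fake_park(struct fake_wakeref *wf, bool need_barrier)
{
	if (need_barrier) {
		fake_defer_park(wf);	/* keep the "engine" referenced */
		return true;		/* release happens later */
	}
	wf->parked = true;
	return false;			/* really released */
}

/* Stand-in for ____intel_wakeref_put_last() after this patch. */
static void fake_put(struct fake_wakeref *wf, bool need_barrier)
{
	if (atomic_fetch_sub(&wf->count, 1) != 1)
		return;			/* not the last reference */
	/* the put callback must reschedule its own release on deferral */
	if (!fake_park(wf, need_barrier))
		printf("parked\n");
	else
		printf("park deferred, count=%d\n", atomic_load(&wf->count));
}

int main(void)
{
	struct fake_wakeref wf = { .count = 1, .parked = false };

	fake_put(&wf, true);	/* last put defers, like the barrier request */
	fake_put(&wf, false);	/* "retiring" the barrier finally parks */
	return 0;
}

With the deferral owned by the callback itself, the generic last-put path no longer needs the else branch that reset the count, which is why ____intel_wakeref_put_last() above loses it.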