drm/i915/gt: Resubmit the virtual engine on schedule-out

Having recognised that we do not change the sibling until we schedule
out, we can then defer the decision to resubmit the virtual engine from
the unwind of the active queue to scheduling out of the virtual context.
This improves our resilience in virtual engine scheduling, and should
eliminate the rare cases of gem_exec_balance failing.

By keeping the unwind order intact on the local engine, we can preserve
data dependency ordering while doing a preempt-to-busy pass until we
have determined the new ELSP. This means that if we try to timeslice
between a virtual engine and a data-dependent ordinary request, the pair
will maintain their relative ordering and we will avoid the
resubmission, cancelling the timeslicing until further change.

The dilemma though is that we then may end up in a situation where the
'demotion' of the virtual request to an ordinary request in the engine
queue results in filling the ELSP[] with virtual requests instead of
spreading the load across the engines. To compensate for this, we mark
each virtual request and refuse to resubmit a virtual request in the
secondary ELSP slots, thus forcing subsequent virtual requests to be
scheduled out after timeslicing. By delaying the decision until we
schedule out, we will avoid unnecessary resubmission.

Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/2079
Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/2098
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20201224135544.1713-7-chris@chris-wilson.co.uk
This commit is contained in:
Chris Wilson 2020-12-24 13:55:42 +00:00
parent 66e40750d2
commit f81475bb5b
2 changed files with 83 additions and 53 deletions

View File

@ -388,38 +388,23 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine)
__i915_request_unsubmit(rq);
/*
* Push the request back into the queue for later resubmission.
* If this request is not native to this physical engine (i.e.
* it came from a virtual source), push it back onto the virtual
* engine so that it can be moved across onto another physical
* engine as load dictates.
*/
if (likely(rq->execution_mask == engine->mask)) {
GEM_BUG_ON(rq_prio(rq) == I915_PRIORITY_INVALID);
if (rq_prio(rq) != prio) {
prio = rq_prio(rq);
pl = i915_sched_lookup_priolist(engine, prio);
}
GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root));
list_move(&rq->sched.link, pl);
set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
/* Check in case we rollback so far we wrap [size/2] */
if (intel_ring_direction(rq->ring,
rq->tail,
rq->ring->tail + 8) > 0)
rq->context->lrc.desc |= CTX_DESC_FORCE_RESTORE;
active = rq;
} else {
struct intel_engine_cs *owner = rq->context->engine;
WRITE_ONCE(rq->engine, owner);
owner->submit_request(rq);
active = NULL;
GEM_BUG_ON(rq_prio(rq) == I915_PRIORITY_INVALID);
if (rq_prio(rq) != prio) {
prio = rq_prio(rq);
pl = i915_sched_lookup_priolist(engine, prio);
}
GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root));
list_move(&rq->sched.link, pl);
set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
/* Check in case we rollback so far we wrap [size/2] */
if (intel_ring_direction(rq->ring,
rq->tail,
rq->ring->tail + 8) > 0)
rq->context->lrc.desc |= CTX_DESC_FORCE_RESTORE;
active = rq;
}
return active;
@ -578,9 +563,9 @@ static inline void execlists_schedule_in(struct i915_request *rq, int idx)
GEM_BUG_ON(intel_context_inflight(ce) != rq->engine);
}
static void kick_siblings(struct i915_request *rq, struct intel_context *ce)
static void
resubmit_virtual_request(struct i915_request *rq, struct virtual_engine *ve)
{
struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
struct intel_engine_cs *engine = rq->engine;
/* Flush concurrent rcu iterators in signal_irq_work */
@ -598,6 +583,30 @@ static void kick_siblings(struct i915_request *rq, struct intel_context *ce)
cpu_relax();
}
spin_lock_irq(&engine->active.lock);
clear_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
WRITE_ONCE(rq->engine, &ve->base);
ve->base.submit_request(rq);
spin_unlock_irq(&engine->active.lock);
}
static void kick_siblings(struct i915_request *rq, struct intel_context *ce)
{
struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
struct intel_engine_cs *engine = rq->engine;
/*
* This engine is now too busy to run this virtual request, so
* see if we can find an alternative engine for it to execute on.
* Once a request has become bonded to this engine, we treat it the
* same as other native request.
*/
if (i915_request_in_priority_queue(rq) &&
rq->execution_mask != engine->mask)
resubmit_virtual_request(rq, ve);
if (READ_ONCE(ve->request))
tasklet_hi_schedule(&ve->base.execlists.tasklet);
}
@ -843,6 +852,20 @@ assert_pending_valid(const struct intel_engine_execlists *execlists,
}
sentinel = i915_request_has_sentinel(rq);
/*
* We want virtual requests to only be in the first slot so
* that they are never stuck behind a hog and can be immediately
* transferred onto the next idle engine.
*/
if (rq->execution_mask != engine->mask &&
port != execlists->pending) {
GEM_TRACE_ERR("%s: virtual engine:%llx not in prime position[%zd]\n",
engine->name,
ce->timeline->fence_context,
port - execlists->pending);
return false;
}
/* Hold tightly onto the lock to prevent concurrent retires! */
if (!spin_trylock_irqsave(&rq->lock, flags))
continue;
@ -1502,6 +1525,15 @@ unlock:
if (i915_request_has_sentinel(last))
goto done;
/*
* We avoid submitting virtual requests into
* the secondary ports so that we can migrate
* the request immediately to another engine
* rather than wait for the primary request.
*/
if (rq->execution_mask != engine->mask)
goto done;
/*
* If GVT overrides us we only ever submit
* port[0], leaving port[1] empty. Note that we
@ -3562,7 +3594,6 @@ unlock_engine:
static void virtual_submit_request(struct i915_request *rq)
{
struct virtual_engine *ve = to_virtual_engine(rq->engine);
struct i915_request *old;
unsigned long flags;
ENGINE_TRACE(&ve->base, "rq=%llx:%lld\n",
@ -3573,28 +3604,27 @@ static void virtual_submit_request(struct i915_request *rq)
spin_lock_irqsave(&ve->base.active.lock, flags);
old = ve->request;
if (old) { /* background completion event from preempt-to-busy */
GEM_BUG_ON(!__i915_request_is_complete(old));
__i915_request_submit(old);
i915_request_put(old);
}
/* By the time we resubmit a request, it may be completed */
if (__i915_request_is_complete(rq)) {
__i915_request_submit(rq);
ve->base.execlists.queue_priority_hint = INT_MIN;
ve->request = NULL;
} else {
ve->base.execlists.queue_priority_hint = rq_prio(rq);
ve->request = i915_request_get(rq);
GEM_BUG_ON(!list_empty(virtual_queue(ve)));
list_move_tail(&rq->sched.link, virtual_queue(ve));
tasklet_hi_schedule(&ve->base.execlists.tasklet);
goto unlock;
}
if (ve->request) { /* background completion from preempt-to-busy */
GEM_BUG_ON(!i915_request_completed(ve->request));
__i915_request_submit(ve->request);
i915_request_put(ve->request);
}
ve->base.execlists.queue_priority_hint = rq_prio(rq);
ve->request = i915_request_get(rq);
GEM_BUG_ON(!list_empty(virtual_queue(ve)));
list_move_tail(&rq->sched.link, virtual_queue(ve));
tasklet_hi_schedule(&ve->base.execlists.tasklet);
unlock:
spin_unlock_irqrestore(&ve->base.active.lock, flags);
}

View File

@ -4566,7 +4566,7 @@ static int reset_virtual_engine(struct intel_gt *gt,
spin_lock_irq(&engine->active.lock);
__unwind_incomplete_requests(engine);
spin_unlock_irq(&engine->active.lock);
GEM_BUG_ON(rq->engine != ve->engine);
GEM_BUG_ON(rq->engine != engine);
/* Reset the engine while keeping our active request on hold */
execlists_hold(engine, rq);