@@ -208,9 +208,9 @@
 
 /* Typical size of the average request (2 pipecontrols and a MI_BB) */
 #define EXECLISTS_REQUEST_SIZE 64 /* bytes */
-
 #define WA_TAIL_DWORDS 2
-
+#define WA_TAIL_BYTES (sizeof(u32) * WA_TAIL_DWORDS)
+#define PREEMPT_ID 0x1
 
 static int execlists_context_deferred_alloc(struct i915_gem_context *ctx,
 					    struct intel_engine_cs *engine);
@@ -429,6 +429,12 @@ static u64 execlists_update_context(struct drm_i915_gem_request *rq)
 	return ce->lrc_desc;
 }
 
+static inline void elsp_write(u64 desc, u32 __iomem *elsp)
+{
+	writel(upper_32_bits(desc), elsp);
+	writel(lower_32_bits(desc), elsp);
+}
+
 static void execlists_submit_ports(struct intel_engine_cs *engine)
 {
 	struct execlist_port *port = engine->execlists.port;
@@ -454,8 +460,7 @@ static void execlists_submit_ports(struct intel_engine_cs *engine)
 			desc = 0;
 		}
 
-		writel(upper_32_bits(desc), elsp);
-		writel(lower_32_bits(desc), elsp);
+		elsp_write(desc, elsp);
 	}
 }
 
@@ -488,26 +493,43 @@ static void port_assign(struct execlist_port *port,
 	port_set(port, port_pack(i915_gem_request_get(rq), port_count(port)));
 }
 
+static void inject_preempt_context(struct intel_engine_cs *engine)
+{
+	struct intel_context *ce =
+		&engine->i915->preempt_context->engine[engine->id];
+	u32 __iomem *elsp =
+		engine->i915->regs + i915_mmio_reg_offset(RING_ELSP(engine));
+	unsigned int n;
+
+	GEM_BUG_ON(engine->i915->preempt_context->hw_id != PREEMPT_ID);
+	GEM_BUG_ON(!IS_ALIGNED(ce->ring->size, WA_TAIL_BYTES));
+
+	memset(ce->ring->vaddr + ce->ring->tail, 0, WA_TAIL_BYTES);
+	ce->ring->tail += WA_TAIL_BYTES;
+	ce->ring->tail &= (ce->ring->size - 1);
+	ce->lrc_reg_state[CTX_RING_TAIL+1] = ce->ring->tail;
+
+	for (n = execlists_num_ports(&engine->execlists); --n; )
+		elsp_write(0, elsp);
+
+	elsp_write(ce->lrc_desc, elsp);
+}
+
+static bool can_preempt(struct intel_engine_cs *engine)
+{
+	return INTEL_INFO(engine->i915)->has_logical_ring_preemption;
+}
+
 static void execlists_dequeue(struct intel_engine_cs *engine)
 {
-	struct drm_i915_gem_request *last;
 	struct intel_engine_execlists * const execlists = &engine->execlists;
 	struct execlist_port *port = execlists->port;
 	const struct execlist_port * const last_port =
 		&execlists->port[execlists->port_mask];
+	struct drm_i915_gem_request *last = port_request(port);
 	struct rb_node *rb;
 	bool submit = false;
 
-	last = port_request(port);
-	if (last)
-		/* WaIdleLiteRestore:bdw,skl
-		 * Apply the wa NOOPs to prevent ring:HEAD == req:TAIL
-		 * as we resubmit the request. See gen8_emit_breadcrumb()
-		 * for where we prepare the padding after the end of the
-		 * request.
-		 */
-		last->tail = last->wa_tail;
-
 	/* Hardware submission is through 2 ports. Conceptually each port
 	 * has a (RING_START, RING_HEAD, RING_TAIL) tuple. RING_START is
 	 * static for a context, and unique to each, so we only execute
@@ -532,7 +554,65 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 	spin_lock_irq(&engine->timeline->lock);
 	rb = execlists->first;
 	GEM_BUG_ON(rb_first(&execlists->queue) != rb);
-	while (rb) {
+	if (!rb)
+		goto unlock;
+
+	if (last) {
+		/*
+		 * Don't resubmit or switch until all outstanding
+		 * preemptions (lite-restore) are seen. Then we
+		 * know the next preemption status we see corresponds
+		 * to this ELSP update.
+		 */
+		if (port_count(&port[0]) > 1)
+			goto unlock;
+
+		if (can_preempt(engine) &&
+		    rb_entry(rb, struct i915_priolist, node)->priority >
+		    max(last->priotree.priority, 0)) {
+			/*
+			 * Switch to our empty preempt context so
+			 * the state of the GPU is known (idle).
+			 */
+			inject_preempt_context(engine);
+			execlists->preempt = true;
+			goto unlock;
+		} else {
+			/*
+			 * In theory, we could coalesce more requests onto
+			 * the second port (the first port is active, with
+			 * no preemptions pending). However, that means we
+			 * then have to deal with the possible lite-restore
+			 * of the second port (as we submit the ELSP, there
+			 * may be a context-switch) but also we may complete
+			 * the resubmission before the context-switch. Ergo,
+			 * coalescing onto the second port will cause a
+			 * preemption event, but we cannot predict whether
+			 * that will affect port[0] or port[1].
+			 *
+			 * If the second port is already active, we can wait
+			 * until the next context-switch before contemplating
+			 * new requests. The GPU will be busy and we should be
+			 * able to resubmit the new ELSP before it idles,
+			 * avoiding pipeline bubbles (momentary pauses where
+			 * the driver is unable to keep up the supply of new
+			 * work).
+			 */
+			if (port_count(&port[1]))
+				goto unlock;
+
+			/* WaIdleLiteRestore:bdw,skl
+			 * Apply the wa NOOPs to prevent
+			 * ring:HEAD == req:TAIL as we resubmit the
+			 * request. See gen8_emit_breadcrumb() for
+			 * where we prepare the padding after the
+			 * end of the request.
+			 */
+			last->tail = last->wa_tail;
+		}
+	}
+
+	do {
 		struct i915_priolist *p = rb_entry(rb, typeof(*p), node);
 		struct drm_i915_gem_request *rq, *rn;
 
@@ -595,11 +675,12 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 		INIT_LIST_HEAD(&p->requests);
 		if (p->priority != I915_PRIORITY_NORMAL)
 			kmem_cache_free(engine->i915->priorities, p);
-	}
+	} while (rb);
 done:
 	execlists->first = rb;
 	if (submit)
 		port_assign(port, last);
+unlock:
 	spin_unlock_irq(&engine->timeline->lock);
 
 	if (submit)
@@ -680,13 +761,6 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine)
 	spin_unlock_irqrestore(&engine->timeline->lock, flags);
 }
 
-static bool execlists_elsp_ready(const struct intel_engine_cs *engine)
-{
-	const struct execlist_port *port = engine->execlists.port;
-
-	return port_count(&port[0]) + port_count(&port[1]) < 2;
-}
-
 /*
  * Check the unread Context Status Buffers and manage the submission of new
  * contexts to the ELSP accordingly.
@@ -695,7 +769,7 @@ static void intel_lrc_irq_handler(unsigned long data)
 {
 	struct intel_engine_cs * const engine = (struct intel_engine_cs *)data;
 	struct intel_engine_execlists * const execlists = &engine->execlists;
-	struct execlist_port *port = execlists->port;
+	struct execlist_port * const port = execlists->port;
 	struct drm_i915_private *dev_priv = engine->i915;
 
 	/* We can skip acquiring intel_runtime_pm_get() here as it was taken
@@ -780,6 +854,23 @@ static void intel_lrc_irq_handler(unsigned long data)
 			if (!(status & GEN8_CTX_STATUS_COMPLETED_MASK))
 				continue;
 
+			if (status & GEN8_CTX_STATUS_ACTIVE_IDLE &&
+			    buf[2*head + 1] == PREEMPT_ID) {
+				execlist_cancel_port_requests(execlists);
+
+				spin_lock_irq(&engine->timeline->lock);
+				unwind_incomplete_requests(engine);
+				spin_unlock_irq(&engine->timeline->lock);
+
+				GEM_BUG_ON(!execlists->preempt);
+				execlists->preempt = false;
+				continue;
+			}
+
+			if (status & GEN8_CTX_STATUS_PREEMPTED &&
+			    execlists->preempt)
+				continue;
+
 			/* Check the context/desc id for this event matches */
 			GEM_DEBUG_BUG_ON(buf[2 * head + 1] != port->context_id);
 
@@ -811,7 +902,7 @@ static void intel_lrc_irq_handler(unsigned long data)
 		}
 	}
 
-	if (execlists_elsp_ready(engine))
+	if (!execlists->preempt)
 		execlists_dequeue(engine);
 
 	intel_uncore_forcewake_put(dev_priv, execlists->fw_domains);
@@ -824,7 +915,7 @@ static void insert_request(struct intel_engine_cs *engine,
 	struct i915_priolist *p = lookup_priolist(engine, pt, prio);
 
 	list_add_tail(&pt->link, &ptr_mask_bits(p, 1)->requests);
-	if (ptr_unmask_bits(p, 1) && execlists_elsp_ready(engine))
+	if (ptr_unmask_bits(p, 1))
 		tasklet_hi_schedule(&engine->execlists.irq_tasklet);
 }
 
@@ -954,8 +1045,6 @@ static void execlists_schedule(struct drm_i915_gem_request *request, int prio)
 	}
 
 	spin_unlock_irq(&engine->timeline->lock);
-
-	/* XXX Do we need to preempt to make room for us and our deps? */
 }
 
 static struct intel_ring *
@@ -1151,6 +1240,8 @@ static u32 *gen8_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
 				       i915_ggtt_offset(engine->scratch) +
 				       2 * CACHELINE_BYTES);
 
+	*batch++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
+
 	/* Pad to end of cacheline */
 	while ((unsigned long)batch % CACHELINE_BYTES)
 		*batch++ = MI_NOOP;
@@ -1166,6 +1257,8 @@ static u32 *gen8_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
 
 static u32 *gen9_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
 {
+	*batch++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
+
 	/* WaFlushCoherentL3CacheLinesAtContextSwitch:skl,bxt,glk */
 	batch = gen8_emit_flush_coherentl3_wa(engine, batch);
 
|
|
|
|
|
*batch++ = 0;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
*batch++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
|
|
|
|
|
|
|
|
|
|
/* Pad to end of cacheline */
|
|
|
|
|
while ((unsigned long)batch % CACHELINE_BYTES)
|
|
|
|
|
*batch++ = MI_NOOP;
|
|
|
|
@@ -1364,6 +1459,7 @@ static int gen8_init_common_ring(struct intel_engine_cs *engine)
 		   GT_CONTEXT_SWITCH_INTERRUPT << engine->irq_shift);
 	clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
 	execlists->csb_head = -1;
+	execlists->preempt = false;
 
 	/* After a GPU reset, we may have requests to replay */
 	if (!i915_modparams.enable_guc_submission && execlists->first)
@@ -1659,7 +1755,8 @@ static int gen8_emit_flush_render(struct drm_i915_gem_request *request,
  */
 static void gen8_emit_wa_tail(struct drm_i915_gem_request *request, u32 *cs)
 {
-	*cs++ = MI_NOOP;
+	/* Ensure there's always at least one preemption point per-request. */
+	*cs++ = MI_ARB_CHECK;
 	*cs++ = MI_NOOP;
 	request->wa_tail = intel_ring_offset(request, cs);
 }
@@ -1680,7 +1777,6 @@ static void gen8_emit_breadcrumb(struct drm_i915_gem_request *request, u32 *cs)
 
 	gen8_emit_wa_tail(request, cs);
 }
-
 static const int gen8_emit_breadcrumb_sz = 6 + WA_TAIL_DWORDS;
 
 static void gen8_emit_breadcrumb_render(struct drm_i915_gem_request *request,
@@ -1708,7 +1804,6 @@ static void gen8_emit_breadcrumb_render(struct drm_i915_gem_request *request,
 
 	gen8_emit_wa_tail(request, cs);
 }
-
 static const int gen8_emit_breadcrumb_render_sz = 8 + WA_TAIL_DWORDS;
 
 static int gen8_init_rcs_context(struct drm_i915_gem_request *req)