diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index 4a34c4f62065..f7ba0935ed67 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -1115,7 +1115,7 @@ static int gen8_emit_rpcs_config(struct i915_request *rq, offset = i915_ggtt_offset(ce->state) + LRC_STATE_PN * PAGE_SIZE + - (CTX_R_PWR_CLK_STATE + 1) * 4; + CTX_R_PWR_CLK_STATE * 4; *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; *cs++ = lower_32_bits(offset); diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index 6cfdc0f9f2b9..caac091b3eb9 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -230,9 +230,10 @@ static int __execlists_context_alloc(struct intel_context *ce, struct intel_engine_cs *engine); static void execlists_init_reg_state(u32 *reg_state, - struct intel_context *ce, - struct intel_engine_cs *engine, - struct intel_ring *ring); + const struct intel_context *ce, + const struct intel_engine_cs *engine, + const struct intel_ring *ring, + bool close); static void mark_eio(struct i915_request *rq) { @@ -471,6 +472,411 @@ lrc_descriptor(struct intel_context *ce, struct intel_engine_cs *engine) return desc; } +static u32 *set_offsets(u32 *regs, + const u8 *data, + const struct intel_engine_cs *engine) +#define NOP(x) (BIT(7) | (x)) +#define LRI(count, flags) ((flags) << 6 | (count)) +#define POSTED BIT(0) +#define REG(x) (((x) >> 2) | BUILD_BUG_ON_ZERO(x >= 0x200)) +#define REG16(x) \ + (((x) >> 9) | BIT(7) | BUILD_BUG_ON_ZERO(x >= 0x10000)), \ + (((x) >> 2) & 0x7f) +#define END() 0 +{ + const u32 base = engine->mmio_base; + + while (*data) { + u8 count, flags; + + if (*data & BIT(7)) { /* skip */ + regs += *data++ & ~BIT(7); + continue; + } + + count = *data & 0x3f; + flags = *data >> 6; + data++; + + *regs = MI_LOAD_REGISTER_IMM(count); + if (flags & POSTED) + *regs |= MI_LRI_FORCE_POSTED; + if (INTEL_GEN(engine->i915) >= 11) + *regs |= MI_LRI_CS_MMIO; + regs++; + + GEM_BUG_ON(!count); + do { + u32 offset = 0; + u8 v; + + do { + v = *data++; + offset <<= 7; + offset |= v & ~BIT(7); + } while (v & BIT(7)); + + *regs = base + (offset << 2); + regs += 2; + } while (--count); + } + + return regs; +} + +static const u8 gen8_xcs_offsets[] = { + NOP(1), + LRI(11, 0), + REG16(0x244), + REG(0x034), + REG(0x030), + REG(0x038), + REG(0x03c), + REG(0x168), + REG(0x140), + REG(0x110), + REG(0x11c), + REG(0x114), + REG(0x118), + + NOP(9), + LRI(9, 0), + REG16(0x3a8), + REG16(0x28c), + REG16(0x288), + REG16(0x284), + REG16(0x280), + REG16(0x27c), + REG16(0x278), + REG16(0x274), + REG16(0x270), + + NOP(13), + LRI(2, 0), + REG16(0x200), + REG(0x028), + + END(), +}; + +static const u8 gen9_xcs_offsets[] = { + NOP(1), + LRI(14, POSTED), + REG16(0x244), + REG(0x034), + REG(0x030), + REG(0x038), + REG(0x03c), + REG(0x168), + REG(0x140), + REG(0x110), + REG(0x11c), + REG(0x114), + REG(0x118), + REG(0x1c0), + REG(0x1c4), + REG(0x1c8), + + NOP(3), + LRI(9, POSTED), + REG16(0x3a8), + REG16(0x28c), + REG16(0x288), + REG16(0x284), + REG16(0x280), + REG16(0x27c), + REG16(0x278), + REG16(0x274), + REG16(0x270), + + NOP(13), + LRI(1, POSTED), + REG16(0x200), + + NOP(13), + LRI(44, POSTED), + REG(0x028), + REG(0x09c), + REG(0x0c0), + REG(0x178), + REG(0x17c), + REG16(0x358), + REG(0x170), + REG(0x150), + REG(0x154), + REG(0x158), + REG16(0x41c), + REG16(0x600), + REG16(0x604), + REG16(0x608), + REG16(0x60c), + REG16(0x610), + REG16(0x614), + REG16(0x618), + REG16(0x61c), + REG16(0x620), + REG16(0x624), + REG16(0x628), + REG16(0x62c), + REG16(0x630), + REG16(0x634), + REG16(0x638), + REG16(0x63c), + REG16(0x640), + REG16(0x644), + REG16(0x648), + REG16(0x64c), + REG16(0x650), + REG16(0x654), + REG16(0x658), + REG16(0x65c), + REG16(0x660), + REG16(0x664), + REG16(0x668), + REG16(0x66c), + REG16(0x670), + REG16(0x674), + REG16(0x678), + REG16(0x67c), + REG(0x068), + + END(), +}; + +static const u8 gen12_xcs_offsets[] = { + NOP(1), + LRI(13, POSTED), + REG16(0x244), + REG(0x034), + REG(0x030), + REG(0x038), + REG(0x03c), + REG(0x168), + REG(0x140), + REG(0x110), + REG(0x1c0), + REG(0x1c4), + REG(0x1c8), + REG(0x180), + REG16(0x2b4), + + NOP(5), + LRI(9, POSTED), + REG16(0x3a8), + REG16(0x28c), + REG16(0x288), + REG16(0x284), + REG16(0x280), + REG16(0x27c), + REG16(0x278), + REG16(0x274), + REG16(0x270), + + NOP(13), + LRI(2, POSTED), + REG16(0x200), + REG16(0x204), + + NOP(11), + LRI(50, POSTED), + REG16(0x588), + REG16(0x588), + REG16(0x588), + REG16(0x588), + REG16(0x588), + REG16(0x588), + REG(0x028), + REG(0x09c), + REG(0x0c0), + REG(0x178), + REG(0x17c), + REG16(0x358), + REG(0x170), + REG(0x150), + REG(0x154), + REG(0x158), + REG16(0x41c), + REG16(0x600), + REG16(0x604), + REG16(0x608), + REG16(0x60c), + REG16(0x610), + REG16(0x614), + REG16(0x618), + REG16(0x61c), + REG16(0x620), + REG16(0x624), + REG16(0x628), + REG16(0x62c), + REG16(0x630), + REG16(0x634), + REG16(0x638), + REG16(0x63c), + REG16(0x640), + REG16(0x644), + REG16(0x648), + REG16(0x64c), + REG16(0x650), + REG16(0x654), + REG16(0x658), + REG16(0x65c), + REG16(0x660), + REG16(0x664), + REG16(0x668), + REG16(0x66c), + REG16(0x670), + REG16(0x674), + REG16(0x678), + REG16(0x67c), + REG(0x068), + + END(), +}; + +static const u8 gen8_rcs_offsets[] = { + NOP(1), + LRI(14, POSTED), + REG16(0x244), + REG(0x034), + REG(0x030), + REG(0x038), + REG(0x03c), + REG(0x168), + REG(0x140), + REG(0x110), + REG(0x11c), + REG(0x114), + REG(0x118), + REG(0x1c0), + REG(0x1c4), + REG(0x1c8), + + NOP(3), + LRI(9, POSTED), + REG16(0x3a8), + REG16(0x28c), + REG16(0x288), + REG16(0x284), + REG16(0x280), + REG16(0x27c), + REG16(0x278), + REG16(0x274), + REG16(0x270), + + NOP(13), + LRI(1, 0), + REG(0x0c8), + + END(), +}; + +static const u8 gen11_rcs_offsets[] = { + NOP(1), + LRI(15, POSTED), + REG16(0x244), + REG(0x034), + REG(0x030), + REG(0x038), + REG(0x03c), + REG(0x168), + REG(0x140), + REG(0x110), + REG(0x11c), + REG(0x114), + REG(0x118), + REG(0x1c0), + REG(0x1c4), + REG(0x1c8), + REG(0x180), + + NOP(1), + LRI(9, POSTED), + REG16(0x3a8), + REG16(0x28c), + REG16(0x288), + REG16(0x284), + REG16(0x280), + REG16(0x27c), + REG16(0x278), + REG16(0x274), + REG16(0x270), + + LRI(1, POSTED), + REG(0x1b0), + + NOP(10), + LRI(1, 0), + REG(0x0c8), + + END(), +}; + +static const u8 gen12_rcs_offsets[] = { + NOP(1), + LRI(13, POSTED), + REG16(0x244), + REG(0x034), + REG(0x030), + REG(0x038), + REG(0x03c), + REG(0x168), + REG(0x140), + REG(0x110), + REG(0x1c0), + REG(0x1c4), + REG(0x1c8), + REG(0x180), + REG16(0x2b4), + + NOP(5), + LRI(9, POSTED), + REG16(0x3a8), + REG16(0x28c), + REG16(0x288), + REG16(0x284), + REG16(0x280), + REG16(0x27c), + REG16(0x278), + REG16(0x274), + REG16(0x270), + + LRI(3, POSTED), + REG(0x1b0), + REG16(0x5a8), + REG16(0x5ac), + + NOP(6), + LRI(1, 0), + REG(0x0c8), + + END(), +}; + +#undef END +#undef REG16 +#undef REG +#undef LRI +#undef NOP + +static const u8 *reg_offsets(const struct intel_engine_cs *engine) +{ + if (engine->class == RENDER_CLASS) { + if (INTEL_GEN(engine->i915) >= 12) + return gen12_rcs_offsets; + else if (INTEL_GEN(engine->i915) >= 11) + return gen11_rcs_offsets; + else + return gen8_rcs_offsets; + } else { + if (INTEL_GEN(engine->i915) >= 12) + return gen12_xcs_offsets; + else if (INTEL_GEN(engine->i915) >= 9) + return gen9_xcs_offsets; + else + return gen8_xcs_offsets; + } +} + static void unwind_wa_tail(struct i915_request *rq) { rq->tail = intel_ring_wrap(rq->ring, rq->wa_tail - WA_TAIL_BYTES); @@ -654,7 +1060,7 @@ static u64 execlists_update_context(const struct i915_request *rq) struct intel_context *ce = rq->hw_context; u64 desc; - ce->lrc_reg_state[CTX_RING_TAIL + 1] = + ce->lrc_reg_state[CTX_RING_TAIL] = intel_ring_set_tail(rq->ring, rq->tail); /* @@ -826,54 +1232,7 @@ static bool can_merge_rq(const struct i915_request *prev, static void virtual_update_register_offsets(u32 *regs, struct intel_engine_cs *engine) { - u32 base = engine->mmio_base; - - /* Refactor so that we only have one place that knows all the offsets! */ - GEM_WARN_ON(INTEL_GEN(engine->i915) >= 12); - - /* Must match execlists_init_reg_state()! */ - - /* Common part */ - regs[CTX_CONTEXT_CONTROL] = - i915_mmio_reg_offset(RING_CONTEXT_CONTROL(base)); - regs[CTX_RING_HEAD] = i915_mmio_reg_offset(RING_HEAD(base)); - regs[CTX_RING_TAIL] = i915_mmio_reg_offset(RING_TAIL(base)); - regs[CTX_RING_BUFFER_START] = i915_mmio_reg_offset(RING_START(base)); - regs[CTX_RING_BUFFER_CONTROL] = i915_mmio_reg_offset(RING_CTL(base)); - - regs[CTX_BB_HEAD_U] = i915_mmio_reg_offset(RING_BBADDR_UDW(base)); - regs[CTX_BB_HEAD_L] = i915_mmio_reg_offset(RING_BBADDR(base)); - regs[CTX_BB_STATE] = i915_mmio_reg_offset(RING_BBSTATE(base)); - - regs[CTX_SECOND_BB_HEAD_U] = - i915_mmio_reg_offset(RING_SBBADDR_UDW(base)); - regs[CTX_SECOND_BB_HEAD_L] = i915_mmio_reg_offset(RING_SBBADDR(base)); - regs[CTX_SECOND_BB_STATE] = i915_mmio_reg_offset(RING_SBBSTATE(base)); - - /* PPGTT part */ - regs[CTX_CTX_TIMESTAMP] = - i915_mmio_reg_offset(RING_CTX_TIMESTAMP(base)); - - regs[CTX_PDP3_UDW] = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(base, 3)); - regs[CTX_PDP3_LDW] = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(base, 3)); - regs[CTX_PDP2_UDW] = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(base, 2)); - regs[CTX_PDP2_LDW] = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(base, 2)); - regs[CTX_PDP1_UDW] = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(base, 1)); - regs[CTX_PDP1_LDW] = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(base, 1)); - regs[CTX_PDP0_UDW] = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(base, 0)); - regs[CTX_PDP0_LDW] = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(base, 0)); - - if (engine->class == RENDER_CLASS) { - regs[CTX_RCS_INDIRECT_CTX] = - i915_mmio_reg_offset(RING_INDIRECT_CTX(base)); - regs[CTX_RCS_INDIRECT_CTX_OFFSET] = - i915_mmio_reg_offset(RING_INDIRECT_CTX_OFFSET(base)); - regs[CTX_BB_PER_CTX_PTR] = - i915_mmio_reg_offset(RING_BB_PER_CTX_PTR(base)); - - regs[CTX_R_PWR_CLK_STATE] = - i915_mmio_reg_offset(GEN8_R_PWR_CLK_STATE); - } + set_offsets(regs, reg_offsets(engine), engine); } static bool virtual_matches(const struct virtual_engine *ve, @@ -1738,8 +2097,8 @@ static void execlists_context_unpin(struct intel_context *ce) } static void -__execlists_update_reg_state(struct intel_context *ce, - struct intel_engine_cs *engine) +__execlists_update_reg_state(const struct intel_context *ce, + const struct intel_engine_cs *engine) { struct intel_ring *ring = ce->ring; u32 *regs = ce->lrc_reg_state; @@ -1747,16 +2106,16 @@ __execlists_update_reg_state(struct intel_context *ce, GEM_BUG_ON(!intel_ring_offset_valid(ring, ring->head)); GEM_BUG_ON(!intel_ring_offset_valid(ring, ring->tail)); - regs[CTX_RING_BUFFER_START + 1] = i915_ggtt_offset(ring->vma); - regs[CTX_RING_HEAD + 1] = ring->head; - regs[CTX_RING_TAIL + 1] = ring->tail; + regs[CTX_RING_BUFFER_START] = i915_ggtt_offset(ring->vma); + regs[CTX_RING_HEAD] = ring->head; + regs[CTX_RING_TAIL] = ring->tail; /* RPCS */ if (engine->class == RENDER_CLASS) { - regs[CTX_R_PWR_CLK_STATE + 1] = + regs[CTX_R_PWR_CLK_STATE] = intel_sseu_make_rpcs(engine->i915, &ce->sseu); - i915_oa_init_reg_state(engine, ce, regs); + i915_oa_init_reg_state(ce, engine); } } @@ -2465,7 +2824,7 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled) engine->pinned_default_state + LRC_STATE_PN * PAGE_SIZE, engine->context_size - PAGE_SIZE); } - execlists_init_reg_state(regs, ce, engine, ce->ring); + execlists_init_reg_state(regs, ce, engine, ce->ring, false); out_replay: GEM_TRACE("%s replay {head:%04x, tail:%04x\n", @@ -3243,7 +3602,7 @@ int intel_execlists_submission_init(struct intel_engine_cs *engine) return 0; } -static u32 intel_lr_indirect_ctx_offset(struct intel_engine_cs *engine) +static u32 intel_lr_indirect_ctx_offset(const struct intel_engine_cs *engine) { u32 indirect_ctx_offset; @@ -3278,75 +3637,48 @@ static u32 intel_lr_indirect_ctx_offset(struct intel_engine_cs *engine) static void init_common_reg_state(u32 * const regs, - struct i915_ppgtt * const ppgtt, - struct intel_engine_cs *engine, - struct intel_ring *ring) + const struct intel_engine_cs *engine, + const struct intel_ring *ring) { - const u32 base = engine->mmio_base; - - CTX_REG(regs, CTX_CONTEXT_CONTROL, RING_CONTEXT_CONTROL(base), + regs[CTX_CONTEXT_CONTROL] = _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT) | - _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH)); - if (INTEL_GEN(engine->i915) < 11) { - regs[CTX_CONTEXT_CONTROL + 1] |= + _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH); + if (INTEL_GEN(engine->i915) < 11) + regs[CTX_CONTEXT_CONTROL] |= _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT | CTX_CTRL_RS_CTX_ENABLE); - } - CTX_REG(regs, CTX_RING_HEAD, RING_HEAD(base), 0); - CTX_REG(regs, CTX_RING_TAIL, RING_TAIL(base), 0); - CTX_REG(regs, CTX_RING_BUFFER_START, RING_START(base), 0); - CTX_REG(regs, CTX_RING_BUFFER_CONTROL, RING_CTL(base), - RING_CTL_SIZE(ring->size) | RING_VALID); - CTX_REG(regs, CTX_BB_HEAD_U, RING_BBADDR_UDW(base), 0); - CTX_REG(regs, CTX_BB_HEAD_L, RING_BBADDR(base), 0); - CTX_REG(regs, CTX_BB_STATE, RING_BBSTATE(base), RING_BB_PPGTT); + + regs[CTX_RING_BUFFER_CONTROL] = RING_CTL_SIZE(ring->size) | RING_VALID; + regs[CTX_BB_STATE] = RING_BB_PPGTT; } static void init_wa_bb_reg_state(u32 * const regs, - struct intel_engine_cs *engine, + const struct intel_engine_cs *engine, u32 pos_bb_per_ctx) { - struct i915_ctx_workarounds * const wa_ctx = &engine->wa_ctx; - const u32 base = engine->mmio_base; - const u32 pos_indirect_ctx = pos_bb_per_ctx + 2; - const u32 pos_indirect_ctx_offset = pos_indirect_ctx + 2; + const struct i915_ctx_workarounds * const wa_ctx = &engine->wa_ctx; - CTX_REG(regs, pos_indirect_ctx, RING_INDIRECT_CTX(base), 0); - CTX_REG(regs, pos_indirect_ctx_offset, - RING_INDIRECT_CTX_OFFSET(base), 0); - if (wa_ctx->indirect_ctx.size) { - const u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma); - - regs[pos_indirect_ctx + 1] = - (ggtt_offset + wa_ctx->indirect_ctx.offset) | - (wa_ctx->indirect_ctx.size / CACHELINE_BYTES); - - regs[pos_indirect_ctx_offset + 1] = - intel_lr_indirect_ctx_offset(engine) << 6; - } - - CTX_REG(regs, pos_bb_per_ctx, RING_BB_PER_CTX_PTR(base), 0); if (wa_ctx->per_ctx.size) { const u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma); - regs[pos_bb_per_ctx + 1] = + regs[pos_bb_per_ctx] = (ggtt_offset + wa_ctx->per_ctx.offset) | 0x01; } + + if (wa_ctx->indirect_ctx.size) { + const u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma); + + regs[pos_bb_per_ctx + 2] = + (ggtt_offset + wa_ctx->indirect_ctx.offset) | + (wa_ctx->indirect_ctx.size / CACHELINE_BYTES); + + regs[pos_bb_per_ctx + 4] = + intel_lr_indirect_ctx_offset(engine) << 6; + } } -static void init_ppgtt_reg_state(u32 *regs, u32 base, - struct i915_ppgtt *ppgtt) +static void init_ppgtt_reg_state(u32 *regs, const struct i915_ppgtt *ppgtt) { - /* PDP values well be assigned later if needed */ - CTX_REG(regs, CTX_PDP3_UDW, GEN8_RING_PDP_UDW(base, 3), 0); - CTX_REG(regs, CTX_PDP3_LDW, GEN8_RING_PDP_LDW(base, 3), 0); - CTX_REG(regs, CTX_PDP2_UDW, GEN8_RING_PDP_UDW(base, 2), 0); - CTX_REG(regs, CTX_PDP2_LDW, GEN8_RING_PDP_LDW(base, 2), 0); - CTX_REG(regs, CTX_PDP1_UDW, GEN8_RING_PDP_UDW(base, 1), 0); - CTX_REG(regs, CTX_PDP1_LDW, GEN8_RING_PDP_LDW(base, 1), 0); - CTX_REG(regs, CTX_PDP0_UDW, GEN8_RING_PDP_UDW(base, 0), 0); - CTX_REG(regs, CTX_PDP0_LDW, GEN8_RING_PDP_LDW(base, 0), 0); - if (i915_vm_is_4lvl(&ppgtt->vm)) { /* 64b PPGTT (48bit canonical) * PDP0_DESCRIPTOR contains the base address to PML4 and @@ -3369,91 +3701,11 @@ static struct i915_ppgtt *vm_alias(struct i915_address_space *vm) return i915_vm_to_ppgtt(vm); } -static void gen8_init_reg_state(u32 * const regs, - struct intel_context *ce, - struct intel_engine_cs *engine, - struct intel_ring *ring) -{ - struct i915_ppgtt * const ppgtt = vm_alias(ce->vm); - const bool rcs = engine->class == RENDER_CLASS; - const u32 base = engine->mmio_base; - const u32 lri_base = - intel_engine_has_relative_mmio(engine) ? MI_LRI_CS_MMIO : 0; - - regs[CTX_LRI_HEADER_0] = - MI_LOAD_REGISTER_IMM(rcs ? 14 : 11) | - MI_LRI_FORCE_POSTED | - lri_base; - - init_common_reg_state(regs, ppgtt, engine, ring); - CTX_REG(regs, CTX_SECOND_BB_HEAD_U, RING_SBBADDR_UDW(base), 0); - CTX_REG(regs, CTX_SECOND_BB_HEAD_L, RING_SBBADDR(base), 0); - CTX_REG(regs, CTX_SECOND_BB_STATE, RING_SBBSTATE(base), 0); - if (rcs) - init_wa_bb_reg_state(regs, engine, CTX_BB_PER_CTX_PTR); - - regs[CTX_LRI_HEADER_1] = - MI_LOAD_REGISTER_IMM(9) | - MI_LRI_FORCE_POSTED | - lri_base; - - CTX_REG(regs, CTX_CTX_TIMESTAMP, RING_CTX_TIMESTAMP(base), 0); - - init_ppgtt_reg_state(regs, base, ppgtt); - - if (rcs) { - regs[CTX_LRI_HEADER_2] = MI_LOAD_REGISTER_IMM(1) | lri_base; - CTX_REG(regs, CTX_R_PWR_CLK_STATE, GEN8_R_PWR_CLK_STATE, 0); - } - - regs[CTX_END] = MI_BATCH_BUFFER_END; - if (INTEL_GEN(engine->i915) >= 10) - regs[CTX_END] |= BIT(0); -} - -static void gen12_init_reg_state(u32 * const regs, - struct intel_context *ce, - struct intel_engine_cs *engine, - struct intel_ring *ring) -{ - struct i915_ppgtt * const ppgtt = i915_vm_to_ppgtt(ce->vm); - const bool rcs = engine->class == RENDER_CLASS; - const u32 base = engine->mmio_base; - const u32 lri_base = - intel_engine_has_relative_mmio(engine) ? MI_LRI_CS_MMIO : 0; - - regs[CTX_LRI_HEADER_0] = - MI_LOAD_REGISTER_IMM(rcs ? 11 : 9) | - MI_LRI_FORCE_POSTED | - lri_base; - - init_common_reg_state(regs, ppgtt, engine, ring); - - /* We want ctx_ptr for all engines to be set */ - init_wa_bb_reg_state(regs, engine, GEN12_CTX_BB_PER_CTX_PTR); - - regs[CTX_LRI_HEADER_1] = - MI_LOAD_REGISTER_IMM(9) | - MI_LRI_FORCE_POSTED | - lri_base; - - CTX_REG(regs, CTX_CTX_TIMESTAMP, RING_CTX_TIMESTAMP(base), 0); - - init_ppgtt_reg_state(regs, base, ppgtt); - - if (rcs) { - regs[GEN12_CTX_LRI_HEADER_3] = - MI_LOAD_REGISTER_IMM(1) | lri_base; - CTX_REG(regs, CTX_R_PWR_CLK_STATE, GEN8_R_PWR_CLK_STATE, 0); - - /* TODO: oa_init_reg_state ? */ - } -} - static void execlists_init_reg_state(u32 *regs, - struct intel_context *ce, - struct intel_engine_cs *engine, - struct intel_ring *ring) + const struct intel_context *ce, + const struct intel_engine_cs *engine, + const struct intel_ring *ring, + bool close) { /* * A context is actually a big batch buffer with several @@ -3465,10 +3717,21 @@ static void execlists_init_reg_state(u32 *regs, * * Must keep consistent with virtual_update_register_offsets(). */ - if (INTEL_GEN(engine->i915) >= 12) - gen12_init_reg_state(regs, ce, engine, ring); - else - gen8_init_reg_state(regs, ce, engine, ring); + u32 *bbe = set_offsets(regs, reg_offsets(engine), engine); + + if (close) { /* Close the batch; used mainly by live_lrc_layout() */ + *bbe = MI_BATCH_BUFFER_END; + if (INTEL_GEN(engine->i915) >= 10) + *bbe |= BIT(0); + } + + init_common_reg_state(regs, engine, ring); + init_ppgtt_reg_state(regs, vm_alias(ce->vm)); + + init_wa_bb_reg_state(regs, engine, + INTEL_GEN(engine->i915) >= 12 ? + GEN12_CTX_BB_PER_CTX_PTR : + CTX_BB_PER_CTX_PTR); } static int @@ -3477,6 +3740,7 @@ populate_lr_context(struct intel_context *ce, struct intel_engine_cs *engine, struct intel_ring *ring) { + bool inhibit = true; void *vaddr; u32 *regs; int ret; @@ -3508,14 +3772,15 @@ populate_lr_context(struct intel_context *ce, memcpy(vaddr + start, defaults + start, engine->context_size); i915_gem_object_unpin_map(engine->default_state); + inhibit = false; } /* The second page of the context object contains some fields which must * be set up prior to the first execution. */ regs = vaddr + LRC_STATE_PN * PAGE_SIZE; - execlists_init_reg_state(regs, ce, engine, ring); - if (!engine->default_state) - regs[CTX_CONTEXT_CONTROL + 1] |= + execlists_init_reg_state(regs, ce, engine, ring, inhibit); + if (inhibit) + regs[CTX_CONTEXT_CONTROL] |= _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT); ret = 0; @@ -4212,7 +4477,7 @@ void intel_lr_context_reset(struct intel_engine_cs *engine, engine->pinned_default_state + LRC_STATE_PN * PAGE_SIZE, engine->context_size - PAGE_SIZE); } - execlists_init_reg_state(regs, ce, engine, ce->ring); + execlists_init_reg_state(regs, ce, engine, ce->ring, false); } /* Rerun the request; its payload has been neutered (if guilty). */ diff --git a/drivers/gpu/drm/i915/gt/intel_lrc_reg.h b/drivers/gpu/drm/i915/gt/intel_lrc_reg.h index 7e773e74a3fe..06ab0276e10e 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc_reg.h +++ b/drivers/gpu/drm/i915/gt/intel_lrc_reg.h @@ -10,60 +10,40 @@ #include /* GEN8 to GEN11 Reg State Context */ -#define CTX_LRI_HEADER_0 0x01 -#define CTX_CONTEXT_CONTROL 0x02 -#define CTX_RING_HEAD 0x04 -#define CTX_RING_TAIL 0x06 -#define CTX_RING_BUFFER_START 0x08 -#define CTX_RING_BUFFER_CONTROL 0x0a -#define CTX_BB_HEAD_U 0x0c -#define CTX_BB_HEAD_L 0x0e -#define CTX_BB_STATE 0x10 -#define CTX_SECOND_BB_HEAD_U 0x12 -#define CTX_SECOND_BB_HEAD_L 0x14 -#define CTX_SECOND_BB_STATE 0x16 -#define CTX_BB_PER_CTX_PTR 0x18 -#define CTX_RCS_INDIRECT_CTX 0x1a -#define CTX_RCS_INDIRECT_CTX_OFFSET 0x1c -#define CTX_LRI_HEADER_1 0x21 -#define CTX_CTX_TIMESTAMP 0x22 -#define CTX_PDP3_UDW 0x24 -#define CTX_PDP3_LDW 0x26 -#define CTX_PDP2_UDW 0x28 -#define CTX_PDP2_LDW 0x2a -#define CTX_PDP1_UDW 0x2c -#define CTX_PDP1_LDW 0x2e -#define CTX_PDP0_UDW 0x30 -#define CTX_PDP0_LDW 0x32 -#define CTX_LRI_HEADER_2 0x41 -#define CTX_R_PWR_CLK_STATE 0x42 -#define CTX_END 0x44 +#define CTX_CONTEXT_CONTROL (0x02 + 1) +#define CTX_RING_HEAD (0x04 + 1) +#define CTX_RING_TAIL (0x06 + 1) +#define CTX_RING_BUFFER_START (0x08 + 1) +#define CTX_RING_BUFFER_CONTROL (0x0a + 1) +#define CTX_BB_STATE (0x10 + 1) +#define CTX_BB_PER_CTX_PTR (0x18 + 1) +#define CTX_PDP3_UDW (0x24 + 1) +#define CTX_PDP3_LDW (0x26 + 1) +#define CTX_PDP2_UDW (0x28 + 1) +#define CTX_PDP2_LDW (0x2a + 1) +#define CTX_PDP1_UDW (0x2c + 1) +#define CTX_PDP1_LDW (0x2e + 1) +#define CTX_PDP0_UDW (0x30 + 1) +#define CTX_PDP0_LDW (0x32 + 1) +#define CTX_R_PWR_CLK_STATE (0x42 + 1) #define GEN9_CTX_RING_MI_MODE 0x54 /* GEN12+ Reg State Context */ -#define GEN12_CTX_BB_PER_CTX_PTR 0x12 -#define GEN12_CTX_LRI_HEADER_3 0x41 - -#define CTX_REG(reg_state, pos, reg, val) do { \ - u32 *reg_state__ = (reg_state); \ - const u32 pos__ = (pos); \ - (reg_state__)[(pos__) + 0] = i915_mmio_reg_offset(reg); \ - (reg_state__)[(pos__) + 1] = (val); \ -} while (0) +#define GEN12_CTX_BB_PER_CTX_PTR (0x12 + 1) #define ASSIGN_CTX_PDP(ppgtt, reg_state, n) do { \ u32 *reg_state__ = (reg_state); \ const u64 addr__ = i915_page_dir_dma_addr((ppgtt), (n)); \ - (reg_state__)[CTX_PDP ## n ## _UDW + 1] = upper_32_bits(addr__); \ - (reg_state__)[CTX_PDP ## n ## _LDW + 1] = lower_32_bits(addr__); \ + (reg_state__)[CTX_PDP ## n ## _UDW] = upper_32_bits(addr__); \ + (reg_state__)[CTX_PDP ## n ## _LDW] = lower_32_bits(addr__); \ } while (0) #define ASSIGN_CTX_PML4(ppgtt, reg_state) do { \ u32 *reg_state__ = (reg_state); \ const u64 addr__ = px_dma(ppgtt->pd); \ - (reg_state__)[CTX_PDP0_UDW + 1] = upper_32_bits(addr__); \ - (reg_state__)[CTX_PDP0_LDW + 1] = lower_32_bits(addr__); \ + (reg_state__)[CTX_PDP0_UDW] = upper_32_bits(addr__); \ + (reg_state__)[CTX_PDP0_LDW] = lower_32_bits(addr__); \ } while (0) #define GEN8_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT 0x17 diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c index 93a871bfd95d..22ea2e747064 100644 --- a/drivers/gpu/drm/i915/gt/selftest_lrc.c +++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c @@ -2201,3 +2201,145 @@ int intel_execlists_live_selftests(struct drm_i915_private *i915) return i915_live_subtests(tests, i915); } + +static void hexdump(const void *buf, size_t len) +{ + const size_t rowsize = 8 * sizeof(u32); + const void *prev = NULL; + bool skip = false; + size_t pos; + + for (pos = 0; pos < len; pos += rowsize) { + char line[128]; + + if (prev && !memcmp(prev, buf + pos, rowsize)) { + if (!skip) { + pr_info("*\n"); + skip = true; + } + continue; + } + + WARN_ON_ONCE(hex_dump_to_buffer(buf + pos, len - pos, + rowsize, sizeof(u32), + line, sizeof(line), + false) >= sizeof(line)); + pr_info("[%04zx] %s\n", pos, line); + + prev = buf + pos; + skip = false; + } +} + +static int live_lrc_layout(void *arg) +{ + struct intel_gt *gt = arg; + struct intel_engine_cs *engine; + enum intel_engine_id id; + u32 *mem; + int err; + + /* + * Check the registers offsets we use to create the initial reg state + * match the layout saved by HW. + */ + + mem = kmalloc(PAGE_SIZE, GFP_KERNEL); + if (!mem) + return -ENOMEM; + + err = 0; + for_each_engine(engine, gt->i915, id) { + u32 *hw, *lrc; + int dw; + + if (!engine->default_state) + continue; + + hw = i915_gem_object_pin_map(engine->default_state, + I915_MAP_WB); + if (IS_ERR(hw)) { + err = PTR_ERR(hw); + break; + } + hw += LRC_STATE_PN * PAGE_SIZE / sizeof(*hw); + + lrc = memset(mem, 0, PAGE_SIZE); + execlists_init_reg_state(lrc, + engine->kernel_context, + engine, + engine->kernel_context->ring, + true); + + dw = 0; + do { + u32 lri = hw[dw]; + + if (lri == 0) { + dw++; + continue; + } + + if ((lri & GENMASK(31, 23)) != MI_INSTR(0x22, 0)) { + pr_err("%s: Expected LRI command at dword %d, found %08x\n", + engine->name, dw, lri); + err = -EINVAL; + break; + } + + if (lrc[dw] != lri) { + pr_err("%s: LRI command mismatch at dword %d, expected %08x found %08x\n", + engine->name, dw, lri, lrc[dw]); + err = -EINVAL; + break; + } + + lri &= 0x7f; + lri++; + dw++; + + while (lri) { + if (hw[dw] != lrc[dw]) { + pr_err("%s: Different registers found at dword %d, expected %x, found %x\n", + engine->name, dw, hw[dw], lrc[dw]); + err = -EINVAL; + break; + } + + /* + * Skip over the actual register value as we + * expect that to differ. + */ + dw += 2; + lri -= 2; + } + } while ((lrc[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END); + + if (err) { + pr_info("%s: HW register image:\n", engine->name); + hexdump(hw, PAGE_SIZE); + + pr_info("%s: SW register image:\n", engine->name); + hexdump(lrc, PAGE_SIZE); + } + + i915_gem_object_unpin_map(engine->default_state); + if (err) + break; + } + + kfree(mem); + return err; +} + +int intel_lrc_live_selftests(struct drm_i915_private *i915) +{ + static const struct i915_subtest tests[] = { + SUBTEST(live_lrc_layout), + }; + + if (!HAS_LOGICAL_RING_CONTEXTS(i915)) + return 0; + + return intel_gt_live_subtests(tests, &i915->gt); +} diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index c1b764233761..524f6710b7aa 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -1673,10 +1673,8 @@ static u32 oa_config_flex_reg(const struct i915_oa_config *oa_config, * in the case that the OA unit has been disabled. */ static void -gen8_update_reg_state_unlocked(struct i915_perf_stream *stream, - struct intel_context *ce, - u32 *reg_state, - const struct i915_oa_config *oa_config) +gen8_update_reg_state_unlocked(const struct intel_context *ce, + const struct i915_perf_stream *stream) { struct drm_i915_private *i915 = ce->engine->i915; u32 ctx_oactxctrl = i915->perf.ctx_oactxctrl_offset; @@ -1691,21 +1689,19 @@ gen8_update_reg_state_unlocked(struct i915_perf_stream *stream, EU_PERF_CNTL5, EU_PERF_CNTL6, }; + u32 *reg_state = ce->lrc_reg_state; int i; - CTX_REG(reg_state, ctx_oactxctrl, GEN8_OACTXCONTROL, + reg_state[ctx_oactxctrl + 1] = (stream->period_exponent << GEN8_OA_TIMER_PERIOD_SHIFT) | (stream->periodic ? GEN8_OA_TIMER_ENABLE : 0) | - GEN8_OA_COUNTER_RESUME); + GEN8_OA_COUNTER_RESUME; - for (i = 0; i < ARRAY_SIZE(flex_regs); i++) { - CTX_REG(reg_state, ctx_flexeu0 + i * 2, flex_regs[i], - oa_config_flex_reg(oa_config, flex_regs[i])); - } + for (i = 0; i < ARRAY_SIZE(flex_regs); i++) + reg_state[ctx_flexeu0 + i * 2 + 1] = + oa_config_flex_reg(stream->oa_config, flex_regs[i]); - CTX_REG(reg_state, - CTX_R_PWR_CLK_STATE, GEN8_R_PWR_CLK_STATE, - intel_sseu_make_rpcs(i915, &ce->sseu)); + reg_state[CTX_R_PWR_CLK_STATE] = intel_sseu_make_rpcs(i915, &ce->sseu); } struct flex { @@ -1729,7 +1725,7 @@ gen8_store_flex(struct i915_request *rq, offset = i915_ggtt_offset(ce->state) + LRC_STATE_PN * PAGE_SIZE; do { *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; - *cs++ = offset + (flex->offset + 1) * sizeof(u32); + *cs++ = offset + flex->offset * sizeof(u32); *cs++ = 0; *cs++ = flex->value; } while (flex++, --count); @@ -1863,7 +1859,7 @@ static int gen8_configure_all_contexts(struct i915_perf_stream *stream, struct drm_i915_private *i915 = stream->dev_priv; /* The MMIO offsets for Flex EU registers aren't contiguous */ const u32 ctx_flexeu0 = i915->perf.ctx_flexeu0_offset; -#define ctx_flexeuN(N) (ctx_flexeu0 + 2 * (N)) +#define ctx_flexeuN(N) (ctx_flexeu0 + 2 * (N) + 1) struct flex regs[] = { { GEN8_R_PWR_CLK_STATE, @@ -1871,7 +1867,7 @@ static int gen8_configure_all_contexts(struct i915_perf_stream *stream, }, { GEN8_OACTXCONTROL, - i915->perf.ctx_oactxctrl_offset, + i915->perf.ctx_oactxctrl_offset + 1, ((stream->period_exponent << GEN8_OA_TIMER_PERIOD_SHIFT) | (stream->periodic ? GEN8_OA_TIMER_ENABLE : 0) | GEN8_OA_COUNTER_RESUME) @@ -2299,9 +2295,8 @@ err_config: return ret; } -void i915_oa_init_reg_state(struct intel_engine_cs *engine, - struct intel_context *ce, - u32 *regs) +void i915_oa_init_reg_state(const struct intel_context *ce, + const struct intel_engine_cs *engine) { struct i915_perf_stream *stream; @@ -2313,7 +2308,7 @@ void i915_oa_init_reg_state(struct intel_engine_cs *engine, stream = engine->i915->perf.exclusive_stream; if (stream) - gen8_update_reg_state_unlocked(stream, ce, regs, stream->oa_config); + gen8_update_reg_state_unlocked(ce, stream); } /** diff --git a/drivers/gpu/drm/i915/i915_perf.h b/drivers/gpu/drm/i915/i915_perf.h index a412b16d9ffc..f4fb311184b1 100644 --- a/drivers/gpu/drm/i915/i915_perf.h +++ b/drivers/gpu/drm/i915/i915_perf.h @@ -25,8 +25,7 @@ int i915_perf_add_config_ioctl(struct drm_device *dev, void *data, struct drm_file *file); int i915_perf_remove_config_ioctl(struct drm_device *dev, void *data, struct drm_file *file); -void i915_oa_init_reg_state(struct intel_engine_cs *engine, - struct intel_context *ce, - u32 *reg_state); +void i915_oa_init_reg_state(const struct intel_context *ce, + const struct intel_engine_cs *engine); #endif /* __I915_PERF_H__ */ diff --git a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h index 1ccf0f731ac0..66d83c1390c1 100644 --- a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h +++ b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h @@ -15,6 +15,7 @@ selftest(workarounds, intel_workarounds_live_selftests) selftest(gt_engines, intel_engine_live_selftests) selftest(gt_timelines, intel_timeline_live_selftests) selftest(gt_contexts, intel_context_live_selftests) +selftest(gt_lrc, intel_lrc_live_selftests) selftest(requests, i915_request_live_selftests) selftest(active, i915_active_live_selftests) selftest(objects, i915_gem_object_live_selftests)