drm/i915/perf: Determine gen12 oa ctx offset at runtime
Some SKUs of the same gen12 platform may have different oactxctrl offsets. For gen12, determine oactxctrl offsets at runtime. v2: (Lionel) - Move MI definitions to intel_gpu_commands.h - Ensure __find_reg_in_lri does not read past context image size v3: (Ashutosh) - Drop unnecessary use of double underscores - fix find_reg_in_lri - Return error if oa context offset is U32_MAX - Error out if oa_ctx_ctrl_offset does not find offset v4: (Ashutosh) - Warn on odd MI LRI_LEN - Remove unnecessary check for valid_oactxctrl_offset - Drop valid_oactxctrl_offset macro v5: Drop unrelated comment Signed-off-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com> Reviewed-by: Ashutosh Dixit <ashutosh.dixit@intel.com> Signed-off-by: John Harrison <John.C.Harrison@Intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20221026222102.5526-5-umesh.nerlige.ramappa@intel.com
This commit is contained in:
parent
2d9da58521
commit
a5c3a3cbf0
@ -187,6 +187,10 @@
|
||||
#define MI_BATCH_RESOURCE_STREAMER REG_BIT(10)
|
||||
#define MI_BATCH_PREDICATE REG_BIT(15) /* HSW+ on RCS only*/
|
||||
|
||||
/* Extract the 6-bit field at bits 28:23 of command dword @x (the MI opcode). */
#define MI_OPCODE(x) (((x) >> 23) & 0x3f)
|
||||
/* True when the opcode field of @x matches that of MI_INSTR(0x22, 0), i.e. an MI_LRI command. */
#define IS_MI_LRI_CMD(x) (MI_OPCODE(x) == MI_OPCODE(MI_INSTR(0x22, 0)))
|
||||
/*
 * Low 8 bits of the MI_LRI header @x, plus one.
 * NOTE(review): presumably the number of dwords (reg/value pairs) that follow
 * the header; the OA context parser expects this to be even - confirm against
 * the command definition.
 */
#define MI_LRI_LEN(x) (((x) & 0xff) + 1)
|
||||
|
||||
/*
|
||||
* 3D instructions used by the kernel
|
||||
*/
|
||||
|
@ -1356,6 +1356,74 @@ static int gen12_get_render_context_id(struct i915_perf_stream *stream)
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Scan a single MI_LRI command in @state for the register offset @reg.
 *
 * On entry *@offset is the dword index of the MI_LRI header. The dwords that
 * follow the header are visited two at a time (every other dword is compared
 * against @reg), stopping at the length encoded in the header or at @end,
 * whichever comes first.
 *
 * On return *@offset holds the dword index of the matching entry when @reg
 * was found; otherwise it holds the index at which the scan stopped, so the
 * caller can resume parsing from there.
 *
 * Returns true if @reg was found in this command, false otherwise.
 */
static bool oa_find_reg_in_lri(u32 *state, u32 reg, u32 *offset, u32 end)
{
	u32 pos = *offset;
	u32 limit = min(MI_LRI_LEN(state[pos]) + pos, end);

	/* Step over the header, then look at every second dword. */
	for (pos++; pos < limit; pos += 2) {
		if (state[pos] == reg) {
			*offset = pos;
			return true;
		}
	}

	*offset = pos;
	return false;
}
|
||||
|
||||
/*
 * Locate register @reg inside the register state of context @ce.
 *
 * The register state (ce->lrc_reg_state) is walked dword by dword; every
 * MI_LRI command encountered is searched for @reg via oa_find_reg_in_lri().
 * The walk is bounded by (context_size - PAGE_SIZE) / 4 dwords.
 * NOTE(review): the PAGE_SIZE subtraction presumably accounts for a leading
 * page that is not part of the register state - confirm.
 *
 * Returns the dword index of the matching register entry, or U32_MAX if the
 * register was not found or the context has no register state to parse.
 */
static u32 oa_context_image_offset(struct intel_context *ce, u32 reg)
{
	u32 offset, len = (ce->engine->context_size - PAGE_SIZE) / 4;
	u32 *state = ce->lrc_reg_state;

	/*
	 * Contexts without a mapped register state (e.g. platforms that do
	 * not use logical ring contexts) have nothing to parse; bail out
	 * instead of dereferencing a NULL pointer.
	 */
	if (drm_WARN_ON(&ce->engine->i915->drm, !state))
		return U32_MAX;

	for (offset = 0; offset < len; ) {
		if (IS_MI_LRI_CMD(state[offset])) {
			/*
			 * We expect reg-value pairs in MI_LRI command, so
			 * MI_LRI_LEN() should be even, if not, issue a warning.
			 */
			drm_WARN_ON(&ce->engine->i915->drm,
				    MI_LRI_LEN(state[offset]) & 0x1);

			/* On a miss, offset has been advanced past this LRI. */
			if (oa_find_reg_in_lri(state, reg, &offset, len))
				break;
		} else {
			offset++;
		}
	}

	return offset < len ? offset : U32_MAX;
}
|
||||
|
||||
static int set_oa_ctx_ctrl_offset(struct intel_context *ce)
|
||||
{
|
||||
i915_reg_t reg = GEN12_OACTXCONTROL(ce->engine->mmio_base);
|
||||
struct i915_perf *perf = &ce->engine->i915->perf;
|
||||
u32 offset = perf->ctx_oactxctrl_offset;
|
||||
|
||||
/* Do this only once. Failure is stored as offset of U32_MAX */
|
||||
if (offset)
|
||||
goto exit;
|
||||
|
||||
offset = oa_context_image_offset(ce, i915_mmio_reg_offset(reg));
|
||||
perf->ctx_oactxctrl_offset = offset;
|
||||
|
||||
drm_dbg(&ce->engine->i915->drm,
|
||||
"%s oa ctx control at 0x%08x dword offset\n",
|
||||
ce->engine->name, offset);
|
||||
|
||||
exit:
|
||||
return offset && offset != U32_MAX ? 0 : -ENODEV;
|
||||
}
|
||||
|
||||
static bool engine_supports_mi_query(struct intel_engine_cs *engine)
|
||||
{
|
||||
return engine->class == RENDER_CLASS;
|
||||
}
|
||||
|
||||
/**
|
||||
* oa_get_render_ctx_id - determine and hold ctx hw id
|
||||
* @stream: An i915-perf stream opened for OA metrics
|
||||
@ -1375,6 +1443,21 @@ static int oa_get_render_ctx_id(struct i915_perf_stream *stream)
|
||||
if (IS_ERR(ce))
|
||||
return PTR_ERR(ce);
|
||||
|
||||
if (engine_supports_mi_query(stream->engine)) {
|
||||
/*
|
||||
* We are enabling perf query here. If we don't find the context
|
||||
* offset here, just return an error.
|
||||
*/
|
||||
ret = set_oa_ctx_ctrl_offset(ce);
|
||||
if (ret) {
|
||||
intel_context_unpin(ce);
|
||||
drm_err(&stream->perf->i915->drm,
|
||||
"Enabling perf query failed for %s\n",
|
||||
stream->engine->name);
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
|
||||
switch (GRAPHICS_VER(ce->engine->i915)) {
|
||||
case 7: {
|
||||
/*
|
||||
@ -2406,10 +2489,11 @@ static int gen12_configure_oar_context(struct i915_perf_stream *stream,
|
||||
int err;
|
||||
struct intel_context *ce = stream->pinned_ctx;
|
||||
u32 format = stream->oa_buffer.format;
|
||||
u32 offset = stream->perf->ctx_oactxctrl_offset;
|
||||
struct flex regs_context[] = {
|
||||
{
|
||||
GEN8_OACTXCONTROL,
|
||||
stream->perf->ctx_oactxctrl_offset + 1,
|
||||
offset + 1,
|
||||
active ? GEN8_OA_COUNTER_RESUME : 0,
|
||||
},
|
||||
};
|
||||
@ -2434,12 +2518,13 @@ static int gen12_configure_oar_context(struct i915_perf_stream *stream,
|
||||
},
|
||||
};
|
||||
|
||||
/* Modify the context image of pinned context with regs_context*/
|
||||
/* Modify the context image of pinned context with regs_context */
|
||||
err = intel_context_lock_pinned(ce);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
err = gen8_modify_context(ce, regs_context, ARRAY_SIZE(regs_context));
|
||||
err = gen8_modify_context(ce, regs_context,
|
||||
ARRAY_SIZE(regs_context));
|
||||
intel_context_unlock_pinned(ce);
|
||||
if (err)
|
||||
return err;
|
||||
@ -2564,6 +2649,7 @@ lrc_configure_all_contexts(struct i915_perf_stream *stream,
|
||||
const struct i915_oa_config *oa_config,
|
||||
struct i915_active *active)
|
||||
{
|
||||
u32 ctx_oactxctrl = stream->perf->ctx_oactxctrl_offset;
|
||||
/* The MMIO offsets for Flex EU registers aren't contiguous */
|
||||
const u32 ctx_flexeu0 = stream->perf->ctx_flexeu0_offset;
|
||||
#define ctx_flexeuN(N) (ctx_flexeu0 + 2 * (N) + 1)
|
||||
@ -2574,7 +2660,7 @@ lrc_configure_all_contexts(struct i915_perf_stream *stream,
|
||||
},
|
||||
{
|
||||
GEN8_OACTXCONTROL,
|
||||
stream->perf->ctx_oactxctrl_offset + 1,
|
||||
ctx_oactxctrl + 1,
|
||||
},
|
||||
{ EU_PERF_CNTL0, ctx_flexeuN(0) },
|
||||
{ EU_PERF_CNTL1, ctx_flexeuN(1) },
|
||||
@ -4543,6 +4629,37 @@ static void oa_init_supported_formats(struct i915_perf *perf)
|
||||
}
|
||||
}
|
||||
|
||||
static void i915_perf_init_info(struct drm_i915_private *i915)
|
||||
{
|
||||
struct i915_perf *perf = &i915->perf;
|
||||
|
||||
switch (GRAPHICS_VER(i915)) {
|
||||
case 8:
|
||||
perf->ctx_oactxctrl_offset = 0x120;
|
||||
perf->ctx_flexeu0_offset = 0x2ce;
|
||||
perf->gen8_valid_ctx_bit = BIT(25);
|
||||
break;
|
||||
case 9:
|
||||
perf->ctx_oactxctrl_offset = 0x128;
|
||||
perf->ctx_flexeu0_offset = 0x3de;
|
||||
perf->gen8_valid_ctx_bit = BIT(16);
|
||||
break;
|
||||
case 11:
|
||||
perf->ctx_oactxctrl_offset = 0x124;
|
||||
perf->ctx_flexeu0_offset = 0x78e;
|
||||
perf->gen8_valid_ctx_bit = BIT(16);
|
||||
break;
|
||||
case 12:
|
||||
/*
|
||||
* Calculate offset at runtime in oa_pin_context for gen12 and
|
||||
* cache the value in perf->ctx_oactxctrl_offset.
|
||||
*/
|
||||
break;
|
||||
default:
|
||||
MISSING_CASE(GRAPHICS_VER(i915));
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* i915_perf_init - initialize i915-perf state on module bind
|
||||
* @i915: i915 device instance
|
||||
@ -4581,6 +4698,7 @@ void i915_perf_init(struct drm_i915_private *i915)
|
||||
* execlist mode by default.
|
||||
*/
|
||||
perf->ops.read = gen8_oa_read;
|
||||
i915_perf_init_info(i915);
|
||||
|
||||
if (IS_GRAPHICS_VER(i915, 8, 9)) {
|
||||
perf->ops.is_valid_b_counter_reg =
|
||||
@ -4600,18 +4718,6 @@ void i915_perf_init(struct drm_i915_private *i915)
|
||||
perf->ops.enable_metric_set = gen8_enable_metric_set;
|
||||
perf->ops.disable_metric_set = gen8_disable_metric_set;
|
||||
perf->ops.oa_hw_tail_read = gen8_oa_hw_tail_read;
|
||||
|
||||
if (GRAPHICS_VER(i915) == 8) {
|
||||
perf->ctx_oactxctrl_offset = 0x120;
|
||||
perf->ctx_flexeu0_offset = 0x2ce;
|
||||
|
||||
perf->gen8_valid_ctx_bit = BIT(25);
|
||||
} else {
|
||||
perf->ctx_oactxctrl_offset = 0x128;
|
||||
perf->ctx_flexeu0_offset = 0x3de;
|
||||
|
||||
perf->gen8_valid_ctx_bit = BIT(16);
|
||||
}
|
||||
} else if (GRAPHICS_VER(i915) == 11) {
|
||||
perf->ops.is_valid_b_counter_reg =
|
||||
gen7_is_valid_b_counter_addr;
|
||||
@ -4625,11 +4731,6 @@ void i915_perf_init(struct drm_i915_private *i915)
|
||||
perf->ops.enable_metric_set = gen8_enable_metric_set;
|
||||
perf->ops.disable_metric_set = gen11_disable_metric_set;
|
||||
perf->ops.oa_hw_tail_read = gen8_oa_hw_tail_read;
|
||||
|
||||
perf->ctx_oactxctrl_offset = 0x124;
|
||||
perf->ctx_flexeu0_offset = 0x78e;
|
||||
|
||||
perf->gen8_valid_ctx_bit = BIT(16);
|
||||
} else if (GRAPHICS_VER(i915) == 12) {
|
||||
perf->ops.is_valid_b_counter_reg =
|
||||
gen12_is_valid_b_counter_addr;
|
||||
@ -4643,9 +4744,6 @@ void i915_perf_init(struct drm_i915_private *i915)
|
||||
perf->ops.enable_metric_set = gen12_enable_metric_set;
|
||||
perf->ops.disable_metric_set = gen12_disable_metric_set;
|
||||
perf->ops.oa_hw_tail_read = gen12_oa_hw_tail_read;
|
||||
|
||||
perf->ctx_flexeu0_offset = 0;
|
||||
perf->ctx_oactxctrl_offset = 0x144;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -97,7 +97,7 @@
|
||||
#define GEN12_OAR_OACONTROL_COUNTER_FORMAT_SHIFT 1
|
||||
#define GEN12_OAR_OACONTROL_COUNTER_ENABLE (1 << 0)
|
||||
|
||||
#define GEN12_OACTXCONTROL _MMIO(0x2360)
|
||||
/* OACTXCONTROL register, located 0x360 above the given engine MMIO @base. */
#define GEN12_OACTXCONTROL(base) _MMIO((base) + 0x360)
|
||||
#define GEN12_OAR_OASTATUS _MMIO(0x2968)
|
||||
|
||||
/* Gen12 OAG unit */
|
||||
|
Loading…
x
Reference in New Issue
Block a user