diff --git a/drivers/gpu/drm/xe/xe_ring_ops.c b/drivers/gpu/drm/xe/xe_ring_ops.c index db630d27beba..0be4f489d3e1 100644 --- a/drivers/gpu/drm/xe/xe_ring_ops.c +++ b/drivers/gpu/drm/xe/xe_ring_ops.c @@ -224,6 +224,19 @@ static u32 get_ppgtt_flag(struct xe_sched_job *job) return job->q->vm ? BIT(8) : 0; } +static int emit_copy_timestamp(struct xe_lrc *lrc, u32 *dw, int i) +{ + dw[i++] = MI_COPY_MEM_MEM | MI_COPY_MEM_MEM_SRC_GGTT | + MI_COPY_MEM_MEM_DST_GGTT; + dw[i++] = xe_lrc_ctx_job_timestamp_ggtt_addr(lrc); + dw[i++] = 0; + dw[i++] = xe_lrc_ctx_timestamp_ggtt_addr(lrc); + dw[i++] = 0; + dw[i++] = MI_NOOP; + + return i; +} + /* for engines that don't require any special HW handling (no EUs, no aux inval, etc) */ static void __emit_job_gen12_simple(struct xe_sched_job *job, struct xe_lrc *lrc, u64 batch_addr, u32 seqno) @@ -232,6 +245,8 @@ static void __emit_job_gen12_simple(struct xe_sched_job *job, struct xe_lrc *lrc u32 ppgtt_flag = get_ppgtt_flag(job); struct xe_gt *gt = job->q->gt; + i = emit_copy_timestamp(lrc, dw, i); + if (job->ring_ops_flush_tlb) { dw[i++] = preparser_disable(true); i = emit_flush_imm_ggtt(xe_lrc_start_seqno_ggtt_addr(lrc), @@ -283,6 +298,8 @@ static void __emit_job_gen12_video(struct xe_sched_job *job, struct xe_lrc *lrc, struct xe_device *xe = gt_to_xe(gt); bool decode = job->q->class == XE_ENGINE_CLASS_VIDEO_DECODE; + i = emit_copy_timestamp(lrc, dw, i); + dw[i++] = preparser_disable(true); /* hsdes: 1809175790 */ @@ -332,6 +349,8 @@ static void __emit_job_gen12_render_compute(struct xe_sched_job *job, bool lacks_render = !(gt->info.engine_mask & XE_HW_ENGINE_RCS_MASK); u32 mask_flags = 0; + i = emit_copy_timestamp(lrc, dw, i); + dw[i++] = preparser_disable(true); if (lacks_render) mask_flags = PIPE_CONTROL_3D_ARCH_FLAGS; @@ -375,6 +394,8 @@ static void emit_migration_job_gen12(struct xe_sched_job *job, { u32 dw[MAX_JOB_SIZE_DW], i = 0; + i = emit_copy_timestamp(lrc, dw, i); + i = emit_store_imm_ggtt(xe_lrc_start_seqno_ggtt_addr(lrc), seqno, dw, i);