From 11a2407ed5f017edcea436220ebba7c8619924f2 Mon Sep 17 00:00:00 2001 From: Balasubramani Vivekanandan Date: Fri, 17 Mar 2023 21:05:30 +0530 Subject: [PATCH] drm/xe: Stop accepting value in xe_migrate_clear Although xe_migrate_clear() has a value argument, the driver currently passes only 0 at every place this function is invoked, with the exception of the kunit tests, which use the parameter to validate this function with different values. xe_migrate_clear() is failing on platforms with link copy engines because xe_migrate_clear(), via emit_clear(), uses the blitter instruction XY_FAST_COLOR_BLT to clear the memory, but this instruction is not supported by the link copy engines. So the solution is to use the alternate instruction MEM_SET when the platform contains link copy engines. But the MEM_SET instruction accepts only an 8-bit value for setting, whereas the value argument of xe_migrate_clear() is 32-bit. So instead of spreading this limitation around all invocations of xe_migrate_clear() and causing more confusion, it was decided to not accept any value at all, as the driver does not really need this currently. All the kunit tests are adapted as per the new function prototype. This will be followed by a patch to add support for link copy engines. 
Signed-off-by: Balasubramani Vivekanandan Reviewed-by: Lucas De Marchi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/regs/xe_gpu_commands.h | 9 ++++ drivers/gpu/drm/xe/tests/xe_bo.c | 2 +- drivers/gpu/drm/xe/tests/xe_migrate.c | 18 +++---- drivers/gpu/drm/xe/xe_bo.c | 2 +- drivers/gpu/drm/xe/xe_device_types.h | 2 + drivers/gpu/drm/xe/xe_migrate.c | 65 +++++++++++++++++++---- drivers/gpu/drm/xe/xe_migrate.h | 3 +- drivers/gpu/drm/xe/xe_pci.c | 3 ++ 8 files changed, 80 insertions(+), 24 deletions(-) diff --git a/drivers/gpu/drm/xe/regs/xe_gpu_commands.h b/drivers/gpu/drm/xe/regs/xe_gpu_commands.h index 288576035ce3..e60372a82723 100644 --- a/drivers/gpu/drm/xe/regs/xe_gpu_commands.h +++ b/drivers/gpu/drm/xe/regs/xe_gpu_commands.h @@ -6,6 +6,8 @@ #ifndef _XE_GPU_COMMANDS_H_ #define _XE_GPU_COMMANDS_H_ +#include "regs/xe_reg_defs.h" + #define INSTR_CLIENT_SHIFT 29 #define INSTR_MI_CLIENT 0x0 #define __INSTR(client) ((client) << INSTR_CLIENT_SHIFT) @@ -56,6 +58,13 @@ #define GEN9_XY_FAST_COPY_BLT_CMD (2 << 29 | 0x42 << 22) #define BLT_DEPTH_32 (3<<24) +#define PVC_MEM_SET_CMD (2 << 29 | 0x5b << 22) +#define PVC_MEM_SET_CMD_LEN_DW 7 +#define PVC_MS_MATRIX REG_BIT(17) +#define PVC_MS_DATA_FIELD GENMASK(31, 24) +/* Bspec lists field as [6:0], but index alone is from [6:1] */ +#define PVC_MS_MOCS_INDEX_MASK GENMASK(6, 1) + #define GFX_OP_PIPE_CONTROL(len) ((0x3<<29)|(0x3<<27)|(0x2<<24)|((len)-2)) #define PIPE_CONTROL_TILE_CACHE_FLUSH (1<<28) #define PIPE_CONTROL_AMFS_FLUSH (1<<25) diff --git a/drivers/gpu/drm/xe/tests/xe_bo.c b/drivers/gpu/drm/xe/tests/xe_bo.c index f03fb907b59a..3c60cbdf516c 100644 --- a/drivers/gpu/drm/xe/tests/xe_bo.c +++ b/drivers/gpu/drm/xe/tests/xe_bo.c @@ -32,7 +32,7 @@ static int ccs_test_migrate(struct xe_gt *gt, struct xe_bo *bo, /* Optionally clear bo *and* CCS data in VRAM. 
*/ if (clear) { - fence = xe_migrate_clear(gt->migrate, bo, bo->ttm.resource, 0); + fence = xe_migrate_clear(gt->migrate, bo, bo->ttm.resource); if (IS_ERR(fence)) { KUNIT_FAIL(test, "Failed to submit bo clear.\n"); return PTR_ERR(fence); diff --git a/drivers/gpu/drm/xe/tests/xe_migrate.c b/drivers/gpu/drm/xe/tests/xe_migrate.c index e50b6ceb56e6..17829f878757 100644 --- a/drivers/gpu/drm/xe/tests/xe_migrate.c +++ b/drivers/gpu/drm/xe/tests/xe_migrate.c @@ -99,7 +99,7 @@ static void test_copy(struct xe_migrate *m, struct xe_bo *bo, struct kunit *test) { struct xe_device *xe = gt_to_xe(m->gt); - u64 retval, expected = 0xc0c0c0c0c0c0c0c0ULL; + u64 retval, expected = 0; bool big = bo->size >= SZ_2M; struct dma_fence *fence; const char *str = big ? "Copying big bo" : "Copying small bo"; @@ -130,7 +130,7 @@ static void test_copy(struct xe_migrate *m, struct xe_bo *bo, } xe_map_memset(xe, &sysmem->vmap, 0, 0xd0, sysmem->size); - fence = xe_migrate_clear(m, sysmem, sysmem->ttm.resource, 0xc0c0c0c0); + fence = xe_migrate_clear(m, sysmem, sysmem->ttm.resource); if (!sanity_fence_failed(xe, fence, big ? 
"Clearing sysmem big bo" : "Clearing sysmem small bo", test)) { retval = xe_map_rd(xe, &sysmem->vmap, 0, u64); @@ -311,10 +311,10 @@ static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test) bb->len = 0; bb->cs[bb->len++] = MI_BATCH_BUFFER_END; xe_map_wr(xe, &pt->vmap, 0, u32, 0xdeaddead); - expected = 0x12345678U; + expected = 0; emit_clear(m->gt, bb, xe_migrate_vm_addr(NUM_KERNEL_PDE - 1, 0), 4, 4, - expected, IS_DGFX(xe)); + IS_DGFX(xe)); run_sanity_job(m, xe, bb, 1, "Writing to our newly mapped pagetable", test); @@ -326,8 +326,8 @@ static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test) /* Clear a small bo */ kunit_info(test, "Clearing small buffer object\n"); xe_map_memset(xe, &tiny->vmap, 0, 0x22, tiny->size); - expected = 0x224488ff; - fence = xe_migrate_clear(m, tiny, tiny->ttm.resource, expected); + expected = 0; + fence = xe_migrate_clear(m, tiny, tiny->ttm.resource); if (sanity_fence_failed(xe, fence, "Clearing small bo", test)) goto out; @@ -342,11 +342,11 @@ static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test) test_copy(m, tiny, test); } - /* Clear a big bo with a fixed value */ + /* Clear a big bo */ kunit_info(test, "Clearing big buffer object\n"); xe_map_memset(xe, &big->vmap, 0, 0x11, big->size); - expected = 0x11223344U; - fence = xe_migrate_clear(m, big, big->ttm.resource, expected); + expected = 0; + fence = xe_migrate_clear(m, big, big->ttm.resource); if (sanity_fence_failed(xe, fence, "Clearing big bo", test)) goto out; diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index 3ca28f84dff7..ba156a85460c 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -686,7 +686,7 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict, } } else { if (move_lacks_source) - fence = xe_migrate_clear(gt->migrate, bo, new_mem, 0); + fence = xe_migrate_clear(gt->migrate, bo, new_mem); else fence = xe_migrate_copy(gt->migrate, bo, old_mem, 
new_mem); if (IS_ERR(fence)) { diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h index 8d99450f0bf4..377a8979bc06 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -89,6 +89,8 @@ struct xe_device { bool has_4tile; /** @has_range_tlb_invalidation: Has range based TLB invalidations */ bool has_range_tlb_invalidation; + /** @has_link_copy_engine: Whether the platform has link copy engines */ + bool has_link_copy_engine; } info; /** @irq: device interrupt state */ diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c index 3ee3d707a8ca..9102fa1d8759 100644 --- a/drivers/gpu/drm/xe/xe_migrate.c +++ b/drivers/gpu/drm/xe/xe_migrate.c @@ -747,14 +747,35 @@ err_sync: return fence; } -static int emit_clear(struct xe_gt *gt, struct xe_bb *bb, u64 src_ofs, - u32 size, u32 pitch, u32 value, bool is_vram) +static void emit_clear_link_copy(struct xe_gt *gt, struct xe_bb *bb, u64 src_ofs, + u32 size, u32 pitch) { + u32 *cs = bb->cs + bb->len; + u32 mocs = xe_mocs_index_to_value(gt->mocs.uc_index); + u32 len = PVC_MEM_SET_CMD_LEN_DW; + + *cs++ = PVC_MEM_SET_CMD | PVC_MS_MATRIX | (len - 2); + *cs++ = pitch - 1; + *cs++ = (size / pitch) - 1; + *cs++ = pitch - 1; + *cs++ = lower_32_bits(src_ofs); + *cs++ = upper_32_bits(src_ofs); + *cs++ = FIELD_PREP(PVC_MS_MOCS_INDEX_MASK, mocs); + + XE_BUG_ON(cs - bb->cs != len + bb->len); + + bb->len += len; +} + +static void emit_clear_main_copy(struct xe_gt *gt, struct xe_bb *bb, + u64 src_ofs, u32 size, u32 pitch, bool is_vram) +{ + struct xe_device *xe = gt_to_xe(gt); u32 *cs = bb->cs + bb->len; u32 len = XY_FAST_COLOR_BLT_DW; u32 mocs = xe_mocs_index_to_value(gt->mocs.uc_index); - if (GRAPHICS_VERx100(gt->xe) < 1250) + if (GRAPHICS_VERx100(xe) < 1250) len = 11; *cs++ = XY_FAST_COLOR_BLT_CMD | XY_FAST_COLOR_BLT_DEPTH_32 | @@ -766,7 +787,7 @@ static int emit_clear(struct xe_gt *gt, struct xe_bb *bb, u64 src_ofs, *cs++ = 
lower_32_bits(src_ofs); *cs++ = upper_32_bits(src_ofs); *cs++ = (is_vram ? 0x0 : 0x1) << XY_FAST_COLOR_BLT_MEM_TYPE_SHIFT; - *cs++ = value; + *cs++ = 0; *cs++ = 0; *cs++ = 0; *cs++ = 0; @@ -780,7 +801,30 @@ static int emit_clear(struct xe_gt *gt, struct xe_bb *bb, u64 src_ofs, } XE_BUG_ON(cs - bb->cs != len + bb->len); + bb->len += len; +} + +static u32 emit_clear_cmd_len(struct xe_device *xe) +{ + if (xe->info.has_link_copy_engine) + return PVC_MEM_SET_CMD_LEN_DW; + else + return XY_FAST_COLOR_BLT_DW; +} + +static int emit_clear(struct xe_gt *gt, struct xe_bb *bb, u64 src_ofs, + u32 size, u32 pitch, bool is_vram) +{ + struct xe_device *xe = gt_to_xe(gt); + + if (xe->info.has_link_copy_engine) { + emit_clear_link_copy(gt, bb, src_ofs, size, pitch); + + } else { + emit_clear_main_copy(gt, bb, src_ofs, size, pitch, + is_vram); + } return 0; } @@ -790,10 +834,9 @@ static int emit_clear(struct xe_gt *gt, struct xe_bb *bb, u64 src_ofs, * @m: The migration context. * @bo: The buffer object @dst is currently bound to. * @dst: The dst TTM resource to be cleared. - * @value: Clear value. * - * Clear the contents of @dst. On flat CCS devices, - * the CCS metadata is cleared to zero as well on VRAM destionations. + * Clear the contents of @dst to zero. On flat CCS devices, + * the CCS metadata is cleared to zero as well on VRAM destinations. * TODO: Eliminate the @bo argument. 
* * Return: Pointer to a dma_fence representing the last clear batch, or @@ -802,8 +845,7 @@ static int emit_clear(struct xe_gt *gt, struct xe_bb *bb, u64 src_ofs, */ struct dma_fence *xe_migrate_clear(struct xe_migrate *m, struct xe_bo *bo, - struct ttm_resource *dst, - u32 value) + struct ttm_resource *dst) { bool clear_vram = mem_type_is_vram(dst->mem_type); struct xe_gt *gt = m->gt; @@ -837,7 +879,8 @@ struct dma_fence *xe_migrate_clear(struct xe_migrate *m, batch_size = 2 + pte_update_size(m, clear_vram, &src_it, &clear_L0, &clear_L0_ofs, &clear_L0_pt, - XY_FAST_COLOR_BLT_DW, 0, NUM_PT_PER_BLIT); + emit_clear_cmd_len(xe), 0, + NUM_PT_PER_BLIT); if (xe_device_has_flat_ccs(xe) && clear_vram) batch_size += EMIT_COPY_CCS_DW; @@ -868,7 +911,7 @@ struct dma_fence *xe_migrate_clear(struct xe_migrate *m, update_idx = bb->len; emit_clear(gt, bb, clear_L0_ofs, clear_L0, GEN8_PAGE_SIZE, - value, clear_vram); + clear_vram); if (xe_device_has_flat_ccs(xe) && clear_vram) { emit_copy_ccs(gt, bb, clear_L0_ofs, true, m->cleared_vram_ofs, false, clear_L0); diff --git a/drivers/gpu/drm/xe/xe_migrate.h b/drivers/gpu/drm/xe/xe_migrate.h index a569851db6f7..1ff6e0a90de5 100644 --- a/drivers/gpu/drm/xe/xe_migrate.h +++ b/drivers/gpu/drm/xe/xe_migrate.h @@ -79,8 +79,7 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m, struct dma_fence *xe_migrate_clear(struct xe_migrate *m, struct xe_bo *bo, - struct ttm_resource *dst, - u32 value); + struct ttm_resource *dst); struct xe_vm *xe_migrate_get_vm(struct xe_migrate *m); diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index 6dcefb8cc7c3..0a3b61f08d37 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -71,6 +71,7 @@ struct xe_device_desc { bool has_4tile; bool has_range_tlb_invalidation; bool has_asid; + bool has_link_copy_engine; }; #define PLATFORM(x) \ @@ -226,6 +227,7 @@ static const __maybe_unused struct xe_device_desc pvc_desc = { .vm_max_level = 4, .supports_usm = true, 
.has_asid = true, + .has_link_copy_engine = true, }; #define MTL_MEDIA_ENGINES \ @@ -413,6 +415,7 @@ static int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) xe->info.has_flat_ccs = desc->has_flat_ccs; xe->info.has_4tile = desc->has_4tile; xe->info.has_range_tlb_invalidation = desc->has_range_tlb_invalidation; + xe->info.has_link_copy_engine = desc->has_link_copy_engine; spd = subplatform_get(xe, desc); xe->info.subplatform = spd ? spd->subplatform : XE_SUBPLATFORM_NONE;