diff --git a/drivers/gpu/drm/xe/regs/xe_gpu_commands.h b/drivers/gpu/drm/xe/regs/xe_gpu_commands.h index 288576035ce3..e60372a82723 100644 --- a/drivers/gpu/drm/xe/regs/xe_gpu_commands.h +++ b/drivers/gpu/drm/xe/regs/xe_gpu_commands.h @@ -6,6 +6,8 @@ #ifndef _XE_GPU_COMMANDS_H_ #define _XE_GPU_COMMANDS_H_ +#include "regs/xe_reg_defs.h" + #define INSTR_CLIENT_SHIFT 29 #define INSTR_MI_CLIENT 0x0 #define __INSTR(client) ((client) << INSTR_CLIENT_SHIFT) @@ -56,6 +58,13 @@ #define GEN9_XY_FAST_COPY_BLT_CMD (2 << 29 | 0x42 << 22) #define BLT_DEPTH_32 (3<<24) +#define PVC_MEM_SET_CMD (2 << 29 | 0x5b << 22) +#define PVC_MEM_SET_CMD_LEN_DW 7 +#define PVC_MS_MATRIX REG_BIT(17) +#define PVC_MS_DATA_FIELD GENMASK(31, 24) +/* Bspec lists field as [6:0], but index alone is from [6:1] */ +#define PVC_MS_MOCS_INDEX_MASK GENMASK(6, 1) + #define GFX_OP_PIPE_CONTROL(len) ((0x3<<29)|(0x3<<27)|(0x2<<24)|((len)-2)) #define PIPE_CONTROL_TILE_CACHE_FLUSH (1<<28) #define PIPE_CONTROL_AMFS_FLUSH (1<<25) diff --git a/drivers/gpu/drm/xe/tests/xe_bo.c b/drivers/gpu/drm/xe/tests/xe_bo.c index f03fb907b59a..3c60cbdf516c 100644 --- a/drivers/gpu/drm/xe/tests/xe_bo.c +++ b/drivers/gpu/drm/xe/tests/xe_bo.c @@ -32,7 +32,7 @@ static int ccs_test_migrate(struct xe_gt *gt, struct xe_bo *bo, /* Optionally clear bo *and* CCS data in VRAM. 
*/ if (clear) { - fence = xe_migrate_clear(gt->migrate, bo, bo->ttm.resource, 0); + fence = xe_migrate_clear(gt->migrate, bo, bo->ttm.resource); if (IS_ERR(fence)) { KUNIT_FAIL(test, "Failed to submit bo clear.\n"); return PTR_ERR(fence); diff --git a/drivers/gpu/drm/xe/tests/xe_migrate.c b/drivers/gpu/drm/xe/tests/xe_migrate.c index e50b6ceb56e6..17829f878757 100644 --- a/drivers/gpu/drm/xe/tests/xe_migrate.c +++ b/drivers/gpu/drm/xe/tests/xe_migrate.c @@ -99,7 +99,7 @@ static void test_copy(struct xe_migrate *m, struct xe_bo *bo, struct kunit *test) { struct xe_device *xe = gt_to_xe(m->gt); - u64 retval, expected = 0xc0c0c0c0c0c0c0c0ULL; + u64 retval, expected = 0; bool big = bo->size >= SZ_2M; struct dma_fence *fence; const char *str = big ? "Copying big bo" : "Copying small bo"; @@ -130,7 +130,7 @@ static void test_copy(struct xe_migrate *m, struct xe_bo *bo, } xe_map_memset(xe, &sysmem->vmap, 0, 0xd0, sysmem->size); - fence = xe_migrate_clear(m, sysmem, sysmem->ttm.resource, 0xc0c0c0c0); + fence = xe_migrate_clear(m, sysmem, sysmem->ttm.resource); if (!sanity_fence_failed(xe, fence, big ? 
"Clearing sysmem big bo" : "Clearing sysmem small bo", test)) { retval = xe_map_rd(xe, &sysmem->vmap, 0, u64); @@ -311,10 +311,10 @@ static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test) bb->len = 0; bb->cs[bb->len++] = MI_BATCH_BUFFER_END; xe_map_wr(xe, &pt->vmap, 0, u32, 0xdeaddead); - expected = 0x12345678U; + expected = 0; emit_clear(m->gt, bb, xe_migrate_vm_addr(NUM_KERNEL_PDE - 1, 0), 4, 4, - expected, IS_DGFX(xe)); + IS_DGFX(xe)); run_sanity_job(m, xe, bb, 1, "Writing to our newly mapped pagetable", test); @@ -326,8 +326,8 @@ static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test) /* Clear a small bo */ kunit_info(test, "Clearing small buffer object\n"); xe_map_memset(xe, &tiny->vmap, 0, 0x22, tiny->size); - expected = 0x224488ff; - fence = xe_migrate_clear(m, tiny, tiny->ttm.resource, expected); + expected = 0; + fence = xe_migrate_clear(m, tiny, tiny->ttm.resource); if (sanity_fence_failed(xe, fence, "Clearing small bo", test)) goto out; @@ -342,11 +342,11 @@ static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test) test_copy(m, tiny, test); } - /* Clear a big bo with a fixed value */ + /* Clear a big bo */ kunit_info(test, "Clearing big buffer object\n"); xe_map_memset(xe, &big->vmap, 0, 0x11, big->size); - expected = 0x11223344U; - fence = xe_migrate_clear(m, big, big->ttm.resource, expected); + expected = 0; + fence = xe_migrate_clear(m, big, big->ttm.resource); if (sanity_fence_failed(xe, fence, "Clearing big bo", test)) goto out; diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index 3ca28f84dff7..ba156a85460c 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -686,7 +686,7 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict, } } else { if (move_lacks_source) - fence = xe_migrate_clear(gt->migrate, bo, new_mem, 0); + fence = xe_migrate_clear(gt->migrate, bo, new_mem); else fence = xe_migrate_copy(gt->migrate, bo, old_mem, 
new_mem); if (IS_ERR(fence)) { diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h index 8d99450f0bf4..377a8979bc06 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -89,6 +89,8 @@ struct xe_device { bool has_4tile; /** @has_range_tlb_invalidation: Has range based TLB invalidations */ bool has_range_tlb_invalidation; + /** @has_link_copy_engine: Whether the platform has link copy engines */ + bool has_link_copy_engine; } info; /** @irq: device interrupt state */ diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c index 3ee3d707a8ca..9102fa1d8759 100644 --- a/drivers/gpu/drm/xe/xe_migrate.c +++ b/drivers/gpu/drm/xe/xe_migrate.c @@ -747,14 +747,35 @@ err_sync: return fence; } -static int emit_clear(struct xe_gt *gt, struct xe_bb *bb, u64 src_ofs, - u32 size, u32 pitch, u32 value, bool is_vram) +static void emit_clear_link_copy(struct xe_gt *gt, struct xe_bb *bb, u64 src_ofs, + u32 size, u32 pitch) { + u32 *cs = bb->cs + bb->len; + u32 mocs_index = gt->mocs.uc_index; /* raw index: PVC_MS_MOCS_INDEX_MASK covers the index bits only */ + u32 len = PVC_MEM_SET_CMD_LEN_DW; + + *cs++ = PVC_MEM_SET_CMD | PVC_MS_MATRIX | (len - 2); + *cs++ = pitch - 1; + *cs++ = (size / pitch) - 1; + *cs++ = pitch - 1; + *cs++ = lower_32_bits(src_ofs); + *cs++ = upper_32_bits(src_ofs); + *cs++ = FIELD_PREP(PVC_MS_MOCS_INDEX_MASK, mocs_index); /* FIELD_PREP does the shift into [6:1] */ + + XE_BUG_ON(cs - bb->cs != len + bb->len); + + bb->len += len; +} + +static void emit_clear_main_copy(struct xe_gt *gt, struct xe_bb *bb, + u64 src_ofs, u32 size, u32 pitch, bool is_vram) +{ + struct xe_device *xe = gt_to_xe(gt); u32 *cs = bb->cs + bb->len; u32 len = XY_FAST_COLOR_BLT_DW; u32 mocs = xe_mocs_index_to_value(gt->mocs.uc_index); - if (GRAPHICS_VERx100(gt->xe) < 1250) + if (GRAPHICS_VERx100(xe) < 1250) len = 11; *cs++ = XY_FAST_COLOR_BLT_CMD | XY_FAST_COLOR_BLT_DEPTH_32 | @@ -766,7 +787,7 @@ static int emit_clear(struct xe_gt *gt, struct xe_bb *bb, u64 src_ofs, *cs++ = 
lower_32_bits(src_ofs); *cs++ = upper_32_bits(src_ofs); *cs++ = (is_vram ? 0x0 : 0x1) << XY_FAST_COLOR_BLT_MEM_TYPE_SHIFT; - *cs++ = value; + *cs++ = 0; *cs++ = 0; *cs++ = 0; *cs++ = 0; @@ -780,7 +801,30 @@ static int emit_clear(struct xe_gt *gt, struct xe_bb *bb, u64 src_ofs, } XE_BUG_ON(cs - bb->cs != len + bb->len); + bb->len += len; +} + +static u32 emit_clear_cmd_len(struct xe_device *xe) +{ + if (xe->info.has_link_copy_engine) + return PVC_MEM_SET_CMD_LEN_DW; + else + return XY_FAST_COLOR_BLT_DW; +} + +static int emit_clear(struct xe_gt *gt, struct xe_bb *bb, u64 src_ofs, + u32 size, u32 pitch, bool is_vram) +{ + struct xe_device *xe = gt_to_xe(gt); + + if (xe->info.has_link_copy_engine) { + emit_clear_link_copy(gt, bb, src_ofs, size, pitch); + + } else { + emit_clear_main_copy(gt, bb, src_ofs, size, pitch, + is_vram); + } return 0; } @@ -790,10 +834,9 @@ static int emit_clear(struct xe_gt *gt, struct xe_bb *bb, u64 src_ofs, * @m: The migration context. * @bo: The buffer object @dst is currently bound to. * @dst: The dst TTM resource to be cleared. - * @value: Clear value. * - * Clear the contents of @dst. On flat CCS devices, - * the CCS metadata is cleared to zero as well on VRAM destionations. + * Clear the contents of @dst to zero. On flat CCS devices, + * the CCS metadata is cleared to zero as well on VRAM destinations. * TODO: Eliminate the @bo argument. 
* * Return: Pointer to a dma_fence representing the last clear batch, or @@ -802,8 +845,7 @@ static int emit_clear(struct xe_gt *gt, struct xe_bb *bb, u64 src_ofs, */ struct dma_fence *xe_migrate_clear(struct xe_migrate *m, struct xe_bo *bo, - struct ttm_resource *dst, - u32 value) + struct ttm_resource *dst) { bool clear_vram = mem_type_is_vram(dst->mem_type); struct xe_gt *gt = m->gt; @@ -837,7 +879,8 @@ struct dma_fence *xe_migrate_clear(struct xe_migrate *m, batch_size = 2 + pte_update_size(m, clear_vram, &src_it, &clear_L0, &clear_L0_ofs, &clear_L0_pt, - XY_FAST_COLOR_BLT_DW, 0, NUM_PT_PER_BLIT); + emit_clear_cmd_len(xe), 0, + NUM_PT_PER_BLIT); if (xe_device_has_flat_ccs(xe) && clear_vram) batch_size += EMIT_COPY_CCS_DW; @@ -868,7 +911,7 @@ struct dma_fence *xe_migrate_clear(struct xe_migrate *m, update_idx = bb->len; emit_clear(gt, bb, clear_L0_ofs, clear_L0, GEN8_PAGE_SIZE, - value, clear_vram); + clear_vram); if (xe_device_has_flat_ccs(xe) && clear_vram) { emit_copy_ccs(gt, bb, clear_L0_ofs, true, m->cleared_vram_ofs, false, clear_L0); diff --git a/drivers/gpu/drm/xe/xe_migrate.h b/drivers/gpu/drm/xe/xe_migrate.h index a569851db6f7..1ff6e0a90de5 100644 --- a/drivers/gpu/drm/xe/xe_migrate.h +++ b/drivers/gpu/drm/xe/xe_migrate.h @@ -79,8 +79,7 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m, struct dma_fence *xe_migrate_clear(struct xe_migrate *m, struct xe_bo *bo, - struct ttm_resource *dst, - u32 value); + struct ttm_resource *dst); struct xe_vm *xe_migrate_get_vm(struct xe_migrate *m); diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index 6dcefb8cc7c3..0a3b61f08d37 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -71,6 +71,7 @@ struct xe_device_desc { bool has_4tile; bool has_range_tlb_invalidation; bool has_asid; + bool has_link_copy_engine; }; #define PLATFORM(x) \ @@ -226,6 +227,7 @@ static const __maybe_unused struct xe_device_desc pvc_desc = { .vm_max_level = 4, .supports_usm = true, 
.has_asid = true, + .has_link_copy_engine = true, }; #define MTL_MEDIA_ENGINES \ @@ -413,6 +415,7 @@ static int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) xe->info.has_flat_ccs = desc->has_flat_ccs; xe->info.has_4tile = desc->has_4tile; xe->info.has_range_tlb_invalidation = desc->has_range_tlb_invalidation; + xe->info.has_link_copy_engine = desc->has_link_copy_engine; spd = subplatform_get(xe, desc); xe->info.subplatform = spd ? spd->subplatform : XE_SUBPLATFORM_NONE;