From 681818fdb97de821cc1ee6b81c7a09f3ef8fc96d Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Wed, 29 Mar 2023 10:33:32 -0700 Subject: [PATCH] drm/xe: Include hardware prefetch buffer in batchbuffer allocations MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The hardware prefetches several cachelines of data from batchbuffers before they are parsed. This prefetching only stops when the parser encounters an MI_BATCH_BUFFER_END instruction (or a nested MI_BATCH_BUFFER_START), so we must ensure that there is enough padding at the end of the batchbuffer to prevent the prefetcher from running past the end of the allocation and potentially faulting. Bspec: 45717 Link: https://lore.kernel.org/r/20230329173334.4015124-2-matthew.d.roper@intel.com Signed-off-by: Matt Roper Reviewed-by: José Roberto de Souza Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_bb.c | 25 +++++++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_bb.c b/drivers/gpu/drm/xe/xe_bb.c index 5b24018e2a80..f326f117ba3b 100644 --- a/drivers/gpu/drm/xe/xe_bb.c +++ b/drivers/gpu/drm/xe/xe_bb.c @@ -8,11 +8,26 @@ #include "regs/xe_gpu_commands.h" #include "xe_device.h" #include "xe_engine_types.h" +#include "xe_gt.h" #include "xe_hw_fence.h" #include "xe_sa.h" #include "xe_sched_job.h" #include "xe_vm_types.h" +static int bb_prefetch(struct xe_gt *gt) +{ + struct xe_device *xe = gt->xe; + + if (GRAPHICS_VERx100(xe) >= 1250 && !xe_gt_is_media_type(gt)) + /* + * RCS and CCS require 1K, although other engines would be + * okay with 512. + */ + return SZ_1K; + else + return SZ_512; +} + struct xe_bb *xe_bb_new(struct xe_gt *gt, u32 dwords, bool usm) { struct xe_bb *bb = kmalloc(sizeof(*bb), GFP_KERNEL); @@ -21,8 +36,14 @@ struct xe_bb *xe_bb_new(struct xe_gt *gt, u32 dwords, bool usm) if (!bb) return ERR_PTR(-ENOMEM); - bb->bo = xe_sa_bo_new(!usm ? >->kernel_bb_pool : - >->usm.bb_pool, 4 * dwords + 4); + /* + * We need to allocate space for the requested number of dwords, + * one additional MI_BATCH_BUFFER_END dword, and additional buffer + * space to accomodate the platform-specific hardware prefetch + * requirements. + */ + bb->bo = xe_sa_bo_new(!usm ? >->kernel_bb_pool : >->usm.bb_pool, + 4 * (dwords + 1) + bb_prefetch(gt)); if (IS_ERR(bb->bo)) { err = PTR_ERR(bb->bo); goto err;