diff --git a/drivers/gpu/drm/xe/tests/xe_bo.c b/drivers/gpu/drm/xe/tests/xe_bo.c
index 2c04357377ab..549ab343de80 100644
--- a/drivers/gpu/drm/xe/tests/xe_bo.c
+++ b/drivers/gpu/drm/xe/tests/xe_bo.c
@@ -177,8 +177,7 @@ EXPORT_SYMBOL_IF_KUNIT(xe_ccs_migrate_kunit);
 static int evict_test_run_tile(struct xe_device *xe, struct xe_tile *tile, struct kunit *test)
 {
 	struct xe_bo *bo, *external;
-	unsigned int bo_flags = XE_BO_CREATE_USER_BIT |
-		XE_BO_CREATE_VRAM_IF_DGFX(tile);
+	unsigned int bo_flags = XE_BO_CREATE_VRAM_IF_DGFX(tile);
 	struct xe_vm *vm = xe_migrate_get_vm(xe_device_get_root_tile(xe)->migrate);
 	struct xe_gt *__gt;
 	int err, i, id;
@@ -188,16 +187,19 @@ static int evict_test_run_tile(struct xe_device *xe, struct xe_tile *tile, struc
 
 	for (i = 0; i < 2; ++i) {
 		xe_vm_lock(vm, false);
-		bo = xe_bo_create(xe, NULL, vm, 0x10000, ttm_bo_type_device,
-				  bo_flags);
+		bo = xe_bo_create_user(xe, NULL, vm, 0x10000,
+				       DRM_XE_GEM_CPU_CACHING_WC,
+				       ttm_bo_type_device,
+				       bo_flags);
 		xe_vm_unlock(vm);
 		if (IS_ERR(bo)) {
 			KUNIT_FAIL(test, "bo create err=%pe\n", bo);
 			break;
 		}
 
-		external = xe_bo_create(xe, NULL, NULL, 0x10000,
-					ttm_bo_type_device, bo_flags);
+		external = xe_bo_create_user(xe, NULL, NULL, 0x10000,
+					     DRM_XE_GEM_CPU_CACHING_WC,
+					     ttm_bo_type_device, bo_flags);
 		if (IS_ERR(external)) {
 			KUNIT_FAIL(test, "external bo create err=%pe\n", external);
 			goto cleanup_bo;
diff --git a/drivers/gpu/drm/xe/tests/xe_dma_buf.c b/drivers/gpu/drm/xe/tests/xe_dma_buf.c
index 18c00bc03024..81f12422a587 100644
--- a/drivers/gpu/drm/xe/tests/xe_dma_buf.c
+++ b/drivers/gpu/drm/xe/tests/xe_dma_buf.c
@@ -116,8 +116,8 @@ static void xe_test_dmabuf_import_same_driver(struct xe_device *xe)
 		return;
 
 	kunit_info(test, "running %s\n", __func__);
-	bo = xe_bo_create(xe, NULL, NULL, PAGE_SIZE, ttm_bo_type_device,
-			  XE_BO_CREATE_USER_BIT | params->mem_mask);
+	bo = xe_bo_create_user(xe, NULL, NULL, PAGE_SIZE, DRM_XE_GEM_CPU_CACHING_WC,
+			       ttm_bo_type_device, params->mem_mask);
 	if (IS_ERR(bo)) {
 		KUNIT_FAIL(test, "xe_bo_create() failed with err=%ld\n",
 			   PTR_ERR(bo));
diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
index e19337390812..dc1ad3b4dc2a 100644
--- a/drivers/gpu/drm/xe/xe_bo.c
+++ b/drivers/gpu/drm/xe/xe_bo.c
@@ -332,7 +332,7 @@ static struct ttm_tt *xe_ttm_tt_create(struct ttm_buffer_object *ttm_bo,
 	struct xe_device *xe = xe_bo_device(bo);
 	struct xe_ttm_tt *tt;
 	unsigned long extra_pages;
-	enum ttm_caching caching = ttm_cached;
+	enum ttm_caching caching;
 	int err;
 
 	tt = kzalloc(sizeof(*tt), GFP_KERNEL);
@@ -346,13 +346,24 @@ static struct ttm_tt *xe_ttm_tt_create(struct ttm_buffer_object *ttm_bo,
 		extra_pages = DIV_ROUND_UP(xe_device_ccs_bytes(xe, bo->size),
 					   PAGE_SIZE);
 
+	switch (bo->cpu_caching) {
+	case DRM_XE_GEM_CPU_CACHING_WC:
+		caching = ttm_write_combined;
+		break;
+	default:
+		caching = ttm_cached;
+		break;
+	}
+
+	WARN_ON((bo->flags & XE_BO_CREATE_USER_BIT) && !bo->cpu_caching);
+
 	/*
 	 * Display scanout is always non-coherent with the CPU cache.
 	 *
 	 * For Xe_LPG and beyond, PPGTT PTE lookups are also non-coherent and
 	 * require a CPU:WC mapping.
 	 */
-	if (bo->flags & XE_BO_SCANOUT_BIT ||
+	if ((!bo->cpu_caching && bo->flags & XE_BO_SCANOUT_BIT) ||
 	    (xe->info.graphics_verx100 >= 1270 && bo->flags & XE_BO_PAGETABLE))
 		caching = ttm_write_combined;
 
@@ -1198,10 +1209,11 @@ void xe_bo_free(struct xe_bo *bo)
 	kfree(bo);
 }
 
-struct xe_bo *__xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo,
-				    struct xe_tile *tile, struct dma_resv *resv,
-				    struct ttm_lru_bulk_move *bulk, size_t size,
-				    enum ttm_bo_type type, u32 flags)
+struct xe_bo *___xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo,
+				     struct xe_tile *tile, struct dma_resv *resv,
+				     struct ttm_lru_bulk_move *bulk, size_t size,
+				     u16 cpu_caching, enum ttm_bo_type type,
+				     u32 flags)
 {
 	struct ttm_operation_ctx ctx = {
 		.interruptible = true,
@@ -1239,6 +1251,7 @@ struct xe_bo *__xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo,
 	bo->tile = tile;
 	bo->size = size;
 	bo->flags = flags;
+	bo->cpu_caching = cpu_caching;
 	bo->ttm.base.funcs = &xe_gem_object_funcs;
 	bo->props.preferred_mem_class = XE_BO_PROPS_INVALID;
 	bo->props.preferred_gt = XE_BO_PROPS_INVALID;
@@ -1354,11 +1367,11 @@ static int __xe_bo_fixed_placement(struct xe_device *xe,
 	return 0;
 }
 
-struct xe_bo *
-xe_bo_create_locked_range(struct xe_device *xe,
-			  struct xe_tile *tile, struct xe_vm *vm,
-			  size_t size, u64 start, u64 end,
-			  enum ttm_bo_type type, u32 flags)
+static struct xe_bo *
+__xe_bo_create_locked(struct xe_device *xe,
+		      struct xe_tile *tile, struct xe_vm *vm,
+		      size_t size, u64 start, u64 end,
+		      u16 cpu_caching, enum ttm_bo_type type, u32 flags)
 {
 	struct xe_bo *bo = NULL;
 	int err;
@@ -1379,11 +1392,11 @@ xe_bo_create_locked_range(struct xe_device *xe,
 		}
 	}
 
-	bo = __xe_bo_create_locked(xe, bo, tile, vm ? xe_vm_resv(vm) : NULL,
-				   vm && !xe_vm_in_fault_mode(vm) &&
-				   flags & XE_BO_CREATE_USER_BIT ?
-				   &vm->lru_bulk_move : NULL, size,
-				   type, flags);
+	bo = ___xe_bo_create_locked(xe, bo, tile, vm ? xe_vm_resv(vm) : NULL,
+				    vm && !xe_vm_in_fault_mode(vm) &&
+				    flags & XE_BO_CREATE_USER_BIT ?
+				    &vm->lru_bulk_move : NULL, size,
+				    cpu_caching, type, flags);
 	if (IS_ERR(bo))
 		return bo;
 
@@ -1423,11 +1436,35 @@ err_unlock_put_bo:
 	return ERR_PTR(err);
 }
 
+struct xe_bo *
+xe_bo_create_locked_range(struct xe_device *xe,
+			  struct xe_tile *tile, struct xe_vm *vm,
+			  size_t size, u64 start, u64 end,
+			  enum ttm_bo_type type, u32 flags)
+{
+	return __xe_bo_create_locked(xe, tile, vm, size, start, end, 0, type, flags);
+}
+
 struct xe_bo *xe_bo_create_locked(struct xe_device *xe, struct xe_tile *tile,
 				  struct xe_vm *vm, size_t size,
 				  enum ttm_bo_type type, u32 flags)
 {
-	return xe_bo_create_locked_range(xe, tile, vm, size, 0, ~0ULL, type, flags);
+	return __xe_bo_create_locked(xe, tile, vm, size, 0, ~0ULL, 0, type, flags);
+}
+
+struct xe_bo *xe_bo_create_user(struct xe_device *xe, struct xe_tile *tile,
+				struct xe_vm *vm, size_t size,
+				u16 cpu_caching,
+				enum ttm_bo_type type,
+				u32 flags)
+{
+	struct xe_bo *bo = __xe_bo_create_locked(xe, tile, vm, size, 0, ~0ULL,
+						 cpu_caching, type,
+						 flags | XE_BO_CREATE_USER_BIT);
+	if (!IS_ERR(bo))
+		xe_bo_unlock_vm_held(bo);
+
+	return bo;
 }
 
 struct xe_bo *xe_bo_create(struct xe_device *xe, struct xe_tile *tile,
@@ -1809,7 +1846,7 @@ int xe_gem_create_ioctl(struct drm_device *dev, void *data,
 	struct drm_xe_gem_create *args = data;
 	struct xe_vm *vm = NULL;
 	struct xe_bo *bo;
-	unsigned int bo_flags = XE_BO_CREATE_USER_BIT;
+	unsigned int bo_flags;
 	u32 handle;
 	int err;
 
@@ -1840,6 +1877,7 @@ int xe_gem_create_ioctl(struct drm_device *dev, void *data,
 	if (XE_IOCTL_DBG(xe, args->size & ~PAGE_MASK))
 		return -EINVAL;
 
+	bo_flags = 0;
 	if (args->flags & DRM_XE_GEM_CREATE_FLAG_DEFER_BACKING)
 		bo_flags |= XE_BO_DEFER_BACKING;
 
@@ -1855,6 +1893,18 @@ int xe_gem_create_ioctl(struct drm_device *dev, void *data,
 		bo_flags |= XE_BO_NEEDS_CPU_ACCESS;
 	}
 
+	if (XE_IOCTL_DBG(xe, !args->cpu_caching ||
+			 args->cpu_caching > DRM_XE_GEM_CPU_CACHING_WC))
+		return -EINVAL;
+
+	if (XE_IOCTL_DBG(xe, bo_flags & XE_BO_CREATE_VRAM_MASK &&
+			 args->cpu_caching != DRM_XE_GEM_CPU_CACHING_WC))
+		return -EINVAL;
+
+	if (XE_IOCTL_DBG(xe, bo_flags & XE_BO_SCANOUT_BIT &&
+			 args->cpu_caching == DRM_XE_GEM_CPU_CACHING_WB))
+		return -EINVAL;
+
 	if (args->vm_id) {
 		vm = xe_vm_lookup(xef, args->vm_id);
 		if (XE_IOCTL_DBG(xe, !vm))
@@ -1864,8 +1914,8 @@ int xe_gem_create_ioctl(struct drm_device *dev, void *data,
 			goto out_vm;
 	}
 
-	bo = xe_bo_create(xe, NULL, vm, args->size, ttm_bo_type_device,
-			  bo_flags);
+	bo = xe_bo_create_user(xe, NULL, vm, args->size, args->cpu_caching,
+			       ttm_bo_type_device, bo_flags);
 	if (vm)
 		xe_vm_unlock(vm);
 
@@ -2163,10 +2213,12 @@ int xe_bo_dumb_create(struct drm_file *file_priv,
 	args->size = ALIGN(mul_u32_u32(args->pitch, args->height),
 			   page_size);
 
-	bo = xe_bo_create(xe, NULL, NULL, args->size, ttm_bo_type_device,
-			  XE_BO_CREATE_VRAM_IF_DGFX(xe_device_get_root_tile(xe)) |
-			  XE_BO_CREATE_USER_BIT | XE_BO_SCANOUT_BIT |
-			  XE_BO_NEEDS_CPU_ACCESS);
+	bo = xe_bo_create_user(xe, NULL, NULL, args->size,
+			       DRM_XE_GEM_CPU_CACHING_WC,
+			       ttm_bo_type_device,
+			       XE_BO_CREATE_VRAM_IF_DGFX(xe_device_get_root_tile(xe)) |
+			       XE_BO_CREATE_USER_BIT | XE_BO_SCANOUT_BIT |
+			       XE_BO_NEEDS_CPU_ACCESS);
 	if (IS_ERR(bo))
 		return PTR_ERR(bo);
 
diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h
index f8bae873418d..6f183568f76d 100644
--- a/drivers/gpu/drm/xe/xe_bo.h
+++ b/drivers/gpu/drm/xe/xe_bo.h
@@ -94,10 +94,11 @@ struct sg_table;
 struct xe_bo *xe_bo_alloc(void);
 void xe_bo_free(struct xe_bo *bo);
 
-struct xe_bo *__xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo,
-				    struct xe_tile *tile, struct dma_resv *resv,
-				    struct ttm_lru_bulk_move *bulk, size_t size,
-				    enum ttm_bo_type type, u32 flags);
+struct xe_bo *___xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo,
+				     struct xe_tile *tile, struct dma_resv *resv,
+				     struct ttm_lru_bulk_move *bulk, size_t size,
+				     u16 cpu_caching, enum ttm_bo_type type,
+				     u32 flags);
 struct xe_bo *
 xe_bo_create_locked_range(struct xe_device *xe,
 			  struct xe_tile *tile, struct xe_vm *vm,
@@ -109,6 +110,11 @@ struct xe_bo *xe_bo_create_locked(struct xe_device *xe, struct xe_tile *tile,
 struct xe_bo *xe_bo_create(struct xe_device *xe, struct xe_tile *tile,
 			   struct xe_vm *vm, size_t size,
 			   enum ttm_bo_type type, u32 flags);
+struct xe_bo *xe_bo_create_user(struct xe_device *xe, struct xe_tile *tile,
+				struct xe_vm *vm, size_t size,
+				u16 cpu_caching,
+				enum ttm_bo_type type,
+				u32 flags);
 struct xe_bo *xe_bo_create_pin_map(struct xe_device *xe, struct xe_tile *tile,
 				   struct xe_vm *vm, size_t size,
 				   enum ttm_bo_type type, u32 flags);
diff --git a/drivers/gpu/drm/xe/xe_bo_types.h b/drivers/gpu/drm/xe/xe_bo_types.h
index 4bff60996168..f71dbc518958 100644
--- a/drivers/gpu/drm/xe/xe_bo_types.h
+++ b/drivers/gpu/drm/xe/xe_bo_types.h
@@ -79,6 +79,11 @@ struct xe_bo {
 	struct llist_node freed;
 	/** @created: Whether the bo has passed initial creation */
 	bool created;
+	/**
+	 * @cpu_caching: CPU caching mode. Currently only used for userspace
+	 * objects.
+	 */
+	u16 cpu_caching;
 };
 
 #define intel_bo_to_drm_bo(bo) (&(bo)->ttm.base)
diff --git a/drivers/gpu/drm/xe/xe_dma_buf.c b/drivers/gpu/drm/xe/xe_dma_buf.c
index cfde3be3b0dc..64ed303728fd 100644
--- a/drivers/gpu/drm/xe/xe_dma_buf.c
+++ b/drivers/gpu/drm/xe/xe_dma_buf.c
@@ -214,8 +214,9 @@ xe_dma_buf_init_obj(struct drm_device *dev, struct xe_bo *storage,
 	int ret;
 
 	dma_resv_lock(resv, NULL);
-	bo = __xe_bo_create_locked(xe, storage, NULL, resv, NULL, dma_buf->size,
-				   ttm_bo_type_sg, XE_BO_CREATE_SYSTEM_BIT);
+	bo = ___xe_bo_create_locked(xe, storage, NULL, resv, NULL, dma_buf->size,
+				    0, /* Will require 1way or 2way for vm_bind */
+				    ttm_bo_type_sg, XE_BO_CREATE_SYSTEM_BIT);
 	if (IS_ERR(bo)) {
 		ret = PTR_ERR(bo);
 		goto error;
diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
index 88f3aca02b08..ab7d1b26c773 100644
--- a/include/uapi/drm/xe_drm.h
+++ b/include/uapi/drm/xe_drm.h
@@ -541,8 +541,25 @@ struct drm_xe_gem_create {
 	 */
 	__u32 handle;
 
+	/**
+	 * @cpu_caching: The CPU caching mode to select for this object. If
+	 * mmaping the object the mode selected here will also be used.
+	 *
+	 * Supported values:
+	 *
+	 * DRM_XE_GEM_CPU_CACHING_WB: Allocate the pages with write-back
+	 * caching. On iGPU this can't be used for scanout surfaces. Currently
+	 * not allowed for objects placed in VRAM.
+	 *
+	 * DRM_XE_GEM_CPU_CACHING_WC: Allocate the pages as write-combined. This
+	 * is uncached. Scanout surfaces should likely use this. All objects
+	 * that can be placed in VRAM must use this.
+	 */
+#define DRM_XE_GEM_CPU_CACHING_WB	1
+#define DRM_XE_GEM_CPU_CACHING_WC	2
+	__u16 cpu_caching;
 	/** @pad: MBZ */
-	__u32 pad;
+	__u16 pad;
 
 	/** @reserved: Reserved */
 	__u64 reserved[2];
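
For illustration only (not part of the patch): a minimal userspace sketch of how the new @cpu_caching field would be filled in through the GEM create ioctl. The xe_create_wb_bo() helper and its placement_flags parameter are hypothetical; the fields it touches (size, flags, cpu_caching, handle) are the ones the ioctl consumes in the hunks above, and per the new checks a WB mapping is only accepted for non-VRAM, non-scanout placements.

/*
 * Hypothetical example, not from this patch: create a BO with write-back
 * CPU caching via the new @cpu_caching field. Assumes an open Xe DRM fd and
 * the uapi header from this series; the placement mask is supplied by the
 * caller exactly as it would be today, and must not request VRAM or scanout
 * when asking for WB, or the ioctl returns -EINVAL.
 */
#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>

#include <drm/xe_drm.h>

static uint32_t xe_create_wb_bo(int fd, uint64_t size, uint32_t placement_flags)
{
	struct drm_xe_gem_create create;

	memset(&create, 0, sizeof(create));	/* pad and reserved[] must be zero */
	create.size = size;			/* must be page-aligned */
	create.flags = placement_flags;		/* caller-chosen, non-VRAM placement for WB */
	create.cpu_caching = DRM_XE_GEM_CPU_CACHING_WB;

	if (ioctl(fd, DRM_IOCTL_XE_GEM_CREATE, &create))
		return 0;			/* errno holds the reason, e.g. EINVAL */

	return create.handle;
}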