linux/drivers/gpu/drm/i915/intel_region_ttm.c

249 lines
7.0 KiB
C
Raw Normal View History

// SPDX-License-Identifier: MIT
/*
* Copyright © 2021 Intel Corporation
*/
#include <drm/ttm/ttm_bo_driver.h>
#include <drm/ttm/ttm_device.h>
#include <drm/ttm/ttm_range_manager.h>
#include "i915_drv.h"
#include "i915_scatterlist.h"
#include "i915_ttm_buddy_manager.h"
#include "intel_region_ttm.h"
#include "gem/i915_gem_region.h"
#include "gem/i915_gem_ttm.h" /* For the funcs/ops export only */
/**
 * DOC: TTM support structure
 *
 * The code in this file deals with setting up memory managers for TTM
 * LMEM and MOCK regions and converting the output from
 * the managers to struct sg_table, basically providing the mapping from
 * i915 GEM regions to TTM memory types and resource managers.
 */
/**
 * intel_region_ttm_device_init - Initialize a TTM device
 * @dev_priv: Pointer to an i915 device private structure.
 *
 * Sets up the TTM device embedded in @dev_priv (&dev_priv->bdev) using the
 * i915 TTM driver ops, the DRM device's struct device, the address space of
 * its anonymous inode and its vma offset manager. The two trailing boolean
 * arguments of ttm_device_init() are both passed as false.
 *
 * Return: The value returned by ttm_device_init(): 0 on success, negative
 * error code on failure.
 */
int intel_region_ttm_device_init(struct drm_i915_private *dev_priv)
{
	struct drm_device *drm_dev = &dev_priv->drm;
	struct ttm_device *bdev = &dev_priv->bdev;
	int err;

	err = ttm_device_init(bdev, i915_ttm_driver(), drm_dev->dev,
			      drm_dev->anon_inode->i_mapping,
			      drm_dev->vma_offset_manager,
			      false, false);

	return err;
}
/**
 * intel_region_ttm_device_fini - Finalize a TTM device
 * @dev_priv: Pointer to an i915 device private structure.
 *
 * Tears down the TTM device embedded in @dev_priv by handing it to
 * ttm_device_fini().
 */
void intel_region_ttm_device_fini(struct drm_i915_private *dev_priv)
{
	struct ttm_device *bdev = &dev_priv->bdev;

	ttm_device_fini(bdev);
}
/*
 * Translate an i915 memory region into the TTM memory type used for it.
 *
 * System regions map to TTM_PL_SYSTEM. Every other accepted region
 * (LOCAL or MOCK) maps to a driver-private type, TTM_PL_PRIV plus the
 * region instance. The driver-private types are used for now, reserving
 * TTM_PL_VRAM for stolen memory and TTM_PL_TT for GGTT use if decided to
 * implement this.
 *
 * Any other region type, or a computed type that does not fit below
 * TTM_NUM_MEM_TYPES, trips a GEM_BUG_ON().
 */
int intel_region_to_ttm_type(const struct intel_memory_region *mem)
{
	GEM_BUG_ON(mem->type != INTEL_MEMORY_LOCAL &&
		   mem->type != INTEL_MEMORY_MOCK &&
		   mem->type != INTEL_MEMORY_SYSTEM);

	if (mem->type != INTEL_MEMORY_SYSTEM) {
		/* Driver-private placement, one slot per region instance. */
		int type = mem->instance + TTM_PL_PRIV;

		GEM_BUG_ON(type >= TTM_NUM_MEM_TYPES);
		return type;
	}

	return TTM_PL_SYSTEM;
}
/**
 * intel_region_ttm_init - Initialize a memory region for TTM.
 * @mem: The region to initialize.
 *
 * Creates an i915 buddy resource manager for the TTM memory type that
 * corresponds to @mem, sized from the region's resource range, its io_size
 * and its minimum page size, with PAGE_SIZE as the final argument. On
 * success the manager registered for that memory type is looked up with
 * ttm_manager_type() and cached in @mem->region_private.
 *
 * NOTE(review): this function was previously documented as attaching only
 * LMEM regions to the TTM device and leaving MOCK regions unattached. No
 * such distinction is made here; confirm against
 * i915_ttm_buddy_man_init().
 *
 * Return: 0 on success, negative error code on failure.
 */
int intel_region_ttm_init(struct intel_memory_region *mem)
{
	struct ttm_device *ttm_dev = &mem->i915->bdev;
	const int ttm_type = intel_region_to_ttm_type(mem);
	int err;

	err = i915_ttm_buddy_man_init(ttm_dev, ttm_type, false,
				      resource_size(&mem->region),
				      mem->io_size,
				      mem->min_page_size, PAGE_SIZE);
	if (!err)
		mem->region_private = ttm_manager_type(ttm_dev, ttm_type);

	return err;
}
/**
 * intel_region_ttm_fini - Finalize a TTM region.
 * @mem: The memory region
 *
 * Cleans up the region's resource manager (if one was set up), then
 * repeatedly flushes freed GEM objects until the region's object list is
 * empty, giving up after ten attempts. Only when the list has drained and
 * a manager exists is the buddy manager for the region taken down and
 * @mem->region_private cleared.
 *
 * Return: -EBUSY if objects are still on the region's list after all
 * attempts (the manager is deliberately left in place), 0 if the list
 * drained and no manager was present, otherwise the result of
 * i915_ttm_buddy_man_fini().
 */
int intel_region_ttm_fini(struct intel_memory_region *mem)
{
	struct ttm_resource_manager *man = mem->region_private;
	int attempt = 0;
	int ret = -EBUSY;

	/*
	 * Put the region's move fences. This releases requests that
	 * may hold on to contexts and vms that may hold on to buffer
	 * objects placed in this region.
	 */
	if (man)
		ttm_resource_manager_cleanup(man);

	/* Flush objects from region. */
	while (attempt++ < 10) {
		int empty;

		i915_gem_flush_free_objects(mem->i915);

		/* The object list is only inspected under its lock. */
		mutex_lock(&mem->objects.lock);
		empty = list_empty(&mem->objects.list);
		mutex_unlock(&mem->objects.lock);

		if (empty) {
			ret = 0;
			break;
		}

		/* Back off, then kick the TTM device's delayed work. */
		msleep(20);
		flush_delayed_work(&mem->i915->bdev.wq);
	}

	/* If we leaked objects, don't free the region causing use after free */
	if (ret)
		return ret;

	/* Nothing to tear down when no manager was ever installed. */
	if (!man)
		return 0;

	ret = i915_ttm_buddy_man_fini(&mem->i915->bdev,
				      intel_region_to_ttm_type(mem));
	GEM_WARN_ON(ret);
	mem->region_private = NULL;

	return ret;
}
/**
drm/i915: Introduce refcounted sg-tables As we start to introduce asynchronous failsafe object migration, where we update the object state and then submit asynchronous commands we need to record what memory resources are actually used by various part of the command stream. Initially for three purposes: 1) Error capture. 2) Asynchronous migration error recovery. 3) Asynchronous vma bind. At the time where these happens, the object state may have been updated to be several migrations ahead and object sg-tables discarded. In order to make it possible to keep sg-tables with memory resource information for these operations, introduce refcounted sg-tables that aren't freed until the last user is done with them. The alternative would be to reference information sitting on the corresponding ttm_resources which typically have the same lifetime as these refcountes sg_tables, but that leads to other awkward constructs: Due to the design direction chosen for ttm resource managers that would lead to diamond-style inheritance, the LMEM resources may sometimes be prematurely freed, and finally the subclassed struct ttm_resource would have to bleed into the asynchronous vma bind code. v3: - Address a number of style issues (Matthew Auld) v4: - Dont check for st->sgl being NULL in i915_ttm_tt__shmem_unpopulate(), that should never happen. (Matthew Auld) v5: - Fix a Potential double-free (Matthew Auld) Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com> Reviewed-by: Matthew Auld <matthew.auld@intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20211101122444.114607-1-thomas.hellstrom@linux.intel.com
2021-11-01 13:24:44 +01:00
* intel_region_ttm_resource_to_rsgt -
* Convert an opaque TTM resource manager resource to a refcounted sg_table.
* @mem: The memory region.
* @res: The resource manager resource obtained from the TTM resource manager.
*
* The gem backends typically use sg-tables for operations on the underlying
* io_memory. So provide a way for the backends to translate the
* nodes they are handed from TTM to sg-tables.
*
* Return: A malloced sg_table on success, an error pointer on failure.
*/
drm/i915: Introduce refcounted sg-tables As we start to introduce asynchronous failsafe object migration, where we update the object state and then submit asynchronous commands we need to record what memory resources are actually used by various part of the command stream. Initially for three purposes: 1) Error capture. 2) Asynchronous migration error recovery. 3) Asynchronous vma bind. At the time where these happens, the object state may have been updated to be several migrations ahead and object sg-tables discarded. In order to make it possible to keep sg-tables with memory resource information for these operations, introduce refcounted sg-tables that aren't freed until the last user is done with them. The alternative would be to reference information sitting on the corresponding ttm_resources which typically have the same lifetime as these refcountes sg_tables, but that leads to other awkward constructs: Due to the design direction chosen for ttm resource managers that would lead to diamond-style inheritance, the LMEM resources may sometimes be prematurely freed, and finally the subclassed struct ttm_resource would have to bleed into the asynchronous vma bind code. v3: - Address a number of style issues (Matthew Auld) v4: - Dont check for st->sgl being NULL in i915_ttm_tt__shmem_unpopulate(), that should never happen. (Matthew Auld) v5: - Fix a Potential double-free (Matthew Auld) Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com> Reviewed-by: Matthew Auld <matthew.auld@intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20211101122444.114607-1-thomas.hellstrom@linux.intel.com
2021-11-01 13:24:44 +01:00
struct i915_refct_sgt *
intel_region_ttm_resource_to_rsgt(struct intel_memory_region *mem,
struct ttm_resource *res)
{
if (mem->is_range_manager) {
struct ttm_range_mgr_node *range_node =
to_ttm_range_mgr_node(res);
drm/i915: Introduce refcounted sg-tables As we start to introduce asynchronous failsafe object migration, where we update the object state and then submit asynchronous commands we need to record what memory resources are actually used by various part of the command stream. Initially for three purposes: 1) Error capture. 2) Asynchronous migration error recovery. 3) Asynchronous vma bind. At the time where these happens, the object state may have been updated to be several migrations ahead and object sg-tables discarded. In order to make it possible to keep sg-tables with memory resource information for these operations, introduce refcounted sg-tables that aren't freed until the last user is done with them. The alternative would be to reference information sitting on the corresponding ttm_resources which typically have the same lifetime as these refcountes sg_tables, but that leads to other awkward constructs: Due to the design direction chosen for ttm resource managers that would lead to diamond-style inheritance, the LMEM resources may sometimes be prematurely freed, and finally the subclassed struct ttm_resource would have to bleed into the asynchronous vma bind code. v3: - Address a number of style issues (Matthew Auld) v4: - Dont check for st->sgl being NULL in i915_ttm_tt__shmem_unpopulate(), that should never happen. (Matthew Auld) v5: - Fix a Potential double-free (Matthew Auld) Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com> Reviewed-by: Matthew Auld <matthew.auld@intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20211101122444.114607-1-thomas.hellstrom@linux.intel.com
2021-11-01 13:24:44 +01:00
return i915_rsgt_from_mm_node(&range_node->mm_nodes[0],
mem->region.start);
} else {
drm/i915: Introduce refcounted sg-tables As we start to introduce asynchronous failsafe object migration, where we update the object state and then submit asynchronous commands we need to record what memory resources are actually used by various part of the command stream. Initially for three purposes: 1) Error capture. 2) Asynchronous migration error recovery. 3) Asynchronous vma bind. At the time where these happens, the object state may have been updated to be several migrations ahead and object sg-tables discarded. In order to make it possible to keep sg-tables with memory resource information for these operations, introduce refcounted sg-tables that aren't freed until the last user is done with them. The alternative would be to reference information sitting on the corresponding ttm_resources which typically have the same lifetime as these refcountes sg_tables, but that leads to other awkward constructs: Due to the design direction chosen for ttm resource managers that would lead to diamond-style inheritance, the LMEM resources may sometimes be prematurely freed, and finally the subclassed struct ttm_resource would have to bleed into the asynchronous vma bind code. v3: - Address a number of style issues (Matthew Auld) v4: - Dont check for st->sgl being NULL in i915_ttm_tt__shmem_unpopulate(), that should never happen. (Matthew Auld) v5: - Fix a Potential double-free (Matthew Auld) Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com> Reviewed-by: Matthew Auld <matthew.auld@intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20211101122444.114607-1-thomas.hellstrom@linux.intel.com
2021-11-01 13:24:44 +01:00
return i915_rsgt_from_buddy_resource(res, mem->region.start);
}
}
#ifdef CONFIG_DRM_I915_SELFTEST
/**
 * intel_region_ttm_resource_alloc - Allocate memory resources from a region
 * @mem: The memory region,
 * @offset: Fixed byte offset to place the allocation at, or
 *          I915_BO_INVALID_OFFSET to let the manager choose.
 * @size: The requested size in bytes
 * @flags: Allocation flags
 *
 * This functionality is provided only for callers that need to allocate
 * memory from standalone TTM range managers, without the TTM eviction
 * functionality. Don't use if you are not completely sure that's the
 * case. The returned opaque node can be converted to a refcounted sg_table
 * using intel_region_ttm_resource_to_rsgt(), and can be freed using
 * intel_region_ttm_resource_free().
 *
 * Placement rules:
 * - I915_BO_ALLOC_CONTIGUOUS in @flags requests a contiguous placement.
 * - A valid @offset pins the page range to [@offset, @offset + @size).
 * - Otherwise, when the region has a non-zero io_size smaller than its
 *   total size, I915_BO_ALLOC_GPU_ONLY allocations are placed top-down
 *   and all other allocations are restricted to the first io_size bytes.
 *
 * Return: A valid pointer on success, an error pointer on failure. A
 * manager result of -ENOSPC is reported to the caller as -ENXIO.
 */
struct ttm_resource *
intel_region_ttm_resource_alloc(struct intel_memory_region *mem,
				resource_size_t offset,
				resource_size_t size,
				unsigned int flags)
{
	struct ttm_resource_manager *man = mem->region_private;
	struct ttm_place place = {};
	struct ttm_buffer_object mock_bo = {};
	struct ttm_resource *res;
	int ret;

	if (flags & I915_BO_ALLOC_CONTIGUOUS)
		place.flags |= TTM_PL_FLAG_CONTIGUOUS;

	if (offset != I915_BO_INVALID_OFFSET) {
		place.fpfn = offset >> PAGE_SHIFT;
		place.lpfn = place.fpfn + (size >> PAGE_SHIFT);
	} else if (mem->io_size && mem->io_size < mem->total) {
		if (flags & I915_BO_ALLOC_GPU_ONLY) {
			place.flags |= TTM_PL_FLAG_TOPDOWN;
		} else {
			place.fpfn = 0;
			place.lpfn = mem->io_size >> PAGE_SHIFT;
		}
	}

	/*
	 * The mock_bo is a hack so we can interface directly with the
	 * manager's alloc() hook without invoking other TTM features like
	 * eviction or moves. Resource initialisation needs bo->bdev, so it
	 * must be populated alongside the size.
	 */
	mock_bo.base.size = size;
	mock_bo.bdev = &mem->i915->bdev;

	ret = man->func->alloc(man, &mock_bo, &place, &res);
	if (ret == -ENOSPC)
		ret = -ENXIO;

	/*
	 * Only touch res if alloc() succeeded. mock_bo lives on this stack
	 * frame, so drop the back-pointer before returning.
	 */
	if (!ret)
		res->bo = NULL; /* Rather blow up, than some uaf */

	return ret ? ERR_PTR(ret) : res;
}
#endif
/**
* intel_region_ttm_resource_free - Free a resource allocated from a resource manager
* @mem: The region the resource was allocated from.
* @res: The opaque resource representing an allocation.
*/
void intel_region_ttm_resource_free(struct intel_memory_region *mem,
struct ttm_resource *res)
{
struct ttm_resource_manager *man = mem->region_private;
drm/i915/ttm: fixup the mock_bo When running the mock selftests we currently blow up with: <6> [299.836278] i915: Running i915_gem_huge_page_mock_selftests/igt_mock_memory_region_huge_pages <1> [299.836356] BUG: kernel NULL pointer dereference, address: 00000000000000c8 <1> [299.836361] #PF: supervisor read access in kernel mode <1> [299.836364] #PF: error_code(0x0000) - not-present page <6> [299.836367] PGD 0 P4D 0 <4> [299.836369] Oops: 0000 [#1] PREEMPT SMP NOPTI <4> [299.836372] CPU: 1 PID: 1429 Comm: i915_selftest Tainted: G U 5.17.0-rc4-CI-CI_DRM_11227+ #1 <4> [299.836376] Hardware name: Intel(R) Client Systems NUC11TNHi5/NUC11TNBi5, BIOS TNTGL357.0042.2020.1221.1743 12/21/2020 <4> [299.836380] RIP: 0010:ttm_resource_init+0x57/0x90 [ttm] <4> [299.836392] RSP: 0018:ffffc90001e4f680 EFLAGS: 00010203 <4> [299.836395] RAX: 0000000000000000 RBX: ffffc90001e4f708 RCX: 0000000000000000 <4> [299.836398] RDX: ffff888116172528 RSI: ffffc90001e4f6f8 RDI: 0000000000000000 <4> [299.836401] RBP: ffffc90001e4f6f8 R08: 00000000000001b0 R09: ffff888116172528 <4> [299.836403] R10: 0000000000000001 R11: 00000000a4cb2e51 R12: ffffc90001e4fa90 <4> [299.836406] R13: ffff888116172528 R14: ffff888130d7f4b0 R15: ffff888130d7f400 <4> [299.836409] FS: 00007ff241684500(0000) GS:ffff88849fe80000(0000) knlGS:0000000000000000 <4> [299.836412] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 <4> [299.836416] CR2: 00000000000000c8 CR3: 0000000107b80001 CR4: 0000000000770ee0 <4> [299.836418] PKRU: 55555554 <4> [299.836420] Call Trace: <4> [299.836422] <TASK> <4> [299.836423] i915_ttm_buddy_man_alloc+0x68/0x240 [i915] ttm_resource_init() now needs to access the bo->bdev, and also wants to store the bo reference. Try to keep both working. The mock_bo is a hack so we can interface directly with the ttm managers alloc() and free() hooks for our mock testing, without invoking other TTM features like eviction, moves, etc. 
v2: make sure we only touch res->bo if the alloc() returns successfully Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/5123 Fixes: 0e05fc49c358 ("drm/ttm: add common accounting to the resource mgr v3") Signed-off-by: Matthew Auld <matthew.auld@intel.com> Cc: Christian König <christian.koenig@amd.com> Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com> Acked-by: Christian König <christian.koenig@amd.com> Reviewed-by: Thomas Hellström <thomas.hellstrom@linux.intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20220221121103.2473831-1-matthew.auld@intel.com
2022-02-21 12:11:03 +00:00
struct ttm_buffer_object mock_bo = {};
mock_bo.base.size = res->num_pages << PAGE_SHIFT;
mock_bo.bdev = &mem->i915->bdev;
res->bo = &mock_bo;
man->func->free(man, res);
}