linux/drivers/gpu/drm/i915/selftests/intel_memory_region.c

1238 lines
27 KiB
C
Raw Normal View History

// SPDX-License-Identifier: MIT
/*
* Copyright © 2019 Intel Corporation
*/
#include <linux/prime_numbers.h>
#include <linux/sort.h>
drm: move the buddy allocator from i915 into common drm Move the base i915 buddy allocator code into drm - Move i915_buddy.h to include/drm - Move i915_buddy.c to drm root folder - Rename "i915" string with "drm" string wherever applicable - Rename "I915" string with "DRM" string wherever applicable - Fix header file dependencies - Fix alignment issues - add Makefile support for drm buddy - export functions and write kerneldoc description - Remove i915 selftest config check condition as buddy selftest will be moved to drm selftest folder cleanup i915 buddy references in i915 driver module and replace with drm buddy v2: - include header file in alphabetical order(Thomas) - merged changes listed in the body section into a single patch to keep the build intact(Christian, Jani) v3: - make drm buddy a separate module(Thomas, Christian) v4: - Fix build error reported by kernel test robot <lkp@intel.com> - removed i915 buddy selftest from i915_mock_selftests.h to avoid build error - removed selftests/i915_buddy.c file as we create a new set of buddy test cases in drm/selftests folder v5: - Fix merge conflict issue v6: - replace drm_buddy_mm structure name as drm_buddy(Thomas, Christian) - replace drm_buddy_alloc() function name as drm_buddy_alloc_blocks() (Thomas) - replace drm_buddy_free() function name as drm_buddy_free_block() (Thomas) - export drm_buddy_free_block() function - fix multiple instances of KMEM_CACHE() entry v7: - fix warnings reported by kernel test robot <lkp@intel.com> - modify the license(Christian) v8: - fix warnings reported by kernel test robot <lkp@intel.com> Signed-off-by: Arunpravin <Arunpravin.PaneerSelvam@amd.com> Acked-by: Christian König <christian.koenig@amd.com> Link: https://patchwork.freedesktop.org/patch/msgid/20220118104504.2349-1-Arunpravin.PaneerSelvam@amd.com Signed-off-by: Christian König <christian.koenig@amd.com>
2022-01-18 16:14:59 +05:30
#include <drm/drm_buddy.h>
#include "../i915_selftest.h"
#include "mock_drm.h"
#include "mock_gem_device.h"
#include "mock_region.h"
#include "gem/i915_gem_context.h"
#include "gem/i915_gem_lmem.h"
#include "gem/i915_gem_region.h"
#include "gem/selftests/igt_gem_utils.h"
#include "gem/selftests/mock_context.h"
#include "gt/intel_engine_pm.h"
#include "gt/intel_engine_user.h"
#include "gt/intel_gt.h"
#include "gt/intel_migrate.h"
#include "i915_memcpy.h"
#include "i915_ttm_buddy_manager.h"
#include "selftests/igt_flush_test.h"
#include "selftests/i915_random.h"
static void close_objects(struct intel_memory_region *mem,
struct list_head *objects)
{
struct drm_i915_private *i915 = mem->i915;
struct drm_i915_gem_object *obj, *on;
list_for_each_entry_safe(obj, on, objects, st_link) {
i915_gem_object_lock(obj, NULL);
if (i915_gem_object_has_pinned_pages(obj))
i915_gem_object_unpin_pages(obj);
/* No polluting the memory region between tests */
drm/i915: Switch obj->mm.lock lockdep annotations on its head The trouble with having a plain nesting flag for locks which do not naturally nest (unlike block devices and their partitions, which is the original motivation for nesting levels) is that lockdep will never spot a true deadlock if you screw up. This patch is an attempt at trying better, by highlighting a bit more of the actual nature of the nesting that's going on. Essentially we have two kinds of objects: - objects without pages allocated, which cannot be on any lru and are hence inaccessible to the shrinker. - objects which have pages allocated, which are on an lru, and which the shrinker can decide to throw out. For the former type of object, memory allocations while holding obj->mm.lock are permissible. For the latter they are not. And get/put_pages transitions between the two types of objects. This is still not entirely fool-proof since the rules might change. But as long as we run such a code ever at runtime lockdep should be able to observe the inconsistency and complain (like with any other lockdep class that we've split up in multiple classes). But there are a few clear benefits: - We can drop the nesting flag parameter from __i915_gem_object_put_pages, because that function by definition is never going allocate memory, and calling it on an object which doesn't have its pages allocated would be a bug. - We strictly catch more bugs, since there's not only one place in the entire tree which is annotated with the special class. All the other places that had explicit lockdep nesting annotations we're now going to leave up to lockdep again. - Specifically this catches stuff like calling get_pages from put_pages (which isn't really a good idea, if we can call get_pages so could the shrinker). I've seen patches do exactly that. Of course I fully expect CI will show me for the fool I am with this one here :-) v2: There can only be one (lockdep only has a cache for the first subclass, not for deeper ones, and we don't want to make these locks even slower). Still separate enums for better documentation. Real fix: don't forget about phys objs and pin_map(), and fix the shrinker to have the right annotations ... silly me. v3: Forgot usertptr too ... v4: Improve comment for pages_pin_count, drop the IMPORTANT comment and instead prime lockdep (Chris). v5: Appease checkpatch, no double empty lines (Chris) v6: More rebasing over selftest changes. Also somehow I forgot to push this patch :-/ Also format comments consistently while at it. v7: Fix typo in commit message (Joonas) Also drop the priming, with the lmem merge we now have allocations while holding the lmem lock, which wreaks the generic priming I've done in earlier patches. Should probably be resurrected when lmem is fixed. See commit 232a6ebae419193f5b8da4fa869ae5089ab105c2 Author: Matthew Auld <matthew.auld@intel.com> Date: Tue Oct 8 17:01:14 2019 +0100 drm/i915: introduce intel_memory_region I'm keeping the priming patch locally so it wont get lost. Cc: Matthew Auld <matthew.auld@intel.com> Cc: Chris Wilson <chris@chris-wilson.co.uk> Cc: "Tang, CQ" <cq.tang@intel.com> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk> (v5) Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> (v6) Signed-off-by: Daniel Vetter <daniel.vetter@intel.com> Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch> Signed-off-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20191105090148.30269-1-daniel.vetter@ffwll.ch [mlankhorst: Fix commit typos pointed out by Michael Ruhl]
2019-11-05 10:01:48 +01:00
__i915_gem_object_put_pages(obj);
i915_gem_object_unlock(obj);
list_del(&obj->st_link);
i915_gem_object_put(obj);
}
cond_resched();
i915_gem_drain_freed_objects(i915);
}
static int igt_mock_fill(void *arg)
{
struct intel_memory_region *mem = arg;
resource_size_t total = resource_size(&mem->region);
resource_size_t page_size;
resource_size_t rem;
unsigned long max_pages;
unsigned long page_num;
LIST_HEAD(objects);
int err = 0;
page_size = PAGE_SIZE;
max_pages = div64_u64(total, page_size);
rem = total;
for_each_prime_number_from(page_num, 1, max_pages) {
resource_size_t size = page_num * page_size;
struct drm_i915_gem_object *obj;
obj = i915_gem_object_create_region(mem, size, 0, 0);
if (IS_ERR(obj)) {
err = PTR_ERR(obj);
break;
}
err = i915_gem_object_pin_pages_unlocked(obj);
if (err) {
i915_gem_object_put(obj);
break;
}
list_add(&obj->st_link, &objects);
rem -= size;
}
if (err == -ENOMEM)
err = 0;
if (err == -ENXIO) {
if (page_num * page_size <= rem) {
pr_err("%s failed, space still left in region\n",
__func__);
err = -EINVAL;
} else {
err = 0;
}
}
close_objects(mem, &objects);
return err;
}
static struct drm_i915_gem_object *
igt_object_create(struct intel_memory_region *mem,
struct list_head *objects,
u64 size,
unsigned int flags)
{
struct drm_i915_gem_object *obj;
int err;
obj = i915_gem_object_create_region(mem, size, 0, flags);
if (IS_ERR(obj))
return obj;
err = i915_gem_object_pin_pages_unlocked(obj);
if (err)
goto put;
list_add(&obj->st_link, objects);
return obj;
put:
i915_gem_object_put(obj);
return ERR_PTR(err);
}
static void igt_object_release(struct drm_i915_gem_object *obj)
{
i915_gem_object_lock(obj, NULL);
i915_gem_object_unpin_pages(obj);
drm/i915: Switch obj->mm.lock lockdep annotations on its head The trouble with having a plain nesting flag for locks which do not naturally nest (unlike block devices and their partitions, which is the original motivation for nesting levels) is that lockdep will never spot a true deadlock if you screw up. This patch is an attempt at trying better, by highlighting a bit more of the actual nature of the nesting that's going on. Essentially we have two kinds of objects: - objects without pages allocated, which cannot be on any lru and are hence inaccessible to the shrinker. - objects which have pages allocated, which are on an lru, and which the shrinker can decide to throw out. For the former type of object, memory allocations while holding obj->mm.lock are permissible. For the latter they are not. And get/put_pages transitions between the two types of objects. This is still not entirely fool-proof since the rules might change. But as long as we run such a code ever at runtime lockdep should be able to observe the inconsistency and complain (like with any other lockdep class that we've split up in multiple classes). But there are a few clear benefits: - We can drop the nesting flag parameter from __i915_gem_object_put_pages, because that function by definition is never going allocate memory, and calling it on an object which doesn't have its pages allocated would be a bug. - We strictly catch more bugs, since there's not only one place in the entire tree which is annotated with the special class. All the other places that had explicit lockdep nesting annotations we're now going to leave up to lockdep again. - Specifically this catches stuff like calling get_pages from put_pages (which isn't really a good idea, if we can call get_pages so could the shrinker). I've seen patches do exactly that. Of course I fully expect CI will show me for the fool I am with this one here :-) v2: There can only be one (lockdep only has a cache for the first subclass, not for deeper ones, and we don't want to make these locks even slower). Still separate enums for better documentation. Real fix: don't forget about phys objs and pin_map(), and fix the shrinker to have the right annotations ... silly me. v3: Forgot usertptr too ... v4: Improve comment for pages_pin_count, drop the IMPORTANT comment and instead prime lockdep (Chris). v5: Appease checkpatch, no double empty lines (Chris) v6: More rebasing over selftest changes. Also somehow I forgot to push this patch :-/ Also format comments consistently while at it. v7: Fix typo in commit message (Joonas) Also drop the priming, with the lmem merge we now have allocations while holding the lmem lock, which wreaks the generic priming I've done in earlier patches. Should probably be resurrected when lmem is fixed. See commit 232a6ebae419193f5b8da4fa869ae5089ab105c2 Author: Matthew Auld <matthew.auld@intel.com> Date: Tue Oct 8 17:01:14 2019 +0100 drm/i915: introduce intel_memory_region I'm keeping the priming patch locally so it wont get lost. Cc: Matthew Auld <matthew.auld@intel.com> Cc: Chris Wilson <chris@chris-wilson.co.uk> Cc: "Tang, CQ" <cq.tang@intel.com> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk> (v5) Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> (v6) Signed-off-by: Daniel Vetter <daniel.vetter@intel.com> Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch> Signed-off-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20191105090148.30269-1-daniel.vetter@ffwll.ch [mlankhorst: Fix commit typos pointed out by Michael Ruhl]
2019-11-05 10:01:48 +01:00
__i915_gem_object_put_pages(obj);
i915_gem_object_unlock(obj);
list_del(&obj->st_link);
i915_gem_object_put(obj);
}
static bool is_contiguous(struct drm_i915_gem_object *obj)
{
struct scatterlist *sg;
dma_addr_t addr = -1;
for (sg = obj->mm.pages->sgl; sg; sg = sg_next(sg)) {
if (addr != -1 && sg_dma_address(sg) != addr)
return false;
addr = sg_dma_address(sg) + sg_dma_len(sg);
}
return true;
}
static int igt_mock_reserve(void *arg)
{
struct intel_memory_region *mem = arg;
struct drm_i915_private *i915 = mem->i915;
resource_size_t avail = resource_size(&mem->region);
struct drm_i915_gem_object *obj;
const u32 chunk_size = SZ_32M;
u32 i, offset, count, *order;
u64 allocated, cur_avail;
I915_RND_STATE(prng);
LIST_HEAD(objects);
int err = 0;
count = avail / chunk_size;
order = i915_random_order(count, &prng);
if (!order)
return 0;
mem = mock_region_create(i915, 0, SZ_2G, I915_GTT_PAGE_SIZE_4K, 0);
if (IS_ERR(mem)) {
pr_err("failed to create memory region\n");
err = PTR_ERR(mem);
goto out_free_order;
}
/* Reserve a bunch of ranges within the region */
for (i = 0; i < count; ++i) {
u64 start = order[i] * chunk_size;
u64 size = i915_prandom_u32_max_state(chunk_size, &prng);
/* Allow for some really big holes */
if (!size)
continue;
size = round_up(size, PAGE_SIZE);
offset = igt_random_offset(&prng, 0, chunk_size, size,
PAGE_SIZE);
err = intel_memory_region_reserve(mem, start + offset, size);
if (err) {
pr_err("%s failed to reserve range", __func__);
goto out_close;
}
/* XXX: maybe sanity check the block range here? */
avail -= size;
}
/* Try to see if we can allocate from the remaining space */
allocated = 0;
cur_avail = avail;
do {
u32 size = i915_prandom_u32_max_state(cur_avail, &prng);
size = max_t(u32, round_up(size, PAGE_SIZE), PAGE_SIZE);
obj = igt_object_create(mem, &objects, size, 0);
if (IS_ERR(obj)) {
if (PTR_ERR(obj) == -ENXIO)
break;
err = PTR_ERR(obj);
goto out_close;
}
cur_avail -= size;
allocated += size;
} while (1);
if (allocated != avail) {
pr_err("%s mismatch between allocation and free space", __func__);
err = -EINVAL;
}
out_close:
close_objects(mem, &objects);
intel_memory_region_destroy(mem);
out_free_order:
kfree(order);
return err;
}
static int igt_mock_contiguous(void *arg)
{
struct intel_memory_region *mem = arg;
struct drm_i915_gem_object *obj;
unsigned long n_objects;
LIST_HEAD(objects);
LIST_HEAD(holes);
I915_RND_STATE(prng);
resource_size_t total;
resource_size_t min;
u64 target;
int err = 0;
total = resource_size(&mem->region);
/* Min size */
obj = igt_object_create(mem, &objects, PAGE_SIZE,
I915_BO_ALLOC_CONTIGUOUS);
if (IS_ERR(obj))
return PTR_ERR(obj);
if (!is_contiguous(obj)) {
pr_err("%s min object spans disjoint sg entries\n", __func__);
err = -EINVAL;
goto err_close_objects;
}
igt_object_release(obj);
/* Max size */
obj = igt_object_create(mem, &objects, total, I915_BO_ALLOC_CONTIGUOUS);
if (IS_ERR(obj))
return PTR_ERR(obj);
if (!is_contiguous(obj)) {
pr_err("%s max object spans disjoint sg entries\n", __func__);
err = -EINVAL;
goto err_close_objects;
}
igt_object_release(obj);
/* Internal fragmentation should not bleed into the object size */
target = i915_prandom_u64_state(&prng);
div64_u64_rem(target, total, &target);
target = round_up(target, PAGE_SIZE);
target = max_t(u64, PAGE_SIZE, target);
obj = igt_object_create(mem, &objects, target,
I915_BO_ALLOC_CONTIGUOUS);
if (IS_ERR(obj))
return PTR_ERR(obj);
if (obj->base.size != target) {
pr_err("%s obj->base.size(%zx) != target(%llx)\n", __func__,
obj->base.size, target);
err = -EINVAL;
goto err_close_objects;
}
if (!is_contiguous(obj)) {
pr_err("%s object spans disjoint sg entries\n", __func__);
err = -EINVAL;
goto err_close_objects;
}
igt_object_release(obj);
/*
* Try to fragment the address space, such that half of it is free, but
* the max contiguous block size is SZ_64K.
*/
target = SZ_64K;
n_objects = div64_u64(total, target);
while (n_objects--) {
struct list_head *list;
if (n_objects % 2)
list = &holes;
else
list = &objects;
obj = igt_object_create(mem, list, target,
I915_BO_ALLOC_CONTIGUOUS);
if (IS_ERR(obj)) {
err = PTR_ERR(obj);
goto err_close_objects;
}
}
close_objects(mem, &holes);
min = target;
target = total >> 1;
/* Make sure we can still allocate all the fragmented space */
obj = igt_object_create(mem, &objects, target, 0);
if (IS_ERR(obj)) {
err = PTR_ERR(obj);
goto err_close_objects;
}
igt_object_release(obj);
/*
* Even though we have enough free space, we don't have a big enough
* contiguous block. Make sure that holds true.
*/
do {
bool should_fail = target > min;
obj = igt_object_create(mem, &objects, target,
I915_BO_ALLOC_CONTIGUOUS);
if (should_fail != IS_ERR(obj)) {
pr_err("%s target allocation(%llx) mismatch\n",
__func__, target);
err = -EINVAL;
goto err_close_objects;
}
target >>= 1;
} while (target >= PAGE_SIZE);
err_close_objects:
list_splice_tail(&holes, &objects);
close_objects(mem, &objects);
return err;
}
static int igt_mock_splintered_region(void *arg)
{
struct intel_memory_region *mem = arg;
struct drm_i915_private *i915 = mem->i915;
struct i915_ttm_buddy_resource *res;
struct drm_i915_gem_object *obj;
drm: move the buddy allocator from i915 into common drm Move the base i915 buddy allocator code into drm - Move i915_buddy.h to include/drm - Move i915_buddy.c to drm root folder - Rename "i915" string with "drm" string wherever applicable - Rename "I915" string with "DRM" string wherever applicable - Fix header file dependencies - Fix alignment issues - add Makefile support for drm buddy - export functions and write kerneldoc description - Remove i915 selftest config check condition as buddy selftest will be moved to drm selftest folder cleanup i915 buddy references in i915 driver module and replace with drm buddy v2: - include header file in alphabetical order(Thomas) - merged changes listed in the body section into a single patch to keep the build intact(Christian, Jani) v3: - make drm buddy a separate module(Thomas, Christian) v4: - Fix build error reported by kernel test robot <lkp@intel.com> - removed i915 buddy selftest from i915_mock_selftests.h to avoid build error - removed selftests/i915_buddy.c file as we create a new set of buddy test cases in drm/selftests folder v5: - Fix merge conflict issue v6: - replace drm_buddy_mm structure name as drm_buddy(Thomas, Christian) - replace drm_buddy_alloc() function name as drm_buddy_alloc_blocks() (Thomas) - replace drm_buddy_free() function name as drm_buddy_free_block() (Thomas) - export drm_buddy_free_block() function - fix multiple instances of KMEM_CACHE() entry v7: - fix warnings reported by kernel test robot <lkp@intel.com> - modify the license(Christian) v8: - fix warnings reported by kernel test robot <lkp@intel.com> Signed-off-by: Arunpravin <Arunpravin.PaneerSelvam@amd.com> Acked-by: Christian König <christian.koenig@amd.com> Link: https://patchwork.freedesktop.org/patch/msgid/20220118104504.2349-1-Arunpravin.PaneerSelvam@amd.com Signed-off-by: Christian König <christian.koenig@amd.com>
2022-01-18 16:14:59 +05:30
struct drm_buddy *mm;
unsigned int expected_order;
LIST_HEAD(objects);
u64 size;
int err = 0;
/*
* Sanity check we can still allocate everything even if the
* mm.max_order != mm.size. i.e our starting address space size is not a
* power-of-two.
*/
size = (SZ_4G - 1) & PAGE_MASK;
mem = mock_region_create(i915, 0, size, PAGE_SIZE, 0);
if (IS_ERR(mem))
return PTR_ERR(mem);
obj = igt_object_create(mem, &objects, size, 0);
if (IS_ERR(obj)) {
err = PTR_ERR(obj);
goto out_close;
}
res = to_ttm_buddy_resource(obj->mm.res);
mm = res->mm;
if (mm->size != size) {
pr_err("%s size mismatch(%llu != %llu)\n",
__func__, mm->size, size);
err = -EINVAL;
goto out_put;
}
expected_order = get_order(rounddown_pow_of_two(size));
if (mm->max_order != expected_order) {
pr_err("%s order mismatch(%u != %u)\n",
__func__, mm->max_order, expected_order);
err = -EINVAL;
goto out_put;
}
close_objects(mem, &objects);
/*
* While we should be able allocate everything without any flag
* restrictions, if we consider I915_BO_ALLOC_CONTIGUOUS then we are
* actually limited to the largest power-of-two for the region size i.e
* max_order, due to the inner workings of the buddy allocator. So make
* sure that does indeed hold true.
*/
obj = igt_object_create(mem, &objects, size, I915_BO_ALLOC_CONTIGUOUS);
if (!IS_ERR(obj)) {
pr_err("%s too large contiguous allocation was not rejected\n",
__func__);
err = -EINVAL;
goto out_close;
}
obj = igt_object_create(mem, &objects, rounddown_pow_of_two(size),
I915_BO_ALLOC_CONTIGUOUS);
if (IS_ERR(obj)) {
pr_err("%s largest possible contiguous allocation failed\n",
__func__);
err = PTR_ERR(obj);
goto out_close;
}
out_close:
close_objects(mem, &objects);
out_put:
intel_memory_region_destroy(mem);
return err;
}
#ifndef SZ_8G
#define SZ_8G BIT_ULL(33)
#endif
static int igt_mock_max_segment(void *arg)
{
const unsigned int max_segment = rounddown(UINT_MAX, PAGE_SIZE);
struct intel_memory_region *mem = arg;
struct drm_i915_private *i915 = mem->i915;
struct i915_ttm_buddy_resource *res;
struct drm_i915_gem_object *obj;
drm: move the buddy allocator from i915 into common drm Move the base i915 buddy allocator code into drm - Move i915_buddy.h to include/drm - Move i915_buddy.c to drm root folder - Rename "i915" string with "drm" string wherever applicable - Rename "I915" string with "DRM" string wherever applicable - Fix header file dependencies - Fix alignment issues - add Makefile support for drm buddy - export functions and write kerneldoc description - Remove i915 selftest config check condition as buddy selftest will be moved to drm selftest folder cleanup i915 buddy references in i915 driver module and replace with drm buddy v2: - include header file in alphabetical order(Thomas) - merged changes listed in the body section into a single patch to keep the build intact(Christian, Jani) v3: - make drm buddy a separate module(Thomas, Christian) v4: - Fix build error reported by kernel test robot <lkp@intel.com> - removed i915 buddy selftest from i915_mock_selftests.h to avoid build error - removed selftests/i915_buddy.c file as we create a new set of buddy test cases in drm/selftests folder v5: - Fix merge conflict issue v6: - replace drm_buddy_mm structure name as drm_buddy(Thomas, Christian) - replace drm_buddy_alloc() function name as drm_buddy_alloc_blocks() (Thomas) - replace drm_buddy_free() function name as drm_buddy_free_block() (Thomas) - export drm_buddy_free_block() function - fix multiple instances of KMEM_CACHE() entry v7: - fix warnings reported by kernel test robot <lkp@intel.com> - modify the license(Christian) v8: - fix warnings reported by kernel test robot <lkp@intel.com> Signed-off-by: Arunpravin <Arunpravin.PaneerSelvam@amd.com> Acked-by: Christian König <christian.koenig@amd.com> Link: https://patchwork.freedesktop.org/patch/msgid/20220118104504.2349-1-Arunpravin.PaneerSelvam@amd.com Signed-off-by: Christian König <christian.koenig@amd.com>
2022-01-18 16:14:59 +05:30
struct drm_buddy_block *block;
struct drm_buddy *mm;
struct list_head *blocks;
struct scatterlist *sg;
LIST_HEAD(objects);
u64 size;
int err = 0;
/*
* While we may create very large contiguous blocks, we may need
* to break those down for consumption elsewhere. In particular,
* dma-mapping with scatterlist elements have an implicit limit of
* UINT_MAX on each element.
*/
size = SZ_8G;
mem = mock_region_create(i915, 0, size, PAGE_SIZE, 0);
if (IS_ERR(mem))
return PTR_ERR(mem);
obj = igt_object_create(mem, &objects, size, 0);
if (IS_ERR(obj)) {
err = PTR_ERR(obj);
goto out_put;
}
res = to_ttm_buddy_resource(obj->mm.res);
blocks = &res->blocks;
mm = res->mm;
size = 0;
list_for_each_entry(block, blocks, link) {
drm: move the buddy allocator from i915 into common drm Move the base i915 buddy allocator code into drm - Move i915_buddy.h to include/drm - Move i915_buddy.c to drm root folder - Rename "i915" string with "drm" string wherever applicable - Rename "I915" string with "DRM" string wherever applicable - Fix header file dependencies - Fix alignment issues - add Makefile support for drm buddy - export functions and write kerneldoc description - Remove i915 selftest config check condition as buddy selftest will be moved to drm selftest folder cleanup i915 buddy references in i915 driver module and replace with drm buddy v2: - include header file in alphabetical order(Thomas) - merged changes listed in the body section into a single patch to keep the build intact(Christian, Jani) v3: - make drm buddy a separate module(Thomas, Christian) v4: - Fix build error reported by kernel test robot <lkp@intel.com> - removed i915 buddy selftest from i915_mock_selftests.h to avoid build error - removed selftests/i915_buddy.c file as we create a new set of buddy test cases in drm/selftests folder v5: - Fix merge conflict issue v6: - replace drm_buddy_mm structure name as drm_buddy(Thomas, Christian) - replace drm_buddy_alloc() function name as drm_buddy_alloc_blocks() (Thomas) - replace drm_buddy_free() function name as drm_buddy_free_block() (Thomas) - export drm_buddy_free_block() function - fix multiple instances of KMEM_CACHE() entry v7: - fix warnings reported by kernel test robot <lkp@intel.com> - modify the license(Christian) v8: - fix warnings reported by kernel test robot <lkp@intel.com> Signed-off-by: Arunpravin <Arunpravin.PaneerSelvam@amd.com> Acked-by: Christian König <christian.koenig@amd.com> Link: https://patchwork.freedesktop.org/patch/msgid/20220118104504.2349-1-Arunpravin.PaneerSelvam@amd.com Signed-off-by: Christian König <christian.koenig@amd.com>
2022-01-18 16:14:59 +05:30
if (drm_buddy_block_size(mm, block) > size)
size = drm_buddy_block_size(mm, block);
}
if (size < max_segment) {
pr_err("%s: Failed to create a huge contiguous block [> %u], largest block %lld\n",
__func__, max_segment, size);
err = -EINVAL;
goto out_close;
}
for (sg = obj->mm.pages->sgl; sg; sg = sg_next(sg)) {
if (sg->length > max_segment) {
pr_err("%s: Created an oversized scatterlist entry, %u > %u\n",
__func__, sg->length, max_segment);
err = -EINVAL;
goto out_close;
}
}
out_close:
close_objects(mem, &objects);
out_put:
intel_memory_region_destroy(mem);
return err;
}
static int igt_gpu_write_dw(struct intel_context *ce,
struct i915_vma *vma,
u32 dword,
u32 value)
{
return igt_gpu_fill_dw(ce, vma, dword * sizeof(u32),
vma->size >> PAGE_SHIFT, value);
}
static int igt_cpu_check(struct drm_i915_gem_object *obj, u32 dword, u32 val)
{
unsigned long n = obj->base.size >> PAGE_SHIFT;
u32 *ptr;
int err;
err = i915_gem_object_wait(obj, 0, MAX_SCHEDULE_TIMEOUT);
if (err)
return err;
ptr = i915_gem_object_pin_map(obj, I915_MAP_WC);
if (IS_ERR(ptr))
return PTR_ERR(ptr);
ptr += dword;
while (n--) {
if (*ptr != val) {
pr_err("base[%u]=%08x, val=%08x\n",
dword, *ptr, val);
err = -EINVAL;
break;
}
ptr += PAGE_SIZE / sizeof(*ptr);
}
i915_gem_object_unpin_map(obj);
return err;
}
static int igt_gpu_write(struct i915_gem_context *ctx,
struct drm_i915_gem_object *obj)
{
struct i915_gem_engines *engines;
struct i915_gem_engines_iter it;
struct i915_address_space *vm;
struct intel_context *ce;
I915_RND_STATE(prng);
IGT_TIMEOUT(end_time);
unsigned int count;
struct i915_vma *vma;
int *order;
int i, n;
int err = 0;
GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
n = 0;
count = 0;
for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) {
count++;
if (!intel_engine_can_store_dword(ce->engine))
continue;
vm = ce->vm;
n++;
}
i915_gem_context_unlock_engines(ctx);
if (!n)
return 0;
order = i915_random_order(count * count, &prng);
if (!order)
return -ENOMEM;
vma = i915_vma_instance(obj, vm, NULL);
if (IS_ERR(vma)) {
err = PTR_ERR(vma);
goto out_free;
}
err = i915_vma_pin(vma, 0, 0, PIN_USER);
if (err)
goto out_free;
i = 0;
engines = i915_gem_context_lock_engines(ctx);
do {
u32 rng = prandom_u32_state(&prng);
u32 dword = offset_in_page(rng) / 4;
ce = engines->engines[order[i] % engines->num_engines];
i = (i + 1) % (count * count);
if (!ce || !intel_engine_can_store_dword(ce->engine))
continue;
err = igt_gpu_write_dw(ce, vma, dword, rng);
if (err)
break;
i915_gem_object_lock(obj, NULL);
err = igt_cpu_check(obj, dword, rng);
i915_gem_object_unlock(obj);
if (err)
break;
} while (!__igt_timeout(end_time, NULL));
i915_gem_context_unlock_engines(ctx);
out_free:
kfree(order);
if (err == -ENOMEM)
err = 0;
return err;
}
static int igt_lmem_create(void *arg)
{
struct drm_i915_private *i915 = arg;
struct drm_i915_gem_object *obj;
int err = 0;
obj = i915_gem_object_create_lmem(i915, PAGE_SIZE, 0);
if (IS_ERR(obj))
return PTR_ERR(obj);
err = i915_gem_object_pin_pages_unlocked(obj);
if (err)
goto out_put;
i915_gem_object_unpin_pages(obj);
out_put:
i915_gem_object_put(obj);
return err;
}
static int igt_lmem_create_with_ps(void *arg)
{
struct drm_i915_private *i915 = arg;
int err = 0;
u32 ps;
for (ps = PAGE_SIZE; ps <= SZ_1G; ps <<= 1) {
struct drm_i915_gem_object *obj;
dma_addr_t daddr;
obj = __i915_gem_object_create_lmem_with_ps(i915, ps, ps, 0);
if (IS_ERR(obj)) {
err = PTR_ERR(obj);
if (err == -ENXIO || err == -E2BIG) {
pr_info("%s not enough lmem for ps(%u) err=%d\n",
__func__, ps, err);
err = 0;
}
break;
}
if (obj->base.size != ps) {
pr_err("%s size(%zu) != ps(%u)\n",
__func__, obj->base.size, ps);
err = -EINVAL;
goto out_put;
}
i915_gem_object_lock(obj, NULL);
err = i915_gem_object_pin_pages(obj);
if (err)
goto out_put;
daddr = i915_gem_object_get_dma_address(obj, 0);
if (!IS_ALIGNED(daddr, ps)) {
pr_err("%s daddr(%pa) not aligned with ps(%u)\n",
__func__, &daddr, ps);
err = -EINVAL;
goto out_unpin;
}
out_unpin:
i915_gem_object_unpin_pages(obj);
__i915_gem_object_put_pages(obj);
out_put:
i915_gem_object_unlock(obj);
i915_gem_object_put(obj);
if (err)
break;
}
return err;
}
static int igt_lmem_create_cleared_cpu(void *arg)
{
struct drm_i915_private *i915 = arg;
I915_RND_STATE(prng);
IGT_TIMEOUT(end_time);
u32 size, i;
int err;
i915_gem_drain_freed_objects(i915);
size = max_t(u32, PAGE_SIZE, i915_prandom_u32_max_state(SZ_32M, &prng));
size = round_up(size, PAGE_SIZE);
i = 0;
do {
struct drm_i915_gem_object *obj;
unsigned int flags;
u32 dword, val;
void *vaddr;
/*
* Alternate between cleared and uncleared allocations, while
* also dirtying the pages each time to check that the pages are
* always cleared if requested, since we should get some overlap
* of the underlying pages, if not all, since we are the only
* user.
*/
flags = I915_BO_ALLOC_CPU_CLEAR;
if (i & 1)
flags = 0;
obj = i915_gem_object_create_lmem(i915, size, flags);
if (IS_ERR(obj))
return PTR_ERR(obj);
i915_gem_object_lock(obj, NULL);
err = i915_gem_object_pin_pages(obj);
if (err)
goto out_put;
dword = i915_prandom_u32_max_state(PAGE_SIZE / sizeof(u32),
&prng);
if (flags & I915_BO_ALLOC_CPU_CLEAR) {
err = igt_cpu_check(obj, dword, 0);
if (err) {
pr_err("%s failed with size=%u, flags=%u\n",
__func__, size, flags);
goto out_unpin;
}
}
vaddr = i915_gem_object_pin_map(obj, I915_MAP_WC);
if (IS_ERR(vaddr)) {
err = PTR_ERR(vaddr);
goto out_unpin;
}
val = prandom_u32_state(&prng);
memset32(vaddr, val, obj->base.size / sizeof(u32));
i915_gem_object_flush_map(obj);
i915_gem_object_unpin_map(obj);
out_unpin:
i915_gem_object_unpin_pages(obj);
__i915_gem_object_put_pages(obj);
out_put:
i915_gem_object_unlock(obj);
i915_gem_object_put(obj);
if (err)
break;
++i;
} while (!__igt_timeout(end_time, NULL));
pr_info("%s completed (%u) iterations\n", __func__, i);
return err;
}
static int igt_lmem_write_gpu(void *arg)
{
struct drm_i915_private *i915 = arg;
struct drm_i915_gem_object *obj;
struct i915_gem_context *ctx;
struct file *file;
I915_RND_STATE(prng);
u32 sz;
int err;
file = mock_file(i915);
if (IS_ERR(file))
return PTR_ERR(file);
ctx = live_context(i915, file);
if (IS_ERR(ctx)) {
err = PTR_ERR(ctx);
goto out_file;
}
sz = round_up(prandom_u32_state(&prng) % SZ_32M, PAGE_SIZE);
obj = i915_gem_object_create_lmem(i915, sz, 0);
if (IS_ERR(obj)) {
err = PTR_ERR(obj);
goto out_file;
}
err = i915_gem_object_pin_pages_unlocked(obj);
if (err)
goto out_put;
err = igt_gpu_write(ctx, obj);
if (err)
pr_err("igt_gpu_write failed(%d)\n", err);
i915_gem_object_unpin_pages(obj);
out_put:
i915_gem_object_put(obj);
out_file:
fput(file);
return err;
}
static struct intel_engine_cs *
random_engine_class(struct drm_i915_private *i915,
unsigned int class,
struct rnd_state *prng)
{
struct intel_engine_cs *engine;
unsigned int count;
count = 0;
for (engine = intel_engine_lookup_user(i915, class, 0);
engine && engine->uabi_class == class;
engine = rb_entry_safe(rb_next(&engine->uabi_node),
typeof(*engine), uabi_node))
count++;
count = i915_prandom_u32_max_state(count, prng);
return intel_engine_lookup_user(i915, class, count);
}
static int igt_lmem_write_cpu(void *arg)
{
struct drm_i915_private *i915 = arg;
struct drm_i915_gem_object *obj;
I915_RND_STATE(prng);
IGT_TIMEOUT(end_time);
u32 bytes[] = {
0, /* rng placeholder */
sizeof(u32),
sizeof(u64),
64, /* cl */
PAGE_SIZE,
PAGE_SIZE - sizeof(u32),
PAGE_SIZE - sizeof(u64),
PAGE_SIZE - 64,
};
struct intel_engine_cs *engine;
struct i915_request *rq;
u32 *vaddr;
u32 sz;
u32 i;
int *order;
int count;
int err;
engine = random_engine_class(i915, I915_ENGINE_CLASS_COPY, &prng);
if (!engine)
return 0;
pr_info("%s: using %s\n", __func__, engine->name);
sz = round_up(prandom_u32_state(&prng) % SZ_32M, PAGE_SIZE);
sz = max_t(u32, 2 * PAGE_SIZE, sz);
obj = i915_gem_object_create_lmem(i915, sz, I915_BO_ALLOC_CONTIGUOUS);
if (IS_ERR(obj))
return PTR_ERR(obj);
vaddr = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WC);
if (IS_ERR(vaddr)) {
err = PTR_ERR(vaddr);
goto out_put;
}
i915_gem_object_lock(obj, NULL);
/* Put the pages into a known state -- from the gpu for added fun */
intel_engine_pm_get(engine);
err = intel_context_migrate_clear(engine->gt->migrate.context, NULL,
obj->mm.pages->sgl, I915_CACHE_NONE,
true, 0xdeadbeaf, &rq);
if (rq) {
dma_resv_add_excl_fence(obj->base.resv, &rq->fence);
i915_request_put(rq);
}
intel_engine_pm_put(engine);
if (!err)
err = i915_gem_object_set_to_wc_domain(obj, true);
i915_gem_object_unlock(obj);
if (err)
goto out_unpin;
count = ARRAY_SIZE(bytes);
order = i915_random_order(count * count, &prng);
if (!order) {
err = -ENOMEM;
goto out_unpin;
}
/* A random multiple of u32, picked between [64, PAGE_SIZE - 64] */
bytes[0] = igt_random_offset(&prng, 64, PAGE_SIZE - 64, 0, sizeof(u32));
GEM_BUG_ON(!IS_ALIGNED(bytes[0], sizeof(u32)));
i = 0;
do {
u32 offset;
u32 align;
u32 dword;
u32 size;
u32 val;
size = bytes[order[i] % count];
i = (i + 1) % (count * count);
align = bytes[order[i] % count];
i = (i + 1) % (count * count);
align = max_t(u32, sizeof(u32), rounddown_pow_of_two(align));
offset = igt_random_offset(&prng, 0, obj->base.size,
size, align);
val = prandom_u32_state(&prng);
memset32(vaddr + offset / sizeof(u32), val ^ 0xdeadbeaf,
size / sizeof(u32));
/*
* Sample random dw -- don't waste precious time reading every
* single dw.
*/
dword = igt_random_offset(&prng, offset,
offset + size,
sizeof(u32), sizeof(u32));
dword /= sizeof(u32);
if (vaddr[dword] != (val ^ 0xdeadbeaf)) {
pr_err("%s vaddr[%u]=%u, val=%u, size=%u, align=%u, offset=%u\n",
__func__, dword, vaddr[dword], val ^ 0xdeadbeaf,
size, align, offset);
err = -EINVAL;
break;
}
} while (!__igt_timeout(end_time, NULL));
out_unpin:
i915_gem_object_unpin_map(obj);
out_put:
i915_gem_object_put(obj);
return err;
}
static const char *repr_type(u32 type)
{
switch (type) {
case I915_MAP_WB:
return "WB";
case I915_MAP_WC:
return "WC";
}
return "";
}
static struct drm_i915_gem_object *
create_region_for_mapping(struct intel_memory_region *mr, u64 size, u32 type,
void **out_addr)
{
struct drm_i915_gem_object *obj;
void *addr;
obj = i915_gem_object_create_region(mr, size, 0, 0);
if (IS_ERR(obj)) {
if (PTR_ERR(obj) == -ENOSPC) /* Stolen memory */
return ERR_PTR(-ENODEV);
return obj;
}
addr = i915_gem_object_pin_map_unlocked(obj, type);
if (IS_ERR(addr)) {
i915_gem_object_put(obj);
if (PTR_ERR(addr) == -ENXIO)
return ERR_PTR(-ENODEV);
return addr;
}
*out_addr = addr;
return obj;
}
static int wrap_ktime_compare(const void *A, const void *B)
{
const ktime_t *a = A, *b = B;
return ktime_compare(*a, *b);
}
static void igt_memcpy_long(void *dst, const void *src, size_t size)
{
unsigned long *tmp = dst;
const unsigned long *s = src;
size = size / sizeof(unsigned long);
while (size--)
*tmp++ = *s++;
}
static inline void igt_memcpy(void *dst, const void *src, size_t size)
{
memcpy(dst, src, size);
}
static inline void igt_memcpy_from_wc(void *dst, const void *src, size_t size)
{
i915_memcpy_from_wc(dst, src, size);
}
static int _perf_memcpy(struct intel_memory_region *src_mr,
struct intel_memory_region *dst_mr,
u64 size, u32 src_type, u32 dst_type)
{
struct drm_i915_private *i915 = src_mr->i915;
const struct {
const char *name;
void (*copy)(void *dst, const void *src, size_t size);
bool skip;
} tests[] = {
{
"memcpy",
igt_memcpy,
},
{
"memcpy_long",
igt_memcpy_long,
},
{
"memcpy_from_wc",
igt_memcpy_from_wc,
!i915_has_memcpy_from_wc(),
},
};
struct drm_i915_gem_object *src, *dst;
void *src_addr, *dst_addr;
int ret = 0;
int i;
src = create_region_for_mapping(src_mr, size, src_type, &src_addr);
if (IS_ERR(src)) {
ret = PTR_ERR(src);
goto out;
}
dst = create_region_for_mapping(dst_mr, size, dst_type, &dst_addr);
if (IS_ERR(dst)) {
ret = PTR_ERR(dst);
goto out_unpin_src;
}
for (i = 0; i < ARRAY_SIZE(tests); ++i) {
ktime_t t[5];
int pass;
if (tests[i].skip)
continue;
for (pass = 0; pass < ARRAY_SIZE(t); pass++) {
ktime_t t0, t1;
t0 = ktime_get();
tests[i].copy(dst_addr, src_addr, size);
t1 = ktime_get();
t[pass] = ktime_sub(t1, t0);
}
sort(t, ARRAY_SIZE(t), sizeof(*t), wrap_ktime_compare, NULL);
if (t[0] <= 0) {
/* ignore the impossible to protect our sanity */
pr_debug("Skipping %s src(%s, %s) -> dst(%s, %s) %14s %4lluKiB copy, unstable measurement [%lld, %lld]\n",
__func__,
src_mr->name, repr_type(src_type),
dst_mr->name, repr_type(dst_type),
tests[i].name, size >> 10,
t[0], t[4]);
continue;
}
pr_info("%s src(%s, %s) -> dst(%s, %s) %14s %4llu KiB copy: %5lld MiB/s\n",
__func__,
src_mr->name, repr_type(src_type),
dst_mr->name, repr_type(dst_type),
tests[i].name, size >> 10,
div64_u64(mul_u32_u32(4 * size,
1000 * 1000 * 1000),
t[1] + 2 * t[2] + t[3]) >> 20);
cond_resched();
}
i915_gem_object_unpin_map(dst);
i915_gem_object_put(dst);
out_unpin_src:
i915_gem_object_unpin_map(src);
i915_gem_object_put(src);
i915_gem_drain_freed_objects(i915);
out:
if (ret == -ENODEV)
ret = 0;
return ret;
}
static int perf_memcpy(void *arg)
{
struct drm_i915_private *i915 = arg;
static const u32 types[] = {
I915_MAP_WB,
I915_MAP_WC,
};
static const u32 sizes[] = {
SZ_4K,
SZ_64K,
SZ_4M,
};
struct intel_memory_region *src_mr, *dst_mr;
int src_id, dst_id;
int i, j, k;
int ret;
for_each_memory_region(src_mr, i915, src_id) {
for_each_memory_region(dst_mr, i915, dst_id) {
for (i = 0; i < ARRAY_SIZE(sizes); ++i) {
for (j = 0; j < ARRAY_SIZE(types); ++j) {
for (k = 0; k < ARRAY_SIZE(types); ++k) {
ret = _perf_memcpy(src_mr,
dst_mr,
sizes[i],
types[j],
types[k]);
if (ret)
return ret;
}
}
}
}
}
return 0;
}
int intel_memory_region_mock_selftests(void)
{
static const struct i915_subtest tests[] = {
SUBTEST(igt_mock_reserve),
SUBTEST(igt_mock_fill),
SUBTEST(igt_mock_contiguous),
SUBTEST(igt_mock_splintered_region),
SUBTEST(igt_mock_max_segment),
};
struct intel_memory_region *mem;
struct drm_i915_private *i915;
int err;
i915 = mock_gem_device();
if (!i915)
return -ENOMEM;
mem = mock_region_create(i915, 0, SZ_2G, I915_GTT_PAGE_SIZE_4K, 0);
if (IS_ERR(mem)) {
pr_err("failed to create memory region\n");
err = PTR_ERR(mem);
goto out_unref;
}
err = i915_subtests(tests, mem);
intel_memory_region_destroy(mem);
out_unref:
mock_destroy_device(i915);
return err;
}
int intel_memory_region_live_selftests(struct drm_i915_private *i915)
{
static const struct i915_subtest tests[] = {
SUBTEST(igt_lmem_create),
SUBTEST(igt_lmem_create_with_ps),
SUBTEST(igt_lmem_create_cleared_cpu),
SUBTEST(igt_lmem_write_cpu),
SUBTEST(igt_lmem_write_gpu),
};
if (!HAS_LMEM(i915)) {
pr_info("device lacks LMEM support, skipping\n");
return 0;
}
if (intel_gt_is_wedged(to_gt(i915)))
return 0;
return i915_live_subtests(tests, i915);
}
int intel_memory_region_perf_selftests(struct drm_i915_private *i915)
{
static const struct i915_subtest tests[] = {
SUBTEST(perf_memcpy),
};
if (intel_gt_is_wedged(to_gt(i915)))
return 0;
return i915_live_subtests(tests, i915);
}