linux/drivers/gpu/drm/i915/gt/selftest_engine_cs.c
Chris Wilson 8e4ee5e87c drm/i915: Wrap all access to i915_vma.node.start|size
We already wrap i915_vma.node.start for use with the GGTT, as there we
can perform additional sanity checks that the node belongs to the GGTT
and fits within the 32b registers. In the next couple of patches, we
will introduce guard pages around the objects _inside_ the drm_mm_node
allocation. That is, we will offset the vma->pages so that the first page
is at drm_mm_node.start + vma->guard (not 0 as is currently the case).
All users must then not use i915_vma.node.start directly, but compute
the guard offset; thus all users are converted to use an
i915_vma_offset() wrapper.

The notable exceptions are the selftests that are testing exact
behaviour of i915_vma_pin/i915_vma_insert.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Tejas Upadhyay <tejaskumarx.surendrakumar.upadhyay@intel.com>
Co-developed-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Signed-off-by: Andi Shyti <andi.shyti@linux.intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20221130235805.221010-3-andi.shyti@linux.intel.com
2022-12-06 10:52:42 +01:00

440 lines
8.3 KiB
C

// SPDX-License-Identifier: GPL-2.0
/*
* Copyright © 2018 Intel Corporation
*/
#include <linux/sort.h>
#include "intel_gpu_commands.h"
#include "intel_gt_pm.h"
#include "intel_rps.h"
#include "i915_selftest.h"
#include "selftests/igt_flush_test.h"
/*
 * Number of timing samples gathered per engine (size of the cycles[] arrays).
 * trifilter() sorts COUNT entries and then reads indices 1, 2 and 3, so this
 * must be at least 4.
 */
#define COUNT 5
/*
 * Three-way comparison of two u32 values, used as the sort() callback.
 *
 * @A, @B: pointers to the u32 elements being compared.
 *
 * Returns a negative value if *A < *B, zero if they are equal and a positive
 * value if *A > *B.
 *
 * The result is derived from explicit comparisons rather than "*a - *b":
 * the unsigned difference wraps and, once converted to int, carries the
 * wrong sign whenever the two values are more than INT_MAX apart.
 */
static int cmp_u32(const void *A, const void *B)
{
	const u32 *a = A, *b = B;

	if (*a < *b)
		return -1;

	return *a > *b;
}
/*
 * Set up @gt for a timing run: calls intel_gt_pm_get(), registers a waiter
 * on the RPS state and runs the RPS worker to completion before returning.
 * perf_end() performs the matching atomic_dec() and intel_gt_pm_put().
 */
static void perf_begin(struct intel_gt *gt)
{
intel_gt_pm_get(gt);
/* Boost gpufreq to max [waitboost] and keep it fixed */
atomic_inc(&gt->rps.num_waiters);
/* Queue the RPS worker and wait until it has finished running */
schedule_work(&gt->rps.work);
flush_work(&gt->rps.work);
}
/*
 * Tear down what perf_begin() set up: drop the RPS waiter count and the GT
 * pm reference, then flush the device with igt_flush_test().
 *
 * Returns whatever igt_flush_test() reports.
 */
static int perf_end(struct intel_gt *gt)
{
	int ret;

	atomic_dec(&gt->rps.num_waiters);
	intel_gt_pm_put(gt);

	ret = igt_flush_test(gt->i915);

	return ret;
}
/*
 * Select the ring timestamp register to sample for @engine.
 *
 * Graphics version 5 and G4X use RING_TIMESTAMP_UDW; every other platform
 * uses RING_TIMESTAMP. Both are relative to the engine's mmio base.
 */
static i915_reg_t timestamp_reg(struct intel_engine_cs *engine)
{
	struct drm_i915_private *i915 = engine->i915;

	/* Common case first: neither ver 5 nor G4X */
	if (GRAPHICS_VER(i915) != 5 && !IS_G4X(i915))
		return RING_TIMESTAMP(engine->mmio_base);

	return RING_TIMESTAMP_UDW(engine->mmio_base);
}
/*
 * Emit a 4-dword MI_STORE_REGISTER_MEM into @rq that copies the engine's
 * timestamp register to dword @slot of the request timeline's HWSP
 * (tl->hwsp_offset + slot * sizeof(u32), addressed through the GGTT).
 *
 * Returns 0 on success or the error from intel_ring_begin().
 */
static int write_timestamp(struct i915_request *rq, int slot)
{
	struct intel_timeline *tl;
	u32 cmd = MI_STORE_REGISTER_MEM | MI_USE_GGTT;
	u32 *cs;

	tl = rcu_dereference_protected(rq->timeline,
				       !i915_request_signaled(rq));

	cs = intel_ring_begin(rq, 4);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	/*
	 * NOTE(review): the increment presumably adjusts the command's
	 * length field for ver >= 8 - confirm against the command layout.
	 */
	if (GRAPHICS_VER(rq->engine->i915) >= 8)
		cmd += 1;

	cs[0] = cmd;
	cs[1] = i915_mmio_reg_offset(timestamp_reg(rq->engine));
	cs[2] = tl->hwsp_offset + slot * sizeof(u32);
	cs[3] = 0;

	intel_ring_advance(rq, cs + 4);

	return 0;
}
/*
 * Build a one-page batch whose first dword is MI_BATCH_BUFFER_END and pin
 * it with PIN_USER into @ce's address space.
 *
 * Returns the pinned vma, or an ERR_PTR() if any step fails; on failure the
 * backing object is released with i915_gem_object_put().
 */
static struct i915_vma *create_empty_batch(struct intel_context *ce)
{
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;
	u32 *map;
	int ret;

	obj = i915_gem_object_create_internal(ce->engine->i915, PAGE_SIZE);
	if (IS_ERR(obj))
		return ERR_CAST(obj);

	map = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB);
	if (IS_ERR(map)) {
		ret = PTR_ERR(map);
		goto put_obj;
	}

	*map = MI_BATCH_BUFFER_END;
	i915_gem_object_flush_map(obj);

	vma = i915_vma_instance(obj, ce->vm, NULL);
	if (IS_ERR(vma))
		ret = PTR_ERR(vma);
	else
		ret = i915_vma_pin(vma, 0, 0, PIN_USER);

	/* The CPU mapping is dropped on success and failure alike */
	i915_gem_object_unpin_map(obj);
	if (!ret)
		return vma;

put_obj:
	i915_gem_object_put(obj);
	return ERR_PTR(ret);
}
/*
 * Reduce COUNT samples to one value: sort @a in place, then return the
 * 1:2:1 weighted average of the entries at indices 1, 2 and 3.
 *
 * Note that @a is reordered by the sort.
 */
static u32 trifilter(u32 *a)
{
	u64 acc;

	sort(a, COUNT, sizeof(*a), cmp_u32, NULL);

	/* a[1] + 2 * a[2] + a[3], accumulated in 64 bits */
	acc = mul_u32_u32(a[2], 2);
	acc += a[1];
	acc += a[3];

	/* Total weight is 4 */
	return acc >> 2;
}
/*
 * Measure, per engine, the timestamp delta across a batch-buffer start of
 * an empty batch (see create_empty_batch()).
 *
 * @arg: the struct intel_gt under test.
 *
 * For each engine, COUNT requests are submitted on the kernel context. Each
 * request stores the engine timestamp to HWSP slot 2, starts the empty
 * batch, then stores the timestamp to slot 3. The difference between the
 * two is recorded and the filtered result (trifilter()) is logged.
 *
 * Skipped entirely (returns 0) when GRAPHICS_VER < 4; on GRAPHICS_VER < 7
 * only RCS0 is measured.
 *
 * Returns 0 on success, a negative error code from request construction,
 * or -EIO if a request does not complete within HZ / 5 or if perf_end()
 * reports a failure.
 */
static int perf_mi_bb_start(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	int err = 0;

	if (GRAPHICS_VER(gt->i915) < 4) /* Any CS_TIMESTAMP? */
		return 0;

	perf_begin(gt);
	for_each_engine(engine, gt, id) {
		struct intel_context *ce = engine->kernel_context;
		struct i915_vma *batch;
		u32 cycles[COUNT];
		int i;

		if (GRAPHICS_VER(engine->i915) < 7 && engine->id != RCS0)
			continue;

		intel_engine_pm_get(engine);

		batch = create_empty_batch(ce);
		if (IS_ERR(batch)) {
			err = PTR_ERR(batch);
			intel_engine_pm_put(engine);
			break;
		}

		err = i915_vma_sync(batch);
		if (err) {
			/* Same release order as the normal exit path below */
			i915_vma_put(batch);
			intel_engine_pm_put(engine);
			break;
		}

		for (i = 0; i < ARRAY_SIZE(cycles); i++) {
			struct i915_request *rq;

			rq = i915_request_create(ce);
			if (IS_ERR(rq)) {
				err = PTR_ERR(rq);
				break;
			}

			err = write_timestamp(rq, 2);
			if (err)
				goto out;

			err = rq->engine->emit_bb_start(rq,
							i915_vma_offset(batch), 8,
							0);
			if (err)
				goto out;

			err = write_timestamp(rq, 3);
			if (err)
				goto out;

out:
			/*
			 * A created request is always added, even after an
			 * emission error; hold a reference across the wait.
			 */
			i915_request_get(rq);
			i915_request_add(rq);

			if (i915_request_wait(rq, 0, HZ / 5) < 0)
				err = -EIO;

			/*
			 * Sample the timestamps while our reference is still
			 * held; rq must not be touched after the put below.
			 */
			if (!err)
				cycles[i] = rq->hwsp_seqno[3] -
					    rq->hwsp_seqno[2];

			i915_request_put(rq);
			if (err)
				break;
		}
		i915_vma_put(batch);
		intel_engine_pm_put(engine);
		if (err)
			break;

		pr_info("%s: MI_BB_START cycles: %u\n",
			engine->name, trifilter(cycles));
	}
	if (perf_end(gt))
		err = -EIO;

	return err;
}
/*
 * Build a 64KiB batch that is zero-filled except for its final dword, which
 * is MI_BATCH_BUFFER_END, and pin it with PIN_USER into @ce's address space.
 *
 * NOTE(review): the zero dwords presumably decode as MI_NOOP (the caller's
 * log message describes this batch as "16K MI_NOOP") - confirm.
 *
 * Returns the pinned vma, or an ERR_PTR() if any step fails; on failure the
 * backing object is released with i915_gem_object_put().
 */
static struct i915_vma *create_nop_batch(struct intel_context *ce)
{
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;
	u32 *map;
	int ret;

	obj = i915_gem_object_create_internal(ce->engine->i915, SZ_64K);
	if (IS_ERR(obj))
		return ERR_CAST(obj);

	map = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB);
	if (IS_ERR(map)) {
		ret = PTR_ERR(map);
		goto put_obj;
	}

	/* Clear the whole buffer, then terminate it in the last dword */
	memset(map, 0, SZ_64K);
	map[SZ_64K / sizeof(*map) - 1] = MI_BATCH_BUFFER_END;
	i915_gem_object_flush_map(obj);

	vma = i915_vma_instance(obj, ce->vm, NULL);
	if (IS_ERR(vma))
		ret = PTR_ERR(vma);
	else
		ret = i915_vma_pin(vma, 0, 0, PIN_USER);

	/* The CPU mapping is dropped on success and failure alike */
	i915_gem_object_unpin_map(obj);
	if (!ret)
		return vma;

put_obj:
	i915_gem_object_put(obj);
	return ERR_PTR(ret);
}
/*
 * Measure, per engine, the extra timestamp delta of running the batch from
 * create_nop_batch() compared with the batch from create_empty_batch().
 *
 * @arg: the struct intel_gt under test.
 *
 * For each engine, COUNT requests are submitted on the kernel context. Each
 * request stores the engine timestamp to HWSP slot 2, starts the empty
 * batch, stores slot 3, starts the nop batch, then stores slot 4. The
 * recorded sample is (slot4 - slot3) - (slot3 - slot2), i.e. the nop-batch
 * interval minus the empty-batch interval. The filtered result
 * (trifilter()) is logged.
 *
 * Skipped entirely (returns 0) when GRAPHICS_VER < 4; on GRAPHICS_VER < 7
 * only RCS0 is measured.
 *
 * Returns 0 on success, a negative error code from request construction,
 * or -EIO if a request does not complete within HZ / 5 or if perf_end()
 * reports a failure.
 */
static int perf_mi_noop(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	int err = 0;

	if (GRAPHICS_VER(gt->i915) < 4) /* Any CS_TIMESTAMP? */
		return 0;

	perf_begin(gt);
	for_each_engine(engine, gt, id) {
		struct intel_context *ce = engine->kernel_context;
		struct i915_vma *base, *nop;
		u32 cycles[COUNT];
		int i;

		if (GRAPHICS_VER(engine->i915) < 7 && engine->id != RCS0)
			continue;

		intel_engine_pm_get(engine);

		base = create_empty_batch(ce);
		if (IS_ERR(base)) {
			err = PTR_ERR(base);
			intel_engine_pm_put(engine);
			break;
		}

		err = i915_vma_sync(base);
		if (err) {
			i915_vma_put(base);
			intel_engine_pm_put(engine);
			break;
		}

		nop = create_nop_batch(ce);
		if (IS_ERR(nop)) {
			err = PTR_ERR(nop);
			i915_vma_put(base);
			intel_engine_pm_put(engine);
			break;
		}

		err = i915_vma_sync(nop);
		if (err) {
			i915_vma_put(nop);
			i915_vma_put(base);
			intel_engine_pm_put(engine);
			break;
		}

		for (i = 0; i < ARRAY_SIZE(cycles); i++) {
			struct i915_request *rq;

			rq = i915_request_create(ce);
			if (IS_ERR(rq)) {
				err = PTR_ERR(rq);
				break;
			}

			err = write_timestamp(rq, 2);
			if (err)
				goto out;

			err = rq->engine->emit_bb_start(rq,
							i915_vma_offset(base), 8,
							0);
			if (err)
				goto out;

			err = write_timestamp(rq, 3);
			if (err)
				goto out;

			err = rq->engine->emit_bb_start(rq,
							i915_vma_offset(nop),
							i915_vma_size(nop),
							0);
			if (err)
				goto out;

			err = write_timestamp(rq, 4);
			if (err)
				goto out;

out:
			/*
			 * A created request is always added, even after an
			 * emission error; hold a reference across the wait.
			 */
			i915_request_get(rq);
			i915_request_add(rq);

			if (i915_request_wait(rq, 0, HZ / 5) < 0)
				err = -EIO;

			/*
			 * Sample the timestamps while our reference is still
			 * held; rq must not be touched after the put below.
			 */
			if (!err)
				cycles[i] =
					(rq->hwsp_seqno[4] - rq->hwsp_seqno[3]) -
					(rq->hwsp_seqno[3] - rq->hwsp_seqno[2]);

			i915_request_put(rq);
			if (err)
				break;
		}
		i915_vma_put(nop);
		i915_vma_put(base);
		intel_engine_pm_put(engine);
		if (err)
			break;

		pr_info("%s: 16K MI_NOOP cycles: %u\n",
			engine->name, trifilter(cycles));
	}
	if (perf_end(gt))
		err = -EIO;

	return err;
}
/*
 * Entry point for the engine command-streamer perf selftests.
 *
 * Runs perf_mi_bb_start and perf_mi_noop as live subtests against the GT
 * returned by to_gt(@i915). Returns 0 without running anything when that
 * GT is wedged; otherwise returns the result of the subtest runner.
 */
int intel_engine_cs_perf_selftests(struct drm_i915_private *i915)
{
	static const struct i915_subtest perf_tests[] = {
		SUBTEST(perf_mi_bb_start),
		SUBTEST(perf_mi_noop),
	};
	struct intel_gt *gt = to_gt(i915);

	if (!intel_gt_is_wedged(gt))
		return intel_gt_live_subtests(perf_tests, gt);

	return 0;
}
/*
 * Sanity-check the mmio_bases[] table of every entry in intel_engines[].
 *
 * Walking each table from index 0, the graphics_ver values must be strictly
 * decreasing. A graphics_ver of 0 ends the walk for that engine, and every
 * entry before it must carry a non-zero base.
 *
 * @arg is unused. Returns 0 if all tables pass, -EINVAL on the first
 * violation (after logging it).
 */
static int intel_mmio_bases_check(void *arg)
{
	int i, j;

	for (i = 0; i < ARRAY_SIZE(intel_engines); i++) {
		const struct engine_info *info = &intel_engines[i];
		u8 prev = U8_MAX;

		for (j = 0; j < MAX_MMIO_BASES; j++) {
			u8 ver = info->mmio_bases[j].graphics_ver;
			u32 base = info->mmio_bases[j].base;

			/* Ordering is checked first, terminator included */
			if (ver >= prev) {
				pr_err("%s(%s, class:%d, instance:%d): mmio base for graphics ver %u is before the one for ver %u\n",
				       __func__,
				       intel_engine_class_repr(info->class),
				       info->class, info->instance,
				       prev, ver);
				return -EINVAL;
			}

			/* Version 0 marks the end of the used entries */
			if (!ver)
				break;

			if (base == 0) {
				pr_err("%s(%s, class:%d, instance:%d): invalid mmio base (%x) for graphics ver %u at entry %u\n",
				       __func__,
				       intel_engine_class_repr(info->class),
				       info->class, info->instance,
				       base, ver, j);
				return -EINVAL;
			}

			prev = ver;
		}

		/* prev now holds the last (lowest) version accepted */
		pr_debug("%s: min graphics version supported for %s%d is %u\n",
			 __func__,
			 intel_engine_class_repr(info->class),
			 info->instance,
			 prev);
	}

	return 0;
}
/*
 * Entry point for the engine command-streamer mock selftests.
 *
 * Runs intel_mmio_bases_check with no private data and returns the result
 * of the subtest runner.
 */
int intel_engine_cs_mock_selftests(void)
{
	static const struct i915_subtest mock_tests[] = {
		SUBTEST(intel_mmio_bases_check),
	};

	return i915_subtests(mock_tests, NULL);
}