Currently the KMD is using enum i915_cache_level to set caching policy for
buffer objects. This is flaky because the PAT index which really controls
the caching behavior in PTE has far more levels than what's defined in the
enum. In addition, the PAT index is platform dependent; having to translate
between i915_cache_level and PAT index is not reliable and makes the code
more complicated.

From the UMD's perspective there is also a need to set caching policy for
performance fine tuning. It's much easier for the UMD to directly use PAT
index because the behavior of each PAT index is clearly defined in Bspec.
Having the abstracted i915_cache_level sitting in between would only cause
more ambiguity. PAT is expected to work much like MOCS already works today,
and by design userspace is expected to select the index that exactly
matches the desired behavior described in the hardware specification.

For these reasons this patch replaces i915_cache_level with PAT index. Also
note, cache_level is not completely removed yet, because the KMD still
needs to create buffer objects with simple cache settings such as cached,
uncached, or writethrough. For kernel objects, cache_level is used for
simplicity and backward compatibility. For pre-gen12 platforms PAT can have
a 1:1 mapping to i915_cache_level, so the two are interchangeable. See the
use of LEGACY_CACHELEVEL.

One consequence of this change is that gen8_pte_encode no longer works for
gen12 platforms, because gen12 platforms have different PAT definitions. In
the meantime the mtl_pte_encode introduced specifically for MTL becomes
generic for all gen12 platforms. This patch renames the MTL PTE encode
function to gen12_pte_encode and applies it to all gen12 platforms. Even
though this change looks unrelated, separating it out would temporarily
break gen12 PTE encoding, so it is squashed into this patch.

Special note: this patch changes the way caching behavior is controlled in
the sense that some objects are left to be managed by userspace. For such
objects we need to be careful not to change the userspace settings. There
are kerneldoc and comments added around obj->cache_coherent, cache_dirty,
and how to bypass the checks via i915_gem_object_has_cache_level. For full
understanding, these changes need to be looked at together with the two
follow-up patches: one disables the {set|get}_caching ioctls and the other
adds a set_pat extension to the GEM_CREATE uAPI.

Bspec: 63019

Cc: Chris Wilson <chris.p.wilson@linux.intel.com>
Signed-off-by: Fei Yang <fei.yang@intel.com>
Reviewed-by: Andi Shyti <andi.shyti@linux.intel.com>
Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Andi Shyti <andi.shyti@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20230509165200.1740-3-fei.yang@intel.com
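
As a rough illustration of the pre-gen12 1:1 mapping mentioned above (a
minimal sketch only, not the driver's actual LEGACY_CACHELEVEL table; the
table and helper names below are assumptions for illustration), a legacy
cache level could be resolved to a PAT index with a simple lookup:

/* Hypothetical sketch: legacy 1:1 cache_level -> PAT index mapping. */
static const unsigned int legacy_pat_index[] = {
	[I915_CACHE_NONE]   = 0,
	[I915_CACHE_LLC]    = 1,
	[I915_CACHE_L3_LLC] = 2,
	[I915_CACHE_WT]     = 3,
};

static unsigned int example_cache_level_to_pat(enum i915_cache_level level)
{
	return legacy_pat_index[level];
}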
// SPDX-License-Identifier: MIT
/*
 * Copyright © 2020 Intel Corporation
 */

#include <linux/sort.h>

#include "gem/i915_gem_internal.h"
#include "gem/i915_gem_lmem.h"

#include "selftests/igt_spinner.h"
#include "selftests/i915_random.h"

static const unsigned int sizes[] = {
	SZ_4K,
	SZ_64K,
	SZ_2M,
	CHUNK_SZ - SZ_4K,
	CHUNK_SZ,
	CHUNK_SZ + SZ_4K,
	SZ_64M,
};

static struct drm_i915_gem_object *
create_lmem_or_internal(struct drm_i915_private *i915, size_t size)
{
	struct drm_i915_gem_object *obj;

	obj = i915_gem_object_create_lmem(i915, size, 0);
	if (!IS_ERR(obj))
		return obj;

	return i915_gem_object_create_internal(i915, size);
}
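
/*
 * Fill the source with an ascending pattern and the destination with its
 * complement, run the requested copy function, then sample one random u32
 * per page of the destination to verify the transfer.
 */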
static int copy(struct intel_migrate *migrate,
		int (*fn)(struct intel_migrate *migrate,
			  struct i915_gem_ww_ctx *ww,
			  struct drm_i915_gem_object *src,
			  struct drm_i915_gem_object *dst,
			  struct i915_request **out),
		u32 sz, struct rnd_state *prng)
{
	struct drm_i915_private *i915 = migrate->context->engine->i915;
	struct drm_i915_gem_object *src, *dst;
	struct i915_request *rq;
	struct i915_gem_ww_ctx ww;
	u32 *vaddr;
	int err = 0;
	int i;

	src = create_lmem_or_internal(i915, sz);
	if (IS_ERR(src))
		return 0;

	sz = src->base.size;
	dst = i915_gem_object_create_internal(i915, sz);
	if (IS_ERR(dst))
		goto err_free_src;

	for_i915_gem_ww(&ww, err, true) {
		err = i915_gem_object_lock(src, &ww);
		if (err)
			continue;

		err = i915_gem_object_lock(dst, &ww);
		if (err)
			continue;

		vaddr = i915_gem_object_pin_map(src, I915_MAP_WC);
		if (IS_ERR(vaddr)) {
			err = PTR_ERR(vaddr);
			continue;
		}

		for (i = 0; i < sz / sizeof(u32); i++)
			vaddr[i] = i;
		i915_gem_object_flush_map(src);

		vaddr = i915_gem_object_pin_map(dst, I915_MAP_WC);
		if (IS_ERR(vaddr)) {
			err = PTR_ERR(vaddr);
			goto unpin_src;
		}

		for (i = 0; i < sz / sizeof(u32); i++)
			vaddr[i] = ~i;
		i915_gem_object_flush_map(dst);

		err = fn(migrate, &ww, src, dst, &rq);
		if (!err)
			continue;

		if (err != -EDEADLK && err != -EINTR && err != -ERESTARTSYS)
			pr_err("%ps failed, size: %u\n", fn, sz);
		if (rq) {
			i915_request_wait(rq, 0, HZ);
			i915_request_put(rq);
		}
		i915_gem_object_unpin_map(dst);
unpin_src:
		i915_gem_object_unpin_map(src);
	}
	if (err)
		goto err_out;

	if (rq) {
		if (i915_request_wait(rq, 0, HZ) < 0) {
			pr_err("%ps timed out, size: %u\n", fn, sz);
			err = -ETIME;
		}
		i915_request_put(rq);
	}

	for (i = 0; !err && i < sz / PAGE_SIZE; i++) {
		int x = i * 1024 + i915_prandom_u32_max_state(1024, prng);

		if (vaddr[x] != x) {
			pr_err("%ps failed, size: %u, offset: %zu\n",
			       fn, sz, x * sizeof(u32));
			igt_hexdump(vaddr + i * 1024, 4096);
			err = -EINVAL;
		}
	}

	i915_gem_object_unpin_map(dst);
	i915_gem_object_unpin_map(src);

err_out:
	i915_gem_object_put(dst);
err_free_src:
	i915_gem_object_put(src);

	return err;
}
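
/*
 * Emit a flat-CCS transfer over the object's pages in CHUNK_SZ pieces,
 * either packing the backing store into the CCS surface (write_to_ccs)
 * or unpacking the CCS surface back out, using the supplied PAT index
 * for the PTE encoding.
 */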
static int intel_context_copy_ccs(struct intel_context *ce,
				  const struct i915_deps *deps,
				  struct scatterlist *sg,
				  unsigned int pat_index,
				  bool write_to_ccs,
				  struct i915_request **out)
{
	u8 src_access = write_to_ccs ? DIRECT_ACCESS : INDIRECT_ACCESS;
	u8 dst_access = write_to_ccs ? INDIRECT_ACCESS : DIRECT_ACCESS;
	struct sgt_dma it = sg_sgt(sg);
	struct i915_request *rq;
	u32 offset;
	int err;

	GEM_BUG_ON(ce->vm != ce->engine->gt->migrate.context->vm);
	*out = NULL;

	GEM_BUG_ON(ce->ring->size < SZ_64K);

	offset = 0;
	if (HAS_64K_PAGES(ce->engine->i915))
		offset = CHUNK_SZ;

	do {
		int len;

		rq = i915_request_create(ce);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto out_ce;
		}

		if (deps) {
			err = i915_request_await_deps(rq, deps);
			if (err)
				goto out_rq;

			if (rq->engine->emit_init_breadcrumb) {
				err = rq->engine->emit_init_breadcrumb(rq);
				if (err)
					goto out_rq;
			}

			deps = NULL;
		}

		/* The PTE updates + clear must not be interrupted. */
		err = emit_no_arbitration(rq);
		if (err)
			goto out_rq;

		len = emit_pte(rq, &it, pat_index, true, offset, CHUNK_SZ);
		if (len <= 0) {
			err = len;
			goto out_rq;
		}

		err = rq->engine->emit_flush(rq, EMIT_INVALIDATE);
		if (err)
			goto out_rq;

		err = emit_copy_ccs(rq, offset, dst_access,
				    offset, src_access, len);
		if (err)
			goto out_rq;

		err = rq->engine->emit_flush(rq, EMIT_INVALIDATE);

		/* Arbitration is re-enabled between requests. */
out_rq:
		if (*out)
			i915_request_put(*out);
		*out = i915_request_get(rq);
		i915_request_add(rq);
		if (err || !it.sg || !sg_dma_len(it.sg))
			break;

		cond_resched();
	} while (1);

out_ce:
	return err;
}
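
/*
 * Pin a migrate context (a fresh one if available, otherwise the global
 * one) under the ww lock for the duration of the CCS copy.
 */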
static int
intel_migrate_ccs_copy(struct intel_migrate *m,
		       struct i915_gem_ww_ctx *ww,
		       const struct i915_deps *deps,
		       struct scatterlist *sg,
		       unsigned int pat_index,
		       bool write_to_ccs,
		       struct i915_request **out)
{
	struct intel_context *ce;
	int err;

	*out = NULL;
	if (!m->context)
		return -ENODEV;

	ce = intel_migrate_create_context(m);
	if (IS_ERR(ce))
		ce = intel_context_get(m->context);
	GEM_BUG_ON(IS_ERR(ce));

	err = intel_context_pin_ww(ce, ww);
	if (err)
		goto out;

	err = intel_context_copy_ccs(ce, deps, sg, pat_index,
				     write_to_ccs, out);

	intel_context_unpin(ce);
out:
	intel_context_put(ce);
	return err;
}
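
/*
 * Fill the object with a known pattern, optionally stage that data into the
 * CCS surface on flat-CCS capable lmem, run the requested clear function,
 * then verify the object contents and, for a clear to zero, that the CCS
 * state was cleared as well.
 */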
static int clear(struct intel_migrate *migrate,
		 int (*fn)(struct intel_migrate *migrate,
			   struct i915_gem_ww_ctx *ww,
			   struct drm_i915_gem_object *obj,
			   u32 value,
			   struct i915_request **out),
		 u32 sz, struct rnd_state *prng)
{
	struct drm_i915_private *i915 = migrate->context->engine->i915;
	struct drm_i915_gem_object *obj;
	struct i915_request *rq;
	struct i915_gem_ww_ctx ww;
	u32 *vaddr, val = 0;
	bool ccs_cap = false;
	int err = 0;
	int i;

	obj = create_lmem_or_internal(i915, sz);
	if (IS_ERR(obj))
		return 0;

	/* Consider the rounded up memory too */
	sz = obj->base.size;

	if (HAS_FLAT_CCS(i915) && i915_gem_object_is_lmem(obj))
		ccs_cap = true;

	for_i915_gem_ww(&ww, err, true) {
		int ccs_bytes, ccs_bytes_per_chunk;

		err = i915_gem_object_lock(obj, &ww);
		if (err)
			continue;

		vaddr = i915_gem_object_pin_map(obj, I915_MAP_WC);
		if (IS_ERR(vaddr)) {
			err = PTR_ERR(vaddr);
			continue;
		}

		for (i = 0; i < sz / sizeof(u32); i++)
			vaddr[i] = ~i;
		i915_gem_object_flush_map(obj);

		if (ccs_cap && !val) {
			/* Write the obj data into ccs surface */
			err = intel_migrate_ccs_copy(migrate, &ww, NULL,
						     obj->mm.pages->sgl,
						     obj->pat_index,
						     true, &rq);
			if (rq && !err) {
				if (i915_request_wait(rq, 0, HZ) < 0) {
					pr_err("%ps timed out, size: %u\n",
					       fn, sz);
					err = -ETIME;
				}
				i915_request_put(rq);
				rq = NULL;
			}
			if (err)
				continue;
		}

		err = fn(migrate, &ww, obj, val, &rq);
		if (rq && !err) {
			if (i915_request_wait(rq, 0, HZ) < 0) {
				pr_err("%ps timed out, size: %u\n", fn, sz);
				err = -ETIME;
			}
			i915_request_put(rq);
			rq = NULL;
		}
		if (err)
			continue;

		i915_gem_object_flush_map(obj);

		/* Verify the set/clear of the obj mem */
		for (i = 0; !err && i < sz / PAGE_SIZE; i++) {
			int x = i * 1024 +
				i915_prandom_u32_max_state(1024, prng);

			if (vaddr[x] != val) {
				pr_err("%ps failed, (%u != %u), offset: %zu\n",
				       fn, vaddr[x], val, x * sizeof(u32));
				igt_hexdump(vaddr + i * 1024, 4096);
				err = -EINVAL;
			}
		}
		if (err)
			continue;

		if (ccs_cap && !val) {
			for (i = 0; i < sz / sizeof(u32); i++)
				vaddr[i] = ~i;
			i915_gem_object_flush_map(obj);

			err = intel_migrate_ccs_copy(migrate, &ww, NULL,
						     obj->mm.pages->sgl,
						     obj->pat_index,
						     false, &rq);
			if (rq && !err) {
				if (i915_request_wait(rq, 0, HZ) < 0) {
					pr_err("%ps timed out, size: %u\n",
					       fn, sz);
					err = -ETIME;
				}
				i915_request_put(rq);
				rq = NULL;
			}
			if (err)
				continue;

			ccs_bytes = GET_CCS_BYTES(i915, sz);
			ccs_bytes_per_chunk = GET_CCS_BYTES(i915, CHUNK_SZ);
			i915_gem_object_flush_map(obj);

			for (i = 0; !err && i < DIV_ROUND_UP(ccs_bytes, PAGE_SIZE); i++) {
				int offset = ((i * PAGE_SIZE) /
					ccs_bytes_per_chunk) * CHUNK_SZ / sizeof(u32);
				int ccs_bytes_left = (ccs_bytes - i * PAGE_SIZE) / sizeof(u32);
				int x = i915_prandom_u32_max_state(min_t(int, 1024,
									 ccs_bytes_left), prng);

				if (vaddr[offset + x]) {
					pr_err("%ps ccs clearing failed, offset: %ld/%d\n",
					       fn, i * PAGE_SIZE + x * sizeof(u32), ccs_bytes);
					igt_hexdump(vaddr + offset,
						    min_t(int, 4096,
							  ccs_bytes_left * sizeof(u32)));
					err = -EINVAL;
				}
			}

			if (err)
				continue;
		}
		i915_gem_object_unpin_map(obj);
	}

	if (err) {
		if (err != -EDEADLK && err != -EINTR && err != -ERESTARTSYS)
			pr_err("%ps failed, size: %u\n", fn, sz);
		if (rq && err != -EINVAL) {
			i915_request_wait(rq, 0, HZ);
			i915_request_put(rq);
		}

		i915_gem_object_unpin_map(obj);
	}

	i915_gem_object_put(obj);
	return err;
}

static int __migrate_copy(struct intel_migrate *migrate,
			  struct i915_gem_ww_ctx *ww,
			  struct drm_i915_gem_object *src,
			  struct drm_i915_gem_object *dst,
			  struct i915_request **out)
{
	return intel_migrate_copy(migrate, ww, NULL,
				  src->mm.pages->sgl, src->pat_index,
				  i915_gem_object_is_lmem(src),
				  dst->mm.pages->sgl, dst->pat_index,
				  i915_gem_object_is_lmem(dst),
				  out);
}

static int __global_copy(struct intel_migrate *migrate,
			 struct i915_gem_ww_ctx *ww,
			 struct drm_i915_gem_object *src,
			 struct drm_i915_gem_object *dst,
			 struct i915_request **out)
{
	return intel_context_migrate_copy(migrate->context, NULL,
					  src->mm.pages->sgl, src->pat_index,
					  i915_gem_object_is_lmem(src),
					  dst->mm.pages->sgl, dst->pat_index,
					  i915_gem_object_is_lmem(dst),
					  out);
}

static int
migrate_copy(struct intel_migrate *migrate, u32 sz, struct rnd_state *prng)
{
	return copy(migrate, __migrate_copy, sz, prng);
}

static int
global_copy(struct intel_migrate *migrate, u32 sz, struct rnd_state *prng)
{
	return copy(migrate, __global_copy, sz, prng);
}

static int __migrate_clear(struct intel_migrate *migrate,
			   struct i915_gem_ww_ctx *ww,
			   struct drm_i915_gem_object *obj,
			   u32 value,
			   struct i915_request **out)
{
	return intel_migrate_clear(migrate, ww, NULL,
				   obj->mm.pages->sgl,
				   obj->pat_index,
				   i915_gem_object_is_lmem(obj),
				   value, out);
}

static int __global_clear(struct intel_migrate *migrate,
			  struct i915_gem_ww_ctx *ww,
			  struct drm_i915_gem_object *obj,
			  u32 value,
			  struct i915_request **out)
{
	return intel_context_migrate_clear(migrate->context, NULL,
					   obj->mm.pages->sgl,
					   obj->pat_index,
					   i915_gem_object_is_lmem(obj),
					   value, out);
}

static int
migrate_clear(struct intel_migrate *migrate, u32 sz, struct rnd_state *prng)
{
	return clear(migrate, __migrate_clear, sz, prng);
}

static int
global_clear(struct intel_migrate *migrate, u32 sz, struct rnd_state *prng)
{
	return clear(migrate, __global_clear, sz, prng);
}

static int live_migrate_copy(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_migrate *migrate = &gt->migrate;
	struct drm_i915_private *i915 = migrate->context->engine->i915;
	I915_RND_STATE(prng);
	int i;

	for (i = 0; i < ARRAY_SIZE(sizes); i++) {
		int err;

		err = migrate_copy(migrate, sizes[i], &prng);
		if (err == 0)
			err = global_copy(migrate, sizes[i], &prng);
		i915_gem_drain_freed_objects(i915);
		if (err)
			return err;
	}

	return 0;
}

static int live_migrate_clear(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_migrate *migrate = &gt->migrate;
	struct drm_i915_private *i915 = migrate->context->engine->i915;
	I915_RND_STATE(prng);
	int i;

	for (i = 0; i < ARRAY_SIZE(sizes); i++) {
		int err;

		err = migrate_clear(migrate, sizes[i], &prng);
		if (err == 0)
			err = global_clear(migrate, sizes[i], &prng);

		i915_gem_drain_freed_objects(i915);
		if (err)
			return err;
	}

	return 0;
}

struct spinner_timer {
	struct timer_list timer;
	struct igt_spinner spin;
};

static void spinner_kill(struct timer_list *timer)
{
	struct spinner_timer *st = from_timer(st, timer, timer);

	igt_spinner_end(&st->spin);
	pr_info("%s\n", __func__);
}

static int live_emit_pte_full_ring(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_migrate *migrate = &gt->migrate;
	struct drm_i915_private *i915 = migrate->context->engine->i915;
	struct drm_i915_gem_object *obj;
	struct intel_context *ce;
	struct i915_request *rq, *prev;
	struct spinner_timer st;
	struct sgt_dma it;
	int len, sz, err;
	u32 *cs;

	/*
	 * Simple regression test to check that we don't trample the
	 * rq->reserved_space when returning from emit_pte(), if the ring is
	 * nearly full.
	 */

	if (igt_spinner_init(&st.spin, to_gt(i915)))
		return -ENOMEM;

	obj = i915_gem_object_create_internal(i915, 2 * PAGE_SIZE);
	if (IS_ERR(obj)) {
		err = PTR_ERR(obj);
		goto out_spinner;
	}

	err = i915_gem_object_pin_pages_unlocked(obj);
	if (err)
		goto out_obj;

	ce = intel_migrate_create_context(migrate);
	if (IS_ERR(ce)) {
		err = PTR_ERR(ce);
		goto out_obj;
	}

	ce->ring_size = SZ_4K; /* Not too big */

	err = intel_context_pin(ce);
	if (err)
		goto out_put;

	rq = igt_spinner_create_request(&st.spin, ce, MI_ARB_CHECK);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto out_unpin;
	}

	i915_request_add(rq);
	if (!igt_wait_for_spinner(&st.spin, rq)) {
		err = -EIO;
		goto out_unpin;
	}

	/*
	 * Fill the rest of the ring leaving I915_EMIT_PTE_NUM_DWORDS +
	 * ring->reserved_space at the end. To actually emit the PTEs we require
	 * slightly more than I915_EMIT_PTE_NUM_DWORDS, since our object size is
	 * greater than PAGE_SIZE. The correct behaviour is to wait for more
	 * ring space in emit_pte(), otherwise we trample on the reserved_space
	 * resulting in crashes when later submitting the rq.
	 */

	prev = NULL;
	do {
		if (prev)
			i915_request_add(rq);

		rq = i915_request_create(ce);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto out_unpin;
		}

		sz = (rq->ring->space - rq->reserved_space) / sizeof(u32) -
			I915_EMIT_PTE_NUM_DWORDS;
		sz = min_t(u32, sz, (SZ_1K - rq->reserved_space) / sizeof(u32) -
			   I915_EMIT_PTE_NUM_DWORDS);
		cs = intel_ring_begin(rq, sz);
		if (IS_ERR(cs)) {
			err = PTR_ERR(cs);
			goto out_rq;
		}

		memset32(cs, MI_NOOP, sz);
		cs += sz;
		intel_ring_advance(rq, cs);

		pr_info("%s emit=%u sz=%d\n", __func__, rq->ring->emit, sz);

		prev = rq;
	} while (rq->ring->space > (rq->reserved_space +
				    I915_EMIT_PTE_NUM_DWORDS * sizeof(u32)));

	timer_setup_on_stack(&st.timer, spinner_kill, 0);
	mod_timer(&st.timer, jiffies + 2 * HZ);

	/*
	 * This should wait for the spinner to be killed, otherwise we should go
	 * down in flames when doing i915_request_add().
	 */
	pr_info("%s emite_pte ring space=%u\n", __func__, rq->ring->space);
	it = sg_sgt(obj->mm.pages->sgl);
	len = emit_pte(rq, &it, obj->pat_index, false, 0, CHUNK_SZ);
	if (!len) {
		err = -EINVAL;
		goto out_rq;
	}
	if (len < 0) {
		err = len;
		goto out_rq;
	}

out_rq:
	i915_request_add(rq); /* GEM_BUG_ON(rq->reserved_space > ring->space)? */
	del_timer_sync(&st.timer);
	destroy_timer_on_stack(&st.timer);
out_unpin:
	intel_context_unpin(ce);
out_put:
	intel_context_put(ce);
out_obj:
	i915_gem_object_put(obj);
out_spinner:
	igt_spinner_fini(&st.spin);
	return err;
}

struct threaded_migrate {
	struct intel_migrate *migrate;
	struct task_struct *tsk;
	struct rnd_state prng;
};
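
/*
 * Run the given subtest body concurrently on num_online_cpus() + 1 kthreads,
 * each with its own PRNG stream, and report the first error encountered.
 */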
static int threaded_migrate(struct intel_migrate *migrate,
			    int (*fn)(void *arg),
			    unsigned int flags)
{
	const unsigned int n_cpus = num_online_cpus() + 1;
	struct threaded_migrate *thread;
	I915_RND_STATE(prng);
	unsigned int i;
	int err = 0;

	thread = kcalloc(n_cpus, sizeof(*thread), GFP_KERNEL);
	if (!thread)
		return 0;

	for (i = 0; i < n_cpus; ++i) {
		struct task_struct *tsk;

		thread[i].migrate = migrate;
		thread[i].prng =
			I915_RND_STATE_INITIALIZER(prandom_u32_state(&prng));

		tsk = kthread_run(fn, &thread[i], "igt-%d", i);
		if (IS_ERR(tsk)) {
			err = PTR_ERR(tsk);
			break;
		}

		get_task_struct(tsk);
		thread[i].tsk = tsk;
	}

	msleep(10); /* start all threads before we kthread_stop() */

	for (i = 0; i < n_cpus; ++i) {
		struct task_struct *tsk = thread[i].tsk;
		int status;

		if (IS_ERR_OR_NULL(tsk))
			continue;

		status = kthread_stop(tsk);
		if (status && !err)
			err = status;

		put_task_struct(tsk);
	}

	kfree(thread);
	return err;
}

static int __thread_migrate_copy(void *arg)
{
	struct threaded_migrate *tm = arg;

	return migrate_copy(tm->migrate, 2 * CHUNK_SZ, &tm->prng);
}

static int thread_migrate_copy(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_migrate *migrate = &gt->migrate;

	return threaded_migrate(migrate, __thread_migrate_copy, 0);
}

static int __thread_global_copy(void *arg)
{
	struct threaded_migrate *tm = arg;

	return global_copy(tm->migrate, 2 * CHUNK_SZ, &tm->prng);
}

static int thread_global_copy(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_migrate *migrate = &gt->migrate;

	return threaded_migrate(migrate, __thread_global_copy, 0);
}

static int __thread_migrate_clear(void *arg)
{
	struct threaded_migrate *tm = arg;

	return migrate_clear(tm->migrate, 2 * CHUNK_SZ, &tm->prng);
}

static int __thread_global_clear(void *arg)
{
	struct threaded_migrate *tm = arg;

	return global_clear(tm->migrate, 2 * CHUNK_SZ, &tm->prng);
}

static int thread_migrate_clear(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_migrate *migrate = &gt->migrate;

	return threaded_migrate(migrate, __thread_migrate_clear, 0);
}

static int thread_global_clear(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_migrate *migrate = &gt->migrate;

	return threaded_migrate(migrate, __thread_global_clear, 0);
}

int intel_migrate_live_selftests(struct drm_i915_private *i915)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(live_migrate_copy),
		SUBTEST(live_migrate_clear),
		SUBTEST(live_emit_pte_full_ring),
		SUBTEST(thread_migrate_copy),
		SUBTEST(thread_migrate_clear),
		SUBTEST(thread_global_copy),
		SUBTEST(thread_global_clear),
	};
	struct intel_gt *gt = to_gt(i915);

	if (!gt->migrate.context)
		return 0;

	return intel_gt_live_subtests(tests, gt);
}
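
/*
 * Create a test object in lmem when requested and available, falling back
 * to internal (smem) memory, and return it locked with its pages pinned.
 */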
static struct drm_i915_gem_object *
create_init_lmem_internal(struct intel_gt *gt, size_t sz, bool try_lmem)
{
	struct drm_i915_gem_object *obj = NULL;
	int err;

	if (try_lmem)
		obj = i915_gem_object_create_lmem(gt->i915, sz, 0);

	if (IS_ERR_OR_NULL(obj)) {
		obj = i915_gem_object_create_internal(gt->i915, sz);
		if (IS_ERR(obj))
			return obj;
	}

	i915_gem_object_trylock(obj, NULL);
	err = i915_gem_object_pin_pages(obj);
	if (err) {
		i915_gem_object_unlock(obj);
		i915_gem_object_put(obj);
		return ERR_PTR(err);
	}

	return obj;
}

static int wrap_ktime_compare(const void *A, const void *B)
{
	const ktime_t *a = A, *b = B;

	return ktime_compare(*a, *b);
}
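
/*
 * Time five clear passes over the same pages and report throughput from a
 * weighted middle of the sorted samples, ignoring the fastest and slowest
 * outliers.
 */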
static int __perf_clear_blt(struct intel_context *ce,
			    struct scatterlist *sg,
			    unsigned int pat_index,
			    bool is_lmem,
			    size_t sz)
{
	ktime_t t[5];
	int pass;
	int err = 0;

	for (pass = 0; pass < ARRAY_SIZE(t); pass++) {
		struct i915_request *rq;
		ktime_t t0, t1;

		t0 = ktime_get();

		err = intel_context_migrate_clear(ce, NULL, sg, pat_index,
						  is_lmem, 0, &rq);
		if (rq) {
			if (i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT) < 0)
				err = -EIO;
			i915_request_put(rq);
		}
		if (err)
			break;

		t1 = ktime_get();
		t[pass] = ktime_sub(t1, t0);
	}
	if (err)
		return err;

	sort(t, ARRAY_SIZE(t), sizeof(*t), wrap_ktime_compare, NULL);
	pr_info("%s: %zd KiB fill: %lld MiB/s\n",
		ce->engine->name, sz >> 10,
		div64_u64(mul_u32_u32(4 * sz,
				      1000 * 1000 * 1000),
			  t[1] + 2 * t[2] + t[3]) >> 20);
	return 0;
}

static int perf_clear_blt(void *arg)
{
	struct intel_gt *gt = arg;
	static const unsigned long sizes[] = {
		SZ_4K,
		SZ_64K,
		SZ_2M,
		SZ_64M
	};
	int i;

	for (i = 0; i < ARRAY_SIZE(sizes); i++) {
		struct drm_i915_gem_object *dst;
		int err;

		dst = create_init_lmem_internal(gt, sizes[i], true);
		if (IS_ERR(dst))
			return PTR_ERR(dst);

		err = __perf_clear_blt(gt->migrate.context,
				       dst->mm.pages->sgl,
				       i915_gem_get_pat_index(gt->i915,
							      I915_CACHE_NONE),
				       i915_gem_object_is_lmem(dst),
				       sizes[i]);

		i915_gem_object_unlock(dst);
		i915_gem_object_put(dst);
		if (err)
			return err;
	}

	return 0;
}

static int __perf_copy_blt(struct intel_context *ce,
			   struct scatterlist *src,
			   unsigned int src_pat_index,
			   bool src_is_lmem,
			   struct scatterlist *dst,
			   unsigned int dst_pat_index,
			   bool dst_is_lmem,
			   size_t sz)
{
	ktime_t t[5];
	int pass;
	int err = 0;

	for (pass = 0; pass < ARRAY_SIZE(t); pass++) {
		struct i915_request *rq;
		ktime_t t0, t1;

		t0 = ktime_get();

		err = intel_context_migrate_copy(ce, NULL,
						 src, src_pat_index,
						 src_is_lmem,
						 dst, dst_pat_index,
						 dst_is_lmem,
						 &rq);
		if (rq) {
			if (i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT) < 0)
				err = -EIO;
			i915_request_put(rq);
		}
		if (err)
			break;

		t1 = ktime_get();
		t[pass] = ktime_sub(t1, t0);
	}
	if (err)
		return err;

	sort(t, ARRAY_SIZE(t), sizeof(*t), wrap_ktime_compare, NULL);
	pr_info("%s: %zd KiB copy: %lld MiB/s\n",
		ce->engine->name, sz >> 10,
		div64_u64(mul_u32_u32(4 * sz,
				      1000 * 1000 * 1000),
			  t[1] + 2 * t[2] + t[3]) >> 20);
	return 0;
}

static int perf_copy_blt(void *arg)
{
	struct intel_gt *gt = arg;
	static const unsigned long sizes[] = {
		SZ_4K,
		SZ_64K,
		SZ_2M,
		SZ_64M
	};
	int i;

	for (i = 0; i < ARRAY_SIZE(sizes); i++) {
		struct drm_i915_gem_object *src, *dst;
		size_t sz;
		int err;

		src = create_init_lmem_internal(gt, sizes[i], true);
		if (IS_ERR(src))
			return PTR_ERR(src);

		sz = src->base.size;
		dst = create_init_lmem_internal(gt, sz, false);
		if (IS_ERR(dst)) {
			err = PTR_ERR(dst);
			goto err_src;
		}

		err = __perf_copy_blt(gt->migrate.context,
				      src->mm.pages->sgl,
				      i915_gem_get_pat_index(gt->i915,
							     I915_CACHE_NONE),
				      i915_gem_object_is_lmem(src),
				      dst->mm.pages->sgl,
				      i915_gem_get_pat_index(gt->i915,
							     I915_CACHE_NONE),
				      i915_gem_object_is_lmem(dst),
				      sz);

		i915_gem_object_unlock(dst);
		i915_gem_object_put(dst);
err_src:
		i915_gem_object_unlock(src);
		i915_gem_object_put(src);
		if (err)
			return err;
	}

	return 0;
}

int intel_migrate_perf_selftests(struct drm_i915_private *i915)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(perf_clear_blt),
		SUBTEST(perf_copy_blt),
	};
	struct intel_gt *gt = to_gt(i915);

	if (intel_gt_is_wedged(gt))
		return 0;

	if (!gt->migrate.context)
		return 0;

	return intel_gt_live_subtests(tests, gt);
}