linux/drivers/gpu/drm/i915/selftests/intel_scheduler_helpers.c
Matthew Brost d2420c2ed8 drm/i915/selftests: Add initial GuC selftest for scrubbing lost G2H
While debugging an issue with full GT resets I went down a rabbit hole
thinking the scrubbing of lost G2H wasn't working correctly. This proved
to be incorrect as this was working just fine but this chase inspired me
to write a selftest to prove that this works. This simple selftest
injects errors dropping various G2H and then issues a full GT reset
proving that the scrubbing of these G2H doesn't blow up.

v2:
 (Daniel Vetter)
  - Use ifdef instead of macros for selftests
v3:
 (Checkpatch)
  - A space after 'switch' statement
v4:
 (Daniele)
  - A comment saying GT won't idle if G2H are lost

Reviewed-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Signed-off-by: John Harrison <John.C.Harrison@Intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20210909164744.31249-12-matthew.brost@intel.com
2021-09-13 11:30:38 -07:00

101 lines
2.5 KiB
C

// SPDX-License-Identifier: MIT
/*
* Copyright © 2021 Intel Corporation
*/
//#include "gt/intel_engine_user.h"
#include <linux/jiffies.h>

#include "gt/intel_gt.h"
#include "i915_drv.h"
#include "i915_selftest.h"
#include "selftests/intel_scheduler_helpers.h"
/*
 * Shortened scheduling values applied by intel_selftest_modify_policy() for
 * the fast-reset policy. Both are stored into *_ms engine properties
 * (timeslice_duration_ms and preempt_timeout_ms), i.e. they are milliseconds.
 */
#define REDUCED_TIMESLICE 5
#define REDUCED_PREEMPT 10
/*
 * Upper bound used by intel_selftest_wait_for_rq() when waiting on a request.
 * NOTE(review): presumably milliseconds like the values above - confirm
 * against how the wait helper hands it to i915_request_wait().
 */
#define WAIT_FOR_RESET_TIME 10000
struct intel_engine_cs *intel_selftest_find_any_engine(struct intel_gt *gt)
{
struct intel_engine_cs *engine;
enum intel_engine_id id;
for_each_engine(engine, gt, id)
return engine;
pr_err("No valid engine found!\n");
return NULL;
}
/**
 * intel_selftest_modify_policy - apply a selftest scheduling policy to an engine
 * @engine: engine whose scheduling properties are overridden
 * @saved: storage that receives the values in effect before the override
 * @modify_type: one of the SELFTEST_SCHEDULER_MODIFY_* selectors
 *
 * Snapshots the reset module parameter, the engine flags, the timeslice
 * duration and the preemption timeout into @saved, then overrides them as
 * requested by @modify_type. For an engine that uses GuC the new values are
 * pushed with intel_guc_global_policies_update(); if that push fails, the
 * snapshot is put back through intel_selftest_restore_policy().
 *
 * Return: 0 on success, -EINVAL for an unknown @modify_type (nothing is
 * modified in that case, although @saved has been filled in), or the error
 * returned by intel_guc_global_policies_update().
 */
int intel_selftest_modify_policy(struct intel_engine_cs *engine,
				 struct intel_selftest_saved_policy *saved,
				 u32 modify_type)
{
	int err;

	/* Capture the current state first so it can always be restored. */
	saved->reset = engine->i915->params.reset;
	saved->flags = engine->flags;
	saved->timeslice = engine->props.timeslice_duration_ms;
	saved->preempt_timeout = engine->props.preempt_timeout_ms;

	switch (modify_type) {
	case SELFTEST_SCHEDULER_MODIFY_FAST_RESET:
		/*
		 * Enable force pre-emption on time slice expiration
		 * together with engine reset on pre-emption timeout.
		 * This is required to make the GuC notice and reset
		 * the single hanging context.
		 * Also, reduce the preemption timeout to something
		 * small to speed the test up.
		 */
		engine->i915->params.reset = 2;
		engine->flags |= I915_ENGINE_WANT_FORCED_PREEMPTION;
		engine->props.timeslice_duration_ms = REDUCED_TIMESLICE;
		engine->props.preempt_timeout_ms = REDUCED_PREEMPT;
		break;

	case SELFTEST_SCHEDULER_MODIFY_NO_HANGCHECK:
		engine->props.preempt_timeout_ms = 0;
		break;

	default:
		/* modify_type is a u32, so it must be printed as unsigned. */
		pr_err("Invalid scheduler policy modification type: %u!\n", modify_type);
		return -EINVAL;
	}

	/* Only GuC needs to be told about the new values explicitly. */
	if (!intel_engine_uses_guc(engine))
		return 0;

	err = intel_guc_global_policies_update(&engine->gt->uc.guc);
	if (err)
		/* Best effort: undo the override and report the original failure. */
		intel_selftest_restore_policy(engine, saved);

	return err;
}
/**
 * intel_selftest_restore_policy - undo intel_selftest_modify_policy()
 * @engine: engine whose scheduling properties are written back
 * @saved: snapshot holding the values to write back
 *
 * Copies the reset module parameter, engine flags, timeslice duration and
 * preemption timeout from @saved back into place. For an engine that uses
 * GuC the restored values are then pushed with
 * intel_guc_global_policies_update().
 *
 * Return: 0 when the engine does not use GuC, otherwise the result of
 * intel_guc_global_policies_update().
 */
int intel_selftest_restore_policy(struct intel_engine_cs *engine,
				  struct intel_selftest_saved_policy *saved)
{
	/* Write every captured field back where it came from. */
	engine->props.preempt_timeout_ms = saved->preempt_timeout;
	engine->props.timeslice_duration_ms = saved->timeslice;
	engine->flags = saved->flags;
	engine->i915->params.reset = saved->reset;

	if (intel_engine_uses_guc(engine))
		return intel_guc_global_policies_update(&engine->gt->uc.guc);

	return 0;
}
/**
 * intel_selftest_wait_for_rq - wait for a request with a bounded timeout
 * @rq: request to wait on
 *
 * Waits on @rq through i915_request_wait() with no wait flags, for at most
 * WAIT_FOR_RESET_TIME milliseconds.
 *
 * Return: 0 when i915_request_wait() reports a non-negative result,
 * otherwise the negative error code it returned.
 */
int intel_selftest_wait_for_rq(struct i915_request *rq)
{
	long ret;

	/*
	 * i915_request_wait() takes its timeout in jiffies. Convert from
	 * milliseconds so the wait lasts the same wall-clock time whatever
	 * CONFIG_HZ the kernel was built with, instead of scaling with it.
	 */
	ret = i915_request_wait(rq, 0, msecs_to_jiffies(WAIT_FOR_RESET_TIME));
	if (ret < 0)
		return ret;

	return 0;
}