The GuC firmware defines an interface for Translation Look-Aside Buffer (TLB) invalidation. Use this interface when invalidating the engine and GuC TLBs. Add additional functionality to intel_gt_invalidate_tlb, invalidating the GuC TLBs and falling back to GT invalidation when the GuC is disabled.

The invalidation is done by sending a request directly to the GuC tlb_lookup that invalidates the table. The request is submitted as a wait request and is completed in the CT event handler, which means this TLB invalidation path cannot be taken while the CT is not enabled. If the request is not fulfilled within two seconds, the invalidation is treated as an error, since that indicates either a lost request or a severely overloaded GuC.

With this new invalidation routine, we can perform GuC-based GGTT invalidations. GuC-based GGTT invalidation is incompatible with MMIO invalidation, so we must not perform MMIO invalidation when GuC-based GGTT invalidation is expected.

The additional complexity incurred in this patch will be necessary for range-based TLB invalidations, which are planned for the future.

Signed-off-by: Prathap Kumar Valsan <prathap.kumar.valsan@intel.com>
Signed-off-by: Bruce Chang <yu.bruce.chang@intel.com>
Signed-off-by: Chris Wilson <chris.p.wilson@intel.com>
Signed-off-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com>
Signed-off-by: Jonathan Cavitt <jonathan.cavitt@intel.com>
Signed-off-by: Aravind Iddamsetty <aravind.iddamsetty@intel.com>
Signed-off-by: Fei Yang <fei.yang@intel.com>
CC: Andi Shyti <andi.shyti@linux.intel.com>
Reviewed-by: Andi Shyti <andi.shyti@linux.intel.com>
Acked-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Acked-by: Nirmoy Das <nirmoy.das@intel.com>
Reviewed-by: John Harrison <John.C.Harrison@Intel.com>
Signed-off-by: Andi Shyti <andi.shyti@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231017180806.3054290-4-jonathan.cavitt@intel.com
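For reference, here is a minimal sketch of the request/wait flow described above, under stated assumptions: the helper name, the INTEL_GUC_ACTION_TLB_INVALIDATION action layout and the guc->next_seqno cursor are illustrative stand-ins rather than definitions made by this patch (the GuC-side implementation lives outside the file below); only intel_guc_send(), the wait_woken() API and the tlb_lookup table named in the message are taken as given.

/*
 * Illustrative sketch only: guc_send_invalidate_tlb_sketch(), the
 * action encoding and guc->next_seqno are assumed names used to show
 * the shape of the flow, not the GuC-side implementation itself.
 */
static int guc_send_invalidate_tlb_sketch(struct intel_guc *guc)
{
	DEFINE_WAIT_FUNC(wait, woken_wake_function);
	u32 action[3], seqno;
	int err;

	/*
	 * Park a wait entry in tlb_lookup so the CT event handler can
	 * find it by seqno and wake us when the G2H completion arrives.
	 */
	err = xa_alloc_cyclic_irq(&guc->tlb_lookup, &seqno, &wait,
				  XA_LIMIT(1, U32_MAX), &guc->next_seqno,
				  GFP_ATOMIC);
	if (err < 0)
		return err;

	action[0] = INTEL_GUC_ACTION_TLB_INVALIDATION; /* assumed action id */
	action[1] = seqno; /* ties the completion back to this waiter */
	action[2] = 0; /* invalidation type/mode flags (assumed layout) */

	/* Only valid while CT is enabled; the reply arrives as a CT event. */
	err = intel_guc_send(guc, action, ARRAY_SIZE(action));

	/*
	 * Not being woken within two seconds means either a lost request
	 * or a severely overloaded GuC, so treat it as an error.
	 */
	if (!err && !wait_woken(&wait, TASK_UNINTERRUPTIBLE, 2 * HZ))
		err = -ETIME;

	xa_erase_irq(&guc->tlb_lookup, seqno);
	return err;
}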
// SPDX-License-Identifier: MIT
/*
 * Copyright © 2023 Intel Corporation
 */

#include "i915_drv.h"
#include "i915_perf_oa_regs.h"
#include "intel_engine_pm.h"
#include "intel_gt.h"
#include "intel_gt_mcr.h"
#include "intel_gt_pm.h"
#include "intel_gt_print.h"
#include "intel_gt_regs.h"
#include "intel_tlb.h"
#include "uc/intel_guc.h"

/*
 * HW architecture suggests a typical invalidation time of 40us,
 * with pessimistic cases up to 100us and a recommendation to
 * cap at 1ms. We go a bit higher just in case.
 */
#define TLB_INVAL_TIMEOUT_US 100
#define TLB_INVAL_TIMEOUT_MS 4

/*
 * On Xe_HP the TLB invalidation registers are located at the same MMIO offsets
 * but are now considered MCR registers. Since they exist within a GAM range,
 * the primary instance of the register rolls up the status from each unit.
 */
static int wait_for_invalidate(struct intel_engine_cs *engine)
{
	if (engine->tlb_inv.mcr)
		return intel_gt_mcr_wait_for_reg(engine->gt,
						 engine->tlb_inv.reg.mcr_reg,
						 engine->tlb_inv.done,
						 0,
						 TLB_INVAL_TIMEOUT_US,
						 TLB_INVAL_TIMEOUT_MS);
	else
		return __intel_wait_for_register_fw(engine->gt->uncore,
						    engine->tlb_inv.reg.reg,
						    engine->tlb_inv.done,
						    0,
						    TLB_INVAL_TIMEOUT_US,
						    TLB_INVAL_TIMEOUT_MS,
						    NULL);
}

static void mmio_invalidate_full(struct intel_gt *gt)
{
	struct drm_i915_private *i915 = gt->i915;
	struct intel_uncore *uncore = gt->uncore;
	struct intel_engine_cs *engine;
	intel_engine_mask_t awake, tmp;
	enum intel_engine_id id;
	unsigned long flags;

	if (GRAPHICS_VER(i915) < 8)
		return;

	intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);

	intel_gt_mcr_lock(gt, &flags);
	spin_lock(&uncore->lock); /* serialise invalidate with GT reset */

	awake = 0;
	for_each_engine(engine, gt, id) {
		if (!intel_engine_pm_is_awake(engine))
			continue;

		if (engine->tlb_inv.mcr)
			intel_gt_mcr_multicast_write_fw(gt,
							engine->tlb_inv.reg.mcr_reg,
							engine->tlb_inv.request);
		else
			intel_uncore_write_fw(uncore,
					      engine->tlb_inv.reg.reg,
					      engine->tlb_inv.request);

		awake |= engine->mask;
	}

	GT_TRACE(gt, "invalidated engines %08x\n", awake);

	/* Wa_2207587034:tgl,dg1,rkl,adl-s,adl-p */
	if (awake &&
	    (IS_TIGERLAKE(i915) ||
	     IS_DG1(i915) ||
	     IS_ROCKETLAKE(i915) ||
	     IS_ALDERLAKE_S(i915) ||
	     IS_ALDERLAKE_P(i915)))
		intel_uncore_write_fw(uncore, GEN12_OA_TLB_INV_CR, 1);

	spin_unlock(&uncore->lock);
	intel_gt_mcr_unlock(gt, flags);

	for_each_engine_masked(engine, gt, awake, tmp) {
		if (wait_for_invalidate(engine))
			gt_err_ratelimited(gt,
					   "%s TLB invalidation did not complete in %ums!\n",
					   engine->name, TLB_INVAL_TIMEOUT_MS);
	}

	/*
	 * Use delayed put since a) we mostly expect a flurry of TLB
	 * invalidations so it is good to avoid paying the forcewake cost and
	 * b) it works around a bug in Icelake which cannot cope with too rapid
	 * transitions.
	 */
	intel_uncore_forcewake_put_delayed(uncore, FORCEWAKE_ALL);
}

static bool tlb_seqno_passed(const struct intel_gt *gt, u32 seqno)
{
	u32 cur = intel_gt_tlb_seqno(gt);

	/* Only skip if a *full* TLB invalidate barrier has passed */
	return (s32)(cur - ALIGN(seqno, 2)) > 0;
}

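/*
 * Usage sketch (illustrative, not part of this patch): a caller is
 * expected to sample an odd "next" seqno when it dirties the TLBs and
 * hand it back once the stale mapping must be unreachable, e.g.:
 *
 *	u32 seqno = intel_gt_next_invalidate_tlb_full(gt);
 *	...
 *	intel_gt_invalidate_tlb_full(gt, seqno);
 *
 * tlb_seqno_passed() rounds the stored seqno up with ALIGN(seqno, 2),
 * so only a full barrier completed after the sample (the seqcount
 * having moved past the next even value) lets the invalidation below
 * be skipped.
 */
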
void intel_gt_invalidate_tlb_full(struct intel_gt *gt, u32 seqno)
{
	intel_wakeref_t wakeref;

	if (I915_SELFTEST_ONLY(gt->awake == -ENODEV))
		return;

	if (intel_gt_is_wedged(gt))
		return;

	if (tlb_seqno_passed(gt, seqno))
		return;

	with_intel_gt_pm_if_awake(gt, wakeref) {
		struct intel_guc *guc = &gt->uc.guc;

		mutex_lock(&gt->tlb.invalidate_lock);
		if (tlb_seqno_passed(gt, seqno))
			goto unlock;

		if (HAS_GUC_TLB_INVALIDATION(gt->i915)) {
			/*
			 * Only perform GuC TLB invalidation if GuC is ready.
			 * The only time GuC could not be ready is on GT reset,
			 * which would clobber all the TLBs anyways, making
			 * any TLB invalidation path here unnecessary.
			 */
			if (intel_guc_is_ready(guc))
				intel_guc_invalidate_tlb_engines(guc);
		} else {
			mmio_invalidate_full(gt);
		}

		write_seqcount_invalidate(&gt->tlb.seqno);
unlock:
		mutex_unlock(&gt->tlb.invalidate_lock);
	}
}

void intel_gt_init_tlb(struct intel_gt *gt)
{
	mutex_init(&gt->tlb.invalidate_lock);
	seqcount_mutex_init(&gt->tlb.seqno, &gt->tlb.invalidate_lock);
}

void intel_gt_fini_tlb(struct intel_gt *gt)
{
	mutex_destroy(&gt->tlb.invalidate_lock);
}

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftest_tlb.c"
#endif