drm/i915/guc: Use context hints for GT frequency
Allow user to provide a low latency context hint. When set, KMD sends a hint to GuC which results in special handling for this context. SLPC will ramp the GT frequency aggressively every time it switches to this context. The down freq threshold will also be lower so GuC will ramp down the GT freq for this context more slowly. We also disable waitboost for this context as that will interfere with the strategy. We need to enable the use of SLPC Compute strategy during init, but it will apply only to contexts that set this bit during context creation. Userland can check whether this feature is supported using a new param- I915_PARAM_HAS_CONTEXT_FREQ_HINT. This flag is true for all guc submission enabled platforms as they use SLPC for frequency management. The Mesa usage model for this flag is here - https://gitlab.freedesktop.org/sushmave/mesa/-/commits/compute_hint v2: Rename flags as per review suggestions (Rodrigo, Tvrtko). Also, use flag bits in intel_context as it allows finer control for toggling per engine if needed (Tvrtko). v3: Minor review comments (Tvrtko) v4: Update comment (Sushma) Cc: Rodrigo Vivi <rodrigo.vivi@intel.com> Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com> Cc: Sushma Venkatesh Reddy <sushma.venkatesh.reddy@intel.com> Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com> Acked-by: Ivan Briano <ivan.briano@intel.com> Signed-off-by: Vinay Belgaumkar <vinay.belgaumkar@intel.com> Signed-off-by: John Harrison <John.C.Harrison@Intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20240306012759.204938-1-vinay.belgaumkar@intel.com
This commit is contained in:
parent
7127128017
commit
cec82816d0
@ -879,6 +879,7 @@ static int set_proto_ctx_param(struct drm_i915_file_private *fpriv,
|
||||
struct i915_gem_proto_context *pc,
|
||||
struct drm_i915_gem_context_param *args)
|
||||
{
|
||||
struct drm_i915_private *i915 = fpriv->i915;
|
||||
int ret = 0;
|
||||
|
||||
switch (args->param) {
|
||||
@ -904,6 +905,13 @@ static int set_proto_ctx_param(struct drm_i915_file_private *fpriv,
|
||||
pc->user_flags &= ~BIT(UCONTEXT_BANNABLE);
|
||||
break;
|
||||
|
||||
case I915_CONTEXT_PARAM_LOW_LATENCY:
|
||||
if (intel_uc_uses_guc_submission(&to_gt(i915)->uc))
|
||||
pc->user_flags |= BIT(UCONTEXT_LOW_LATENCY);
|
||||
else
|
||||
ret = -EINVAL;
|
||||
break;
|
||||
|
||||
case I915_CONTEXT_PARAM_RECOVERABLE:
|
||||
if (args->size)
|
||||
ret = -EINVAL;
|
||||
@ -992,6 +1000,9 @@ static int intel_context_set_gem(struct intel_context *ce,
|
||||
if (sseu.slice_mask && !WARN_ON(ce->engine->class != RENDER_CLASS))
|
||||
ret = intel_context_reconfigure_sseu(ce, sseu);
|
||||
|
||||
if (test_bit(UCONTEXT_LOW_LATENCY, &ctx->user_flags))
|
||||
__set_bit(CONTEXT_LOW_LATENCY, &ce->flags);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
@ -1630,6 +1641,9 @@ i915_gem_create_context(struct drm_i915_private *i915,
|
||||
if (vm)
|
||||
ctx->vm = vm;
|
||||
|
||||
/* Assign early so intel_context_set_gem can access these flags */
|
||||
ctx->user_flags = pc->user_flags;
|
||||
|
||||
mutex_init(&ctx->engines_mutex);
|
||||
if (pc->num_user_engines >= 0) {
|
||||
i915_gem_context_set_user_engines(ctx);
|
||||
@ -1652,8 +1666,6 @@ i915_gem_create_context(struct drm_i915_private *i915,
|
||||
* is no remap info, it will be a NOP. */
|
||||
ctx->remap_slice = ALL_L3_SLICES(i915);
|
||||
|
||||
ctx->user_flags = pc->user_flags;
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(ctx->hang_timestamp); i++)
|
||||
ctx->hang_timestamp[i] = jiffies - CONTEXT_FAST_HANG_JIFFIES;
|
||||
|
||||
|
@ -338,6 +338,7 @@ struct i915_gem_context {
|
||||
#define UCONTEXT_BANNABLE 2
|
||||
#define UCONTEXT_RECOVERABLE 3
|
||||
#define UCONTEXT_PERSISTENCE 4
|
||||
#define UCONTEXT_LOW_LATENCY 5
|
||||
|
||||
/**
|
||||
* @flags: small set of booleans
|
||||
|
@ -130,6 +130,7 @@ struct intel_context {
|
||||
#define CONTEXT_PERMA_PIN 11
|
||||
#define CONTEXT_IS_PARKING 12
|
||||
#define CONTEXT_EXITING 13
|
||||
#define CONTEXT_LOW_LATENCY 14
|
||||
|
||||
struct {
|
||||
u64 timeout_us;
|
||||
|
@ -1013,6 +1013,10 @@ void intel_rps_boost(struct i915_request *rq)
|
||||
if (i915_request_signaled(rq) || i915_request_has_waitboost(rq))
|
||||
return;
|
||||
|
||||
/* Waitboost is not needed for contexts marked with a Freq hint */
|
||||
if (test_bit(CONTEXT_LOW_LATENCY, &rq->context->flags))
|
||||
return;
|
||||
|
||||
/* Serializes with i915_request_retire() */
|
||||
if (!test_and_set_bit(I915_FENCE_FLAG_BOOST, &rq->fence.flags)) {
|
||||
struct intel_rps *rps = &READ_ONCE(rq->engine)->gt->rps;
|
||||
|
@ -207,6 +207,27 @@ struct slpc_shared_data {
|
||||
u8 reserved_mode_definition[4096];
|
||||
} __packed;
|
||||
|
||||
struct slpc_context_frequency_request {
|
||||
u32 frequency_request:16;
|
||||
u32 reserved:12;
|
||||
u32 is_compute:1;
|
||||
u32 ignore_busyness:1;
|
||||
u32 is_minimum:1;
|
||||
u32 is_predefined:1;
|
||||
} __packed;
|
||||
|
||||
#define SLPC_CTX_FREQ_REQ_IS_COMPUTE REG_BIT(28)
|
||||
|
||||
struct slpc_optimized_strategies {
|
||||
u32 compute:1;
|
||||
u32 async_flip:1;
|
||||
u32 media:1;
|
||||
u32 vsync_flip:1;
|
||||
u32 reserved:28;
|
||||
} __packed;
|
||||
|
||||
#define SLPC_OPTIMIZED_STRATEGY_COMPUTE REG_BIT(0)
|
||||
|
||||
/**
|
||||
* DOC: SLPC H2G MESSAGE FORMAT
|
||||
*
|
||||
|
@ -537,6 +537,20 @@ int intel_guc_slpc_get_min_freq(struct intel_guc_slpc *slpc, u32 *val)
|
||||
return ret;
|
||||
}
|
||||
|
||||
int intel_guc_slpc_set_strategy(struct intel_guc_slpc *slpc, u32 val)
|
||||
{
|
||||
struct drm_i915_private *i915 = slpc_to_i915(slpc);
|
||||
intel_wakeref_t wakeref;
|
||||
int ret = 0;
|
||||
|
||||
with_intel_runtime_pm(&i915->runtime_pm, wakeref)
|
||||
ret = slpc_set_param(slpc,
|
||||
SLPC_PARAM_STRATEGIES,
|
||||
val);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
int intel_guc_slpc_set_media_ratio_mode(struct intel_guc_slpc *slpc, u32 val)
|
||||
{
|
||||
struct drm_i915_private *i915 = slpc_to_i915(slpc);
|
||||
@ -711,6 +725,9 @@ int intel_guc_slpc_enable(struct intel_guc_slpc *slpc)
|
||||
/* Set cached media freq ratio mode */
|
||||
intel_guc_slpc_set_media_ratio_mode(slpc, slpc->media_ratio_mode);
|
||||
|
||||
/* Enable SLPC Optimized Strategy for compute */
|
||||
intel_guc_slpc_set_strategy(slpc, SLPC_OPTIMIZED_STRATEGY_COMPUTE);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -45,5 +45,6 @@ void intel_guc_pm_intrmsk_enable(struct intel_gt *gt);
|
||||
void intel_guc_slpc_boost(struct intel_guc_slpc *slpc);
|
||||
void intel_guc_slpc_dec_waiters(struct intel_guc_slpc *slpc);
|
||||
int intel_guc_slpc_set_ignore_eff_freq(struct intel_guc_slpc *slpc, bool val);
|
||||
int intel_guc_slpc_set_strategy(struct intel_guc_slpc *slpc, u32 val);
|
||||
|
||||
#endif
|
||||
|
@ -2645,6 +2645,7 @@ MAKE_CONTEXT_POLICY_ADD(execution_quantum, EXECUTION_QUANTUM)
|
||||
MAKE_CONTEXT_POLICY_ADD(preemption_timeout, PREEMPTION_TIMEOUT)
|
||||
MAKE_CONTEXT_POLICY_ADD(priority, SCHEDULING_PRIORITY)
|
||||
MAKE_CONTEXT_POLICY_ADD(preempt_to_idle, PREEMPT_TO_IDLE_ON_QUANTUM_EXPIRY)
|
||||
MAKE_CONTEXT_POLICY_ADD(slpc_ctx_freq_req, SLPM_GT_FREQUENCY)
|
||||
|
||||
#undef MAKE_CONTEXT_POLICY_ADD
|
||||
|
||||
@ -2664,6 +2665,7 @@ static int guc_context_policy_init_v70(struct intel_context *ce, bool loop)
|
||||
struct context_policy policy;
|
||||
u32 execution_quantum;
|
||||
u32 preemption_timeout;
|
||||
u32 slpc_ctx_freq_req = 0;
|
||||
unsigned long flags;
|
||||
int ret;
|
||||
|
||||
@ -2675,11 +2677,15 @@ static int guc_context_policy_init_v70(struct intel_context *ce, bool loop)
|
||||
execution_quantum = engine->props.timeslice_duration_ms * 1000;
|
||||
preemption_timeout = engine->props.preempt_timeout_ms * 1000;
|
||||
|
||||
if (ce && (ce->flags & BIT(CONTEXT_LOW_LATENCY)))
|
||||
slpc_ctx_freq_req |= SLPC_CTX_FREQ_REQ_IS_COMPUTE;
|
||||
|
||||
__guc_context_policy_start_klv(&policy, ce->guc_id.id);
|
||||
|
||||
__guc_context_policy_add_priority(&policy, ce->guc_state.prio);
|
||||
__guc_context_policy_add_execution_quantum(&policy, execution_quantum);
|
||||
__guc_context_policy_add_preemption_timeout(&policy, preemption_timeout);
|
||||
__guc_context_policy_add_slpc_ctx_freq_req(&policy, slpc_ctx_freq_req);
|
||||
|
||||
if (engine->flags & I915_ENGINE_WANT_FORCED_PREEMPTION)
|
||||
__guc_context_policy_add_preempt_to_idle(&policy, 1);
|
||||
|
@ -155,6 +155,12 @@ int i915_getparam_ioctl(struct drm_device *dev, void *data,
|
||||
*/
|
||||
value = 1;
|
||||
break;
|
||||
case I915_PARAM_HAS_CONTEXT_FREQ_HINT:
|
||||
if (intel_uc_uses_guc_submission(&to_gt(i915)->uc))
|
||||
value = 1;
|
||||
else
|
||||
value = -EINVAL;
|
||||
break;
|
||||
case I915_PARAM_HAS_CONTEXT_ISOLATION:
|
||||
value = intel_engines_has_context_isolation(i915);
|
||||
break;
|
||||
|
@ -806,6 +806,12 @@ typedef struct drm_i915_irq_wait {
|
||||
*/
|
||||
#define I915_PARAM_PXP_STATUS 58
|
||||
|
||||
/*
|
||||
* Query if kernel allows marking a context to send a Freq hint to SLPC. This
|
||||
* will enable use of the strategies allowed by the SLPC algorithm.
|
||||
*/
|
||||
#define I915_PARAM_HAS_CONTEXT_FREQ_HINT 59
|
||||
|
||||
/* Must be kept compact -- no holes and well documented */
|
||||
|
||||
/**
|
||||
@ -2148,6 +2154,15 @@ struct drm_i915_gem_context_param {
|
||||
* -EIO: The firmware did not succeed in creating the protected context.
|
||||
*/
|
||||
#define I915_CONTEXT_PARAM_PROTECTED_CONTENT 0xd
|
||||
|
||||
/*
|
||||
* I915_CONTEXT_PARAM_LOW_LATENCY:
|
||||
*
|
||||
* Mark this context as a low latency workload which requires aggressive GT
|
||||
* frequency scaling. Use I915_PARAM_HAS_CONTEXT_FREQ_HINT to check if the kernel
|
||||
* supports this per context flag.
|
||||
*/
|
||||
#define I915_CONTEXT_PARAM_LOW_LATENCY 0xe
|
||||
/* Must be kept compact -- no holes and well documented */
|
||||
|
||||
/** @value: Context parameter value to be set or queried */
|
||||
|
Loading…
x
Reference in New Issue
Block a user