From 8172f41859cf7516e73eb957297e6752b3073119 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Thu, 20 Jan 2022 20:31:16 -0800 Subject: [PATCH 1/6] drm/i915: Allocate intel_engine_coredump_alloc with ALLOW_FAIL Allocate intel_engine_coredump_alloc with ALLOW_FAIL rather than GFP_KERNEL to fully decouple the error capture from fence signalling. v2: (John Harrison) - Fix typo in commit message (s/do/to) Fixes: 8b91cdd4f8649 ("drm/i915: Use __GFP_KSWAPD_RECLAIM in the capture code") Signed-off-by: Matthew Brost Reviewed-by: John Harrison Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20220121043118.24886-2-matthew.brost@intel.com (cherry picked from commit 4f72fc3c7f3d9f29a438bb0e17c7773f2fc8242a) Signed-off-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/i915_gpu_error.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index 5ae812d60abe..0633888a411e 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -1522,7 +1522,7 @@ capture_engine(struct intel_engine_cs *engine, struct i915_request *rq = NULL; unsigned long flags; - ee = intel_engine_coredump_alloc(engine, GFP_KERNEL); + ee = intel_engine_coredump_alloc(engine, ALLOW_FAIL); if (!ee) return NULL; From 5ae13c305ef8cb54efc4f0ba4565709b9f320fed Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Tue, 11 Jan 2022 08:39:29 -0800 Subject: [PATCH 2/6] drm/i915: Lock timeline mutex directly in error path of eb_pin_timeline Don't use the interruptable version of the timeline mutex lock in the error path of eb_pin_timeline as the cleanup must always happen. v2: (John Harrison) - Don't check for interrupt during mutex lock v3: (Tvrtko) - A comment explaining why lock helper isn't used Fixes: 544460c33821 ("drm/i915: Multi-BB execbuf") Signed-off-by: Matthew Brost Reviewed-by: John Harrison Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20220111163929.14017-1-matthew.brost@intel.com (cherry picked from commit cb935c4618bd2ff9058feee4af7088446da6a763) Signed-off-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index 3a5b247be738..1736efa43339 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -2505,9 +2505,14 @@ static int eb_pin_timeline(struct i915_execbuffer *eb, struct intel_context *ce, timeout) < 0) { i915_request_put(rq); - tl = intel_context_timeline_lock(ce); + /* + * Error path, cannot use intel_context_timeline_lock as + * that is user interruptable and this clean up step + * must be done. + */ + mutex_lock(&ce->timeline->mutex); intel_context_exit(ce); - intel_context_timeline_unlock(tl); + mutex_unlock(&ce->timeline->mutex); if (nonblock) return -EWOULDBLOCK; From 90a3d22ff02b196d5884e111f39271a1d4ee8e3e Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 24 Jan 2022 15:24:09 +0300 Subject: [PATCH 3/6] drm/i915/overlay: Prevent divide by zero bugs in scaling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Smatch detected a divide by zero bug in check_overlay_scaling(). drivers/gpu/drm/i915/display/intel_overlay.c:976 check_overlay_scaling() error: potential divide by zero bug '/ rec->dst_height'. drivers/gpu/drm/i915/display/intel_overlay.c:980 check_overlay_scaling() error: potential divide by zero bug '/ rec->dst_width'. Prevent this by ensuring that the dst height and width are non-zero. Fixes: 02e792fbaadb ("drm/i915: implement drmmode overlay support v4") Signed-off-by: Dan Carpenter Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20220124122409.GA31673@kili (cherry picked from commit cf5b64f7f10b28bebb9b7c9d25e7aee5cbe43918) Signed-off-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/display/intel_overlay.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/i915/display/intel_overlay.c b/drivers/gpu/drm/i915/display/intel_overlay.c index 1a376e9a1ff3..d610e48cab94 100644 --- a/drivers/gpu/drm/i915/display/intel_overlay.c +++ b/drivers/gpu/drm/i915/display/intel_overlay.c @@ -959,6 +959,9 @@ static int check_overlay_dst(struct intel_overlay *overlay, const struct intel_crtc_state *pipe_config = overlay->crtc->config; + if (rec->dst_height == 0 || rec->dst_width == 0) + return -EINVAL; + if (rec->dst_x < pipe_config->pipe_src_w && rec->dst_x + rec->dst_width <= pipe_config->pipe_src_w && rec->dst_y < pipe_config->pipe_src_h && From b3f74938d65665f892d1b7807c51140f68dc911c Mon Sep 17 00:00:00 2001 From: Umesh Nerlige Ramappa Date: Mon, 10 Jan 2022 17:55:23 -0800 Subject: [PATCH 4/6] drm/i915/pmu: Use PM timestamp instead of RING TIMESTAMP for reference All timestamps returned by GuC for GuC PMU busyness are captured from GUC PM TIMESTAMP. Since this timestamp does not tick when GuC goes idle, kmd uses RING_TIMESTAMP to measure busyness of an engine with an active context. In further stress testing, the MMIO read of the RING_TIMESTAMP is seen to cause a rare hang. Resolve the issue by using gt specific timestamp from PM which is in sync with the GuC PM timestamp. Fixes: 77cdd054dd2c ("drm/i915/pmu: Connect engine busyness stats from GuC to pmu") Signed-off-by: Umesh Nerlige Ramappa Reviewed-by: Alan Previn Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20220111015523.225562-1-umesh.nerlige.ramappa@intel.com (cherry picked from commit 721fd84ea1fe957453587efad5fdc44dfba58e04) Signed-off-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/gt/uc/intel_guc.h | 5 ++ .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 56 ++++++++++++++----- drivers/gpu/drm/i915/i915_reg.h | 3 +- 3 files changed, 50 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc.h index f9240d4baa69..3aabe164c329 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h @@ -206,6 +206,11 @@ struct intel_guc { * context usage for overflows. */ struct delayed_work work; + + /** + * @shift: Right shift value for the gpm timestamp + */ + u32 shift; } timestamp; #ifdef CONFIG_DRM_I915_SELFTEST diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index e7517206af82..fcec2cb833af 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -1149,23 +1149,51 @@ static void guc_update_engine_gt_clks(struct intel_engine_cs *engine) } } -static void guc_update_pm_timestamp(struct intel_guc *guc, - struct intel_engine_cs *engine, - ktime_t *now) +static u32 gpm_timestamp_shift(struct intel_gt *gt) { - u32 gt_stamp_now, gt_stamp_hi; + intel_wakeref_t wakeref; + u32 reg, shift; + + with_intel_runtime_pm(gt->uncore->rpm, wakeref) + reg = intel_uncore_read(gt->uncore, RPM_CONFIG0); + + shift = (reg & GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_MASK) >> + GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_SHIFT; + + return 3 - shift; +} + +static u64 gpm_timestamp(struct intel_gt *gt) +{ + u32 lo, hi, old_hi, loop = 0; + + hi = intel_uncore_read(gt->uncore, MISC_STATUS1); + do { + lo = intel_uncore_read(gt->uncore, MISC_STATUS0); + old_hi = hi; + hi = intel_uncore_read(gt->uncore, MISC_STATUS1); + } while (old_hi != hi && loop++ < 2); + + return ((u64)hi << 32) | lo; +} + +static void guc_update_pm_timestamp(struct intel_guc *guc, ktime_t *now) +{ + struct intel_gt *gt = guc_to_gt(guc); + u32 gt_stamp_lo, gt_stamp_hi; + u64 gpm_ts; lockdep_assert_held(&guc->timestamp.lock); gt_stamp_hi = upper_32_bits(guc->timestamp.gt_stamp); - gt_stamp_now = intel_uncore_read(engine->uncore, - RING_TIMESTAMP(engine->mmio_base)); + gpm_ts = gpm_timestamp(gt) >> guc->timestamp.shift; + gt_stamp_lo = lower_32_bits(gpm_ts); *now = ktime_get(); - if (gt_stamp_now < lower_32_bits(guc->timestamp.gt_stamp)) + if (gt_stamp_lo < lower_32_bits(guc->timestamp.gt_stamp)) gt_stamp_hi++; - guc->timestamp.gt_stamp = ((u64)gt_stamp_hi << 32) | gt_stamp_now; + guc->timestamp.gt_stamp = ((u64)gt_stamp_hi << 32) | gt_stamp_lo; } /* @@ -1209,7 +1237,7 @@ static ktime_t guc_engine_busyness(struct intel_engine_cs *engine, ktime_t *now) stats_saved = *stats; gt_stamp_saved = guc->timestamp.gt_stamp; guc_update_engine_gt_clks(engine); - guc_update_pm_timestamp(guc, engine, now); + guc_update_pm_timestamp(guc, now); intel_gt_pm_put_async(gt); if (i915_reset_count(gpu_error) != reset_count) { *stats = stats_saved; @@ -1241,8 +1269,8 @@ static void __reset_guc_busyness_stats(struct intel_guc *guc) spin_lock_irqsave(&guc->timestamp.lock, flags); + guc_update_pm_timestamp(guc, &unused); for_each_engine(engine, gt, id) { - guc_update_pm_timestamp(guc, engine, &unused); guc_update_engine_gt_clks(engine); engine->stats.guc.prev_total = 0; } @@ -1259,10 +1287,11 @@ static void __update_guc_busyness_stats(struct intel_guc *guc) ktime_t unused; spin_lock_irqsave(&guc->timestamp.lock, flags); - for_each_engine(engine, gt, id) { - guc_update_pm_timestamp(guc, engine, &unused); + + guc_update_pm_timestamp(guc, &unused); + for_each_engine(engine, gt, id) guc_update_engine_gt_clks(engine); - } + spin_unlock_irqrestore(&guc->timestamp.lock, flags); } @@ -1783,6 +1812,7 @@ int intel_guc_submission_init(struct intel_guc *guc) spin_lock_init(&guc->timestamp.lock); INIT_DELAYED_WORK(&guc->timestamp.work, guc_timestamp_ping); guc->timestamp.ping_delay = (POLL_TIME_CLKS / gt->clock_frequency + 1) * HZ; + guc->timestamp.shift = gpm_timestamp_shift(gt); return 0; } diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index c32420cb8ed5..c2bb33febb68 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -2684,7 +2684,8 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define RING_WAIT (1 << 11) /* gen3+, PRBx_CTL */ #define RING_WAIT_SEMAPHORE (1 << 10) /* gen6+ */ -#define GUCPMTIMESTAMP _MMIO(0xC3E8) +#define MISC_STATUS0 _MMIO(0xA500) +#define MISC_STATUS1 _MMIO(0xA504) /* There are 16 64-bit CS General Purpose Registers per-engine on Gen8+ */ #define GEN8_RING_CS_GPR(base, n) _MMIO((base) + 0x600 + (n) * 8) From 3c6f13ad723e7206f03bb2752b01d18202b7fc9d Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Wed, 26 Jan 2022 12:43:56 +0200 Subject: [PATCH 5/6] drm/i915/adlp: Fix TypeC PHY-ready status readout MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The TCSS_DDI_STATUS register is indexed by tc_port not by the FIA port index, fix this up. This only caused an issue on TC#3/4 ports in legacy mode, as in all other cases the two indices either match (on TC#1/2) or the TCSS_DDI_STATUS_READY flag is set regardless of something being connected or not (on TC#1/2/3/4 in dp-alt and tbt-alt modes). Reported-and-tested-by: Chia-Lin Kao (AceLan) Fixes: 55ce306c2aa1 ("drm/i915/adl_p: Implement TC sequences") Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/4698 Cc: José Roberto de Souza Cc: # v5.14+ Signed-off-by: Imre Deak Reviewed-by: José Roberto de Souza Link: https://patchwork.freedesktop.org/patch/msgid/20220126104356.2022975-1-imre.deak@intel.com (cherry picked from commit 516b33460c5bee78b2055637b0547bdb0e6af754) Signed-off-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/display/intel_tc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/intel_tc.c b/drivers/gpu/drm/i915/display/intel_tc.c index 40faa18947c9..dbd7d0d83a14 100644 --- a/drivers/gpu/drm/i915/display/intel_tc.c +++ b/drivers/gpu/drm/i915/display/intel_tc.c @@ -345,10 +345,11 @@ static bool icl_tc_phy_status_complete(struct intel_digital_port *dig_port) static bool adl_tc_phy_status_complete(struct intel_digital_port *dig_port) { struct drm_i915_private *i915 = to_i915(dig_port->base.base.dev); + enum tc_port tc_port = intel_port_to_tc(i915, dig_port->base.port); struct intel_uncore *uncore = &i915->uncore; u32 val; - val = intel_uncore_read(uncore, TCSS_DDI_STATUS(dig_port->tc_phy_fia_idx)); + val = intel_uncore_read(uncore, TCSS_DDI_STATUS(tc_port)); if (val == 0xffffffff) { drm_dbg_kms(&i915->drm, "Port %s: PHY in TCCOLD, assuming not complete\n", From 7d73c602154df56802a9e75ac212505fc1e9a2b6 Mon Sep 17 00:00:00 2001 From: Umesh Nerlige Ramappa Date: Mon, 24 Jan 2022 18:01:24 -0800 Subject: [PATCH 6/6] drm/i915/pmu: Fix KMD and GuC race on accessing busyness GuC updates shared memory and KMD reads it. Since this is not synchronized, we run into a race where the value read is inconsistent. Sometimes the inconsistency is in reading the upper MSB bytes of the last_switch_in value. 2 types of cases are seen - upper 8 bits are zero and upper 24 bits are zero. Since these are non-zero values, it is not trivial to determine validity of these values. Instead we read the values multiple times until they are consistent. In test runs, 3 attempts results in consistent values. The upper bound is set to 6 attempts and may need to be tuned as per any new occurences. Since the duration that gt is parked can vary, the patch also updates the gt timestamp on unpark before starting the worker. v2: - Initialize i - Use READ_ONCE to access engine record Fixes: 77cdd054dd2c ("drm/i915/pmu: Connect engine busyness stats from GuC to pmu") Signed-off-by: Umesh Nerlige Ramappa Reviewed-by: Alan Previn Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20220125020124.788679-2-umesh.nerlige.ramappa@intel.com (cherry picked from commit 512712a824de9b856a4e61343e3e4390eba2c391) Signed-off-by: Tvrtko Ursulin --- .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 58 +++++++++++++++++-- 1 file changed, 54 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index fcec2cb833af..154ad726e266 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -1113,6 +1113,19 @@ __extend_last_switch(struct intel_guc *guc, u64 *prev_start, u32 new_start) if (new_start == lower_32_bits(*prev_start)) return; + /* + * When gt is unparked, we update the gt timestamp and start the ping + * worker that updates the gt_stamp every POLL_TIME_CLKS. As long as gt + * is unparked, all switched in contexts will have a start time that is + * within +/- POLL_TIME_CLKS of the most recent gt_stamp. + * + * If neither gt_stamp nor new_start has rolled over, then the + * gt_stamp_hi does not need to be adjusted, however if one of them has + * rolled over, we need to adjust gt_stamp_hi accordingly. + * + * The below conditions address the cases of new_start rollover and + * gt_stamp_last rollover respectively. + */ if (new_start < gt_stamp_last && (new_start - gt_stamp_last) <= POLL_TIME_CLKS) gt_stamp_hi++; @@ -1124,17 +1137,45 @@ __extend_last_switch(struct intel_guc *guc, u64 *prev_start, u32 new_start) *prev_start = ((u64)gt_stamp_hi << 32) | new_start; } -static void guc_update_engine_gt_clks(struct intel_engine_cs *engine) +/* + * GuC updates shared memory and KMD reads it. Since this is not synchronized, + * we run into a race where the value read is inconsistent. Sometimes the + * inconsistency is in reading the upper MSB bytes of the last_in value when + * this race occurs. 2 types of cases are seen - upper 8 bits are zero and upper + * 24 bits are zero. Since these are non-zero values, it is non-trivial to + * determine validity of these values. Instead we read the values multiple times + * until they are consistent. In test runs, 3 attempts results in consistent + * values. The upper bound is set to 6 attempts and may need to be tuned as per + * any new occurences. + */ +static void __get_engine_usage_record(struct intel_engine_cs *engine, + u32 *last_in, u32 *id, u32 *total) { struct guc_engine_usage_record *rec = intel_guc_engine_usage(engine); + int i = 0; + + do { + *last_in = READ_ONCE(rec->last_switch_in_stamp); + *id = READ_ONCE(rec->current_context_index); + *total = READ_ONCE(rec->total_runtime); + + if (READ_ONCE(rec->last_switch_in_stamp) == *last_in && + READ_ONCE(rec->current_context_index) == *id && + READ_ONCE(rec->total_runtime) == *total) + break; + } while (++i < 6); +} + +static void guc_update_engine_gt_clks(struct intel_engine_cs *engine) +{ struct intel_engine_guc_stats *stats = &engine->stats.guc; struct intel_guc *guc = &engine->gt->uc.guc; - u32 last_switch = rec->last_switch_in_stamp; - u32 ctx_id = rec->current_context_index; - u32 total = rec->total_runtime; + u32 last_switch, ctx_id, total; lockdep_assert_held(&guc->timestamp.lock); + __get_engine_usage_record(engine, &last_switch, &ctx_id, &total); + stats->running = ctx_id != ~0U && last_switch; if (stats->running) __extend_last_switch(guc, &stats->start_gt_clk, last_switch); @@ -1236,6 +1277,10 @@ static ktime_t guc_engine_busyness(struct intel_engine_cs *engine, ktime_t *now) if (!in_reset && intel_gt_pm_get_if_awake(gt)) { stats_saved = *stats; gt_stamp_saved = guc->timestamp.gt_stamp; + /* + * Update gt_clks, then gt timestamp to simplify the 'gt_stamp - + * start_gt_clk' calculation below for active engines. + */ guc_update_engine_gt_clks(engine); guc_update_pm_timestamp(guc, now); intel_gt_pm_put_async(gt); @@ -1364,10 +1409,15 @@ void intel_guc_busyness_park(struct intel_gt *gt) void intel_guc_busyness_unpark(struct intel_gt *gt) { struct intel_guc *guc = >->uc.guc; + unsigned long flags; + ktime_t unused; if (!guc_submission_initialized(guc)) return; + spin_lock_irqsave(&guc->timestamp.lock, flags); + guc_update_pm_timestamp(guc, &unused); + spin_unlock_irqrestore(&guc->timestamp.lock, flags); mod_delayed_work(system_highpri_wq, &guc->timestamp.work, guc->timestamp.ping_delay); }