From 26eb4cd6c7c7793ee76c73b66621f63daff51953 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 20 Jun 2018 14:59:29 +0100 Subject: [PATCH 001/114] drm/i915: Disable bh around call to tasklet MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The guc submission backends expects to only be run from (at least) softirq context, but during our intel_engine_is_idle() check we would call into the tasklet to make sure it was flushed. As this could occur from process context, occasionally we would be caught out using a wait_for_atomic() not from an atomic context: [ 59.939091] WARN_ON_ONCE((1) && !(preempt_count() != 0)) [ 59.939142] WARNING: CPU: 1 PID: 2901 at drivers/gpu/drm/i915/intel_guc_submission.c:615 guc_submission_tasklet+0x784/0xa90 [i915] [ 59.939143] Modules linked in: vgem snd_hda_codec_hdmi snd_hda_codec_realtek snd_hda_codec_generic i915 x86_pkg_temp_thermal intel_powerclamp coretemp crct10dif_pclmul snd_hda_intel crc32_pclmul snd_hda_codec ghash_clmulni_intel snd_hwdep snd_hda_core e1000e snd_pcm mei_me mei prime_numbers [ 59.939164] CPU: 1 PID: 2901 Comm: gem_exec_schedu Tainted: G U W 4.18.0-rc1-g93475d62c730-drmtip_67+ #1 [ 59.939165] Hardware name: System manufacturer System Product Name/Z170M-PLUS, BIOS 3610 03/29/2018 [ 59.939188] RIP: 0010:guc_submission_tasklet+0x784/0xa90 [i915] [ 59.939189] Code: fc ff ff 80 3d 2f 87 11 00 00 0f 85 80 fb ff ff 48 c7 c6 f8 49 40 c0 48 c7 c7 80 41 3e c0 c6 05 14 87 11 00 01 e8 2c ea d6 d3 <0f> 0b e9 5f fb ff ff 8b 46 38 89 cf 31 c7 83 e7 c0 75 08 39 c1 0f [ 59.939253] RSP: 0018:ffffaafe08a03c10 EFLAGS: 00010286 [ 59.939255] RAX: 0000000000000000 RBX: ffff8f9112c246f0 RCX: 0000000000000001 [ 59.939256] RDX: 0000000080000001 RSI: ffffffff95086d8e RDI: 00000000ffffffff [ 59.939257] RBP: ffff8f9112c24680 R08: 000000009517be77 R09: 0000000000000000 [ 59.939258] R10: 0000000000000000 R11: 0000000000000000 R12: ffff8f9112c24700 [ 59.939259] R13: ffff8f9112c24700 R14: 0000000000000000 R15: ffff8f9112c242a8 [ 59.939260] FS: 00007fc2cc7e5980(0000) GS:ffff8f9136c40000(0000) knlGS:0000000000000000 [ 59.939261] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 59.939262] CR2: 00007fc2cc815040 CR3: 000000021f10e003 CR4: 00000000003606e0 [ 59.939263] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 59.939264] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 59.939265] Call Trace: [ 59.939288] ? intel_engine_is_idle+0x64/0x160 [i915] [ 59.939323] ? intel_engine_dump+0x638/0x890 [i915] [ 59.939327] ? seq_printf+0x49/0x70 [ 59.939353] ? i915_engine_info+0xc8/0x100 [i915] [ 59.939356] ? drm_get_color_range_name+0x20/0x20 [ 59.939361] ? seq_read+0xf1/0x470 [ 59.939365] ? trace_hardirqs_on_caller+0xe0/0x1b0 [ 59.939370] ? full_proxy_read+0x51/0x80 [ 59.939389] ? __vfs_read+0x31/0x170 [ 59.939395] ? do_sys_open+0x13b/0x240 [ 59.939398] ? rcu_read_lock_sched_held+0x6f/0x80 [ 59.939401] ? vfs_read+0x9e/0x140 [ 59.939404] ? ksys_read+0x50/0xc0 [ 59.939409] ? do_syscall_64+0x55/0x190 [ 59.939412] ? entry_SYSCALL_64_after_hwframe+0x49/0xbe [ 59.939420] irq event stamp: 552834 [ 59.939422] hardirqs last enabled at (552833): [] console_unlock+0x3fc/0x600 [ 59.939425] hardirqs last disabled at (552834): [] error_entry+0x7c/0x100 [ 59.939451] softirqs last enabled at (552614): [] i915_request_add+0x2e3/0x7b0 [i915] [ 59.939470] softirqs last disabled at (552604): [] i915_request_add+0x25b/0x7b0 [i915] Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=106977 Fixes: dd0cf235d81f ("drm/i915: Speed up idle detection by kicking the tasklets") Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Cc: Mika Kuoppala Cc: Michal Wajdeczko Cc: Michał Winiarski Cc: Michel Thierry Reviewed-by: Michel Thierry Link: https://patchwork.freedesktop.org/patch/msgid/20180620135929.23956-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_engine_cs.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index 32bf3a408d46..d3264bd6e9dc 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -1000,10 +1000,12 @@ bool intel_engine_is_idle(struct intel_engine_cs *engine) if (READ_ONCE(engine->execlists.active)) { struct intel_engine_execlists *execlists = &engine->execlists; + local_bh_disable(); if (tasklet_trylock(&execlists->tasklet)) { execlists->tasklet.func(execlists->tasklet.data); tasklet_unlock(&execlists->tasklet); } + local_bh_enable(); if (READ_ONCE(execlists->active)) return false; From 827db9d8bb4a01314f214065e3f18c8345c5fc9f Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 21 Jun 2018 08:32:05 +0100 Subject: [PATCH 002/114] drm/i915: Ignore applying the self-relocation BIAS if no relocations We only need to apply the BIAS for self-relocations into the batchbuffer iff the execobject has any relocations. This suppresses some warnings we may get with a full gtt (so the batch object has wound up at 0 from a previous invocation), but doesn't fix the underlying problem of how we tried to move a pinned batch vma (how we have a pinned user vma outside of execbuf, I do not know, though this being on an aliasing ppgtt means it could be a spurious pinning via the global gtt). One step at a time... References: https://bugs.freedesktop.org/show_bug.cgi?id=106744#c1 Testcase: igt/gem_exec_gttfill # byt (sporadic) Signed-off-by: Chris Wilson Cc: Joonas Lahtinen Reviewed-by: Joonas Lahtinen Link: https://patchwork.freedesktop.org/patch/msgid/20180621073205.26701-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 60dc2a865f5f..437441f4af41 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -534,7 +534,8 @@ eb_add_vma(struct i915_execbuffer *eb, * paranoia do it everywhere. */ if (i == batch_idx) { - if (!(eb->flags[i] & EXEC_OBJECT_PINNED)) + if (entry->relocation_count && + !(eb->flags[i] & EXEC_OBJECT_PINNED)) eb->flags[i] |= __EXEC_OBJECT_NEEDS_BIAS; if (eb->reloc_cache.has_fence) eb->flags[i] |= EXEC_OBJECT_NEEDS_FENCE; From d20ac620f0010f9cdcbbe8408e5f22aae3c49faa Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 21 Jun 2018 09:01:50 +0100 Subject: [PATCH 003/114] drm/i915: Redefine EINVAL for debugging To aide debugging spurious EINVALs, include a debug message every time we emit one from execbuf. References: https://bugs.freedesktop.org/show_bug.cgi?id=106744 Signed-off-by: Chris Wilson Cc: Joonas Lahtinen Reviewed-by: Joonas Lahtinen Link: https://patchwork.freedesktop.org/patch/msgid/20180621080150.8110-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 437441f4af41..c2dd9b4cdace 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -66,6 +66,15 @@ enum { #define __I915_EXEC_ILLEGAL_FLAGS \ (__I915_EXEC_UNKNOWN_FLAGS | I915_EXEC_CONSTANTS_MASK) +/* Catch emission of unexpected errors for CI! */ +#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM) +#undef EINVAL +#define EINVAL ({ \ + DRM_DEBUG_DRIVER("EINVAL at %s:%d\n", __func__, __LINE__); \ + 22; \ +}) +#endif + /** * DOC: User command execution * From 8a29c778fa1a50a25a3e66cf9589888758858d24 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Wed, 23 May 2018 11:04:35 -0700 Subject: [PATCH 004/114] drm/i915: remove check for aux irq MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This became dead code with commit 309bd8ed464f ("drm/i915: Reinstate GMBUS and AUX interrupts on gen4/g4x"). v2: Move comment about HW behavior to where decision is made to enable MSI (Ville). Cc: Ville Syrjälä Signed-off-by: Lucas De Marchi Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20180523180435.18042-1-lucas.demarchi@intel.com --- drivers/gpu/drm/i915/i915_drv.c | 6 ++++++ drivers/gpu/drm/i915/i915_drv.h | 10 ---------- drivers/gpu/drm/i915/intel_dp.c | 22 +++++++--------------- drivers/gpu/drm/i915/intel_drv.h | 1 - drivers/gpu/drm/i915/intel_psr.c | 2 +- 5 files changed, 14 insertions(+), 27 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 33453d56c386..2959c88a37a5 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -1165,6 +1165,12 @@ static int i915_driver_init_hw(struct drm_i915_private *dev_priv) * get lost on g4x as well, and interrupt delivery seems to stay * properly dead afterwards. So we'll just disable them for all * pre-gen5 chipsets. + * + * dp aux and gmbus irq on gen4 seems to be able to generate legacy + * interrupts even when in MSI mode. This results in spurious + * interrupt warnings if the legacy irq no. is shared with another + * device. The kernel then disables that interrupt source and so + * prevents the other device from working properly. */ if (INTEL_GEN(dev_priv) >= 5) { if (pci_enable_msi(pdev) < 0) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 735f695cb889..6f08ab310118 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2581,16 +2581,6 @@ intel_info(const struct drm_i915_private *dev_priv) (IS_CANNONLAKE(dev_priv) || \ IS_SKL_GT3(dev_priv) || IS_SKL_GT4(dev_priv)) -/* - * dp aux and gmbus irq on gen4 seems to be able to generate legacy interrupts - * even when in MSI mode. This results in spurious interrupt warnings if the - * legacy irq no. is shared with another device. The kernel then disables that - * interrupt source and so prevents the other device from working properly. - * - * Since we don't enable MSI anymore on gen4, we can always use GMBUS/AUX - * interrupts. - */ -#define HAS_AUX_IRQ(dev_priv) true #define HAS_GMBUS_IRQ(dev_priv) (INTEL_GEN(dev_priv) >= 4) /* With the 945 and later, Y tiling got adjusted so that it was 32 128-byte diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index 6ac6c8787dcf..c1b2f00f324b 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -953,7 +953,7 @@ intel_dp_check_edp(struct intel_dp *intel_dp) } static uint32_t -intel_dp_aux_wait_done(struct intel_dp *intel_dp, bool has_aux_irq) +intel_dp_aux_wait_done(struct intel_dp *intel_dp) { struct drm_i915_private *dev_priv = to_i915(intel_dp_to_dev(intel_dp)); i915_reg_t ch_ctl = intel_dp->aux_ch_ctl_reg(intel_dp); @@ -961,14 +961,10 @@ intel_dp_aux_wait_done(struct intel_dp *intel_dp, bool has_aux_irq) bool done; #define C (((status = I915_READ_NOTRACE(ch_ctl)) & DP_AUX_CH_CTL_SEND_BUSY) == 0) - if (has_aux_irq) - done = wait_event_timeout(dev_priv->gmbus_wait_queue, C, - msecs_to_jiffies_timeout(10)); - else - done = wait_for(C, 10) == 0; + done = wait_event_timeout(dev_priv->gmbus_wait_queue, C, + msecs_to_jiffies_timeout(10)); if (!done) - DRM_ERROR("dp aux hw did not signal timeout (has irq: %i)!\n", - has_aux_irq); + DRM_ERROR("dp aux hw did not signal timeout!\n"); #undef C return status; @@ -1033,7 +1029,6 @@ static uint32_t skl_get_aux_clock_divider(struct intel_dp *intel_dp, int index) } static uint32_t g4x_get_aux_send_ctl(struct intel_dp *intel_dp, - bool has_aux_irq, int send_bytes, uint32_t aux_clock_divider) { @@ -1054,7 +1049,7 @@ static uint32_t g4x_get_aux_send_ctl(struct intel_dp *intel_dp, return DP_AUX_CH_CTL_SEND_BUSY | DP_AUX_CH_CTL_DONE | - (has_aux_irq ? DP_AUX_CH_CTL_INTERRUPT : 0) | + DP_AUX_CH_CTL_INTERRUPT | DP_AUX_CH_CTL_TIME_OUT_ERROR | timeout | DP_AUX_CH_CTL_RECEIVE_ERROR | @@ -1064,13 +1059,12 @@ static uint32_t g4x_get_aux_send_ctl(struct intel_dp *intel_dp, } static uint32_t skl_get_aux_send_ctl(struct intel_dp *intel_dp, - bool has_aux_irq, int send_bytes, uint32_t unused) { return DP_AUX_CH_CTL_SEND_BUSY | DP_AUX_CH_CTL_DONE | - (has_aux_irq ? DP_AUX_CH_CTL_INTERRUPT : 0) | + DP_AUX_CH_CTL_INTERRUPT | DP_AUX_CH_CTL_TIME_OUT_ERROR | DP_AUX_CH_CTL_TIME_OUT_MAX | DP_AUX_CH_CTL_RECEIVE_ERROR | @@ -1093,7 +1087,6 @@ intel_dp_aux_xfer(struct intel_dp *intel_dp, int i, ret, recv_bytes; uint32_t status; int try, clock = 0; - bool has_aux_irq = HAS_AUX_IRQ(dev_priv); bool vdd; ch_ctl = intel_dp->aux_ch_ctl_reg(intel_dp); @@ -1148,7 +1141,6 @@ intel_dp_aux_xfer(struct intel_dp *intel_dp, while ((aux_clock_divider = intel_dp->get_aux_clock_divider(intel_dp, clock++))) { u32 send_ctl = intel_dp->get_aux_send_ctl(intel_dp, - has_aux_irq, send_bytes, aux_clock_divider); @@ -1165,7 +1157,7 @@ intel_dp_aux_xfer(struct intel_dp *intel_dp, /* Send the command and wait for it to complete */ I915_WRITE(ch_ctl, send_ctl); - status = intel_dp_aux_wait_done(intel_dp, has_aux_irq); + status = intel_dp_aux_wait_done(intel_dp); /* Clear done status and any errors */ I915_WRITE(ch_ctl, diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 22831acc7c30..9e0a8cc48521 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -1132,7 +1132,6 @@ struct intel_dp { * register with to kick off an AUX transaction. */ uint32_t (*get_aux_send_ctl)(struct intel_dp *dp, - bool has_aux_irq, int send_bytes, uint32_t aux_clock_divider); diff --git a/drivers/gpu/drm/i915/intel_psr.c b/drivers/gpu/drm/i915/intel_psr.c index d4cd19fea148..aea81ace854b 100644 --- a/drivers/gpu/drm/i915/intel_psr.c +++ b/drivers/gpu/drm/i915/intel_psr.c @@ -336,7 +336,7 @@ static void hsw_psr_setup_aux(struct intel_dp *intel_dp) aux_clock_divider = intel_dp->get_aux_clock_divider(intel_dp, 0); /* Start with bits set for DDI_AUX_CTL register */ - aux_ctl = intel_dp->get_aux_send_ctl(intel_dp, 0, sizeof(aux_msg), + aux_ctl = intel_dp->get_aux_send_ctl(intel_dp, sizeof(aux_msg), aux_clock_divider); /* Select only valid bits for SRD_AUX_CTL */ From 9fc59bae0f4a8ec3676a9245abed7721b4d3d8c9 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Fri, 15 Jun 2018 17:39:10 +0300 Subject: [PATCH 005/114] drm/i915/icl: Fix MG PLL setup when refclk is 38.4MHz Atm we're zeroing out fields in MG_PLL_BIAS and MG_PLL_TDC_COLDST_BIAS if refclk is 38.4MHz, whereas the spec tells us to preserve them. Although the calculated values mostly match the register defaults even for the 38.4MHz case, there are some differences wrt. what BIOS programs (I noticed at least differences in the MG_PLL_BIAS/IREFTRIM and MG_PLL_BIAS/BIASCAL_EN fields). In the lack of further info on how to program these fields, just do what the spec says and preserve the BIOS state. v2: - Preserve the BIOS programmed reg fields instead of programming them. Cc: Vandita Kulkarni Cc: Paulo Zanoni Cc: James Ausmus Signed-off-by: Imre Deak Reviewed-by: James Ausmus (v1) Reviewed-by: Vandita Kulkarni Link: https://patchwork.freedesktop.org/patch/msgid/20180615143911.31082-1-imre.deak@intel.com --- drivers/gpu/drm/i915/intel_dpll_mgr.c | 63 +++++++++++++++++++-------- drivers/gpu/drm/i915/intel_dpll_mgr.h | 2 + 2 files changed, 47 insertions(+), 18 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_dpll_mgr.c b/drivers/gpu/drm/i915/intel_dpll_mgr.c index 132fe63e042a..d4c7bacbe83e 100644 --- a/drivers/gpu/drm/i915/intel_dpll_mgr.c +++ b/drivers/gpu/drm/i915/intel_dpll_mgr.c @@ -2812,25 +2812,31 @@ static bool icl_calc_mg_pll_state(struct intel_crtc_state *crtc_state, MG_PLL_SSC_FLLEN | MG_PLL_SSC_STEPSIZE(ssc_stepsize); - pll_state->mg_pll_tdc_coldst_bias = MG_PLL_TDC_COLDST_COLDSTART; + pll_state->mg_pll_tdc_coldst_bias = MG_PLL_TDC_COLDST_COLDSTART | + MG_PLL_TDC_COLDST_IREFINT_EN | + MG_PLL_TDC_COLDST_REFBIAS_START_PULSE_W(iref_pulse_w) | + MG_PLL_TDC_TDCOVCCORR_EN | + MG_PLL_TDC_TDCSEL(3); - if (refclk_khz != 38400) { - pll_state->mg_pll_tdc_coldst_bias |= - MG_PLL_TDC_COLDST_IREFINT_EN | - MG_PLL_TDC_COLDST_REFBIAS_START_PULSE_W(iref_pulse_w) | - MG_PLL_TDC_COLDST_COLDSTART | - MG_PLL_TDC_TDCOVCCORR_EN | - MG_PLL_TDC_TDCSEL(3); + pll_state->mg_pll_bias = MG_PLL_BIAS_BIAS_GB_SEL(3) | + MG_PLL_BIAS_INIT_DCOAMP(0x3F) | + MG_PLL_BIAS_BIAS_BONUS(10) | + MG_PLL_BIAS_BIASCAL_EN | + MG_PLL_BIAS_CTRIM(12) | + MG_PLL_BIAS_VREF_RDAC(4) | + MG_PLL_BIAS_IREFTRIM(iref_trim); - pll_state->mg_pll_bias = MG_PLL_BIAS_BIAS_GB_SEL(3) | - MG_PLL_BIAS_INIT_DCOAMP(0x3F) | - MG_PLL_BIAS_BIAS_BONUS(10) | - MG_PLL_BIAS_BIASCAL_EN | - MG_PLL_BIAS_CTRIM(12) | - MG_PLL_BIAS_VREF_RDAC(4) | - MG_PLL_BIAS_IREFTRIM(iref_trim); + if (refclk_khz == 38400) { + pll_state->mg_pll_tdc_coldst_bias_mask = MG_PLL_TDC_COLDST_COLDSTART; + pll_state->mg_pll_bias_mask = 0; + } else { + pll_state->mg_pll_tdc_coldst_bias_mask = -1U; + pll_state->mg_pll_bias_mask = -1U; } + pll_state->mg_pll_tdc_coldst_bias &= pll_state->mg_pll_tdc_coldst_bias_mask; + pll_state->mg_pll_bias &= pll_state->mg_pll_bias_mask; + return true; } @@ -2948,9 +2954,21 @@ static bool icl_pll_get_hw_state(struct drm_i915_private *dev_priv, hw_state->mg_pll_lf = I915_READ(MG_PLL_LF(port)); hw_state->mg_pll_frac_lock = I915_READ(MG_PLL_FRAC_LOCK(port)); hw_state->mg_pll_ssc = I915_READ(MG_PLL_SSC(port)); + hw_state->mg_pll_bias = I915_READ(MG_PLL_BIAS(port)); hw_state->mg_pll_tdc_coldst_bias = I915_READ(MG_PLL_TDC_COLDST_BIAS(port)); + + if (dev_priv->cdclk.hw.ref == 38400) { + hw_state->mg_pll_tdc_coldst_bias_mask = MG_PLL_TDC_COLDST_COLDSTART; + hw_state->mg_pll_bias_mask = 0; + } else { + hw_state->mg_pll_tdc_coldst_bias_mask = -1U; + hw_state->mg_pll_bias_mask = -1U; + } + + hw_state->mg_pll_tdc_coldst_bias &= hw_state->mg_pll_tdc_coldst_bias_mask; + hw_state->mg_pll_bias &= hw_state->mg_pll_bias_mask; break; default: MISSING_CASE(id); @@ -2978,6 +2996,7 @@ static void icl_mg_pll_write(struct drm_i915_private *dev_priv, { struct intel_dpll_hw_state *hw_state = &pll->state.hw_state; enum port port = icl_mg_pll_id_to_port(pll->info->id); + u32 val; I915_WRITE(MG_REFCLKIN_CTL(port), hw_state->mg_refclkin_ctl); I915_WRITE(MG_CLKTOP2_CORECLKCTL1(port), @@ -2988,9 +3007,17 @@ static void icl_mg_pll_write(struct drm_i915_private *dev_priv, I915_WRITE(MG_PLL_LF(port), hw_state->mg_pll_lf); I915_WRITE(MG_PLL_FRAC_LOCK(port), hw_state->mg_pll_frac_lock); I915_WRITE(MG_PLL_SSC(port), hw_state->mg_pll_ssc); - I915_WRITE(MG_PLL_BIAS(port), hw_state->mg_pll_bias); - I915_WRITE(MG_PLL_TDC_COLDST_BIAS(port), - hw_state->mg_pll_tdc_coldst_bias); + + val = I915_READ(MG_PLL_BIAS(port)); + val &= ~hw_state->mg_pll_bias_mask; + val |= hw_state->mg_pll_bias; + I915_WRITE(MG_PLL_BIAS(port), val); + + val = I915_READ(MG_PLL_TDC_COLDST_BIAS(port)); + val &= ~hw_state->mg_pll_tdc_coldst_bias_mask; + val |= hw_state->mg_pll_tdc_coldst_bias; + I915_WRITE(MG_PLL_TDC_COLDST_BIAS(port), val); + POSTING_READ(MG_PLL_TDC_COLDST_BIAS(port)); } diff --git a/drivers/gpu/drm/i915/intel_dpll_mgr.h b/drivers/gpu/drm/i915/intel_dpll_mgr.h index ba925c7ee482..7e522cf4f13f 100644 --- a/drivers/gpu/drm/i915/intel_dpll_mgr.h +++ b/drivers/gpu/drm/i915/intel_dpll_mgr.h @@ -180,6 +180,8 @@ struct intel_dpll_hw_state { uint32_t mg_pll_ssc; uint32_t mg_pll_bias; uint32_t mg_pll_tdc_coldst_bias; + uint32_t mg_pll_bias_mask; + uint32_t mg_pll_tdc_coldst_bias_mask; }; /** From bd99ce085f165a07fe8b33ad04157b91c91b8668 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Tue, 19 Jun 2018 19:41:15 +0300 Subject: [PATCH 006/114] drm/i915/icl: Do read-modify-write as needed during MG PLL programming Some MG PLL registers have fields that need to be preserved at their HW default or BIOS programmed values. So make sure we preserve them. v2: - Add comment to icl_mg_pll_write() explaining the need for register masks. (Vandita) - Fix patchwork checkpatch warning. v3: - Rebase on drm-tip. Cc: Vandita Kulkarni Cc: Paulo Zanoni Cc: James Ausmus Signed-off-by: Imre Deak Reviewed-by: James Ausmus (v1) Reviewed-by: Vandita Kulkarni Link: https://patchwork.freedesktop.org/patch/msgid/20180619164115.7835-1-imre.deak@intel.com --- drivers/gpu/drm/i915/i915_reg.h | 13 +++++++++ drivers/gpu/drm/i915/intel_dpll_mgr.c | 39 ++++++++++++++++++++++++--- 2 files changed, 48 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 4bfd7a9bd75f..65b222287ee4 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -9047,6 +9047,7 @@ enum skl_power_gate { #define _MG_REFCLKIN_CTL_PORT3 0x16A92C #define _MG_REFCLKIN_CTL_PORT4 0x16B92C #define MG_REFCLKIN_CTL_OD_2_MUX(x) ((x) << 8) +#define MG_REFCLKIN_CTL_OD_2_MUX_MASK (0x7 << 8) #define MG_REFCLKIN_CTL(port) _MMIO_PORT((port) - PORT_C, \ _MG_REFCLKIN_CTL_PORT1, \ _MG_REFCLKIN_CTL_PORT2) @@ -9056,7 +9057,9 @@ enum skl_power_gate { #define _MG_CLKTOP2_CORECLKCTL1_PORT3 0x16A8D8 #define _MG_CLKTOP2_CORECLKCTL1_PORT4 0x16B8D8 #define MG_CLKTOP2_CORECLKCTL1_B_DIVRATIO(x) ((x) << 16) +#define MG_CLKTOP2_CORECLKCTL1_B_DIVRATIO_MASK (0xff << 16) #define MG_CLKTOP2_CORECLKCTL1_A_DIVRATIO(x) ((x) << 8) +#define MG_CLKTOP2_CORECLKCTL1_A_DIVRATIO_MASK (0xff << 8) #define MG_CLKTOP2_CORECLKCTL1(port) _MMIO_PORT((port) - PORT_C, \ _MG_CLKTOP2_CORECLKCTL1_PORT1, \ _MG_CLKTOP2_CORECLKCTL1_PORT2) @@ -9066,9 +9069,13 @@ enum skl_power_gate { #define _MG_CLKTOP2_HSCLKCTL_PORT3 0x16A8D4 #define _MG_CLKTOP2_HSCLKCTL_PORT4 0x16B8D4 #define MG_CLKTOP2_HSCLKCTL_CORE_INPUTSEL(x) ((x) << 16) +#define MG_CLKTOP2_HSCLKCTL_CORE_INPUTSEL_MASK (0x1 << 16) #define MG_CLKTOP2_HSCLKCTL_TLINEDRV_CLKSEL(x) ((x) << 14) +#define MG_CLKTOP2_HSCLKCTL_TLINEDRV_CLKSEL_MASK (0x3 << 14) #define MG_CLKTOP2_HSCLKCTL_HSDIV_RATIO(x) ((x) << 12) +#define MG_CLKTOP2_HSCLKCTL_HSDIV_RATIO_MASK (0x3 << 12) #define MG_CLKTOP2_HSCLKCTL_DSDIV_RATIO(x) ((x) << 8) +#define MG_CLKTOP2_HSCLKCTL_DSDIV_RATIO_MASK (0xf << 8) #define MG_CLKTOP2_HSCLKCTL(port) _MMIO_PORT((port) - PORT_C, \ _MG_CLKTOP2_HSCLKCTL_PORT1, \ _MG_CLKTOP2_HSCLKCTL_PORT2) @@ -9142,12 +9149,18 @@ enum skl_power_gate { #define _MG_PLL_BIAS_PORT3 0x16AA14 #define _MG_PLL_BIAS_PORT4 0x16BA14 #define MG_PLL_BIAS_BIAS_GB_SEL(x) ((x) << 30) +#define MG_PLL_BIAS_BIAS_GB_SEL_MASK (0x3 << 30) #define MG_PLL_BIAS_INIT_DCOAMP(x) ((x) << 24) +#define MG_PLL_BIAS_INIT_DCOAMP_MASK (0x3f << 24) #define MG_PLL_BIAS_BIAS_BONUS(x) ((x) << 16) +#define MG_PLL_BIAS_BIAS_BONUS_MASK (0xff << 16) #define MG_PLL_BIAS_BIASCAL_EN (1 << 15) #define MG_PLL_BIAS_CTRIM(x) ((x) << 8) +#define MG_PLL_BIAS_CTRIM_MASK (0x1f << 8) #define MG_PLL_BIAS_VREF_RDAC(x) ((x) << 5) +#define MG_PLL_BIAS_VREF_RDAC_MASK (0x7 << 5) #define MG_PLL_BIAS_IREFTRIM(x) ((x) << 0) +#define MG_PLL_BIAS_IREFTRIM_MASK (0x1f << 0) #define MG_PLL_BIAS(port) _MMIO_PORT((port) - PORT_C, _MG_PLL_BIAS_PORT1, \ _MG_PLL_BIAS_PORT2) diff --git a/drivers/gpu/drm/i915/intel_dpll_mgr.c b/drivers/gpu/drm/i915/intel_dpll_mgr.c index d4c7bacbe83e..57342364fd30 100644 --- a/drivers/gpu/drm/i915/intel_dpll_mgr.c +++ b/drivers/gpu/drm/i915/intel_dpll_mgr.c @@ -2945,10 +2945,21 @@ static bool icl_pll_get_hw_state(struct drm_i915_private *dev_priv, case DPLL_ID_ICL_MGPLL4: port = icl_mg_pll_id_to_port(id); hw_state->mg_refclkin_ctl = I915_READ(MG_REFCLKIN_CTL(port)); + hw_state->mg_refclkin_ctl &= MG_REFCLKIN_CTL_OD_2_MUX_MASK; + hw_state->mg_clktop2_coreclkctl1 = I915_READ(MG_CLKTOP2_CORECLKCTL1(port)); + hw_state->mg_clktop2_coreclkctl1 &= + MG_CLKTOP2_CORECLKCTL1_A_DIVRATIO_MASK; + hw_state->mg_clktop2_hsclkctl = I915_READ(MG_CLKTOP2_HSCLKCTL(port)); + hw_state->mg_clktop2_hsclkctl &= + MG_CLKTOP2_HSCLKCTL_TLINEDRV_CLKSEL_MASK | + MG_CLKTOP2_HSCLKCTL_CORE_INPUTSEL_MASK | + MG_CLKTOP2_HSCLKCTL_HSDIV_RATIO_MASK | + MG_CLKTOP2_HSCLKCTL_DSDIV_RATIO_MASK; + hw_state->mg_pll_div0 = I915_READ(MG_PLL_DIV0(port)); hw_state->mg_pll_div1 = I915_READ(MG_PLL_DIV1(port)); hw_state->mg_pll_lf = I915_READ(MG_PLL_LF(port)); @@ -2998,10 +3009,30 @@ static void icl_mg_pll_write(struct drm_i915_private *dev_priv, enum port port = icl_mg_pll_id_to_port(pll->info->id); u32 val; - I915_WRITE(MG_REFCLKIN_CTL(port), hw_state->mg_refclkin_ctl); - I915_WRITE(MG_CLKTOP2_CORECLKCTL1(port), - hw_state->mg_clktop2_coreclkctl1); - I915_WRITE(MG_CLKTOP2_HSCLKCTL(port), hw_state->mg_clktop2_hsclkctl); + /* + * Some of the following registers have reserved fields, so program + * these with RMW based on a mask. The mask can be fixed or generated + * during the calc/readout phase if the mask depends on some other HW + * state like refclk, see icl_calc_mg_pll_state(). + */ + val = I915_READ(MG_REFCLKIN_CTL(port)); + val &= ~MG_REFCLKIN_CTL_OD_2_MUX_MASK; + val |= hw_state->mg_refclkin_ctl; + I915_WRITE(MG_REFCLKIN_CTL(port), val); + + val = I915_READ(MG_CLKTOP2_CORECLKCTL1(port)); + val &= ~MG_CLKTOP2_CORECLKCTL1_A_DIVRATIO_MASK; + val |= hw_state->mg_clktop2_coreclkctl1; + I915_WRITE(MG_CLKTOP2_CORECLKCTL1(port), val); + + val = I915_READ(MG_CLKTOP2_HSCLKCTL(port)); + val &= ~(MG_CLKTOP2_HSCLKCTL_TLINEDRV_CLKSEL_MASK | + MG_CLKTOP2_HSCLKCTL_CORE_INPUTSEL_MASK | + MG_CLKTOP2_HSCLKCTL_HSDIV_RATIO_MASK | + MG_CLKTOP2_HSCLKCTL_DSDIV_RATIO_MASK); + val |= hw_state->mg_clktop2_hsclkctl; + I915_WRITE(MG_CLKTOP2_HSCLKCTL(port), val); + I915_WRITE(MG_PLL_DIV0(port), hw_state->mg_pll_div0); I915_WRITE(MG_PLL_DIV1(port), hw_state->mg_pll_div1); I915_WRITE(MG_PLL_LF(port), hw_state->mg_pll_lf); From e16a375086337af3fcf21dbd512e1573049ba6cb Mon Sep 17 00:00:00 2001 From: Vandita Kulkarni Date: Thu, 21 Jun 2018 20:43:56 +0530 Subject: [PATCH 007/114] drm/i915: Enable hw workaround to bypass alpha Alpha blending with alpha 0 and 0xff passes through alpha math and rounding logic causing differences compared to fully transparent or opaque plane,resulting in CRC mismatch. This WA on icl and above enables hardware to bypass alpha math and rounding for per pixel alpha values of 00 and 0xff v2: Fix patchwork checkpatch warnings. Signed-off-by: Vandita Kulkarni Reviewed-by: Maarten Lankhorst Signed-off-by: Maarten Lankhorst Link: https://patchwork.freedesktop.org/patch/msgid/1529594036-25036-1-git-send-email-vandita.kulkarni@intel.com --- drivers/gpu/drm/i915/i915_reg.h | 8 ++++++++ drivers/gpu/drm/i915/intel_display.c | 12 ++++++++++++ 2 files changed, 20 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 65b222287ee4..caad19f5f557 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -7366,6 +7366,14 @@ enum { #define BDW_SCRATCH1 _MMIO(0xb11c) #define GEN9_LBS_SLA_RETRY_TIMER_DECREMENT_ENABLE (1 << 2) +/*GEN11 chicken */ +#define _PIPEA_CHICKEN 0x70038 +#define _PIPEB_CHICKEN 0x71038 +#define _PIPEC_CHICKEN 0x72038 +#define PER_PIXEL_ALPHA_BYPASS_EN (1 << 7) +#define PIPE_CHICKEN(pipe) _MMIO_PIPE(pipe, _PIPEA_CHICKEN,\ + _PIPEB_CHICKEN) + /* PCH */ /* south display engine interrupt: IBX */ diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index b2a5a9ed9404..4db576c3e364 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -5633,6 +5633,7 @@ static void haswell_crtc_enable(struct intel_crtc_state *pipe_config, struct intel_atomic_state *old_intel_state = to_intel_atomic_state(old_state); bool psl_clkgate_wa; + u32 pipe_chicken; if (WARN_ON(intel_crtc->active)) return; @@ -5692,6 +5693,17 @@ static void haswell_crtc_enable(struct intel_crtc_state *pipe_config, */ intel_color_load_luts(&pipe_config->base); + /* + * Display WA #1153: enable hardware to bypass the alpha math + * and rounding for per-pixel values 00 and 0xff + */ + if (INTEL_GEN(dev_priv) >= 11) { + pipe_chicken = I915_READ(PIPE_CHICKEN(pipe)); + if (!(pipe_chicken & PER_PIXEL_ALPHA_BYPASS_EN)) + I915_WRITE_FW(PIPE_CHICKEN(pipe), + pipe_chicken | PER_PIXEL_ALPHA_BYPASS_EN); + } + intel_ddi_set_pipe_settings(pipe_config); if (!transcoder_is_dsi(cpu_transcoder)) intel_ddi_enable_transcoder_func(pipe_config); From aa58f58d9c6471a1087ede57e4ab6cc0817c949f Mon Sep 17 00:00:00 2001 From: Anusha Srivatsa Date: Fri, 22 Jun 2018 11:19:03 -0700 Subject: [PATCH 008/114] drm/i915/guc: Remove USES_GUC_SUBMISSION for ads programming In the guc_ctl_debug_flags, the ads struct is programmed only when USES_GUC_SUBMISSION is satisfied. But, this has to be programmed for all suspend/resume cases. Remove the condition and program the ads struct for both huc loading and guc submission. This issue was noticed when CI threw errors for enable_guc=2 (load huc; disable submission) v2: - Change commit title. - Correct the shifts. (Daniele) Credits to: Daniele Ceraolo Spurio Cc: John Spotswood Cc: Oscar Mateo Cc: Daniele Ceraolo Spurio Signed-off-by: Anusha Srivatsa Reviewed-by: Daniele Ceraolo Spurio Reviewed-by: John Spotswood Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/1529691543-28606-1-git-send-email-anusha.srivatsa@intel.com --- drivers/gpu/drm/i915/intel_guc.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_guc.c b/drivers/gpu/drm/i915/intel_guc.c index 1aff30b0870c..f651e57b4c61 100644 --- a/drivers/gpu/drm/i915/intel_guc.c +++ b/drivers/gpu/drm/i915/intel_guc.c @@ -206,7 +206,11 @@ void intel_guc_fini(struct intel_guc *guc) static u32 guc_ctl_debug_flags(struct intel_guc *guc) { u32 level = intel_guc_log_get_level(&guc->log); - u32 flags = 0; + u32 flags; + u32 ads; + + ads = intel_guc_ggtt_offset(guc, guc->ads_vma) >> PAGE_SHIFT; + flags = ads << GUC_ADS_ADDR_SHIFT | GUC_ADS_ENABLED; if (!GUC_LOG_LEVEL_IS_ENABLED(level)) flags |= GUC_LOG_DEFAULT_DISABLED; @@ -217,13 +221,6 @@ static u32 guc_ctl_debug_flags(struct intel_guc *guc) flags |= GUC_LOG_LEVEL_TO_VERBOSITY(level) << GUC_LOG_VERBOSITY_SHIFT; - if (USES_GUC_SUBMISSION(guc_to_i915(guc))) { - u32 ads = intel_guc_ggtt_offset(guc, guc->ads_vma) - >> PAGE_SHIFT; - - flags |= ads << GUC_ADS_ADDR_SHIFT | GUC_ADS_ENABLED; - } - return flags; } From 8d52e447807b350b98ffb4e64bc2fcc1f181c5be Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sat, 23 Jun 2018 11:39:51 +0100 Subject: [PATCH 009/114] drm/i915: Defer modeset cleanup to a secondary task If we avoid cleaning up the old state immediately in intel_atomic_commit_tail() and defer it to a second task, we can avoid taking heavily contended locks when the caller is ready to procede. Subsequent modesets will wait for the cleanup operation (either directly via the ordered modeset wq or indirectly through the atomic helperr) which keeps the number of inflight cleanup tasks in check. As an example, during reset an immediate modeset is performed to disable the displays before the HW is reset, which must avoid struct_mutex to avoid recursion. Moving the cleanup to a separate task, defers acquiring the struct_mutex to after the GPU is running again, allowing it to complete. Even in a few patches time (optimist!) when we no longer require struct_mutex to unpin the framebuffers, it will still be good practice to minimise the number of contention points along reset. The mutex dependency still exists (as one modeset flushes the other), but in the short term it resolves the deadlock for simple reset cases. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=101600 Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20180623103951.23889-1-chris@chris-wilson.co.uk Acked-by: Maarten Lankhorst Acked-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_display.c | 30 +++++++++++++++++++++------- 1 file changed, 23 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 4db576c3e364..1469a56f6c46 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -12555,6 +12555,19 @@ static void intel_atomic_commit_fence_wait(struct intel_atomic_state *intel_stat finish_wait(&dev_priv->gpu_error.wait_queue, &wait_reset); } +static void intel_atomic_cleanup_work(struct work_struct *work) +{ + struct drm_atomic_state *state = + container_of(work, struct drm_atomic_state, commit_work); + struct drm_i915_private *i915 = to_i915(state->dev); + + drm_atomic_helper_cleanup_planes(&i915->drm, state); + drm_atomic_helper_commit_cleanup_done(state); + drm_atomic_state_put(state); + + intel_atomic_helper_free_state(i915); +} + static void intel_atomic_commit_tail(struct drm_atomic_state *state) { struct drm_device *dev = state->dev; @@ -12715,13 +12728,16 @@ static void intel_atomic_commit_tail(struct drm_atomic_state *state) intel_display_power_put(dev_priv, POWER_DOMAIN_MODESET); } - drm_atomic_helper_cleanup_planes(dev, state); - - drm_atomic_helper_commit_cleanup_done(state); - - drm_atomic_state_put(state); - - intel_atomic_helper_free_state(dev_priv); + /* + * Defer the cleanup of the old state to a separate worker to not + * impede the current task (userspace for blocking modesets) that + * are executed inline. For out-of-line asynchronous modesets/flips, + * deferring to a new worker seems overkill, but we would place a + * schedule point (cond_resched()) here anyway to keep latencies + * down. + */ + INIT_WORK(&state->commit_work, intel_atomic_cleanup_work); + schedule_work(&state->commit_work); } static void intel_atomic_commit_work(struct work_struct *work) From dd12c6ca5b45fd54e80c1424b11a5a25ad913dbb Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 25 Jun 2018 11:06:03 +0100 Subject: [PATCH 010/114] drm/i915/execlists: Check for ce->state before destroy As we may cancel the ce->state allocation during context pinning (but crucially after we mark ce as operational), that means we may be asked to destroy a nonexistent ce->state. Given the choice in handing a complex error path on pinning, and just ignoring the lack of state in destroy, choice the latter for simplicity. Reported-by: Zhao Yakui Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20180625100604.22598-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_lrc.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 10deebe49543..8f020537a34b 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -1338,9 +1338,11 @@ static void execlists_schedule(struct i915_request *request, static void execlists_context_destroy(struct intel_context *ce) { - GEM_BUG_ON(!ce->state); GEM_BUG_ON(ce->pin_count); + if (!ce->state) + return; + intel_ring_free(ce->ring); __i915_gem_object_release_unless_active(ce->state->obj); } From efe79d48a7debf57ad156a43a9215f8b0c9210d4 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 25 Jun 2018 11:06:04 +0100 Subject: [PATCH 011/114] drm/i915: Context objects can never be active when freed Due to how we only release the pining on the context state on retirement and never track activity on the context vma itself, the object can never be active at the point of release. Replace the conditional transfer of ownership onto an active-reference with an assert that the object is idle. Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20180625100604.22598-2-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_lrc.c | 4 +++- drivers/gpu/drm/i915/intel_ringbuffer.c | 7 +++++-- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 8f020537a34b..46572976c942 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -1344,7 +1344,9 @@ static void execlists_context_destroy(struct intel_context *ce) return; intel_ring_free(ce->ring); - __i915_gem_object_release_unless_active(ce->state->obj); + + GEM_BUG_ON(i915_gem_object_is_active(ce->state->obj)); + i915_gem_object_put(ce->state->obj); } static void execlists_context_unpin(struct intel_context *ce) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 4dae23885006..d248742b1473 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -1167,8 +1167,11 @@ static void intel_ring_context_destroy(struct intel_context *ce) { GEM_BUG_ON(ce->pin_count); - if (ce->state) - __i915_gem_object_release_unless_active(ce->state->obj); + if (!ce->state) + return; + + GEM_BUG_ON(i915_gem_object_is_active(ce->state->obj)); + i915_gem_object_put(ce->state->obj); } static int __context_pin_ppgtt(struct i915_gem_context *ctx) From 525280552b21722d2dfb48a7020dcd56f9e2023c Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Thu, 21 Jun 2018 21:44:49 +0300 Subject: [PATCH 012/114] drm/i915/ddi: Get AUX power domain for DP main link too MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit So far we got an AUX power domain reference only for the duration of DP AUX transfers. However, the following suggests that we also need these for main link functionality: - The specification doesn't state whether it's needed or not for main link functionality, but suggests that these power wells need to be enabled already during display core initialization (Sequences to Initialize Display). - For PSR we need to keep the AUX power well enabled. - On ICL combo PHY ports (non-TC) the AUX power well is needed for link training too: while the port is enabled with a DP link training test pattern trying to toggle the AUX power well will time out. - On ICL MG PHY ports (TC) the AUX power well is needed also for main link functionality (both in DP and HDMI modes). - Windows enables these power wells both for main and AUX lane functionality. Based on the above take an AUX power reference for main link functionality too. This makes a difference only on GEN10+ (GLK+) platforms, where we have separate port specific AUX power wells. For PSR we still need to distinguish between port A and the other ports, since on port A DC states must stay enabled for main link functionality, but DC states must be disabled for driver initiated AUX transfers. So re-use the corresponding helper from intel_psr.c. Since we take now a reference for main link functionality on all DP ports we can forgo taking the separate power ref for PSR functionality. v2: - Make sure DC states stay enabled when taking the ref on port A. (Ville) v3: (Ville) - Fix comment about logic for encoders without a crtc state and add FIXME note for a simplification to avoid calling get_power_domains in such cases. - Use intel_crtc_has_dp_encoder() instead !intel_crtc_has_type(HDMI). Cc: Ville Syrjälä Cc: Dhinakaran Pandiyan Cc: Paulo Zanoni Signed-off-by: Imre Deak [Clarified code comments in intel_ddi_main_link_aux_domain() and intel_ddi_get_power_domains() (Imre)] Reviewed-by: José Roberto de Souza Link: https://patchwork.freedesktop.org/patch/msgid/20180621184449.26634-1-imre.deak@intel.com --- drivers/gpu/drm/i915/intel_ddi.c | 54 +++++++++++++++++++++++-- drivers/gpu/drm/i915/intel_display.c | 12 +++++- drivers/gpu/drm/i915/intel_drv.h | 3 +- drivers/gpu/drm/i915/intel_psr.c | 41 ------------------- drivers/gpu/drm/i915/intel_runtime_pm.c | 1 + 5 files changed, 64 insertions(+), 47 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index 044fe1fb9872..0319825b725b 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -1983,15 +1983,55 @@ out: return ret; } -static u64 intel_ddi_get_power_domains(struct intel_encoder *encoder) +static inline enum intel_display_power_domain +intel_ddi_main_link_aux_domain(struct intel_dp *intel_dp) +{ + /* CNL HW requires corresponding AUX IOs to be powered up for PSR with + * DC states enabled at the same time, while for driver initiated AUX + * transfers we need the same AUX IOs to be powered but with DC states + * disabled. Accordingly use the AUX power domain here which leaves DC + * states enabled. + * However, for non-A AUX ports the corresponding non-EDP transcoders + * would have already enabled power well 2 and DC_OFF. This means we can + * acquire a wider POWER_DOMAIN_AUX_{B,C,D,F} reference instead of a + * specific AUX_IO reference without powering up any extra wells. + * Note that PSR is enabled only on Port A even though this function + * returns the correct domain for other ports too. + */ + return intel_dp->aux_ch == AUX_CH_A ? POWER_DOMAIN_AUX_IO_A : + intel_dp->aux_power_domain; +} + +static u64 intel_ddi_get_power_domains(struct intel_encoder *encoder, + struct intel_crtc_state *crtc_state) { struct intel_digital_port *dig_port = enc_to_dig_port(&encoder->base); enum pipe pipe; + u64 domains; - if (intel_ddi_get_hw_state(encoder, &pipe)) - return BIT_ULL(dig_port->ddi_io_power_domain); + if (!intel_ddi_get_hw_state(encoder, &pipe)) + return 0; - return 0; + domains = BIT_ULL(dig_port->ddi_io_power_domain); + if (!crtc_state) + return domains; + + /* + * TODO: Add support for MST encoders. Atm, the following should never + * happen since this function will be called only for the primary + * encoder which doesn't have its own pipe/crtc_state. + */ + if (WARN_ON(intel_crtc_has_type(crtc_state, INTEL_OUTPUT_DP_MST))) + return domains; + + /* AUX power is only needed for (e)DP mode, not for HDMI. */ + if (intel_crtc_has_dp_encoder(crtc_state)) { + struct intel_dp *intel_dp = &dig_port->dp; + + domains |= BIT_ULL(intel_ddi_main_link_aux_domain(intel_dp)); + } + + return domains; } void intel_ddi_enable_pipe_clock(const struct intel_crtc_state *crtc_state) @@ -2631,6 +2671,9 @@ static void intel_ddi_pre_enable_dp(struct intel_encoder *encoder, WARN_ON(is_mst && (port == PORT_A || port == PORT_E)); + intel_display_power_get(dev_priv, + intel_ddi_main_link_aux_domain(intel_dp)); + intel_dp_set_link_params(intel_dp, crtc_state->port_clock, crtc_state->lane_count, is_mst); @@ -2775,6 +2818,9 @@ static void intel_ddi_post_disable_dp(struct intel_encoder *encoder, intel_display_power_put(dev_priv, dig_port->ddi_io_power_domain); intel_ddi_clk_disable(encoder); + + intel_display_power_put(dev_priv, + intel_ddi_main_link_aux_domain(intel_dp)); } static void intel_ddi_post_disable_hdmi(struct intel_encoder *encoder, diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 1469a56f6c46..0db23292eb35 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -15661,11 +15661,21 @@ get_encoder_power_domains(struct drm_i915_private *dev_priv) for_each_intel_encoder(&dev_priv->drm, encoder) { u64 get_domains; enum intel_display_power_domain domain; + struct intel_crtc_state *crtc_state; if (!encoder->get_power_domains) continue; - get_domains = encoder->get_power_domains(encoder); + /* + * For MST and inactive encoders we don't have a crtc state. + * FIXME: no need to call get_power_domains in such cases, it + * will always return 0. + */ + crtc_state = encoder->base.crtc ? + to_intel_crtc_state(encoder->base.crtc->state) : + NULL; + + get_domains = encoder->get_power_domains(encoder, crtc_state); for_each_power_domain(domain, get_domains) intel_display_power_get(dev_priv, domain); } diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 9e0a8cc48521..0be45b75cca2 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -254,7 +254,8 @@ struct intel_encoder { struct intel_crtc_state *pipe_config); /* Returns a mask of power domains that need to be referenced as part * of the hardware state readout code. */ - u64 (*get_power_domains)(struct intel_encoder *encoder); + u64 (*get_power_domains)(struct intel_encoder *encoder, + struct intel_crtc_state *crtc_state); /* * Called during system suspend after all pending requests for the * encoder are flushed (for example for DP AUX transactions) and diff --git a/drivers/gpu/drm/i915/intel_psr.c b/drivers/gpu/drm/i915/intel_psr.c index aea81ace854b..f7ae7e33f453 100644 --- a/drivers/gpu/drm/i915/intel_psr.c +++ b/drivers/gpu/drm/i915/intel_psr.c @@ -56,43 +56,6 @@ #include "intel_drv.h" #include "i915_drv.h" -static inline enum intel_display_power_domain -psr_aux_domain(struct intel_dp *intel_dp) -{ - /* CNL HW requires corresponding AUX IOs to be powered up for PSR. - * However, for non-A AUX ports the corresponding non-EDP transcoders - * would have already enabled power well 2 and DC_OFF. This means we can - * acquire a wider POWER_DOMAIN_AUX_{B,C,D,F} reference instead of a - * specific AUX_IO reference without powering up any extra wells. - * Note that PSR is enabled only on Port A even though this function - * returns the correct domain for other ports too. - */ - return intel_dp->aux_ch == AUX_CH_A ? POWER_DOMAIN_AUX_IO_A : - intel_dp->aux_power_domain; -} - -static void psr_aux_io_power_get(struct intel_dp *intel_dp) -{ - struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp); - struct drm_i915_private *dev_priv = to_i915(intel_dig_port->base.base.dev); - - if (INTEL_GEN(dev_priv) < 10) - return; - - intel_display_power_get(dev_priv, psr_aux_domain(intel_dp)); -} - -static void psr_aux_io_power_put(struct intel_dp *intel_dp) -{ - struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp); - struct drm_i915_private *dev_priv = to_i915(intel_dig_port->base.base.dev); - - if (INTEL_GEN(dev_priv) < 10) - return; - - intel_display_power_put(dev_priv, psr_aux_domain(intel_dp)); -} - void intel_psr_irq_control(struct drm_i915_private *dev_priv, bool debug) { u32 debug_mask, mask; @@ -595,8 +558,6 @@ static void hsw_psr_enable_source(struct intel_dp *intel_dp, struct drm_i915_private *dev_priv = to_i915(dev); enum transcoder cpu_transcoder = crtc_state->cpu_transcoder; - psr_aux_io_power_get(intel_dp); - /* Only HSW and BDW have PSR AUX registers that need to be setup. SKL+ * use hardcoded values PSR AUX transactions */ @@ -717,8 +678,6 @@ static void hsw_psr_disable(struct intel_dp *intel_dp, else WARN_ON(I915_READ(EDP_PSR_CTL) & EDP_PSR_ENABLE); } - - psr_aux_io_power_put(intel_dp); } /** diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c index de3a81034f77..2969787201ef 100644 --- a/drivers/gpu/drm/i915/intel_runtime_pm.c +++ b/drivers/gpu/drm/i915/intel_runtime_pm.c @@ -1824,6 +1824,7 @@ void intel_display_power_put(struct drm_i915_private *dev_priv, BIT_ULL(POWER_DOMAIN_INIT)) #define GLK_DISPLAY_AUX_A_POWER_DOMAINS ( \ BIT_ULL(POWER_DOMAIN_AUX_A) | \ + BIT_ULL(POWER_DOMAIN_AUX_IO_A) | \ BIT_ULL(POWER_DOMAIN_INIT)) #define GLK_DISPLAY_AUX_B_POWER_DOMAINS ( \ BIT_ULL(POWER_DOMAIN_AUX_B) | \ From cf5d862db2e301b8e487f42f99c5cf6f5228ddae Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Mon, 25 Jun 2018 22:25:36 -0700 Subject: [PATCH 013/114] drm/i915/psr: Kill useless function pointers. At some point we introduced the function pointers on PSR code to help with VLV/CHV separation logic because it had a different HW implementation from PSR. Since all converged to HSW PSR and we dropped the VLV/CHV support, let's also kill the useless function pointers and leave the code cleaner. Cc: Dhinakaran Pandiyan Signed-off-by: Rodrigo Vivi Reviewed-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20180626052536.15137-1-rodrigo.vivi@intel.com --- drivers/gpu/drm/i915/i915_drv.h | 8 ----- drivers/gpu/drm/i915/intel_psr.c | 55 +++++++++++--------------------- 2 files changed, 18 insertions(+), 45 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 6f08ab310118..2b684f431c60 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -631,14 +631,6 @@ struct i915_psr { bool debug; ktime_t last_entry_attempt; ktime_t last_exit; - - void (*enable_source)(struct intel_dp *, - const struct intel_crtc_state *); - void (*disable_source)(struct intel_dp *, - const struct intel_crtc_state *); - void (*enable_sink)(struct intel_dp *); - void (*activate)(struct intel_dp *); - void (*setup_vsc)(struct intel_dp *, const struct intel_crtc_state *); }; enum intel_pch { diff --git a/drivers/gpu/drm/i915/intel_psr.c b/drivers/gpu/drm/i915/intel_psr.c index f7ae7e33f453..ccaee4a748f2 100644 --- a/drivers/gpu/drm/i915/intel_psr.c +++ b/drivers/gpu/drm/i915/intel_psr.c @@ -241,8 +241,8 @@ void intel_psr_init_dpcd(struct intel_dp *intel_dp) } } -static void hsw_psr_setup_vsc(struct intel_dp *intel_dp, - const struct intel_crtc_state *crtc_state) +static void intel_psr_setup_vsc(struct intel_dp *intel_dp, + const struct intel_crtc_state *crtc_state) { struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp); struct drm_i915_private *dev_priv = to_i915(intel_dig_port->base.base.dev); @@ -307,7 +307,7 @@ static void hsw_psr_setup_aux(struct intel_dp *intel_dp) I915_WRITE(EDP_PSR_AUX_CTL, aux_ctl); } -static void hsw_psr_enable_sink(struct intel_dp *intel_dp) +static void intel_psr_enable_sink(struct intel_dp *intel_dp) { struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp); struct drm_device *dev = dig_port->base.base.dev; @@ -419,24 +419,6 @@ static void hsw_activate_psr2(struct intel_dp *intel_dp) I915_WRITE(EDP_PSR2_CTL, val); } -static void hsw_psr_activate(struct intel_dp *intel_dp) -{ - struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp); - struct drm_device *dev = dig_port->base.base.dev; - struct drm_i915_private *dev_priv = to_i915(dev); - - /* On HSW+ after we enable PSR on source it will activate it - * as soon as it match configure idle_frame count. So - * we just actually enable it here on activation time. - */ - - /* psr1 and psr2 are mutually exclusive.*/ - if (dev_priv->psr.psr2_enabled) - hsw_activate_psr2(intel_dp); - else - hsw_activate_psr1(intel_dp); -} - static bool intel_psr2_config_valid(struct intel_dp *intel_dp, struct intel_crtc_state *crtc_state) { @@ -546,12 +528,17 @@ static void intel_psr_activate(struct intel_dp *intel_dp) WARN_ON(dev_priv->psr.active); lockdep_assert_held(&dev_priv->psr.lock); - dev_priv->psr.activate(intel_dp); + /* psr1 and psr2 are mutually exclusive.*/ + if (dev_priv->psr.psr2_enabled) + hsw_activate_psr2(intel_dp); + else + hsw_activate_psr1(intel_dp); + dev_priv->psr.active = true; } -static void hsw_psr_enable_source(struct intel_dp *intel_dp, - const struct intel_crtc_state *crtc_state) +static void intel_psr_enable_source(struct intel_dp *intel_dp, + const struct intel_crtc_state *crtc_state) { struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp); struct drm_device *dev = dig_port->base.base.dev; @@ -627,9 +614,9 @@ void intel_psr_enable(struct intel_dp *intel_dp, dev_priv->psr.psr2_enabled = crtc_state->has_psr2; dev_priv->psr.busy_frontbuffer_bits = 0; - dev_priv->psr.setup_vsc(intel_dp, crtc_state); - dev_priv->psr.enable_sink(intel_dp); - dev_priv->psr.enable_source(intel_dp, crtc_state); + intel_psr_setup_vsc(intel_dp, crtc_state); + intel_psr_enable_sink(intel_dp); + intel_psr_enable_source(intel_dp, crtc_state); dev_priv->psr.enabled = intel_dp; intel_psr_activate(intel_dp); @@ -638,8 +625,9 @@ unlock: mutex_unlock(&dev_priv->psr.lock); } -static void hsw_psr_disable(struct intel_dp *intel_dp, - const struct intel_crtc_state *old_crtc_state) +static void +intel_psr_disable_source(struct intel_dp *intel_dp, + const struct intel_crtc_state *old_crtc_state) { struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp); struct drm_device *dev = intel_dig_port->base.base.dev; @@ -706,7 +694,7 @@ void intel_psr_disable(struct intel_dp *intel_dp, return; } - dev_priv->psr.disable_source(intel_dp, old_crtc_state); + intel_psr_disable_source(intel_dp, old_crtc_state); /* Disable PSR on Sink */ drm_dp_dpcd_writeb(&intel_dp->aux, DP_PSR_EN_CFG, 0); @@ -945,11 +933,4 @@ void intel_psr_init(struct drm_i915_private *dev_priv) INIT_WORK(&dev_priv->psr.work, intel_psr_work); mutex_init(&dev_priv->psr.lock); - - dev_priv->psr.enable_source = hsw_psr_enable_source; - dev_priv->psr.disable_source = hsw_psr_disable; - dev_priv->psr.enable_sink = hsw_psr_enable_sink; - dev_priv->psr.activate = hsw_psr_activate; - dev_priv->psr.setup_vsc = hsw_psr_setup_vsc; - } From c12e0643a05d978657877630d4da1ace06ea3720 Mon Sep 17 00:00:00 2001 From: Dhinakaran Pandiyan Date: Sun, 24 Jun 2018 22:47:40 -0700 Subject: [PATCH 014/114] drm/i915/psr: Fix race in intel_psr_work() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 5422b37c907e ("drm/i915/psr: Kill delays when activating psr back.") switched from delayed work to the plain variant and while doing so removed the check for work_busy() before scheduling a PSR activation. This appears to cause consecutive executions of psr_activate() in this scenario - after a worker picks up the PSR work item for execution and before the work function can acquire the PSR mutex, a psr_flush() can get hold of the mutex and schedule another PSR work. Without a psr_exit() between the two psr_activate() calls, warning messages get printed. Further, since we drop the mutex in the midst of psr_work() to wait for PSR to idle, another work item can also get scheduled. Fix this by returning if PSR was already active. Fixes: 5422b37c907e ("drm/i915/psr: Kill delays when activating psr back.") Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=106948 Cc: Rodrigo Vivi Cc: Chris Wilson Cc: José Roberto de Souza Signed-off-by: Dhinakaran Pandiyan Reviewed-by: Chris Wilson Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20180625054741.3919-1-dhinakaran.pandiyan@intel.com --- drivers/gpu/drm/i915/intel_psr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_psr.c b/drivers/gpu/drm/i915/intel_psr.c index ccaee4a748f2..276070d597e1 100644 --- a/drivers/gpu/drm/i915/intel_psr.c +++ b/drivers/gpu/drm/i915/intel_psr.c @@ -758,7 +758,7 @@ static void intel_psr_work(struct work_struct *work) * recheck. Since psr_flush first clears this and then reschedules we * won't ever miss a flush when bailing out here. */ - if (dev_priv->psr.busy_frontbuffer_bits) + if (dev_priv->psr.busy_frontbuffer_bits || dev_priv->psr.active) goto unlock; intel_psr_activate(dev_priv->psr.enabled); From bcc233b2aa7878bdcfbad6505ac66383a82a73dd Mon Sep 17 00:00:00 2001 From: Dhinakaran Pandiyan Date: Tue, 26 Jun 2018 02:05:22 -0700 Subject: [PATCH 015/114] drm/i915/psr: Warn for erroneous enabling of both PSR1 and PSR2. Depending whether PSR1 or PSR2 was configured, we print a warning if the corresponding control mmio indicated PSR was erroneously enabled. As Chris pointed out, it makes more sense to check for both the mmio's since we expect neither PSR1 nor PSR2 to be enabled when psr_activate() is called. v2: Read PSR2 control register only on supported platforms (Rodrigo) Cc: Rodrigo Vivi Cc: Chris Wilson Signed-off-by: Dhinakaran Pandiyan Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20180626090522.17682-1-dhinakaran.pandiyan@intel.com --- drivers/gpu/drm/i915/intel_psr.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_psr.c b/drivers/gpu/drm/i915/intel_psr.c index 276070d597e1..1b439629cb66 100644 --- a/drivers/gpu/drm/i915/intel_psr.c +++ b/drivers/gpu/drm/i915/intel_psr.c @@ -521,10 +521,9 @@ static void intel_psr_activate(struct intel_dp *intel_dp) struct drm_device *dev = intel_dig_port->base.base.dev; struct drm_i915_private *dev_priv = to_i915(dev); - if (dev_priv->psr.psr2_enabled) + if (INTEL_GEN(dev_priv) >= 9) WARN_ON(I915_READ(EDP_PSR2_CTL) & EDP_PSR2_ENABLE); - else - WARN_ON(I915_READ(EDP_PSR_CTL) & EDP_PSR_ENABLE); + WARN_ON(I915_READ(EDP_PSR_CTL) & EDP_PSR_ENABLE); WARN_ON(dev_priv->psr.active); lockdep_assert_held(&dev_priv->psr.lock); From 42f53ffcad7fcf11b5767767dd0c0ff607d99787 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Roberto=20de=20Souza?= Date: Tue, 26 Jun 2018 13:16:40 -0700 Subject: [PATCH 016/114] drm/i915/psr: Remove intel_crtc_state parameter from disable_source() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It was only used in VLV/CHV so after the removal of the PSR support for those platforms it is not necessary any more. v7: Rebased Reviewed-by: Dhinakaran Pandiyan Cc: Rodrigo Vivi Signed-off-by: José Roberto de Souza Signed-off-by: Dhinakaran Pandiyan Link: https://patchwork.freedesktop.org/patch/msgid/20180626201644.21932-1-jose.souza@intel.com --- drivers/gpu/drm/i915/intel_psr.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_psr.c b/drivers/gpu/drm/i915/intel_psr.c index 1b439629cb66..f6d384a11b79 100644 --- a/drivers/gpu/drm/i915/intel_psr.c +++ b/drivers/gpu/drm/i915/intel_psr.c @@ -625,8 +625,7 @@ unlock: } static void -intel_psr_disable_source(struct intel_dp *intel_dp, - const struct intel_crtc_state *old_crtc_state) +intel_psr_disable_source(struct intel_dp *intel_dp) { struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp); struct drm_device *dev = intel_dig_port->base.base.dev; @@ -693,7 +692,7 @@ void intel_psr_disable(struct intel_dp *intel_dp, return; } - intel_psr_disable_source(intel_dp, old_crtc_state); + intel_psr_disable_source(intel_dp); /* Disable PSR on Sink */ drm_dp_dpcd_writeb(&intel_dp->aux, DP_PSR_EN_CFG, 0); From cc3054ff6214f6d14d35ffe629ff8d5032ace7f7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Roberto=20de=20Souza?= Date: Tue, 26 Jun 2018 13:16:41 -0700 Subject: [PATCH 017/114] drm/i915/psr: Begin to handle PSR/PSR2 errors set by sink MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit eDP spec states that sink device will do a short pulse in HPD line when there is a PSR/PSR2 error that needs to be handled by source, this is handling the first and most simples error: DP_PSR_SINK_INTERNAL_ERROR. Here taking the safest approach and disabling PSR(at least until the next modeset), to avoid multiple rendering issues due to bad pannels. v5: added lockdep_assert in psr_disable and renamed psr_disable() to intel_psr_disable_locked() v4: Using CAN_PSR instead of HAS_PSR in intel_psr_short_pulse v3: disabling PSR instead of exiting on error Reviewed-by: Dhinakaran Pandiyan Cc: Rodrigo Vivi Signed-off-by: José Roberto de Souza Signed-off-by: Dhinakaran Pandiyan Link: https://patchwork.freedesktop.org/patch/msgid/20180626201644.21932-2-jose.souza@intel.com --- drivers/gpu/drm/i915/intel_dp.c | 2 ++ drivers/gpu/drm/i915/intel_drv.h | 1 + drivers/gpu/drm/i915/intel_psr.c | 62 ++++++++++++++++++++++++++------ 3 files changed, 54 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index c1b2f00f324b..5be07e1d816d 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -4491,6 +4491,8 @@ intel_dp_short_pulse(struct intel_dp *intel_dp) if (intel_dp_needs_link_retrain(intel_dp)) return false; + intel_psr_short_pulse(intel_dp); + if (intel_dp->compliance.test_type == DP_TEST_LINK_TRAINING) { DRM_DEBUG_KMS("Link Training Compliance Test requested\n"); /* Send a Hotplug Uevent to userspace to start modeset */ diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 0be45b75cca2..5a37db292d3a 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -1920,6 +1920,7 @@ void intel_psr_compute_config(struct intel_dp *intel_dp, struct intel_crtc_state *crtc_state); void intel_psr_irq_control(struct drm_i915_private *dev_priv, bool debug); void intel_psr_irq_handler(struct drm_i915_private *dev_priv, u32 psr_iir); +void intel_psr_short_pulse(struct intel_dp *intel_dp); /* intel_runtime_pm.c */ int intel_power_domains_init(struct drm_i915_private *); diff --git a/drivers/gpu/drm/i915/intel_psr.c b/drivers/gpu/drm/i915/intel_psr.c index f6d384a11b79..445e97dc791d 100644 --- a/drivers/gpu/drm/i915/intel_psr.c +++ b/drivers/gpu/drm/i915/intel_psr.c @@ -666,6 +666,25 @@ intel_psr_disable_source(struct intel_dp *intel_dp) } } +static void intel_psr_disable_locked(struct intel_dp *intel_dp) +{ + struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp); + struct drm_device *dev = intel_dig_port->base.base.dev; + struct drm_i915_private *dev_priv = to_i915(dev); + + lockdep_assert_held(&dev_priv->psr.lock); + + if (!dev_priv->psr.enabled) + return; + + intel_psr_disable_source(intel_dp); + + /* Disable PSR on Sink */ + drm_dp_dpcd_writeb(&intel_dp->aux, DP_PSR_EN_CFG, 0); + + dev_priv->psr.enabled = NULL; +} + /** * intel_psr_disable - Disable PSR * @intel_dp: Intel DP @@ -687,17 +706,7 @@ void intel_psr_disable(struct intel_dp *intel_dp, return; mutex_lock(&dev_priv->psr.lock); - if (!dev_priv->psr.enabled) { - mutex_unlock(&dev_priv->psr.lock); - return; - } - - intel_psr_disable_source(intel_dp); - - /* Disable PSR on Sink */ - drm_dp_dpcd_writeb(&intel_dp->aux, DP_PSR_EN_CFG, 0); - - dev_priv->psr.enabled = NULL; + intel_psr_disable_locked(intel_dp); mutex_unlock(&dev_priv->psr.lock); cancel_work_sync(&dev_priv->psr.work); } @@ -932,3 +941,34 @@ void intel_psr_init(struct drm_i915_private *dev_priv) INIT_WORK(&dev_priv->psr.work, intel_psr_work); mutex_init(&dev_priv->psr.lock); } + +void intel_psr_short_pulse(struct intel_dp *intel_dp) +{ + struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp); + struct drm_device *dev = intel_dig_port->base.base.dev; + struct drm_i915_private *dev_priv = to_i915(dev); + struct i915_psr *psr = &dev_priv->psr; + u8 val; + + if (!CAN_PSR(dev_priv) || !intel_dp_is_edp(intel_dp)) + return; + + mutex_lock(&psr->lock); + + if (psr->enabled != intel_dp) + goto exit; + + if (drm_dp_dpcd_readb(&intel_dp->aux, DP_PSR_STATUS, &val) != 1) { + DRM_ERROR("PSR_STATUS dpcd read failed\n"); + goto exit; + } + + if ((val & DP_PSR_SINK_STATE_MASK) == DP_PSR_SINK_INTERNAL_ERROR) { + DRM_DEBUG_KMS("PSR sink internal error, disabling PSR\n"); + intel_psr_disable_locked(intel_dp); + } + + /* TODO: handle other PSR/PSR2 errors */ +exit: + mutex_unlock(&psr->lock); +} From 93bf76ed882d5b7c6824e95d868d608f61b4f663 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Roberto=20de=20Souza?= Date: Tue, 26 Jun 2018 13:16:42 -0700 Subject: [PATCH 018/114] drm/i915/psr: Handle PSR errors MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Sink will interrupt source when it have any PSR error. DP_PSR_VSC_SDP_UNCORRECTABLE_ERROR is a PSR2 but already handling it here. The only missing error to be handled is DP_PSR_LINK_CRC_ERROR that will be taken in care in a futher patch. v6: not handling DP_PSR_LINK_CRC_ERROR here v5: handling all PSR errors here, so the commit message and comment have changed v3: disabling PSR instead of exiting on error Cc: Rodrigo Vivi Reviewed-by: Dhinakaran Pandiyan Signed-off-by: José Roberto de Souza Signed-off-by: Dhinakaran Pandiyan Link: https://patchwork.freedesktop.org/patch/msgid/20180626201644.21932-3-jose.souza@intel.com --- drivers/gpu/drm/i915/intel_psr.c | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_psr.c b/drivers/gpu/drm/i915/intel_psr.c index 445e97dc791d..860b46b72403 100644 --- a/drivers/gpu/drm/i915/intel_psr.c +++ b/drivers/gpu/drm/i915/intel_psr.c @@ -949,6 +949,8 @@ void intel_psr_short_pulse(struct intel_dp *intel_dp) struct drm_i915_private *dev_priv = to_i915(dev); struct i915_psr *psr = &dev_priv->psr; u8 val; + const u8 errors = DP_PSR_RFB_STORAGE_ERROR | + DP_PSR_VSC_SDP_UNCORRECTABLE_ERROR; if (!CAN_PSR(dev_priv) || !intel_dp_is_edp(intel_dp)) return; @@ -968,7 +970,25 @@ void intel_psr_short_pulse(struct intel_dp *intel_dp) intel_psr_disable_locked(intel_dp); } - /* TODO: handle other PSR/PSR2 errors */ + if (drm_dp_dpcd_readb(&intel_dp->aux, DP_PSR_ERROR_STATUS, &val) != 1) { + DRM_ERROR("PSR_ERROR_STATUS dpcd read failed\n"); + goto exit; + } + + if (val & DP_PSR_RFB_STORAGE_ERROR) + DRM_DEBUG_KMS("PSR RFB storage error, disabling PSR\n"); + if (val & DP_PSR_VSC_SDP_UNCORRECTABLE_ERROR) + DRM_DEBUG_KMS("PSR VSC SDP uncorrectable error, disabling PSR\n"); + + if (val & ~errors) + DRM_ERROR("PSR_ERROR_STATUS unhandled errors %x\n", + val & ~errors); + if (val & errors) + intel_psr_disable_locked(intel_dp); + /* clear status register */ + drm_dp_dpcd_writeb(&intel_dp->aux, DP_PSR_ERROR_STATUS, val); + + /* TODO: handle PSR2 errors */ exit: mutex_unlock(&psr->lock); } From 3ebe3df50bb1db45c7bf1ce90c3d61c4eed1ba84 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Roberto=20de=20Souza?= Date: Tue, 26 Jun 2018 13:16:43 -0700 Subject: [PATCH 019/114] drm/i915/psr: Avoid PSR exit max time timeout MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Specification requires that max time should be masked from bdw and forward but it can be also safely enabled to hsw. This will make PSR exits more deterministic and only when really needed. If this was used to fix a issue in some panel than can only self-refresh for a few seconds, that panel will interrupt and assert one of the PSR errors handled in: 'drm/i915/psr: Handle PSR RFB storage error' and 'drm/i915/psr: Begin to handle PSR/PSR2 errors set by sink' Spec: 21664 v4: patch moved to before 'drm/i915/psr/bdw+: Enable CRC check in the static frame on the sink side' to avoid touch in 2 patches EDP_PSR_DEBUG. Cc: Rodrigo Vivi Reviewed-by: Dhinakaran Pandiyan Signed-off-by: José Roberto de Souza Signed-off-by: Dhinakaran Pandiyan Link: https://patchwork.freedesktop.org/patch/msgid/20180626201644.21932-4-jose.souza@intel.com --- drivers/gpu/drm/i915/intel_psr.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_psr.c b/drivers/gpu/drm/i915/intel_psr.c index 860b46b72403..aa98b62910b4 100644 --- a/drivers/gpu/drm/i915/intel_psr.c +++ b/drivers/gpu/drm/i915/intel_psr.c @@ -579,7 +579,8 @@ static void intel_psr_enable_source(struct intel_dp *intel_dp, EDP_PSR_DEBUG_MASK_MEMUP | EDP_PSR_DEBUG_MASK_HPD | EDP_PSR_DEBUG_MASK_LPSP | - EDP_PSR_DEBUG_MASK_DISP_REG_WRITE); + EDP_PSR_DEBUG_MASK_DISP_REG_WRITE | + EDP_PSR_DEBUG_MASK_MAX_SLEEP); } } From 00c8f19463ab42d22332fc9c9fca605f12eadeb7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Roberto=20de=20Souza?= Date: Tue, 26 Jun 2018 13:16:44 -0700 Subject: [PATCH 020/114] drm/i915/psr: Enable CRC check in the static frame on the sink side MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Sink can be configured to calculate the CRC over the static frame and compare with the CRC calculated and transmited in the VSC SDP by source, if there is a mismatch sink will do a short pulse in HPD and set DP_PSR_LINK_CRC_ERROR in DP_PSR_ERROR_STATUS. Spec: 7723 v6: andling DP_PSR_LINK_CRC_ERROR here and remove "bdw+" from commit message v4: patch moved to after 'drm/i915/psr: Avoid PSR exit max time timeout' to avoid touch in 2 patches EDP_PSR_DEBUG. v3: disabling PSR instead of exiting on error Reviewed-by: Dhinakaran Pandiyan Cc: Rodrigo Vivi Signed-off-by: José Roberto de Souza Signed-off-by: Dhinakaran Pandiyan Link: https://patchwork.freedesktop.org/patch/msgid/20180626201644.21932-5-jose.souza@intel.com --- drivers/gpu/drm/i915/i915_reg.h | 1 + drivers/gpu/drm/i915/intel_psr.c | 10 +++++++++- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index caad19f5f557..43db91c19f52 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -4044,6 +4044,7 @@ enum { #define EDP_PSR_SKIP_AUX_EXIT (1 << 12) #define EDP_PSR_TP1_TP2_SEL (0 << 11) #define EDP_PSR_TP1_TP3_SEL (1 << 11) +#define EDP_PSR_CRC_ENABLE (1 << 10) /* BDW+ */ #define EDP_PSR_TP2_TP3_TIME_500us (0 << 8) #define EDP_PSR_TP2_TP3_TIME_100us (1 << 8) #define EDP_PSR_TP2_TP3_TIME_2500us (2 << 8) diff --git a/drivers/gpu/drm/i915/intel_psr.c b/drivers/gpu/drm/i915/intel_psr.c index aa98b62910b4..45f1cb7d6c04 100644 --- a/drivers/gpu/drm/i915/intel_psr.c +++ b/drivers/gpu/drm/i915/intel_psr.c @@ -323,6 +323,8 @@ static void intel_psr_enable_sink(struct intel_dp *intel_dp) if (dev_priv->psr.link_standby) dpcd_val |= DP_PSR_MAIN_LINK_ACTIVE; + if (!dev_priv->psr.psr2_enabled && INTEL_GEN(dev_priv) >= 8) + dpcd_val |= DP_PSR_CRC_VERIFICATION; drm_dp_dpcd_writeb(&intel_dp->aux, DP_PSR_EN_CFG, dpcd_val); drm_dp_dpcd_writeb(&intel_dp->aux, DP_SET_POWER, DP_SET_POWER_D0); @@ -378,6 +380,9 @@ static void hsw_activate_psr1(struct intel_dp *intel_dp) else val |= EDP_PSR_TP1_TP2_SEL; + if (INTEL_GEN(dev_priv) >= 8) + val |= EDP_PSR_CRC_ENABLE; + val |= I915_READ(EDP_PSR_CTL) & EDP_PSR_RESTORE_PSR_ACTIVE_CTX_MASK; I915_WRITE(EDP_PSR_CTL, val); } @@ -951,7 +956,8 @@ void intel_psr_short_pulse(struct intel_dp *intel_dp) struct i915_psr *psr = &dev_priv->psr; u8 val; const u8 errors = DP_PSR_RFB_STORAGE_ERROR | - DP_PSR_VSC_SDP_UNCORRECTABLE_ERROR; + DP_PSR_VSC_SDP_UNCORRECTABLE_ERROR | + DP_PSR_LINK_CRC_ERROR; if (!CAN_PSR(dev_priv) || !intel_dp_is_edp(intel_dp)) return; @@ -980,6 +986,8 @@ void intel_psr_short_pulse(struct intel_dp *intel_dp) DRM_DEBUG_KMS("PSR RFB storage error, disabling PSR\n"); if (val & DP_PSR_VSC_SDP_UNCORRECTABLE_ERROR) DRM_DEBUG_KMS("PSR VSC SDP uncorrectable error, disabling PSR\n"); + if (val & DP_PSR_LINK_CRC_ERROR) + DRM_ERROR("PSR Link CRC error, disabling PSR\n"); if (val & ~errors) DRM_ERROR("PSR_ERROR_STATUS unhandled errors %x\n", From 67ca07e7ac10d7cdc2aa7ac216cab7fb64c95e50 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Tue, 26 Jun 2018 17:22:32 +0300 Subject: [PATCH 021/114] drm/i915/icl: Add power well support Add the definition for ICL power wells and their mapping to power domains. On ICL there are 3 power well control registers, we'll select the correct one based on higher bits of the power well ID. The offset for the control and status flags within this register is based on the lower bits of the ID as on older platforms. As the DC state programming is also the same as on old platforms we can reuse the corresponding helpers. For this we mark here the DC-off power well as shared among multiple platforms. Other than the above the delta between old platforms and ICL: - Pipe C has its own power well, so we can save some additional power in the pipe A+B and (non-eDP) pipe A configurations. - Power wells for port E/F DDI/AUX IO and Thunderbolt 1-4 AUX IO v2: - Rebase on drm-tip after prep patch for this was merged there as requested by Paulo. - Actually add the new AUX and DDI power well control regs (Rakshmi) v3: - Fix power well register names in code comments - Add TBT AUX->power well 3 dependency v4: - Rebase v5: - Detach AUX power wells from the INIT power domain. These power wells can only be enabled in a TC/TBT connected state and otherwise not needed during driver initialization. v6: - Use _MMIO_PORT(...) instead _MMIO(_PICK(...)) (Paulo) Fix checkpatch warnings. Cc: Animesh Manna Cc: Rakshmi Bhatia Cc: Paulo Zanoni Signed-off-by: Imre Deak Reviewed-by: Animesh Manna (v1) Reviewed-by: Paulo Zanoni Link: https://patchwork.freedesktop.org/patch/msgid/20180626142232.22361-1-imre.deak@intel.com --- drivers/gpu/drm/i915/i915_reg.h | 78 +++++- drivers/gpu/drm/i915/intel_display.h | 4 + drivers/gpu/drm/i915/intel_runtime_pm.c | 329 +++++++++++++++++++++++- 3 files changed, 395 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 43db91c19f52..2c20dc0db648 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -1045,13 +1045,13 @@ enum i915_power_well_id { /* * HSW/BDW - * - HSW_PWR_WELL_CTL_DRIVER(0) (status bit: id*2, req bit: id*2+1) + * - _HSW_PWR_WELL_CTL1-4 (status bit: id*2, req bit: id*2+1) */ HSW_DISP_PW_GLOBAL = 15, /* * GEN9+ - * - HSW_PWR_WELL_CTL_DRIVER(0) (status bit: id*2, req bit: id*2+1) + * - _HSW_PWR_WELL_CTL1-4 (status bit: id*2, req bit: id*2+1) */ SKL_DISP_PW_MISC_IO = 0, SKL_DISP_PW_DDI_A_E, @@ -1075,17 +1075,54 @@ enum i915_power_well_id { SKL_DISP_PW_2, /* - custom power wells */ - SKL_DISP_PW_DC_OFF, BXT_DPIO_CMN_A, BXT_DPIO_CMN_BC, - GLK_DPIO_CMN_C, /* 19 */ + GLK_DPIO_CMN_C, /* 18 */ + + /* + * GEN11+ + * - _HSW_PWR_WELL_CTL1-4 + * (status bit: (id&15)*2, req bit:(id&15)*2+1) + */ + ICL_DISP_PW_1 = 0, + ICL_DISP_PW_2, + ICL_DISP_PW_3, + ICL_DISP_PW_4, + + /* + * - _HSW_PWR_WELL_CTL_AUX1/2/4 + * (status bit: (id&15)*2, req bit:(id&15)*2+1) + */ + ICL_DISP_PW_AUX_A = 16, + ICL_DISP_PW_AUX_B, + ICL_DISP_PW_AUX_C, + ICL_DISP_PW_AUX_D, + ICL_DISP_PW_AUX_E, + ICL_DISP_PW_AUX_F, + + ICL_DISP_PW_AUX_TBT1 = 24, + ICL_DISP_PW_AUX_TBT2, + ICL_DISP_PW_AUX_TBT3, + ICL_DISP_PW_AUX_TBT4, + + /* + * - _HSW_PWR_WELL_CTL_DDI1/2/4 + * (status bit: (id&15)*2, req bit:(id&15)*2+1) + */ + ICL_DISP_PW_DDI_A = 32, + ICL_DISP_PW_DDI_B, + ICL_DISP_PW_DDI_C, + ICL_DISP_PW_DDI_D, + ICL_DISP_PW_DDI_E, + ICL_DISP_PW_DDI_F, /* 37 */ /* * Multiple platforms. * Must start following the highest ID of any platform. * - custom power wells */ - I915_DISP_PW_ALWAYS_ON = 20, + SKL_DISP_PW_DC_OFF = 38, + I915_DISP_PW_ALWAYS_ON, }; #define PUNIT_REG_PWRGT_CTRL 0x60 @@ -1679,6 +1716,13 @@ enum i915_power_well_id { #define IREF1RC_OFFSET_MASK (0xFF << IREF1RC_OFFSET_SHIFT) #define BXT_PORT_CL1CM_DW10(phy) _BXT_PHY((phy), _PORT_CL1CM_DW10_BC) +#define _ICL_PORT_CL_DW12_A 0x162030 +#define _ICL_PORT_CL_DW12_B 0x6C030 +#define ICL_LANE_ENABLE_AUX (1 << 0) +#define ICL_PORT_CL_DW12(port) _MMIO_PORT((port), \ + _ICL_PORT_CL_DW12_A, \ + _ICL_PORT_CL_DW12_B) + #define _PORT_CL1CM_DW28_A 0x162070 #define _PORT_CL1CM_DW28_BC 0x6C070 #define OCL1_POWER_DOWN_EN (1 << 23) @@ -8564,6 +8608,14 @@ enum { #define _HSW_PWR_WELL_CTL3 0x45408 #define _HSW_PWR_WELL_CTL4 0x4540C +#define _ICL_PWR_WELL_CTL_AUX1 0x45440 +#define _ICL_PWR_WELL_CTL_AUX2 0x45444 +#define _ICL_PWR_WELL_CTL_AUX4 0x4544C + +#define _ICL_PWR_WELL_CTL_DDI1 0x45450 +#define _ICL_PWR_WELL_CTL_DDI2 0x45454 +#define _ICL_PWR_WELL_CTL_DDI4 0x4545C + /* * Each power well control register contains up to 16 (request, status) HW * flag tuples. The register index and HW flag shift is determined by the @@ -8573,14 +8625,20 @@ enum { */ #define _HSW_PW_REG_IDX(pw) ((pw) >> 4) #define _HSW_PW_SHIFT(pw) (((pw) & 0xf) * 2) -/* TODO: Add all PWR_WELL_CTL registers below for new platforms */ #define HSW_PWR_WELL_CTL_BIOS(pw) _MMIO(_PICK(_HSW_PW_REG_IDX(pw), \ - _HSW_PWR_WELL_CTL1)) + _HSW_PWR_WELL_CTL1, \ + _ICL_PWR_WELL_CTL_AUX1, \ + _ICL_PWR_WELL_CTL_DDI1)) #define HSW_PWR_WELL_CTL_DRIVER(pw) _MMIO(_PICK(_HSW_PW_REG_IDX(pw), \ - _HSW_PWR_WELL_CTL2)) + _HSW_PWR_WELL_CTL2, \ + _ICL_PWR_WELL_CTL_AUX2, \ + _ICL_PWR_WELL_CTL_DDI2)) +/* KVMR doesn't have a reg for AUX or DDI power well control */ #define HSW_PWR_WELL_CTL_KVMR _MMIO(_HSW_PWR_WELL_CTL3) #define HSW_PWR_WELL_CTL_DEBUG(pw) _MMIO(_PICK(_HSW_PW_REG_IDX(pw), \ - _HSW_PWR_WELL_CTL4)) + _HSW_PWR_WELL_CTL4, \ + _ICL_PWR_WELL_CTL_AUX4, \ + _ICL_PWR_WELL_CTL_DDI4)) #define HSW_PWR_WELL_CTL_REQ(pw) (1 << (_HSW_PW_SHIFT(pw) + 1)) #define HSW_PWR_WELL_CTL_STATE(pw) (1 << _HSW_PW_SHIFT(pw)) @@ -8601,6 +8659,8 @@ enum skl_power_gate { #define SKL_FUSE_DOWNLOAD_STATUS (1 << 31) /* PG0 (HW control->no power well ID), PG1..PG2 (SKL_DISP_PW1..SKL_DISP_PW2) */ #define SKL_PW_TO_PG(pw) ((pw) - SKL_DISP_PW_1 + SKL_PG1) +/* PG0 (HW control->no power well ID), PG1..PG4 (ICL_DISP_PW1..ICL_DISP_PW4) */ +#define ICL_PW_TO_PG(pw) ((pw) - ICL_DISP_PW_1 + SKL_PG1) #define SKL_FUSE_PG_DIST_STATUS(pg) (1 << (27 - (pg))) #define _CNL_AUX_REG_IDX(pw) ((pw) - 9) diff --git a/drivers/gpu/drm/i915/intel_display.h b/drivers/gpu/drm/i915/intel_display.h index dfb02da73ac8..a77dd29db2ec 100644 --- a/drivers/gpu/drm/i915/intel_display.h +++ b/drivers/gpu/drm/i915/intel_display.h @@ -199,6 +199,10 @@ enum intel_display_power_domain { POWER_DOMAIN_AUX_E, POWER_DOMAIN_AUX_F, POWER_DOMAIN_AUX_IO_A, + POWER_DOMAIN_AUX_TBT1, + POWER_DOMAIN_AUX_TBT2, + POWER_DOMAIN_AUX_TBT3, + POWER_DOMAIN_AUX_TBT4, POWER_DOMAIN_GMBUS, POWER_DOMAIN_MODESET, POWER_DOMAIN_GT_IRQ, diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c index 2969787201ef..d81b2cfe1c5e 100644 --- a/drivers/gpu/drm/i915/intel_runtime_pm.c +++ b/drivers/gpu/drm/i915/intel_runtime_pm.c @@ -134,6 +134,14 @@ intel_display_power_domain_str(enum intel_display_power_domain domain) return "AUX_F"; case POWER_DOMAIN_AUX_IO_A: return "AUX_IO_A"; + case POWER_DOMAIN_AUX_TBT1: + return "AUX_TBT1"; + case POWER_DOMAIN_AUX_TBT2: + return "AUX_TBT2"; + case POWER_DOMAIN_AUX_TBT3: + return "AUX_TBT3"; + case POWER_DOMAIN_AUX_TBT4: + return "AUX_TBT4"; case POWER_DOMAIN_GMBUS: return "GMBUS"; case POWER_DOMAIN_INIT: @@ -384,7 +392,8 @@ static void hsw_power_well_enable(struct drm_i915_private *dev_priv, u32 val; if (wait_fuses) { - pg = SKL_PW_TO_PG(id); + pg = INTEL_GEN(dev_priv) >= 11 ? ICL_PW_TO_PG(id) : + SKL_PW_TO_PG(id); /* * For PW1 we have to wait both for the PW0/PG0 fuse state * before enabling the power well and PW1/PG1's own fuse @@ -430,6 +439,43 @@ static void hsw_power_well_disable(struct drm_i915_private *dev_priv, hsw_wait_for_power_well_disable(dev_priv, power_well); } +#define ICL_AUX_PW_TO_PORT(pw) ((pw) - ICL_DISP_PW_AUX_A) + +static void +icl_combo_phy_aux_power_well_enable(struct drm_i915_private *dev_priv, + struct i915_power_well *power_well) +{ + enum i915_power_well_id id = power_well->id; + enum port port = ICL_AUX_PW_TO_PORT(id); + u32 val; + + val = I915_READ(HSW_PWR_WELL_CTL_DRIVER(id)); + I915_WRITE(HSW_PWR_WELL_CTL_DRIVER(id), val | HSW_PWR_WELL_CTL_REQ(id)); + + val = I915_READ(ICL_PORT_CL_DW12(port)); + I915_WRITE(ICL_PORT_CL_DW12(port), val | ICL_LANE_ENABLE_AUX); + + hsw_wait_for_power_well_enable(dev_priv, power_well); +} + +static void +icl_combo_phy_aux_power_well_disable(struct drm_i915_private *dev_priv, + struct i915_power_well *power_well) +{ + enum i915_power_well_id id = power_well->id; + enum port port = ICL_AUX_PW_TO_PORT(id); + u32 val; + + val = I915_READ(ICL_PORT_CL_DW12(port)); + I915_WRITE(ICL_PORT_CL_DW12(port), val & ~ICL_LANE_ENABLE_AUX); + + val = I915_READ(HSW_PWR_WELL_CTL_DRIVER(id)); + I915_WRITE(HSW_PWR_WELL_CTL_DRIVER(id), + val & ~HSW_PWR_WELL_CTL_REQ(id)); + + hsw_wait_for_power_well_disable(dev_priv, power_well); +} + /* * We should only use the power well if we explicitly asked the hardware to * enable it, so check if it's enabled and also check if we've requested it to @@ -1897,6 +1943,105 @@ void intel_display_power_put(struct drm_i915_private *dev_priv, BIT_ULL(POWER_DOMAIN_AUX_A) | \ BIT_ULL(POWER_DOMAIN_INIT)) +/* + * ICL PW_0/PG_0 domains (HW/DMC control): + * - PCI + * - clocks except port PLL + * - central power except FBC + * - shared functions except pipe interrupts, pipe MBUS, DBUF registers + * ICL PW_1/PG_1 domains (HW/DMC control): + * - DBUF function + * - PIPE_A and its planes, except VGA + * - transcoder EDP + PSR + * - transcoder DSI + * - DDI_A + * - FBC + */ +#define ICL_PW_4_POWER_DOMAINS ( \ + BIT_ULL(POWER_DOMAIN_PIPE_C) | \ + BIT_ULL(POWER_DOMAIN_PIPE_C_PANEL_FITTER) | \ + BIT_ULL(POWER_DOMAIN_INIT)) + /* VDSC/joining */ +#define ICL_PW_3_POWER_DOMAINS ( \ + ICL_PW_4_POWER_DOMAINS | \ + BIT_ULL(POWER_DOMAIN_PIPE_B) | \ + BIT_ULL(POWER_DOMAIN_TRANSCODER_A) | \ + BIT_ULL(POWER_DOMAIN_TRANSCODER_B) | \ + BIT_ULL(POWER_DOMAIN_TRANSCODER_C) | \ + BIT_ULL(POWER_DOMAIN_PIPE_B_PANEL_FITTER) | \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_B_LANES) | \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_B_IO) | \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_C_LANES) | \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_C_IO) | \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_D_LANES) | \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_D_IO) | \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_E_LANES) | \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_E_IO) | \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_F_LANES) | \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_F_IO) | \ + BIT_ULL(POWER_DOMAIN_AUX_B) | \ + BIT_ULL(POWER_DOMAIN_AUX_C) | \ + BIT_ULL(POWER_DOMAIN_AUX_D) | \ + BIT_ULL(POWER_DOMAIN_AUX_E) | \ + BIT_ULL(POWER_DOMAIN_AUX_F) | \ + BIT_ULL(POWER_DOMAIN_AUX_TBT1) | \ + BIT_ULL(POWER_DOMAIN_AUX_TBT2) | \ + BIT_ULL(POWER_DOMAIN_AUX_TBT3) | \ + BIT_ULL(POWER_DOMAIN_AUX_TBT4) | \ + BIT_ULL(POWER_DOMAIN_VGA) | \ + BIT_ULL(POWER_DOMAIN_AUDIO) | \ + BIT_ULL(POWER_DOMAIN_INIT)) + /* + * - transcoder WD + * - KVMR (HW control) + */ +#define ICL_PW_2_POWER_DOMAINS ( \ + ICL_PW_3_POWER_DOMAINS | \ + BIT_ULL(POWER_DOMAIN_INIT)) + /* + * - eDP/DSI VDSC + * - KVMR (HW control) + */ +#define ICL_DISPLAY_DC_OFF_POWER_DOMAINS ( \ + ICL_PW_2_POWER_DOMAINS | \ + BIT_ULL(POWER_DOMAIN_MODESET) | \ + BIT_ULL(POWER_DOMAIN_AUX_A) | \ + BIT_ULL(POWER_DOMAIN_INIT)) + +#define ICL_DDI_IO_A_POWER_DOMAINS ( \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_A_IO)) +#define ICL_DDI_IO_B_POWER_DOMAINS ( \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_B_IO)) +#define ICL_DDI_IO_C_POWER_DOMAINS ( \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_C_IO)) +#define ICL_DDI_IO_D_POWER_DOMAINS ( \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_D_IO)) +#define ICL_DDI_IO_E_POWER_DOMAINS ( \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_E_IO)) +#define ICL_DDI_IO_F_POWER_DOMAINS ( \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_F_IO)) + +#define ICL_AUX_A_IO_POWER_DOMAINS ( \ + BIT_ULL(POWER_DOMAIN_AUX_A)) +#define ICL_AUX_B_IO_POWER_DOMAINS ( \ + BIT_ULL(POWER_DOMAIN_AUX_B)) +#define ICL_AUX_C_IO_POWER_DOMAINS ( \ + BIT_ULL(POWER_DOMAIN_AUX_C)) +#define ICL_AUX_D_IO_POWER_DOMAINS ( \ + BIT_ULL(POWER_DOMAIN_AUX_D)) +#define ICL_AUX_E_IO_POWER_DOMAINS ( \ + BIT_ULL(POWER_DOMAIN_AUX_E)) +#define ICL_AUX_F_IO_POWER_DOMAINS ( \ + BIT_ULL(POWER_DOMAIN_AUX_F)) +#define ICL_AUX_TBT1_IO_POWER_DOMAINS ( \ + BIT_ULL(POWER_DOMAIN_AUX_TBT1)) +#define ICL_AUX_TBT2_IO_POWER_DOMAINS ( \ + BIT_ULL(POWER_DOMAIN_AUX_TBT2)) +#define ICL_AUX_TBT3_IO_POWER_DOMAINS ( \ + BIT_ULL(POWER_DOMAIN_AUX_TBT3)) +#define ICL_AUX_TBT4_IO_POWER_DOMAINS ( \ + BIT_ULL(POWER_DOMAIN_AUX_TBT4)) + static const struct i915_power_well_ops i9xx_always_on_power_well_ops = { .sync_hw = i9xx_power_well_sync_hw_noop, .enable = i9xx_always_on_power_well_noop, @@ -2454,6 +2599,157 @@ static struct i915_power_well cnl_power_wells[] = { }, }; +static const struct i915_power_well_ops icl_combo_phy_aux_power_well_ops = { + .sync_hw = hsw_power_well_sync_hw, + .enable = icl_combo_phy_aux_power_well_enable, + .disable = icl_combo_phy_aux_power_well_disable, + .is_enabled = hsw_power_well_enabled, +}; + +static struct i915_power_well icl_power_wells[] = { + { + .name = "always-on", + .always_on = 1, + .domains = POWER_DOMAIN_MASK, + .ops = &i9xx_always_on_power_well_ops, + .id = I915_DISP_PW_ALWAYS_ON, + }, + { + .name = "power well 1", + /* Handled by the DMC firmware */ + .domains = 0, + .ops = &hsw_power_well_ops, + .id = ICL_DISP_PW_1, + .hsw.has_fuses = true, + }, + { + .name = "power well 2", + .domains = ICL_PW_2_POWER_DOMAINS, + .ops = &hsw_power_well_ops, + .id = ICL_DISP_PW_2, + .hsw.has_fuses = true, + }, + { + .name = "DC off", + .domains = ICL_DISPLAY_DC_OFF_POWER_DOMAINS, + .ops = &gen9_dc_off_power_well_ops, + .id = SKL_DISP_PW_DC_OFF, + }, + { + .name = "power well 3", + .domains = ICL_PW_3_POWER_DOMAINS, + .ops = &hsw_power_well_ops, + .id = ICL_DISP_PW_3, + .hsw.irq_pipe_mask = BIT(PIPE_B), + .hsw.has_vga = true, + .hsw.has_fuses = true, + }, + { + .name = "DDI A IO", + .domains = ICL_DDI_IO_A_POWER_DOMAINS, + .ops = &hsw_power_well_ops, + .id = ICL_DISP_PW_DDI_A, + }, + { + .name = "DDI B IO", + .domains = ICL_DDI_IO_B_POWER_DOMAINS, + .ops = &hsw_power_well_ops, + .id = ICL_DISP_PW_DDI_B, + }, + { + .name = "DDI C IO", + .domains = ICL_DDI_IO_C_POWER_DOMAINS, + .ops = &hsw_power_well_ops, + .id = ICL_DISP_PW_DDI_C, + }, + { + .name = "DDI D IO", + .domains = ICL_DDI_IO_D_POWER_DOMAINS, + .ops = &hsw_power_well_ops, + .id = ICL_DISP_PW_DDI_D, + }, + { + .name = "DDI E IO", + .domains = ICL_DDI_IO_E_POWER_DOMAINS, + .ops = &hsw_power_well_ops, + .id = ICL_DISP_PW_DDI_E, + }, + { + .name = "DDI F IO", + .domains = ICL_DDI_IO_F_POWER_DOMAINS, + .ops = &hsw_power_well_ops, + .id = ICL_DISP_PW_DDI_F, + }, + { + .name = "AUX A", + .domains = ICL_AUX_A_IO_POWER_DOMAINS, + .ops = &icl_combo_phy_aux_power_well_ops, + .id = ICL_DISP_PW_AUX_A, + }, + { + .name = "AUX B", + .domains = ICL_AUX_B_IO_POWER_DOMAINS, + .ops = &icl_combo_phy_aux_power_well_ops, + .id = ICL_DISP_PW_AUX_B, + }, + { + .name = "AUX C", + .domains = ICL_AUX_C_IO_POWER_DOMAINS, + .ops = &hsw_power_well_ops, + .id = ICL_DISP_PW_AUX_C, + }, + { + .name = "AUX D", + .domains = ICL_AUX_D_IO_POWER_DOMAINS, + .ops = &hsw_power_well_ops, + .id = ICL_DISP_PW_AUX_D, + }, + { + .name = "AUX E", + .domains = ICL_AUX_E_IO_POWER_DOMAINS, + .ops = &hsw_power_well_ops, + .id = ICL_DISP_PW_AUX_E, + }, + { + .name = "AUX F", + .domains = ICL_AUX_F_IO_POWER_DOMAINS, + .ops = &hsw_power_well_ops, + .id = ICL_DISP_PW_AUX_F, + }, + { + .name = "AUX TBT1", + .domains = ICL_AUX_TBT1_IO_POWER_DOMAINS, + .ops = &hsw_power_well_ops, + .id = ICL_DISP_PW_AUX_TBT1, + }, + { + .name = "AUX TBT2", + .domains = ICL_AUX_TBT2_IO_POWER_DOMAINS, + .ops = &hsw_power_well_ops, + .id = ICL_DISP_PW_AUX_TBT2, + }, + { + .name = "AUX TBT3", + .domains = ICL_AUX_TBT3_IO_POWER_DOMAINS, + .ops = &hsw_power_well_ops, + .id = ICL_DISP_PW_AUX_TBT3, + }, + { + .name = "AUX TBT4", + .domains = ICL_AUX_TBT4_IO_POWER_DOMAINS, + .ops = &hsw_power_well_ops, + .id = ICL_DISP_PW_AUX_TBT4, + }, + { + .name = "power well 4", + .domains = ICL_PW_4_POWER_DOMAINS, + .ops = &hsw_power_well_ops, + .id = ICL_DISP_PW_4, + .hsw.has_fuses = true, + .hsw.irq_pipe_mask = BIT(PIPE_C), + }, +}; + static int sanitize_disable_power_well_option(const struct drm_i915_private *dev_priv, int disable_power_well) @@ -2471,7 +2767,7 @@ static uint32_t get_allowed_dc_mask(const struct drm_i915_private *dev_priv, int requested_dc; int max_dc; - if (IS_GEN9_BC(dev_priv) || IS_CANNONLAKE(dev_priv)) { + if (IS_GEN9_BC(dev_priv) || INTEL_INFO(dev_priv)->gen >= 10) { max_dc = 2; mask = 0; } else if (IS_GEN9_LP(dev_priv)) { @@ -2559,7 +2855,9 @@ int intel_power_domains_init(struct drm_i915_private *dev_priv) * The enabling order will be from lower to higher indexed wells, * the disabling order is reversed. */ - if (IS_HASWELL(dev_priv)) { + if (IS_ICELAKE(dev_priv)) { + set_power_wells(power_domains, icl_power_wells); + } else if (IS_HASWELL(dev_priv)) { set_power_wells(power_domains, hsw_power_wells); } else if (IS_BROADWELL(dev_priv)) { set_power_wells(power_domains, bdw_power_wells); @@ -3026,6 +3324,8 @@ static void cnl_display_core_uninit(struct drm_i915_private *dev_priv) static void icl_display_core_init(struct drm_i915_private *dev_priv, bool resume) { + struct i915_power_domains *power_domains = &dev_priv->power_domains; + struct i915_power_well *well; enum port port; u32 val; @@ -3054,8 +3354,14 @@ static void icl_display_core_init(struct drm_i915_private *dev_priv, I915_WRITE(ICL_PORT_CL_DW5(port), val); } - /* 4. Enable power well 1 (PG1) and aux IO power. */ - /* FIXME: ICL power wells code not here yet. */ + /* + * 4. Enable Power Well 1 (PG1). + * The AUX IO power wells will be enabled on demand. + */ + mutex_lock(&power_domains->lock); + well = lookup_power_well(dev_priv, ICL_DISP_PW_1); + intel_power_well_enable(dev_priv, well); + mutex_unlock(&power_domains->lock); /* 5. Enable CDCLK. */ icl_init_cdclk(dev_priv); @@ -3073,6 +3379,8 @@ static void icl_display_core_init(struct drm_i915_private *dev_priv, static void icl_display_core_uninit(struct drm_i915_private *dev_priv) { + struct i915_power_domains *power_domains = &dev_priv->power_domains; + struct i915_power_well *well; enum port port; u32 val; @@ -3086,8 +3394,15 @@ static void icl_display_core_uninit(struct drm_i915_private *dev_priv) /* 3. Disable CD clock */ icl_uninit_cdclk(dev_priv); - /* 4. Disable Power Well 1 (PG1) and Aux IO Power */ - /* FIXME: ICL power wells code not here yet. */ + /* + * 4. Disable Power Well 1 (PG1). + * The AUX IO power wells are toggled on demand, so they are already + * disabled at this point. + */ + mutex_lock(&power_domains->lock); + well = lookup_power_well(dev_priv, ICL_DISP_PW_1); + intel_power_well_disable(dev_priv, well); + mutex_unlock(&power_domains->lock); /* 5. Disable Comp */ for (port = PORT_A; port <= PORT_B; port++) { From a61b47f672c1ebd56b273a55cfed9621448d253e Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 27 Jun 2018 12:53:34 +0100 Subject: [PATCH 022/114] drm/i915: Wait for engines to idle before retiring In the next^W forthcoming patch, we will start to defer retiring the request from the engine list if it is still active on the submission backend. To preserve the semantics that after wait-for-idle completes the system is idle and fully retired, we need to therefore wait for the backends to idle before calling i915_retire_requests(). Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20180627115334.16282-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 858d188dd33b..5a9cae604e2b 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -3810,10 +3810,13 @@ int i915_gem_wait_for_idle(struct drm_i915_private *i915, unsigned int flags) if (err) return err; } + + err = wait_for_engines(i915); + if (err) + return err; + i915_retire_requests(i915); GEM_BUG_ON(i915->gt.active_requests); - - return wait_for_engines(i915); } else { struct intel_engine_cs *engine; enum intel_engine_id id; @@ -3824,9 +3827,9 @@ int i915_gem_wait_for_idle(struct drm_i915_private *i915, unsigned int flags) if (err) return err; } - - return 0; } + + return 0; } static void __i915_gem_object_flush_for_display(struct drm_i915_gem_object *obj) From 3160422251b214c7a7a8ef91bb76da012bb44b57 Mon Sep 17 00:00:00 2001 From: Anusha Srivatsa Date: Tue, 26 Jun 2018 13:52:23 -0700 Subject: [PATCH 023/114] drm/i915/icp: Add Interrupt Support This patch addresses Interrupts from south display engine (SDE). ICP has two registers - SHOTPLUG_CTL_DDI and SHOTPLUG_CTL_TC. Introduce these registers and their intended values. Introduce icp_irq_handler(). The icp_irq_postinstall() takes care of enabling all PCH interrupt sources, to unmask them as needed with SDEIMR, as is done done by ibx_irq_pre_postinstall() for earlier platforms. We do not need to explicitly call the ibx_irq_pre_postinstall(). Also, while changing these, s/CPT/PPT/CPT-CNP comment. v2: - remove redundant register defines.(Lucas) - Change register names to be more consistent with previous platforms (Lucas) v3: -Reorder bit defines to a more appropriate location. Change the comments. Confirm in the commit message that icp_irq_postinstall() need not go to ibx_irq_pre_postinstall() and ibx_irq_postinstall() as in earlier platforms. (Paulo) Cc: Lucas De Marchi Cc: Paulo Zanoni Cc: Dhinakaran Pandiyan Cc: Ville Syrjala Reviewed-by: Lucas De Marchi Reviewed-by: Paulo Zanoni Signed-off-by: Anusha Srivatsa [Paulo: coding style bikesheds and rebases]. Signed-off-by: Paulo Zanoni Link: https://patchwork.freedesktop.org/patch/msgid/1530046343-30649-1-git-send-email-anusha.srivatsa@intel.com --- drivers/gpu/drm/i915/i915_irq.c | 134 +++++++++++++++++++++++++++++++- drivers/gpu/drm/i915/i915_reg.h | 41 +++++++++- 2 files changed, 172 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 46aaef5c1851..7a7c4a2bd778 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -122,6 +122,15 @@ static const u32 hpd_gen11[HPD_NUM_PINS] = { [HPD_PORT_F] = GEN11_TC4_HOTPLUG | GEN11_TBT4_HOTPLUG }; +static const u32 hpd_icp[HPD_NUM_PINS] = { + [HPD_PORT_A] = SDE_DDIA_HOTPLUG_ICP, + [HPD_PORT_B] = SDE_DDIB_HOTPLUG_ICP, + [HPD_PORT_C] = SDE_TC1_HOTPLUG_ICP, + [HPD_PORT_D] = SDE_TC2_HOTPLUG_ICP, + [HPD_PORT_E] = SDE_TC3_HOTPLUG_ICP, + [HPD_PORT_F] = SDE_TC4_HOTPLUG_ICP +}; + /* IIR can theoretically queue up two events. Be paranoid. */ #define GEN8_IRQ_RESET_NDX(type, which) do { \ I915_WRITE(GEN8_##type##_IMR(which), 0xffffffff); \ @@ -1586,6 +1595,34 @@ static bool bxt_port_hotplug_long_detect(enum port port, u32 val) } } +static bool icp_ddi_port_hotplug_long_detect(enum port port, u32 val) +{ + switch (port) { + case PORT_A: + return val & ICP_DDIA_HPD_LONG_DETECT; + case PORT_B: + return val & ICP_DDIB_HPD_LONG_DETECT; + default: + return false; + } +} + +static bool icp_tc_port_hotplug_long_detect(enum port port, u32 val) +{ + switch (port) { + case PORT_C: + return val & ICP_TC_HPD_LONG_DETECT(PORT_TC1); + case PORT_D: + return val & ICP_TC_HPD_LONG_DETECT(PORT_TC2); + case PORT_E: + return val & ICP_TC_HPD_LONG_DETECT(PORT_TC3); + case PORT_F: + return val & ICP_TC_HPD_LONG_DETECT(PORT_TC4); + default: + return false; + } +} + static bool spt_port_hotplug2_long_detect(enum port port, u32 val) { switch (port) { @@ -2385,6 +2422,43 @@ static void cpt_irq_handler(struct drm_i915_private *dev_priv, u32 pch_iir) cpt_serr_int_handler(dev_priv); } +static void icp_irq_handler(struct drm_i915_private *dev_priv, u32 pch_iir) +{ + u32 ddi_hotplug_trigger = pch_iir & SDE_DDI_MASK_ICP; + u32 tc_hotplug_trigger = pch_iir & SDE_TC_MASK_ICP; + u32 pin_mask = 0, long_mask = 0; + + if (ddi_hotplug_trigger) { + u32 dig_hotplug_reg; + + dig_hotplug_reg = I915_READ(SHOTPLUG_CTL_DDI); + I915_WRITE(SHOTPLUG_CTL_DDI, dig_hotplug_reg); + + intel_get_hpd_pins(dev_priv, &pin_mask, &long_mask, + ddi_hotplug_trigger, + dig_hotplug_reg, hpd_icp, + icp_ddi_port_hotplug_long_detect); + } + + if (tc_hotplug_trigger) { + u32 dig_hotplug_reg; + + dig_hotplug_reg = I915_READ(SHOTPLUG_CTL_TC); + I915_WRITE(SHOTPLUG_CTL_TC, dig_hotplug_reg); + + intel_get_hpd_pins(dev_priv, &pin_mask, &long_mask, + tc_hotplug_trigger, + dig_hotplug_reg, hpd_icp, + icp_tc_port_hotplug_long_detect); + } + + if (pin_mask) + intel_hpd_irq_handler(dev_priv, pin_mask, long_mask); + + if (pch_iir & SDE_GMBUS_ICP) + gmbus_irq_handler(dev_priv); +} + static void spt_irq_handler(struct drm_i915_private *dev_priv, u32 pch_iir) { u32 hotplug_trigger = pch_iir & SDE_HOTPLUG_MASK_SPT & @@ -2804,8 +2878,11 @@ gen8_de_irq_handler(struct drm_i915_private *dev_priv, u32 master_ctl) I915_WRITE(SDEIIR, iir); ret = IRQ_HANDLED; - if (HAS_PCH_SPT(dev_priv) || HAS_PCH_KBP(dev_priv) || - HAS_PCH_CNP(dev_priv)) + if (HAS_PCH_ICP(dev_priv)) + icp_irq_handler(dev_priv, iir); + else if (HAS_PCH_SPT(dev_priv) || + HAS_PCH_KBP(dev_priv) || + HAS_PCH_CNP(dev_priv)) spt_irq_handler(dev_priv, iir); else cpt_irq_handler(dev_priv, iir); @@ -3584,6 +3661,9 @@ static void gen11_irq_reset(struct drm_device *dev) GEN3_IRQ_RESET(GEN11_DE_HPD_); GEN3_IRQ_RESET(GEN11_GU_MISC_); GEN3_IRQ_RESET(GEN8_PCU_); + + if (HAS_PCH_ICP(dev_priv)) + GEN3_IRQ_RESET(SDE); } void gen8_irq_power_well_post_enable(struct drm_i915_private *dev_priv, @@ -3700,6 +3780,35 @@ static void ibx_hpd_irq_setup(struct drm_i915_private *dev_priv) ibx_hpd_detection_setup(dev_priv); } +static void icp_hpd_detection_setup(struct drm_i915_private *dev_priv) +{ + u32 hotplug; + + hotplug = I915_READ(SHOTPLUG_CTL_DDI); + hotplug |= ICP_DDIA_HPD_ENABLE | + ICP_DDIB_HPD_ENABLE; + I915_WRITE(SHOTPLUG_CTL_DDI, hotplug); + + hotplug = I915_READ(SHOTPLUG_CTL_TC); + hotplug |= ICP_TC_HPD_ENABLE(PORT_TC1) | + ICP_TC_HPD_ENABLE(PORT_TC2) | + ICP_TC_HPD_ENABLE(PORT_TC3) | + ICP_TC_HPD_ENABLE(PORT_TC4); + I915_WRITE(SHOTPLUG_CTL_TC, hotplug); +} + +static void icp_hpd_irq_setup(struct drm_i915_private *dev_priv) +{ + u32 hotplug_irqs, enabled_irqs; + + hotplug_irqs = SDE_DDI_MASK_ICP | SDE_TC_MASK_ICP; + enabled_irqs = intel_hpd_enabled_irqs(dev_priv, hpd_icp); + + ibx_display_interrupt_update(dev_priv, hotplug_irqs, enabled_irqs); + + icp_hpd_detection_setup(dev_priv); +} + static void gen11_hpd_detection_setup(struct drm_i915_private *dev_priv) { u32 hotplug; @@ -3733,6 +3842,9 @@ static void gen11_hpd_irq_setup(struct drm_i915_private *dev_priv) POSTING_READ(GEN11_DE_HPD_IMR); gen11_hpd_detection_setup(dev_priv); + + if (HAS_PCH_ICP(dev_priv)) + icp_hpd_irq_setup(dev_priv); } static void spt_hpd_detection_setup(struct drm_i915_private *dev_priv) @@ -4168,11 +4280,29 @@ static void gen11_gt_irq_postinstall(struct drm_i915_private *dev_priv) I915_WRITE(GEN11_GPM_WGBOXPERF_INTR_MASK, ~0); } +static void icp_irq_postinstall(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = to_i915(dev); + u32 mask = SDE_GMBUS_ICP; + + WARN_ON(I915_READ(SDEIER) != 0); + I915_WRITE(SDEIER, 0xffffffff); + POSTING_READ(SDEIER); + + gen3_assert_iir_is_zero(dev_priv, SDEIIR); + I915_WRITE(SDEIMR, ~mask); + + icp_hpd_detection_setup(dev_priv); +} + static int gen11_irq_postinstall(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; u32 gu_misc_masked = GEN11_GU_MISC_GSE; + if (HAS_PCH_ICP(dev_priv)) + icp_irq_postinstall(dev); + gen11_gt_irq_postinstall(dev_priv); gen8_de_irq_postinstall(dev_priv); diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 2c20dc0db648..c30cfcd90754 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -7462,7 +7462,7 @@ enum { #define SDE_TRANSA_FIFO_UNDER (1 << 0) #define SDE_TRANS_MASK (0x3f) -/* south display engine interrupt: CPT/PPT */ +/* south display engine interrupt: CPT - CNP */ #define SDE_AUDIO_POWER_D_CPT (1 << 31) #define SDE_AUDIO_POWER_C_CPT (1 << 30) #define SDE_AUDIO_POWER_B_CPT (1 << 29) @@ -7510,6 +7510,21 @@ enum { SDE_FDI_RXB_CPT | \ SDE_FDI_RXA_CPT) +/* south display engine interrupt: ICP */ +#define SDE_TC4_HOTPLUG_ICP (1 << 27) +#define SDE_TC3_HOTPLUG_ICP (1 << 26) +#define SDE_TC2_HOTPLUG_ICP (1 << 25) +#define SDE_TC1_HOTPLUG_ICP (1 << 24) +#define SDE_GMBUS_ICP (1 << 23) +#define SDE_DDIB_HOTPLUG_ICP (1 << 17) +#define SDE_DDIA_HOTPLUG_ICP (1 << 16) +#define SDE_DDI_MASK_ICP (SDE_DDIB_HOTPLUG_ICP | \ + SDE_DDIA_HOTPLUG_ICP) +#define SDE_TC_MASK_ICP (SDE_TC4_HOTPLUG_ICP | \ + SDE_TC3_HOTPLUG_ICP | \ + SDE_TC2_HOTPLUG_ICP | \ + SDE_TC1_HOTPLUG_ICP) + #define SDEISR _MMIO(0xc4000) #define SDEIMR _MMIO(0xc4004) #define SDEIIR _MMIO(0xc4008) @@ -7570,6 +7585,30 @@ enum { #define PORTE_HOTPLUG_SHORT_DETECT (1 << 0) #define PORTE_HOTPLUG_LONG_DETECT (2 << 0) +/* This register is a reuse of PCH_PORT_HOTPLUG register. The + * functionality covered in PCH_PORT_HOTPLUG is split into + * SHOTPLUG_CTL_DDI and SHOTPLUG_CTL_TC. + */ + +#define SHOTPLUG_CTL_DDI _MMIO(0xc4030) +#define ICP_DDIB_HPD_ENABLE (1 << 7) +#define ICP_DDIB_HPD_STATUS_MASK (3 << 4) +#define ICP_DDIB_HPD_NO_DETECT (0 << 4) +#define ICP_DDIB_HPD_SHORT_DETECT (1 << 4) +#define ICP_DDIB_HPD_LONG_DETECT (2 << 4) +#define ICP_DDIB_HPD_SHORT_LONG_DETECT (3 << 4) +#define ICP_DDIA_HPD_ENABLE (1 << 3) +#define ICP_DDIA_HPD_STATUS_MASK (3 << 0) +#define ICP_DDIA_HPD_NO_DETECT (0 << 0) +#define ICP_DDIA_HPD_SHORT_DETECT (1 << 0) +#define ICP_DDIA_HPD_LONG_DETECT (2 << 0) +#define ICP_DDIA_HPD_SHORT_LONG_DETECT (3 << 0) + +#define SHOTPLUG_CTL_TC _MMIO(0xc4034) +#define ICP_TC_HPD_ENABLE(tc_port) (8 << (tc_port) * 4) +#define ICP_TC_HPD_LONG_DETECT(tc_port) (2 << (tc_port) * 4) +#define ICP_TC_HPD_SHORT_DETECT(tc_port) (1 << (tc_port) * 4) + #define PCH_GPIOA _MMIO(0xc5010) #define PCH_GPIOB _MMIO(0xc5014) #define PCH_GPIOC _MMIO(0xc5018) From ef196b5c2f4fb7a701a8f189cdcba94f422c9009 Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Mon, 18 Jun 2018 13:01:50 +0200 Subject: [PATCH 024/114] drm/i915: Replace drm_connector_{un/reference} with put,get functions This patch unifies the naming of DRM functions for reference counting of struct drm_connector. The resulting code is more aligned with the rest of the Linux kernel interfaces. Signed-off-by: Thomas Zimmermann Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20180618110154.30462-2-tdz@users.sourceforge.net --- drivers/gpu/drm/i915/intel_display.c | 4 ++-- drivers/gpu/drm/i915/intel_dp_mst.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 0db23292eb35..fca72ce0b3b3 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -10737,7 +10737,7 @@ static void intel_modeset_update_connector_atomic_state(struct drm_device *dev) drm_connector_list_iter_begin(dev, &conn_iter); for_each_intel_connector_iter(connector, &conn_iter) { if (connector->base.state->crtc) - drm_connector_unreference(&connector->base); + drm_connector_put(&connector->base); if (connector->base.encoder) { connector->base.state->best_encoder = @@ -10745,7 +10745,7 @@ static void intel_modeset_update_connector_atomic_state(struct drm_device *dev) connector->base.state->crtc = connector->base.encoder->crtc; - drm_connector_reference(&connector->base); + drm_connector_get(&connector->base); } else { connector->base.state->best_encoder = NULL; connector->base.state->crtc = NULL; diff --git a/drivers/gpu/drm/i915/intel_dp_mst.c b/drivers/gpu/drm/i915/intel_dp_mst.c index 5890500a3a8b..789a403e9f99 100644 --- a/drivers/gpu/drm/i915/intel_dp_mst.c +++ b/drivers/gpu/drm/i915/intel_dp_mst.c @@ -524,7 +524,7 @@ static void intel_dp_destroy_mst_connector(struct drm_dp_mst_topology_mgr *mgr, intel_connector->mst_port = NULL; drm_modeset_unlock(&connector->dev->mode_config.connection_mutex); - drm_connector_unreference(connector); + drm_connector_put(connector); } static void intel_dp_mst_hotplug(struct drm_dp_mst_topology_mgr *mgr) From 0f67706e6600cab897e1806133eea7f5000a2789 Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Mon, 18 Jun 2018 13:01:51 +0200 Subject: [PATCH 025/114] drm/i915: Replace drm_gem_object_{un/reference} with {put,get} functions This patch unifies the naming of DRM functions for reference counting of struct drm_gem_object. The resulting code is more aligned with the rest of the Linux kernel interfaces. Signed-off-by: Thomas Zimmermann Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20180618110154.30462-3-tdz@users.sourceforge.net --- drivers/gpu/drm/i915/i915_gem_object.h | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_object.h b/drivers/gpu/drm/i915/i915_gem_object.h index 54f00b350779..da6e849f41a4 100644 --- a/drivers/gpu/drm/i915/i915_gem_object.h +++ b/drivers/gpu/drm/i915/i915_gem_object.h @@ -337,13 +337,10 @@ __attribute__((nonnull)) static inline struct drm_i915_gem_object * i915_gem_object_get(struct drm_i915_gem_object *obj) { - drm_gem_object_reference(&obj->base); + drm_gem_object_get(&obj->base); return obj; } -__deprecated -extern void drm_gem_object_reference(struct drm_gem_object *); - __attribute__((nonnull)) static inline void i915_gem_object_put(struct drm_i915_gem_object *obj) @@ -351,9 +348,6 @@ i915_gem_object_put(struct drm_i915_gem_object *obj) __drm_gem_object_unreference(&obj->base); } -__deprecated -extern void drm_gem_object_unreference(struct drm_gem_object *); - __deprecated extern void drm_gem_object_unreference_unlocked(struct drm_gem_object *); From 55f95c2723daedb44b0552c6ab10401e0bd32a22 Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Mon, 18 Jun 2018 13:01:52 +0200 Subject: [PATCH 026/114] drm/i915: Replace __drm_gem_object_unreference with __drm_gem_object_put This patch unifies the naming of DRM functions for reference counting of struct drm_gem_object. The resulting code is more aligned with the rest of the Linux kernel interfaces. Signed-off-by: Thomas Zimmermann Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20180618110154.30462-4-tdz@users.sourceforge.net --- drivers/gpu/drm/i915/i915_gem_object.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_gem_object.h b/drivers/gpu/drm/i915/i915_gem_object.h index da6e849f41a4..0042496216fe 100644 --- a/drivers/gpu/drm/i915/i915_gem_object.h +++ b/drivers/gpu/drm/i915/i915_gem_object.h @@ -345,7 +345,7 @@ __attribute__((nonnull)) static inline void i915_gem_object_put(struct drm_i915_gem_object *obj) { - __drm_gem_object_unreference(&obj->base); + __drm_gem_object_put(&obj->base); } __deprecated From 01159b47a400fa8ba7026dd647d52e89ef7322bd Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Mon, 18 Jun 2018 13:01:53 +0200 Subject: [PATCH 027/114] drm/i915: Replace drm_gem_object_unreference_unlocked with put function This patch unifies the naming of DRM functions for reference counting of struct drm_gem_object. The resulting code is more aligned with the rest of the Linux kernel interfaces. Signed-off-by: Thomas Zimmermann Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20180618110154.30462-5-tdz@users.sourceforge.net --- drivers/gpu/drm/i915/i915_gem_object.h | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_object.h b/drivers/gpu/drm/i915/i915_gem_object.h index 0042496216fe..c3c6f2e588fb 100644 --- a/drivers/gpu/drm/i915/i915_gem_object.h +++ b/drivers/gpu/drm/i915/i915_gem_object.h @@ -348,9 +348,6 @@ i915_gem_object_put(struct drm_i915_gem_object *obj) __drm_gem_object_put(&obj->base); } -__deprecated -extern void drm_gem_object_unreference_unlocked(struct drm_gem_object *); - static inline void i915_gem_object_lock(struct drm_i915_gem_object *obj) { reservation_object_lock(obj->resv, NULL); From a24362ead99ab5d39b594a7b4ff48abc9addc059 Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Mon, 18 Jun 2018 13:01:54 +0200 Subject: [PATCH 028/114] drm/i915: Replace drm_dev_unref with drm_dev_put This patch unifies the naming of DRM functions for reference counting of struct drm_device. The resulting code is more aligned with the rest of the Linux kernel interfaces. Signed-off-by: Thomas Zimmermann Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20180618110154.30462-6-tdz@users.sourceforge.net --- drivers/gpu/drm/i915/selftests/huge_pages.c | 2 +- drivers/gpu/drm/i915/selftests/i915_gem_context.c | 2 +- drivers/gpu/drm/i915/selftests/i915_gem_dmabuf.c | 2 +- drivers/gpu/drm/i915/selftests/i915_gem_evict.c | 2 +- drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 2 +- drivers/gpu/drm/i915/selftests/i915_gem_object.c | 2 +- drivers/gpu/drm/i915/selftests/i915_request.c | 2 +- drivers/gpu/drm/i915/selftests/i915_vma.c | 2 +- drivers/gpu/drm/i915/selftests/intel_breadcrumbs.c | 2 +- 9 files changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/i915/selftests/huge_pages.c b/drivers/gpu/drm/i915/selftests/huge_pages.c index fbe4324116d7..b5e87fcdcdae 100644 --- a/drivers/gpu/drm/i915/selftests/huge_pages.c +++ b/drivers/gpu/drm/i915/selftests/huge_pages.c @@ -1724,7 +1724,7 @@ out_unlock: i915_modparams.enable_ppgtt = saved_ppgtt; - drm_dev_unref(&dev_priv->drm); + drm_dev_put(&dev_priv->drm); return err; } diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/selftests/i915_gem_context.c index 90c3c36173ba..c0c26fae45c7 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_context.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_context.c @@ -586,7 +586,7 @@ int i915_gem_context_mock_selftests(void) err = i915_subtests(tests, i915); - drm_dev_unref(&i915->drm); + drm_dev_put(&i915->drm); return err; } diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/selftests/i915_gem_dmabuf.c index 89dc25a5a53b..a7055b12e53c 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_dmabuf.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_dmabuf.c @@ -389,7 +389,7 @@ int i915_gem_dmabuf_mock_selftests(void) err = i915_subtests(tests, i915); - drm_dev_unref(&i915->drm); + drm_dev_put(&i915->drm); return err; } diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c index 2dc72a984d45..8059268800fa 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c @@ -490,7 +490,7 @@ int i915_gem_evict_mock_selftests(void) err = i915_subtests(tests, i915); mutex_unlock(&i915->drm.struct_mutex); - drm_dev_unref(&i915->drm); + drm_dev_put(&i915->drm); return err; } diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c index a4060238bef0..a28ee0cc6a63 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c @@ -1644,7 +1644,7 @@ int i915_gem_gtt_mock_selftests(void) err = i915_subtests(tests, i915); mutex_unlock(&i915->drm.struct_mutex); - drm_dev_unref(&i915->drm); + drm_dev_put(&i915->drm); return err; } diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_object.c b/drivers/gpu/drm/i915/selftests/i915_gem_object.c index 2b2dde94526f..549707b9d738 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_object.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_object.c @@ -586,7 +586,7 @@ int i915_gem_object_mock_selftests(void) err = i915_subtests(tests, i915); - drm_dev_unref(&i915->drm); + drm_dev_put(&i915->drm); return err; } diff --git a/drivers/gpu/drm/i915/selftests/i915_request.c b/drivers/gpu/drm/i915/selftests/i915_request.c index 63cd9486cc13..521ae4a90ddf 100644 --- a/drivers/gpu/drm/i915/selftests/i915_request.c +++ b/drivers/gpu/drm/i915/selftests/i915_request.c @@ -262,7 +262,7 @@ int i915_request_mock_selftests(void) return -ENOMEM; err = i915_subtests(tests, i915); - drm_dev_unref(&i915->drm); + drm_dev_put(&i915->drm); return err; } diff --git a/drivers/gpu/drm/i915/selftests/i915_vma.c b/drivers/gpu/drm/i915/selftests/i915_vma.c index 8400a8cc5cf2..ffa74290e054 100644 --- a/drivers/gpu/drm/i915/selftests/i915_vma.c +++ b/drivers/gpu/drm/i915/selftests/i915_vma.c @@ -733,7 +733,7 @@ int i915_vma_mock_selftests(void) err = i915_subtests(tests, i915); mutex_unlock(&i915->drm.struct_mutex); - drm_dev_unref(&i915->drm); + drm_dev_put(&i915->drm); return err; } diff --git a/drivers/gpu/drm/i915/selftests/intel_breadcrumbs.c b/drivers/gpu/drm/i915/selftests/intel_breadcrumbs.c index d6926e7820e5..f03b407fdbe2 100644 --- a/drivers/gpu/drm/i915/selftests/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/selftests/intel_breadcrumbs.c @@ -464,7 +464,7 @@ int intel_breadcrumbs_mock_selftests(void) return -ENOMEM; err = i915_subtests(tests, i915->engine[RCS]); - drm_dev_unref(&i915->drm); + drm_dev_put(&i915->drm); return err; } From 10195b1e4411191a30456e3e60bc539531b17053 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 28 Jun 2018 14:22:06 +0100 Subject: [PATCH 029/114] drm/i915: Show vma allocator stack when in doubt At the moment, gem_exec_gttfill fails with a sporadic EBUSY due to us wanting to unbind a pinned batch. Let's dump who first bound that vma to see if that helps us identify who still unexpectedly has it pinned. v2: We cannot allocate inside the printer (as it may be on an fs-reclaim path), so hope for the best and build the string on the stack v3: stack depth of 16 routinely overflows a 512 character string, limit it to 12 to avoid unsightly truncation. Signed-off-by: Chris Wilson Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20180628132206.8329-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_vma.c | 39 +++++++++++++++++++++++++++++++-- 1 file changed, 37 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index e82aa804cdba..d0e606e9b27a 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -21,7 +21,7 @@ * IN THE SOFTWARE. * */ - + #include "i915_vma.h" #include "i915_drv.h" @@ -30,6 +30,39 @@ #include +#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM) && IS_ENABLED(CONFIG_DRM_DEBUG_MM) + +#include + +static void vma_print_allocator(struct i915_vma *vma, const char *reason) +{ + unsigned long entries[12]; + struct stack_trace trace = { + .entries = entries, + .max_entries = ARRAY_SIZE(entries), + }; + char buf[512]; + + if (!vma->node.stack) { + DRM_DEBUG_DRIVER("vma.node [%08llx + %08llx] %s: unknown owner\n", + vma->node.start, vma->node.size, reason); + return; + } + + depot_fetch_stack(vma->node.stack, &trace); + snprint_stack_trace(buf, sizeof(buf), &trace, 0); + DRM_DEBUG_DRIVER("vma.node [%08llx + %08llx] %s: inserted at %s\n", + vma->node.start, vma->node.size, reason, buf); +} + +#else + +static void vma_print_allocator(struct i915_vma *vma, const char *reason) +{ +} + +#endif + static void i915_vma_retire(struct i915_gem_active *active, struct i915_request *rq) { @@ -875,8 +908,10 @@ int i915_vma_unbind(struct i915_vma *vma) } GEM_BUG_ON(i915_vma_is_active(vma)); - if (i915_vma_is_pinned(vma)) + if (i915_vma_is_pinned(vma)) { + vma_print_allocator(vma, "is pinned"); return -EBUSY; + } if (!drm_mm_node_allocated(&vma->node)) return 0; From 3f88325c2e396658a17632a19b59958750d7ad89 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 27 Jun 2018 21:13:01 +0100 Subject: [PATCH 030/114] drm/i915: Reduce spinlock hold time during notify_ring() interrupt By taking advantage of the RCU protection of the task struct, we can find the appropriate signaler under the spinlock and then release the spinlock before waking the task and signaling the fence. Signed-off-by: Chris Wilson Cc: Mika Kuoppala Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20180627201304.15817-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_irq.c | 32 +++++++++++++++++++++----------- 1 file changed, 21 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 7a7c4a2bd778..8893e795f672 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -1154,21 +1154,23 @@ static void ironlake_rps_change_irq_handler(struct drm_i915_private *dev_priv) static void notify_ring(struct intel_engine_cs *engine) { + const u32 seqno = intel_engine_get_seqno(engine); struct i915_request *rq = NULL; + struct task_struct *tsk = NULL; struct intel_wait *wait; - if (!engine->breadcrumbs.irq_armed) + if (unlikely(!engine->breadcrumbs.irq_armed)) return; atomic_inc(&engine->irq_count); - set_bit(ENGINE_IRQ_BREADCRUMB, &engine->irq_posted); + + rcu_read_lock(); spin_lock(&engine->breadcrumbs.irq_lock); wait = engine->breadcrumbs.irq_wait; if (wait) { - bool wakeup = engine->irq_seqno_barrier; - - /* We use a callback from the dma-fence to submit + /* + * We use a callback from the dma-fence to submit * requests after waiting on our own requests. To * ensure minimum delay in queuing the next request to * hardware, signal the fence now rather than wait for @@ -1179,19 +1181,22 @@ static void notify_ring(struct intel_engine_cs *engine) * and to handle coalescing of multiple seqno updates * and many waiters. */ - if (i915_seqno_passed(intel_engine_get_seqno(engine), - wait->seqno)) { + if (i915_seqno_passed(seqno, wait->seqno)) { struct i915_request *waiter = wait->request; - wakeup = true; if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &waiter->fence.flags) && intel_wait_check_request(wait, waiter)) rq = i915_request_get(waiter); - } - if (wakeup) - wake_up_process(wait->tsk); + tsk = wait->tsk; + } else { + if (engine->irq_seqno_barrier) { + set_bit(ENGINE_IRQ_BREADCRUMB, + &engine->irq_posted); + tsk = wait->tsk; + } + } } else { if (engine->breadcrumbs.irq_armed) __intel_engine_disarm_breadcrumbs(engine); @@ -1204,6 +1209,11 @@ static void notify_ring(struct intel_engine_cs *engine) i915_request_put(rq); } + if (tsk && tsk->state & TASK_NORMAL) + wake_up_process(tsk); + + rcu_read_unlock(); + trace_intel_engine_notify(engine, wait); } From 69dc4d003e3b7c262fd5fea3f25852e853eb6d4f Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 27 Jun 2018 21:13:02 +0100 Subject: [PATCH 031/114] drm/i915: Only trigger missed-seqno checking next to boundary If we have more interrupts pending (because we know there are more breadcrumb signals before the completion), then we do not need to trigger an irq_seqno_barrier or even wakeup the task on this interrupt as there will be another. To allow some margin of error (we are trying to work around incoherent seqno after all), we wakeup the breadcrumb before the target as well as on the target. Signed-off-by: Chris Wilson Cc: Mika Kuoppala Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20180627201304.15817-2-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_irq.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 8893e795f672..4f137eeaf395 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -1191,7 +1191,8 @@ static void notify_ring(struct intel_engine_cs *engine) tsk = wait->tsk; } else { - if (engine->irq_seqno_barrier) { + if (engine->irq_seqno_barrier && + i915_seqno_passed(seqno, wait->seqno - 1)) { set_bit(ENGINE_IRQ_BREADCRUMB, &engine->irq_posted); tsk = wait->tsk; From 78796877c37cb2c3898c4bcd2a12238d83858287 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 27 Jun 2018 21:13:03 +0100 Subject: [PATCH 032/114] drm/i915: Move the irq_counter inside the spinlock Rather than have multiple locked instructions inside the notify_ring() irq handler, move them inside the spinlock and reduce their intrinsic locking. Signed-off-by: Chris Wilson Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20180627201304.15817-3-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_irq.c | 4 ++-- drivers/gpu/drm/i915/i915_request.c | 4 ++-- drivers/gpu/drm/i915/intel_breadcrumbs.c | 11 +++++++---- drivers/gpu/drm/i915/intel_ringbuffer.h | 2 +- 4 files changed, 12 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 4f137eeaf395..c81b4c1877cc 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -1162,8 +1162,6 @@ static void notify_ring(struct intel_engine_cs *engine) if (unlikely(!engine->breadcrumbs.irq_armed)) return; - atomic_inc(&engine->irq_count); - rcu_read_lock(); spin_lock(&engine->breadcrumbs.irq_lock); @@ -1198,6 +1196,8 @@ static void notify_ring(struct intel_engine_cs *engine) tsk = wait->tsk; } } + + engine->breadcrumbs.irq_count++; } else { if (engine->breadcrumbs.irq_armed) __intel_engine_disarm_breadcrumbs(engine); diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index e1dbb544046f..39b296878ba2 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -1196,7 +1196,7 @@ static bool __i915_spin_request(const struct i915_request *rq, * takes to sleep on a request, on the order of a microsecond. */ - irq = atomic_read(&engine->irq_count); + irq = READ_ONCE(engine->breadcrumbs.irq_count); timeout_us += local_clock_us(&cpu); do { if (i915_seqno_passed(intel_engine_get_seqno(engine), seqno)) @@ -1208,7 +1208,7 @@ static bool __i915_spin_request(const struct i915_request *rq, * assume we won't see one in the near future but require * the engine->seqno_barrier() to fixup coherency. */ - if (atomic_read(&engine->irq_count) != irq) + if (READ_ONCE(engine->breadcrumbs.irq_count) != irq) break; if (signal_pending_state(state, current)) diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c index 86a987b8ac66..1db6ba7d926e 100644 --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c @@ -98,12 +98,14 @@ static void intel_breadcrumbs_hangcheck(struct timer_list *t) struct intel_engine_cs *engine = from_timer(engine, t, breadcrumbs.hangcheck); struct intel_breadcrumbs *b = &engine->breadcrumbs; + unsigned int irq_count; if (!b->irq_armed) return; - if (b->hangcheck_interrupts != atomic_read(&engine->irq_count)) { - b->hangcheck_interrupts = atomic_read(&engine->irq_count); + irq_count = READ_ONCE(b->irq_count); + if (b->hangcheck_interrupts != irq_count) { + b->hangcheck_interrupts = irq_count; mod_timer(&b->hangcheck, wait_timeout()); return; } @@ -272,13 +274,14 @@ static bool use_fake_irq(const struct intel_breadcrumbs *b) if (!test_bit(engine->id, &engine->i915->gpu_error.missed_irq_rings)) return false; - /* Only start with the heavy weight fake irq timer if we have not + /* + * Only start with the heavy weight fake irq timer if we have not * seen any interrupts since enabling it the first time. If the * interrupts are still arriving, it means we made a mistake in our * engine->seqno_barrier(), a timing error that should be transient * and unlikely to reoccur. */ - return atomic_read(&engine->irq_count) == b->hangcheck_interrupts; + return READ_ONCE(b->irq_count) == b->hangcheck_interrupts; } static void enable_fake_irq(struct intel_breadcrumbs *b) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index a0bc7a8222b4..44ac90ec540c 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -345,7 +345,6 @@ struct intel_engine_cs { struct drm_i915_gem_object *default_state; void *pinned_default_state; - atomic_t irq_count; unsigned long irq_posted; #define ENGINE_IRQ_BREADCRUMB 0 #define ENGINE_IRQ_EXECLIST 1 @@ -380,6 +379,7 @@ struct intel_engine_cs { unsigned int hangcheck_interrupts; unsigned int irq_enabled; + unsigned int irq_count; bool irq_armed : 1; I915_SELFTEST_DECLARE(bool mock : 1); From e3be4079ea91c8b7bcb97cf44889ec5663c55fb4 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 27 Jun 2018 21:13:04 +0100 Subject: [PATCH 033/114] drm/i915: Only signal from interrupt when requested Avoid calling dma_fence_signal() from inside the interrupt if we haven't enabled signaling on the request. Signed-off-by: Chris Wilson Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20180627201304.15817-4-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_irq.c | 8 ++++++-- drivers/gpu/drm/i915/i915_request.c | 2 +- drivers/gpu/drm/i915/intel_ringbuffer.h | 5 ++--- 3 files changed, 9 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index c81b4c1877cc..4be56aec99b3 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -1182,7 +1182,8 @@ static void notify_ring(struct intel_engine_cs *engine) if (i915_seqno_passed(seqno, wait->seqno)) { struct i915_request *waiter = wait->request; - if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, + if (waiter && + !test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &waiter->fence.flags) && intel_wait_check_request(wait, waiter)) rq = i915_request_get(waiter); @@ -1205,8 +1206,11 @@ static void notify_ring(struct intel_engine_cs *engine) spin_unlock(&engine->breadcrumbs.irq_lock); if (rq) { - dma_fence_signal(&rq->fence); + spin_lock(&rq->lock); + dma_fence_signal_locked(&rq->fence); GEM_BUG_ON(!i915_request_completed(rq)); + spin_unlock(&rq->lock); + i915_request_put(rq); } diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 39b296878ba2..a2f7e9358450 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -1285,7 +1285,7 @@ long i915_request_wait(struct i915_request *rq, if (flags & I915_WAIT_LOCKED) add_wait_queue(errq, &reset); - intel_wait_init(&wait, rq); + intel_wait_init(&wait); restart: do { diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 44ac90ec540c..78f01a35823a 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -928,11 +928,10 @@ static inline u32 intel_hws_preempt_done_address(struct intel_engine_cs *engine) /* intel_breadcrumbs.c -- user interrupt bottom-half for waiters */ int intel_engine_init_breadcrumbs(struct intel_engine_cs *engine); -static inline void intel_wait_init(struct intel_wait *wait, - struct i915_request *rq) +static inline void intel_wait_init(struct intel_wait *wait) { wait->tsk = current; - wait->request = rq; + wait->request = NULL; } static inline void intel_wait_init_for_seqno(struct intel_wait *wait, u32 seqno) From c39d2e7e35f6f357e59c7313dab8e90f7d1ff93e Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Thu, 28 Jun 2018 14:15:20 +0000 Subject: [PATCH 034/114] drm/i915/guc: Use intel_guc_init_misc to hide GuC internals We will add more init steps to misc phase and there is no need to expose them separately for use in uc_init_misc function. Signed-off-by: Michal Wajdeczko Cc: Michel Thierry Reviewed-by: Michel Thierry Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20180628141522.62788-1-michal.wajdeczko@intel.com --- drivers/gpu/drm/i915/intel_guc.c | 28 ++++++++++++++++++++++++---- drivers/gpu/drm/i915/intel_guc.h | 5 ++--- drivers/gpu/drm/i915/intel_uc.c | 6 ++---- 3 files changed, 28 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_guc.c b/drivers/gpu/drm/i915/intel_guc.c index f651e57b4c61..0b06f27d3e1a 100644 --- a/drivers/gpu/drm/i915/intel_guc.c +++ b/drivers/gpu/drm/i915/intel_guc.c @@ -27,6 +27,8 @@ #include "intel_guc_submission.h" #include "i915_drv.h" +static void guc_init_ggtt_pin_bias(struct intel_guc *guc); + static void gen8_guc_raise_irq(struct intel_guc *guc) { struct drm_i915_private *dev_priv = guc_to_i915(guc); @@ -73,7 +75,7 @@ void intel_guc_init_early(struct intel_guc *guc) guc->notify = gen8_guc_raise_irq; } -int intel_guc_init_wq(struct intel_guc *guc) +static int guc_init_wq(struct intel_guc *guc) { struct drm_i915_private *dev_priv = guc_to_i915(guc); @@ -124,7 +126,7 @@ int intel_guc_init_wq(struct intel_guc *guc) return 0; } -void intel_guc_fini_wq(struct intel_guc *guc) +static void guc_fini_wq(struct intel_guc *guc) { struct drm_i915_private *dev_priv = guc_to_i915(guc); @@ -135,6 +137,24 @@ void intel_guc_fini_wq(struct intel_guc *guc) destroy_workqueue(guc->log.relay.flush_wq); } +int intel_guc_init_misc(struct intel_guc *guc) +{ + int ret; + + guc_init_ggtt_pin_bias(guc); + + ret = guc_init_wq(guc); + if (ret) + return ret; + + return 0; +} + +void intel_guc_fini_misc(struct intel_guc *guc) +{ + guc_fini_wq(guc); +} + static int guc_shared_data_create(struct intel_guc *guc) { struct i915_vma *vma; @@ -582,13 +602,13 @@ int intel_guc_resume(struct intel_guc *guc) */ /** - * intel_guc_init_ggtt_pin_bias() - Initialize the GuC ggtt_pin_bias value. + * guc_init_ggtt_pin_bias() - Initialize the GuC ggtt_pin_bias value. * @guc: intel_guc structure. * * This function will calculate and initialize the ggtt_pin_bias value based on * overall WOPCM size and GuC WOPCM size. */ -void intel_guc_init_ggtt_pin_bias(struct intel_guc *guc) +static void guc_init_ggtt_pin_bias(struct intel_guc *guc) { struct drm_i915_private *i915 = guc_to_i915(guc); diff --git a/drivers/gpu/drm/i915/intel_guc.h b/drivers/gpu/drm/i915/intel_guc.h index f1265e122d30..4121928a495e 100644 --- a/drivers/gpu/drm/i915/intel_guc.h +++ b/drivers/gpu/drm/i915/intel_guc.h @@ -151,11 +151,10 @@ static inline u32 intel_guc_ggtt_offset(struct intel_guc *guc, void intel_guc_init_early(struct intel_guc *guc); void intel_guc_init_send_regs(struct intel_guc *guc); void intel_guc_init_params(struct intel_guc *guc); -void intel_guc_init_ggtt_pin_bias(struct intel_guc *guc); -int intel_guc_init_wq(struct intel_guc *guc); -void intel_guc_fini_wq(struct intel_guc *guc); +int intel_guc_init_misc(struct intel_guc *guc); int intel_guc_init(struct intel_guc *guc); void intel_guc_fini(struct intel_guc *guc); +void intel_guc_fini_misc(struct intel_guc *guc); int intel_guc_send_nop(struct intel_guc *guc, const u32 *action, u32 len, u32 *response_buf, u32 response_buf_size); int intel_guc_send_mmio(struct intel_guc *guc, const u32 *action, u32 len, diff --git a/drivers/gpu/drm/i915/intel_uc.c b/drivers/gpu/drm/i915/intel_uc.c index 94e8863bd97c..cd49b4f510f0 100644 --- a/drivers/gpu/drm/i915/intel_uc.c +++ b/drivers/gpu/drm/i915/intel_uc.c @@ -257,9 +257,7 @@ int intel_uc_init_misc(struct drm_i915_private *i915) if (!USES_GUC(i915)) return 0; - intel_guc_init_ggtt_pin_bias(guc); - - ret = intel_guc_init_wq(guc); + ret = intel_guc_init_misc(guc); if (ret) return ret; @@ -273,7 +271,7 @@ void intel_uc_fini_misc(struct drm_i915_private *i915) if (!USES_GUC(i915)) return; - intel_guc_fini_wq(guc); + intel_guc_fini_misc(guc); } int intel_uc_init(struct drm_i915_private *i915) From f7dc0157e4b5f23780559c6b71d3f7282f6c7f1b Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Thu, 28 Jun 2018 14:15:21 +0000 Subject: [PATCH 035/114] drm/i915/uc: Fetch GuC/HuC firmwares from guc/huc specific init We're fetching GuC/HuC firmwares directly from uc level during init_early stage but this breaks guc/huc struct isolation and also strict SW-only initialization rule for init_early. Move fw fetching to init phase and do it separately per guc/huc struct. v2: don't forget to move wopcm_init - Michele v3: fetch in init_misc phase - Michal Signed-off-by: Michal Wajdeczko Cc: Daniele Ceraolo Spurio Cc: Michel Thierry Reviewed-by: Michel Thierry #2 Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20180628141522.62788-2-michal.wajdeczko@intel.com --- drivers/gpu/drm/i915/i915_gem.c | 9 +++++---- drivers/gpu/drm/i915/intel_guc.c | 9 ++++++++- drivers/gpu/drm/i915/intel_huc.c | 8 ++++++++ drivers/gpu/drm/i915/intel_huc.h | 6 ++++++ drivers/gpu/drm/i915/intel_uc.c | 28 +++++++++++++++------------- 5 files changed, 42 insertions(+), 18 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 5a9cae604e2b..8954db6ab083 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -5459,14 +5459,14 @@ int i915_gem_init(struct drm_i915_private *dev_priv) if (ret) return ret; - ret = intel_wopcm_init(&dev_priv->wopcm); - if (ret) - return ret; - ret = intel_uc_init_misc(dev_priv); if (ret) return ret; + ret = intel_wopcm_init(&dev_priv->wopcm); + if (ret) + goto err_uc_misc; + /* This is just a security blanket to placate dragons. * On some systems, we very sporadically observe that the first TLBs * used by the CS may be stale, despite us poking the TLB reset. If @@ -5563,6 +5563,7 @@ err_unlock: intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); mutex_unlock(&dev_priv->drm.struct_mutex); +err_uc_misc: intel_uc_fini_misc(dev_priv); if (ret != -EIO) diff --git a/drivers/gpu/drm/i915/intel_guc.c b/drivers/gpu/drm/i915/intel_guc.c index 0b06f27d3e1a..53b43bc73c01 100644 --- a/drivers/gpu/drm/i915/intel_guc.c +++ b/drivers/gpu/drm/i915/intel_guc.c @@ -139,6 +139,7 @@ static void guc_fini_wq(struct intel_guc *guc) int intel_guc_init_misc(struct intel_guc *guc) { + struct drm_i915_private *i915 = guc_to_i915(guc); int ret; guc_init_ggtt_pin_bias(guc); @@ -147,11 +148,14 @@ int intel_guc_init_misc(struct intel_guc *guc) if (ret) return ret; + intel_uc_fw_fetch(i915, &guc->fw); + return 0; } void intel_guc_fini_misc(struct intel_guc *guc) { + intel_uc_fw_fini(&guc->fw); guc_fini_wq(guc); } @@ -189,7 +193,7 @@ int intel_guc_init(struct intel_guc *guc) ret = guc_shared_data_create(guc); if (ret) - return ret; + goto err_fetch; GEM_BUG_ON(!guc->shared_data); ret = intel_guc_log_create(&guc->log); @@ -210,6 +214,8 @@ err_log: intel_guc_log_destroy(&guc->log); err_shared: guc_shared_data_destroy(guc); +err_fetch: + intel_uc_fw_fini(&guc->fw); return ret; } @@ -221,6 +227,7 @@ void intel_guc_fini(struct intel_guc *guc) intel_guc_ads_destroy(guc); intel_guc_log_destroy(&guc->log); guc_shared_data_destroy(guc); + intel_uc_fw_fini(&guc->fw); } static u32 guc_ctl_debug_flags(struct intel_guc *guc) diff --git a/drivers/gpu/drm/i915/intel_huc.c b/drivers/gpu/drm/i915/intel_huc.c index 291285277403..ffcad5fad6a7 100644 --- a/drivers/gpu/drm/i915/intel_huc.c +++ b/drivers/gpu/drm/i915/intel_huc.c @@ -32,6 +32,14 @@ void intel_huc_init_early(struct intel_huc *huc) intel_huc_fw_init_early(huc); } +int intel_huc_init_misc(struct intel_huc *huc) +{ + struct drm_i915_private *i915 = huc_to_i915(huc); + + intel_uc_fw_fetch(i915, &huc->fw); + return 0; +} + /** * intel_huc_auth() - Authenticate HuC uCode * @huc: intel_huc structure diff --git a/drivers/gpu/drm/i915/intel_huc.h b/drivers/gpu/drm/i915/intel_huc.h index aa854907abac..7e41d870b509 100644 --- a/drivers/gpu/drm/i915/intel_huc.h +++ b/drivers/gpu/drm/i915/intel_huc.h @@ -36,9 +36,15 @@ struct intel_huc { }; void intel_huc_init_early(struct intel_huc *huc); +int intel_huc_init_misc(struct intel_huc *huc); int intel_huc_auth(struct intel_huc *huc); int intel_huc_check_status(struct intel_huc *huc); +static inline void intel_huc_fini_misc(struct intel_huc *huc) +{ + intel_uc_fw_fini(&huc->fw); +} + static inline int intel_huc_sanitize(struct intel_huc *huc) { intel_uc_fw_sanitize(&huc->fw); diff --git a/drivers/gpu/drm/i915/intel_uc.c b/drivers/gpu/drm/i915/intel_uc.c index cd49b4f510f0..7c95697e1a35 100644 --- a/drivers/gpu/drm/i915/intel_uc.c +++ b/drivers/gpu/drm/i915/intel_uc.c @@ -171,24 +171,11 @@ void intel_uc_init_early(struct drm_i915_private *i915) intel_huc_init_early(huc); sanitize_options_early(i915); - - if (USES_GUC(i915)) - intel_uc_fw_fetch(i915, &guc->fw); - - if (USES_HUC(i915)) - intel_uc_fw_fetch(i915, &huc->fw); } void intel_uc_cleanup_early(struct drm_i915_private *i915) { struct intel_guc *guc = &i915->guc; - struct intel_huc *huc = &i915->huc; - - if (USES_HUC(i915)) - intel_uc_fw_fini(&huc->fw); - - if (USES_GUC(i915)) - intel_uc_fw_fini(&guc->fw); guc_free_load_err_log(guc); } @@ -252,6 +239,7 @@ static void guc_disable_communication(struct intel_guc *guc) int intel_uc_init_misc(struct drm_i915_private *i915) { struct intel_guc *guc = &i915->guc; + struct intel_huc *huc = &i915->huc; int ret; if (!USES_GUC(i915)) @@ -261,16 +249,30 @@ int intel_uc_init_misc(struct drm_i915_private *i915) if (ret) return ret; + if (USES_HUC(i915)) { + ret = intel_huc_init_misc(huc); + if (ret) + goto err_guc; + } + return 0; + +err_guc: + intel_guc_fini_misc(guc); + return ret; } void intel_uc_fini_misc(struct drm_i915_private *i915) { struct intel_guc *guc = &i915->guc; + struct intel_huc *huc = &i915->huc; if (!USES_GUC(i915)) return; + if (USES_HUC(i915)) + intel_huc_fini_misc(huc); + intel_guc_fini_misc(guc); } From 74093f3eccac232447ef95536c6a359ee7661650 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 28 Jun 2018 21:12:03 +0100 Subject: [PATCH 036/114] drm/i915: Drop posting reads to flush master interrupts We do not need to do a posting read of our uncached mmio write to re-enable the master interrupt lines after handling an interrupt, so don't. This saves us a slow UC read before we can process the interrupt, most noticeable in execlists where any stalls imposes extra latency on GPU command execution. Signed-off-by: Chris Wilson Reviewed-by: Ville Syrjala Link: https://patchwork.freedesktop.org/patch/msgid/20180628201211.13837-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_irq.c | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 4be56aec99b3..02fe0d32382e 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -2183,7 +2183,6 @@ static irqreturn_t valleyview_irq_handler(int irq, void *arg) I915_WRITE(VLV_IER, ier); I915_WRITE(VLV_MASTER_IER, MASTER_INTERRUPT_ENABLE); - POSTING_READ(VLV_MASTER_IER); if (gt_iir) snb_gt_irq_handler(dev_priv, gt_iir); @@ -2268,7 +2267,6 @@ static irqreturn_t cherryview_irq_handler(int irq, void *arg) I915_WRITE(VLV_IER, ier); I915_WRITE(GEN8_MASTER_IRQ, GEN8_MASTER_IRQ_CONTROL); - POSTING_READ(GEN8_MASTER_IRQ); gen8_gt_irq_handler(dev_priv, master_ctl, gt_iir); @@ -2637,7 +2635,6 @@ static irqreturn_t ironlake_irq_handler(int irq, void *arg) /* disable master interrupt before clearing iir */ de_ier = I915_READ(DEIER); I915_WRITE(DEIER, de_ier & ~DE_MASTER_IRQ_CONTROL); - POSTING_READ(DEIER); /* Disable south interrupts. We'll only write to SDEIIR once, so further * interrupts will will be stored on its back queue, and then we'll be @@ -2647,7 +2644,6 @@ static irqreturn_t ironlake_irq_handler(int irq, void *arg) if (!HAS_PCH_NOP(dev_priv)) { sde_ier = I915_READ(SDEIER); I915_WRITE(SDEIER, 0); - POSTING_READ(SDEIER); } /* Find, clear, then process each source of interrupt */ @@ -2682,11 +2678,8 @@ static irqreturn_t ironlake_irq_handler(int irq, void *arg) } I915_WRITE(DEIER, de_ier); - POSTING_READ(DEIER); - if (!HAS_PCH_NOP(dev_priv)) { + if (!HAS_PCH_NOP(dev_priv)) I915_WRITE(SDEIER, sde_ier); - POSTING_READ(SDEIER); - } /* IRQs are synced during runtime_suspend, we don't require a wakeref */ enable_rpm_wakeref_asserts(dev_priv); From 0b02befa828c0be67a5416e10a0244e181fde0d3 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 28 Jun 2018 21:12:04 +0100 Subject: [PATCH 037/114] drm/i915/execlists: Pull submit after dequeue under timeline lock In the next patch, we will begin processing the CSB from inside the submission path (underneath an irqsoff section, and even from inside interrupt handlers). This means that updating the execlists->port[] will no longer be serialised by the tasklet but needs to be locked by the engine->timeline.lock instead. Pull dequeue and submit under the same lock for protection. (An alternate future plan is to keep the in/out arrays separate for concurrent processing and reduced lock coverage.) Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20180628201211.13837-2-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_lrc.c | 32 ++++++++++++-------------------- 1 file changed, 12 insertions(+), 20 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 46572976c942..af94949d08c8 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -567,7 +567,7 @@ static void complete_preempt_context(struct intel_engine_execlists *execlists) execlists_clear_active(execlists, EXECLISTS_ACTIVE_PREEMPT); } -static bool __execlists_dequeue(struct intel_engine_cs *engine) +static void __execlists_dequeue(struct intel_engine_cs *engine) { struct intel_engine_execlists * const execlists = &engine->execlists; struct execlist_port *port = execlists->port; @@ -622,11 +622,11 @@ static bool __execlists_dequeue(struct intel_engine_cs *engine) * the HW to indicate that it has had a chance to respond. */ if (!execlists_is_active(execlists, EXECLISTS_ACTIVE_HWACK)) - return false; + return; if (need_preempt(engine, last, execlists->queue_priority)) { inject_preempt_context(engine); - return false; + return; } /* @@ -651,7 +651,7 @@ static bool __execlists_dequeue(struct intel_engine_cs *engine) * priorities of the ports haven't been switch. */ if (port_count(&port[1])) - return false; + return; /* * WaIdleLiteRestore:bdw,skl @@ -751,8 +751,10 @@ done: port != execlists->port ? rq_prio(last) : INT_MIN; execlists->first = rb; - if (submit) + if (submit) { port_assign(port, last); + execlists_submit_ports(engine); + } /* We must always keep the beast fed if we have work piled up */ GEM_BUG_ON(execlists->first && !port_isset(execlists->port)); @@ -761,24 +763,19 @@ done: if (last) execlists_user_begin(execlists, execlists->port); - return submit; + /* If the engine is now idle, so should be the flag; and vice versa. */ + GEM_BUG_ON(execlists_is_active(&engine->execlists, + EXECLISTS_ACTIVE_USER) == + !port_isset(engine->execlists.port)); } static void execlists_dequeue(struct intel_engine_cs *engine) { - struct intel_engine_execlists * const execlists = &engine->execlists; unsigned long flags; - bool submit; spin_lock_irqsave(&engine->timeline.lock, flags); - submit = __execlists_dequeue(engine); + __execlists_dequeue(engine); spin_unlock_irqrestore(&engine->timeline.lock, flags); - - if (submit) - execlists_submit_ports(engine); - - GEM_BUG_ON(port_isset(execlists->port) && - !execlists_is_active(execlists, EXECLISTS_ACTIVE_USER)); } void @@ -1162,11 +1159,6 @@ static void execlists_submission_tasklet(unsigned long data) if (!execlists_is_active(&engine->execlists, EXECLISTS_ACTIVE_PREEMPT)) execlists_dequeue(engine); - - /* If the engine is now idle, so should be the flag; and vice versa. */ - GEM_BUG_ON(execlists_is_active(&engine->execlists, - EXECLISTS_ACTIVE_USER) == - !port_isset(engine->execlists.port)); } static void queue_request(struct intel_engine_cs *engine, From d8857d541c67110cdcedb330e9677efdc9c5844d Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 28 Jun 2018 21:12:05 +0100 Subject: [PATCH 038/114] drm/i915/execlists: Pull CSB reset under the timeline.lock In the following patch, we will process the CSB events under the timeline.lock and not serialised by the tasklet. This also means that we will need to protect access to common variables such as execlists->csb_head with the timeline.lock during reset. v2: Move sync_irq to avoid deadlocks between taking timeline.lock from our interrupt handler. v3: Kill off the synchronize_hardirq as it raises more questions than answered; now we use the timeline.lock entirely for CSB serialisation between the irq and elsewhere, we don't need to be so heavy handed with flushing v4: Treat request cancellation (wedging after failed reset) similarly Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20180628201211.13837-3-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_lrc.c | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index af94949d08c8..0cee34e39343 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -872,7 +872,6 @@ static void reset_irq(struct intel_engine_cs *engine) { /* Mark all CS interrupts as complete */ smp_store_mb(engine->execlists.active, 0); - synchronize_hardirq(engine->i915->drm.irq); clear_gtiir(engine); @@ -909,14 +908,12 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine) * submission's irq state, we also wish to remind ourselves that * it is irq state.) */ - local_irq_save(flags); + spin_lock_irqsave(&engine->timeline.lock, flags); /* Cancel the requests on the HW and clear the ELSP tracker. */ execlists_cancel_port_requests(execlists); reset_irq(engine); - spin_lock(&engine->timeline.lock); - /* Mark all executing requests as skipped. */ list_for_each_entry(rq, &engine->timeline.requests, link) { GEM_BUG_ON(!rq->global_seqno); @@ -950,9 +947,7 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine) execlists->first = NULL; GEM_BUG_ON(port_isset(execlists->port)); - spin_unlock(&engine->timeline.lock); - - local_irq_restore(flags); + spin_unlock_irqrestore(&engine->timeline.lock, flags); } static void process_csb(struct intel_engine_cs *engine) @@ -1970,8 +1965,7 @@ static void execlists_reset(struct intel_engine_cs *engine, engine->name, request ? request->global_seqno : 0, intel_engine_get_seqno(engine)); - /* See execlists_cancel_requests() for the irq/spinlock split. */ - local_irq_save(flags); + spin_lock_irqsave(&engine->timeline.lock, flags); /* * Catch up with any missed context-switch interrupts. @@ -1986,14 +1980,12 @@ static void execlists_reset(struct intel_engine_cs *engine, reset_irq(engine); /* Push back any incomplete requests for replay after the reset. */ - spin_lock(&engine->timeline.lock); __unwind_incomplete_requests(engine); - spin_unlock(&engine->timeline.lock); /* Following the reset, we need to reload the CSB read/write pointers */ engine->execlists.csb_head = GEN8_CSB_ENTRIES - 1; - local_irq_restore(flags); + spin_unlock_irqrestore(&engine->timeline.lock, flags); /* * If the request was innocent, we leave the request in the ELSP From 8ea397fa70be357f52988eb156b324105b286607 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 28 Jun 2018 21:12:06 +0100 Subject: [PATCH 039/114] drm/i915/execlists: Process one CSB update at a time In the next patch, we will process the CSB events directly from the submission path, rather than only after a CS interrupt. Hence, we will no longer have the need for a loop until the has-interrupt bit is clear, and in the meantime can remove that small optimisation. v2: Tvrtko pointed out it was safer to unconditionally kick the tasklet after each irq, when assuming that the tasklet is called for each irq. Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20180628201211.13837-4-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_irq.c | 7 +- drivers/gpu/drm/i915/intel_lrc.c | 278 +++++++++++++++---------------- 2 files changed, 141 insertions(+), 144 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 02fe0d32382e..c95cb278a8bc 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -1497,9 +1497,10 @@ gen8_cs_irq_handler(struct intel_engine_cs *engine, u32 iir) bool tasklet = false; if (iir & GT_CONTEXT_SWITCH_INTERRUPT) { - if (READ_ONCE(engine->execlists.active)) - tasklet = !test_and_set_bit(ENGINE_IRQ_EXECLIST, - &engine->irq_posted); + if (READ_ONCE(engine->execlists.active)) { + set_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted); + tasklet = true; + } } if (iir & GT_RENDER_USER_INTERRUPT) { diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 0cee34e39343..c8d3d41011ce 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -955,166 +955,162 @@ static void process_csb(struct intel_engine_cs *engine) struct intel_engine_execlists * const execlists = &engine->execlists; struct execlist_port *port = execlists->port; struct drm_i915_private *i915 = engine->i915; + + /* The HWSP contains a (cacheable) mirror of the CSB */ + const u32 *buf = + &engine->status_page.page_addr[I915_HWS_CSB_BUF0_INDEX]; + unsigned int head, tail; bool fw = false; - do { - /* The HWSP contains a (cacheable) mirror of the CSB */ - const u32 *buf = - &engine->status_page.page_addr[I915_HWS_CSB_BUF0_INDEX]; - unsigned int head, tail; + /* Clear before reading to catch new interrupts */ + clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted); + smp_mb__after_atomic(); - /* Clear before reading to catch new interrupts */ - clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted); - smp_mb__after_atomic(); + if (unlikely(execlists->csb_use_mmio)) { + intel_uncore_forcewake_get(i915, execlists->fw_domains); + fw = true; - if (unlikely(execlists->csb_use_mmio)) { - if (!fw) { - intel_uncore_forcewake_get(i915, execlists->fw_domains); - fw = true; - } + buf = (u32 * __force) + (i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_BUF_LO(engine, 0))); - buf = (u32 * __force) - (i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_BUF_LO(engine, 0))); + head = readl(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine))); + tail = GEN8_CSB_WRITE_PTR(head); + head = GEN8_CSB_READ_PTR(head); + execlists->csb_head = head; + } else { + const int write_idx = + intel_hws_csb_write_index(i915) - + I915_HWS_CSB_BUF0_INDEX; - head = readl(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine))); - tail = GEN8_CSB_WRITE_PTR(head); - head = GEN8_CSB_READ_PTR(head); - execlists->csb_head = head; - } else { - const int write_idx = - intel_hws_csb_write_index(i915) - - I915_HWS_CSB_BUF0_INDEX; + head = execlists->csb_head; + tail = READ_ONCE(buf[write_idx]); + rmb(); /* Hopefully paired with a wmb() in HW */ + } + GEM_TRACE("%s cs-irq head=%d [%d%s], tail=%d [%d%s]\n", + engine->name, + head, GEN8_CSB_READ_PTR(readl(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)))), fw ? "" : "?", + tail, GEN8_CSB_WRITE_PTR(readl(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)))), fw ? "" : "?"); - head = execlists->csb_head; - tail = READ_ONCE(buf[write_idx]); - rmb(); /* Hopefully paired with a wmb() in HW */ + while (head != tail) { + struct i915_request *rq; + unsigned int status; + unsigned int count; + + if (++head == GEN8_CSB_ENTRIES) + head = 0; + + /* + * We are flying near dragons again. + * + * We hold a reference to the request in execlist_port[] + * but no more than that. We are operating in softirq + * context and so cannot hold any mutex or sleep. That + * prevents us stopping the requests we are processing + * in port[] from being retired simultaneously (the + * breadcrumb will be complete before we see the + * context-switch). As we only hold the reference to the + * request, any pointer chasing underneath the request + * is subject to a potential use-after-free. Thus we + * store all of the bookkeeping within port[] as + * required, and avoid using unguarded pointers beneath + * request itself. The same applies to the atomic + * status notifier. + */ + + status = READ_ONCE(buf[2 * head]); /* maybe mmio! */ + GEM_TRACE("%s csb[%d]: status=0x%08x:0x%08x, active=0x%x\n", + engine->name, head, + status, buf[2*head + 1], + execlists->active); + + if (status & (GEN8_CTX_STATUS_IDLE_ACTIVE | + GEN8_CTX_STATUS_PREEMPTED)) + execlists_set_active(execlists, + EXECLISTS_ACTIVE_HWACK); + if (status & GEN8_CTX_STATUS_ACTIVE_IDLE) + execlists_clear_active(execlists, + EXECLISTS_ACTIVE_HWACK); + + if (!(status & GEN8_CTX_STATUS_COMPLETED_MASK)) + continue; + + /* We should never get a COMPLETED | IDLE_ACTIVE! */ + GEM_BUG_ON(status & GEN8_CTX_STATUS_IDLE_ACTIVE); + + if (status & GEN8_CTX_STATUS_COMPLETE && + buf[2*head + 1] == execlists->preempt_complete_status) { + GEM_TRACE("%s preempt-idle\n", engine->name); + complete_preempt_context(execlists); + continue; } - GEM_TRACE("%s cs-irq head=%d [%d%s], tail=%d [%d%s]\n", + + if (status & GEN8_CTX_STATUS_PREEMPTED && + execlists_is_active(execlists, + EXECLISTS_ACTIVE_PREEMPT)) + continue; + + GEM_BUG_ON(!execlists_is_active(execlists, + EXECLISTS_ACTIVE_USER)); + + rq = port_unpack(port, &count); + GEM_TRACE("%s out[0]: ctx=%d.%d, global=%d (fence %llx:%d) (current %d), prio=%d\n", engine->name, - head, GEN8_CSB_READ_PTR(readl(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)))), fw ? "" : "?", - tail, GEN8_CSB_WRITE_PTR(readl(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)))), fw ? "" : "?"); + port->context_id, count, + rq ? rq->global_seqno : 0, + rq ? rq->fence.context : 0, + rq ? rq->fence.seqno : 0, + intel_engine_get_seqno(engine), + rq ? rq_prio(rq) : 0); - while (head != tail) { - struct i915_request *rq; - unsigned int status; - unsigned int count; + /* Check the context/desc id for this event matches */ + GEM_DEBUG_BUG_ON(buf[2 * head + 1] != port->context_id); - if (++head == GEN8_CSB_ENTRIES) - head = 0; + GEM_BUG_ON(count == 0); + if (--count == 0) { + /* + * On the final event corresponding to the + * submission of this context, we expect either + * an element-switch event or a completion + * event (and on completion, the active-idle + * marker). No more preemptions, lite-restore + * or otherwise. + */ + GEM_BUG_ON(status & GEN8_CTX_STATUS_PREEMPTED); + GEM_BUG_ON(port_isset(&port[1]) && + !(status & GEN8_CTX_STATUS_ELEMENT_SWITCH)); + GEM_BUG_ON(!port_isset(&port[1]) && + !(status & GEN8_CTX_STATUS_ACTIVE_IDLE)); /* - * We are flying near dragons again. - * - * We hold a reference to the request in execlist_port[] - * but no more than that. We are operating in softirq - * context and so cannot hold any mutex or sleep. That - * prevents us stopping the requests we are processing - * in port[] from being retired simultaneously (the - * breadcrumb will be complete before we see the - * context-switch). As we only hold the reference to the - * request, any pointer chasing underneath the request - * is subject to a potential use-after-free. Thus we - * store all of the bookkeeping within port[] as - * required, and avoid using unguarded pointers beneath - * request itself. The same applies to the atomic - * status notifier. + * We rely on the hardware being strongly + * ordered, that the breadcrumb write is + * coherent (visible from the CPU) before the + * user interrupt and CSB is processed. */ + GEM_BUG_ON(!i915_request_completed(rq)); - status = READ_ONCE(buf[2 * head]); /* maybe mmio! */ - GEM_TRACE("%s csb[%d]: status=0x%08x:0x%08x, active=0x%x\n", - engine->name, head, - status, buf[2*head + 1], - execlists->active); + execlists_context_schedule_out(rq, + INTEL_CONTEXT_SCHEDULE_OUT); + i915_request_put(rq); - if (status & (GEN8_CTX_STATUS_IDLE_ACTIVE | - GEN8_CTX_STATUS_PREEMPTED)) - execlists_set_active(execlists, - EXECLISTS_ACTIVE_HWACK); - if (status & GEN8_CTX_STATUS_ACTIVE_IDLE) - execlists_clear_active(execlists, - EXECLISTS_ACTIVE_HWACK); + GEM_TRACE("%s completed ctx=%d\n", + engine->name, port->context_id); - if (!(status & GEN8_CTX_STATUS_COMPLETED_MASK)) - continue; - - /* We should never get a COMPLETED | IDLE_ACTIVE! */ - GEM_BUG_ON(status & GEN8_CTX_STATUS_IDLE_ACTIVE); - - if (status & GEN8_CTX_STATUS_COMPLETE && - buf[2*head + 1] == execlists->preempt_complete_status) { - GEM_TRACE("%s preempt-idle\n", engine->name); - complete_preempt_context(execlists); - continue; - } - - if (status & GEN8_CTX_STATUS_PREEMPTED && - execlists_is_active(execlists, - EXECLISTS_ACTIVE_PREEMPT)) - continue; - - GEM_BUG_ON(!execlists_is_active(execlists, - EXECLISTS_ACTIVE_USER)); - - rq = port_unpack(port, &count); - GEM_TRACE("%s out[0]: ctx=%d.%d, global=%d (fence %llx:%d) (current %d), prio=%d\n", - engine->name, - port->context_id, count, - rq ? rq->global_seqno : 0, - rq ? rq->fence.context : 0, - rq ? rq->fence.seqno : 0, - intel_engine_get_seqno(engine), - rq ? rq_prio(rq) : 0); - - /* Check the context/desc id for this event matches */ - GEM_DEBUG_BUG_ON(buf[2 * head + 1] != port->context_id); - - GEM_BUG_ON(count == 0); - if (--count == 0) { - /* - * On the final event corresponding to the - * submission of this context, we expect either - * an element-switch event or a completion - * event (and on completion, the active-idle - * marker). No more preemptions, lite-restore - * or otherwise. - */ - GEM_BUG_ON(status & GEN8_CTX_STATUS_PREEMPTED); - GEM_BUG_ON(port_isset(&port[1]) && - !(status & GEN8_CTX_STATUS_ELEMENT_SWITCH)); - GEM_BUG_ON(!port_isset(&port[1]) && - !(status & GEN8_CTX_STATUS_ACTIVE_IDLE)); - - /* - * We rely on the hardware being strongly - * ordered, that the breadcrumb write is - * coherent (visible from the CPU) before the - * user interrupt and CSB is processed. - */ - GEM_BUG_ON(!i915_request_completed(rq)); - - execlists_context_schedule_out(rq, - INTEL_CONTEXT_SCHEDULE_OUT); - i915_request_put(rq); - - GEM_TRACE("%s completed ctx=%d\n", - engine->name, port->context_id); - - port = execlists_port_complete(execlists, port); - if (port_isset(port)) - execlists_user_begin(execlists, port); - else - execlists_user_end(execlists); - } else { - port_set(port, port_pack(rq, count)); - } + port = execlists_port_complete(execlists, port); + if (port_isset(port)) + execlists_user_begin(execlists, port); + else + execlists_user_end(execlists); + } else { + port_set(port, port_pack(rq, count)); } + } - if (head != execlists->csb_head) { - execlists->csb_head = head; - writel(_MASKED_FIELD(GEN8_CSB_READ_PTR_MASK, head << 8), - i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine))); - } - } while (test_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted)); + if (head != execlists->csb_head) { + execlists->csb_head = head; + writel(_MASKED_FIELD(GEN8_CSB_READ_PTR_MASK, head << 8), + i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine))); + } if (unlikely(fw)) intel_uncore_forcewake_put(i915, execlists->fw_domains); From bc4237ec8deaaee5f75d1afa91a19bfe6f948c6f Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 28 Jun 2018 21:12:07 +0100 Subject: [PATCH 040/114] drm/i915/execlists: Unify CSB access pointers Following the removal of the last workarounds, the only CSB mmio access is for the old vGPU interface. The mmio registers presented by vGPU do not require forcewake and can be treated as ordinary volatile memory, i.e. they behave just like the HWSP access just at a different location. We can reduce the CSB access to a set of read/write/buffer pointers and treat the various paths identically and not worry about forcewake. (Forcewake is nightmare for worstcase latency, and we want to process this all with irqsoff -- no latency allowed!) v2: Comments, comments, comments. Well, 2 bonus comments. Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20180628201211.13837-5-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_engine_cs.c | 12 --- drivers/gpu/drm/i915/intel_lrc.c | 133 ++++++++++++------------ drivers/gpu/drm/i915/intel_ringbuffer.h | 23 ++-- 3 files changed, 82 insertions(+), 86 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index d3264bd6e9dc..7209c22798e6 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -25,7 +25,6 @@ #include #include "i915_drv.h" -#include "i915_vgpu.h" #include "intel_ringbuffer.h" #include "intel_lrc.h" @@ -456,21 +455,10 @@ static void intel_engine_init_batch_pool(struct intel_engine_cs *engine) i915_gem_batch_pool_init(&engine->batch_pool, engine); } -static bool csb_force_mmio(struct drm_i915_private *i915) -{ - /* Older GVT emulation depends upon intercepting CSB mmio */ - if (intel_vgpu_active(i915) && !intel_vgpu_has_hwsp_emulation(i915)) - return true; - - return false; -} - static void intel_engine_init_execlist(struct intel_engine_cs *engine) { struct intel_engine_execlists * const execlists = &engine->execlists; - execlists->csb_use_mmio = csb_force_mmio(engine->i915); - execlists->port_mask = 1; BUILD_BUG_ON_NOT_POWER_OF_2(execlists_num_ports(execlists)); GEM_BUG_ON(execlists_num_ports(execlists) > EXECLIST_MAX_PORTS); diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index c8d3d41011ce..81f000c245ab 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -137,6 +137,7 @@ #include #include "i915_drv.h" #include "i915_gem_render_state.h" +#include "i915_vgpu.h" #include "intel_lrc_reg.h" #include "intel_mocs.h" #include "intel_workarounds.h" @@ -954,44 +955,40 @@ static void process_csb(struct intel_engine_cs *engine) { struct intel_engine_execlists * const execlists = &engine->execlists; struct execlist_port *port = execlists->port; - struct drm_i915_private *i915 = engine->i915; - - /* The HWSP contains a (cacheable) mirror of the CSB */ - const u32 *buf = - &engine->status_page.page_addr[I915_HWS_CSB_BUF0_INDEX]; - unsigned int head, tail; - bool fw = false; + const u32 * const buf = execlists->csb_status; + u8 head, tail; /* Clear before reading to catch new interrupts */ clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted); smp_mb__after_atomic(); - if (unlikely(execlists->csb_use_mmio)) { - intel_uncore_forcewake_get(i915, execlists->fw_domains); - fw = true; + /* + * Note that csb_write, csb_status may be either in HWSP or mmio. + * When reading from the csb_write mmio register, we have to be + * careful to only use the GEN8_CSB_WRITE_PTR portion, which is + * the low 4bits. As it happens we know the next 4bits are always + * zero and so we can simply masked off the low u8 of the register + * and treat it identically to reading from the HWSP (without having + * to use explicit shifting and masking, and probably bifurcating + * the code to handle the legacy mmio read). + */ + head = execlists->csb_head; + tail = READ_ONCE(*execlists->csb_write); + GEM_TRACE("%s cs-irq head=%d, tail=%d\n", engine->name, head, tail); + if (unlikely(head == tail)) + return; - buf = (u32 * __force) - (i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_BUF_LO(engine, 0))); + /* + * Hopefully paired with a wmb() in HW! + * + * We must complete the read of the write pointer before any reads + * from the CSB, so that we do not see stale values. Without an rmb + * (lfence) the HW may speculatively perform the CSB[] reads *before* + * we perform the READ_ONCE(*csb_write). + */ + rmb(); - head = readl(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine))); - tail = GEN8_CSB_WRITE_PTR(head); - head = GEN8_CSB_READ_PTR(head); - execlists->csb_head = head; - } else { - const int write_idx = - intel_hws_csb_write_index(i915) - - I915_HWS_CSB_BUF0_INDEX; - - head = execlists->csb_head; - tail = READ_ONCE(buf[write_idx]); - rmb(); /* Hopefully paired with a wmb() in HW */ - } - GEM_TRACE("%s cs-irq head=%d [%d%s], tail=%d [%d%s]\n", - engine->name, - head, GEN8_CSB_READ_PTR(readl(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)))), fw ? "" : "?", - tail, GEN8_CSB_WRITE_PTR(readl(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)))), fw ? "" : "?"); - - while (head != tail) { + do { struct i915_request *rq; unsigned int status; unsigned int count; @@ -1017,12 +1014,12 @@ static void process_csb(struct intel_engine_cs *engine) * status notifier. */ - status = READ_ONCE(buf[2 * head]); /* maybe mmio! */ GEM_TRACE("%s csb[%d]: status=0x%08x:0x%08x, active=0x%x\n", engine->name, head, - status, buf[2*head + 1], + buf[2 * head + 0], buf[2 * head + 1], execlists->active); + status = buf[2 * head]; if (status & (GEN8_CTX_STATUS_IDLE_ACTIVE | GEN8_CTX_STATUS_PREEMPTED)) execlists_set_active(execlists, @@ -1104,16 +1101,11 @@ static void process_csb(struct intel_engine_cs *engine) } else { port_set(port, port_pack(rq, count)); } - } + } while (head != tail); - if (head != execlists->csb_head) { - execlists->csb_head = head; - writel(_MASKED_FIELD(GEN8_CSB_READ_PTR_MASK, head << 8), - i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine))); - } - - if (unlikely(fw)) - intel_uncore_forcewake_put(i915, execlists->fw_domains); + writel(_MASKED_FIELD(GEN8_CSB_READ_PTR_MASK, head << 8), + execlists->csb_read); + execlists->csb_head = head; } /* @@ -2431,28 +2423,11 @@ logical_ring_default_irqs(struct intel_engine_cs *engine) static void logical_ring_setup(struct intel_engine_cs *engine) { - struct drm_i915_private *dev_priv = engine->i915; - enum forcewake_domains fw_domains; - intel_engine_setup_common(engine); /* Intentionally left blank. */ engine->buffer = NULL; - fw_domains = intel_uncore_forcewake_for_reg(dev_priv, - RING_ELSP(engine), - FW_REG_WRITE); - - fw_domains |= intel_uncore_forcewake_for_reg(dev_priv, - RING_CONTEXT_STATUS_PTR(engine), - FW_REG_READ | FW_REG_WRITE); - - fw_domains |= intel_uncore_forcewake_for_reg(dev_priv, - RING_CONTEXT_STATUS_BUF_BASE(engine), - FW_REG_READ); - - engine->execlists.fw_domains = fw_domains; - tasklet_init(&engine->execlists.tasklet, execlists_submission_tasklet, (unsigned long)engine); @@ -2460,34 +2435,56 @@ logical_ring_setup(struct intel_engine_cs *engine) logical_ring_default_irqs(engine); } +static bool csb_force_mmio(struct drm_i915_private *i915) +{ + /* Older GVT emulation depends upon intercepting CSB mmio */ + return intel_vgpu_active(i915) && !intel_vgpu_has_hwsp_emulation(i915); +} + static int logical_ring_init(struct intel_engine_cs *engine) { + struct drm_i915_private *i915 = engine->i915; + struct intel_engine_execlists * const execlists = &engine->execlists; int ret; ret = intel_engine_init_common(engine); if (ret) goto error; - if (HAS_LOGICAL_RING_ELSQ(engine->i915)) { - engine->execlists.submit_reg = engine->i915->regs + + if (HAS_LOGICAL_RING_ELSQ(i915)) { + execlists->submit_reg = i915->regs + i915_mmio_reg_offset(RING_EXECLIST_SQ_CONTENTS(engine)); - engine->execlists.ctrl_reg = engine->i915->regs + + execlists->ctrl_reg = i915->regs + i915_mmio_reg_offset(RING_EXECLIST_CONTROL(engine)); } else { - engine->execlists.submit_reg = engine->i915->regs + + execlists->submit_reg = i915->regs + i915_mmio_reg_offset(RING_ELSP(engine)); } - engine->execlists.preempt_complete_status = ~0u; - if (engine->i915->preempt_context) { + execlists->preempt_complete_status = ~0u; + if (i915->preempt_context) { struct intel_context *ce = - to_intel_context(engine->i915->preempt_context, engine); + to_intel_context(i915->preempt_context, engine); - engine->execlists.preempt_complete_status = + execlists->preempt_complete_status = upper_32_bits(ce->lrc_desc); } - engine->execlists.csb_head = GEN8_CSB_ENTRIES - 1; + execlists->csb_head = GEN8_CSB_ENTRIES - 1; + execlists->csb_read = + i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)); + if (csb_force_mmio(i915)) { + execlists->csb_status = (u32 __force *) + (i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_BUF_LO(engine, 0))); + + execlists->csb_write = (u32 __force *)execlists->csb_read; + } else { + execlists->csb_status = + &engine->status_page.page_addr[I915_HWS_CSB_BUF0_INDEX]; + + execlists->csb_write = + &engine->status_page.page_addr[intel_hws_csb_write_index(i915)]; + } return 0; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 78f01a35823a..25792889dbf4 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -300,24 +300,35 @@ struct intel_engine_execlists { struct rb_node *first; /** - * @fw_domains: forcewake domains for irq tasklet + * @csb_read: control register for Context Switch buffer + * + * Note this register is always in mmio. */ - unsigned int fw_domains; + u32 __iomem *csb_read; /** - * @csb_head: context status buffer head + * @csb_write: control register for Context Switch buffer + * + * Note this register may be either mmio or HWSP shadow. */ - unsigned int csb_head; + u32 *csb_write; /** - * @csb_use_mmio: access csb through mmio, instead of hwsp + * @csb_status: status array for Context Switch buffer + * + * Note these register may be either mmio or HWSP shadow. */ - bool csb_use_mmio; + u32 *csb_status; /** * @preempt_complete_status: expected CSB upon completing preemption */ u32 preempt_complete_status; + + /** + * @csb_head: context status buffer head + */ + u8 csb_head; }; #define INTEL_ENGINE_CS_MAX_NAME 8 From f4b58f0438ba53002e2032e3817c8e847da9a9e1 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 28 Jun 2018 21:12:08 +0100 Subject: [PATCH 041/114] drm/i915/execlists: Reset CSB write pointer after reset On HW reset, the HW clears the write pointer (to 0). But since it also writes its first CSB entry to slot 0, we need to reset the write pointer back to the element before (so the first entry we read is 0). This is required for the next patch, where we trust the CSB completely! v2: Use _MASKED_FIELD v3: Store the reset value, so that we differentiate between mmio/hwsp transparently and without pretense. Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20180628201211.13837-6-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_lrc.c | 23 +++++++++++++++++++++-- drivers/gpu/drm/i915/intel_ringbuffer.h | 9 +++++++++ 2 files changed, 30 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 81f000c245ab..88289d1d07e7 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -885,6 +885,21 @@ static void reset_irq(struct intel_engine_cs *engine) clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted); } +static void reset_csb_pointers(struct intel_engine_execlists *execlists) +{ + /* + * After a reset, the HW starts writing into CSB entry [0]. We + * therefore have to set our HEAD pointer back one entry so that + * the *first* entry we check is entry 0. To complicate this further, + * as we don't wait for the first interrupt after reset, we have to + * fake the HW write to point back to the last entry so that our + * inline comparison of our cached head position against the last HW + * write works even before the first interrupt. + */ + execlists->csb_head = execlists->csb_write_reset; + WRITE_ONCE(*execlists->csb_write, execlists->csb_write_reset); +} + static void execlists_cancel_requests(struct intel_engine_cs *engine) { struct intel_engine_execlists * const execlists = &engine->execlists; @@ -1971,7 +1986,7 @@ static void execlists_reset(struct intel_engine_cs *engine, __unwind_incomplete_requests(engine); /* Following the reset, we need to reload the CSB read/write pointers */ - engine->execlists.csb_head = GEN8_CSB_ENTRIES - 1; + reset_csb_pointers(&engine->execlists); spin_unlock_irqrestore(&engine->timeline.lock, flags); @@ -2470,7 +2485,6 @@ static int logical_ring_init(struct intel_engine_cs *engine) upper_32_bits(ce->lrc_desc); } - execlists->csb_head = GEN8_CSB_ENTRIES - 1; execlists->csb_read = i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)); if (csb_force_mmio(i915)) { @@ -2478,13 +2492,18 @@ static int logical_ring_init(struct intel_engine_cs *engine) (i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_BUF_LO(engine, 0))); execlists->csb_write = (u32 __force *)execlists->csb_read; + execlists->csb_write_reset = + _MASKED_FIELD(GEN8_CSB_WRITE_PTR_MASK, + GEN8_CSB_ENTRIES - 1); } else { execlists->csb_status = &engine->status_page.page_addr[I915_HWS_CSB_BUF0_INDEX]; execlists->csb_write = &engine->status_page.page_addr[intel_hws_csb_write_index(i915)]; + execlists->csb_write_reset = GEN8_CSB_ENTRIES - 1; } + reset_csb_pointers(execlists); return 0; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 25792889dbf4..b02620990859 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -325,6 +325,15 @@ struct intel_engine_execlists { */ u32 preempt_complete_status; + /** + * @csb_write_reset: reset value for CSB write pointer + * + * As the CSB write pointer maybe either in HWSP or as a field + * inside an mmio register, we want to reprogram it slightly + * differently to avoid later confusion. + */ + u32 csb_write_reset; + /** * @csb_head: context status buffer head */ From 3800cd1953059b64593c7ca1022071b96bc40ef3 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 28 Jun 2018 21:12:09 +0100 Subject: [PATCH 042/114] drm/i915/execlists: Stop storing the CSB read pointer in the mmio register As we now never read back our current head position from the CSB pointers register, and the HW itself doesn't use it to prevent overwriting unread CSB entries, we do not need to keep updating the register. As it turns out this register is not listed as being shadowed, and so requires forcewake -- but we haven't been taking forcewake around it so the writes has probably been regularly dropped. Fortuitously, we only read the value after a reset where it did not matter, and zero was the right answer (well, close enough). Mika pointed out that this was how we used to do it (accidentally!) before he fixed it in commit cc53699b25b5 ("drm/i915: Use masked write for Context Status Buffer Pointer"). References: cc53699b25b5 ("drm/i915: Use masked write for Context Status Buffer Pointer") Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Cc: Daniele Ceraolo Spurio Cc: Mika Kuoppala Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20180628201211.13837-7-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_lrc.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 88289d1d07e7..ed81f8ac60ca 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -1118,8 +1118,6 @@ static void process_csb(struct intel_engine_cs *engine) } } while (head != tail); - writel(_MASKED_FIELD(GEN8_CSB_READ_PTR_MASK, head << 8), - execlists->csb_read); execlists->csb_head = head; } From fd8526e509020ed30298ab57d03edc97bef83962 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 28 Jun 2018 21:12:10 +0100 Subject: [PATCH 043/114] drm/i915/execlists: Trust the CSB Now that we use the CSB stored in the CPU friendly HWSP, we do not need to track interrupts for when the mmio CSB registers are valid and can just check where we read up to last from the cached HWSP. This means we can forgo the atomic bit tracking from interrupt, and in the next patch it means we can check the CSB at any time. v2: Change the splitting inside reset_prepare, we only want to lose testing the interrupt in this patch, the next patch requires the change in locking Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20180628201211.13837-8-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_irq.c | 11 +++------- drivers/gpu/drm/i915/intel_engine_cs.c | 8 ++----- drivers/gpu/drm/i915/intel_lrc.c | 29 ++++--------------------- drivers/gpu/drm/i915/intel_ringbuffer.h | 1 - 4 files changed, 9 insertions(+), 40 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index c95cb278a8bc..435a4444314c 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -1493,15 +1493,10 @@ static void snb_gt_irq_handler(struct drm_i915_private *dev_priv, static void gen8_cs_irq_handler(struct intel_engine_cs *engine, u32 iir) { - struct intel_engine_execlists * const execlists = &engine->execlists; bool tasklet = false; - if (iir & GT_CONTEXT_SWITCH_INTERRUPT) { - if (READ_ONCE(engine->execlists.active)) { - set_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted); - tasklet = true; - } - } + if (iir & GT_CONTEXT_SWITCH_INTERRUPT) + tasklet = true; if (iir & GT_RENDER_USER_INTERRUPT) { notify_ring(engine); @@ -1509,7 +1504,7 @@ gen8_cs_irq_handler(struct intel_engine_cs *engine, u32 iir) } if (tasklet) - tasklet_hi_schedule(&execlists->tasklet); + tasklet_hi_schedule(&engine->execlists.tasklet); } static void gen8_gt_irq_ack(struct drm_i915_private *i915, diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index 7209c22798e6..ace93958689e 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -1353,12 +1353,10 @@ static void intel_engine_print_registers(const struct intel_engine_cs *engine, ptr = I915_READ(RING_CONTEXT_STATUS_PTR(engine)); read = GEN8_CSB_READ_PTR(ptr); write = GEN8_CSB_WRITE_PTR(ptr); - drm_printf(m, "\tExeclist CSB read %d [%d cached], write %d [%d from hws], interrupt posted? %s, tasklet queued? %s (%s)\n", + drm_printf(m, "\tExeclist CSB read %d [%d cached], write %d [%d from hws], tasklet queued? %s (%s)\n", read, execlists->csb_head, write, intel_read_status_page(engine, intel_hws_csb_write_index(engine->i915)), - yesno(test_bit(ENGINE_IRQ_EXECLIST, - &engine->irq_posted)), yesno(test_bit(TASKLET_STATE_SCHED, &engine->execlists.tasklet.state)), enableddisabled(!atomic_read(&engine->execlists.tasklet.count))); @@ -1570,11 +1568,9 @@ void intel_engine_dump(struct intel_engine_cs *engine, spin_unlock(&b->rb_lock); local_irq_restore(flags); - drm_printf(m, "IRQ? 0x%lx (breadcrumbs? %s) (execlists? %s)\n", + drm_printf(m, "IRQ? 0x%lx (breadcrumbs? %s)\n", engine->irq_posted, yesno(test_bit(ENGINE_IRQ_BREADCRUMB, - &engine->irq_posted)), - yesno(test_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted))); drm_printf(m, "HWSP:\n"); diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index ed81f8ac60ca..d835da128a17 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -875,14 +875,6 @@ static void reset_irq(struct intel_engine_cs *engine) smp_store_mb(engine->execlists.active, 0); clear_gtiir(engine); - - /* - * The port is checked prior to scheduling a tasklet, but - * just in case we have suspended the tasklet to do the - * wedging make sure that when it wakes, it decides there - * is no work to do by clearing the irq_posted bit. - */ - clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted); } static void reset_csb_pointers(struct intel_engine_execlists *execlists) @@ -973,10 +965,6 @@ static void process_csb(struct intel_engine_cs *engine) const u32 * const buf = execlists->csb_status; u8 head, tail; - /* Clear before reading to catch new interrupts */ - clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted); - smp_mb__after_atomic(); - /* * Note that csb_write, csb_status may be either in HWSP or mmio. * When reading from the csb_write mmio register, we have to be @@ -1129,11 +1117,10 @@ static void execlists_submission_tasklet(unsigned long data) { struct intel_engine_cs * const engine = (struct intel_engine_cs *)data; - GEM_TRACE("%s awake?=%d, active=%x, irq-posted?=%d\n", + GEM_TRACE("%s awake?=%d, active=%x\n", engine->name, engine->i915->gt.awake, - engine->execlists.active, - test_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted)); + engine->execlists.active); /* * We can skip acquiring intel_runtime_pm_get() here as it was taken @@ -1145,14 +1132,7 @@ static void execlists_submission_tasklet(unsigned long data) */ GEM_BUG_ON(!engine->i915->gt.awake); - /* - * Prefer doing test_and_clear_bit() as a two stage operation to avoid - * imposing the cost of a locked atomic transaction when submitting a - * new request (outside of the context-switch interrupt). - */ - if (test_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted)) - process_csb(engine); - + process_csb(engine); if (!execlists_is_active(&engine->execlists, EXECLISTS_ACTIVE_PREEMPT)) execlists_dequeue(engine); } @@ -1920,8 +1900,7 @@ execlists_reset_prepare(struct intel_engine_cs *engine) * and avoid blaming an innocent request if the stall was due to the * preemption itself. */ - if (test_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted)) - process_csb(engine); + process_csb(engine); /* * The last active request can then be no later than the last request diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index b02620990859..ce6cc2a6cf7a 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -367,7 +367,6 @@ struct intel_engine_cs { unsigned long irq_posted; #define ENGINE_IRQ_BREADCRUMB 0 -#define ENGINE_IRQ_EXECLIST 1 /* Rather than have every client wait upon all user interrupts, * with the herd waking after every interrupt and each doing the From 9512f985c32d45ab439a210981e0d73071da395a Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 28 Jun 2018 21:12:11 +0100 Subject: [PATCH 044/114] drm/i915/execlists: Direct submission of new requests (avoid tasklet/ksoftirqd) Back in commit 27af5eea54d1 ("drm/i915: Move execlists irq handler to a bottom half"), we came to the conclusion that running our CSB processing and ELSP submission from inside the irq handler was a bad idea. A really bad idea as we could impose nearly 1s latency on other users of the system, on average! Deferring our work to a tasklet allowed us to do the processing with irqs enabled, reducing the impact to an average of about 50us. We have since eradicated the use of forcewaked mmio from inside the CSB processing and ELSP submission, bringing the impact down to around 5us (on Kabylake); an order of magnitude better than our measurements 2 years ago on Broadwell and only about 2x worse on average than the gem_syslatency on an unladen system. In this iteration of the tasklet-vs-direct submission debate, we seek a compromise where by we submit new requests immediately to the HW but defer processing the CS interrupt onto a tasklet. We gain the advantage of low-latency and ksoftirqd avoidance when waking up the HW, while avoiding the system-wide starvation of our CS irq-storms. Comparing the impact on the maximum latency observed (that is the time stolen from an RT process) over a 120s interval, repeated several times (using gem_syslatency, similar to RT's cyclictest) while the system is fully laden with i915 nops, we see that direct submission an actually improve the worse case. Maximum latency in microseconds of a third party RT thread (gem_syslatency -t 120 -f 2) x Always using tasklets (a couple of >1000us outliers removed) + Only using tasklets from CS irq, direct submission of requests +------------------------------------------------------------------------+ | + | | + | | + | | + + | | + + + | | + + + + x x x | | +++ + + + x x x x x x | | +++ + ++ + + *x x x x x x | | +++ + ++ + * *x x * x x x | | + +++ + ++ * * +*xxx * x x xx | | * +++ + ++++* *x+**xx+ * x x xxxx x | | **x++++*++**+*x*x****x+ * +x xx xxxx x x | |x* ******+***************++*+***xxxxxx* xx*x xxx + x+| | |__________MA___________| | | |______M__A________| | +------------------------------------------------------------------------+ N Min Max Median Avg Stddev x 118 91 186 124 125.28814 16.279137 + 120 92 187 109 112.00833 13.458617 Difference at 95.0% confidence -13.2798 +/- 3.79219 -10.5994% +/- 3.02677% (Student's t, pooled s = 14.9237) However the mean latency is adversely affected: Mean latency in microseconds of a third party RT thread (gem_syslatency -t 120 -f 1) x Always using tasklets + Only using tasklets from CS irq, direct submission of requests +------------------------------------------------------------------------+ | xxxxxx + ++ | | xxxxxx + ++ | | xxxxxx + +++ ++ | | xxxxxxx +++++ ++ | | xxxxxxx +++++ ++ | | xxxxxxx +++++ +++ | | xxxxxxx + ++++++++++ | | xxxxxxxx ++ ++++++++++ | | xxxxxxxx ++ ++++++++++ | | xxxxxxxxxx +++++++++++++++ | | xxxxxxxxxxx x +++++++++++++++ | |x xxxxxxxxxxxxx x + + ++++++++++++++++++ +| | |__A__| | | |____A___| | +------------------------------------------------------------------------+ N Min Max Median Avg Stddev x 120 3.506 3.727 3.631 3.6321417 0.02773109 + 120 3.834 4.149 4.039 4.0375167 0.041221676 Difference at 95.0% confidence 0.405375 +/- 0.00888913 11.1608% +/- 0.244735% (Student's t, pooled s = 0.03513) However, since the mean latency corresponds to the amount of irqsoff processing we have to do for a CS interrupt, we only need to speed that up to benefit not just system latency but our own throughput. v2: Remember to defer submissions when under reset. v4: Only use direct submission for new requests v5: Be aware that with mixing direct tasklet evaluation and deferred tasklets, we may end up idling before running the deferred tasklet. v6: Remove the redudant likely() from tasklet_is_enabled(), restrict the annotation to reset_in_progress(). v7: Take the full timeline.lock when enabling perf_pmu stats as the tasklet is no longer a valid guard. A consequence is that the stats are now only valid for engines also using the timeline.lock to process state. Testcase: igt/gem_exec_latency/*rthog* References: 27af5eea54d1 ("drm/i915: Move execlists irq handler to a bottom half") Suggested-by: Tvrtko Ursulin Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20180628201211.13837-9-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem.h | 5 ++ drivers/gpu/drm/i915/intel_engine_cs.c | 8 +- drivers/gpu/drm/i915/intel_lrc.c | 102 ++++++++++++++++--------- 3 files changed, 75 insertions(+), 40 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.h b/drivers/gpu/drm/i915/i915_gem.h index 261da577829a..e46592956872 100644 --- a/drivers/gpu/drm/i915/i915_gem.h +++ b/drivers/gpu/drm/i915/i915_gem.h @@ -88,4 +88,9 @@ static inline void __tasklet_enable_sync_once(struct tasklet_struct *t) tasklet_kill(t); } +static inline bool __tasklet_is_enabled(const struct tasklet_struct *t) +{ + return !atomic_read(&t->count); +} + #endif /* __I915_GEM_H__ */ diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index ace93958689e..01862884a436 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -1619,8 +1619,8 @@ int intel_enable_engine_stats(struct intel_engine_cs *engine) if (!intel_engine_supports_stats(engine)) return -ENODEV; - tasklet_disable(&execlists->tasklet); - write_seqlock_irqsave(&engine->stats.lock, flags); + spin_lock_irqsave(&engine->timeline.lock, flags); + write_seqlock(&engine->stats.lock); if (unlikely(engine->stats.enabled == ~0)) { err = -EBUSY; @@ -1644,8 +1644,8 @@ int intel_enable_engine_stats(struct intel_engine_cs *engine) } unlock: - write_sequnlock_irqrestore(&engine->stats.lock, flags); - tasklet_enable(&execlists->tasklet); + write_sequnlock(&engine->stats.lock); + spin_unlock_irqrestore(&engine->timeline.lock, flags); return err; } diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index d835da128a17..6ab6ddb103d1 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -563,12 +563,14 @@ static void complete_preempt_context(struct intel_engine_execlists *execlists) GEM_BUG_ON(!execlists_is_active(execlists, EXECLISTS_ACTIVE_PREEMPT)); execlists_cancel_port_requests(execlists); - execlists_unwind_incomplete_requests(execlists); + __unwind_incomplete_requests(container_of(execlists, + struct intel_engine_cs, + execlists)); execlists_clear_active(execlists, EXECLISTS_ACTIVE_PREEMPT); } -static void __execlists_dequeue(struct intel_engine_cs *engine) +static void execlists_dequeue(struct intel_engine_cs *engine) { struct intel_engine_execlists * const execlists = &engine->execlists; struct execlist_port *port = execlists->port; @@ -578,9 +580,8 @@ static void __execlists_dequeue(struct intel_engine_cs *engine) struct rb_node *rb; bool submit = false; - lockdep_assert_held(&engine->timeline.lock); - - /* Hardware submission is through 2 ports. Conceptually each port + /* + * Hardware submission is through 2 ports. Conceptually each port * has a (RING_START, RING_HEAD, RING_TAIL) tuple. RING_START is * static for a context, and unique to each, so we only execute * requests belonging to a single context from each ring. RING_HEAD @@ -770,15 +771,6 @@ done: !port_isset(engine->execlists.port)); } -static void execlists_dequeue(struct intel_engine_cs *engine) -{ - unsigned long flags; - - spin_lock_irqsave(&engine->timeline.lock, flags); - __execlists_dequeue(engine); - spin_unlock_irqrestore(&engine->timeline.lock, flags); -} - void execlists_cancel_port_requests(struct intel_engine_execlists * const execlists) { @@ -958,6 +950,12 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine) spin_unlock_irqrestore(&engine->timeline.lock, flags); } +static inline bool +reset_in_progress(const struct intel_engine_execlists *execlists) +{ + return unlikely(!__tasklet_is_enabled(&execlists->tasklet)); +} + static void process_csb(struct intel_engine_cs *engine) { struct intel_engine_execlists * const execlists = &engine->execlists; @@ -1109,18 +1107,9 @@ static void process_csb(struct intel_engine_cs *engine) execlists->csb_head = head; } -/* - * Check the unread Context Status Buffers and manage the submission of new - * contexts to the ELSP accordingly. - */ -static void execlists_submission_tasklet(unsigned long data) +static void __execlists_submission_tasklet(struct intel_engine_cs *const engine) { - struct intel_engine_cs * const engine = (struct intel_engine_cs *)data; - - GEM_TRACE("%s awake?=%d, active=%x\n", - engine->name, - engine->i915->gt.awake, - engine->execlists.active); + lockdep_assert_held(&engine->timeline.lock); /* * We can skip acquiring intel_runtime_pm_get() here as it was taken @@ -1137,6 +1126,28 @@ static void execlists_submission_tasklet(unsigned long data) execlists_dequeue(engine); } +/* + * Check the unread Context Status Buffers and manage the submission of new + * contexts to the ELSP accordingly. + */ +static void execlists_submission_tasklet(unsigned long data) +{ + struct intel_engine_cs * const engine = (struct intel_engine_cs *)data; + unsigned long flags; + + GEM_TRACE("%s awake?=%d, active=%x\n", + engine->name, + engine->i915->gt.awake, + engine->execlists.active); + + spin_lock_irqsave(&engine->timeline.lock, flags); + + if (engine->i915->gt.awake) /* we may be delayed until after we idle! */ + __execlists_submission_tasklet(engine); + + spin_unlock_irqrestore(&engine->timeline.lock, flags); +} + static void queue_request(struct intel_engine_cs *engine, struct i915_sched_node *node, int prio) @@ -1145,16 +1156,30 @@ static void queue_request(struct intel_engine_cs *engine, &lookup_priolist(engine, prio)->requests); } -static void __submit_queue(struct intel_engine_cs *engine, int prio) +static void __update_queue(struct intel_engine_cs *engine, int prio) { engine->execlists.queue_priority = prio; - tasklet_hi_schedule(&engine->execlists.tasklet); +} + +static void __submit_queue_imm(struct intel_engine_cs *engine) +{ + struct intel_engine_execlists * const execlists = &engine->execlists; + + if (reset_in_progress(execlists)) + return; /* defer until we restart the engine following reset */ + + if (execlists->tasklet.func == execlists_submission_tasklet) + __execlists_submission_tasklet(engine); + else + tasklet_hi_schedule(&execlists->tasklet); } static void submit_queue(struct intel_engine_cs *engine, int prio) { - if (prio > engine->execlists.queue_priority) - __submit_queue(engine, prio); + if (prio > engine->execlists.queue_priority) { + __update_queue(engine, prio); + __submit_queue_imm(engine); + } } static void execlists_submit_request(struct i915_request *request) @@ -1166,11 +1191,12 @@ static void execlists_submit_request(struct i915_request *request) spin_lock_irqsave(&engine->timeline.lock, flags); queue_request(engine, &request->sched, rq_prio(request)); - submit_queue(engine, rq_prio(request)); GEM_BUG_ON(!engine->execlists.first); GEM_BUG_ON(list_empty(&request->sched.link)); + submit_queue(engine, rq_prio(request)); + spin_unlock_irqrestore(&engine->timeline.lock, flags); } @@ -1297,8 +1323,11 @@ static void execlists_schedule(struct i915_request *request, } if (prio > engine->execlists.queue_priority && - i915_sw_fence_done(&sched_to_request(node)->submit)) - __submit_queue(engine, prio); + i915_sw_fence_done(&sched_to_request(node)->submit)) { + /* defer submission until after all of our updates */ + __update_queue(engine, prio); + tasklet_hi_schedule(&engine->execlists.tasklet); + } } spin_unlock_irq(&engine->timeline.lock); @@ -1879,6 +1908,7 @@ execlists_reset_prepare(struct intel_engine_cs *engine) { struct intel_engine_execlists * const execlists = &engine->execlists; struct i915_request *request, *active; + unsigned long flags; GEM_TRACE("%s\n", engine->name); @@ -1893,6 +1923,8 @@ execlists_reset_prepare(struct intel_engine_cs *engine) */ __tasklet_disable_sync_once(&execlists->tasklet); + spin_lock_irqsave(&engine->timeline.lock, flags); + /* * We want to flush the pending context switches, having disabled * the tasklet above, we can assume exclusive access to the execlists. @@ -1910,15 +1942,12 @@ execlists_reset_prepare(struct intel_engine_cs *engine) active = NULL; request = port_request(execlists->port); if (request) { - unsigned long flags; - /* * Prevent the breadcrumb from advancing before we decide * which request is currently active. */ intel_engine_stop_cs(engine); - spin_lock_irqsave(&engine->timeline.lock, flags); list_for_each_entry_from_reverse(request, &engine->timeline.requests, link) { @@ -1928,9 +1957,10 @@ execlists_reset_prepare(struct intel_engine_cs *engine) active = request; } - spin_unlock_irqrestore(&engine->timeline.lock, flags); } + spin_unlock_irqrestore(&engine->timeline.lock, flags); + return active; } From c9855a561afa5b21db7f2218f0b7baaa555fa90c Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Mon, 25 Jun 2018 18:37:57 +0200 Subject: [PATCH 045/114] drm/i915: Block enabling FBC until flips have been completed MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There is a small race window in which FBC can be enabled after pre_plane_update is called, but before the page flip has been queued or completed. Signed-off-by: Maarten Lankhorst Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=103167 Link: https://patchwork.freedesktop.org/patch/msgid/20180625163758.10871-1-maarten.lankhorst@linux.intel.com Reviewed-by: Ville Syrjälä --- drivers/gpu/drm/i915/i915_drv.h | 1 + drivers/gpu/drm/i915/intel_fbc.c | 35 ++++++++++---------------------- 2 files changed, 12 insertions(+), 24 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 2b684f431c60..615ed807ea32 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -512,6 +512,7 @@ struct intel_fbc { bool enabled; bool active; + bool flip_pending; bool underrun_detected; struct work_struct underrun_work; diff --git a/drivers/gpu/drm/i915/intel_fbc.c b/drivers/gpu/drm/i915/intel_fbc.c index b431b6733cc1..9f9ea0b5452f 100644 --- a/drivers/gpu/drm/i915/intel_fbc.c +++ b/drivers/gpu/drm/i915/intel_fbc.c @@ -924,13 +924,6 @@ static void intel_fbc_get_reg_params(struct intel_crtc *crtc, 32 * fbc->threshold) * 8; } -static bool intel_fbc_reg_params_equal(struct intel_fbc_reg_params *params1, - struct intel_fbc_reg_params *params2) -{ - /* We can use this since intel_fbc_get_reg_params() does a memset. */ - return memcmp(params1, params2, sizeof(*params1)) == 0; -} - void intel_fbc_pre_update(struct intel_crtc *crtc, struct intel_crtc_state *crtc_state, struct intel_plane_state *plane_state) @@ -953,6 +946,7 @@ void intel_fbc_pre_update(struct intel_crtc *crtc, goto unlock; intel_fbc_update_state_cache(crtc, crtc_state, plane_state); + fbc->flip_pending = true; deactivate: intel_fbc_deactivate(dev_priv, reason); @@ -988,13 +982,15 @@ static void __intel_fbc_post_update(struct intel_crtc *crtc) { struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); struct intel_fbc *fbc = &dev_priv->fbc; - struct intel_fbc_reg_params old_params; WARN_ON(!mutex_is_locked(&fbc->lock)); if (!fbc->enabled || fbc->crtc != crtc) return; + fbc->flip_pending = false; + WARN_ON(fbc->active); + if (!i915_modparams.enable_fbc) { intel_fbc_deactivate(dev_priv, "disabled at runtime per module param"); __intel_fbc_disable(dev_priv); @@ -1002,25 +998,16 @@ static void __intel_fbc_post_update(struct intel_crtc *crtc) return; } - if (!intel_fbc_can_activate(crtc)) { - WARN_ON(fbc->active); - return; - } - - old_params = fbc->params; intel_fbc_get_reg_params(crtc, &fbc->params); - /* If the scanout has not changed, don't modify the FBC settings. - * Note that we make the fundamental assumption that the fb->obj - * cannot be unpinned (and have its GTT offset and fence revoked) - * without first being decoupled from the scanout and FBC disabled. - */ - if (fbc->active && - intel_fbc_reg_params_equal(&old_params, &fbc->params)) + if (!intel_fbc_can_activate(crtc)) return; - intel_fbc_deactivate(dev_priv, "FBC enabled (active or scheduled)"); - intel_fbc_schedule_activation(crtc); + if (!fbc->busy_bits) { + intel_fbc_deactivate(dev_priv, "FBC enabled (active or scheduled)"); + intel_fbc_schedule_activation(crtc); + } else + intel_fbc_deactivate(dev_priv, "frontbuffer write"); } void intel_fbc_post_update(struct intel_crtc *crtc) @@ -1085,7 +1072,7 @@ void intel_fbc_flush(struct drm_i915_private *dev_priv, (frontbuffer_bits & intel_fbc_get_frontbuffer_bit(fbc))) { if (fbc->active) intel_fbc_recompress(dev_priv); - else + else if (!fbc->flip_pending) __intel_fbc_post_update(fbc->crtc); } From 457209595789a3acd57990e2e9dade95bbea8fba Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Mon, 25 Jun 2018 18:37:58 +0200 Subject: [PATCH 046/114] drm/i915: Remove delayed FBC activation. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The only time we should start FBC is when we have waited a vblank after the atomic update. We've already forced a vblank wait by doing wait_for_flip_done before intel_post_plane_update(), so we don't need to wait a second time before enabling. Removing the worker simplifies the code and removes possible race conditions, like happening in 103167. Cc: Paulo Zanoni Cc: Rodrigo Vivi Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=103167 Signed-off-by: Maarten Lankhorst Link: https://patchwork.freedesktop.org/patch/msgid/20180625163758.10871-2-maarten.lankhorst@linux.intel.com Reviewed-by: Ville Syrjälä --- drivers/gpu/drm/i915/i915_debugfs.c | 5 -- drivers/gpu/drm/i915/i915_drv.h | 6 -- drivers/gpu/drm/i915/intel_fbc.c | 96 +---------------------------- 3 files changed, 1 insertion(+), 106 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index c400f42a54ec..48a57c0636bf 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -1659,11 +1659,6 @@ static int i915_fbc_status(struct seq_file *m, void *unused) else seq_printf(m, "FBC disabled: %s\n", fbc->no_fbc_reason); - if (fbc->work.scheduled) - seq_printf(m, "FBC worker scheduled on vblank %llu, now %llu\n", - fbc->work.scheduled_vblank, - drm_crtc_vblank_count(&fbc->crtc->base)); - if (intel_fbc_is_active(dev_priv)) { u32 mask; diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 615ed807ea32..ce7d06332884 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -580,12 +580,6 @@ struct intel_fbc { unsigned int gen9_wa_cfb_stride; } params; - struct intel_fbc_work { - bool scheduled; - u64 scheduled_vblank; - struct work_struct work; - } work; - const char *no_fbc_reason; }; diff --git a/drivers/gpu/drm/i915/intel_fbc.c b/drivers/gpu/drm/i915/intel_fbc.c index 9f9ea0b5452f..01d1d2088f04 100644 --- a/drivers/gpu/drm/i915/intel_fbc.c +++ b/drivers/gpu/drm/i915/intel_fbc.c @@ -399,89 +399,6 @@ bool intel_fbc_is_active(struct drm_i915_private *dev_priv) return dev_priv->fbc.active; } -static void intel_fbc_work_fn(struct work_struct *__work) -{ - struct drm_i915_private *dev_priv = - container_of(__work, struct drm_i915_private, fbc.work.work); - struct intel_fbc *fbc = &dev_priv->fbc; - struct intel_fbc_work *work = &fbc->work; - struct intel_crtc *crtc = fbc->crtc; - struct drm_vblank_crtc *vblank = &dev_priv->drm.vblank[crtc->pipe]; - - if (drm_crtc_vblank_get(&crtc->base)) { - /* CRTC is now off, leave FBC deactivated */ - mutex_lock(&fbc->lock); - work->scheduled = false; - mutex_unlock(&fbc->lock); - return; - } - -retry: - /* Delay the actual enabling to let pageflipping cease and the - * display to settle before starting the compression. Note that - * this delay also serves a second purpose: it allows for a - * vblank to pass after disabling the FBC before we attempt - * to modify the control registers. - * - * WaFbcWaitForVBlankBeforeEnable:ilk,snb - * - * It is also worth mentioning that since work->scheduled_vblank can be - * updated multiple times by the other threads, hitting the timeout is - * not an error condition. We'll just end up hitting the "goto retry" - * case below. - */ - wait_event_timeout(vblank->queue, - drm_crtc_vblank_count(&crtc->base) != work->scheduled_vblank, - msecs_to_jiffies(50)); - - mutex_lock(&fbc->lock); - - /* Were we cancelled? */ - if (!work->scheduled) - goto out; - - /* Were we delayed again while this function was sleeping? */ - if (drm_crtc_vblank_count(&crtc->base) == work->scheduled_vblank) { - mutex_unlock(&fbc->lock); - goto retry; - } - - intel_fbc_hw_activate(dev_priv); - - work->scheduled = false; - -out: - mutex_unlock(&fbc->lock); - drm_crtc_vblank_put(&crtc->base); -} - -static void intel_fbc_schedule_activation(struct intel_crtc *crtc) -{ - struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); - struct intel_fbc *fbc = &dev_priv->fbc; - struct intel_fbc_work *work = &fbc->work; - - WARN_ON(!mutex_is_locked(&fbc->lock)); - if (WARN_ON(!fbc->enabled)) - return; - - if (drm_crtc_vblank_get(&crtc->base)) { - DRM_ERROR("vblank not available for FBC on pipe %c\n", - pipe_name(crtc->pipe)); - return; - } - - /* It is useless to call intel_fbc_cancel_work() or cancel_work() in - * this function since we're not releasing fbc.lock, so it won't have an - * opportunity to grab it to discover that it was cancelled. So we just - * update the expected jiffy count. */ - work->scheduled = true; - work->scheduled_vblank = drm_crtc_vblank_count(&crtc->base); - drm_crtc_vblank_put(&crtc->base); - - schedule_work(&work->work); -} - static void intel_fbc_deactivate(struct drm_i915_private *dev_priv, const char *reason) { @@ -489,11 +406,6 @@ static void intel_fbc_deactivate(struct drm_i915_private *dev_priv, WARN_ON(!mutex_is_locked(&fbc->lock)); - /* Calling cancel_work() here won't help due to the fact that the work - * function grabs fbc->lock. Just set scheduled to false so the work - * function can know it was cancelled. */ - fbc->work.scheduled = false; - if (fbc->active) intel_fbc_hw_deactivate(dev_priv); @@ -1005,7 +917,7 @@ static void __intel_fbc_post_update(struct intel_crtc *crtc) if (!fbc->busy_bits) { intel_fbc_deactivate(dev_priv, "FBC enabled (active or scheduled)"); - intel_fbc_schedule_activation(crtc); + intel_fbc_hw_activate(dev_priv); } else intel_fbc_deactivate(dev_priv, "frontbuffer write"); } @@ -1212,8 +1124,6 @@ void intel_fbc_disable(struct intel_crtc *crtc) if (fbc->crtc == crtc) __intel_fbc_disable(dev_priv); mutex_unlock(&fbc->lock); - - cancel_work_sync(&fbc->work.work); } /** @@ -1235,8 +1145,6 @@ void intel_fbc_global_disable(struct drm_i915_private *dev_priv) __intel_fbc_disable(dev_priv); } mutex_unlock(&fbc->lock); - - cancel_work_sync(&fbc->work.work); } static void intel_fbc_underrun_work_fn(struct work_struct *work) @@ -1387,12 +1295,10 @@ void intel_fbc_init(struct drm_i915_private *dev_priv) { struct intel_fbc *fbc = &dev_priv->fbc; - INIT_WORK(&fbc->work.work, intel_fbc_work_fn); INIT_WORK(&fbc->underrun_work, intel_fbc_underrun_work_fn); mutex_init(&fbc->lock); fbc->enabled = false; fbc->active = false; - fbc->work.scheduled = false; if (need_fbc_vtd_wa(dev_priv)) mkwrite_device_info(dev_priv)->has_fbc = false; From d78e2bbf483faa48f060c2454c56c3ff9cd74573 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 29 Jun 2018 14:37:16 +0100 Subject: [PATCH 047/114] drm/i915/selftests: Mark up write into scratch vma We correctly attach the exclusive fetch for the scratch object when emitting a request that writes into it, but for completeness we should also declared the write to i915_vma_move_to_active() Reported-by: Tvrtko Ursulin Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20180629133717.11761-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/selftests/i915_gem_context.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/selftests/i915_gem_context.c index c0c26fae45c7..cc848ceeb3c3 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_context.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_context.c @@ -175,7 +175,7 @@ static int gpu_fill(struct drm_i915_gem_object *obj, i915_vma_unpin(batch); i915_vma_close(batch); - i915_vma_move_to_active(vma, rq, 0); + i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); i915_vma_unpin(vma); reservation_object_lock(obj->resv, NULL); From be01de596ef65ad1fc66a795d45e7334d197e594 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 29 Jun 2018 14:37:17 +0100 Subject: [PATCH 048/114] drm/i915/selftests: Attach the fence to the object when making busy make_obj_busy() makes a dummy busy object, but didn't attach the fence to the reservation object, so it would not have registered as busy. For completeness, attach the dummy request as the exclusive fence and mark the object as written (in i915_vma_move_to_active) Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20180629133717.11761-2-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/selftests/i915_gem_object.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_object.c b/drivers/gpu/drm/i915/selftests/i915_gem_object.c index 549707b9d738..77dd7a510ea6 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_object.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_object.c @@ -454,7 +454,12 @@ static int make_obj_busy(struct drm_i915_gem_object *obj) return PTR_ERR(rq); } - i915_vma_move_to_active(vma, rq, 0); + i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); + + reservation_object_lock(vma->resv, NULL); + reservation_object_add_excl_fence(vma->resv, &rq->fence); + reservation_object_unlock(vma->resv); + i915_request_add(rq); i915_gem_object_set_active_reference(obj); From 1ea29bbd47cf5640fd67f8bcca1f934aa468c428 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Mon, 18 Jun 2018 11:18:20 +0000 Subject: [PATCH 049/114] drm/i915/guc: Print CTL params passed to Guc MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit While debugging we may want to examine params passed to GuC. v2: drop #ifdef DEBUG_GUC - Michal Signed-off-by: Michal Wajdeczko Cc: Daniele Ceraolo Spurio Cc: Michel Thierry Reviewed-by: Michel Thierry #1 Cc: Michal Winiarski Reviewed-by: Michał Winiarski Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20180618111821.47088-1-michal.wajdeczko@intel.com --- drivers/gpu/drm/i915/intel_guc.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_guc.c b/drivers/gpu/drm/i915/intel_guc.c index 53b43bc73c01..e12bd259df17 100644 --- a/drivers/gpu/drm/i915/intel_guc.c +++ b/drivers/gpu/drm/i915/intel_guc.c @@ -351,6 +351,9 @@ void intel_guc_init_params(struct intel_guc *guc) params[GUC_CTL_DEBUG] = guc_ctl_debug_flags(guc); params[GUC_CTL_CTXINFO] = guc_ctl_ctxinfo_flags(guc); + for (i = 0; i < GUC_CTL_MAX_DWORDS; i++) + DRM_DEBUG_DRIVER("param[%2d] = %#x\n", i, params[i]); + /* * All SOFT_SCRATCH registers are in FORCEWAKE_BLITTER domain and * they are power context saved so it's ok to release forcewake From e67005e59a74613c2d5879f63eef70e6267bb452 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Fri, 29 Jun 2018 13:20:39 +0300 Subject: [PATCH 050/114] drm/i915: abstract and document register picking macros Try to describe what the pick variants do, and which to prefer. No functional changes. Reviewed-by: Rodrigo Vivi Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20180629102039.2435-1-jani.nikula@intel.com --- drivers/gpu/drm/i915/i915_reg.h | 28 ++++++++++++++++++++++------ 1 file changed, 22 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index c30cfcd90754..7dc774682922 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -139,19 +139,35 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) return !i915_mmio_reg_equal(reg, INVALID_MMIO_REG); } +/* + * Given the first two numbers __a and __b of arbitrarily many evenly spaced + * numbers, pick the 0-based __index'th value. + * + * Always prefer this over _PICK() if the numbers are evenly spaced. + */ +#define _PICK_EVEN(__index, __a, __b) ((__a) + (__index) * ((__b) - (__a))) + +/* + * Given the arbitrary numbers in varargs, pick the 0-based __index'th number. + * + * Always prefer _PICK_EVEN() over this if the numbers are evenly spaced. + */ #define _PICK(__index, ...) (((const u32 []){ __VA_ARGS__ })[__index]) -#define _PIPE(pipe, a, b) ((a) + (pipe) * ((b) - (a))) +/* + * Named helper wrappers around _PICK_EVEN() and _PICK(). + */ +#define _PIPE(pipe, a, b) _PICK_EVEN(pipe, a, b) #define _MMIO_PIPE(pipe, a, b) _MMIO(_PIPE(pipe, a, b)) -#define _PLANE(plane, a, b) _PIPE(plane, a, b) +#define _PLANE(plane, a, b) _PICK_EVEN(plane, a, b) #define _MMIO_PLANE(plane, a, b) _MMIO_PIPE(plane, a, b) -#define _TRANS(tran, a, b) ((a) + (tran) * ((b) - (a))) +#define _TRANS(tran, a, b) _PICK_EVEN(tran, a, b) #define _MMIO_TRANS(tran, a, b) _MMIO(_TRANS(tran, a, b)) -#define _PORT(port, a, b) ((a) + (port) * ((b) - (a))) +#define _PORT(port, a, b) _PICK_EVEN(port, a, b) #define _MMIO_PORT(port, a, b) _MMIO(_PORT(port, a, b)) #define _MMIO_PIPE3(pipe, a, b, c) _MMIO(_PICK(pipe, a, b, c)) #define _MMIO_PORT3(pipe, a, b, c) _MMIO(_PICK(pipe, a, b, c)) -#define _PLL(pll, a, b) ((a) + (pll) * ((b) - (a))) +#define _PLL(pll, a, b) _PICK_EVEN(pll, a, b) #define _MMIO_PLL(pll, a, b) _MMIO(_PLL(pll, a, b)) #define _PHY3(phy, ...) _PICK(phy, __VA_ARGS__) #define _MMIO_PHY3(phy, a, b, c) _MMIO(_PHY3(phy, a, b, c)) @@ -6874,7 +6890,7 @@ enum { #define _PS_ECC_STAT_2B 0x68AD0 #define _PS_ECC_STAT_1C 0x691D0 -#define _ID(id, a, b) ((a) + (id) * ((b) - (a))) +#define _ID(id, a, b) _PICK_EVEN(id, a, b) #define SKL_PS_CTRL(pipe, id) _MMIO_PIPE(pipe, \ _ID(id, _PS_1A_CTRL, _PS_2A_CTRL), \ _ID(id, _PS_1B_CTRL, _PS_2B_CTRL)) From 7e7367d3bc6cf27dd7e007e7897fcebfeff1ee8b Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sat, 30 Jun 2018 10:05:09 +0100 Subject: [PATCH 051/114] drm/i915: Try GGTT mmapping whole object as partial If the whole object is already pinned by HW for use as scanout, we will fail to move it to the mappable region and so must resort to using a partial VMA covering the whole object. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=104513 Fixes: aa136d9d72c2 ("drm/i915: Convert partial ggtt vma to full ggtt if it spans the entire object") Signed-off-by: Chris Wilson Cc: Joonas Lahtinen Cc: Matthew Auld Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20180630090509.469-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem.c | 28 +++++++++++++++++----------- drivers/gpu/drm/i915/i915_vma.c | 2 +- 2 files changed, 18 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 8954db6ab083..048b722cf27c 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2006,7 +2006,6 @@ vm_fault_t i915_gem_fault(struct vm_fault *vmf) bool write = !!(vmf->flags & FAULT_FLAG_WRITE); struct i915_vma *vma; pgoff_t page_offset; - unsigned int flags; int ret; /* We don't use vmf->pgoff since that has the fake offset */ @@ -2042,27 +2041,34 @@ vm_fault_t i915_gem_fault(struct vm_fault *vmf) goto err_unlock; } - /* If the object is smaller than a couple of partial vma, it is - * not worth only creating a single partial vma - we may as well - * clear enough space for the full object. - */ - flags = PIN_MAPPABLE; - if (obj->base.size > 2 * MIN_CHUNK_PAGES << PAGE_SHIFT) - flags |= PIN_NONBLOCK | PIN_NONFAULT; /* Now pin it into the GTT as needed */ - vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, flags); + vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, + PIN_MAPPABLE | + PIN_NONBLOCK | + PIN_NONFAULT); if (IS_ERR(vma)) { /* Use a partial view if it is bigger than available space */ struct i915_ggtt_view view = compute_partial_view(obj, page_offset, MIN_CHUNK_PAGES); + unsigned int flags; - /* Userspace is now writing through an untracked VMA, abandon + flags = PIN_MAPPABLE; + if (view.type == I915_GGTT_VIEW_NORMAL) + flags |= PIN_NONBLOCK; /* avoid warnings for pinned */ + + /* + * Userspace is now writing through an untracked VMA, abandon * all hope that the hardware is able to track future writes. */ obj->frontbuffer_ggtt_origin = ORIGIN_CPU; - vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0, PIN_MAPPABLE); + vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0, flags); + if (IS_ERR(vma) && !view.type) { + flags = PIN_MAPPABLE; + view.type = I915_GGTT_VIEW_PARTIAL; + vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0, flags); + } } if (IS_ERR(vma)) { ret = PTR_ERR(vma); diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index d0e606e9b27a..de2b6d65e865 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -143,7 +143,7 @@ vma_create(struct drm_i915_gem_object *obj, obj->base.size >> PAGE_SHIFT)); vma->size = view->partial.size; vma->size <<= PAGE_SHIFT; - GEM_BUG_ON(vma->size >= obj->base.size); + GEM_BUG_ON(vma->size > obj->base.size); } else if (view->type == I915_GGTT_VIEW_ROTATED) { vma->size = intel_rotation_info_size(&view->rotated); vma->size <<= PAGE_SHIFT; From 00b062967f1524de25daf6578d9043b0bbe7c208 Mon Sep 17 00:00:00 2001 From: Vathsala Nagaraju Date: Wed, 27 Jun 2018 13:38:30 +0530 Subject: [PATCH 052/114] drm/i915/psr: Add psr1 live status Prints live state of psr1.Extending the existing PSR2 live state function to cover psr1. Tested on KBL with psr2 and psr1 panel. v2: rebase v3: DK Rename psr2_live_status to psr_source_status. v4: DK Move EDP_PSR_STATUS_STATE_SHIFT below EDP_PSR_STATUS_STATE_MASK. Pass seq to psr_source_status, handle source status prints in psr_source_status. v5: Fixed CI warning messages v6: Remove extra space in the title before the colon.(DK) Rebase. (Jani) v7: Use tabs for indenting the values.(Jani) v8: Addressed dk's review comments. Cc: Rodrigo Vivi Cc: Dhinakaran Pandiyan Reviewed-by: Dhinakaran Pandiyan Signed-off-by: Vathsala Nagaraju Signed-off-by: Dhinakaran Pandiyan Link: https://patchwork.freedesktop.org/patch/msgid/1530086910-15914-1-git-send-email-vathsala.nagaraju@intel.com --- drivers/gpu/drm/i915/i915_debugfs.c | 70 +++++++++++++++++++---------- drivers/gpu/drm/i915/i915_reg.h | 1 + 2 files changed, 48 insertions(+), 23 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 48a57c0636bf..f6142d78ede4 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -2592,27 +2592,55 @@ static const struct file_operations i915_guc_log_relay_fops = { .release = i915_guc_log_relay_release, }; -static const char *psr2_live_status(u32 val) +static void +psr_source_status(struct drm_i915_private *dev_priv, struct seq_file *m) { - static const char * const live_status[] = { - "IDLE", - "CAPTURE", - "CAPTURE_FS", - "SLEEP", - "BUFON_FW", - "ML_UP", - "SU_STANDBY", - "FAST_SLEEP", - "DEEP_SLEEP", - "BUF_ON", - "TG_ON" - }; + u32 val, psr_status; - val = (val & EDP_PSR2_STATUS_STATE_MASK) >> EDP_PSR2_STATUS_STATE_SHIFT; - if (val < ARRAY_SIZE(live_status)) - return live_status[val]; + if (dev_priv->psr.psr2_enabled) { + static const char * const live_status[] = { + "IDLE", + "CAPTURE", + "CAPTURE_FS", + "SLEEP", + "BUFON_FW", + "ML_UP", + "SU_STANDBY", + "FAST_SLEEP", + "DEEP_SLEEP", + "BUF_ON", + "TG_ON" + }; + psr_status = I915_READ(EDP_PSR2_STATUS); + val = (psr_status & EDP_PSR2_STATUS_STATE_MASK) >> + EDP_PSR2_STATUS_STATE_SHIFT; + if (val < ARRAY_SIZE(live_status)) { + seq_printf(m, "Source PSR status: 0x%x [%s]\n", + psr_status, live_status[val]); + return; + } + } else { + static const char * const live_status[] = { + "IDLE", + "SRDONACK", + "SRDENT", + "BUFOFF", + "BUFON", + "AUXACK", + "SRDOFFACK", + "SRDENT_ON", + }; + psr_status = I915_READ(EDP_PSR_STATUS); + val = (psr_status & EDP_PSR_STATUS_STATE_MASK) >> + EDP_PSR_STATUS_STATE_SHIFT; + if (val < ARRAY_SIZE(live_status)) { + seq_printf(m, "Source PSR status: 0x%x [%s]\n", + psr_status, live_status[val]); + return; + } + } - return "unknown"; + seq_printf(m, "Source PSR status: 0x%x [%s]\n", psr_status, "unknown"); } static const char *psr_sink_status(u8 val) @@ -2676,12 +2704,8 @@ static int i915_edp_psr_status(struct seq_file *m, void *data) seq_printf(m, "Performance_Counter: %u\n", psrperf); } - if (dev_priv->psr.psr2_enabled) { - u32 psr2 = I915_READ(EDP_PSR2_STATUS); - seq_printf(m, "EDP_PSR2_STATUS: %x [%s]\n", - psr2, psr2_live_status(psr2)); - } + psr_source_status(dev_priv, m); if (dev_priv->psr.enabled) { struct drm_dp_aux *aux = &dev_priv->psr.enabled->aux; diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 7dc774682922..69b9978d7dda 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -4133,6 +4133,7 @@ enum { #define EDP_PSR_STATUS _MMIO(dev_priv->psr_mmio_base + 0x40) #define EDP_PSR_STATUS_STATE_MASK (7 << 29) +#define EDP_PSR_STATUS_STATE_SHIFT 29 #define EDP_PSR_STATUS_STATE_IDLE (0 << 29) #define EDP_PSR_STATUS_STATE_SRDONACK (1 << 29) #define EDP_PSR_STATUS_STATE_SRDENT (2 << 29) From abdd322f680870dbe1942425d1fa2c74de4721f4 Mon Sep 17 00:00:00 2001 From: Dhinakaran Pandiyan Date: Wed, 27 Jun 2018 23:18:54 -0700 Subject: [PATCH 053/114] drm/i915: Remove unnecessary check for unsupported modifiers for NV12 There is already a check to allow only RGB8888 formats with CCS modifiers. Signed-off-by: Dhinakaran Pandiyan Reviewed-by: Daniel Vetter Reviewed-by: Maarten Lankhorst Link: https://patchwork.freedesktop.org/patch/msgid/20180628061854.6430-1-dhinakaran.pandiyan@intel.com --- drivers/gpu/drm/i915/intel_display.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index fca72ce0b3b3..ce7646265b50 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -14478,11 +14478,6 @@ static int intel_framebuffer_init(struct intel_framebuffer *intel_fb, } break; case DRM_FORMAT_NV12: - if (mode_cmd->modifier[0] == I915_FORMAT_MOD_Y_TILED_CCS || - mode_cmd->modifier[0] == I915_FORMAT_MOD_Yf_TILED_CCS) { - DRM_DEBUG_KMS("RC not to be enabled with NV12\n"); - goto err; - } if (INTEL_GEN(dev_priv) < 9 || IS_SKYLAKE(dev_priv) || IS_BROXTON(dev_priv)) { DRM_DEBUG_KMS("unsupported pixel format: %s\n", From c43dbcbbcc8c515d4ececc7a996d5fc7286c28c3 Mon Sep 17 00:00:00 2001 From: Tarun Vyas Date: Wed, 27 Jun 2018 13:02:49 -0700 Subject: [PATCH 054/114] drm/i915/psr: Lockless version of psr_wait_for_idle This is a lockless version of the exisiting psr_wait_for_idle(). We want to wait for PSR to idle out inside intel_pipe_update_start. At the time of a pipe update, we should never race with any psr enable or disable code, which is a part of crtc enable/disable. The follow up patch will use this lockless wait inside pipe_update_ start to wait for PSR to idle out before checking for vblank evasion. We need to keep the wait in pipe_update_start to as less as it can be. So,we can live and flourish w/o taking any psr locks at all. Even if psr is never enabled, psr2_enabled will be false and this function will wait for PSR1 to idle out, which should just return immediately, so a very short (~1-2 usec) wait for cases where PSR is disabled. v2: Add comment to explain the 25msec timeout (DK) v3: Rename psr_wait_for_idle to __psr_wait_for_idle_locked to avoid naming conflicts and propagate err (if any) to the caller (Chris) v5: Form a series with the next patch v7: Better explain the need for lockless wait and increase the max timeout to handle refresh rates < 60 Hz (Daniel Vetter) v8: Rebase Acked-by: Daniel Vetter Reviewed-by: Dhinakaran Pandiyan Signed-off-by: Tarun Vyas Signed-off-by: Dhinakaran Pandiyan Link: https://patchwork.freedesktop.org/patch/msgid/20180627200250.1515-1-tarun.vyas@intel.com --- drivers/gpu/drm/i915/intel_drv.h | 1 + drivers/gpu/drm/i915/intel_psr.c | 36 ++++++++++++++++++++++++++++++-- 2 files changed, 35 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 5a37db292d3a..a8073eb02ffa 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -1921,6 +1921,7 @@ void intel_psr_compute_config(struct intel_dp *intel_dp, void intel_psr_irq_control(struct drm_i915_private *dev_priv, bool debug); void intel_psr_irq_handler(struct drm_i915_private *dev_priv, u32 psr_iir); void intel_psr_short_pulse(struct intel_dp *intel_dp); +int intel_psr_wait_for_idle(struct drm_i915_private *dev_priv); /* intel_runtime_pm.c */ int intel_power_domains_init(struct drm_i915_private *); diff --git a/drivers/gpu/drm/i915/intel_psr.c b/drivers/gpu/drm/i915/intel_psr.c index 45f1cb7d6c04..23acc9ac8d4d 100644 --- a/drivers/gpu/drm/i915/intel_psr.c +++ b/drivers/gpu/drm/i915/intel_psr.c @@ -717,7 +717,39 @@ void intel_psr_disable(struct intel_dp *intel_dp, cancel_work_sync(&dev_priv->psr.work); } -static bool psr_wait_for_idle(struct drm_i915_private *dev_priv) +int intel_psr_wait_for_idle(struct drm_i915_private *dev_priv) +{ + i915_reg_t reg; + u32 mask; + + /* + * The sole user right now is intel_pipe_update_start(), + * which won't race with psr_enable/disable, which is + * where psr2_enabled is written to. So, we don't need + * to acquire the psr.lock. More importantly, we want the + * latency inside intel_pipe_update_start() to be as low + * as possible, so no need to acquire psr.lock when it is + * not needed and will induce latencies in the atomic + * update path. + */ + if (dev_priv->psr.psr2_enabled) { + reg = EDP_PSR2_STATUS; + mask = EDP_PSR2_STATUS_STATE_MASK; + } else { + reg = EDP_PSR_STATUS; + mask = EDP_PSR_STATUS_STATE_MASK; + } + + /* + * Max time for PSR to idle = Inverse of the refresh rate + + * 6 ms of exit training time + 1.5 ms of aux channel + * handshake. 50 msec is defesive enough to cover everything. + */ + return intel_wait_for_register(dev_priv, reg, mask, + EDP_PSR_STATUS_STATE_IDLE, 50); +} + +static bool __psr_wait_for_idle_locked(struct drm_i915_private *dev_priv) { struct intel_dp *intel_dp; i915_reg_t reg; @@ -763,7 +795,7 @@ static void intel_psr_work(struct work_struct *work) * PSR might take some time to get fully disabled * and be ready for re-enable. */ - if (!psr_wait_for_idle(dev_priv)) + if (!__psr_wait_for_idle_locked(dev_priv)) goto unlock; /* From a608987970b929e250957e4e3fb891f1f10eff6f Mon Sep 17 00:00:00 2001 From: Tarun Vyas Date: Wed, 27 Jun 2018 13:02:50 -0700 Subject: [PATCH 055/114] drm/i915: Wait for PSR exit before checking for vblank evasion The PIPEDSL freezes on PSR entry and if PSR hasn't fully exited, then the pipe_update_start call schedules itself out to check back later. On ChromeOS-4.4 kernel, which is fairly up-to-date w.r.t drm/i915 but lags w.r.t core kernel code, hot plugging an external display triggers tons of "potential atomic update errors" in the dmesg, on *pipe A*. A closer analysis reveals that we try to read the scanline 3 times and eventually timeout, b/c PSR hasn't exited fully leading to a PIPEDSL stuck @ 1599. This issue is not seen on upstream kernels, b/c for *some* reason we loop inside intel_pipe_update start for ~2+ msec which in this case is more than enough to exit PSR fully, hence an *unstuck* PIPEDSL counter, hence no error. On the other hand, the ChromeOS kernel spends ~1.1 msec looping inside intel_pipe_update_start and hence errors out b/c the source is still in PSR. Regardless, we should wait for PSR exit (if PSR is disabled, we incur a ~1-2 usec penalty) before reading the PIPEDSL, b/c if we haven't fully exited PSR, then checking for vblank evasion isn't actually applicable. v4: Comment explaining psr_wait after enabling VBL interrupts (DK) v5: CAN_PSR() to handle platforms that don't support PSR. v6: Handle local_irq_disable on early return (Chris) Acked-by: Daniel Vetter Reviewed-by: Dhinakaran Pandiyan Signed-off-by: Tarun Vyas Signed-off-by: Dhinakaran Pandiyan Link: https://patchwork.freedesktop.org/patch/msgid/20180627200250.1515-2-tarun.vyas@intel.com --- drivers/gpu/drm/i915/intel_sprite.c | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_sprite.c b/drivers/gpu/drm/i915/intel_sprite.c index d9e7f4fb5096..e2328d0402d8 100644 --- a/drivers/gpu/drm/i915/intel_sprite.c +++ b/drivers/gpu/drm/i915/intel_sprite.c @@ -107,13 +107,21 @@ void intel_pipe_update_start(const struct intel_crtc_state *new_crtc_state) VBLANK_EVASION_TIME_US); max = vblank_start - 1; - local_irq_disable(); - if (min <= 0 || max <= 0) - return; + goto irq_disable; if (WARN_ON(drm_crtc_vblank_get(&crtc->base))) - return; + goto irq_disable; + + /* + * Wait for psr to idle out after enabling the VBL interrupts + * VBL interrupts will start the PSR exit and prevent a PSR + * re-entry as well. + */ + if (CAN_PSR(dev_priv) && intel_psr_wait_for_idle(dev_priv)) + DRM_ERROR("PSR idle timed out, atomic update may fail\n"); + + local_irq_disable(); crtc->debug.min_vbl = min; crtc->debug.max_vbl = max; @@ -171,6 +179,10 @@ void intel_pipe_update_start(const struct intel_crtc_state *new_crtc_state) crtc->debug.start_vbl_count = intel_crtc_get_vblank_counter(crtc); trace_i915_pipe_update_vblank_evaded(crtc); + return; + +irq_disable: + local_irq_disable(); } /** From 38b7fb0b2ad1f2ba83bf9a8535cedb198a448ea4 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 3 Jul 2018 11:18:29 +0100 Subject: [PATCH 056/114] drm/i915/selftests: Release the struct_mutex to free the objects live_gtt is a very slow test to run, simply because it tries to allocate and use as much as the 48b address space as possibly can and in the process will try to own all of the system memory. This leads to resource exhaustion and CPU starvation; the latter impacts us when the NMI watchdog declares a task hung due to a mutex contention with ourselves. This we can prevent by releasing the struct_mutex and forcing our i915/rcu workers to run, and in particular flushing the freed object worker that is the cause for concern. References: https://bugs.freedesktop.org/show_bug.cgi?id=107094 Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20180703101829.7360-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 23 +++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c index a28ee0cc6a63..9578652fba24 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c @@ -32,6 +32,20 @@ #include "mock_drm.h" #include "mock_gem_device.h" +static void cleanup_freed_objects(struct drm_i915_private *i915) +{ + /* + * As we may hold onto the struct_mutex for inordinate lengths of + * time, the NMI khungtaskd detector may fire for the free objects + * worker. + */ + mutex_unlock(&i915->drm.struct_mutex); + + i915_gem_drain_freed_objects(i915); + + mutex_lock(&i915->drm.struct_mutex); +} + static void fake_free_pages(struct drm_i915_gem_object *obj, struct sg_table *pages) { @@ -291,6 +305,8 @@ static int lowlevel_hole(struct drm_i915_private *i915, i915_gem_object_put(obj); kfree(order); + + cleanup_freed_objects(i915); } return 0; @@ -519,6 +535,7 @@ static int fill_hole(struct drm_i915_private *i915, } close_object_list(&objects, vm); + cleanup_freed_objects(i915); } return 0; @@ -605,6 +622,8 @@ err_put: i915_gem_object_put(obj); if (err) return err; + + cleanup_freed_objects(i915); } return 0; @@ -789,6 +808,8 @@ err_obj: kfree(order); if (err) return err; + + cleanup_freed_objects(i915); } return 0; @@ -857,6 +878,7 @@ static int __shrink_hole(struct drm_i915_private *i915, } close_object_list(&objects, vm); + cleanup_freed_objects(i915); return err; } @@ -949,6 +971,7 @@ static int shrink_boom(struct drm_i915_private *i915, i915_gem_object_put(explode); memset(&vm->fault_attr, 0, sizeof(vm->fault_attr)); + cleanup_freed_objects(i915); } return 0; From 1f6f00238abf2bfa46f4d2c44cfc7512566f3685 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 3 Jul 2018 14:53:31 +0100 Subject: [PATCH 057/114] drm/i915/selftests: Drop struct_mutex around lowlevel pggtt allocation For a ppgtt that we are constructing, there is no struct_mutex dependence so skip it. In the process, also ping the scheduler frequently to try and avoid the NMI watchdog. v2: gen6 requires struct_mutex to clean up (currently) Suggested-by: Tvrtko Ursulin References: https://bugs.freedesktop.org/show_bug.cgi?id=107094 Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20180703135331.12265-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c index 9578652fba24..a7956ecc3e1f 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c @@ -156,12 +156,9 @@ static int igt_ppgtt_alloc(void *arg) if (!USES_PPGTT(dev_priv)) return 0; - mutex_lock(&dev_priv->drm.struct_mutex); ppgtt = __hw_ppgtt_create(dev_priv); - if (IS_ERR(ppgtt)) { - err = PTR_ERR(ppgtt); - goto err_unlock; - } + if (IS_ERR(ppgtt)) + return PTR_ERR(ppgtt); if (!ppgtt->vm.allocate_va_range) goto err_ppgtt_cleanup; @@ -180,6 +177,8 @@ static int igt_ppgtt_alloc(void *arg) goto err_ppgtt_cleanup; } + cond_resched(); + ppgtt->vm.clear_range(&ppgtt->vm, 0, size); } @@ -197,13 +196,15 @@ static int igt_ppgtt_alloc(void *arg) } goto err_ppgtt_cleanup; } + + cond_resched(); } err_ppgtt_cleanup: + mutex_lock(&dev_priv->drm.struct_mutex); ppgtt->vm.cleanup(&ppgtt->vm); - kfree(ppgtt); -err_unlock: mutex_unlock(&dev_priv->drm.struct_mutex); + kfree(ppgtt); return err; } From 0ba7c51a6fd80a89236f6ceb52e63f8a7f62bfd3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Thu, 14 Jun 2018 20:56:25 +0300 Subject: [PATCH 058/114] drm/i915: Fix hotplug irq ack on i965/g4x MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Just like with PIPESTAT, the edge triggered IIR on i965/g4x also causes problems for hotplug interrupts. To make sure we don't get the IIR port interrupt bit stuck low with the ISR bit high we must force an edge in ISR. Unfortunately we can't borrow the PIPESTAT trick and toggle the enable bits in PORT_HOTPLUG_EN as that act itself generates hotplug interrupts. Instead we just have to loop until we've cleared PORT_HOTPLUG_STAT, or we just give up and WARN. v2: Don't frob with PORT_HOTPLUG_EN Cc: stable@vger.kernel.org Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20180614175625.1615-1-ville.syrjala@linux.intel.com Reviewed-by: Imre Deak --- drivers/gpu/drm/i915/i915_irq.c | 32 ++++++++++++++++++++++++++++++-- 1 file changed, 30 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 435a4444314c..c59aa0737079 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -2069,10 +2069,38 @@ static void valleyview_pipestat_irq_handler(struct drm_i915_private *dev_priv, static u32 i9xx_hpd_irq_ack(struct drm_i915_private *dev_priv) { - u32 hotplug_status = I915_READ(PORT_HOTPLUG_STAT); + u32 hotplug_status = 0, hotplug_status_mask; + int i; - if (hotplug_status) + if (IS_G4X(dev_priv) || + IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) + hotplug_status_mask = HOTPLUG_INT_STATUS_G4X | + DP_AUX_CHANNEL_MASK_INT_STATUS_G4X; + else + hotplug_status_mask = HOTPLUG_INT_STATUS_I915; + + /* + * We absolutely have to clear all the pending interrupt + * bits in PORT_HOTPLUG_STAT. Otherwise the ISR port + * interrupt bit won't have an edge, and the i965/g4x + * edge triggered IIR will not notice that an interrupt + * is still pending. We can't use PORT_HOTPLUG_EN to + * guarantee the edge as the act of toggling the enable + * bits can itself generate a new hotplug interrupt :( + */ + for (i = 0; i < 10; i++) { + u32 tmp = I915_READ(PORT_HOTPLUG_STAT) & hotplug_status_mask; + + if (tmp == 0) + return hotplug_status; + + hotplug_status |= tmp; I915_WRITE(PORT_HOTPLUG_STAT, hotplug_status); + } + + WARN_ONCE(1, + "PORT_HOTPLUG_STAT did not clear (0x%08x)\n", + I915_READ(PORT_HOTPLUG_STAT)); return hotplug_status; } From 78c357dd3fcf51de61a0b8db3abdb8ed5aea6dd8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Mon, 11 Jun 2018 23:02:57 +0300 Subject: [PATCH 059/114] drm/i915: Fix pre-ILK error interrupt ack MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adjust the EIR clearing to cope with the edge triggered IIR on i965/g4x. To guarantee an edge in the ISR master error bit we temporarily mask everything in EMR. As some of the EIR bits can't even be directly cleared we also borrow a trick from i915_clear_error_registers() and permanently mask any bit that remains high. No real thought given to how we might unmask them again once the cause for the error has been clered. I suppose on pre-g4x GPU reset will reinitialize EMR from scratch. Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20180611200258.27121-3-ville.syrjala@linux.intel.com Reviewed-by: Imre Deak --- drivers/gpu/drm/i915/i915_irq.c | 105 +++++++++++++++++++++++++++++--- drivers/gpu/drm/i915/i915_reg.h | 1 - 2 files changed, 96 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index c59aa0737079..7fb493b7aa74 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -3279,7 +3279,7 @@ static void i915_clear_error_registers(struct drm_i915_private *dev_priv) */ DRM_DEBUG_DRIVER("EIR stuck: 0x%08x, masking\n", eir); I915_WRITE(EMR, I915_READ(EMR) | eir); - I915_WRITE(IIR, I915_RENDER_COMMAND_PARSER_ERROR_INTERRUPT); + I915_WRITE(IIR, I915_MASTER_ERROR_INTERRUPT); } } @@ -4406,6 +4406,81 @@ static int i8xx_irq_postinstall(struct drm_device *dev) return 0; } +static void i8xx_error_irq_ack(struct drm_i915_private *dev_priv, + u16 *eir, u16 *eir_stuck) +{ + u16 emr; + + *eir = I915_READ16(EIR); + + if (*eir) + I915_WRITE16(EIR, *eir); + + *eir_stuck = I915_READ16(EIR); + if (*eir_stuck == 0) + return; + + /* + * Toggle all EMR bits to make sure we get an edge + * in the ISR master error bit if we don't clear + * all the EIR bits. Otherwise the edge triggered + * IIR on i965/g4x wouldn't notice that an interrupt + * is still pending. Also some EIR bits can't be + * cleared except by handling the underlying error + * (or by a GPU reset) so we mask any bit that + * remains set. + */ + emr = I915_READ16(EMR); + I915_WRITE16(EMR, 0xffff); + I915_WRITE16(EMR, emr | *eir_stuck); +} + +static void i8xx_error_irq_handler(struct drm_i915_private *dev_priv, + u16 eir, u16 eir_stuck) +{ + DRM_DEBUG("Master Error: EIR 0x%04x\n", eir); + + if (eir_stuck) + DRM_DEBUG_DRIVER("EIR stuck: 0x%04x, masked\n", eir_stuck); +} + +static void i9xx_error_irq_ack(struct drm_i915_private *dev_priv, + u32 *eir, u32 *eir_stuck) +{ + u32 emr; + + *eir = I915_READ(EIR); + + I915_WRITE(EIR, *eir); + + *eir_stuck = I915_READ(EIR); + if (*eir_stuck == 0) + return; + + /* + * Toggle all EMR bits to make sure we get an edge + * in the ISR master error bit if we don't clear + * all the EIR bits. Otherwise the edge triggered + * IIR on i965/g4x wouldn't notice that an interrupt + * is still pending. Also some EIR bits can't be + * cleared except by handling the underlying error + * (or by a GPU reset) so we mask any bit that + * remains set. + */ + emr = I915_READ(EMR); + I915_WRITE(EMR, 0xffffffff); + I915_WRITE(EMR, emr | *eir_stuck); +} + +static void i9xx_error_irq_handler(struct drm_i915_private *dev_priv, + u32 eir, u32 eir_stuck) +{ + DRM_DEBUG("Master Error, EIR 0x%08x\n", eir); + + if (eir_stuck) + DRM_DEBUG_DRIVER("EIR stuck: 0x%08x, masked\n", eir_stuck); +} + static irqreturn_t i8xx_irq_handler(int irq, void *arg) { struct drm_device *dev = arg; @@ -4420,6 +4495,7 @@ static irqreturn_t i8xx_irq_handler(int irq, void *arg) do { u32 pipe_stats[I915_MAX_PIPES] = {}; + u16 eir = 0, eir_stuck = 0; u16 iir; iir = I915_READ16(IIR); @@ -4432,13 +4508,16 @@ static irqreturn_t i8xx_irq_handler(int irq, void *arg) * signalled in iir */ i9xx_pipestat_irq_ack(dev_priv, iir, pipe_stats); + if (iir & I915_MASTER_ERROR_INTERRUPT) + i8xx_error_irq_ack(dev_priv, &eir, &eir_stuck); + I915_WRITE16(IIR, iir); if (iir & I915_USER_INTERRUPT) notify_ring(dev_priv->engine[RCS]); - if (iir & I915_RENDER_COMMAND_PARSER_ERROR_INTERRUPT) - DRM_DEBUG("Command parser error, iir 0x%08x\n", iir); + if (iir & I915_MASTER_ERROR_INTERRUPT) + i8xx_error_irq_handler(dev_priv, eir, eir_stuck); i8xx_pipestat_irq_handler(dev_priv, iir, pipe_stats); } while (0); @@ -4519,6 +4598,7 @@ static irqreturn_t i915_irq_handler(int irq, void *arg) do { u32 pipe_stats[I915_MAX_PIPES] = {}; + u32 eir = 0, eir_stuck = 0; u32 hotplug_status = 0; u32 iir; @@ -4536,13 +4616,16 @@ static irqreturn_t i915_irq_handler(int irq, void *arg) * signalled in iir */ i9xx_pipestat_irq_ack(dev_priv, iir, pipe_stats); + if (iir & I915_MASTER_ERROR_INTERRUPT) + i9xx_error_irq_ack(dev_priv, &eir, &eir_stuck); + I915_WRITE(IIR, iir); if (iir & I915_USER_INTERRUPT) notify_ring(dev_priv->engine[RCS]); - if (iir & I915_RENDER_COMMAND_PARSER_ERROR_INTERRUPT) - DRM_DEBUG("Command parser error, iir 0x%08x\n", iir); + if (iir & I915_MASTER_ERROR_INTERRUPT) + i9xx_error_irq_handler(dev_priv, eir, eir_stuck); if (hotplug_status) i9xx_hpd_irq_handler(dev_priv, hotplug_status); @@ -4596,14 +4679,14 @@ static int i965_irq_postinstall(struct drm_device *dev) I915_DISPLAY_PORT_INTERRUPT | I915_DISPLAY_PIPE_A_EVENT_INTERRUPT | I915_DISPLAY_PIPE_B_EVENT_INTERRUPT | - I915_RENDER_COMMAND_PARSER_ERROR_INTERRUPT); + I915_MASTER_ERROR_INTERRUPT); enable_mask = I915_ASLE_INTERRUPT | I915_DISPLAY_PORT_INTERRUPT | I915_DISPLAY_PIPE_A_EVENT_INTERRUPT | I915_DISPLAY_PIPE_B_EVENT_INTERRUPT | - I915_RENDER_COMMAND_PARSER_ERROR_INTERRUPT | + I915_MASTER_ERROR_INTERRUPT | I915_USER_INTERRUPT; if (IS_G4X(dev_priv)) @@ -4663,6 +4746,7 @@ static irqreturn_t i965_irq_handler(int irq, void *arg) do { u32 pipe_stats[I915_MAX_PIPES] = {}; + u32 eir = 0, eir_stuck = 0; u32 hotplug_status = 0; u32 iir; @@ -4679,6 +4763,9 @@ static irqreturn_t i965_irq_handler(int irq, void *arg) * signalled in iir */ i9xx_pipestat_irq_ack(dev_priv, iir, pipe_stats); + if (iir & I915_MASTER_ERROR_INTERRUPT) + i9xx_error_irq_ack(dev_priv, &eir, &eir_stuck); + I915_WRITE(IIR, iir); if (iir & I915_USER_INTERRUPT) @@ -4687,8 +4774,8 @@ static irqreturn_t i965_irq_handler(int irq, void *arg) if (iir & I915_BSD_USER_INTERRUPT) notify_ring(dev_priv->engine[VCS]); - if (iir & I915_RENDER_COMMAND_PARSER_ERROR_INTERRUPT) - DRM_DEBUG("Command parser error, iir 0x%08x\n", iir); + if (iir & I915_MASTER_ERROR_INTERRUPT) + i9xx_error_irq_handler(dev_priv, eir, eir_stuck); if (hotplug_status) i9xx_hpd_irq_handler(dev_priv, hotplug_status); diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 69b9978d7dda..097000520a80 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -2871,7 +2871,6 @@ enum i915_power_well_id { #define I915_DISPLAY_PORT_INTERRUPT (1 << 17) #define I915_DISPLAY_PIPE_C_HBLANK_INTERRUPT (1 << 16) #define I915_MASTER_ERROR_INTERRUPT (1 << 15) -#define I915_RENDER_COMMAND_PARSER_ERROR_INTERRUPT (1 << 15) #define I915_DISPLAY_PIPE_B_HBLANK_INTERRUPT (1 << 14) #define I915_GMCH_THERMAL_SENSOR_EVENT_INTERRUPT (1 << 14) /* p-state */ #define I915_DISPLAY_PIPE_A_HBLANK_INTERRUPT (1 << 13) From 16659bc53a027f8e4bce6393476849e2bda8b8fa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Mon, 11 Jun 2018 23:02:58 +0300 Subject: [PATCH 060/114] drm/i915: Unmask and enable master error interrupt on gen2/3 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For whatever reason we only unmask and enable the master error interrut on gen4. With the EIR handling fixed let's do that on gen2/3 as well. Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20180611200258.27121-4-ville.syrjala@linux.intel.com Reviewed-by: Imre Deak --- drivers/gpu/drm/i915/i915_irq.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 7fb493b7aa74..2fcc00b06915 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -4387,11 +4387,13 @@ static int i8xx_irq_postinstall(struct drm_device *dev) /* Unmask the interrupts that we always want on. */ dev_priv->irq_mask = ~(I915_DISPLAY_PIPE_A_EVENT_INTERRUPT | - I915_DISPLAY_PIPE_B_EVENT_INTERRUPT); + I915_DISPLAY_PIPE_B_EVENT_INTERRUPT | + I915_MASTER_ERROR_INTERRUPT); enable_mask = I915_DISPLAY_PIPE_A_EVENT_INTERRUPT | I915_DISPLAY_PIPE_B_EVENT_INTERRUPT | + I915_MASTER_ERROR_INTERRUPT | I915_USER_INTERRUPT; GEN2_IRQ_INIT(, dev_priv->irq_mask, enable_mask); @@ -4555,12 +4557,14 @@ static int i915_irq_postinstall(struct drm_device *dev) dev_priv->irq_mask = ~(I915_ASLE_INTERRUPT | I915_DISPLAY_PIPE_A_EVENT_INTERRUPT | - I915_DISPLAY_PIPE_B_EVENT_INTERRUPT); + I915_DISPLAY_PIPE_B_EVENT_INTERRUPT | + I915_MASTER_ERROR_INTERRUPT); enable_mask = I915_ASLE_INTERRUPT | I915_DISPLAY_PIPE_A_EVENT_INTERRUPT | I915_DISPLAY_PIPE_B_EVENT_INTERRUPT | + I915_MASTER_ERROR_INTERRUPT | I915_USER_INTERRUPT; if (I915_HAS_HOTPLUG(dev_priv)) { From 63fd659fb1a52262a37293a9a034a912a2406b26 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 4 Jul 2018 19:55:18 +0100 Subject: [PATCH 061/114] drm/i915/gtt: Pull global wc page stash under its own locking Currently, the wc-stash used for providing flushed WC pages ready for constructing the page directories is assumed to be protected by the struct_mutex. However, we want to remove this global lock and so must install a replacement global lock for accessing the global wc-stash (the per-vm stash continues to be guarded by the vm). We need to push ahead on this patch due to an oversight in hastily removing the struct_mutex guard around the igt_ppgtt_alloc selftest. No matter, it will prove very useful (i.e. will be required) in the near future. v2: Restore the onstack stash so that we can drop the vm->mutex in future across the allocation. v3: Restore the lost pagevec_init of the onstack allocation, and repaint function names. v4: Reorder init so that we don't try and use i915_address_space before it is ininitialised. Fixes: 1f6f00238abf ("drm/i915/selftests: Drop struct_mutex around lowlevel pggtt allocation") Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20180704185518.4193-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_drv.h | 2 +- drivers/gpu/drm/i915/i915_gem_context.c | 2 +- drivers/gpu/drm/i915/i915_gem_gtt.c | 208 +++++++++++------- drivers/gpu/drm/i915/i915_gem_gtt.h | 10 +- drivers/gpu/drm/i915/selftests/huge_pages.c | 2 +- drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 2 +- drivers/gpu/drm/i915/selftests/mock_gtt.c | 2 +- 7 files changed, 143 insertions(+), 85 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index ce7d06332884..4f9191ab3e12 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -952,7 +952,7 @@ struct i915_gem_mm { /** * Small stash of WC pages */ - struct pagevec wc_stash; + struct pagestash wc_stash; /** * tmpfs instance used for shmem backed objects diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index ccf463ab6562..985ef70d9416 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -374,7 +374,7 @@ i915_gem_create_context(struct drm_i915_private *dev_priv, if (USES_FULL_PPGTT(dev_priv)) { struct i915_hw_ppgtt *ppgtt; - ppgtt = i915_ppgtt_create(dev_priv, file_priv, ctx->name); + ppgtt = i915_ppgtt_create(dev_priv, file_priv); if (IS_ERR(ppgtt)) { DRM_DEBUG_DRIVER("PPGTT setup failed (%ld)\n", PTR_ERR(ppgtt)); diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index ba1aed07d6dc..5fb299590579 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -375,37 +375,70 @@ static gen6_pte_t iris_pte_encode(dma_addr_t addr, return pte; } +static void stash_init(struct pagestash *stash) +{ + pagevec_init(&stash->pvec); + spin_lock_init(&stash->lock); +} + +static struct page *stash_pop_page(struct pagestash *stash) +{ + struct page *page = NULL; + + spin_lock(&stash->lock); + if (likely(stash->pvec.nr)) + page = stash->pvec.pages[--stash->pvec.nr]; + spin_unlock(&stash->lock); + + return page; +} + +static void stash_push_pagevec(struct pagestash *stash, struct pagevec *pvec) +{ + int nr; + + spin_lock_nested(&stash->lock, SINGLE_DEPTH_NESTING); + + nr = min_t(int, pvec->nr, pagevec_space(&stash->pvec)); + memcpy(stash->pvec.pages + stash->pvec.nr, + pvec->pages + pvec->nr - nr, + sizeof(pvec->pages[0]) * nr); + stash->pvec.nr += nr; + + spin_unlock(&stash->lock); + + pvec->nr -= nr; +} + static struct page *vm_alloc_page(struct i915_address_space *vm, gfp_t gfp) { - struct pagevec *pvec = &vm->free_pages; - struct pagevec stash; + struct pagevec stack; + struct page *page; if (I915_SELFTEST_ONLY(should_fail(&vm->fault_attr, 1))) i915_gem_shrink_all(vm->i915); - if (likely(pvec->nr)) - return pvec->pages[--pvec->nr]; + page = stash_pop_page(&vm->free_pages); + if (page) + return page; if (!vm->pt_kmap_wc) return alloc_page(gfp); - /* A placeholder for a specific mutex to guard the WC stash */ - lockdep_assert_held(&vm->i915->drm.struct_mutex); - /* Look in our global stash of WC pages... */ - pvec = &vm->i915->mm.wc_stash; - if (likely(pvec->nr)) - return pvec->pages[--pvec->nr]; + page = stash_pop_page(&vm->i915->mm.wc_stash); + if (page) + return page; /* - * Otherwise batch allocate pages to amoritize cost of set_pages_wc. + * Otherwise batch allocate pages to amortize cost of set_pages_wc. * * We have to be careful as page allocation may trigger the shrinker * (via direct reclaim) which will fill up the WC stash underneath us. * So we add our WB pages into a temporary pvec on the stack and merge * them into the WC stash after all the allocations are complete. */ - pagevec_init(&stash); + pagevec_init(&stack); do { struct page *page; @@ -413,59 +446,67 @@ static struct page *vm_alloc_page(struct i915_address_space *vm, gfp_t gfp) if (unlikely(!page)) break; - stash.pages[stash.nr++] = page; - } while (stash.nr < pagevec_space(pvec)); + stack.pages[stack.nr++] = page; + } while (pagevec_space(&stack)); - if (stash.nr) { - int nr = min_t(int, stash.nr, pagevec_space(pvec)); - struct page **pages = stash.pages + stash.nr - nr; + if (stack.nr && !set_pages_array_wc(stack.pages, stack.nr)) { + page = stack.pages[--stack.nr]; - if (nr && !set_pages_array_wc(pages, nr)) { - memcpy(pvec->pages + pvec->nr, - pages, sizeof(pages[0]) * nr); - pvec->nr += nr; - stash.nr -= nr; - } + /* Merge spare WC pages to the global stash */ + stash_push_pagevec(&vm->i915->mm.wc_stash, &stack); - pagevec_release(&stash); + /* Push any surplus WC pages onto the local VM stash */ + if (stack.nr) + stash_push_pagevec(&vm->free_pages, &stack); } - return likely(pvec->nr) ? pvec->pages[--pvec->nr] : NULL; + /* Return unwanted leftovers */ + if (unlikely(stack.nr)) { + WARN_ON_ONCE(set_pages_array_wb(stack.pages, stack.nr)); + __pagevec_release(&stack); + } + + return page; } static void vm_free_pages_release(struct i915_address_space *vm, bool immediate) { - struct pagevec *pvec = &vm->free_pages; + struct pagevec *pvec = &vm->free_pages.pvec; + struct pagevec stack; + lockdep_assert_held(&vm->free_pages.lock); GEM_BUG_ON(!pagevec_count(pvec)); if (vm->pt_kmap_wc) { - struct pagevec *stash = &vm->i915->mm.wc_stash; - - /* When we use WC, first fill up the global stash and then + /* + * When we use WC, first fill up the global stash and then * only if full immediately free the overflow. */ + stash_push_pagevec(&vm->i915->mm.wc_stash, pvec); - lockdep_assert_held(&vm->i915->drm.struct_mutex); - if (pagevec_space(stash)) { - do { - stash->pages[stash->nr++] = - pvec->pages[--pvec->nr]; - if (!pvec->nr) - return; - } while (pagevec_space(stash)); + /* + * As we have made some room in the VM's free_pages, + * we can wait for it to fill again. Unless we are + * inside i915_address_space_fini() and must + * immediately release the pages! + */ + if (pvec->nr <= (immediate ? 0 : PAGEVEC_SIZE - 1)) + return; - /* As we have made some room in the VM's free_pages, - * we can wait for it to fill again. Unless we are - * inside i915_address_space_fini() and must - * immediately release the pages! - */ - if (!immediate) - return; - } + /* + * We have to drop the lock to allow ourselves to sleep, + * so take a copy of the pvec and clear the stash for + * others to use it as we sleep. + */ + stack = *pvec; + pagevec_reinit(pvec); + spin_unlock(&vm->free_pages.lock); + pvec = &stack; set_pages_array_wb(pvec->pages, pvec->nr); + + spin_lock(&vm->free_pages.lock); } __pagevec_release(pvec); @@ -481,8 +522,38 @@ static void vm_free_page(struct i915_address_space *vm, struct page *page) * unconditional might_sleep() for everybody. */ might_sleep(); - if (!pagevec_add(&vm->free_pages, page)) + spin_lock(&vm->free_pages.lock); + if (!pagevec_add(&vm->free_pages.pvec, page)) vm_free_pages_release(vm, false); + spin_unlock(&vm->free_pages.lock); +} + +static void i915_address_space_init(struct i915_address_space *vm, + struct drm_i915_private *dev_priv) +{ + GEM_BUG_ON(!vm->total); + drm_mm_init(&vm->mm, 0, vm->total); + vm->mm.head_node.color = I915_COLOR_UNEVICTABLE; + + stash_init(&vm->free_pages); + + INIT_LIST_HEAD(&vm->active_list); + INIT_LIST_HEAD(&vm->inactive_list); + INIT_LIST_HEAD(&vm->unbound_list); + + list_add_tail(&vm->global_link, &dev_priv->vm_list); +} + +static void i915_address_space_fini(struct i915_address_space *vm) +{ + spin_lock(&vm->free_pages.lock); + if (pagevec_count(&vm->free_pages.pvec)) + vm_free_pages_release(vm, true); + GEM_BUG_ON(pagevec_count(&vm->free_pages.pvec)); + spin_unlock(&vm->free_pages.lock); + + drm_mm_takedown(&vm->mm); + list_del(&vm->global_link); } static int __setup_page_dma(struct i915_address_space *vm, @@ -1562,6 +1633,8 @@ static struct i915_hw_ppgtt *gen8_ppgtt_create(struct drm_i915_private *i915) if (!ppgtt) return ERR_PTR(-ENOMEM); + kref_init(&ppgtt->ref); + ppgtt->vm.i915 = i915; ppgtt->vm.dma = &i915->drm.pdev->dev; @@ -1569,6 +1642,8 @@ static struct i915_hw_ppgtt *gen8_ppgtt_create(struct drm_i915_private *i915) 1ULL << 48 : 1ULL << 32; + i915_address_space_init(&ppgtt->vm, i915); + /* There are only few exceptions for gen >=6. chv and bxt. * And we are not sure about the latter so play safe for now. */ @@ -2068,11 +2143,15 @@ static struct i915_hw_ppgtt *gen6_ppgtt_create(struct drm_i915_private *i915) if (!ppgtt) return ERR_PTR(-ENOMEM); + kref_init(&ppgtt->base.ref); + ppgtt->base.vm.i915 = i915; ppgtt->base.vm.dma = &i915->drm.pdev->dev; ppgtt->base.vm.total = I915_PDES * GEN6_PTES * PAGE_SIZE; + i915_address_space_init(&ppgtt->base.vm, i915); + ppgtt->base.vm.allocate_va_range = gen6_alloc_va_range; ppgtt->base.vm.clear_range = gen6_ppgtt_clear_range; ppgtt->base.vm.insert_entries = gen6_ppgtt_insert_entries; @@ -2105,30 +2184,6 @@ err_free: return ERR_PTR(err); } -static void i915_address_space_init(struct i915_address_space *vm, - struct drm_i915_private *dev_priv, - const char *name) -{ - drm_mm_init(&vm->mm, 0, vm->total); - vm->mm.head_node.color = I915_COLOR_UNEVICTABLE; - - INIT_LIST_HEAD(&vm->active_list); - INIT_LIST_HEAD(&vm->inactive_list); - INIT_LIST_HEAD(&vm->unbound_list); - - list_add_tail(&vm->global_link, &dev_priv->vm_list); - pagevec_init(&vm->free_pages); -} - -static void i915_address_space_fini(struct i915_address_space *vm) -{ - if (pagevec_count(&vm->free_pages)) - vm_free_pages_release(vm, true); - - drm_mm_takedown(&vm->mm); - list_del(&vm->global_link); -} - static void gtt_write_workarounds(struct drm_i915_private *dev_priv) { /* This function is for gtt related workarounds. This function is @@ -2199,8 +2254,7 @@ __hw_ppgtt_create(struct drm_i915_private *i915) struct i915_hw_ppgtt * i915_ppgtt_create(struct drm_i915_private *i915, - struct drm_i915_file_private *fpriv, - const char *name) + struct drm_i915_file_private *fpriv) { struct i915_hw_ppgtt *ppgtt; @@ -2208,8 +2262,6 @@ i915_ppgtt_create(struct drm_i915_private *i915, if (IS_ERR(ppgtt)) return ppgtt; - kref_init(&ppgtt->ref); - i915_address_space_init(&ppgtt->vm, i915, name); ppgtt->vm.file = fpriv; trace_i915_ppgtt_create(&ppgtt->vm); @@ -2788,7 +2840,7 @@ int i915_gem_init_aliasing_ppgtt(struct drm_i915_private *i915) struct i915_hw_ppgtt *ppgtt; int err; - ppgtt = i915_ppgtt_create(i915, ERR_PTR(-EPERM), "[alias]"); + ppgtt = i915_ppgtt_create(i915, ERR_PTR(-EPERM)); if (IS_ERR(ppgtt)) return PTR_ERR(ppgtt); @@ -2918,7 +2970,7 @@ void i915_ggtt_cleanup_hw(struct drm_i915_private *dev_priv) ggtt->vm.cleanup(&ggtt->vm); - pvec = &dev_priv->mm.wc_stash; + pvec = &dev_priv->mm.wc_stash.pvec; if (pvec->nr) { set_pages_array_wb(pvec->pages, pvec->nr); __pagevec_release(pvec); @@ -3518,6 +3570,8 @@ int i915_ggtt_init_hw(struct drm_i915_private *dev_priv) struct i915_ggtt *ggtt = &dev_priv->ggtt; int ret; + stash_init(&dev_priv->mm.wc_stash); + INIT_LIST_HEAD(&dev_priv->vm_list); /* Note that we use page colouring to enforce a guard page at the @@ -3526,7 +3580,7 @@ int i915_ggtt_init_hw(struct drm_i915_private *dev_priv) * and beyond the end of the GTT if we do not provide a guard. */ mutex_lock(&dev_priv->drm.struct_mutex); - i915_address_space_init(&ggtt->vm, dev_priv, "[global]"); + i915_address_space_init(&ggtt->vm, dev_priv); if (!HAS_LLC(dev_priv) && !USES_PPGTT(dev_priv)) ggtt->vm.mm.color_adjust = i915_gtt_color_adjust; mutex_unlock(&dev_priv->drm.struct_mutex); diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index 9a4824cae68d..f298e72b79ca 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -270,6 +270,11 @@ struct i915_vma_ops { void (*clear_pages)(struct i915_vma *vma); }; +struct pagestash { + spinlock_t lock; + struct pagevec pvec; +}; + struct i915_address_space { struct drm_mm mm; struct drm_i915_private *i915; @@ -324,7 +329,7 @@ struct i915_address_space { */ struct list_head unbound_list; - struct pagevec free_pages; + struct pagestash free_pages; bool pt_kmap_wc; /* FIXME: Need a more generic return type */ @@ -615,8 +620,7 @@ void i915_ggtt_cleanup_hw(struct drm_i915_private *dev_priv); int i915_ppgtt_init_hw(struct drm_i915_private *dev_priv); void i915_ppgtt_release(struct kref *kref); struct i915_hw_ppgtt *i915_ppgtt_create(struct drm_i915_private *dev_priv, - struct drm_i915_file_private *fpriv, - const char *name); + struct drm_i915_file_private *fpriv); void i915_ppgtt_close(struct i915_address_space *vm); static inline void i915_ppgtt_get(struct i915_hw_ppgtt *ppgtt) { diff --git a/drivers/gpu/drm/i915/selftests/huge_pages.c b/drivers/gpu/drm/i915/selftests/huge_pages.c index b5e87fcdcdae..39699939587d 100644 --- a/drivers/gpu/drm/i915/selftests/huge_pages.c +++ b/drivers/gpu/drm/i915/selftests/huge_pages.c @@ -1694,7 +1694,7 @@ int i915_gem_huge_page_mock_selftests(void) dma_coerce_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(39)); mutex_lock(&dev_priv->drm.struct_mutex); - ppgtt = i915_ppgtt_create(dev_priv, ERR_PTR(-ENODEV), "mock"); + ppgtt = i915_ppgtt_create(dev_priv, ERR_PTR(-ENODEV)); if (IS_ERR(ppgtt)) { err = PTR_ERR(ppgtt); goto out_unlock; diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c index a7956ecc3e1f..4bfb0537f9be 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c @@ -1004,7 +1004,7 @@ static int exercise_ppgtt(struct drm_i915_private *dev_priv, return PTR_ERR(file); mutex_lock(&dev_priv->drm.struct_mutex); - ppgtt = i915_ppgtt_create(dev_priv, file->driver_priv, "mock"); + ppgtt = i915_ppgtt_create(dev_priv, file->driver_priv); if (IS_ERR(ppgtt)) { err = PTR_ERR(ppgtt); goto out_unlock; diff --git a/drivers/gpu/drm/i915/selftests/mock_gtt.c b/drivers/gpu/drm/i915/selftests/mock_gtt.c index 6a7f4da7b523..0da5b8c6d912 100644 --- a/drivers/gpu/drm/i915/selftests/mock_gtt.c +++ b/drivers/gpu/drm/i915/selftests/mock_gtt.c @@ -124,7 +124,7 @@ void mock_init_ggtt(struct drm_i915_private *i915) ggtt->vm.vma_ops.set_pages = ggtt_set_pages; ggtt->vm.vma_ops.clear_pages = clear_pages; - i915_address_space_init(&ggtt->vm, i915, "global"); + i915_address_space_init(&ggtt->vm, i915); } void mock_fini_ggtt(struct drm_i915_private *i915) From cef08fdc743c4211aeba69dd23408093d59da241 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 5 Jul 2018 07:56:51 +0100 Subject: [PATCH 062/114] drm/i915: Remove defunct i915->vm_list No longer used and can be removed. One less global that currently demands struct_mutex protection. References: e9e7dc4144cd ("drm/i915/gtt: Make gen6 page directories evictable") Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20180705065653.20449-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_drv.h | 1 - drivers/gpu/drm/i915/i915_gem_gtt.c | 5 ----- drivers/gpu/drm/i915/i915_gem_gtt.h | 1 - drivers/gpu/drm/i915/selftests/mock_gtt.c | 3 --- 4 files changed, 10 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 4f9191ab3e12..e236016eef7b 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1744,7 +1744,6 @@ struct drm_i915_private { struct drm_atomic_state *modeset_restore_state; struct drm_modeset_acquire_ctx reset_ctx; - struct list_head vm_list; /* Global list of all address spaces */ struct i915_ggtt ggtt; /* VM representing the global address space */ struct i915_gem_mm mm; diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 5fb299590579..a9330de886f8 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -540,8 +540,6 @@ static void i915_address_space_init(struct i915_address_space *vm, INIT_LIST_HEAD(&vm->active_list); INIT_LIST_HEAD(&vm->inactive_list); INIT_LIST_HEAD(&vm->unbound_list); - - list_add_tail(&vm->global_link, &dev_priv->vm_list); } static void i915_address_space_fini(struct i915_address_space *vm) @@ -553,7 +551,6 @@ static void i915_address_space_fini(struct i915_address_space *vm) spin_unlock(&vm->free_pages.lock); drm_mm_takedown(&vm->mm); - list_del(&vm->global_link); } static int __setup_page_dma(struct i915_address_space *vm, @@ -3572,8 +3569,6 @@ int i915_ggtt_init_hw(struct drm_i915_private *dev_priv) stash_init(&dev_priv->mm.wc_stash); - INIT_LIST_HEAD(&dev_priv->vm_list); - /* Note that we use page colouring to enforce a guard page at the * end of the address space. This is required as the CS may prefetch * beyond the end of the batch buffer, across the page boundary, diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index f298e72b79ca..feda45dfd481 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -288,7 +288,6 @@ struct i915_address_space { * assign blame. */ struct drm_i915_file_private *file; - struct list_head global_link; u64 total; /* size addr space maps (ex. 2GB for ggtt) */ u64 reserved; /* size addr space reserved */ diff --git a/drivers/gpu/drm/i915/selftests/mock_gtt.c b/drivers/gpu/drm/i915/selftests/mock_gtt.c index 0da5b8c6d912..07df5c399ec1 100644 --- a/drivers/gpu/drm/i915/selftests/mock_gtt.c +++ b/drivers/gpu/drm/i915/selftests/mock_gtt.c @@ -74,7 +74,6 @@ mock_ppgtt(struct drm_i915_private *i915, INIT_LIST_HEAD(&ppgtt->vm.inactive_list); INIT_LIST_HEAD(&ppgtt->vm.unbound_list); - INIT_LIST_HEAD(&ppgtt->vm.global_link); drm_mm_init(&ppgtt->vm.mm, 0, ppgtt->vm.total); ppgtt->vm.clear_range = nop_clear_range; @@ -106,8 +105,6 @@ void mock_init_ggtt(struct drm_i915_private *i915) { struct i915_ggtt *ggtt = &i915->ggtt; - INIT_LIST_HEAD(&i915->vm_list); - ggtt->vm.i915 = i915; ggtt->gmadr = (struct resource) DEFINE_RES_MEM(0, 2048 * PAGE_SIZE); From eae4c9445354602926ff7cc4702c2516e2485a94 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 5 Jul 2018 07:56:52 +0100 Subject: [PATCH 063/114] drm/i915/selftests: Use full release for local ppgtt allocation We can now use the full release mechanism (i915_ppgtt_put) for our local ppgtt allocation in igt_ppgtt_alloc. Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20180705065653.20449-2-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c index 4bfb0537f9be..e108fe4e0fd9 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c @@ -202,9 +202,8 @@ static int igt_ppgtt_alloc(void *arg) err_ppgtt_cleanup: mutex_lock(&dev_priv->drm.struct_mutex); - ppgtt->vm.cleanup(&ppgtt->vm); + i915_ppgtt_put(ppgtt); mutex_unlock(&dev_priv->drm.struct_mutex); - kfree(ppgtt); return err; } From 0f17d5dd2199555830482b638a5fc8bf915f2f10 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 5 Jul 2018 07:56:53 +0100 Subject: [PATCH 064/114] drm/i915/selftests: Replace open-coded i915_address_space_init() Use i915_address_space_init() rather than open-code it inside mock_ppgtt() as we will forget to keep it in sync. Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20180705065653.20449-3-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/selftests/mock_gtt.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/selftests/mock_gtt.c b/drivers/gpu/drm/i915/selftests/mock_gtt.c index 07df5c399ec1..a140ea5c3a7c 100644 --- a/drivers/gpu/drm/i915/selftests/mock_gtt.c +++ b/drivers/gpu/drm/i915/selftests/mock_gtt.c @@ -70,11 +70,7 @@ mock_ppgtt(struct drm_i915_private *i915, ppgtt->vm.total = round_down(U64_MAX, PAGE_SIZE); ppgtt->vm.file = ERR_PTR(-ENODEV); - INIT_LIST_HEAD(&ppgtt->vm.active_list); - INIT_LIST_HEAD(&ppgtt->vm.inactive_list); - INIT_LIST_HEAD(&ppgtt->vm.unbound_list); - - drm_mm_init(&ppgtt->vm.mm, 0, ppgtt->vm.total); + i915_address_space_init(&ppgtt->vm, i915); ppgtt->vm.clear_range = nop_clear_range; ppgtt->vm.insert_page = mock_insert_page; From 27efd2566cb89b7909366dbb88add8fb1e3d24e2 Mon Sep 17 00:00:00 2001 From: Madhav Chauhan Date: Thu, 5 Jul 2018 18:31:48 +0530 Subject: [PATCH 065/114] drm/i915/icl: Define register for DSI PLL This patch adds the new registers and corresponding bit definitions which will be used for programming/enable DSI PLL. v2: Review comments from Jani N - Fix spaces while defining ICL_ESC_CLK_DIV_MASK - Define shift and mask for bitfields. Signed-off-by: Madhav Chauhan Reviewed-by: Jani Nikula Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/1530795727-28644-2-git-send-email-madhav.chauhan@intel.com --- drivers/gpu/drm/i915/i915_reg.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 097000520a80..dc953ee7e3b3 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -9538,6 +9538,21 @@ enum skl_power_gate { #define MIPIO_TXESC_CLK_DIV2 _MMIO(0x160008) #define GLK_TX_ESC_CLK_DIV2_MASK 0x3FF +#define _ICL_DSI_ESC_CLK_DIV0 0x6b090 +#define _ICL_DSI_ESC_CLK_DIV1 0x6b890 +#define ICL_DSI_ESC_CLK_DIV(port) _MMIO_PORT((port), \ + _ICL_DSI_ESC_CLK_DIV0, \ + _ICL_DSI_ESC_CLK_DIV1) +#define _ICL_DPHY_ESC_CLK_DIV0 0x162190 +#define _ICL_DPHY_ESC_CLK_DIV1 0x6C190 +#define ICL_DPHY_ESC_CLK_DIV(port) _MMIO_PORT((port), \ + _ICL_DPHY_ESC_CLK_DIV0, \ + _ICL_DPHY_ESC_CLK_DIV1) +#define ICL_BYTE_CLK_PER_ESC_CLK_MASK (0x1f << 16) +#define ICL_BYTE_CLK_PER_ESC_CLK_SHIFT 16 +#define ICL_ESC_CLK_DIV_MASK 0x1ff +#define ICL_ESC_CLK_DIV_SHIFT 0 + /* Gen4+ Timestamp and Pipe Frame time stamp registers */ #define GEN4_TIMESTAMP _MMIO(0x2358) #define ILK_TIMESTAMP_HI _MMIO(0x70070) From f0d759f038dcced7dfb756dc54c224f09ad062a2 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Thu, 28 Jun 2018 17:35:41 -0500 Subject: [PATCH 066/114] drm/i915: Mark expected switch fall-throughs In preparation to enabling -Wimplicit-fallthrough, mark switch cases where we are expecting to fall through. Addresses-Coverity-ID: 141432 Addresses-Coverity-ID: 141433 Addresses-Coverity-ID: 141434 Addresses-Coverity-ID: 141435 Addresses-Coverity-ID: 141436 Addresses-Coverity-ID: 1357360 Addresses-Coverity-ID: 1357403 Addresses-Coverity-ID: 1357433 Addresses-Coverity-ID: 1392622 Addresses-Coverity-ID: 1415273 Addresses-Coverity-ID: 1435752 Addresses-Coverity-ID: 1441500 Addresses-Coverity-ID: 1454596 Signed-off-by: Gustavo A. R. Silva Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20180628223541.GA17665@embeddedor.com --- drivers/gpu/drm/i915/i915_gem.c | 1 + drivers/gpu/drm/i915/i915_gem_stolen.c | 1 + drivers/gpu/drm/i915/intel_cdclk.c | 5 +++++ drivers/gpu/drm/i915/intel_ddi.c | 1 + drivers/gpu/drm/i915/intel_display.c | 2 ++ drivers/gpu/drm/i915/intel_dpll_mgr.c | 3 +++ drivers/gpu/drm/i915/intel_drv.h | 1 + drivers/gpu/drm/i915/intel_engine_cs.c | 1 + drivers/gpu/drm/i915/intel_runtime_pm.c | 1 + drivers/gpu/drm/i915/intel_sdvo.c | 6 ++++++ 10 files changed, 22 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 048b722cf27c..0c0a1a959d0b 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2120,6 +2120,7 @@ err: */ if (!i915_terminally_wedged(&dev_priv->gpu_error)) return VM_FAULT_SIGBUS; + /* else: fall through */ case -EAGAIN: /* * EAGAIN means the gpu is hung and we'll wait for the error diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c index 79a347295e00..055f8687776d 100644 --- a/drivers/gpu/drm/i915/i915_gem_stolen.c +++ b/drivers/gpu/drm/i915/i915_gem_stolen.c @@ -254,6 +254,7 @@ static void vlv_get_stolen_reserved(struct drm_i915_private *dev_priv, switch (reg_val & GEN7_STOLEN_RESERVED_SIZE_MASK) { default: MISSING_CASE(reg_val & GEN7_STOLEN_RESERVED_SIZE_MASK); + /* fall through */ case GEN7_STOLEN_RESERVED_1M: *size = 1024 * 1024; break; diff --git a/drivers/gpu/drm/i915/intel_cdclk.c b/drivers/gpu/drm/i915/intel_cdclk.c index bf9433d7964d..29075c763428 100644 --- a/drivers/gpu/drm/i915/intel_cdclk.c +++ b/drivers/gpu/drm/i915/intel_cdclk.c @@ -316,6 +316,7 @@ static void pnv_get_cdclk(struct drm_i915_private *dev_priv, break; default: DRM_ERROR("Unknown pnv display core clock 0x%04x\n", gcfgc); + /* fall through */ case GC_DISPLAY_CLOCK_133_MHZ_PNV: cdclk_state->cdclk = 133333; break; @@ -1797,6 +1798,7 @@ static int icl_calc_cdclk(int min_cdclk, unsigned int ref) switch (ref) { default: MISSING_CASE(ref); + /* fall through */ case 24000: ranges = ranges_24; break; @@ -1824,6 +1826,7 @@ static int icl_calc_cdclk_pll_vco(struct drm_i915_private *dev_priv, int cdclk) switch (cdclk) { default: MISSING_CASE(cdclk); + /* fall through */ case 307200: case 556800: case 652800: @@ -1896,6 +1899,7 @@ static u8 icl_calc_voltage_level(int cdclk) return 1; default: MISSING_CASE(cdclk); + /* fall through */ case 652800: case 648000: return 2; @@ -1913,6 +1917,7 @@ static void icl_get_cdclk(struct drm_i915_private *dev_priv, switch (val & ICL_DSSM_CDCLK_PLL_REFCLK_MASK) { default: MISSING_CASE(val); + /* fall through */ case ICL_DSSM_CDCLK_PLL_REFCLK_24MHz: cdclk_state->ref = 24000; break; diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index 0319825b725b..c74b01a52082 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -1069,6 +1069,7 @@ static uint32_t icl_pll_to_ddi_pll_sel(struct intel_encoder *encoder, switch (id) { default: MISSING_CASE(id); + /* fall through */ case DPLL_ID_ICL_DPLL0: case DPLL_ID_ICL_DPLL1: return DDI_CLK_SEL_NONE; diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index ce7646265b50..12102224f47a 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -9360,6 +9360,7 @@ static bool hsw_get_transcoder_state(struct intel_crtc *crtc, switch (tmp & TRANS_DDI_EDP_INPUT_MASK) { default: WARN(1, "unknown pipe linked to edp transcoder\n"); + /* fall through */ case TRANS_DDI_EDP_INPUT_A_ONOFF: case TRANS_DDI_EDP_INPUT_A_ON: trans_edp_pipe = PIPE_A; @@ -11024,6 +11025,7 @@ static bool check_digital_port_conflicts(struct drm_atomic_state *state) case INTEL_OUTPUT_DDI: if (WARN_ON(!HAS_DDI(to_i915(dev)))) break; + /* else: fall through */ case INTEL_OUTPUT_DP: case INTEL_OUTPUT_HDMI: case INTEL_OUTPUT_EDP: diff --git a/drivers/gpu/drm/i915/intel_dpll_mgr.c b/drivers/gpu/drm/i915/intel_dpll_mgr.c index 57342364fd30..058696b7d6c3 100644 --- a/drivers/gpu/drm/i915/intel_dpll_mgr.c +++ b/drivers/gpu/drm/i915/intel_dpll_mgr.c @@ -2566,6 +2566,7 @@ int icl_calc_dp_combo_pll_link(struct drm_i915_private *dev_priv, switch (index) { default: MISSING_CASE(index); + /* fall through */ case 0: link_clock = 540000; break; @@ -2639,6 +2640,7 @@ static bool icl_mg_pll_find_divisors(int clock_khz, bool is_dp, bool use_ssc, switch (div1) { default: MISSING_CASE(div1); + /* fall through */ case 2: hsdiv = 0; break; @@ -2903,6 +2905,7 @@ static i915_reg_t icl_pll_id_to_enable_reg(enum intel_dpll_id id) switch (id) { default: MISSING_CASE(id); + /* fall through */ case DPLL_ID_ICL_DPLL0: case DPLL_ID_ICL_DPLL1: return CNL_DPLL_ENABLE(id); diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index a8073eb02ffa..cdfdebc11ef8 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -1253,6 +1253,7 @@ enc_to_dig_port(struct drm_encoder *encoder) switch (intel_encoder->type) { case INTEL_OUTPUT_DDI: WARN_ON(!HAS_DDI(to_i915(encoder->dev))); + /* fall through */ case INTEL_OUTPUT_DP: case INTEL_OUTPUT_EDP: case INTEL_OUTPUT_HDMI: diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index 01862884a436..478c928912c4 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -229,6 +229,7 @@ __intel_engine_context_size(struct drm_i915_private *dev_priv, u8 class) break; default: MISSING_CASE(class); + /* fall through */ case VIDEO_DECODE_CLASS: case VIDEO_ENHANCEMENT_CLASS: case COPY_ENGINE_CLASS: diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c index d81b2cfe1c5e..6b5aa3b074ec 100644 --- a/drivers/gpu/drm/i915/intel_runtime_pm.c +++ b/drivers/gpu/drm/i915/intel_runtime_pm.c @@ -3212,6 +3212,7 @@ static void cnl_set_procmon_ref_values(struct drm_i915_private *dev_priv, switch (val & (PROCESS_INFO_MASK | VOLTAGE_INFO_MASK)) { default: MISSING_CASE(val); + /* fall through */ case VOLTAGE_INFO_0_85V | PROCESS_INFO_DOT_0: procmon = &cnl_procmon_values[PROCMON_0_85V_DOT_0]; break; diff --git a/drivers/gpu/drm/i915/intel_sdvo.c b/drivers/gpu/drm/i915/intel_sdvo.c index 9d9229a1f36c..36f81a96b8f6 100644 --- a/drivers/gpu/drm/i915/intel_sdvo.c +++ b/drivers/gpu/drm/i915/intel_sdvo.c @@ -1340,6 +1340,7 @@ static void intel_sdvo_pre_enable(struct intel_encoder *intel_encoder, switch (crtc_state->pixel_multiplier) { default: WARN(1, "unknown pixel multiplier specified\n"); + /* fall through */ case 1: rate = SDVO_CLOCK_RATE_MULT_1X; break; case 2: rate = SDVO_CLOCK_RATE_MULT_2X; break; case 4: rate = SDVO_CLOCK_RATE_MULT_4X; break; @@ -2316,14 +2317,19 @@ intel_sdvo_guess_ddc_bus(struct intel_sdvo *sdvo) switch (sdvo->controlled_output) { case SDVO_OUTPUT_LVDS1: mask |= SDVO_OUTPUT_LVDS1; + /* fall through */ case SDVO_OUTPUT_LVDS0: mask |= SDVO_OUTPUT_LVDS0; + /* fall through */ case SDVO_OUTPUT_TMDS1: mask |= SDVO_OUTPUT_TMDS1; + /* fall through */ case SDVO_OUTPUT_TMDS0: mask |= SDVO_OUTPUT_TMDS0; + /* fall through */ case SDVO_OUTPUT_RGB1: mask |= SDVO_OUTPUT_RGB1; + /* fall through */ case SDVO_OUTPUT_RGB0: mask |= SDVO_OUTPUT_RGB0; break; From bb9e8755a4259a306cd1691b4e17c4c485d81c9e Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 5 Jul 2018 16:47:56 +0100 Subject: [PATCH 067/114] drm/i915/selftests: Fixup recursive MI_BB_START for gen3 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There's no magic bit0 in MI_BB_START for gen3, it's the same dword length parameter as elsewhere and needs to be zero. v2: Same bug in both live_requests and live_hanghcheck. References: https://bugs.freedesktop.org/show_bug.cgi?id=107132 Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20180705154756.5533-1-chris@chris-wilson.co.uk Reviewed-by: Ville Syrjälä --- drivers/gpu/drm/i915/selftests/i915_request.c | 5 +---- drivers/gpu/drm/i915/selftests/intel_hangcheck.c | 2 +- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/selftests/i915_request.c b/drivers/gpu/drm/i915/selftests/i915_request.c index 521ae4a90ddf..e44aa3335d9e 100644 --- a/drivers/gpu/drm/i915/selftests/i915_request.c +++ b/drivers/gpu/drm/i915/selftests/i915_request.c @@ -594,11 +594,8 @@ static struct i915_vma *recursive_batch(struct drm_i915_private *i915) } else if (gen >= 6) { *cmd++ = MI_BATCH_BUFFER_START | 1 << 8; *cmd++ = lower_32_bits(vma->node.start); - } else if (gen >= 4) { - *cmd++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT; - *cmd++ = lower_32_bits(vma->node.start); } else { - *cmd++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT | 1; + *cmd++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT; *cmd++ = lower_32_bits(vma->node.start); } *cmd++ = MI_BATCH_BUFFER_END; /* terminate early in case of error */ diff --git a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c index fe7d3190ebfe..72547aa24538 100644 --- a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c +++ b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c @@ -193,7 +193,7 @@ static int emit_recurse_batch(struct hang *h, batch += 1024 / sizeof(*batch); *batch++ = MI_ARB_CHECK; - *batch++ = MI_BATCH_BUFFER_START | 2 << 6 | 1; + *batch++ = MI_BATCH_BUFFER_START | 2 << 6; *batch++ = lower_32_bits(vma->node.start); } *batch++ = MI_BATCH_BUFFER_END; /* not reached */ From 9757973f41b52cd8fa7850100b8b0ae85261bcc5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Thu, 21 Jun 2018 20:46:58 +0300 Subject: [PATCH 068/114] drm/i915: Remove pointless if-else from sdvo code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The return value is a bool so we can just return the result of the biwise AND. The compiler will take care of the rest. Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20180621174658.18823-1-ville.syrjala@linux.intel.com Reviewed-by: Chris Wilson --- drivers/gpu/drm/i915/intel_sdvo.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_sdvo.c b/drivers/gpu/drm/i915/intel_sdvo.c index 36f81a96b8f6..5417c54f67da 100644 --- a/drivers/gpu/drm/i915/intel_sdvo.c +++ b/drivers/gpu/drm/i915/intel_sdvo.c @@ -1401,10 +1401,7 @@ static bool intel_sdvo_connector_get_hw_state(struct intel_connector *connector) intel_sdvo_get_active_outputs(intel_sdvo, &active_outputs); - if (active_outputs & intel_sdvo_connector->output_flag) - return true; - else - return false; + return active_outputs & intel_sdvo_connector->output_flag; } bool intel_sdvo_port_enabled(struct drm_i915_private *dev_priv, From 73d8e5fba54f400cd5fe48517dbe6776fb16c2ad Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 5 Jul 2018 18:15:23 +0100 Subject: [PATCH 069/114] drm/i915/selftests: Detect unknown swizzling correctly MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit i915_gem_detect_bit_6_swizzle() tries to hide unknown swizzling from userspace (and ourselves) leaving us with the only clue inside i915->quirks & QUIRK_PIN_SWIZZLED_PAGES. If we see this bit set, it means that we really have no clue as to what the swizzle pattern is being used in any one page and so cannot compute what the reference value should be in our tiling selftests. We have to skip the test. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=107133 Signed-off-by: Chris Wilson Reviewed-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20180705171523.18462-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/selftests/i915_gem_object.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_object.c b/drivers/gpu/drm/i915/selftests/i915_gem_object.c index 77dd7a510ea6..885153268968 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_object.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_object.c @@ -347,6 +347,14 @@ static int igt_partial_tiling(void *arg) unsigned int pitch; struct tile tile; + if (i915->quirks & QUIRK_PIN_SWIZZLED_PAGES) + /* + * The swizzling pattern is actually unknown as it + * varies based on physical address of each page. + * See i915_gem_detect_bit_6_swizzle(). + */ + break; + tile.tiling = tiling; switch (tiling) { case I915_TILING_X: @@ -357,8 +365,8 @@ static int igt_partial_tiling(void *arg) break; } - if (tile.swizzle == I915_BIT_6_SWIZZLE_UNKNOWN || - tile.swizzle == I915_BIT_6_SWIZZLE_9_10_17) + GEM_BUG_ON(tile.swizzle == I915_BIT_6_SWIZZLE_UNKNOWN); + if (tile.swizzle == I915_BIT_6_SWIZZLE_9_10_17) continue; if (INTEL_GEN(i915) <= 2) { From c4e4f4545bdc3939ae9fff1da55ed1e28776dafe Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 5 Jul 2018 16:02:14 +0100 Subject: [PATCH 070/114] drm/i915/selftests: Fail hangcheck testing if the GPU is wedged If the GPU is irrecoverably wedged on startup, it means that it failed on initialisation and we have already tried to reset it but failed. We can ignore all further testing, as it is already dead. Failing early, prevents us from slowly failing in our endeavours later and timing out. Signed-off-by: Chris Wilson Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20180705150214.28316-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/selftests/intel_hangcheck.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c index 72547aa24538..5cb808dc5b50 100644 --- a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c +++ b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c @@ -1243,6 +1243,9 @@ int intel_hangcheck_live_selftests(struct drm_i915_private *i915) if (!intel_has_gpu_reset(i915)) return 0; + if (i915_terminally_wedged(&i915->gpu_error)) + return -EIO; /* we're long past hope of a successful reset */ + intel_runtime_pm_get(i915); saved_hangcheck = fetch_and_zero(&i915_modparams.enable_hangcheck); From ca3589c11815cb1587a908c46e8f04a05a193254 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Thu, 5 Jul 2018 16:25:07 +0300 Subject: [PATCH 071/114] drm/i915/dsi: rename the current DSI files based on first platform MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Starting from ICL or gen 11 we have a new DSI block which requires completely different programming from the current implementation. Having them in the same file would be confusing. Rename the current DSI and DSI PLL implementation files as vlv_dsi.c and vlv_dsi_pll.c. No functional changes. v2: use "gen7" prefix. v3: use "vlv" prefix. References: https://patchwork.freedesktop.org/series/44823/ Cc: Madhav Chauhan Cc: Daniel Vetter Cc: Chris Wilson Cc: Ville Syrjälä Reviewed-by: Ville Syrjälä Reviewed-by: Madhav Chauhan Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20180705132509.12881-1-jani.nikula@intel.com --- drivers/gpu/drm/i915/Makefile | 6 +++--- drivers/gpu/drm/i915/intel_drv.h | 2 +- drivers/gpu/drm/i915/intel_dsi.h | 4 ++-- drivers/gpu/drm/i915/{intel_dsi.c => vlv_dsi.c} | 0 drivers/gpu/drm/i915/{intel_dsi_pll.c => vlv_dsi_pll.c} | 0 5 files changed, 6 insertions(+), 6 deletions(-) rename drivers/gpu/drm/i915/{intel_dsi.c => vlv_dsi.c} (100%) rename drivers/gpu/drm/i915/{intel_dsi_pll.c => vlv_dsi_pll.c} (100%) diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index 4c6adae23e18..e7fedb83aafc 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -141,9 +141,7 @@ i915-y += dvo_ch7017.o \ intel_dp_link_training.o \ intel_dp_mst.o \ intel_dp.o \ - intel_dsi.o \ intel_dsi_dcs_backlight.o \ - intel_dsi_pll.o \ intel_dsi_vbt.o \ intel_dvo.o \ intel_hdmi.o \ @@ -152,7 +150,9 @@ i915-y += dvo_ch7017.o \ intel_lvds.o \ intel_panel.o \ intel_sdvo.o \ - intel_tv.o + intel_tv.o \ + vlv_dsi.o \ + vlv_dsi_pll.o # Post-mortem debug and GPU hang state capture i915-$(CONFIG_DRM_I915_CAPTURE_ERROR) += i915_gpu_error.o diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index cdfdebc11ef8..88d3581124e8 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -1730,7 +1730,7 @@ int intel_dp_aux_init_backlight_funcs(struct intel_connector *intel_connector); /* intel_dp_mst.c */ int intel_dp_mst_encoder_init(struct intel_digital_port *intel_dig_port, int conn_id); void intel_dp_mst_encoder_cleanup(struct intel_digital_port *intel_dig_port); -/* intel_dsi.c */ +/* vlv_dsi.c */ void intel_dsi_init(struct drm_i915_private *dev_priv); /* intel_dsi_dcs_backlight.c */ diff --git a/drivers/gpu/drm/i915/intel_dsi.h b/drivers/gpu/drm/i915/intel_dsi.h index 7afeb9580f41..dce9bcc2de53 100644 --- a/drivers/gpu/drm/i915/intel_dsi.h +++ b/drivers/gpu/drm/i915/intel_dsi.h @@ -129,11 +129,11 @@ static inline struct intel_dsi *enc_to_intel_dsi(struct drm_encoder *encoder) return container_of(encoder, struct intel_dsi, base.base); } -/* intel_dsi.c */ +/* vlv_dsi.c */ void wait_for_dsi_fifo_empty(struct intel_dsi *intel_dsi, enum port port); enum mipi_dsi_pixel_format pixel_format_from_register_bits(u32 fmt); -/* intel_dsi_pll.c */ +/* vlv_dsi_pll.c */ bool intel_dsi_pll_is_enabled(struct drm_i915_private *dev_priv); int intel_compute_dsi_pll(struct intel_encoder *encoder, struct intel_crtc_state *config); diff --git a/drivers/gpu/drm/i915/intel_dsi.c b/drivers/gpu/drm/i915/vlv_dsi.c similarity index 100% rename from drivers/gpu/drm/i915/intel_dsi.c rename to drivers/gpu/drm/i915/vlv_dsi.c diff --git a/drivers/gpu/drm/i915/intel_dsi_pll.c b/drivers/gpu/drm/i915/vlv_dsi_pll.c similarity index 100% rename from drivers/gpu/drm/i915/intel_dsi_pll.c rename to drivers/gpu/drm/i915/vlv_dsi_pll.c From e518634b4366d2fe874d36e6394693b1be0a9f44 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Thu, 5 Jul 2018 16:25:08 +0300 Subject: [PATCH 072/114] drm/i915/dsi: use vlv and bxt prefixes for the global DSI functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Avoid confusion with the functions to be added for the new ICL or gen 11 DSI implementation by renaming the current DSI functions. While at it, permutate the words in the function names to make them all start with "vlv_dsi" or "vlv_dsi_pll" etc. Reduce the platform abstractions in the PLL file while at it, moving the checks to vlv_dsi.c instead, where we typically already have the necessary if ladders. Leave the static functions as-is for now; they could be renamed later if needed. No functional changes. v2: use "gen7" prefix. v3: use "vlv" and "bxt" prefixes, reduce the abstractions. References: https://patchwork.freedesktop.org/series/44823/ Cc: Madhav Chauhan Cc: Daniel Vetter Cc: Chris Wilson Cc: Ville Syrjälä Reviewed-by: Ville Syrjälä Reviewed-by: Madhav Chauhan Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20180705132509.12881-2-jani.nikula@intel.com --- drivers/gpu/drm/i915/intel_display.c | 6 +- drivers/gpu/drm/i915/intel_drv.h | 2 +- drivers/gpu/drm/i915/intel_dsi.h | 30 +++++---- drivers/gpu/drm/i915/intel_dsi_vbt.c | 2 +- drivers/gpu/drm/i915/vlv_dsi.c | 61 ++++++++++------- drivers/gpu/drm/i915/vlv_dsi_pll.c | 98 +++++----------------------- 6 files changed, 80 insertions(+), 119 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 12102224f47a..d33033abbf18 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -9416,7 +9416,7 @@ static bool bxt_get_dsi_transcoder_state(struct intel_crtc *crtc, * registers/MIPI[BXT]. We can break out here early, since we * need the same DSI PLL to be enabled for both DSI ports. */ - if (!intel_dsi_pll_is_enabled(dev_priv)) + if (!bxt_dsi_pll_is_enabled(dev_priv)) break; /* XXX: this works for video mode only */ @@ -14092,7 +14092,7 @@ static void intel_setup_outputs(struct drm_i915_private *dev_priv) intel_ddi_init(dev_priv, PORT_B); intel_ddi_init(dev_priv, PORT_C); - intel_dsi_init(dev_priv); + vlv_dsi_init(dev_priv); } else if (HAS_DDI(dev_priv)) { int found; @@ -14198,7 +14198,7 @@ static void intel_setup_outputs(struct drm_i915_private *dev_priv) intel_hdmi_init(dev_priv, CHV_HDMID, PORT_D); } - intel_dsi_init(dev_priv); + vlv_dsi_init(dev_priv); } else if (!IS_GEN2(dev_priv) && !IS_PINEVIEW(dev_priv)) { bool found = false; diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 88d3581124e8..3034477b79ff 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -1731,7 +1731,7 @@ int intel_dp_aux_init_backlight_funcs(struct intel_connector *intel_connector); int intel_dp_mst_encoder_init(struct intel_digital_port *intel_dig_port, int conn_id); void intel_dp_mst_encoder_cleanup(struct intel_digital_port *intel_dig_port); /* vlv_dsi.c */ -void intel_dsi_init(struct drm_i915_private *dev_priv); +void vlv_dsi_init(struct drm_i915_private *dev_priv); /* intel_dsi_dcs_backlight.c */ int intel_dsi_dcs_init_backlight_funcs(struct intel_connector *intel_connector); diff --git a/drivers/gpu/drm/i915/intel_dsi.h b/drivers/gpu/drm/i915/intel_dsi.h index dce9bcc2de53..ad7c1cb32983 100644 --- a/drivers/gpu/drm/i915/intel_dsi.h +++ b/drivers/gpu/drm/i915/intel_dsi.h @@ -130,20 +130,28 @@ static inline struct intel_dsi *enc_to_intel_dsi(struct drm_encoder *encoder) } /* vlv_dsi.c */ -void wait_for_dsi_fifo_empty(struct intel_dsi *intel_dsi, enum port port); +void vlv_dsi_wait_for_fifo_empty(struct intel_dsi *intel_dsi, enum port port); enum mipi_dsi_pixel_format pixel_format_from_register_bits(u32 fmt); /* vlv_dsi_pll.c */ -bool intel_dsi_pll_is_enabled(struct drm_i915_private *dev_priv); -int intel_compute_dsi_pll(struct intel_encoder *encoder, - struct intel_crtc_state *config); -void intel_enable_dsi_pll(struct intel_encoder *encoder, - const struct intel_crtc_state *config); -void intel_disable_dsi_pll(struct intel_encoder *encoder); -u32 intel_dsi_get_pclk(struct intel_encoder *encoder, int pipe_bpp, - struct intel_crtc_state *config); -void intel_dsi_reset_clocks(struct intel_encoder *encoder, - enum port port); +int vlv_dsi_pll_compute(struct intel_encoder *encoder, + struct intel_crtc_state *config); +void vlv_dsi_pll_enable(struct intel_encoder *encoder, + const struct intel_crtc_state *config); +void vlv_dsi_pll_disable(struct intel_encoder *encoder); +u32 vlv_dsi_get_pclk(struct intel_encoder *encoder, int pipe_bpp, + struct intel_crtc_state *config); +void vlv_dsi_reset_clocks(struct intel_encoder *encoder, enum port port); + +bool bxt_dsi_pll_is_enabled(struct drm_i915_private *dev_priv); +int bxt_dsi_pll_compute(struct intel_encoder *encoder, + struct intel_crtc_state *config); +void bxt_dsi_pll_enable(struct intel_encoder *encoder, + const struct intel_crtc_state *config); +void bxt_dsi_pll_disable(struct intel_encoder *encoder); +u32 bxt_dsi_get_pclk(struct intel_encoder *encoder, int pipe_bpp, + struct intel_crtc_state *config); +void bxt_dsi_reset_clocks(struct intel_encoder *encoder, enum port port); /* intel_dsi_vbt.c */ bool intel_dsi_vbt_init(struct intel_dsi *intel_dsi, u16 panel_id); diff --git a/drivers/gpu/drm/i915/intel_dsi_vbt.c b/drivers/gpu/drm/i915/intel_dsi_vbt.c index 4d6ffa7b3e7b..ac83d6b89ae0 100644 --- a/drivers/gpu/drm/i915/intel_dsi_vbt.c +++ b/drivers/gpu/drm/i915/intel_dsi_vbt.c @@ -181,7 +181,7 @@ static const u8 *mipi_exec_send_packet(struct intel_dsi *intel_dsi, break; } - wait_for_dsi_fifo_empty(intel_dsi, port); + vlv_dsi_wait_for_fifo_empty(intel_dsi, port); out: data += len; diff --git a/drivers/gpu/drm/i915/vlv_dsi.c b/drivers/gpu/drm/i915/vlv_dsi.c index 3b7acb5a70b3..0aa99eeae271 100644 --- a/drivers/gpu/drm/i915/vlv_dsi.c +++ b/drivers/gpu/drm/i915/vlv_dsi.c @@ -69,7 +69,7 @@ enum mipi_dsi_pixel_format pixel_format_from_register_bits(u32 fmt) } } -void wait_for_dsi_fifo_empty(struct intel_dsi *intel_dsi, enum port port) +void vlv_dsi_wait_for_fifo_empty(struct intel_dsi *intel_dsi, enum port port) { struct drm_encoder *encoder = &intel_dsi->base.base; struct drm_device *dev = encoder->dev; @@ -342,11 +342,15 @@ static bool intel_dsi_compute_config(struct intel_encoder *encoder, pipe_config->cpu_transcoder = TRANSCODER_DSI_C; else pipe_config->cpu_transcoder = TRANSCODER_DSI_A; - } - ret = intel_compute_dsi_pll(encoder, pipe_config); - if (ret) - return false; + ret = bxt_dsi_pll_compute(encoder, pipe_config); + if (ret) + return false; + } else { + ret = vlv_dsi_pll_compute(encoder, pipe_config); + if (ret) + return false; + } pipe_config->clock_set = true; @@ -810,8 +814,13 @@ static void intel_dsi_pre_enable(struct intel_encoder *encoder, * The BIOS may leave the PLL in a wonky state where it doesn't * lock. It needs to be fully powered down to fix it. */ - intel_disable_dsi_pll(encoder); - intel_enable_dsi_pll(encoder, pipe_config); + if (IS_GEN9_LP(dev_priv)) { + bxt_dsi_pll_disable(encoder); + bxt_dsi_pll_enable(encoder, pipe_config); + } else { + vlv_dsi_pll_disable(encoder); + vlv_dsi_pll_enable(encoder, pipe_config); + } if (IS_BROXTON(dev_priv)) { /* Add MIPI IO reset programming for modeset */ @@ -949,7 +958,7 @@ static void intel_dsi_post_disable(struct intel_encoder *encoder, if (is_vid_mode(intel_dsi)) { for_each_dsi_port(port, intel_dsi->ports) - wait_for_dsi_fifo_empty(intel_dsi, port); + vlv_dsi_wait_for_fifo_empty(intel_dsi, port); intel_dsi_port_disable(encoder); usleep_range(2000, 5000); @@ -979,11 +988,13 @@ static void intel_dsi_post_disable(struct intel_encoder *encoder, val & ~MIPIO_RST_CTRL); } - intel_disable_dsi_pll(encoder); - - if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) { + if (IS_GEN9_LP(dev_priv)) { + bxt_dsi_pll_disable(encoder); + } else { u32 val; + vlv_dsi_pll_disable(encoder); + val = I915_READ(DSPCLK_GATE_D); val &= ~DPOUNIT_CLOCK_GATE_DISABLE; I915_WRITE(DSPCLK_GATE_D, val); @@ -1024,7 +1035,7 @@ static bool intel_dsi_get_hw_state(struct intel_encoder *encoder, * configuration, otherwise accessing DSI registers will hang the * machine. See BSpec North Display Engine registers/MIPI[BXT]. */ - if (IS_GEN9_LP(dev_priv) && !intel_dsi_pll_is_enabled(dev_priv)) + if (IS_GEN9_LP(dev_priv) && !bxt_dsi_pll_is_enabled(dev_priv)) goto out_put_power; /* XXX: this only works for one DSI output */ @@ -1247,16 +1258,19 @@ static void intel_dsi_get_config(struct intel_encoder *encoder, pipe_config->output_types |= BIT(INTEL_OUTPUT_DSI); - if (IS_GEN9_LP(dev_priv)) + if (IS_GEN9_LP(dev_priv)) { bxt_dsi_get_pipe_config(encoder, pipe_config); + pclk = bxt_dsi_get_pclk(encoder, pipe_config->pipe_bpp, + pipe_config); + } else { + pclk = vlv_dsi_get_pclk(encoder, pipe_config->pipe_bpp, + pipe_config); + } - pclk = intel_dsi_get_pclk(encoder, pipe_config->pipe_bpp, - pipe_config); - if (!pclk) - return; - - pipe_config->base.adjusted_mode.crtc_clock = pclk; - pipe_config->port_clock = pclk; + if (pclk) { + pipe_config->base.adjusted_mode.crtc_clock = pclk; + pipe_config->port_clock = pclk; + } } static enum drm_mode_status @@ -1590,7 +1604,10 @@ static void intel_dsi_unprepare(struct intel_encoder *encoder) /* Panel commands can be sent when clock is in LP11 */ I915_WRITE(MIPI_DEVICE_READY(port), 0x0); - intel_dsi_reset_clocks(encoder, port); + if (IS_GEN9_LP(dev_priv)) + bxt_dsi_reset_clocks(encoder, port); + else + vlv_dsi_reset_clocks(encoder, port); I915_WRITE(MIPI_EOT_DISABLE(port), CLOCKSTOP); val = I915_READ(MIPI_DSI_FUNC_PRG(port)); @@ -1713,7 +1730,7 @@ static void intel_dsi_add_properties(struct intel_connector *connector) } } -void intel_dsi_init(struct drm_i915_private *dev_priv) +void vlv_dsi_init(struct drm_i915_private *dev_priv) { struct drm_device *dev = &dev_priv->drm; struct intel_dsi *intel_dsi; diff --git a/drivers/gpu/drm/i915/vlv_dsi_pll.c b/drivers/gpu/drm/i915/vlv_dsi_pll.c index 2ff2ee7f3b78..a132a8037ecc 100644 --- a/drivers/gpu/drm/i915/vlv_dsi_pll.c +++ b/drivers/gpu/drm/i915/vlv_dsi_pll.c @@ -111,8 +111,8 @@ static int dsi_calc_mnp(struct drm_i915_private *dev_priv, * XXX: The muxing and gating is hard coded for now. Need to add support for * sharing PLLs with two DSI outputs. */ -static int vlv_compute_dsi_pll(struct intel_encoder *encoder, - struct intel_crtc_state *config) +int vlv_dsi_pll_compute(struct intel_encoder *encoder, + struct intel_crtc_state *config) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); @@ -142,8 +142,8 @@ static int vlv_compute_dsi_pll(struct intel_encoder *encoder, return 0; } -static void vlv_enable_dsi_pll(struct intel_encoder *encoder, - const struct intel_crtc_state *config) +void vlv_dsi_pll_enable(struct intel_encoder *encoder, + const struct intel_crtc_state *config) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); @@ -175,7 +175,7 @@ static void vlv_enable_dsi_pll(struct intel_encoder *encoder, DRM_DEBUG_KMS("DSI PLL locked\n"); } -static void vlv_disable_dsi_pll(struct intel_encoder *encoder) +void vlv_dsi_pll_disable(struct intel_encoder *encoder) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); u32 tmp; @@ -192,7 +192,7 @@ static void vlv_disable_dsi_pll(struct intel_encoder *encoder) mutex_unlock(&dev_priv->sb_lock); } -static bool bxt_dsi_pll_is_enabled(struct drm_i915_private *dev_priv) +bool bxt_dsi_pll_is_enabled(struct drm_i915_private *dev_priv) { bool enabled; u32 val; @@ -229,7 +229,7 @@ static bool bxt_dsi_pll_is_enabled(struct drm_i915_private *dev_priv) return enabled; } -static void bxt_disable_dsi_pll(struct intel_encoder *encoder) +void bxt_dsi_pll_disable(struct intel_encoder *encoder) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); u32 val; @@ -261,8 +261,8 @@ static void assert_bpp_mismatch(enum mipi_dsi_pixel_format fmt, int pipe_bpp) bpp, pipe_bpp); } -static u32 vlv_dsi_get_pclk(struct intel_encoder *encoder, int pipe_bpp, - struct intel_crtc_state *config) +u32 vlv_dsi_get_pclk(struct intel_encoder *encoder, int pipe_bpp, + struct intel_crtc_state *config) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); @@ -327,8 +327,8 @@ static u32 vlv_dsi_get_pclk(struct intel_encoder *encoder, int pipe_bpp, return pclk; } -static u32 bxt_dsi_get_pclk(struct intel_encoder *encoder, int pipe_bpp, - struct intel_crtc_state *config) +u32 bxt_dsi_get_pclk(struct intel_encoder *encoder, int pipe_bpp, + struct intel_crtc_state *config) { u32 pclk; u32 dsi_clk; @@ -357,16 +357,7 @@ static u32 bxt_dsi_get_pclk(struct intel_encoder *encoder, int pipe_bpp, return pclk; } -u32 intel_dsi_get_pclk(struct intel_encoder *encoder, int pipe_bpp, - struct intel_crtc_state *config) -{ - if (IS_GEN9_LP(to_i915(encoder->base.dev))) - return bxt_dsi_get_pclk(encoder, pipe_bpp, config); - else - return vlv_dsi_get_pclk(encoder, pipe_bpp, config); -} - -static void vlv_dsi_reset_clocks(struct intel_encoder *encoder, enum port port) +void vlv_dsi_reset_clocks(struct intel_encoder *encoder, enum port port) { u32 temp; struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); @@ -480,8 +471,8 @@ static void bxt_dsi_program_clocks(struct drm_device *dev, enum port port, I915_WRITE(BXT_MIPI_CLOCK_CTL, tmp); } -static int gen9lp_compute_dsi_pll(struct intel_encoder *encoder, - struct intel_crtc_state *config) +int bxt_dsi_pll_compute(struct intel_encoder *encoder, + struct intel_crtc_state *config) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); @@ -528,8 +519,8 @@ static int gen9lp_compute_dsi_pll(struct intel_encoder *encoder, return 0; } -static void gen9lp_enable_dsi_pll(struct intel_encoder *encoder, - const struct intel_crtc_state *config) +void bxt_dsi_pll_enable(struct intel_encoder *encoder, + const struct intel_crtc_state *config) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); @@ -568,52 +559,7 @@ static void gen9lp_enable_dsi_pll(struct intel_encoder *encoder, DRM_DEBUG_KMS("DSI PLL locked\n"); } -bool intel_dsi_pll_is_enabled(struct drm_i915_private *dev_priv) -{ - if (IS_GEN9_LP(dev_priv)) - return bxt_dsi_pll_is_enabled(dev_priv); - - MISSING_CASE(INTEL_DEVID(dev_priv)); - - return false; -} - -int intel_compute_dsi_pll(struct intel_encoder *encoder, - struct intel_crtc_state *config) -{ - struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - - if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) - return vlv_compute_dsi_pll(encoder, config); - else if (IS_GEN9_LP(dev_priv)) - return gen9lp_compute_dsi_pll(encoder, config); - - return -ENODEV; -} - -void intel_enable_dsi_pll(struct intel_encoder *encoder, - const struct intel_crtc_state *config) -{ - struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - - if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) - vlv_enable_dsi_pll(encoder, config); - else if (IS_GEN9_LP(dev_priv)) - gen9lp_enable_dsi_pll(encoder, config); -} - -void intel_disable_dsi_pll(struct intel_encoder *encoder) -{ - struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - - if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) - vlv_disable_dsi_pll(encoder); - else if (IS_GEN9_LP(dev_priv)) - bxt_disable_dsi_pll(encoder); -} - -static void gen9lp_dsi_reset_clocks(struct intel_encoder *encoder, - enum port port) +void bxt_dsi_reset_clocks(struct intel_encoder *encoder, enum port port) { u32 tmp; struct drm_device *dev = encoder->base.dev; @@ -638,13 +584,3 @@ static void gen9lp_dsi_reset_clocks(struct intel_encoder *encoder, } I915_WRITE(MIPI_EOT_DISABLE(port), CLOCKSTOP); } - -void intel_dsi_reset_clocks(struct intel_encoder *encoder, enum port port) -{ - struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - - if (IS_GEN9_LP(dev_priv)) - gen9lp_dsi_reset_clocks(encoder, port); - else if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) - vlv_dsi_reset_clocks(encoder, port); -} From 012bf847d1396e4cf2503e298ca49b3dc4591755 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Thu, 5 Jul 2018 16:25:09 +0300 Subject: [PATCH 073/114] drm/i915/dsi: update some of the platform based checks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use the more customary order of latest platform first, and don't bother with an if in the last branch. Cc: Madhav Chauhan Cc: Daniel Vetter Cc: Chris Wilson Cc: Ville Syrjälä Reviewed-by: Ville Syrjälä Reviewed-by: Madhav Chauhan Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20180705132509.12881-3-jani.nikula@intel.com --- drivers/gpu/drm/i915/vlv_dsi.c | 56 ++++++++++++++++------------------ 1 file changed, 26 insertions(+), 30 deletions(-) diff --git a/drivers/gpu/drm/i915/vlv_dsi.c b/drivers/gpu/drm/i915/vlv_dsi.c index 0aa99eeae271..435a2c35ee8c 100644 --- a/drivers/gpu/drm/i915/vlv_dsi.c +++ b/drivers/gpu/drm/i915/vlv_dsi.c @@ -550,12 +550,12 @@ static void intel_dsi_device_ready(struct intel_encoder *encoder) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) - vlv_dsi_device_ready(encoder); - else if (IS_BROXTON(dev_priv)) - bxt_dsi_device_ready(encoder); - else if (IS_GEMINILAKE(dev_priv)) + if (IS_GEMINILAKE(dev_priv)) glk_dsi_device_ready(encoder); + else if (IS_GEN9_LP(dev_priv)) + bxt_dsi_device_ready(encoder); + else + vlv_dsi_device_ready(encoder); } static void glk_dsi_enter_low_power_mode(struct intel_encoder *encoder) @@ -938,11 +938,10 @@ static void intel_dsi_clear_device_ready(struct intel_encoder *encoder) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv) || - IS_BROXTON(dev_priv)) - vlv_dsi_clear_device_ready(encoder); - else if (IS_GEMINILAKE(dev_priv)) + if (IS_GEMINILAKE(dev_priv)) glk_dsi_clear_device_ready(encoder); + else + vlv_dsi_clear_device_ready(encoder); } static void intel_dsi_post_disable(struct intel_encoder *encoder, @@ -1599,23 +1598,24 @@ static void intel_dsi_unprepare(struct intel_encoder *encoder) enum port port; u32 val; - if (!IS_GEMINILAKE(dev_priv)) { - for_each_dsi_port(port, intel_dsi->ports) { - /* Panel commands can be sent when clock is in LP11 */ - I915_WRITE(MIPI_DEVICE_READY(port), 0x0); + if (IS_GEMINILAKE(dev_priv)) + return; - if (IS_GEN9_LP(dev_priv)) - bxt_dsi_reset_clocks(encoder, port); - else - vlv_dsi_reset_clocks(encoder, port); - I915_WRITE(MIPI_EOT_DISABLE(port), CLOCKSTOP); + for_each_dsi_port(port, intel_dsi->ports) { + /* Panel commands can be sent when clock is in LP11 */ + I915_WRITE(MIPI_DEVICE_READY(port), 0x0); - val = I915_READ(MIPI_DSI_FUNC_PRG(port)); - val &= ~VID_MODE_FORMAT_MASK; - I915_WRITE(MIPI_DSI_FUNC_PRG(port), val); + if (IS_GEN9_LP(dev_priv)) + bxt_dsi_reset_clocks(encoder, port); + else + vlv_dsi_reset_clocks(encoder, port); + I915_WRITE(MIPI_EOT_DISABLE(port), CLOCKSTOP); - I915_WRITE(MIPI_DEVICE_READY(port), 0x1); - } + val = I915_READ(MIPI_DSI_FUNC_PRG(port)); + val &= ~VID_MODE_FORMAT_MASK; + I915_WRITE(MIPI_DSI_FUNC_PRG(port), val); + + I915_WRITE(MIPI_DEVICE_READY(port), 0x1); } } @@ -1747,14 +1747,10 @@ void vlv_dsi_init(struct drm_i915_private *dev_priv) if (!intel_bios_is_dsi_present(dev_priv, &port)) return; - if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) { - dev_priv->mipi_mmio_base = VLV_MIPI_BASE; - } else if (IS_GEN9_LP(dev_priv)) { + if (IS_GEN9_LP(dev_priv)) dev_priv->mipi_mmio_base = BXT_MIPI_BASE; - } else { - DRM_ERROR("Unsupported Mipi device to reg base"); - return; - } + else + dev_priv->mipi_mmio_base = VLV_MIPI_BASE; intel_dsi = kzalloc(sizeof(*intel_dsi), GFP_KERNEL); if (!intel_dsi) From fcfe0bdcb1911a4ff6ab6b6264e3323745178025 Mon Sep 17 00:00:00 2001 From: Madhav Chauhan Date: Thu, 5 Jul 2018 19:19:33 +0530 Subject: [PATCH 074/114] drm/i915/icl: Program DSI Escape clock Divider Escape Clock is used for LP communication across the DSI Link. To achieve the constant frequency of the escape clock from the variable DPLL frequency output, a variable divider(M) is needed. This patch programs the same. v2: (Jani N) Don't end line with "(". Signed-off-by: Madhav Chauhan Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/1530798591-2077-3-git-send-email-madhav.chauhan@intel.com --- drivers/gpu/drm/i915/Makefile | 1 + drivers/gpu/drm/i915/i915_reg.h | 1 + drivers/gpu/drm/i915/icl_dsi.c | 64 +++++++++++++++++++++++++++++++++ 3 files changed, 66 insertions(+) create mode 100644 drivers/gpu/drm/i915/icl_dsi.c diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index e7fedb83aafc..5794f102f9b8 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -135,6 +135,7 @@ i915-y += dvo_ch7017.o \ dvo_ns2501.o \ dvo_sil164.o \ dvo_tfp410.o \ + icl_dsi.o \ intel_crt.o \ intel_ddi.o \ intel_dp_aux_backlight.o \ diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index dc953ee7e3b3..a35f4142cced 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -9552,6 +9552,7 @@ enum skl_power_gate { #define ICL_BYTE_CLK_PER_ESC_CLK_SHIFT 16 #define ICL_ESC_CLK_DIV_MASK 0x1ff #define ICL_ESC_CLK_DIV_SHIFT 0 +#define DSI_MAX_ESC_CLK 20000 /* in KHz */ /* Gen4+ Timestamp and Pipe Frame time stamp registers */ #define GEN4_TIMESTAMP _MMIO(0x2358) diff --git a/drivers/gpu/drm/i915/icl_dsi.c b/drivers/gpu/drm/i915/icl_dsi.c new file mode 100644 index 000000000000..1eb4ac30e400 --- /dev/null +++ b/drivers/gpu/drm/i915/icl_dsi.c @@ -0,0 +1,64 @@ +/* + * Copyright © 2018 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Authors: + * Madhav Chauhan + * Jani Nikula + */ + +#include "intel_dsi.h" + +static void gen11_dsi_program_esc_clk_div(struct intel_encoder *encoder) +{ + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); + enum port port; + u32 bpp = mipi_dsi_pixel_format_to_bpp(intel_dsi->pixel_format); + u32 afe_clk_khz; /* 8X Clock */ + u32 esc_clk_div_m; + + afe_clk_khz = DIV_ROUND_CLOSEST(intel_dsi->pclk * bpp, + intel_dsi->lane_count); + + esc_clk_div_m = DIV_ROUND_UP(afe_clk_khz, DSI_MAX_ESC_CLK); + + for_each_dsi_port(port, intel_dsi->ports) { + I915_WRITE(ICL_DSI_ESC_CLK_DIV(port), + esc_clk_div_m & ICL_ESC_CLK_DIV_MASK); + POSTING_READ(ICL_DSI_ESC_CLK_DIV(port)); + } + + for_each_dsi_port(port, intel_dsi->ports) { + I915_WRITE(ICL_DPHY_ESC_CLK_DIV(port), + esc_clk_div_m & ICL_ESC_CLK_DIV_MASK); + POSTING_READ(ICL_DPHY_ESC_CLK_DIV(port)); + } +} + +static void __attribute__((unused)) +gen11_dsi_pre_enable(struct intel_encoder *encoder, + const struct intel_crtc_state *pipe_config, + const struct drm_connector_state *conn_state) +{ + /* step3: enable DSI PLL */ + gen11_dsi_program_esc_clk_div(encoder); +} From 21652f3b0d48749e3ba0d332d2b39cc18eacb1c0 Mon Sep 17 00:00:00 2001 From: Madhav Chauhan Date: Thu, 5 Jul 2018 19:19:34 +0530 Subject: [PATCH 075/114] drm/i915/icl: Define DSI mode ctl register This patch defines DSI IO mode control register and it's bits used while enabling IO power for DSI. Signed-off-by: Madhav Chauhan Reviewed-by: Jani Nikula Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/1530798591-2077-4-git-send-email-madhav.chauhan@intel.com --- drivers/gpu/drm/i915/i915_reg.h | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index a35f4142cced..eb3b7544a875 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -9688,6 +9688,14 @@ enum skl_power_gate { #define _BXT_MIPIC_PORT_CTRL 0x6B8C0 #define BXT_MIPI_PORT_CTRL(tc) _MMIO_MIPI(tc, _BXT_MIPIA_PORT_CTRL, _BXT_MIPIC_PORT_CTRL) +/* ICL DSI MODE control */ +#define _ICL_DSI_IO_MODECTL_0 0x6B094 +#define _ICL_DSI_IO_MODECTL_1 0x6B894 +#define ICL_DSI_IO_MODECTL(port) _MMIO_PORT(port, \ + _ICL_DSI_IO_MODECTL_0, \ + _ICL_DSI_IO_MODECTL_1) +#define COMBO_PHY_MODE_DSI (1 << 0) + #define BXT_P_DSI_REGULATOR_CFG _MMIO(0x160020) #define STAP_SELECT (1 << 0) From b1cb21a5f1c668534b25464717c806e141ba500f Mon Sep 17 00:00:00 2001 From: Madhav Chauhan Date: Thu, 5 Jul 2018 19:19:35 +0530 Subject: [PATCH 076/114] drm/i915/icl: Enable DSI IO power This patch configures mode of operation for DSI and enable DDI IO power by configuring power well. v2: Use for_each_dsi_port() for power get (Jani N) Signed-off-by: Madhav Chauhan Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/1530798591-2077-5-git-send-email-madhav.chauhan@intel.com --- drivers/gpu/drm/i915/icl_dsi.c | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/drivers/gpu/drm/i915/icl_dsi.c b/drivers/gpu/drm/i915/icl_dsi.c index 1eb4ac30e400..774ab262c8f3 100644 --- a/drivers/gpu/drm/i915/icl_dsi.c +++ b/drivers/gpu/drm/i915/icl_dsi.c @@ -54,11 +54,34 @@ static void gen11_dsi_program_esc_clk_div(struct intel_encoder *encoder) } } +static void gen11_dsi_enable_io_power(struct intel_encoder *encoder) +{ + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); + enum port port; + u32 tmp; + + for_each_dsi_port(port, intel_dsi->ports) { + tmp = I915_READ(ICL_DSI_IO_MODECTL(port)); + tmp |= COMBO_PHY_MODE_DSI; + I915_WRITE(ICL_DSI_IO_MODECTL(port), tmp); + } + + for_each_dsi_port(port, intel_dsi->ports) { + intel_display_power_get(dev_priv, port == PORT_A ? + POWER_DOMAIN_PORT_DDI_A_IO : + POWER_DOMAIN_PORT_DDI_B_IO); + } +} + static void __attribute__((unused)) gen11_dsi_pre_enable(struct intel_encoder *encoder, const struct intel_crtc_state *pipe_config, const struct drm_connector_state *conn_state) { + /* step2: enable IO power */ + gen11_dsi_enable_io_power(encoder); + /* step3: enable DSI PLL */ gen11_dsi_program_esc_clk_div(encoder); } From 166869b390b6fe763544fe4ae1e01acd28db331a Mon Sep 17 00:00:00 2001 From: Madhav Chauhan Date: Thu, 5 Jul 2018 19:19:36 +0530 Subject: [PATCH 077/114] drm/i915/icl: Define PORT_CL_DW_10 register This register used to power down individual lanes for DDI/DSI ports. Bitfields to power up/down various combinations of lanes are also added in this patch. v2: Review comments from Jani N - Use override instead of "override" for bitfields - Define mask for override bitfield - Define PWR_DOWN_LN* macros shifted in place v3: Correct PWR_DOWN_LN_MASK value (Jani N) Signed-off-by: Madhav Chauhan Reviewed-by: Jani Nikula Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/1530798591-2077-6-git-send-email-madhav.chauhan@intel.com --- drivers/gpu/drm/i915/i915_reg.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index eb3b7544a875..3ad5dcb21a7a 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -1720,6 +1720,26 @@ enum i915_power_well_id { #define ICL_PORT_CL_DW5(port) _MMIO_PORT(port, _ICL_PORT_CL_DW5_A, \ _ICL_PORT_CL_DW5_B) +#define _CNL_PORT_CL_DW10_A 0x162028 +#define _ICL_PORT_CL_DW10_B 0x6c028 +#define ICL_PORT_CL_DW10(port) _MMIO_PORT(port, \ + _CNL_PORT_CL_DW10_A, \ + _ICL_PORT_CL_DW10_B) +#define PG_SEQ_DELAY_OVERRIDE_MASK (3 << 25) +#define PG_SEQ_DELAY_OVERRIDE_SHIFT 25 +#define PG_SEQ_DELAY_OVERRIDE_ENABLE (1 << 24) +#define PWR_UP_ALL_LANES (0x0 << 4) +#define PWR_DOWN_LN_3_2_1 (0xe << 4) +#define PWR_DOWN_LN_3_2 (0xc << 4) +#define PWR_DOWN_LN_3 (0x8 << 4) +#define PWR_DOWN_LN_2_1_0 (0x7 << 4) +#define PWR_DOWN_LN_1_0 (0x3 << 4) +#define PWR_DOWN_LN_1 (0x2 << 4) +#define PWR_DOWN_LN_3_1 (0xa << 4) +#define PWR_DOWN_LN_3_1_0 (0xb << 4) +#define PWR_DOWN_LN_MASK (0xf << 4) +#define PWR_DOWN_LN_SHIFT 4 + #define _PORT_CL1CM_DW9_A 0x162024 #define _PORT_CL1CM_DW9_BC 0x6C024 #define IREF0RC_OFFSET_SHIFT 8 From 45f09f7adc8a10138b158c5805a4d3b20aac611a Mon Sep 17 00:00:00 2001 From: Madhav Chauhan Date: Thu, 5 Jul 2018 19:19:37 +0530 Subject: [PATCH 078/114] drm/i915/icl: Power down unused DSI lanes To save power, unused lanes should be powered down using the bitfield of PORT_CL_DW10. v2: Review comments from Jani N - Put default label next to case 4 - Include the shifts in the macros Signed-off-by: Madhav Chauhan Reviewed-by: Jani Nikula Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/1530798591-2077-7-git-send-email-madhav.chauhan@intel.com --- drivers/gpu/drm/i915/icl_dsi.c | 40 ++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/drivers/gpu/drm/i915/icl_dsi.c b/drivers/gpu/drm/i915/icl_dsi.c index 774ab262c8f3..13830e43a4d1 100644 --- a/drivers/gpu/drm/i915/icl_dsi.c +++ b/drivers/gpu/drm/i915/icl_dsi.c @@ -74,6 +74,43 @@ static void gen11_dsi_enable_io_power(struct intel_encoder *encoder) } } +static void gen11_dsi_power_up_lanes(struct intel_encoder *encoder) +{ + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); + enum port port; + u32 tmp; + u32 lane_mask; + + switch (intel_dsi->lane_count) { + case 1: + lane_mask = PWR_DOWN_LN_3_1_0; + break; + case 2: + lane_mask = PWR_DOWN_LN_3_1; + break; + case 3: + lane_mask = PWR_DOWN_LN_3; + break; + case 4: + default: + lane_mask = PWR_UP_ALL_LANES; + break; + } + + for_each_dsi_port(port, intel_dsi->ports) { + tmp = I915_READ(ICL_PORT_CL_DW10(port)); + tmp &= ~PWR_DOWN_LN_MASK; + I915_WRITE(ICL_PORT_CL_DW10(port), tmp | lane_mask); + } +} + +static void gen11_dsi_enable_port_and_phy(struct intel_encoder *encoder) +{ + /* step 4a: power up all lanes of the DDI used by DSI */ + gen11_dsi_power_up_lanes(encoder); +} + static void __attribute__((unused)) gen11_dsi_pre_enable(struct intel_encoder *encoder, const struct intel_crtc_state *pipe_config, @@ -84,4 +121,7 @@ gen11_dsi_pre_enable(struct intel_encoder *encoder, /* step3: enable DSI PLL */ gen11_dsi_program_esc_clk_div(encoder); + + /* step4: enable DSI port and DPHY */ + gen11_dsi_enable_port_and_phy(encoder); } From d61d1b3bbba1054a7a326c18a3c66f822f9fa338 Mon Sep 17 00:00:00 2001 From: Madhav Chauhan Date: Thu, 5 Jul 2018 19:19:38 +0530 Subject: [PATCH 079/114] drm/i915/icl: Define AUX lane registers for Port A/B This patch defines AUX lane registers for PORT_PCS_DW1, PORT_TX_DW2, PORT_TX_DW4, PORT_TX_DW5 used during dsi enabling. v2: Review comments from Jani N: - Define _ICL_PORT_PCS_DW1_AUX_A for consistency - Three spaces for bitfield definition. Signed-off-by: Madhav Chauhan Reviewed-by: Jani Nikula Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/1530798591-2077-8-git-send-email-madhav.chauhan@intel.com --- drivers/gpu/drm/i915/i915_reg.h | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 3ad5dcb21a7a..0424e45f88db 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -1796,16 +1796,22 @@ enum i915_power_well_id { _CNL_PORT_PCS_DW1_LN0_D, \ _CNL_PORT_PCS_DW1_LN0_AE, \ _CNL_PORT_PCS_DW1_LN0_F)) + #define _ICL_PORT_PCS_DW1_GRP_A 0x162604 #define _ICL_PORT_PCS_DW1_GRP_B 0x6C604 #define _ICL_PORT_PCS_DW1_LN0_A 0x162804 #define _ICL_PORT_PCS_DW1_LN0_B 0x6C804 +#define _ICL_PORT_PCS_DW1_AUX_A 0x162304 +#define _ICL_PORT_PCS_DW1_AUX_B 0x6c304 #define ICL_PORT_PCS_DW1_GRP(port) _MMIO_PORT(port,\ _ICL_PORT_PCS_DW1_GRP_A, \ _ICL_PORT_PCS_DW1_GRP_B) #define ICL_PORT_PCS_DW1_LN0(port) _MMIO_PORT(port, \ _ICL_PORT_PCS_DW1_LN0_A, \ _ICL_PORT_PCS_DW1_LN0_B) +#define ICL_PORT_PCS_DW1_AUX(port) _MMIO_PORT(port, \ + _ICL_PORT_PCS_DW1_AUX_A, \ + _ICL_PORT_PCS_DW1_AUX_B) #define COMMON_KEEPER_EN (1 << 26) /* CNL Port TX registers */ @@ -1842,16 +1848,23 @@ enum i915_power_well_id { #define _ICL_PORT_TX_DW2_GRP_B 0x6C688 #define _ICL_PORT_TX_DW2_LN0_A 0x162888 #define _ICL_PORT_TX_DW2_LN0_B 0x6C888 +#define _ICL_PORT_TX_DW2_AUX_A 0x162388 +#define _ICL_PORT_TX_DW2_AUX_B 0x6c388 #define ICL_PORT_TX_DW2_GRP(port) _MMIO_PORT(port, \ _ICL_PORT_TX_DW2_GRP_A, \ _ICL_PORT_TX_DW2_GRP_B) #define ICL_PORT_TX_DW2_LN0(port) _MMIO_PORT(port, \ _ICL_PORT_TX_DW2_LN0_A, \ _ICL_PORT_TX_DW2_LN0_B) +#define ICL_PORT_TX_DW2_AUX(port) _MMIO_PORT(port, \ + _ICL_PORT_TX_DW2_AUX_A, \ + _ICL_PORT_TX_DW2_AUX_B) #define SWING_SEL_UPPER(x) (((x) >> 3) << 15) #define SWING_SEL_UPPER_MASK (1 << 15) #define SWING_SEL_LOWER(x) (((x) & 0x7) << 11) #define SWING_SEL_LOWER_MASK (0x7 << 11) +#define FRC_LATENCY_OPTIM_MASK (0x7 << 8) +#define FRC_LATENCY_OPTIM_VAL(x) ((x) << 8) #define RCOMP_SCALAR(x) ((x) << 0) #define RCOMP_SCALAR_MASK (0xFF << 0) @@ -1867,6 +1880,8 @@ enum i915_power_well_id { #define _ICL_PORT_TX_DW4_LN0_A 0x162890 #define _ICL_PORT_TX_DW4_LN1_A 0x162990 #define _ICL_PORT_TX_DW4_LN0_B 0x6C890 +#define _ICL_PORT_TX_DW4_AUX_A 0x162390 +#define _ICL_PORT_TX_DW4_AUX_B 0x6c390 #define ICL_PORT_TX_DW4_GRP(port) _MMIO_PORT(port, \ _ICL_PORT_TX_DW4_GRP_A, \ _ICL_PORT_TX_DW4_GRP_B) @@ -1875,6 +1890,9 @@ enum i915_power_well_id { _ICL_PORT_TX_DW4_LN0_B) + \ ((ln) * (_ICL_PORT_TX_DW4_LN1_A - \ _ICL_PORT_TX_DW4_LN0_A))) +#define ICL_PORT_TX_DW4_AUX(port) _MMIO_PORT(port, \ + _ICL_PORT_TX_DW4_AUX_A, \ + _ICL_PORT_TX_DW4_AUX_B) #define LOADGEN_SELECT (1 << 31) #define POST_CURSOR_1(x) ((x) << 12) #define POST_CURSOR_1_MASK (0x3F << 12) @@ -1889,12 +1907,17 @@ enum i915_power_well_id { #define _ICL_PORT_TX_DW5_GRP_B 0x6C694 #define _ICL_PORT_TX_DW5_LN0_A 0x162894 #define _ICL_PORT_TX_DW5_LN0_B 0x6C894 +#define _ICL_PORT_TX_DW5_AUX_A 0x162394 +#define _ICL_PORT_TX_DW5_AUX_B 0x6c394 #define ICL_PORT_TX_DW5_GRP(port) _MMIO_PORT(port, \ _ICL_PORT_TX_DW5_GRP_A, \ _ICL_PORT_TX_DW5_GRP_B) #define ICL_PORT_TX_DW5_LN0(port) _MMIO_PORT(port, \ _ICL_PORT_TX_DW5_LN0_A, \ _ICL_PORT_TX_DW5_LN0_B) +#define ICL_PORT_TX_DW5_AUX(port) _MMIO_PORT(port, \ + _ICL_PORT_TX_DW5_AUX_A, \ + _ICL_PORT_TX_DW5_AUX_B) #define TX_TRAINING_EN (1 << 31) #define TAP2_DISABLE (1 << 30) #define TAP3_DISABLE (1 << 29) From 1eca65d922d7543b60610624afd345b4f3d71ae2 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 6 Jul 2018 07:53:06 +0100 Subject: [PATCH 080/114] drm/i915: Squelch very verbose error logging Having found the error causing the IGT test to fail, downgrade the verbose logging so that we stop flooding the syslogs as we deliberately provoke it many thousands of time during selftests. References: 10195b1e4411 ("drm/i915: Show vma allocator stack when in doubt") Signed-off-by: Chris Wilson Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20180706065332.15214-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/Kconfig.debug | 12 ++++++++++++ drivers/gpu/drm/i915/i915_vma.c | 2 +- 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/Kconfig.debug b/drivers/gpu/drm/i915/Kconfig.debug index 80efee1ff7f3..96b577f62752 100644 --- a/drivers/gpu/drm/i915/Kconfig.debug +++ b/drivers/gpu/drm/i915/Kconfig.debug @@ -51,6 +51,18 @@ config DRM_I915_DEBUG_GEM If in doubt, say "N". +config DRM_I915_ERRLOG_GEM + bool "Insert extra logging (very verbose) for common GEM errors" + default n + depends on DRM_I915_DEBUG_GEM + help + Enable additional logging that may help track down the cause of + principally userspace errors. + + Recommended for driver developers only. + + If in doubt, say "N". + config DRM_I915_TRACE_GEM bool "Insert extra ftrace output from the GEM internals" depends on DRM_I915_DEBUG_GEM diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index de2b6d65e865..518de47111ff 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -30,7 +30,7 @@ #include -#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM) && IS_ENABLED(CONFIG_DRM_DEBUG_MM) +#if IS_ENABLED(CONFIG_DRM_I915_ERRLOG_GEM) && IS_ENABLED(CONFIG_DRM_DEBUG_MM) #include From e5d2435bfaeee3f4045e03441d3902c63254b618 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 6 Jul 2018 07:53:07 +0100 Subject: [PATCH 081/114] drm/i915/selftests: Destroy partial tiling vma after use As we keep VMA around until the object is destroyed, when testing partial tiling we instantiate many, many VMA (as the object is huge allowing for many different partial regions). We test elsewhere our handling of populating large objects with a full set of VMA and checking we can retrieve them afterwards, but in this test we incur the cost of flushing all VMA after every GTT write, dramatically slowing down the test. References: https://bugs.freedesktop.org/show_bug.cgi?id=107130 Signed-off-by: Chris Wilson Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20180706065332.15214-2-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/selftests/i915_gem_object.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_object.c b/drivers/gpu/drm/i915/selftests/i915_gem_object.c index 885153268968..232e5ccf1852 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_object.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_object.c @@ -288,6 +288,8 @@ static int check_partial_mapping(struct drm_i915_gem_object *obj, kunmap(p); if (err) return err; + + i915_vma_destroy(vma); } return 0; From b5f6e53d4cd58043e2c4e1b60fd446924cf56cbe Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 6 Jul 2018 07:53:08 +0100 Subject: [PATCH 082/114] drm/i915/selftests: Skip using the GPU if wedged If the GPU is irrecoverably broken, we can not use it to dirty memory and check for cache coherency with the CPU. All we can do is simply skip over the GPU subtests and focus on the CPU domains (WC, WB) cache management. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=107127 Signed-off-by: Chris Wilson Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20180706065332.15214-3-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/selftests/i915_gem_coherency.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c b/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c index a4900091ae3d..cb9eef1635e1 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c @@ -239,8 +239,16 @@ static bool always_valid(struct drm_i915_private *i915) return true; } +static bool needs_fence_registers(struct drm_i915_private *i915) +{ + return !i915_terminally_wedged(&i915->gpu_error); +} + static bool needs_mi_store_dword(struct drm_i915_private *i915) { + if (i915_terminally_wedged(&i915->gpu_error)) + return false; + return intel_engine_can_store_dword(i915->engine[RCS]); } @@ -251,7 +259,7 @@ static const struct igt_coherency_mode { bool (*valid)(struct drm_i915_private *i915); } igt_coherency_mode[] = { { "cpu", cpu_set, cpu_get, always_valid }, - { "gtt", gtt_set, gtt_get, always_valid }, + { "gtt", gtt_set, gtt_get, needs_fence_registers }, { "wc", wc_set, wc_get, always_valid }, { "gpu", gpu_set, NULL, needs_mi_store_dword }, { }, From e16f4c36cb9bf61fcd70a2ea978cee2a23ebf71d Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 6 Jul 2018 07:53:09 +0100 Subject: [PATCH 083/114] drm/i915/selftests: Skip making an object busy if the GPU is wedged If the GPU is wedged, we cannot make the object busy as trying to submit a request will generate -EIO. Skip to the end of the test. Signed-off-by: Chris Wilson Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20180706065332.15214-4-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/selftests/i915_gem_object.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_object.c b/drivers/gpu/drm/i915/selftests/i915_gem_object.c index 232e5ccf1852..6fe71865b710 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_object.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_object.c @@ -553,6 +553,9 @@ static int igt_mmap_offset_exhaustion(void *arg) /* Now fill with busy dead objects that we expect to reap */ for (loop = 0; loop < 3; loop++) { + if (i915_terminally_wedged(&i915->gpu_error)) + break; + obj = i915_gem_object_create_internal(i915, PAGE_SIZE); if (IS_ERR(obj)) { err = PTR_ERR(obj); From a9450e15ad9e921fd2d76b038a8b252b1afc7bb6 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 6 Jul 2018 07:53:10 +0100 Subject: [PATCH 084/114] drm/i915/selftests: Skip all request selftests when wedged If the GPU is irrecoverably wedge, we cannot submit any request and so all of the request selftests will expectedly fail. Skip over them. Signed-off-by: Chris Wilson Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20180706065332.15214-5-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/selftests/i915_request.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/i915/selftests/i915_request.c b/drivers/gpu/drm/i915/selftests/i915_request.c index e44aa3335d9e..cc27edc40356 100644 --- a/drivers/gpu/drm/i915/selftests/i915_request.c +++ b/drivers/gpu/drm/i915/selftests/i915_request.c @@ -859,5 +859,9 @@ int i915_request_live_selftests(struct drm_i915_private *i915) SUBTEST(live_sequential_engines), SUBTEST(live_empty_request), }; + + if (i915_terminally_wedged(&i915->gpu_error)) + return 0; + return i915_subtests(tests, i915); } From 47e61a79806d5d222261250dc573a68c53e99c22 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 6 Jul 2018 07:53:11 +0100 Subject: [PATCH 085/114] drm/i915/selftests: Skip workaround tests when wedged If the GPU is irrecoverably wedged, we cannot submit any request and therefore cannot query the register state of the context (which is done using the GPU command stream). So skip over the test as it expectedly fails. Signed-off-by: Chris Wilson Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20180706065332.15214-6-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/selftests/intel_workarounds.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/i915/selftests/intel_workarounds.c b/drivers/gpu/drm/i915/selftests/intel_workarounds.c index e1ea2d2bedd2..3d86b90ab722 100644 --- a/drivers/gpu/drm/i915/selftests/intel_workarounds.c +++ b/drivers/gpu/drm/i915/selftests/intel_workarounds.c @@ -283,6 +283,9 @@ int intel_workarounds_live_selftests(struct drm_i915_private *i915) }; int err; + if (i915_terminally_wedged(&i915->gpu_error)) + return 0; + mutex_lock(&i915->drm.struct_mutex); err = i915_subtests(tests, i915); mutex_unlock(&i915->drm.struct_mutex); From 7783decff5be6811a1a31df91b47fd2541b993d0 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 6 Jul 2018 07:53:12 +0100 Subject: [PATCH 086/114] drm/i915/selftests: Skip live eviction tests when wedged If the GPU is irrecoverably wedged, we cannot submit any requests and so cannot make the GTT busy in order to test evicting active objects. As this expectedly fails, skip over the test. Signed-off-by: Chris Wilson Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20180706065332.15214-7-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/selftests/i915_gem_evict.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c index 8059268800fa..128ad1cf0647 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c @@ -500,5 +500,8 @@ int i915_gem_evict_live_selftests(struct drm_i915_private *i915) SUBTEST(igt_evict_contexts), }; + if (i915_terminally_wedged(&i915->gpu_error)) + return 0; + return i915_subtests(tests, i915); } From 921d07d73ebf6d87064883d69fedd3d7e9e99538 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 6 Jul 2018 07:53:13 +0100 Subject: [PATCH 087/114] drm/i915/selftests: Skip huge pages live tests if wedged We test the GPU handling of huge pages by submitting requests that write into a huge page, but if the GPU is irrecoverably wedged we cannot submit any requests. As the test expectedly fails, skip over it. Signed-off-by: Chris Wilson Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20180706065332.15214-8-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/selftests/huge_pages.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/i915/selftests/huge_pages.c b/drivers/gpu/drm/i915/selftests/huge_pages.c index 39699939587d..1193dd36913a 100644 --- a/drivers/gpu/drm/i915/selftests/huge_pages.c +++ b/drivers/gpu/drm/i915/selftests/huge_pages.c @@ -1748,6 +1748,9 @@ int i915_gem_huge_page_live_selftests(struct drm_i915_private *dev_priv) return 0; } + if (i915_terminally_wedged(&dev_priv->gpu_error)) + return 0; + file = mock_file(dev_priv); if (IS_ERR(file)) return PTR_ERR(file); From 31c9bd78018cc36e62525fe3edc0009c1da6347f Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 6 Jul 2018 07:53:14 +0100 Subject: [PATCH 088/114] drm/i915/selftests: Skip over live context testing when wedged If the GPU is terminally wedged we cannot submit any requests into a context, completely unfulfilling our purpose of doing so. As this expectedly fails, skip over the test. Signed-off-by: Chris Wilson Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20180706065332.15214-9-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/selftests/i915_gem_context.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/selftests/i915_gem_context.c index cc848ceeb3c3..0b36265a0f96 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_context.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_context.c @@ -599,6 +599,9 @@ int i915_gem_context_live_selftests(struct drm_i915_private *dev_priv) bool fake_alias = false; int err; + if (i915_terminally_wedged(&dev_priv->gpu_error)) + return 0; + /* Install a fake aliasing gtt for exercise */ if (USES_PPGTT(dev_priv) && !dev_priv->mm.aliasing_ppgtt) { mutex_lock(&dev_priv->drm.struct_mutex); From 6cc42152b02b3f73969934b63332d47e2dac55e4 Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Thu, 28 Jun 2018 09:23:02 +0200 Subject: [PATCH 089/114] drm/i915: Remove support for legacy debugfs crc interface MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This interface is deprecated, and has been replaced by the upstream drm crc interface. Signed-off-by: Maarten Lankhorst Cc: Tomi Sarvela Cc: Petri Latvala Cc: Jani Nikula Cc: Ville Syrjälä Acked-by: Ville Syrjälä Acked-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20180628072303.14175-1-maarten.lankhorst@linux.intel.com --- drivers/gpu/drm/i915/i915_debugfs.c | 7 +- drivers/gpu/drm/i915/i915_drv.h | 11 +- drivers/gpu/drm/i915/i915_irq.c | 79 ++--- drivers/gpu/drm/i915/intel_drv.h | 2 - drivers/gpu/drm/i915/intel_pipe_crc.c | 445 -------------------------- 5 files changed, 24 insertions(+), 520 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index f6142d78ede4..544e5e7f011f 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -4818,7 +4818,6 @@ static const struct i915_debugfs_files { #endif {"i915_fifo_underrun_reset", &i915_fifo_underrun_reset_ops}, {"i915_next_seqno", &i915_next_seqno_fops}, - {"i915_display_crc_ctl", &i915_display_crc_ctl_fops}, {"i915_pri_wm_latency", &i915_pri_wm_latency_fops}, {"i915_spr_wm_latency", &i915_spr_wm_latency_fops}, {"i915_cur_wm_latency", &i915_cur_wm_latency_fops}, @@ -4838,7 +4837,7 @@ int i915_debugfs_register(struct drm_i915_private *dev_priv) { struct drm_minor *minor = dev_priv->drm.primary; struct dentry *ent; - int ret, i; + int i; ent = debugfs_create_file("i915_forcewake_user", S_IRUSR, minor->debugfs_root, to_i915(minor->dev), @@ -4846,10 +4845,6 @@ int i915_debugfs_register(struct drm_i915_private *dev_priv) if (!ent) return -ENOMEM; - ret = intel_pipe_crc_create(minor); - if (ret) - return ret; - for (i = 0; i < ARRAY_SIZE(i915_debugfs_files); i++) { ent = debugfs_create_file(i915_debugfs_files[i].name, S_IRUGO | S_IWUSR, diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index e236016eef7b..8a2196e4262b 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1271,20 +1271,11 @@ enum intel_pipe_crc_source { INTEL_PIPE_CRC_SOURCE_MAX, }; -struct intel_pipe_crc_entry { - uint32_t frame; - uint32_t crc[5]; -}; - #define INTEL_PIPE_CRC_ENTRIES_NR 128 struct intel_pipe_crc { spinlock_t lock; - bool opened; /* exclusive access to the result file */ - struct intel_pipe_crc_entry *entries; - enum intel_pipe_crc_source source; - int head, tail; - wait_queue_head_t wq; int skipped; + enum intel_pipe_crc_source source; }; struct i915_frontbuffer_tracking { diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 2fcc00b06915..495b9d27990e 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -1751,69 +1751,34 @@ static void display_pipe_crc_irq_handler(struct drm_i915_private *dev_priv, uint32_t crc4) { struct intel_pipe_crc *pipe_crc = &dev_priv->pipe_crc[pipe]; - struct intel_pipe_crc_entry *entry; struct intel_crtc *crtc = intel_get_crtc_for_pipe(dev_priv, pipe); - struct drm_driver *driver = dev_priv->drm.driver; uint32_t crcs[5]; - int head, tail; spin_lock(&pipe_crc->lock); - if (pipe_crc->source && !crtc->base.crc.opened) { - if (!pipe_crc->entries) { - spin_unlock(&pipe_crc->lock); - DRM_DEBUG_KMS("spurious interrupt\n"); - return; - } - - head = pipe_crc->head; - tail = pipe_crc->tail; - - if (CIRC_SPACE(head, tail, INTEL_PIPE_CRC_ENTRIES_NR) < 1) { - spin_unlock(&pipe_crc->lock); - DRM_ERROR("CRC buffer overflowing\n"); - return; - } - - entry = &pipe_crc->entries[head]; - - entry->frame = driver->get_vblank_counter(&dev_priv->drm, pipe); - entry->crc[0] = crc0; - entry->crc[1] = crc1; - entry->crc[2] = crc2; - entry->crc[3] = crc3; - entry->crc[4] = crc4; - - head = (head + 1) & (INTEL_PIPE_CRC_ENTRIES_NR - 1); - pipe_crc->head = head; - + /* + * For some not yet identified reason, the first CRC is + * bonkers. So let's just wait for the next vblank and read + * out the buggy result. + * + * On GEN8+ sometimes the second CRC is bonkers as well, so + * don't trust that one either. + */ + if (pipe_crc->skipped <= 0 || + (INTEL_GEN(dev_priv) >= 8 && pipe_crc->skipped == 1)) { + pipe_crc->skipped++; spin_unlock(&pipe_crc->lock); - - wake_up_interruptible(&pipe_crc->wq); - } else { - /* - * For some not yet identified reason, the first CRC is - * bonkers. So let's just wait for the next vblank and read - * out the buggy result. - * - * On GEN8+ sometimes the second CRC is bonkers as well, so - * don't trust that one either. - */ - if (pipe_crc->skipped <= 0 || - (INTEL_GEN(dev_priv) >= 8 && pipe_crc->skipped == 1)) { - pipe_crc->skipped++; - spin_unlock(&pipe_crc->lock); - return; - } - spin_unlock(&pipe_crc->lock); - crcs[0] = crc0; - crcs[1] = crc1; - crcs[2] = crc2; - crcs[3] = crc3; - crcs[4] = crc4; - drm_crtc_add_crc_entry(&crtc->base, true, - drm_crtc_accurate_vblank_count(&crtc->base), - crcs); + return; } + spin_unlock(&pipe_crc->lock); + + crcs[0] = crc0; + crcs[1] = crc1; + crcs[2] = crc2; + crcs[3] = crc3; + crcs[4] = crc4; + drm_crtc_add_crc_entry(&crtc->base, true, + drm_crtc_accurate_vblank_count(&crtc->base), + crcs); } #else static inline void diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 3034477b79ff..e283e9e901c2 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -2153,7 +2153,6 @@ void lspcon_resume(struct intel_lspcon *lspcon); void lspcon_wait_pcon_mode(struct intel_lspcon *lspcon); /* intel_pipe_crc.c */ -int intel_pipe_crc_create(struct drm_minor *minor); #ifdef CONFIG_DEBUG_FS int intel_crtc_set_crc_source(struct drm_crtc *crtc, const char *source_name, size_t *values_cnt); @@ -2169,5 +2168,4 @@ static inline void intel_crtc_enable_pipe_crc(struct intel_crtc *crtc) { } #endif -extern const struct file_operations i915_display_crc_ctl_fops; #endif /* __INTEL_DRV_H__ */ diff --git a/drivers/gpu/drm/i915/intel_pipe_crc.c b/drivers/gpu/drm/i915/intel_pipe_crc.c index 39a4e4edda07..849e1b69ba73 100644 --- a/drivers/gpu/drm/i915/intel_pipe_crc.c +++ b/drivers/gpu/drm/i915/intel_pipe_crc.c @@ -30,160 +30,6 @@ #include #include "intel_drv.h" -struct pipe_crc_info { - const char *name; - struct drm_i915_private *dev_priv; - enum pipe pipe; -}; - -static int i915_pipe_crc_open(struct inode *inode, struct file *filep) -{ - struct pipe_crc_info *info = inode->i_private; - struct drm_i915_private *dev_priv = info->dev_priv; - struct intel_pipe_crc *pipe_crc = &dev_priv->pipe_crc[info->pipe]; - - if (info->pipe >= INTEL_INFO(dev_priv)->num_pipes) - return -ENODEV; - - spin_lock_irq(&pipe_crc->lock); - - if (pipe_crc->opened) { - spin_unlock_irq(&pipe_crc->lock); - return -EBUSY; /* already open */ - } - - pipe_crc->opened = true; - filep->private_data = inode->i_private; - - spin_unlock_irq(&pipe_crc->lock); - - return 0; -} - -static int i915_pipe_crc_release(struct inode *inode, struct file *filep) -{ - struct pipe_crc_info *info = inode->i_private; - struct drm_i915_private *dev_priv = info->dev_priv; - struct intel_pipe_crc *pipe_crc = &dev_priv->pipe_crc[info->pipe]; - - spin_lock_irq(&pipe_crc->lock); - pipe_crc->opened = false; - spin_unlock_irq(&pipe_crc->lock); - - return 0; -} - -/* (6 fields, 8 chars each, space separated (5) + '\n') */ -#define PIPE_CRC_LINE_LEN (6 * 8 + 5 + 1) -/* account for \'0' */ -#define PIPE_CRC_BUFFER_LEN (PIPE_CRC_LINE_LEN + 1) - -static int pipe_crc_data_count(struct intel_pipe_crc *pipe_crc) -{ - lockdep_assert_held(&pipe_crc->lock); - return CIRC_CNT(pipe_crc->head, pipe_crc->tail, - INTEL_PIPE_CRC_ENTRIES_NR); -} - -static ssize_t -i915_pipe_crc_read(struct file *filep, char __user *user_buf, size_t count, - loff_t *pos) -{ - struct pipe_crc_info *info = filep->private_data; - struct drm_i915_private *dev_priv = info->dev_priv; - struct intel_pipe_crc *pipe_crc = &dev_priv->pipe_crc[info->pipe]; - char buf[PIPE_CRC_BUFFER_LEN]; - int n_entries; - ssize_t bytes_read; - - /* - * Don't allow user space to provide buffers not big enough to hold - * a line of data. - */ - if (count < PIPE_CRC_LINE_LEN) - return -EINVAL; - - if (pipe_crc->source == INTEL_PIPE_CRC_SOURCE_NONE) - return 0; - - /* nothing to read */ - spin_lock_irq(&pipe_crc->lock); - while (pipe_crc_data_count(pipe_crc) == 0) { - int ret; - - if (filep->f_flags & O_NONBLOCK) { - spin_unlock_irq(&pipe_crc->lock); - return -EAGAIN; - } - - ret = wait_event_interruptible_lock_irq(pipe_crc->wq, - pipe_crc_data_count(pipe_crc), pipe_crc->lock); - if (ret) { - spin_unlock_irq(&pipe_crc->lock); - return ret; - } - } - - /* We now have one or more entries to read */ - n_entries = count / PIPE_CRC_LINE_LEN; - - bytes_read = 0; - while (n_entries > 0) { - struct intel_pipe_crc_entry *entry = - &pipe_crc->entries[pipe_crc->tail]; - - if (CIRC_CNT(pipe_crc->head, pipe_crc->tail, - INTEL_PIPE_CRC_ENTRIES_NR) < 1) - break; - - BUILD_BUG_ON_NOT_POWER_OF_2(INTEL_PIPE_CRC_ENTRIES_NR); - pipe_crc->tail = (pipe_crc->tail + 1) & - (INTEL_PIPE_CRC_ENTRIES_NR - 1); - - bytes_read += snprintf(buf, PIPE_CRC_BUFFER_LEN, - "%8u %8x %8x %8x %8x %8x\n", - entry->frame, entry->crc[0], - entry->crc[1], entry->crc[2], - entry->crc[3], entry->crc[4]); - - spin_unlock_irq(&pipe_crc->lock); - - if (copy_to_user(user_buf, buf, PIPE_CRC_LINE_LEN)) - return -EFAULT; - - user_buf += PIPE_CRC_LINE_LEN; - n_entries--; - - spin_lock_irq(&pipe_crc->lock); - } - - spin_unlock_irq(&pipe_crc->lock); - - return bytes_read; -} - -static const struct file_operations i915_pipe_crc_fops = { - .owner = THIS_MODULE, - .open = i915_pipe_crc_open, - .read = i915_pipe_crc_read, - .release = i915_pipe_crc_release, -}; - -static struct pipe_crc_info i915_pipe_crc_data[I915_MAX_PIPES] = { - { - .name = "i915_pipe_A_crc", - .pipe = PIPE_A, - }, - { - .name = "i915_pipe_B_crc", - .pipe = PIPE_B, - }, - { - .name = "i915_pipe_C_crc", - .pipe = PIPE_C, - }, -}; - static const char * const pipe_crc_sources[] = { "none", "plane1", @@ -197,29 +43,6 @@ static const char * const pipe_crc_sources[] = { "auto", }; -static const char *pipe_crc_source_name(enum intel_pipe_crc_source source) -{ - BUILD_BUG_ON(ARRAY_SIZE(pipe_crc_sources) != INTEL_PIPE_CRC_SOURCE_MAX); - return pipe_crc_sources[source]; -} - -static int display_crc_ctl_show(struct seq_file *m, void *data) -{ - struct drm_i915_private *dev_priv = m->private; - enum pipe pipe; - - for_each_pipe(dev_priv, pipe) - seq_printf(m, "%c %s\n", pipe_name(pipe), - pipe_crc_source_name(dev_priv->pipe_crc[pipe].source)); - - return 0; -} - -static int display_crc_ctl_open(struct inode *inode, struct file *file) -{ - return single_open(file, display_crc_ctl_show, inode->i_private); -} - static int i8xx_pipe_crc_ctl_reg(enum intel_pipe_crc_source *source, uint32_t *val) { @@ -616,177 +439,6 @@ static int get_new_crc_ctl_reg(struct drm_i915_private *dev_priv, return ivb_pipe_crc_ctl_reg(dev_priv, pipe, source, val, set_wa); } -static int pipe_crc_set_source(struct drm_i915_private *dev_priv, - enum pipe pipe, - enum intel_pipe_crc_source source) -{ - struct intel_pipe_crc *pipe_crc = &dev_priv->pipe_crc[pipe]; - enum intel_display_power_domain power_domain; - u32 val = 0; /* shut up gcc */ - int ret; - - if (pipe_crc->source == source) - return 0; - - /* forbid changing the source without going back to 'none' */ - if (pipe_crc->source && source) - return -EINVAL; - - power_domain = POWER_DOMAIN_PIPE(pipe); - if (!intel_display_power_get_if_enabled(dev_priv, power_domain)) { - DRM_DEBUG_KMS("Trying to capture CRC while pipe is off\n"); - return -EIO; - } - - ret = get_new_crc_ctl_reg(dev_priv, pipe, &source, &val, true); - if (ret != 0) - goto out; - - /* none -> real source transition */ - if (source) { - struct intel_pipe_crc_entry *entries; - - DRM_DEBUG_DRIVER("collecting CRCs for pipe %c, %s\n", - pipe_name(pipe), pipe_crc_source_name(source)); - - entries = kcalloc(INTEL_PIPE_CRC_ENTRIES_NR, - sizeof(pipe_crc->entries[0]), - GFP_KERNEL); - if (!entries) { - ret = -ENOMEM; - goto out; - } - - spin_lock_irq(&pipe_crc->lock); - kfree(pipe_crc->entries); - pipe_crc->entries = entries; - pipe_crc->head = 0; - pipe_crc->tail = 0; - spin_unlock_irq(&pipe_crc->lock); - } - - pipe_crc->source = source; - - I915_WRITE(PIPE_CRC_CTL(pipe), val); - POSTING_READ(PIPE_CRC_CTL(pipe)); - - /* real source -> none transition */ - if (!source) { - struct intel_pipe_crc_entry *entries; - struct intel_crtc *crtc = intel_get_crtc_for_pipe(dev_priv, - pipe); - - DRM_DEBUG_DRIVER("stopping CRCs for pipe %c\n", - pipe_name(pipe)); - - drm_modeset_lock(&crtc->base.mutex, NULL); - if (crtc->base.state->active) - intel_wait_for_vblank(dev_priv, pipe); - drm_modeset_unlock(&crtc->base.mutex); - - spin_lock_irq(&pipe_crc->lock); - entries = pipe_crc->entries; - pipe_crc->entries = NULL; - pipe_crc->head = 0; - pipe_crc->tail = 0; - spin_unlock_irq(&pipe_crc->lock); - - kfree(entries); - - if (IS_G4X(dev_priv)) - g4x_undo_pipe_scramble_reset(dev_priv, pipe); - else if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) - vlv_undo_pipe_scramble_reset(dev_priv, pipe); - else if ((IS_HASWELL(dev_priv) || - IS_BROADWELL(dev_priv)) && pipe == PIPE_A) - hsw_pipe_A_crc_wa(dev_priv, false); - } - - ret = 0; - -out: - intel_display_power_put(dev_priv, power_domain); - - return ret; -} - -/* - * Parse pipe CRC command strings: - * command: wsp* object wsp+ name wsp+ source wsp* - * object: 'pipe' - * name: (A | B | C) - * source: (none | plane1 | plane2 | pf) - * wsp: (#0x20 | #0x9 | #0xA)+ - * - * eg.: - * "pipe A plane1" -> Start CRC computations on plane1 of pipe A - * "pipe A none" -> Stop CRC - */ -static int display_crc_ctl_tokenize(char *buf, char *words[], int max_words) -{ - int n_words = 0; - - while (*buf) { - char *end; - - /* skip leading white space */ - buf = skip_spaces(buf); - if (!*buf) - break; /* end of buffer */ - - /* find end of word */ - for (end = buf; *end && !isspace(*end); end++) - ; - - if (n_words == max_words) { - DRM_DEBUG_DRIVER("too many words, allowed <= %d\n", - max_words); - return -EINVAL; /* ran out of words[] before bytes */ - } - - if (*end) - *end++ = '\0'; - words[n_words++] = buf; - buf = end; - } - - return n_words; -} - -enum intel_pipe_crc_object { - PIPE_CRC_OBJECT_PIPE, -}; - -static const char * const pipe_crc_objects[] = { - "pipe", -}; - -static int -display_crc_ctl_parse_object(const char *buf, enum intel_pipe_crc_object *o) -{ - int i; - - i = match_string(pipe_crc_objects, ARRAY_SIZE(pipe_crc_objects), buf); - if (i < 0) - return i; - - *o = i; - return 0; -} - -static int display_crc_ctl_parse_pipe(struct drm_i915_private *dev_priv, - const char *buf, enum pipe *pipe) -{ - const char name = buf[0]; - - if (name < 'A' || name >= pipe_name(INTEL_INFO(dev_priv)->num_pipes)) - return -EINVAL; - - *pipe = name - 'A'; - - return 0; -} - static int display_crc_ctl_parse_source(const char *buf, enum intel_pipe_crc_source *s) { @@ -805,81 +457,6 @@ display_crc_ctl_parse_source(const char *buf, enum intel_pipe_crc_source *s) return 0; } -static int display_crc_ctl_parse(struct drm_i915_private *dev_priv, - char *buf, size_t len) -{ -#define N_WORDS 3 - int n_words; - char *words[N_WORDS]; - enum pipe pipe; - enum intel_pipe_crc_object object; - enum intel_pipe_crc_source source; - - n_words = display_crc_ctl_tokenize(buf, words, N_WORDS); - if (n_words != N_WORDS) { - DRM_DEBUG_DRIVER("tokenize failed, a command is %d words\n", - N_WORDS); - return -EINVAL; - } - - if (display_crc_ctl_parse_object(words[0], &object) < 0) { - DRM_DEBUG_DRIVER("unknown object %s\n", words[0]); - return -EINVAL; - } - - if (display_crc_ctl_parse_pipe(dev_priv, words[1], &pipe) < 0) { - DRM_DEBUG_DRIVER("unknown pipe %s\n", words[1]); - return -EINVAL; - } - - if (display_crc_ctl_parse_source(words[2], &source) < 0) { - DRM_DEBUG_DRIVER("unknown source %s\n", words[2]); - return -EINVAL; - } - - return pipe_crc_set_source(dev_priv, pipe, source); -} - -static ssize_t display_crc_ctl_write(struct file *file, const char __user *ubuf, - size_t len, loff_t *offp) -{ - struct seq_file *m = file->private_data; - struct drm_i915_private *dev_priv = m->private; - char *tmpbuf; - int ret; - - if (len == 0) - return 0; - - if (len > PAGE_SIZE - 1) { - DRM_DEBUG_DRIVER("expected <%lu bytes into pipe crc control\n", - PAGE_SIZE); - return -E2BIG; - } - - tmpbuf = memdup_user_nul(ubuf, len); - if (IS_ERR(tmpbuf)) - return PTR_ERR(tmpbuf); - - ret = display_crc_ctl_parse(dev_priv, tmpbuf, len); - - kfree(tmpbuf); - if (ret < 0) - return ret; - - *offp += len; - return len; -} - -const struct file_operations i915_display_crc_ctl_fops = { - .owner = THIS_MODULE, - .open = display_crc_ctl_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, - .write = display_crc_ctl_write -}; - void intel_display_crc_init(struct drm_i915_private *dev_priv) { enum pipe pipe; @@ -887,32 +464,10 @@ void intel_display_crc_init(struct drm_i915_private *dev_priv) for_each_pipe(dev_priv, pipe) { struct intel_pipe_crc *pipe_crc = &dev_priv->pipe_crc[pipe]; - pipe_crc->opened = false; spin_lock_init(&pipe_crc->lock); - init_waitqueue_head(&pipe_crc->wq); } } -int intel_pipe_crc_create(struct drm_minor *minor) -{ - struct drm_i915_private *dev_priv = to_i915(minor->dev); - struct dentry *ent; - int i; - - for (i = 0; i < ARRAY_SIZE(i915_pipe_crc_data); i++) { - struct pipe_crc_info *info = &i915_pipe_crc_data[i]; - - info->dev_priv = dev_priv; - ent = debugfs_create_file(info->name, S_IRUGO, - minor->debugfs_root, info, - &i915_pipe_crc_fops); - if (!ent) - return -ENOMEM; - } - - return 0; -} - int intel_crtc_set_crc_source(struct drm_crtc *crtc, const char *source_name, size_t *values_cnt) { From b79ebe74e1c4219e91940b637d40939b0c80c0f2 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Thu, 5 Jul 2018 15:26:54 +0300 Subject: [PATCH 090/114] drm/i915/ddi: Simplify get_encoder_power_domains() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We can simplify the encoder's get_power_domains() hook by calling it only if the encoder is active. That way the hook can return its power domains unconditionally without checking the active state by calling encoder::get_hw_state(). This get_hw_state() query is in fact redundant since it's already done by intel_modeset_readout_hw_state() setting the encoder's crtc or leaving it NULL accordingly. Let's use this fact to decide if the encoder is active. While at it clarify the comment in intel_ddi_get_power_domains() about primary vs. fake MST encoders and make sure we never do an incorrect encoder->dig_port cast for fake MST encoders. Suggested-by: Ville Syrjälä Cc: Ville Syrjälä Signed-off-by: Imre Deak Reviewed-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20180705122654.17072-1-imre.deak@intel.com --- drivers/gpu/drm/i915/intel_ddi.c | 19 +++++++------------ drivers/gpu/drm/i915/intel_display.c | 11 +++++------ 2 files changed, 12 insertions(+), 18 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index c74b01a52082..32838ed89ee7 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -2006,24 +2006,19 @@ intel_ddi_main_link_aux_domain(struct intel_dp *intel_dp) static u64 intel_ddi_get_power_domains(struct intel_encoder *encoder, struct intel_crtc_state *crtc_state) { - struct intel_digital_port *dig_port = enc_to_dig_port(&encoder->base); - enum pipe pipe; + struct intel_digital_port *dig_port; u64 domains; - if (!intel_ddi_get_hw_state(encoder, &pipe)) - return 0; - - domains = BIT_ULL(dig_port->ddi_io_power_domain); - if (!crtc_state) - return domains; - /* * TODO: Add support for MST encoders. Atm, the following should never - * happen since this function will be called only for the primary - * encoder which doesn't have its own pipe/crtc_state. + * happen since fake-MST encoders don't set their get_power_domains() + * hook. */ if (WARN_ON(intel_crtc_has_type(crtc_state, INTEL_OUTPUT_DP_MST))) - return domains; + return 0; + + dig_port = enc_to_dig_port(&encoder->base); + domains = BIT_ULL(dig_port->ddi_io_power_domain); /* AUX power is only needed for (e)DP mode, not for HDMI. */ if (intel_crtc_has_dp_encoder(crtc_state)) { diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index d33033abbf18..19f756d571ae 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -15664,14 +15664,13 @@ get_encoder_power_domains(struct drm_i915_private *dev_priv) continue; /* - * For MST and inactive encoders we don't have a crtc state. - * FIXME: no need to call get_power_domains in such cases, it - * will always return 0. + * MST-primary and inactive encoders don't have a crtc state + * and neither of these require any power domain references. */ - crtc_state = encoder->base.crtc ? - to_intel_crtc_state(encoder->base.crtc->state) : - NULL; + if (!encoder->base.crtc) + continue; + crtc_state = to_intel_crtc_state(encoder->base.crtc->state); get_domains = encoder->get_power_domains(encoder, crtc_state); for_each_power_domain(domain, get_domains) intel_display_power_get(dev_priv, domain); From 481827b441674b7f1d030c223decdb56266ff398 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 6 Jul 2018 11:14:41 +0100 Subject: [PATCH 091/114] drm/i915: Record logical context support in driver caps Avoid looking at the magical engines[RCS] to decide if the HW and driver supports logical contexts, and instead record that knowledge during initialisation. Signed-off-by: Chris Wilson Cc: Matthew Auld Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20180706101442.21279-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_drv.h | 1 + drivers/gpu/drm/i915/i915_gem_context.c | 6 +++--- drivers/gpu/drm/i915/intel_device_info.c | 2 ++ drivers/gpu/drm/i915/intel_device_info.h | 1 + drivers/gpu/drm/i915/intel_engine_cs.c | 2 ++ 5 files changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 8a2196e4262b..c1c637e47bf5 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2306,6 +2306,7 @@ intel_info(const struct drm_i915_private *dev_priv) } #define INTEL_INFO(dev_priv) intel_info((dev_priv)) +#define DRIVER_CAPS(dev_priv) (&(dev_priv)->caps) #define INTEL_GEN(dev_priv) ((dev_priv)->info.gen) #define INTEL_DEVID(dev_priv) ((dev_priv)->info.device_id) diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index 985ef70d9416..b10770cfccd2 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -512,8 +512,8 @@ int i915_gem_contexts_init(struct drm_i915_private *dev_priv) } DRM_DEBUG_DRIVER("%s context support initialized\n", - dev_priv->engine[RCS]->context_size ? "logical" : - "fake"); + DRIVER_CAPS(dev_priv)->has_logical_contexts ? + "logical" : "fake"); return 0; } @@ -720,7 +720,7 @@ int i915_gem_context_create_ioctl(struct drm_device *dev, void *data, struct i915_gem_context *ctx; int ret; - if (!dev_priv->engine[RCS]->context_size) + if (!DRIVER_CAPS(dev_priv)->has_logical_contexts) return -ENODEV; if (args->pad != 0) diff --git a/drivers/gpu/drm/i915/intel_device_info.c b/drivers/gpu/drm/i915/intel_device_info.c index 0fd13df424cf..0ef0c6448d53 100644 --- a/drivers/gpu/drm/i915/intel_device_info.c +++ b/drivers/gpu/drm/i915/intel_device_info.c @@ -858,6 +858,8 @@ void intel_device_info_runtime_init(struct intel_device_info *info) void intel_driver_caps_print(const struct intel_driver_caps *caps, struct drm_printer *p) { + drm_printf(p, "Has logical contexts? %s\n", + yesno(caps->has_logical_contexts)); drm_printf(p, "scheduler: %x\n", caps->scheduler); } diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h index 933e31669557..633f9fbf72ea 100644 --- a/drivers/gpu/drm/i915/intel_device_info.h +++ b/drivers/gpu/drm/i915/intel_device_info.h @@ -186,6 +186,7 @@ struct intel_device_info { struct intel_driver_caps { unsigned int scheduler; + bool has_logical_contexts:1; }; static inline unsigned int sseu_subslice_total(const struct sseu_dev_info *sseu) diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index 478c928912c4..e2f562853aee 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -302,6 +302,8 @@ intel_engine_setup(struct drm_i915_private *dev_priv, engine->class); if (WARN_ON(engine->context_size > BIT(20))) engine->context_size = 0; + if (engine->context_size) + DRIVER_CAPS(dev_priv)->has_logical_contexts = true; /* Nothing to do here, execute in order of dependencies */ engine->schedule = NULL; From 0fdbe58c4a0f8c2fa67e38a740ce6ff3cffa8c86 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 6 Jul 2018 11:19:23 +0100 Subject: [PATCH 092/114] drm/i915/selftests: Skip live context execution test without logical contexts If the HW (or driver) doesn't support logical contexts, don't pretend we gain anything from trying to execute GPU commands with them. At best it reports -ENODEV, which is an unhelpful failure that we should just skip. v2: Be more specific and check the driver/engine caps for logical (HW) context support. Signed-off-by: Chris Wilson Cc: Matthew Auld Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20180706101923.28548-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/selftests/i915_gem_context.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/selftests/i915_gem_context.c index 0b36265a0f96..0d8e719802fa 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_context.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_context.c @@ -336,11 +336,15 @@ static int igt_ctx_exec(void *arg) bool first_shared_gtt = true; int err = -ENODEV; - /* Create a few different contexts (with different mm) and write + /* + * Create a few different contexts (with different mm) and write * through each ctx/mm using the GPU making sure those writes end * up in the expected pages of our obj. */ + if (!DRIVER_CAPS(i915)->has_logical_contexts) + return 0; + file = mock_file(i915); if (IS_ERR(file)) return PTR_ERR(file); @@ -367,6 +371,9 @@ static int igt_ctx_exec(void *arg) } for_each_engine(engine, i915, id) { + if (!engine->context_size) + continue; /* No logical context support in HW */ + if (!intel_engine_can_store_dword(engine)) continue; From 03bbc508a312d9f4833f1cb0f7b2da7517787b61 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 6 Jul 2018 12:45:10 +0100 Subject: [PATCH 093/114] drm/i915/selftests: Skip live_execlists if the GPU is terminally wedged If the GPU is irrecoverably wedged, we can not execute any requests making testing execlists (request execution) pointless. Skip! Signed-off-by: Chris Wilson Cc: Matthew Auld Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20180706114510.18467-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/selftests/intel_lrc.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/i915/selftests/intel_lrc.c b/drivers/gpu/drm/i915/selftests/intel_lrc.c index ea27c7cfbf96..730a02d39058 100644 --- a/drivers/gpu/drm/i915/selftests/intel_lrc.c +++ b/drivers/gpu/drm/i915/selftests/intel_lrc.c @@ -455,5 +455,8 @@ int intel_execlists_live_selftests(struct drm_i915_private *i915) if (!HAS_EXECLISTS(i915)) return 0; + if (i915_terminally_wedged(&i915->gpu_error)) + return 0; + return i915_subtests(tests, i915); } From add00e6d896fab882e6115ed4908b2456f1b3a85 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 6 Jul 2018 12:54:02 +0100 Subject: [PATCH 094/114] drm/i915: Flush the WCB following a WC write If we have just completed a WC write, we must ensure that the WCB (Write Combining Buffer) is flushed out to main memory before we can expect to see the results. This is especially important when mixing WC with GTT as the physical paths are different and cachelines are not naturally flushed. Testcase: igt/drv_selftests/live_coherency #gdg Signed-off-by: Chris Wilson Reviewed-by: Matthew Auld Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20180706115402.18547-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 0c0a1a959d0b..be63e8bbb6d2 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -837,6 +837,10 @@ flush_write_domain(struct drm_i915_gem_object *obj, unsigned int flush_domains) } break; + case I915_GEM_DOMAIN_WC: + wmb(); + break; + case I915_GEM_DOMAIN_CPU: i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC); break; From 58174eac1593f104c03a15330e0fea1b1ec01434 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 6 Jul 2018 13:26:10 +0100 Subject: [PATCH 095/114] drm/i915/gtt: Suppress warnings for dma_map_page As we propagate back the error to the caller for them to handle, we do not need the lowest level spitting out a redundant warning upon an allocation failure inside dma_map_page(). Signed-off-by: Chris Wilson Cc: Matthew Auld Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20180706122611.4142-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem_gtt.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index a9330de886f8..92a06042e0bf 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -561,8 +561,10 @@ static int __setup_page_dma(struct i915_address_space *vm, if (unlikely(!p->page)) return -ENOMEM; - p->daddr = dma_map_page(vm->dma, p->page, 0, PAGE_SIZE, - PCI_DMA_BIDIRECTIONAL); + p->daddr = dma_map_page_attrs(vm->dma, + p->page, 0, PAGE_SIZE, + PCI_DMA_BIDIRECTIONAL, + DMA_ATTR_NO_WARN); if (unlikely(dma_mapping_error(vm->dma, p->daddr))) { vm_free_page(vm, p->page); return -ENOMEM; @@ -643,8 +645,10 @@ setup_scratch_page(struct i915_address_space *vm, gfp_t gfp) if (unlikely(!page)) goto skip; - addr = dma_map_page(vm->dma, page, 0, size, - PCI_DMA_BIDIRECTIONAL); + addr = dma_map_page_attrs(vm->dma, + page, 0, size, + PCI_DMA_BIDIRECTIONAL, + DMA_ATTR_NO_WARN); if (unlikely(dma_mapping_error(vm->dma, addr))) goto free_page; From 66daec6b21840ea09af4c41f706867df7d81c820 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 6 Jul 2018 13:26:11 +0100 Subject: [PATCH 096/114] drm/i915/gtt: Control cache domain of dma_map_page() directly We already maually control the CPU cache for our page table directories, so we can tell the dma mapper to skip doing it as well. Signed-off-by: Chris Wilson Cc: Matthew Auld Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20180706122611.4142-2-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem_gtt.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 92a06042e0bf..e2793231ef49 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -564,6 +564,7 @@ static int __setup_page_dma(struct i915_address_space *vm, p->daddr = dma_map_page_attrs(vm->dma, p->page, 0, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL, + DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_NO_WARN); if (unlikely(dma_mapping_error(vm->dma, p->daddr))) { vm_free_page(vm, p->page); @@ -648,6 +649,7 @@ setup_scratch_page(struct i915_address_space *vm, gfp_t gfp) addr = dma_map_page_attrs(vm->dma, page, 0, size, PCI_DMA_BIDIRECTIONAL, + DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_NO_WARN); if (unlikely(dma_mapping_error(vm->dma, addr))) goto free_page; From 207b700050b8d323d0c23b457c200b22c7ed3737 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 6 Jul 2018 13:53:38 +0100 Subject: [PATCH 097/114] drm/i915/selftests: Limit live_gtt allocation test to fit within RAM Limit the GTT size we try and allocate to ensure that it fits within RAM and does not trigger the oomkiller indiscriminately. Signed-off-by: Chris Wilson Cc: Matthew Auld Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20180706125338.24432-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 20 ++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c index e108fe4e0fd9..600a3bcbd3d6 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c @@ -148,7 +148,7 @@ static int igt_ppgtt_alloc(void *arg) { struct drm_i915_private *dev_priv = arg; struct i915_hw_ppgtt *ppgtt; - u64 size, last; + u64 size, last, limit; int err = 0; /* Allocate a ppggt and try to fill the entire range */ @@ -163,10 +163,18 @@ static int igt_ppgtt_alloc(void *arg) if (!ppgtt->vm.allocate_va_range) goto err_ppgtt_cleanup; + /* + * While we only allocate the page tables here and so we could + * address a much larger GTT than we could actually fit into + * RAM, a practical limit is the amount of physical pages in the system. + * This should ensure that we do not run into the oomkiller during + * the test and take down the machine wilfully. + */ + limit = totalram_pages << PAGE_SHIFT; + limit = min(ppgtt->vm.total, limit); + /* Check we can allocate the entire range */ - for (size = 4096; - size <= ppgtt->vm.total; - size <<= 2) { + for (size = 4096; size <= limit; size <<= 2) { err = ppgtt->vm.allocate_va_range(&ppgtt->vm, 0, size); if (err) { if (err == -ENOMEM) { @@ -183,9 +191,7 @@ static int igt_ppgtt_alloc(void *arg) } /* Check we can incrementally allocate the entire range */ - for (last = 0, size = 4096; - size <= ppgtt->vm.total; - last = size, size <<= 2) { + for (last = 0, size = 4096; size <= limit; last = size, size <<= 2) { err = ppgtt->vm.allocate_va_range(&ppgtt->vm, last, size - last); if (err) { From 5b544337109081ac4de23e7ded1c31f7457e5f5e Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 6 Jul 2018 15:23:22 +0100 Subject: [PATCH 098/114] drm/i915/selftests: Replace magic 1<<22 with MI_USE_GGTT/MI_MEM_VIRTUAL MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace the magic bit with the proper symbolic name for instructing MI_STORE_DWORD_IMM to use a virtual address (on gen3) or the global GTT address (still virtual!) on gen4+. Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20180706142323.25699-1-chris@chris-wilson.co.uk Reviewed-by: Ville Syrjälä --- drivers/gpu/drm/i915/selftests/huge_pages.c | 4 ++-- drivers/gpu/drm/i915/selftests/i915_gem_coherency.c | 4 ++-- drivers/gpu/drm/i915/selftests/i915_gem_context.c | 4 ++-- drivers/gpu/drm/i915/selftests/intel_hangcheck.c | 2 +- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/selftests/huge_pages.c b/drivers/gpu/drm/i915/selftests/huge_pages.c index 1193dd36913a..ab662dabcff7 100644 --- a/drivers/gpu/drm/i915/selftests/huge_pages.c +++ b/drivers/gpu/drm/i915/selftests/huge_pages.c @@ -919,12 +919,12 @@ gpu_write_dw(struct i915_vma *vma, u64 offset, u32 val) *cmd++ = val; } else if (gen >= 4) { *cmd++ = MI_STORE_DWORD_IMM_GEN4 | - (gen < 6 ? 1 << 22 : 0); + (gen < 6 ? MI_USE_GGTT : 0); *cmd++ = 0; *cmd++ = offset; *cmd++ = val; } else { - *cmd++ = MI_STORE_DWORD_IMM | 1 << 22; + *cmd++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL; *cmd++ = offset; *cmd++ = val; } diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c b/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c index cb9eef1635e1..294c58aba2c1 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c @@ -210,12 +210,12 @@ static int gpu_set(struct drm_i915_gem_object *obj, *cs++ = upper_32_bits(i915_ggtt_offset(vma) + offset); *cs++ = v; } else if (INTEL_GEN(i915) >= 4) { - *cs++ = MI_STORE_DWORD_IMM_GEN4 | 1 << 22; + *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; *cs++ = 0; *cs++ = i915_ggtt_offset(vma) + offset; *cs++ = v; } else { - *cs++ = MI_STORE_DWORD_IMM | 1 << 22; + *cs++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL; *cs++ = i915_ggtt_offset(vma) + offset; *cs++ = v; *cs++ = MI_NOOP; diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/selftests/i915_gem_context.c index 0d8e719802fa..65100d3e31cf 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_context.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_context.c @@ -63,12 +63,12 @@ gpu_fill_dw(struct i915_vma *vma, u64 offset, unsigned long count, u32 value) *cmd++ = value; } else if (gen >= 4) { *cmd++ = MI_STORE_DWORD_IMM_GEN4 | - (gen < 6 ? 1 << 22 : 0); + (gen < 6 ? MI_USE_GGTT : 0); *cmd++ = 0; *cmd++ = offset; *cmd++ = value; } else { - *cmd++ = MI_STORE_DWORD_IMM | 1 << 22; + *cmd++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL; *cmd++ = offset; *cmd++ = value; } diff --git a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c index 5cb808dc5b50..0fc6da81f86e 100644 --- a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c +++ b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c @@ -171,7 +171,7 @@ static int emit_recurse_batch(struct hang *h, *batch++ = MI_BATCH_BUFFER_START | 1 << 8; *batch++ = lower_32_bits(vma->node.start); } else if (INTEL_GEN(i915) >= 4) { - *batch++ = MI_STORE_DWORD_IMM_GEN4 | 1 << 22; + *batch++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; *batch++ = 0; *batch++ = lower_32_bits(hws_address(hws, rq)); *batch++ = rq->fence.seqno; From 8fdbfd8686329e286465bcef11a7ef20be84d6b6 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 6 Jul 2018 15:23:23 +0100 Subject: [PATCH 099/114] drm/i915/selftests: Fixup missing MI_MEM_VIRTUAL for live_hangcheck MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We always want to use a virtual address (i.e. use the GTT) for MI_STORE_DWORD_IMM, but forgot the ever so important flag in live_hangcheck for gen3. Signed-off-by: Chris Wilson Reviewed-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20180706142323.25699-2-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/selftests/intel_hangcheck.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c index 0fc6da81f86e..c838f7d08cb9 100644 --- a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c +++ b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c @@ -184,7 +184,7 @@ static int emit_recurse_batch(struct hang *h, *batch++ = MI_BATCH_BUFFER_START | 2 << 6; *batch++ = lower_32_bits(vma->node.start); } else { - *batch++ = MI_STORE_DWORD_IMM; + *batch++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL; *batch++ = lower_32_bits(hws_address(hws, rq)); *batch++ = rq->fence.seqno; *batch++ = MI_ARB_CHECK; From da99fe5f858525b0eac5a5296bba5d6bedb81abd Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 6 Jul 2018 11:39:42 +0100 Subject: [PATCH 100/114] drm/i915: Refactor export_fence() after i915_vma_move_to_active() Currently all callers are responsible for adding the vma to the active timeline and then exporting its fence. Combine the two operations into i915_vma_move_to_active() to move all the extra handling from the callers to the single site. Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20180706103947.15919-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 47 +++++++++---------- drivers/gpu/drm/i915/selftests/huge_pages.c | 4 -- .../drm/i915/selftests/i915_gem_coherency.c | 4 -- .../gpu/drm/i915/selftests/i915_gem_context.c | 4 -- .../gpu/drm/i915/selftests/i915_gem_object.c | 4 -- .../drm/i915/selftests/intel_workarounds.c | 3 -- 6 files changed, 21 insertions(+), 45 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index c2dd9b4cdace..91f20445147f 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -1166,15 +1166,9 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb, GEM_BUG_ON(!reservation_object_test_signaled_rcu(batch->resv, true)); i915_vma_move_to_active(batch, rq, 0); - reservation_object_lock(batch->resv, NULL); - reservation_object_add_excl_fence(batch->resv, &rq->fence); - reservation_object_unlock(batch->resv); i915_vma_unpin(batch); i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); - reservation_object_lock(vma->resv, NULL); - reservation_object_add_excl_fence(vma->resv, &rq->fence); - reservation_object_unlock(vma->resv); rq->batch = batch; @@ -1771,25 +1765,6 @@ slow: return eb_relocate_slow(eb); } -static void eb_export_fence(struct i915_vma *vma, - struct i915_request *rq, - unsigned int flags) -{ - struct reservation_object *resv = vma->resv; - - /* - * Ignore errors from failing to allocate the new fence, we can't - * handle an error right now. Worst case should be missed - * synchronisation leading to rendering corruption. - */ - reservation_object_lock(resv, NULL); - if (flags & EXEC_OBJECT_WRITE) - reservation_object_add_excl_fence(resv, &rq->fence); - else if (reservation_object_reserve_shared(resv) == 0) - reservation_object_add_shared_fence(resv, &rq->fence); - reservation_object_unlock(resv); -} - static int eb_move_to_gpu(struct i915_execbuffer *eb) { const unsigned int count = eb->buffer_count; @@ -1844,7 +1819,6 @@ static int eb_move_to_gpu(struct i915_execbuffer *eb) struct i915_vma *vma = eb->vma[i]; i915_vma_move_to_active(vma, eb->request, flags); - eb_export_fence(vma, eb->request, flags); __eb_unreserve_vma(vma, flags); vma->exec_flags = NULL; @@ -1884,6 +1858,25 @@ static bool i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec) return true; } +static void export_fence(struct i915_vma *vma, + struct i915_request *rq, + unsigned int flags) +{ + struct reservation_object *resv = vma->resv; + + /* + * Ignore errors from failing to allocate the new fence, we can't + * handle an error right now. Worst case should be missed + * synchronisation leading to rendering corruption. + */ + reservation_object_lock(resv, NULL); + if (flags & EXEC_OBJECT_WRITE) + reservation_object_add_excl_fence(resv, &rq->fence); + else if (reservation_object_reserve_shared(resv) == 0) + reservation_object_add_shared_fence(resv, &rq->fence); + reservation_object_unlock(resv); +} + void i915_vma_move_to_active(struct i915_vma *vma, struct i915_request *rq, unsigned int flags) @@ -1921,6 +1914,8 @@ void i915_vma_move_to_active(struct i915_vma *vma, if (flags & EXEC_OBJECT_NEEDS_FENCE) i915_gem_active_set(&vma->last_fence, rq); + + export_fence(vma, rq, flags); } static int i915_reset_gen7_sol_offsets(struct i915_request *rq) diff --git a/drivers/gpu/drm/i915/selftests/huge_pages.c b/drivers/gpu/drm/i915/selftests/huge_pages.c index ab662dabcff7..84bed69f30cc 100644 --- a/drivers/gpu/drm/i915/selftests/huge_pages.c +++ b/drivers/gpu/drm/i915/selftests/huge_pages.c @@ -998,10 +998,6 @@ static int gpu_write(struct i915_vma *vma, i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); - reservation_object_lock(vma->resv, NULL); - reservation_object_add_excl_fence(vma->resv, &rq->fence); - reservation_object_unlock(vma->resv); - err_request: i915_request_add(rq); diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c b/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c index 294c58aba2c1..97a16311f083 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c @@ -225,10 +225,6 @@ static int gpu_set(struct drm_i915_gem_object *obj, i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); i915_vma_unpin(vma); - reservation_object_lock(obj->resv, NULL); - reservation_object_add_excl_fence(obj->resv, &rq->fence); - reservation_object_unlock(obj->resv); - i915_request_add(rq); return 0; diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/selftests/i915_gem_context.c index 65100d3e31cf..c642ab97698e 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_context.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_context.c @@ -178,10 +178,6 @@ static int gpu_fill(struct drm_i915_gem_object *obj, i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); i915_vma_unpin(vma); - reservation_object_lock(obj->resv, NULL); - reservation_object_add_excl_fence(obj->resv, &rq->fence); - reservation_object_unlock(obj->resv); - i915_request_add(rq); return 0; diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_object.c b/drivers/gpu/drm/i915/selftests/i915_gem_object.c index 6fe71865b710..b2ccbc5e2bbe 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_object.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_object.c @@ -466,10 +466,6 @@ static int make_obj_busy(struct drm_i915_gem_object *obj) i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); - reservation_object_lock(vma->resv, NULL); - reservation_object_add_excl_fence(vma->resv, &rq->fence); - reservation_object_unlock(vma->resv); - i915_request_add(rq); i915_gem_object_set_active_reference(obj); diff --git a/drivers/gpu/drm/i915/selftests/intel_workarounds.c b/drivers/gpu/drm/i915/selftests/intel_workarounds.c index 3d86b90ab722..4a9dc01a364a 100644 --- a/drivers/gpu/drm/i915/selftests/intel_workarounds.c +++ b/drivers/gpu/drm/i915/selftests/intel_workarounds.c @@ -68,9 +68,6 @@ read_nonprivs(struct i915_gem_context *ctx, struct intel_engine_cs *engine) intel_ring_advance(rq, cs); i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); - reservation_object_lock(vma->resv, NULL); - reservation_object_add_excl_fence(vma->resv, &rq->fence); - reservation_object_unlock(vma->resv); i915_gem_object_get(result); i915_gem_object_set_active_reference(result); From 6dd7526f6f6c73961eecec8a4b9b717d414010f8 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 6 Jul 2018 11:39:43 +0100 Subject: [PATCH 101/114] drm/i915: Export i915_request_skip() In the next patch, we will want to start skipping requests on failing to complete their payloads. So export the utility function current used to make requests inoperable following a failed gpu reset. Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20180706103947.15919-2-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem.c | 25 +++---------------------- drivers/gpu/drm/i915/i915_request.c | 21 +++++++++++++++++++++ drivers/gpu/drm/i915/i915_request.h | 2 ++ 3 files changed, 26 insertions(+), 22 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index be63e8bbb6d2..2e05cf114083 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -3085,25 +3085,6 @@ int i915_gem_reset_prepare(struct drm_i915_private *dev_priv) return err; } -static void skip_request(struct i915_request *request) -{ - void *vaddr = request->ring->vaddr; - u32 head; - - /* As this request likely depends on state from the lost - * context, clear out all the user operations leaving the - * breadcrumb at the end (so we get the fence notifications). - */ - head = request->head; - if (request->postfix < head) { - memset(vaddr + head, 0, request->ring->size - head); - head = 0; - } - memset(vaddr + head, 0, request->postfix - head); - - dma_fence_set_error(&request->fence, -EIO); -} - static void engine_skip_context(struct i915_request *request) { struct intel_engine_cs *engine = request->engine; @@ -3118,10 +3099,10 @@ static void engine_skip_context(struct i915_request *request) list_for_each_entry_continue(request, &engine->timeline.requests, link) if (request->gem_context == hung_ctx) - skip_request(request); + i915_request_skip(request, -EIO); list_for_each_entry(request, &timeline->requests, link) - skip_request(request); + i915_request_skip(request, -EIO); spin_unlock(&timeline->lock); spin_unlock_irqrestore(&engine->timeline.lock, flags); @@ -3164,7 +3145,7 @@ i915_gem_reset_request(struct intel_engine_cs *engine, if (stalled) { i915_gem_context_mark_guilty(request->gem_context); - skip_request(request); + i915_request_skip(request, -EIO); /* If this context is now banned, skip all pending requests. */ if (i915_gem_context_is_banned(request->gem_context)) diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index a2f7e9358450..7ae08b68121e 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -1013,6 +1013,27 @@ i915_request_await_object(struct i915_request *to, return ret; } +void i915_request_skip(struct i915_request *rq, int error) +{ + void *vaddr = rq->ring->vaddr; + u32 head; + + GEM_BUG_ON(!IS_ERR_VALUE((long)error)); + dma_fence_set_error(&rq->fence, error); + + /* + * As this request likely depends on state from the lost + * context, clear out all the user operations leaving the + * breadcrumb at the end (so we get the fence notifications). + */ + head = rq->infix; + if (rq->postfix < head) { + memset(vaddr + head, 0, rq->ring->size - head); + head = 0; + } + memset(vaddr + head, 0, rq->postfix - head); +} + /* * NB: This function is not allowed to fail. Doing so would mean the the * request is not being tracked for completion but the work itself is diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h index 7ee220ded9c9..a355a081485f 100644 --- a/drivers/gpu/drm/i915/i915_request.h +++ b/drivers/gpu/drm/i915/i915_request.h @@ -258,6 +258,8 @@ void i915_request_add(struct i915_request *rq); void __i915_request_submit(struct i915_request *request); void i915_request_submit(struct i915_request *request); +void i915_request_skip(struct i915_request *request, int error); + void __i915_request_unsubmit(struct i915_request *request); void i915_request_unsubmit(struct i915_request *request); From a523697857cdfb6a548f6caf38f42f4fe0f7d757 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 6 Jul 2018 11:39:44 +0100 Subject: [PATCH 102/114] drm/i915: Start returning an error from i915_vma_move_to_active() Handling such a late error in request construction is tricky, but to accommodate future patches which may allocate here, we potentially could err. To handle the error after already adjusting global state to track the new request, we must finish and submit the request. But we don't want to use the request as not everything is being tracked by it, so we opt to cancel the commands inside the request. Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20180706103947.15919-3-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gvt/scheduler.c | 6 ++++- drivers/gpu/drm/i915/i915_drv.h | 6 ++--- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 25 +++++++++++++------ drivers/gpu/drm/i915/i915_gem_render_state.c | 2 +- drivers/gpu/drm/i915/selftests/huge_pages.c | 9 +++++-- .../drm/i915/selftests/i915_gem_coherency.c | 4 +-- .../gpu/drm/i915/selftests/i915_gem_context.c | 12 +++++++-- .../gpu/drm/i915/selftests/i915_gem_object.c | 7 +++--- drivers/gpu/drm/i915/selftests/i915_request.c | 8 ++++-- .../gpu/drm/i915/selftests/intel_hangcheck.c | 11 ++++++-- drivers/gpu/drm/i915/selftests/intel_lrc.c | 11 ++++++-- .../drm/i915/selftests/intel_workarounds.c | 6 +++-- 12 files changed, 78 insertions(+), 29 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c index 928818f218f7..b0e566956b8d 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.c +++ b/drivers/gpu/drm/i915/gvt/scheduler.c @@ -476,7 +476,11 @@ static int prepare_shadow_batch_buffer(struct intel_vgpu_workload *workload) i915_gem_obj_finish_shmem_access(bb->obj); bb->accessing = false; - i915_vma_move_to_active(bb->vma, workload->req, 0); + ret = i915_vma_move_to_active(bb->vma, + workload->req, + 0); + if (ret) + goto err; } } return 0; diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index c1c637e47bf5..07846e63671d 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -3090,9 +3090,9 @@ i915_gem_obj_finish_shmem_access(struct drm_i915_gem_object *obj) } int __must_check i915_mutex_lock_interruptible(struct drm_device *dev); -void i915_vma_move_to_active(struct i915_vma *vma, - struct i915_request *rq, - unsigned int flags); +int __must_check i915_vma_move_to_active(struct i915_vma *vma, + struct i915_request *rq, + unsigned int flags); int i915_gem_dumb_create(struct drm_file *file_priv, struct drm_device *dev, struct drm_mode_create_dumb *args); diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 91f20445147f..97136e4ce91d 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -1165,12 +1165,16 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb, goto err_request; GEM_BUG_ON(!reservation_object_test_signaled_rcu(batch->resv, true)); - i915_vma_move_to_active(batch, rq, 0); - i915_vma_unpin(batch); + err = i915_vma_move_to_active(batch, rq, 0); + if (err) + goto skip_request; - i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); + err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); + if (err) + goto skip_request; rq->batch = batch; + i915_vma_unpin(batch); cache->rq = rq; cache->rq_cmd = cmd; @@ -1179,6 +1183,8 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb, /* Return with batch mapping (cmd) still pinned */ return 0; +skip_request: + i915_request_skip(rq, err); err_request: i915_request_add(rq); err_unpin: @@ -1818,7 +1824,11 @@ static int eb_move_to_gpu(struct i915_execbuffer *eb) unsigned int flags = eb->flags[i]; struct i915_vma *vma = eb->vma[i]; - i915_vma_move_to_active(vma, eb->request, flags); + err = i915_vma_move_to_active(vma, eb->request, flags); + if (unlikely(err)) { + i915_request_skip(eb->request, err); + return err; + } __eb_unreserve_vma(vma, flags); vma->exec_flags = NULL; @@ -1877,9 +1887,9 @@ static void export_fence(struct i915_vma *vma, reservation_object_unlock(resv); } -void i915_vma_move_to_active(struct i915_vma *vma, - struct i915_request *rq, - unsigned int flags) +int i915_vma_move_to_active(struct i915_vma *vma, + struct i915_request *rq, + unsigned int flags) { struct drm_i915_gem_object *obj = vma->obj; const unsigned int idx = rq->engine->id; @@ -1916,6 +1926,7 @@ void i915_vma_move_to_active(struct i915_vma *vma, i915_gem_active_set(&vma->last_fence, rq); export_fence(vma, rq, flags); + return 0; } static int i915_reset_gen7_sol_offsets(struct i915_request *rq) diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.c b/drivers/gpu/drm/i915/i915_gem_render_state.c index 3210cedfa46c..90baf9086d0a 100644 --- a/drivers/gpu/drm/i915/i915_gem_render_state.c +++ b/drivers/gpu/drm/i915/i915_gem_render_state.c @@ -222,7 +222,7 @@ int i915_gem_render_state_emit(struct i915_request *rq) goto err_unpin; } - i915_vma_move_to_active(so.vma, rq, 0); + err = i915_vma_move_to_active(so.vma, rq, 0); err_unpin: i915_vma_unpin(so.vma); err_vma: diff --git a/drivers/gpu/drm/i915/selftests/huge_pages.c b/drivers/gpu/drm/i915/selftests/huge_pages.c index 84bed69f30cc..d9f439f6219f 100644 --- a/drivers/gpu/drm/i915/selftests/huge_pages.c +++ b/drivers/gpu/drm/i915/selftests/huge_pages.c @@ -985,7 +985,10 @@ static int gpu_write(struct i915_vma *vma, goto err_request; } - i915_vma_move_to_active(batch, rq, 0); + err = i915_vma_move_to_active(batch, rq, 0); + if (err) + goto err_request; + i915_gem_object_set_active_reference(batch->obj); i915_vma_unpin(batch); i915_vma_close(batch); @@ -996,7 +999,9 @@ static int gpu_write(struct i915_vma *vma, if (err) goto err_request; - i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); + err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); + if (err) + i915_request_skip(rq, err); err_request: i915_request_add(rq); diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c b/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c index 97a16311f083..1de7c1402fd5 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c @@ -222,12 +222,12 @@ static int gpu_set(struct drm_i915_gem_object *obj, } intel_ring_advance(rq, cs); - i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); + err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); i915_vma_unpin(vma); i915_request_add(rq); - return 0; + return err; } static bool always_valid(struct drm_i915_private *i915) diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/selftests/i915_gem_context.c index c642ab97698e..5fbe15f4effd 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_context.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_context.c @@ -170,18 +170,26 @@ static int gpu_fill(struct drm_i915_gem_object *obj, if (err) goto err_request; - i915_vma_move_to_active(batch, rq, 0); + err = i915_vma_move_to_active(batch, rq, 0); + if (err) + goto skip_request; + + err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); + if (err) + goto skip_request; + i915_gem_object_set_active_reference(batch->obj); i915_vma_unpin(batch); i915_vma_close(batch); - i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); i915_vma_unpin(vma); i915_request_add(rq); return 0; +skip_request: + i915_request_skip(rq, err); err_request: i915_request_add(rq); err_batch: diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_object.c b/drivers/gpu/drm/i915/selftests/i915_gem_object.c index b2ccbc5e2bbe..25c2b2d433bd 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_object.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_object.c @@ -464,13 +464,14 @@ static int make_obj_busy(struct drm_i915_gem_object *obj) return PTR_ERR(rq); } - i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); + err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); i915_request_add(rq); - i915_gem_object_set_active_reference(obj); + __i915_gem_object_release_unless_active(obj); i915_vma_unpin(vma); - return 0; + + return err; } static bool assert_mmap_offset(struct drm_i915_private *i915, diff --git a/drivers/gpu/drm/i915/selftests/i915_request.c b/drivers/gpu/drm/i915/selftests/i915_request.c index cc27edc40356..43995fc3534d 100644 --- a/drivers/gpu/drm/i915/selftests/i915_request.c +++ b/drivers/gpu/drm/i915/selftests/i915_request.c @@ -675,7 +675,9 @@ static int live_all_engines(void *arg) i915_gem_object_set_active_reference(batch->obj); } - i915_vma_move_to_active(batch, request[id], 0); + err = i915_vma_move_to_active(batch, request[id], 0); + GEM_BUG_ON(err); + i915_request_get(request[id]); i915_request_add(request[id]); } @@ -785,7 +787,9 @@ static int live_sequential_engines(void *arg) GEM_BUG_ON(err); request[id]->batch = batch; - i915_vma_move_to_active(batch, request[id], 0); + err = i915_vma_move_to_active(batch, request[id], 0); + GEM_BUG_ON(err); + i915_gem_object_set_active_reference(batch->obj); i915_vma_get(batch); diff --git a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c index c838f7d08cb9..73462a65a330 100644 --- a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c +++ b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c @@ -130,13 +130,19 @@ static int emit_recurse_batch(struct hang *h, if (err) goto unpin_vma; - i915_vma_move_to_active(vma, rq, 0); + err = i915_vma_move_to_active(vma, rq, 0); + if (err) + goto unpin_hws; + if (!i915_gem_object_has_active_reference(vma->obj)) { i915_gem_object_get(vma->obj); i915_gem_object_set_active_reference(vma->obj); } - i915_vma_move_to_active(hws, rq, 0); + err = i915_vma_move_to_active(hws, rq, 0); + if (err) + goto unpin_hws; + if (!i915_gem_object_has_active_reference(hws->obj)) { i915_gem_object_get(hws->obj); i915_gem_object_set_active_reference(hws->obj); @@ -205,6 +211,7 @@ static int emit_recurse_batch(struct hang *h, err = rq->engine->emit_bb_start(rq, vma->node.start, PAGE_SIZE, flags); +unpin_hws: i915_vma_unpin(hws); unpin_vma: i915_vma_unpin(vma); diff --git a/drivers/gpu/drm/i915/selftests/intel_lrc.c b/drivers/gpu/drm/i915/selftests/intel_lrc.c index 730a02d39058..636cb68191e3 100644 --- a/drivers/gpu/drm/i915/selftests/intel_lrc.c +++ b/drivers/gpu/drm/i915/selftests/intel_lrc.c @@ -104,13 +104,19 @@ static int emit_recurse_batch(struct spinner *spin, if (err) goto unpin_vma; - i915_vma_move_to_active(vma, rq, 0); + err = i915_vma_move_to_active(vma, rq, 0); + if (err) + goto unpin_hws; + if (!i915_gem_object_has_active_reference(vma->obj)) { i915_gem_object_get(vma->obj); i915_gem_object_set_active_reference(vma->obj); } - i915_vma_move_to_active(hws, rq, 0); + err = i915_vma_move_to_active(hws, rq, 0); + if (err) + goto unpin_hws; + if (!i915_gem_object_has_active_reference(hws->obj)) { i915_gem_object_get(hws->obj); i915_gem_object_set_active_reference(hws->obj); @@ -134,6 +140,7 @@ static int emit_recurse_batch(struct spinner *spin, err = rq->engine->emit_bb_start(rq, vma->node.start, PAGE_SIZE, 0); +unpin_hws: i915_vma_unpin(hws); unpin_vma: i915_vma_unpin(vma); diff --git a/drivers/gpu/drm/i915/selftests/intel_workarounds.c b/drivers/gpu/drm/i915/selftests/intel_workarounds.c index 4a9dc01a364a..fafdec3fe83e 100644 --- a/drivers/gpu/drm/i915/selftests/intel_workarounds.c +++ b/drivers/gpu/drm/i915/selftests/intel_workarounds.c @@ -49,6 +49,10 @@ read_nonprivs(struct i915_gem_context *ctx, struct intel_engine_cs *engine) goto err_pin; } + err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); + if (err) + goto err_req; + srm = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT; if (INTEL_GEN(ctx->i915) >= 8) srm++; @@ -67,8 +71,6 @@ read_nonprivs(struct i915_gem_context *ctx, struct intel_engine_cs *engine) } intel_ring_advance(rq, cs); - i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); - i915_gem_object_get(result); i915_gem_object_set_active_reference(result); From e6bb1d7f1adfcd24ac7d82ab157a8b8809a0d2c8 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 6 Jul 2018 11:39:45 +0100 Subject: [PATCH 103/114] drm/i915: Move i915_vma_move_to_active() to i915_vma.c i915_vma_move_to_active() has grown beyond its execbuf origins, and should take its rightful place in i915_vma.c as a method for i915_vma! Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20180706103947.15919-4-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_drv.h | 3 -- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 61 ---------------------- drivers/gpu/drm/i915/i915_vma.c | 61 ++++++++++++++++++++++ drivers/gpu/drm/i915/i915_vma.h | 4 ++ 4 files changed, 65 insertions(+), 64 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 07846e63671d..c79008177708 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -3090,9 +3090,6 @@ i915_gem_obj_finish_shmem_access(struct drm_i915_gem_object *obj) } int __must_check i915_mutex_lock_interruptible(struct drm_device *dev); -int __must_check i915_vma_move_to_active(struct i915_vma *vma, - struct i915_request *rq, - unsigned int flags); int i915_gem_dumb_create(struct drm_file *file_priv, struct drm_device *dev, struct drm_mode_create_dumb *args); diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 97136e4ce91d..3f0c612d42e7 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -1868,67 +1868,6 @@ static bool i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec) return true; } -static void export_fence(struct i915_vma *vma, - struct i915_request *rq, - unsigned int flags) -{ - struct reservation_object *resv = vma->resv; - - /* - * Ignore errors from failing to allocate the new fence, we can't - * handle an error right now. Worst case should be missed - * synchronisation leading to rendering corruption. - */ - reservation_object_lock(resv, NULL); - if (flags & EXEC_OBJECT_WRITE) - reservation_object_add_excl_fence(resv, &rq->fence); - else if (reservation_object_reserve_shared(resv) == 0) - reservation_object_add_shared_fence(resv, &rq->fence); - reservation_object_unlock(resv); -} - -int i915_vma_move_to_active(struct i915_vma *vma, - struct i915_request *rq, - unsigned int flags) -{ - struct drm_i915_gem_object *obj = vma->obj; - const unsigned int idx = rq->engine->id; - - lockdep_assert_held(&rq->i915->drm.struct_mutex); - GEM_BUG_ON(!drm_mm_node_allocated(&vma->node)); - - /* - * Add a reference if we're newly entering the active list. - * The order in which we add operations to the retirement queue is - * vital here: mark_active adds to the start of the callback list, - * such that subsequent callbacks are called first. Therefore we - * add the active reference first and queue for it to be dropped - * *last*. - */ - if (!i915_vma_is_active(vma)) - obj->active_count++; - i915_vma_set_active(vma, idx); - i915_gem_active_set(&vma->last_read[idx], rq); - list_move_tail(&vma->vm_link, &vma->vm->active_list); - - obj->write_domain = 0; - if (flags & EXEC_OBJECT_WRITE) { - obj->write_domain = I915_GEM_DOMAIN_RENDER; - - if (intel_fb_obj_invalidate(obj, ORIGIN_CS)) - i915_gem_active_set(&obj->frontbuffer_write, rq); - - obj->read_domains = 0; - } - obj->read_domains |= I915_GEM_GPU_DOMAINS; - - if (flags & EXEC_OBJECT_NEEDS_FENCE) - i915_gem_active_set(&vma->last_fence, rq); - - export_fence(vma, rq, flags); - return 0; -} - static int i915_reset_gen7_sol_offsets(struct i915_request *rq) { u32 *cs; diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index 518de47111ff..6f3a0f2296c2 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -859,6 +859,67 @@ void i915_vma_revoke_mmap(struct i915_vma *vma) list_del(&vma->obj->userfault_link); } +static void export_fence(struct i915_vma *vma, + struct i915_request *rq, + unsigned int flags) +{ + struct reservation_object *resv = vma->resv; + + /* + * Ignore errors from failing to allocate the new fence, we can't + * handle an error right now. Worst case should be missed + * synchronisation leading to rendering corruption. + */ + reservation_object_lock(resv, NULL); + if (flags & EXEC_OBJECT_WRITE) + reservation_object_add_excl_fence(resv, &rq->fence); + else if (reservation_object_reserve_shared(resv) == 0) + reservation_object_add_shared_fence(resv, &rq->fence); + reservation_object_unlock(resv); +} + +int i915_vma_move_to_active(struct i915_vma *vma, + struct i915_request *rq, + unsigned int flags) +{ + struct drm_i915_gem_object *obj = vma->obj; + const unsigned int idx = rq->engine->id; + + lockdep_assert_held(&rq->i915->drm.struct_mutex); + GEM_BUG_ON(!drm_mm_node_allocated(&vma->node)); + + /* + * Add a reference if we're newly entering the active list. + * The order in which we add operations to the retirement queue is + * vital here: mark_active adds to the start of the callback list, + * such that subsequent callbacks are called first. Therefore we + * add the active reference first and queue for it to be dropped + * *last*. + */ + if (!i915_vma_is_active(vma)) + obj->active_count++; + i915_vma_set_active(vma, idx); + i915_gem_active_set(&vma->last_read[idx], rq); + list_move_tail(&vma->vm_link, &vma->vm->active_list); + + obj->write_domain = 0; + if (flags & EXEC_OBJECT_WRITE) { + obj->write_domain = I915_GEM_DOMAIN_RENDER; + + if (intel_fb_obj_invalidate(obj, ORIGIN_CS)) + i915_gem_active_set(&obj->frontbuffer_write, rq); + + obj->read_domains = 0; + } + obj->read_domains |= I915_GEM_GPU_DOMAINS; + + if (flags & EXEC_OBJECT_NEEDS_FENCE) + i915_gem_active_set(&vma->last_fence, rq); + + export_fence(vma, rq, flags); + return 0; +} + int i915_vma_unbind(struct i915_vma *vma) { unsigned long active; diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h index 66a228931517..a218b689e418 100644 --- a/drivers/gpu/drm/i915/i915_vma.h +++ b/drivers/gpu/drm/i915/i915_vma.h @@ -215,6 +215,10 @@ static inline bool i915_vma_has_active_engine(const struct i915_vma *vma, return vma->active & BIT(engine); } +int __must_check i915_vma_move_to_active(struct i915_vma *vma, + struct i915_request *rq, + unsigned int flags); + static inline u32 i915_ggtt_offset(const struct i915_vma *vma) { GEM_BUG_ON(!i915_vma_is_ggtt(vma)); From 5c3f8c221c77ccdce3c2a8b96d196e5f4e2dac0c Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 6 Jul 2018 11:39:46 +0100 Subject: [PATCH 104/114] drm/i915: Track vma activity per fence.context, not per engine In the next patch, we will want to be able to use more flexible request timelines that can hop between engines. From the vma pov, we can then not rely on the binding of this request to an engine and so can not ensure that different requests are ordered through a per-engine timeline, and so we must track activity of all timelines. (We track activity on the vma itself to prevent unbinding from HW before the HW has finished accessing it.) v2: Switch to a rbtree for 32b safety (since using u64 as a radixtree index is fraught with aliasing of unsigned longs). v3: s/lookup_active/active_instance/ because we can never agree on names Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20180706103947.15919-5-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem_gtt.c | 5 +- drivers/gpu/drm/i915/i915_gpu_error.c | 14 +--- drivers/gpu/drm/i915/i915_gpu_error.h | 2 +- drivers/gpu/drm/i915/i915_request.h | 1 + drivers/gpu/drm/i915/i915_vma.c | 112 +++++++++++++++++++------- drivers/gpu/drm/i915/i915_vma.h | 46 +++-------- 6 files changed, 100 insertions(+), 80 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index e2793231ef49..4db31aaaa9d3 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -2074,7 +2074,6 @@ static struct i915_vma *pd_vma_create(struct gen6_hw_ppgtt *ppgtt, int size) struct drm_i915_private *i915 = ppgtt->base.vm.i915; struct i915_ggtt *ggtt = &i915->ggtt; struct i915_vma *vma; - int i; GEM_BUG_ON(!IS_ALIGNED(size, I915_GTT_PAGE_SIZE)); GEM_BUG_ON(size > ggtt->vm.total); @@ -2083,14 +2082,14 @@ static struct i915_vma *pd_vma_create(struct gen6_hw_ppgtt *ppgtt, int size) if (!vma) return ERR_PTR(-ENOMEM); - for (i = 0; i < ARRAY_SIZE(vma->last_read); i++) - init_request_active(&vma->last_read[i], NULL); init_request_active(&vma->last_fence, NULL); vma->vm = &ggtt->vm; vma->ops = &pd_vma_ops; vma->private = ppgtt; + vma->active = RB_ROOT; + vma->size = size; vma->fence_size = size; vma->flags = I915_VMA_GGTT; diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index df524c9cad40..8c81cf3aa182 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -335,21 +335,16 @@ static void print_error_buffers(struct drm_i915_error_state_buf *m, struct drm_i915_error_buffer *err, int count) { - int i; - err_printf(m, "%s [%d]:\n", name, count); while (count--) { - err_printf(m, " %08x_%08x %8u %02x %02x [ ", + err_printf(m, " %08x_%08x %8u %02x %02x %02x", upper_32_bits(err->gtt_offset), lower_32_bits(err->gtt_offset), err->size, err->read_domains, - err->write_domain); - for (i = 0; i < I915_NUM_ENGINES; i++) - err_printf(m, "%02x ", err->rseqno[i]); - - err_printf(m, "] %02x", err->wseqno); + err->write_domain, + err->wseqno); err_puts(m, tiling_flag(err->tiling)); err_puts(m, dirty_flag(err->dirty)); err_puts(m, purgeable_flag(err->purgeable)); @@ -1021,13 +1016,10 @@ static void capture_bo(struct drm_i915_error_buffer *err, struct i915_vma *vma) { struct drm_i915_gem_object *obj = vma->obj; - int i; err->size = obj->base.size; err->name = obj->base.name; - for (i = 0; i < I915_NUM_ENGINES; i++) - err->rseqno[i] = __active_get_seqno(&vma->last_read[i]); err->wseqno = __active_get_seqno(&obj->frontbuffer_write); err->engine = __active_get_engine_id(&obj->frontbuffer_write); diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h index 58910f1dc67c..f893a4e8b783 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.h +++ b/drivers/gpu/drm/i915/i915_gpu_error.h @@ -177,7 +177,7 @@ struct i915_gpu_state { struct drm_i915_error_buffer { u32 size; u32 name; - u32 rseqno[I915_NUM_ENGINES], wseqno; + u32 wseqno; u64 gtt_offset; u32 read_domains; u32 write_domain; diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h index a355a081485f..e1c9365dfefb 100644 --- a/drivers/gpu/drm/i915/i915_request.h +++ b/drivers/gpu/drm/i915/i915_request.h @@ -380,6 +380,7 @@ static inline void init_request_active(struct i915_gem_active *active, i915_gem_retire_fn retire) { + RCU_INIT_POINTER(active->request, NULL); INIT_LIST_HEAD(&active->link); active->retire = retire ?: i915_gem_retire_noop; } diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index 6f3a0f2296c2..b4cc98330225 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -63,18 +63,20 @@ static void vma_print_allocator(struct i915_vma *vma, const char *reason) #endif +struct i915_vma_active { + struct i915_gem_active base; + struct i915_vma *vma; + struct rb_node node; + u64 timeline; +}; + static void -i915_vma_retire(struct i915_gem_active *active, struct i915_request *rq) +__i915_vma_retire(struct i915_vma *vma, struct i915_request *rq) { - const unsigned int idx = rq->engine->id; - struct i915_vma *vma = - container_of(active, struct i915_vma, last_read[idx]); struct drm_i915_gem_object *obj = vma->obj; - GEM_BUG_ON(!i915_vma_has_active_engine(vma, idx)); - - i915_vma_clear_active(vma, idx); - if (i915_vma_is_active(vma)) + GEM_BUG_ON(!i915_vma_is_active(vma)); + if (--vma->active_count) return; GEM_BUG_ON(!drm_mm_node_allocated(&vma->node)); @@ -108,6 +110,15 @@ i915_vma_retire(struct i915_gem_active *active, struct i915_request *rq) } } +static void +i915_vma_retire(struct i915_gem_active *base, struct i915_request *rq) +{ + struct i915_vma_active *active = + container_of(base, typeof(*active), base); + + __i915_vma_retire(active->vma, rq); +} + static struct i915_vma * vma_create(struct drm_i915_gem_object *obj, struct i915_address_space *vm, @@ -115,7 +126,6 @@ vma_create(struct drm_i915_gem_object *obj, { struct i915_vma *vma; struct rb_node *rb, **p; - int i; /* The aliasing_ppgtt should never be used directly! */ GEM_BUG_ON(vm == &vm->i915->mm.aliasing_ppgtt->vm); @@ -124,8 +134,8 @@ vma_create(struct drm_i915_gem_object *obj, if (vma == NULL) return ERR_PTR(-ENOMEM); - for (i = 0; i < ARRAY_SIZE(vma->last_read); i++) - init_request_active(&vma->last_read[i], i915_vma_retire); + vma->active = RB_ROOT; + init_request_active(&vma->last_fence, NULL); vma->vm = vm; vma->ops = &vm->vma_ops; @@ -778,13 +788,11 @@ void i915_vma_reopen(struct i915_vma *vma) static void __i915_vma_destroy(struct i915_vma *vma) { struct drm_i915_private *i915 = vma->vm->i915; - int i; + struct i915_vma_active *iter, *n; GEM_BUG_ON(vma->node.allocated); GEM_BUG_ON(vma->fence); - for (i = 0; i < ARRAY_SIZE(vma->last_read); i++) - GEM_BUG_ON(i915_gem_active_isset(&vma->last_read[i])); GEM_BUG_ON(i915_gem_active_isset(&vma->last_fence)); list_del(&vma->obj_link); @@ -795,6 +803,11 @@ static void __i915_vma_destroy(struct i915_vma *vma) if (!i915_vma_is_ggtt(vma)) i915_ppgtt_put(i915_vm_to_ppgtt(vma->vm)); + rbtree_postorder_for_each_entry_safe(iter, n, &vma->active, node) { + GEM_BUG_ON(i915_gem_active_isset(&iter->base)); + kfree(iter); + } + kmem_cache_free(i915->vmas, vma); } @@ -878,16 +891,54 @@ static void export_fence(struct i915_vma *vma, reservation_object_unlock(resv); } +static struct i915_gem_active *active_instance(struct i915_vma *vma, u64 idx) +{ + struct i915_vma_active *active; + struct rb_node **p, *parent; + + parent = NULL; + p = &vma->active.rb_node; + while (*p) { + parent = *p; + + active = rb_entry(parent, struct i915_vma_active, node); + if (active->timeline == idx) + return &active->base; + + if (active->timeline < idx) + p = &parent->rb_right; + else + p = &parent->rb_left; + } + + active = kmalloc(sizeof(*active), GFP_KERNEL); + if (unlikely(!active)) + return ERR_PTR(-ENOMEM); + + init_request_active(&active->base, i915_vma_retire); + active->vma = vma; + active->timeline = idx; + + rb_link_node(&active->node, parent, p); + rb_insert_color(&active->node, &vma->active); + + return &active->base; +} + int i915_vma_move_to_active(struct i915_vma *vma, struct i915_request *rq, unsigned int flags) { struct drm_i915_gem_object *obj = vma->obj; - const unsigned int idx = rq->engine->id; + struct i915_gem_active *active; lockdep_assert_held(&rq->i915->drm.struct_mutex); GEM_BUG_ON(!drm_mm_node_allocated(&vma->node)); + active = active_instance(vma, rq->fence.context); + if (IS_ERR(active)) + return PTR_ERR(active); + /* * Add a reference if we're newly entering the active list. * The order in which we add operations to the retirement queue is @@ -896,11 +947,13 @@ int i915_vma_move_to_active(struct i915_vma *vma, * add the active reference first and queue for it to be dropped * *last*. */ - if (!i915_vma_is_active(vma)) + if (!i915_gem_active_isset(active) && !vma->active_count++) { + list_move_tail(&vma->vm_link, &vma->vm->active_list); obj->active_count++; - i915_vma_set_active(vma, idx); - i915_gem_active_set(&vma->last_read[idx], rq); - list_move_tail(&vma->vm_link, &vma->vm->active_list); + } + i915_gem_active_set(active, rq); + GEM_BUG_ON(!i915_vma_is_active(vma)); + GEM_BUG_ON(!obj->active_count); obj->write_domain = 0; if (flags & EXEC_OBJECT_WRITE) { @@ -922,7 +975,6 @@ int i915_vma_move_to_active(struct i915_vma *vma, int i915_vma_unbind(struct i915_vma *vma) { - unsigned long active; int ret; lockdep_assert_held(&vma->vm->i915->drm.struct_mutex); @@ -932,9 +984,8 @@ int i915_vma_unbind(struct i915_vma *vma) * have side-effects such as unpinning or even unbinding this vma. */ might_sleep(); - active = i915_vma_get_active(vma); - if (active) { - int idx; + if (i915_vma_is_active(vma)) { + struct i915_vma_active *active, *n; /* * When a closed VMA is retired, it is unbound - eek. @@ -951,18 +1002,17 @@ int i915_vma_unbind(struct i915_vma *vma) */ __i915_vma_pin(vma); - for_each_active(active, idx) { - ret = i915_gem_active_retire(&vma->last_read[idx], + rbtree_postorder_for_each_entry_safe(active, n, + &vma->active, node) { + ret = i915_gem_active_retire(&active->base, &vma->vm->i915->drm.struct_mutex); if (ret) - break; - } - - if (!ret) { - ret = i915_gem_active_retire(&vma->last_fence, - &vma->vm->i915->drm.struct_mutex); + goto unpin; } + ret = i915_gem_active_retire(&vma->last_fence, + &vma->vm->i915->drm.struct_mutex); +unpin: __i915_vma_unpin(vma); if (ret) return ret; diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h index a218b689e418..c297b0a0dc47 100644 --- a/drivers/gpu/drm/i915/i915_vma.h +++ b/drivers/gpu/drm/i915/i915_vma.h @@ -26,6 +26,7 @@ #define __I915_VMA_H__ #include +#include #include @@ -94,8 +95,8 @@ struct i915_vma { #define I915_VMA_USERFAULT BIT(I915_VMA_USERFAULT_BIT) #define I915_VMA_GGTT_WRITE BIT(12) - unsigned int active; - struct i915_gem_active last_read[I915_NUM_ENGINES]; + unsigned int active_count; + struct rb_root active; struct i915_gem_active last_fence; /** @@ -138,6 +139,15 @@ i915_vma_instance(struct drm_i915_gem_object *obj, void i915_vma_unpin_and_release(struct i915_vma **p_vma); +static inline bool i915_vma_is_active(struct i915_vma *vma) +{ + return vma->active_count; +} + +int __must_check i915_vma_move_to_active(struct i915_vma *vma, + struct i915_request *rq, + unsigned int flags); + static inline bool i915_vma_is_ggtt(const struct i915_vma *vma) { return vma->flags & I915_VMA_GGTT; @@ -187,38 +197,6 @@ static inline bool i915_vma_has_userfault(const struct i915_vma *vma) return test_bit(I915_VMA_USERFAULT_BIT, &vma->flags); } -static inline unsigned int i915_vma_get_active(const struct i915_vma *vma) -{ - return vma->active; -} - -static inline bool i915_vma_is_active(const struct i915_vma *vma) -{ - return i915_vma_get_active(vma); -} - -static inline void i915_vma_set_active(struct i915_vma *vma, - unsigned int engine) -{ - vma->active |= BIT(engine); -} - -static inline void i915_vma_clear_active(struct i915_vma *vma, - unsigned int engine) -{ - vma->active &= ~BIT(engine); -} - -static inline bool i915_vma_has_active_engine(const struct i915_vma *vma, - unsigned int engine) -{ - return vma->active & BIT(engine); -} - -int __must_check i915_vma_move_to_active(struct i915_vma *vma, - struct i915_request *rq, - unsigned int flags); - static inline u32 i915_ggtt_offset(const struct i915_vma *vma) { GEM_BUG_ON(!i915_vma_is_ggtt(vma)); From 8b293eb53a7d7605f762351918083a4e402dc784 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 6 Jul 2018 13:31:57 +0100 Subject: [PATCH 105/114] drm/i915: Track the last-active inside the i915_vma Using a VMA on more than one timeline concurrently is the exception rather than the rule (using it concurrently on multiple engines). As we expect to only use one active tracker, store the most recently used tracker inside the i915_vma itself and only fallback to the rbtree if we need a second or more concurrent active trackers. v2: Comments on how we overwrite any existing last_active cache. v3: __list_del_entry() before list_replace_init() is confusing and, much more important, entirely redundant. v4: Note that both last_active and the rbtree may be simultaneously tracking this timeline, albeit with different requests, and so the vma may be retired twice for the same timeline. v5: No, that list_del is required! Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20180706123157.9645-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_vma.c | 58 +++++++++++++++++++++++++++++++-- drivers/gpu/drm/i915/i915_vma.h | 1 + 2 files changed, 57 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index b4cc98330225..ed4e0fb558f7 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -119,6 +119,12 @@ i915_vma_retire(struct i915_gem_active *base, struct i915_request *rq) __i915_vma_retire(active->vma, rq); } +static void +i915_vma_last_retire(struct i915_gem_active *base, struct i915_request *rq) +{ + __i915_vma_retire(container_of(base, struct i915_vma, last_active), rq); +} + static struct i915_vma * vma_create(struct drm_i915_gem_object *obj, struct i915_address_space *vm, @@ -136,6 +142,7 @@ vma_create(struct drm_i915_gem_object *obj, vma->active = RB_ROOT; + init_request_active(&vma->last_active, i915_vma_last_retire); init_request_active(&vma->last_fence, NULL); vma->vm = vm; vma->ops = &vm->vma_ops; @@ -895,6 +902,29 @@ static struct i915_gem_active *active_instance(struct i915_vma *vma, u64 idx) { struct i915_vma_active *active; struct rb_node **p, *parent; + struct i915_request *old; + + /* + * We track the most recently used timeline to skip a rbtree search + * for the common case, under typical loads we never need the rbtree + * at all. We can reuse the last_active slot if it is empty, that is + * after the previous activity has been retired, or if the active + * matches the current timeline. + * + * Note that we allow the timeline to be active simultaneously in + * the rbtree and the last_active cache. We do this to avoid having + * to search and replace the rbtree element for a new timeline, with + * the cost being that we must be aware that the vma may be retired + * twice for the same timeline (as the older rbtree element will be + * retired before the new request added to last_active). + */ + old = i915_gem_active_raw(&vma->last_active, + &vma->vm->i915->drm.struct_mutex); + if (!old || old->fence.context == idx) + goto out; + + /* Move the currently active fence into the rbtree */ + idx = old->fence.context; parent = NULL; p = &vma->active.rb_node; @@ -903,7 +933,7 @@ static struct i915_gem_active *active_instance(struct i915_vma *vma, u64 idx) active = rb_entry(parent, struct i915_vma_active, node); if (active->timeline == idx) - return &active->base; + goto replace; if (active->timeline < idx) p = &parent->rb_right; @@ -922,7 +952,26 @@ static struct i915_gem_active *active_instance(struct i915_vma *vma, u64 idx) rb_link_node(&active->node, parent, p); rb_insert_color(&active->node, &vma->active); - return &active->base; +replace: + /* + * Overwrite the previous active slot in the rbtree with last_active, + * leaving last_active zeroed. If the previous slot is still active, + * we must be careful as we now only expect to receive one retire + * callback not two, and so much undo the active counting for the + * overwritten slot. + */ + if (i915_gem_active_isset(&active->base)) { + /* Retire ourselves from the old rq->active_list */ + __list_del_entry(&active->base.link); + vma->active_count--; + GEM_BUG_ON(!vma->active_count); + } + GEM_BUG_ON(list_empty(&vma->last_active.link)); + list_replace_init(&vma->last_active.link, &active->base.link); + active->base.request = fetch_and_zero(&vma->last_active.request); + +out: + return &vma->last_active; } int i915_vma_move_to_active(struct i915_vma *vma, @@ -1002,6 +1051,11 @@ int i915_vma_unbind(struct i915_vma *vma) */ __i915_vma_pin(vma); + ret = i915_gem_active_retire(&vma->last_active, + &vma->vm->i915->drm.struct_mutex); + if (ret) + goto unpin; + rbtree_postorder_for_each_entry_safe(active, n, &vma->active, node) { ret = i915_gem_active_retire(&active->base, diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h index c297b0a0dc47..f06d66377107 100644 --- a/drivers/gpu/drm/i915/i915_vma.h +++ b/drivers/gpu/drm/i915/i915_vma.h @@ -97,6 +97,7 @@ struct i915_vma { unsigned int active_count; struct rb_root active; + struct i915_gem_active last_active; struct i915_gem_active last_fence; /** From 3a32497f0dbe170794e1506deb41dc44c4fea8d9 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 6 Jul 2018 18:49:26 +0100 Subject: [PATCH 106/114] drm/i915/selftests: Provide full mb() around clflush clflush is an unserialised instruction and the IA manual strongly advises you to serialise it with a mb. To be cautious, apply one before and one after, so that it is serialised with both writes and reads without worrying too much about the required direction. Signed-off-by: Chris Wilson Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20180706174926.4712-1-chris@chris-wilson.co.uk --- .../drm/i915/selftests/i915_gem_coherency.c | 21 ++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c b/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c index 1de7c1402fd5..3a095c37c120 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c @@ -42,11 +42,21 @@ static int cpu_set(struct drm_i915_gem_object *obj, page = i915_gem_object_get_page(obj, offset >> PAGE_SHIFT); map = kmap_atomic(page); - if (needs_clflush & CLFLUSH_BEFORE) + + if (needs_clflush & CLFLUSH_BEFORE) { + mb(); clflush(map+offset_in_page(offset) / sizeof(*map)); + mb(); + } + map[offset_in_page(offset) / sizeof(*map)] = v; - if (needs_clflush & CLFLUSH_AFTER) + + if (needs_clflush & CLFLUSH_AFTER) { + mb(); clflush(map+offset_in_page(offset) / sizeof(*map)); + mb(); + } + kunmap_atomic(map); i915_gem_obj_finish_shmem_access(obj); @@ -68,8 +78,13 @@ static int cpu_get(struct drm_i915_gem_object *obj, page = i915_gem_object_get_page(obj, offset >> PAGE_SHIFT); map = kmap_atomic(page); - if (needs_clflush & CLFLUSH_BEFORE) + + if (needs_clflush & CLFLUSH_BEFORE) { + mb(); clflush(map+offset_in_page(offset) / sizeof(*map)); + mb(); + } + *v = map[offset_in_page(offset) / sizeof(*map)]; kunmap_atomic(map); From 07e070e1e3b24e4b66a3b5e55e6486d7fe9769dd Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 6 Jul 2018 21:59:47 +0100 Subject: [PATCH 107/114] drm/i915/selftests: Avoid warning if runtime pm is disabled Inside the mock GEM device, we try to grab the runtime pm for the fake device to prevent it from ever suspending. However, if CONFIG_PM is not set, trying to obtain the wakref returns an error which we WARN about. Suppress the expected warning. Signed-off-by: Chris Wilson Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20180706205947.11209-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/selftests/mock_gem_device.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c index c97075c5ccaf..43ed8b28aeaa 100644 --- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c +++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c @@ -157,7 +157,8 @@ struct drm_i915_private *mock_gem_device(void) dev_pm_domain_set(&pdev->dev, &pm_domain); pm_runtime_enable(&pdev->dev); pm_runtime_dont_use_autosuspend(&pdev->dev); - WARN_ON(pm_runtime_get_sync(&pdev->dev)); + if (pm_runtime_enabled(&pdev->dev)) + WARN_ON(pm_runtime_get_sync(&pdev->dev)); i915 = (struct drm_i915_private *)(pdev + 1); pci_set_drvdata(pdev, i915); From 890fd185d53037d05d10a0825950c4b038e39d4a Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 6 Jul 2018 22:07:10 +0100 Subject: [PATCH 108/114] drm/i915: Replace nested subclassing with explicit subclasses In the next patch, we will want a third distinct class of timeline that may overlap with the current pair of client and engine timeline classes. Rather than use the ad hoc markup of SINGLE_DEPTH_NESTING, initialise the different timeline classes with an explicit subclass. Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20180706210710.16251-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem.c | 2 +- drivers/gpu/drm/i915/i915_request.c | 2 +- drivers/gpu/drm/i915/i915_timeline.h | 2 ++ drivers/gpu/drm/i915/intel_engine_cs.c | 1 + drivers/gpu/drm/i915/selftests/mock_engine.c | 2 ++ 5 files changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 2e05cf114083..1a9dab302433 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -3095,7 +3095,7 @@ static void engine_skip_context(struct i915_request *request) GEM_BUG_ON(timeline == &engine->timeline); spin_lock_irqsave(&engine->timeline.lock, flags); - spin_lock_nested(&timeline->lock, SINGLE_DEPTH_NESTING); + spin_lock(&timeline->lock); list_for_each_entry_continue(request, &engine->timeline.requests, link) if (request->gem_context == hung_ctx) diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 7ae08b68121e..3248369dbcfb 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -503,7 +503,7 @@ static void move_to_timeline(struct i915_request *request, GEM_BUG_ON(request->timeline == &request->engine->timeline); lockdep_assert_held(&request->engine->timeline.lock); - spin_lock_nested(&request->timeline->lock, SINGLE_DEPTH_NESTING); + spin_lock(&request->timeline->lock); list_move_tail(&request->link, &timeline->requests); spin_unlock(&request->timeline->lock); } diff --git a/drivers/gpu/drm/i915/i915_timeline.h b/drivers/gpu/drm/i915/i915_timeline.h index dc2a4632faa7..a2c2c3ab5fb0 100644 --- a/drivers/gpu/drm/i915/i915_timeline.h +++ b/drivers/gpu/drm/i915/i915_timeline.h @@ -37,6 +37,8 @@ struct i915_timeline { u32 seqno; spinlock_t lock; +#define TIMELINE_CLIENT 0 /* default subclass */ +#define TIMELINE_ENGINE 1 /** * List of breadcrumbs associated with GPU requests currently diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index e2f562853aee..0ac497275a51 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -483,6 +483,7 @@ static void intel_engine_init_execlist(struct intel_engine_cs *engine) void intel_engine_setup_common(struct intel_engine_cs *engine) { i915_timeline_init(engine->i915, &engine->timeline, engine->name); + lockdep_set_subclass(&engine->timeline.lock, TIMELINE_ENGINE); intel_engine_init_execlist(engine); intel_engine_init_hangcheck(engine); diff --git a/drivers/gpu/drm/i915/selftests/mock_engine.c b/drivers/gpu/drm/i915/selftests/mock_engine.c index c2a0451336cf..22a73da45ad5 100644 --- a/drivers/gpu/drm/i915/selftests/mock_engine.c +++ b/drivers/gpu/drm/i915/selftests/mock_engine.c @@ -200,6 +200,8 @@ struct intel_engine_cs *mock_engine(struct drm_i915_private *i915, engine->base.submit_request = mock_submit_request; i915_timeline_init(i915, &engine->base.timeline, engine->base.name); + lockdep_set_subclass(&engine->base.timeline.lock, TIMELINE_ENGINE); + intel_engine_init_breadcrumbs(&engine->base); engine->base.breadcrumbs.mock = true; /* prevent touching HW for irqs */ From e147913247e3a7ea98e72e8537336bcea06c2405 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sat, 7 Jul 2018 11:04:05 +0100 Subject: [PATCH 109/114] drm/i915/selftests: Magic numbers for old Y-tiling i915g has a slightly different tiling layout, and so requires a different reference swizzle pattern. Testcase: igt/drv_selftests/live_objects #gdg Signed-off-by: Chris Wilson Acked-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20180707100405.817-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/selftests/i915_gem_object.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_object.c b/drivers/gpu/drm/i915/selftests/i915_gem_object.c index 25c2b2d433bd..f4a5099c75b5 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_object.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_object.c @@ -169,9 +169,16 @@ static u64 tiled_offset(const struct tile *tile, u64 v) v += y * tile->width; v += div64_u64_rem(x, tile->width, &x) << tile->size; v += x; - } else { + } else if (tile->width == 128) { const unsigned int ytile_span = 16; - const unsigned int ytile_height = 32 * ytile_span; + const unsigned int ytile_height = 512; + + v += y * ytile_span; + v += div64_u64_rem(x, ytile_span, &x) * ytile_height; + v += x; + } else { + const unsigned int ytile_span = 32; + const unsigned int ytile_height = 256; v += y * ytile_span; v += div64_u64_rem(x, ytile_span, &x) * ytile_height; From ec625fb932bb057e2d3c2ed28eee56a827385ab8 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 9 Jul 2018 13:20:42 +0100 Subject: [PATCH 110/114] drm/i915: Provide a timeout to i915_gem_wait_for_idle() Usually we have no idea about the upper bound we need to wait to catch up with userspace when idling the device, but in a few situations we know the system was idle beforehand and can provide a short timeout in order to very quickly catch a failure, long before hangcheck kicks in. In the following patches, we will use the timeout to curtain two overly long waits, where we know we can expect the GPU to complete within a reasonable time or declare it broken. In particular, with a broken GPU we expect it to fail during the initial GPU setup where do a couple of context switches to record the defaults. This is a task that takes a few milliseconds even on the slowest of devices, but we may have to wait 60s for hangcheck to give in and declare the machine inoperable. In this a case where any gpu hang is unacceptable, both from a timeliness and practical standpoint. The other improvement is that in selftests, we do not need to arm an independent timer to inject a wedge, as we can just limit the timeout on the wait directly. v2: Include the timeout parameter in the trace. Signed-off-by: Chris Wilson Cc: Joonas Lahtinen Cc: Mika Kuoppala Cc: Tvrtko Ursulin Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20180709122044.7028-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_debugfs.c | 6 ++- drivers/gpu/drm/i915/i915_drv.h | 2 +- drivers/gpu/drm/i915/i915_gem.c | 48 +++++++++++-------- drivers/gpu/drm/i915/i915_gem_evict.c | 3 +- drivers/gpu/drm/i915/i915_gem_gtt.c | 2 +- drivers/gpu/drm/i915/i915_gem_shrinker.c | 11 +++-- drivers/gpu/drm/i915/i915_perf.c | 4 +- drivers/gpu/drm/i915/i915_request.c | 6 ++- .../gpu/drm/i915/selftests/i915_gem_context.c | 4 +- drivers/gpu/drm/i915/selftests/i915_request.c | 4 +- .../gpu/drm/i915/selftests/igt_flush_test.c | 2 +- 11 files changed, 59 insertions(+), 33 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 544e5e7f011f..099f97ef2303 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -4105,7 +4105,8 @@ fault_irq_set(struct drm_i915_private *i915, err = i915_gem_wait_for_idle(i915, I915_WAIT_LOCKED | - I915_WAIT_INTERRUPTIBLE); + I915_WAIT_INTERRUPTIBLE, + MAX_SCHEDULE_TIMEOUT); if (err) goto err_unlock; @@ -4210,7 +4211,8 @@ i915_drop_caches_set(void *data, u64 val) if (val & DROP_ACTIVE) ret = i915_gem_wait_for_idle(dev_priv, I915_WAIT_INTERRUPTIBLE | - I915_WAIT_LOCKED); + I915_WAIT_LOCKED, + MAX_SCHEDULE_TIMEOUT); if (val & DROP_RETIRE) i915_retire_requests(dev_priv); diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index c79008177708..fcb8f49a9b8a 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -3157,7 +3157,7 @@ void i915_gem_init_swizzling(struct drm_i915_private *dev_priv); void i915_gem_fini(struct drm_i915_private *dev_priv); void i915_gem_cleanup_engines(struct drm_i915_private *dev_priv); int i915_gem_wait_for_idle(struct drm_i915_private *dev_priv, - unsigned int flags); + unsigned int flags, long timeout); int __must_check i915_gem_suspend(struct drm_i915_private *dev_priv); void i915_gem_suspend_late(struct drm_i915_private *dev_priv); void i915_gem_resume(struct drm_i915_private *dev_priv); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 1a9dab302433..91d705a67d38 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2267,7 +2267,9 @@ static int i915_gem_object_create_mmap_offset(struct drm_i915_gem_object *obj) /* Attempt to reap some mmap space from dead objects */ do { - err = i915_gem_wait_for_idle(dev_priv, I915_WAIT_INTERRUPTIBLE); + err = i915_gem_wait_for_idle(dev_priv, + I915_WAIT_INTERRUPTIBLE, + MAX_SCHEDULE_TIMEOUT); if (err) break; @@ -3742,14 +3744,14 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file) return ret; } -static int wait_for_timeline(struct i915_timeline *tl, unsigned int flags) +static long wait_for_timeline(struct i915_timeline *tl, + unsigned int flags, long timeout) { struct i915_request *rq; - long ret; rq = i915_gem_active_get_unlocked(&tl->last_request); if (!rq) - return 0; + return timeout; /* * "Race-to-idle". @@ -3763,10 +3765,10 @@ static int wait_for_timeline(struct i915_timeline *tl, unsigned int flags) if (flags & I915_WAIT_FOR_IDLE_BOOST) gen6_rps_boost(rq, NULL); - ret = i915_request_wait(rq, flags, MAX_SCHEDULE_TIMEOUT); + timeout = i915_request_wait(rq, flags, timeout); i915_request_put(rq); - return ret < 0 ? ret : 0; + return timeout; } static int wait_for_engines(struct drm_i915_private *i915) @@ -3782,10 +3784,12 @@ static int wait_for_engines(struct drm_i915_private *i915) return 0; } -int i915_gem_wait_for_idle(struct drm_i915_private *i915, unsigned int flags) +int i915_gem_wait_for_idle(struct drm_i915_private *i915, + unsigned int flags, long timeout) { - GEM_TRACE("flags=%x (%s)\n", - flags, flags & I915_WAIT_LOCKED ? "locked" : "unlocked"); + GEM_TRACE("flags=%x (%s), timeout=%ld%s\n", + flags, flags & I915_WAIT_LOCKED ? "locked" : "unlocked", + timeout, timeout == MAX_SCHEDULE_TIMEOUT ? " (forever)" : ""); /* If the device is asleep, we have no requests outstanding */ if (!READ_ONCE(i915->gt.awake)) @@ -3798,9 +3802,9 @@ int i915_gem_wait_for_idle(struct drm_i915_private *i915, unsigned int flags) lockdep_assert_held(&i915->drm.struct_mutex); list_for_each_entry(tl, &i915->gt.timelines, link) { - err = wait_for_timeline(tl, flags); - if (err) - return err; + timeout = wait_for_timeline(tl, flags, timeout); + if (timeout < 0) + return timeout; } err = wait_for_engines(i915); @@ -3812,12 +3816,13 @@ int i915_gem_wait_for_idle(struct drm_i915_private *i915, unsigned int flags) } else { struct intel_engine_cs *engine; enum intel_engine_id id; - int err; for_each_engine(engine, i915, id) { - err = wait_for_timeline(&engine->timeline, flags); - if (err) - return err; + struct i915_timeline *tl = &engine->timeline; + + timeout = wait_for_timeline(tl, flags, timeout); + if (timeout < 0) + return timeout; } } @@ -5052,7 +5057,8 @@ int i915_gem_suspend(struct drm_i915_private *dev_priv) ret = i915_gem_wait_for_idle(dev_priv, I915_WAIT_INTERRUPTIBLE | I915_WAIT_LOCKED | - I915_WAIT_FOR_IDLE_BOOST); + I915_WAIT_FOR_IDLE_BOOST, + MAX_SCHEDULE_TIMEOUT); if (ret && ret != -EIO) goto err_unlock; @@ -5356,7 +5362,9 @@ static int __intel_engines_record_defaults(struct drm_i915_private *i915) if (err) goto err_active; - err = i915_gem_wait_for_idle(i915, I915_WAIT_LOCKED); + err = i915_gem_wait_for_idle(i915, + I915_WAIT_LOCKED, + MAX_SCHEDULE_TIMEOUT); if (err) goto err_active; @@ -5421,7 +5429,9 @@ err_active: if (WARN_ON(i915_gem_switch_to_kernel_context(i915))) goto out_ctx; - if (WARN_ON(i915_gem_wait_for_idle(i915, I915_WAIT_LOCKED))) + if (WARN_ON(i915_gem_wait_for_idle(i915, + I915_WAIT_LOCKED, + MAX_SCHEDULE_TIMEOUT))) goto out_ctx; i915_gem_contexts_lost(i915); diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c index 54814a196ee4..02b83a5ed96c 100644 --- a/drivers/gpu/drm/i915/i915_gem_evict.c +++ b/drivers/gpu/drm/i915/i915_gem_evict.c @@ -69,7 +69,8 @@ static int ggtt_flush(struct drm_i915_private *i915) err = i915_gem_wait_for_idle(i915, I915_WAIT_INTERRUPTIBLE | - I915_WAIT_LOCKED); + I915_WAIT_LOCKED, + MAX_SCHEDULE_TIMEOUT); if (err) return err; diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 4db31aaaa9d3..210baf3c8d11 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -2793,7 +2793,7 @@ void i915_gem_gtt_finish_pages(struct drm_i915_gem_object *obj, struct i915_ggtt *ggtt = &dev_priv->ggtt; if (unlikely(ggtt->do_idle_maps)) { - if (i915_gem_wait_for_idle(dev_priv, 0)) { + if (i915_gem_wait_for_idle(dev_priv, 0, MAX_SCHEDULE_TIMEOUT)) { DRM_ERROR("Failed to wait for idle; VT'd may hang.\n"); /* Wait a bit, in hopes it avoids the hang */ udelay(10); diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c index 55e84e71f526..c61f5b80fee3 100644 --- a/drivers/gpu/drm/i915/i915_gem_shrinker.c +++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c @@ -172,7 +172,9 @@ i915_gem_shrink(struct drm_i915_private *i915, * we will free as much as we can and hope to get a second chance. */ if (flags & I915_SHRINK_ACTIVE) - i915_gem_wait_for_idle(i915, I915_WAIT_LOCKED); + i915_gem_wait_for_idle(i915, + I915_WAIT_LOCKED, + MAX_SCHEDULE_TIMEOUT); trace_i915_gem_shrink(i915, target, flags); i915_retire_requests(i915); @@ -392,7 +394,8 @@ shrinker_lock_uninterruptible(struct drm_i915_private *i915, bool *unlock, unsigned long timeout = jiffies + msecs_to_jiffies_timeout(timeout_ms); do { - if (i915_gem_wait_for_idle(i915, 0) == 0 && + if (i915_gem_wait_for_idle(i915, + 0, MAX_SCHEDULE_TIMEOUT) == 0 && shrinker_lock(i915, unlock)) break; @@ -466,7 +469,9 @@ i915_gem_shrinker_vmap(struct notifier_block *nb, unsigned long event, void *ptr return NOTIFY_DONE; /* Force everything onto the inactive lists */ - ret = i915_gem_wait_for_idle(i915, I915_WAIT_LOCKED); + ret = i915_gem_wait_for_idle(i915, + I915_WAIT_LOCKED, + MAX_SCHEDULE_TIMEOUT); if (ret) goto out; diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 447407fee3b8..6bf10952c724 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -1836,7 +1836,9 @@ static int gen8_configure_all_contexts(struct drm_i915_private *dev_priv, * So far the best way to work around this issue seems to be draining * the GPU from any submitted work. */ - ret = i915_gem_wait_for_idle(dev_priv, wait_flags); + ret = i915_gem_wait_for_idle(dev_priv, + wait_flags, + MAX_SCHEDULE_TIMEOUT); if (ret) goto out; diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 3248369dbcfb..5c2c93cbab12 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -206,7 +206,8 @@ static int reset_all_global_seqno(struct drm_i915_private *i915, u32 seqno) /* Carefully retire all requests without writing to the rings */ ret = i915_gem_wait_for_idle(i915, I915_WAIT_INTERRUPTIBLE | - I915_WAIT_LOCKED); + I915_WAIT_LOCKED, + MAX_SCHEDULE_TIMEOUT); if (ret) return ret; @@ -735,7 +736,8 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx) /* Ratelimit ourselves to prevent oom from malicious clients */ ret = i915_gem_wait_for_idle(i915, I915_WAIT_LOCKED | - I915_WAIT_INTERRUPTIBLE); + I915_WAIT_INTERRUPTIBLE, + MAX_SCHEDULE_TIMEOUT); if (ret) goto err_unreserve; diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/selftests/i915_gem_context.c index 5fbe15f4effd..ab2590242033 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_context.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_context.c @@ -478,7 +478,9 @@ static int __igt_switch_to_kernel_context(struct drm_i915_private *i915, } } - err = i915_gem_wait_for_idle(i915, I915_WAIT_LOCKED); + err = i915_gem_wait_for_idle(i915, + I915_WAIT_LOCKED, + MAX_SCHEDULE_TIMEOUT); if (err) return err; diff --git a/drivers/gpu/drm/i915/selftests/i915_request.c b/drivers/gpu/drm/i915/selftests/i915_request.c index 43995fc3534d..c4aac6141e04 100644 --- a/drivers/gpu/drm/i915/selftests/i915_request.c +++ b/drivers/gpu/drm/i915/selftests/i915_request.c @@ -286,7 +286,9 @@ static int begin_live_test(struct live_test *t, t->func = func; t->name = name; - err = i915_gem_wait_for_idle(i915, I915_WAIT_LOCKED); + err = i915_gem_wait_for_idle(i915, + I915_WAIT_LOCKED, + MAX_SCHEDULE_TIMEOUT); if (err) { pr_err("%s(%s): failed to idle before, with err=%d!", func, name, err); diff --git a/drivers/gpu/drm/i915/selftests/igt_flush_test.c b/drivers/gpu/drm/i915/selftests/igt_flush_test.c index 0d06f559243f..09ab037ce803 100644 --- a/drivers/gpu/drm/i915/selftests/igt_flush_test.c +++ b/drivers/gpu/drm/i915/selftests/igt_flush_test.c @@ -64,7 +64,7 @@ int igt_flush_test(struct drm_i915_private *i915, unsigned int flags) } wedge_on_timeout(&w, i915, HZ) - i915_gem_wait_for_idle(i915, flags); + i915_gem_wait_for_idle(i915, flags, MAX_SCHEDULE_TIMEOUT); return i915_terminally_wedged(&i915->gpu_error) ? -EIO : 0; } From 2621cefaa42b3a7455d30e78831c6b55290d40c8 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 9 Jul 2018 13:20:43 +0100 Subject: [PATCH 111/114] drm/i915: Provide a timeout to i915_gem_wait_for_idle() on setup With a broken GPU we expect it to fail during the initial GPU setup where do a couple of context switches to record the defaults. This is a task that takes a few milliseconds even on the slowest of devices, but we may have to wait 60s for hangcheck to give in and declare the machine inoperable. In this a case where any gpu hang is unacceptable, both from a timeliness and practical standpoint. We can therefore set a timeout on our wait-for-idle that is shorter than the hangcheck (which may be up to 60s for a declaring a wedged driver) and so detect the broken GPU much more quickly during driver load (and so prevent stalling userspace for ages). Signed-off-by: Chris Wilson Cc: Joonas Lahtinen Cc: Mika Kuoppala Cc: Tvrtko Ursulin Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20180709122044.7028-2-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 91d705a67d38..b35cbfd16c9c 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -5362,11 +5362,11 @@ static int __intel_engines_record_defaults(struct drm_i915_private *i915) if (err) goto err_active; - err = i915_gem_wait_for_idle(i915, - I915_WAIT_LOCKED, - MAX_SCHEDULE_TIMEOUT); - if (err) + if (i915_gem_wait_for_idle(i915, I915_WAIT_LOCKED, HZ / 5)) { + i915_gem_set_wedged(i915); + err = -EIO; /* Caller will declare us wedged */ goto err_active; + } assert_kernel_context_is_current(i915); From d9a13ce3fa5f396f100ab3f58e5ca127c77a74bb Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 9 Jul 2018 13:20:44 +0100 Subject: [PATCH 112/114] drm/i915/selftests: Replace wait-on-timeout with explicit timeout In igt_flush_test() we install a background timer in order to ensure that the wait completes within a certain time. We can now tell the wait that it has to complete within a timeout, and so no longer need the background timer. Signed-off-by: Chris Wilson Cc: Joonas Lahtinen Cc: Mika Kuoppala Cc: Tvrtko Ursulin Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20180709122044.7028-3-chris@chris-wilson.co.uk --- .../gpu/drm/i915/selftests/igt_flush_test.c | 55 +++---------------- 1 file changed, 9 insertions(+), 46 deletions(-) diff --git a/drivers/gpu/drm/i915/selftests/igt_flush_test.c b/drivers/gpu/drm/i915/selftests/igt_flush_test.c index 09ab037ce803..af66e3d4e23a 100644 --- a/drivers/gpu/drm/i915/selftests/igt_flush_test.c +++ b/drivers/gpu/drm/i915/selftests/igt_flush_test.c @@ -9,52 +9,8 @@ #include "../i915_selftest.h" #include "igt_flush_test.h" -struct wedge_me { - struct delayed_work work; - struct drm_i915_private *i915; - const void *symbol; -}; - -static void wedge_me(struct work_struct *work) -{ - struct wedge_me *w = container_of(work, typeof(*w), work.work); - - pr_err("%pS timed out, cancelling all further testing.\n", w->symbol); - - GEM_TRACE("%pS timed out.\n", w->symbol); - GEM_TRACE_DUMP(); - - i915_gem_set_wedged(w->i915); -} - -static void __init_wedge(struct wedge_me *w, - struct drm_i915_private *i915, - long timeout, - const void *symbol) -{ - w->i915 = i915; - w->symbol = symbol; - - INIT_DELAYED_WORK_ONSTACK(&w->work, wedge_me); - schedule_delayed_work(&w->work, timeout); -} - -static void __fini_wedge(struct wedge_me *w) -{ - cancel_delayed_work_sync(&w->work); - destroy_delayed_work_on_stack(&w->work); - w->i915 = NULL; -} - -#define wedge_on_timeout(W, DEV, TIMEOUT) \ - for (__init_wedge((W), (DEV), (TIMEOUT), __builtin_return_address(0)); \ - (W)->i915; \ - __fini_wedge((W))) - int igt_flush_test(struct drm_i915_private *i915, unsigned int flags) { - struct wedge_me w; - cond_resched(); if (flags & I915_WAIT_LOCKED && @@ -63,8 +19,15 @@ int igt_flush_test(struct drm_i915_private *i915, unsigned int flags) i915_gem_set_wedged(i915); } - wedge_on_timeout(&w, i915, HZ) - i915_gem_wait_for_idle(i915, flags, MAX_SCHEDULE_TIMEOUT); + if (i915_gem_wait_for_idle(i915, flags, HZ / 5) == -ETIME) { + pr_err("%pS timed out, cancelling all further testing.\n", + __builtin_return_address(0)); + + GEM_TRACE("%pS timed out.\n", __builtin_return_address(0)); + GEM_TRACE_DUMP(); + + i915_gem_set_wedged(i915); + } return i915_terminally_wedged(&i915->gpu_error) ? -EIO : 0; } From 932cac10c8fb07e72a1eba28e71b92f7f2cd756e Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 9 Jul 2018 14:01:58 +0100 Subject: [PATCH 113/114] drm/i915/selftests: Prevent background reaping of active objects igt_mmap_offset_exhaustion() wants to test what happens when the mmap space is filled with zombie objects, objects discarded by userspace but still active on the GPU. As they are only protected by the active reference, we have to be certain that active reference is kept while we peek into our dangling pointer. That active reference should not be freed until we retire, but we do that retirement from a background thread. This leaves us with a subtle timing problem, exacerbated and highlighted by KASAN: <3>[ 132.380399] BUG: KASAN: use-after-free in drm_gem_create_mmap_offset+0x8c/0xd0 <3>[ 132.380430] Read of size 8 at addr ffff8801e13245f8 by task drv_selftest/5822 <4>[ 132.380470] CPU: 0 PID: 5822 Comm: drv_selftest Tainted: G U 4.18.0-rc3-g7ae7763aa2be-kasan_48+ #1 <4>[ 132.380473] Hardware name: Dell Inc. XPS 8300 /0Y2MRG, BIOS A06 10/17/2011 <4>[ 132.380475] Call Trace: <4>[ 132.380481] dump_stack+0x7c/0xbb <4>[ 132.380487] print_address_description+0x65/0x270 <4>[ 132.380493] kasan_report+0x25b/0x380 <4>[ 132.380497] ? drm_gem_create_mmap_offset+0x8c/0xd0 <4>[ 132.380503] drm_gem_create_mmap_offset+0x8c/0xd0 <4>[ 132.380584] i915_gem_object_create_mmap_offset+0x6d/0x100 [i915] <4>[ 132.380650] igt_mmap_offset_exhaustion+0x462/0x940 [i915] <4>[ 132.380714] ? i915_gem_close_object+0x740/0x740 [i915] <4>[ 132.380784] ? igt_gem_huge+0x269/0x3d0 [i915] <4>[ 132.380865] __i915_subtests+0x5a/0x160 [i915] <4>[ 132.380936] __run_selftests+0x1a2/0x2f0 [i915] <4>[ 132.381008] i915_live_selftests+0x4e/0x80 [i915] <4>[ 132.381071] i915_pci_probe+0xd8/0x1b0 [i915] <4>[ 132.381077] pci_device_probe+0x1c5/0x3a0 <4>[ 132.381087] driver_probe_device+0x6b6/0xcb0 <4>[ 132.381094] __driver_attach+0x22d/0x2c0 <4>[ 132.381100] ? driver_probe_device+0xcb0/0xcb0 <4>[ 132.381103] bus_for_each_dev+0x113/0x1a0 <4>[ 132.381108] ? check_flags.part.24+0x450/0x450 <4>[ 132.381112] ? subsys_dev_iter_exit+0x10/0x10 <4>[ 132.381123] bus_add_driver+0x38b/0x6e0 <4>[ 132.381131] driver_register+0x189/0x400 <4>[ 132.381136] ? 0xffffffffc12d8000 <4>[ 132.381140] do_one_initcall+0xa0/0x4c0 <4>[ 132.381145] ? initcall_blacklisted+0x180/0x180 <4>[ 132.381152] ? do_init_module+0x4a/0x54c <4>[ 132.381156] ? rcu_lockdep_current_cpu_online+0xdc/0x130 <4>[ 132.381161] ? kasan_unpoison_shadow+0x30/0x40 <4>[ 132.381169] do_init_module+0x1b5/0x54c <4>[ 132.381177] load_module+0x619e/0x9b70 <4>[ 132.381202] ? module_frob_arch_sections+0x20/0x20 <4>[ 132.381211] ? vfs_read+0x257/0x2f0 <4>[ 132.381214] ? vfs_read+0x257/0x2f0 <4>[ 132.381221] ? kernel_read+0x8b/0x130 <4>[ 132.381231] ? copy_strings_kernel+0x120/0x120 <4>[ 132.381244] ? __se_sys_finit_module+0x17c/0x1a0 <4>[ 132.381248] __se_sys_finit_module+0x17c/0x1a0 <4>[ 132.381252] ? __ia32_sys_init_module+0xa0/0xa0 <4>[ 132.381261] ? __se_sys_newstat+0x77/0xd0 <4>[ 132.381265] ? cp_new_stat+0x590/0x590 <4>[ 132.381269] ? kmem_cache_free+0x2f0/0x340 <4>[ 132.381285] do_syscall_64+0x97/0x400 <4>[ 132.381292] entry_SYSCALL_64_after_hwframe+0x49/0xbe <4>[ 132.381295] RIP: 0033:0x7eff4af46839 <4>[ 132.381297] Code: 00 f3 c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 40 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 1f f6 2c 00 f7 d8 64 89 01 48 <4>[ 132.381426] RSP: 002b:00007ffcd84f4cf8 EFLAGS: 00000246 ORIG_RAX: 0000000000000139 <4>[ 132.381432] RAX: ffffffffffffffda RBX: 000055dfdeb429a0 RCX: 00007eff4af46839 <4>[ 132.381435] RDX: 0000000000000000 RSI: 000055dfdeb43670 RDI: 0000000000000004 <4>[ 132.381437] RBP: 000055dfdeb43670 R08: 0000000000000004 R09: 0000000000000000 <4>[ 132.381440] R10: 00007ffcd84f4e60 R11: 0000000000000246 R12: 0000000000000000 <4>[ 132.381442] R13: 000055dfdeb3bec0 R14: 0000000000000000 R15: 000000000000003b <3>[ 132.381466] Allocated by task 5822: <4>[ 132.381485] kmem_cache_alloc+0xdf/0x2e0 <4>[ 132.381546] i915_gem_object_create_internal+0x24/0x1e0 [i915] <4>[ 132.381609] igt_mmap_offset_exhaustion+0x257/0x940 [i915] <4>[ 132.381677] __i915_subtests+0x5a/0x160 [i915] <4>[ 132.381742] __run_selftests+0x1a2/0x2f0 [i915] <4>[ 132.381806] i915_live_selftests+0x4e/0x80 [i915] <4>[ 132.381865] i915_pci_probe+0xd8/0x1b0 [i915] <4>[ 132.381868] pci_device_probe+0x1c5/0x3a0 <4>[ 132.381871] driver_probe_device+0x6b6/0xcb0 <4>[ 132.381874] __driver_attach+0x22d/0x2c0 <4>[ 132.381877] bus_for_each_dev+0x113/0x1a0 <4>[ 132.381880] bus_add_driver+0x38b/0x6e0 <4>[ 132.381884] driver_register+0x189/0x400 <4>[ 132.381886] do_one_initcall+0xa0/0x4c0 <4>[ 132.381889] do_init_module+0x1b5/0x54c <4>[ 132.381892] load_module+0x619e/0x9b70 <4>[ 132.381895] __se_sys_finit_module+0x17c/0x1a0 <4>[ 132.381898] do_syscall_64+0x97/0x400 <4>[ 132.381901] entry_SYSCALL_64_after_hwframe+0x49/0xbe <3>[ 132.381914] Freed by task 150: <4>[ 132.381931] kmem_cache_free+0xb7/0x340 <4>[ 132.381995] __i915_gem_free_objects+0x875/0xf50 [i915] <4>[ 132.382054] __i915_gem_free_work+0x69/0xb0 [i915] <4>[ 132.382058] process_one_work+0x78b/0x1740 <4>[ 132.382061] worker_thread+0x82/0xb80 <4>[ 132.382064] kthread+0x30c/0x3d0 <4>[ 132.382067] ret_from_fork+0x3a/0x50 <3>[ 132.382081] The buggy address belongs to the object at ffff8801e1324500 which belongs to the cache drm_i915_gem_object of size 1168 <3>[ 132.382133] The buggy address is located 248 bytes inside of 1168-byte region [ffff8801e1324500, ffff8801e1324990) <3>[ 132.382179] The buggy address belongs to the page: <0>[ 132.382202] page:ffffea000784c800 count:1 mapcount:0 mapping:ffff8801dedf6500 index:0xffff8801e1323ec0 compound_mapcount: 0 <0>[ 132.382251] flags: 0x8000000000008100(slab|head) <1>[ 132.382274] raw: 8000000000008100 ffff8801d6317440 ffff8801d6317440 ffff8801dedf6500 <1>[ 132.382307] raw: ffff8801e1323ec0 0000000000140013 00000001ffffffff 0000000000000000 <1>[ 132.382339] page dumped because: kasan: bad access detected <3>[ 132.382373] Memory state around the buggy address: <3>[ 132.382395] ffff8801e1324480: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc <3>[ 132.382426] ffff8801e1324500: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb <3>[ 132.382457] >ffff8801e1324580: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb <3>[ 132.382488] ^ <3>[ 132.382517] ffff8801e1324600: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb <3>[ 132.382548] ffff8801e1324680: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb This patch tricks the system into running without the background retire thread, until after we finish the test. The only reaping should then be performed by the mmap offset routine to reclaim the space as required. Signed-off-by: Chris Wilson Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20180709130208.11730-1-chris@chris-wilson.co.uk --- .../gpu/drm/i915/selftests/i915_gem_object.c | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_object.c b/drivers/gpu/drm/i915/selftests/i915_gem_object.c index f4a5099c75b5..d77acf4cc439 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_object.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_object.c @@ -507,6 +507,15 @@ static int igt_mmap_offset_exhaustion(void *arg) u64 hole_start, hole_end; int loop, err; + /* Disable background reaper */ + mutex_lock(&i915->drm.struct_mutex); + if (!i915->gt.active_requests++) + i915_gem_unpark(i915); + mutex_unlock(&i915->drm.struct_mutex); + cancel_delayed_work_sync(&i915->gt.retire_work); + cancel_delayed_work_sync(&i915->gt.idle_work); + GEM_BUG_ON(!i915->gt.awake); + /* Trim the device mmap space to only a page */ memset(&resv, 0, sizeof(resv)); drm_mm_for_each_hole(hole, mm, hole_start, hole_end) { @@ -515,7 +524,7 @@ static int igt_mmap_offset_exhaustion(void *arg) err = drm_mm_reserve_node(mm, &resv); if (err) { pr_err("Failed to trim VMA manager, err=%d\n", err); - return err; + goto out_park; } break; } @@ -576,6 +585,7 @@ static int igt_mmap_offset_exhaustion(void *arg) goto err_obj; } + /* NB we rely on the _active_ reference to access obj now */ GEM_BUG_ON(!i915_gem_object_is_active(obj)); err = i915_gem_object_create_mmap_offset(obj); if (err) { @@ -587,6 +597,13 @@ static int igt_mmap_offset_exhaustion(void *arg) out: drm_mm_remove_node(&resv); +out_park: + mutex_lock(&i915->drm.struct_mutex); + if (--i915->gt.active_requests) + queue_delayed_work(i915->wq, &i915->gt.retire_work, 0); + else + queue_delayed_work(i915->wq, &i915->gt.idle_work, 0); + mutex_unlock(&i915->drm.struct_mutex); return err; err_obj: i915_gem_object_put(obj); From 82edc7e8b8c06151bdc653935bc13b83e2f0fcfa Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Mon, 9 Jul 2018 15:39:27 -0700 Subject: [PATCH 114/114] drm/i915: Update DRIVER_DATE to 20180709 Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/i915_drv.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index fcb8f49a9b8a..a90a37ce2ef9 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -86,8 +86,8 @@ #define DRIVER_NAME "i915" #define DRIVER_DESC "Intel Graphics" -#define DRIVER_DATE "20180620" -#define DRIVER_TIMESTAMP 1529529048 +#define DRIVER_DATE "20180709" +#define DRIVER_TIMESTAMP 1531175967 /* Use I915_STATE_WARN(x) and I915_STATE_WARN_ON() (rather than WARN() and * WARN_ON()) for hw state sanity checks to check for unexpected conditions