From 5e9e0a3a3be7cf51f80327c069a8706cb59d0c24 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Mon, 28 Oct 2019 13:30:32 +0200 Subject: [PATCH 001/222] drm/i915: Fix max cursor size for i915g/gm MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Apparently the 128x128 and 256x256 ARGB cursor modes were only added on LPT/CST. While the display section of bspec isn't super clear on the subject, it does highlight these two modes in a different color, has a few changelog entries indicating the 256x256 mode was added for a LPT DCN, and that the 128x128 mode was also added later (though no DCN/platform note there). The "device dependencies" bspec section does list the 256x256x32 as a new feature for LPT/CST, and goes on to mention that current hw only has the 64x64x32 mode (which reinforces the notion that the 128x128 mode was also added at the same time). Testing on actual hardware confirms all of this. CI shows all the 128x128 and 256x256 tests failing on GDG, and my ALV definitely doesn't like them. So we shall limit GDG/ALV to 64x64 only. And while at it let's adjust the mobile gen2 case to list the two platforms explicitly so that the if-ladder looks reasonably uniform. Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20191028113036.27553-2-ville.syrjala@linux.intel.com Acked-by: Chris Wilson --- drivers/gpu/drm/i915/display/intel_display.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 9c8af50011e7..bf8fd2f626a0 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -17667,7 +17667,8 @@ static void intel_mode_config_init(struct drm_i915_private *i915) if (IS_I845G(i915) || IS_I865G(i915)) { mode_config->cursor_width = IS_I845G(i915) ? 
64 : 512; mode_config->cursor_height = 1023; - } else if (IS_GEN(i915, 2)) { + } else if (IS_I830(i915) || IS_I85X(i915) || + IS_I915G(i915) || IS_I915GM(i915)) { mode_config->cursor_width = 64; mode_config->cursor_height = 64; } else { From 0e12b4e31f0e34e19327e3958ee2b1c0f0d4d5b9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Mon, 28 Oct 2019 13:30:33 +0200 Subject: [PATCH 002/222] drm/i915: Fix overlay colorkey for 30bpp and 8bpp MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As with the video sprites the colorkey is always specified as 8bpc. For 10bpc primary plane formats we just ignore the two lsbs of each component. For C8 we'll replicate the same key to each channel, which is what the hardware wants. Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20191028113036.27553-3-ville.syrjala@linux.intel.com Reviewed-by: Chris Wilson --- drivers/gpu/drm/i915/display/intel_overlay.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_overlay.c b/drivers/gpu/drm/i915/display/intel_overlay.c index 66711e62fa71..3a69ab159dd7 100644 --- a/drivers/gpu/drm/i915/display/intel_overlay.c +++ b/drivers/gpu/drm/i915/display/intel_overlay.c @@ -100,12 +100,15 @@ #define CLK_RGB24_MASK 0x0 #define CLK_RGB16_MASK 0x070307 #define CLK_RGB15_MASK 0x070707 -#define CLK_RGB8I_MASK 0xffffff +#define RGB30_TO_COLORKEY(c) \ + (((c & 0x3FC00000) >> 6) | ((c & 0x000FF000) >> 4) | ((c & 0x000003FC) >> 2)) #define RGB16_TO_COLORKEY(c) \ (((c & 0xF800) << 8) | ((c & 0x07E0) << 5) | ((c & 0x001F) << 3)) #define RGB15_TO_COLORKEY(c) \ (((c & 0x7c00) << 9) | ((c & 0x03E0) << 6) | ((c & 0x001F) << 3)) +#define RGB8I_TO_COLORKEY(c) \ + (((c & 0xFF) << 16) | ((c & 0XFF) << 8) | ((c & 0xFF) << 0)) /* overlay flip addr flag */ #define OFC_UPDATE 0x1 @@ -682,8 +685,8 @@ static void update_colorkey(struct intel_overlay *overlay, switch 
(format) { case DRM_FORMAT_C8: - key = 0; - flags |= CLK_RGB8I_MASK; + key = RGB8I_TO_COLORKEY(key); + flags |= CLK_RGB24_MASK; break; case DRM_FORMAT_XRGB1555: key = RGB15_TO_COLORKEY(key); @@ -693,6 +696,11 @@ static void update_colorkey(struct intel_overlay *overlay, key = RGB16_TO_COLORKEY(key); flags |= CLK_RGB16_MASK; break; + case DRM_FORMAT_XRGB2101010: + case DRM_FORMAT_XBGR2101010: + key = RGB30_TO_COLORKEY(key); + flags |= CLK_RGB24_MASK; + break; default: flags |= CLK_RGB24_MASK; break; From e0b5d48e555d88d4ebe335469ac953b159915d92 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Mon, 28 Oct 2019 13:30:34 +0200 Subject: [PATCH 003/222] drm/i915: Configure overlay cc_out precision based on crtc gamma config MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Put the overlay color conversion unit into 10bit mode if the pipe isn't using the 8bit legacy gamma. Not 100% sure this is what the intention of the bit was but makes at least some sense to me. 
Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20191028113036.27553-4-ville.syrjala@linux.intel.com Acked-by: Chris Wilson --- drivers/gpu/drm/i915/display/intel_overlay.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_overlay.c b/drivers/gpu/drm/i915/display/intel_overlay.c index 3a69ab159dd7..c2d89d7f8852 100644 --- a/drivers/gpu/drm/i915/display/intel_overlay.c +++ b/drivers/gpu/drm/i915/display/intel_overlay.c @@ -785,9 +785,13 @@ static int intel_overlay_do_put_image(struct intel_overlay *overlay, i915_gem_object_flush_frontbuffer(new_bo, ORIGIN_DIRTYFB); if (!overlay->active) { - u32 oconfig; + const struct intel_crtc_state *crtc_state = + overlay->crtc->config; + u32 oconfig = 0; - oconfig = OCONF_CC_OUT_8BIT; + if (crtc_state->gamma_enable && + crtc_state->gamma_mode == GAMMA_MODE_MODE_8BIT) + oconfig |= OCONF_CC_OUT_8BIT; if (IS_GEN(dev_priv, 4)) oconfig |= OCONF_CSC_MODE_BT709; oconfig |= pipe == 0 ? From 7cd0f22019feef75197d4b9c4d8eea1914bc0d13 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Mon, 28 Oct 2019 13:30:35 +0200 Subject: [PATCH 004/222] drm/i915: Enable pipe gamma for the overlay MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We pass the plane data through the pipe gamma for all the other planes. Can't see why we should treat the overlay differently, so let's enable pipe gamma for it as well. 
Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20191028113036.27553-5-ville.syrjala@linux.intel.com Reviewed-by: Chris Wilson --- drivers/gpu/drm/i915/display/intel_overlay.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/i915/display/intel_overlay.c b/drivers/gpu/drm/i915/display/intel_overlay.c index c2d89d7f8852..84270e65fb52 100644 --- a/drivers/gpu/drm/i915/display/intel_overlay.c +++ b/drivers/gpu/drm/i915/display/intel_overlay.c @@ -792,6 +792,8 @@ static int intel_overlay_do_put_image(struct intel_overlay *overlay, if (crtc_state->gamma_enable && crtc_state->gamma_mode == GAMMA_MODE_MODE_8BIT) oconfig |= OCONF_CC_OUT_8BIT; + if (crtc_state->gamma_enable) + oconfig |= OCONF_GAMMA2_ENABLE; if (IS_GEN(dev_priv, 4)) oconfig |= OCONF_CSC_MODE_BT709; oconfig |= pipe == 0 ? From 963f328b9cea33886cc4cd7a833bcd0f66074d31 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Mon, 28 Oct 2019 13:30:36 +0200 Subject: [PATCH 005/222] drm/i915: Protect overlay colorkey macro arguments MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Put the customary () around the macro argument in the overlay colorkey macros. And while at it, switch to using a consistent case for the hex constants. 
Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20191028113036.27553-6-ville.syrjala@linux.intel.com Reviewed-by: Chris Wilson --- drivers/gpu/drm/i915/display/intel_overlay.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_overlay.c b/drivers/gpu/drm/i915/display/intel_overlay.c index 84270e65fb52..52b4f6193b4c 100644 --- a/drivers/gpu/drm/i915/display/intel_overlay.c +++ b/drivers/gpu/drm/i915/display/intel_overlay.c @@ -102,13 +102,13 @@ #define CLK_RGB15_MASK 0x070707 #define RGB30_TO_COLORKEY(c) \ - (((c & 0x3FC00000) >> 6) | ((c & 0x000FF000) >> 4) | ((c & 0x000003FC) >> 2)) + ((((c) & 0x3fc00000) >> 6) | (((c) & 0x000ff000) >> 4) | (((c) & 0x000003fc) >> 2)) #define RGB16_TO_COLORKEY(c) \ - (((c & 0xF800) << 8) | ((c & 0x07E0) << 5) | ((c & 0x001F) << 3)) + ((((c) & 0xf800) << 8) | (((c) & 0x07e0) << 5) | (((c) & 0x001f) << 3)) #define RGB15_TO_COLORKEY(c) \ - (((c & 0x7c00) << 9) | ((c & 0x03E0) << 6) | ((c & 0x001F) << 3)) + ((((c) & 0x7c00) << 9) | (((c) & 0x03e0) << 6) | (((c) & 0x001f) << 3)) #define RGB8I_TO_COLORKEY(c) \ - (((c & 0xFF) << 16) | ((c & 0XFF) << 8) | ((c & 0xFF) << 0)) + ((((c) & 0xff) << 16) | (((c) & 0xff) << 8) | (((c) & 0xff) << 0)) /* overlay flip addr flag */ #define OFC_UPDATE 0x1 From 794bdcf71f47b98f6e003190069d5064123067ed Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 28 Apr 2020 23:30:50 +0200 Subject: [PATCH 006/222] drm/i915: avoid unused scale_user_to_hw() warning After the function is no longer marked 'inline', there is now a new warning pointing out that the only caller is inside of an #ifdef: drivers/gpu/drm/i915/display/intel_panel.c:493:12: warning: 'scale_user_to_hw' defined but not used [-Wunused-function] 493 | static u32 scale_user_to_hw(struct intel_connector *connector, | ^~~~~~~~~~~~~~~~ Move the function itself into that #ifdef as well. 
Fixes: 81b55ef1f47b ("drm/i915: drop a bunch of superfluous inlines") Signed-off-by: Arnd Bergmann Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20200428213106.3139170-1-arnd@arndb.de --- drivers/gpu/drm/i915/display/intel_panel.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_panel.c b/drivers/gpu/drm/i915/display/intel_panel.c index aa931f9f0d6a..3c5056dbf607 100644 --- a/drivers/gpu/drm/i915/display/intel_panel.c +++ b/drivers/gpu/drm/i915/display/intel_panel.c @@ -489,16 +489,6 @@ static u32 scale(u32 source_val, return target_val; } -/* Scale user_level in range [0..user_max] to [hw_min..hw_max]. */ -static u32 scale_user_to_hw(struct intel_connector *connector, - u32 user_level, u32 user_max) -{ - struct intel_panel *panel = &connector->panel; - - return scale(user_level, 0, user_max, - panel->backlight.min, panel->backlight.max); -} - /* Scale user_level in range [0..user_max] to [0..hw_max], clamping the result * to [hw_min..hw_max]. */ static u32 clamp_user_to_hw(struct intel_connector *connector, @@ -1255,6 +1245,16 @@ static u32 intel_panel_get_backlight(struct intel_connector *connector) return val; } +/* Scale user_level in range [0..user_max] to [hw_min..hw_max]. 
*/ +static u32 scale_user_to_hw(struct intel_connector *connector, + u32 user_level, u32 user_max) +{ + struct intel_panel *panel = &connector->panel; + + return scale(user_level, 0, user_max, + panel->backlight.min, panel->backlight.max); +} + /* set backlight brightness to level in range [0..max], scaling wrt hw min */ static void intel_panel_set_backlight(const struct drm_connector_state *conn_state, u32 user_level, u32 user_max) From 2f9078c34c448372da0bd830cc80caba544a950a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 25 Feb 2020 19:11:10 +0200 Subject: [PATCH 007/222] drm/i915: Make skl_compute_dbuf_slices() behave consistently for all platforms MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently skl_compute_dbuf_slices() returns 0 for any inactive pipe on icl+, but returns BIT(S1) on pre-icl for any pipe (whether it's active or not). Let's make the behaviour consistent and always return 0 for any inactive pipe. Cc: Stanislav Lisovskiy Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20200225171125.28885-6-ville.syrjala@linux.intel.com Reviewed-by: Stanislav Lisovskiy --- drivers/gpu/drm/i915/intel_pm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 696491d71a1d..5c47b893e7b2 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -4606,7 +4606,7 @@ static u8 skl_compute_dbuf_slices(const struct intel_crtc_state *crtc_state, * For anything else just return one slice yet. * Should be extended for other platforms. */ - return BIT(DBUF_S1); + return active_pipes & BIT(pipe) ? 
BIT(DBUF_S1) : 0; } static u64 From b3f1ff5b5bf1fd1fefa917508ea1f2735070df93 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 25 Feb 2020 19:11:11 +0200 Subject: [PATCH 008/222] drm/i915: Polish some dbuf debugs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Polish some of the dbuf code to give more meaningful debug messages and whatnot. Also we can switch over to the per-device debugs/warns at the same time. Cc: Stanislav Lisovskiy Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20200225171125.28885-7-ville.syrjala@linux.intel.com Reviewed-by: Stanislav Lisovskiy --- .../drm/i915/display/intel_display_power.c | 36 +++++++++---------- 1 file changed, 17 insertions(+), 19 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_display_power.c b/drivers/gpu/drm/i915/display/intel_display_power.c index 49998906cc61..f4734713643d 100644 --- a/drivers/gpu/drm/i915/display/intel_display_power.c +++ b/drivers/gpu/drm/i915/display/intel_display_power.c @@ -4491,10 +4491,12 @@ static void intel_power_domains_sync_hw(struct drm_i915_private *dev_priv) mutex_unlock(&power_domains->lock); } -static bool intel_dbuf_slice_set(struct drm_i915_private *dev_priv, - i915_reg_t reg, bool enable) +static void intel_dbuf_slice_set(struct drm_i915_private *dev_priv, + enum dbuf_slice slice, bool enable) { - u32 val, status; + i915_reg_t reg = DBUF_CTL_S(slice); + bool state; + u32 val; val = intel_de_read(dev_priv, reg); val = enable ? (val | DBUF_POWER_REQUEST) : (val & ~DBUF_POWER_REQUEST); @@ -4502,13 +4504,10 @@ static bool intel_dbuf_slice_set(struct drm_i915_private *dev_priv, intel_de_posting_read(dev_priv, reg); udelay(10); - status = intel_de_read(dev_priv, reg) & DBUF_POWER_STATE; - if ((enable && !status) || (!enable && status)) { - drm_err(&dev_priv->drm, "DBus power %s timeout!\n", - enable ? 
"enable" : "disable"); - return false; - } - return true; + state = intel_de_read(dev_priv, reg) & DBUF_POWER_STATE; + drm_WARN(&dev_priv->drm, enable != state, + "DBuf slice %d power %s timeout!\n", + slice, enable ? "enable" : "disable"); } static void gen9_dbuf_enable(struct drm_i915_private *dev_priv) @@ -4524,12 +4523,13 @@ static void gen9_dbuf_disable(struct drm_i915_private *dev_priv) void icl_dbuf_slices_update(struct drm_i915_private *dev_priv, u8 req_slices) { - int i; - int max_slices = INTEL_INFO(dev_priv)->num_supported_dbuf_slices; + int num_slices = INTEL_INFO(dev_priv)->num_supported_dbuf_slices; struct i915_power_domains *power_domains = &dev_priv->power_domains; + enum dbuf_slice slice; - drm_WARN(&dev_priv->drm, hweight8(req_slices) > max_slices, - "Invalid number of dbuf slices requested\n"); + drm_WARN(&dev_priv->drm, req_slices & ~(BIT(num_slices) - 1), + "Invalid set of dbuf slices (0x%x) requested (num dbuf slices %d)\n", + req_slices, num_slices); drm_dbg_kms(&dev_priv->drm, "Updating dbuf slices to 0x%x\n", req_slices); @@ -4543,11 +4543,9 @@ void icl_dbuf_slices_update(struct drm_i915_private *dev_priv, */ mutex_lock(&power_domains->lock); - for (i = 0; i < max_slices; i++) { - intel_dbuf_slice_set(dev_priv, - DBUF_CTL_S(i), - (req_slices & BIT(i)) != 0); - } + for (slice = DBUF_S1; slice < num_slices; slice++) + intel_dbuf_slice_set(dev_priv, slice, + req_slices & BIT(slice)); dev_priv->enabled_dbuf_slices_mask = req_slices; From 56f48c1d44f6b884eed2ef8fdf5172bc295ff973 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 25 Feb 2020 19:11:12 +0200 Subject: [PATCH 009/222] drm/i915: Unify the low level dbuf code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The low level dbuf slice code is rather inconsistent with its function naming and organization. Make it more consistent. 
Also share the enable/disable functions between all platforms since the same code works just fine for all of them. Cc: Stanislav Lisovskiy Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20200225171125.28885-8-ville.syrjala@linux.intel.com Reviewed-by: Stanislav Lisovskiy --- drivers/gpu/drm/i915/display/intel_display.c | 6 +-- .../drm/i915/display/intel_display_power.c | 44 ++++++++----------- .../drm/i915/display/intel_display_power.h | 6 +-- 3 files changed, 24 insertions(+), 32 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index bf8fd2f626a0..5bb666615f75 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -15213,9 +15213,8 @@ static void icl_dbuf_slice_pre_update(struct intel_atomic_state *state) u8 required_slices = state->enabled_dbuf_slices_mask; u8 slices_union = hw_enabled_slices | required_slices; - /* If 2nd DBuf slice required, enable it here */ if (INTEL_GEN(dev_priv) >= 11 && slices_union != hw_enabled_slices) - icl_dbuf_slices_update(dev_priv, slices_union); + gen9_dbuf_slices_update(dev_priv, slices_union); } static void icl_dbuf_slice_post_update(struct intel_atomic_state *state) @@ -15224,9 +15223,8 @@ static void icl_dbuf_slice_post_update(struct intel_atomic_state *state) u8 hw_enabled_slices = dev_priv->enabled_dbuf_slices_mask; u8 required_slices = state->enabled_dbuf_slices_mask; - /* If 2nd DBuf slice is no more required disable it */ if (INTEL_GEN(dev_priv) >= 11 && required_slices != hw_enabled_slices) - icl_dbuf_slices_update(dev_priv, required_slices); + gen9_dbuf_slices_update(dev_priv, required_slices); } static void skl_commit_modeset_enables(struct intel_atomic_state *state) diff --git a/drivers/gpu/drm/i915/display/intel_display_power.c b/drivers/gpu/drm/i915/display/intel_display_power.c index f4734713643d..a3e581947bec 100644 --- 
a/drivers/gpu/drm/i915/display/intel_display_power.c +++ b/drivers/gpu/drm/i915/display/intel_display_power.c @@ -4491,15 +4491,18 @@ static void intel_power_domains_sync_hw(struct drm_i915_private *dev_priv) mutex_unlock(&power_domains->lock); } -static void intel_dbuf_slice_set(struct drm_i915_private *dev_priv, - enum dbuf_slice slice, bool enable) +static void gen9_dbuf_slice_set(struct drm_i915_private *dev_priv, + enum dbuf_slice slice, bool enable) { i915_reg_t reg = DBUF_CTL_S(slice); bool state; u32 val; val = intel_de_read(dev_priv, reg); - val = enable ? (val | DBUF_POWER_REQUEST) : (val & ~DBUF_POWER_REQUEST); + if (enable) + val |= DBUF_POWER_REQUEST; + else + val &= ~DBUF_POWER_REQUEST; intel_de_write(dev_priv, reg, val); intel_de_posting_read(dev_priv, reg); udelay(10); @@ -4510,18 +4513,8 @@ static void intel_dbuf_slice_set(struct drm_i915_private *dev_priv, slice, enable ? "enable" : "disable"); } -static void gen9_dbuf_enable(struct drm_i915_private *dev_priv) -{ - icl_dbuf_slices_update(dev_priv, BIT(DBUF_S1)); -} - -static void gen9_dbuf_disable(struct drm_i915_private *dev_priv) -{ - icl_dbuf_slices_update(dev_priv, 0); -} - -void icl_dbuf_slices_update(struct drm_i915_private *dev_priv, - u8 req_slices) +void gen9_dbuf_slices_update(struct drm_i915_private *dev_priv, + u8 req_slices) { int num_slices = INTEL_INFO(dev_priv)->num_supported_dbuf_slices; struct i915_power_domains *power_domains = &dev_priv->power_domains; @@ -4544,28 +4537,29 @@ void icl_dbuf_slices_update(struct drm_i915_private *dev_priv, mutex_lock(&power_domains->lock); for (slice = DBUF_S1; slice < num_slices; slice++) - intel_dbuf_slice_set(dev_priv, slice, - req_slices & BIT(slice)); + gen9_dbuf_slice_set(dev_priv, slice, req_slices & BIT(slice)); dev_priv->enabled_dbuf_slices_mask = req_slices; mutex_unlock(&power_domains->lock); } -static void icl_dbuf_enable(struct drm_i915_private *dev_priv) +static void gen9_dbuf_enable(struct drm_i915_private *dev_priv) { - 
skl_ddb_get_hw_state(dev_priv); + dev_priv->enabled_dbuf_slices_mask = + intel_enabled_dbuf_slices_mask(dev_priv); + /* * Just power up at least 1 slice, we will * figure out later which slices we have and what we need. */ - icl_dbuf_slices_update(dev_priv, dev_priv->enabled_dbuf_slices_mask | - BIT(DBUF_S1)); + gen9_dbuf_slices_update(dev_priv, BIT(DBUF_S1) | + dev_priv->enabled_dbuf_slices_mask); } -static void icl_dbuf_disable(struct drm_i915_private *dev_priv) +static void gen9_dbuf_disable(struct drm_i915_private *dev_priv) { - icl_dbuf_slices_update(dev_priv, 0); + gen9_dbuf_slices_update(dev_priv, 0); } static void icl_mbus_init(struct drm_i915_private *dev_priv) @@ -5125,7 +5119,7 @@ static void icl_display_core_init(struct drm_i915_private *dev_priv, intel_cdclk_init_hw(dev_priv); /* 5. Enable DBUF. */ - icl_dbuf_enable(dev_priv); + gen9_dbuf_enable(dev_priv); /* 6. Setup MBUS. */ icl_mbus_init(dev_priv); @@ -5148,7 +5142,7 @@ static void icl_display_core_uninit(struct drm_i915_private *dev_priv) /* 1. Disable all display engine functions -> aready done */ /* 2. Disable DBUF */ - icl_dbuf_disable(dev_priv); + gen9_dbuf_disable(dev_priv); /* 3. 
Disable CD clock */ intel_cdclk_uninit_hw(dev_priv); diff --git a/drivers/gpu/drm/i915/display/intel_display_power.h b/drivers/gpu/drm/i915/display/intel_display_power.h index 6c917699293b..dc766af41e9b 100644 --- a/drivers/gpu/drm/i915/display/intel_display_power.h +++ b/drivers/gpu/drm/i915/display/intel_display_power.h @@ -316,13 +316,13 @@ enum dbuf_slice { DBUF_S2, }; +void gen9_dbuf_slices_update(struct drm_i915_private *dev_priv, + u8 req_slices); + #define with_intel_display_power(i915, domain, wf) \ for ((wf) = intel_display_power_get((i915), (domain)); (wf); \ intel_display_power_put_async((i915), (domain), (wf)), (wf) = 0) -void icl_dbuf_slices_update(struct drm_i915_private *dev_priv, - u8 req_slices); - void chv_phy_powergate_lanes(struct intel_encoder *encoder, bool override, unsigned int mask); bool chv_phy_powergate_ch(struct drm_i915_private *dev_priv, enum dpio_phy phy, From 3cf43cdc63fbc3df19ea8398e9b8717ab44a6304 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 25 Feb 2020 19:11:13 +0200 Subject: [PATCH 010/222] drm/i915: Introduce proper dbuf state MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a global state to track the dbuf slices. Gets rid of all the nasty coupling between state->modeset and dbuf recomputation. Also we can now totally nuke state->active_pipe_changes. dev_priv->wm.distrust_bios_wm still remains, but that too will get nuked soon. 
Cc: Stanislav Lisovskiy Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20200225171125.28885-9-ville.syrjala@linux.intel.com Reviewed-by: Stanislav Lisovskiy --- drivers/gpu/drm/i915/display/intel_display.c | 67 ++++-- .../drm/i915/display/intel_display_power.c | 8 +- .../drm/i915/display/intel_display_types.h | 13 -- drivers/gpu/drm/i915/i915_drv.h | 11 +- drivers/gpu/drm/i915/intel_pm.c | 193 ++++++++++++------ drivers/gpu/drm/i915/intel_pm.h | 22 ++ 6 files changed, 211 insertions(+), 103 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 5bb666615f75..1ed0cec540db 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -7579,6 +7579,8 @@ static void intel_crtc_disable_noatomic(struct intel_crtc *crtc, to_intel_bw_state(dev_priv->bw_obj.state); struct intel_cdclk_state *cdclk_state = to_intel_cdclk_state(dev_priv->cdclk.obj.state); + struct intel_dbuf_state *dbuf_state = + to_intel_dbuf_state(dev_priv->dbuf.obj.state); struct intel_crtc_state *crtc_state = to_intel_crtc_state(crtc->base.state); enum intel_display_power_domain domain; @@ -7652,6 +7654,8 @@ static void intel_crtc_disable_noatomic(struct intel_crtc *crtc, cdclk_state->min_voltage_level[pipe] = 0; cdclk_state->active_pipes &= ~BIT(pipe); + dbuf_state->active_pipes &= ~BIT(pipe); + bw_state->data_rate[pipe] = 0; bw_state->num_active_planes[pipe] = 0; } @@ -14009,10 +14013,10 @@ static void verify_wm_state(struct intel_crtc *crtc, hw_enabled_slices = intel_enabled_dbuf_slices_mask(dev_priv); if (INTEL_GEN(dev_priv) >= 11 && - hw_enabled_slices != dev_priv->enabled_dbuf_slices_mask) + hw_enabled_slices != dev_priv->dbuf.enabled_slices) drm_err(&dev_priv->drm, "mismatch in DBUF Slices (expected 0x%x, got 0x%x)\n", - dev_priv->enabled_dbuf_slices_mask, + dev_priv->dbuf.enabled_slices, hw_enabled_slices); /* planes */ @@ -14553,9 +14557,7 @@ 
static int intel_modeset_checks(struct intel_atomic_state *state) state->modeset = true; state->active_pipes = intel_calc_active_pipes(state, dev_priv->active_pipes); - state->active_pipe_changes = state->active_pipes ^ dev_priv->active_pipes; - - if (state->active_pipe_changes) { + if (state->active_pipes != dev_priv->active_pipes) { ret = _intel_atomic_lock_global_state(state); if (ret) return ret; @@ -15209,22 +15211,38 @@ static void intel_commit_modeset_enables(struct intel_atomic_state *state) static void icl_dbuf_slice_pre_update(struct intel_atomic_state *state) { struct drm_i915_private *dev_priv = to_i915(state->base.dev); - u8 hw_enabled_slices = dev_priv->enabled_dbuf_slices_mask; - u8 required_slices = state->enabled_dbuf_slices_mask; - u8 slices_union = hw_enabled_slices | required_slices; + const struct intel_dbuf_state *new_dbuf_state = + intel_atomic_get_new_dbuf_state(state); + const struct intel_dbuf_state *old_dbuf_state = + intel_atomic_get_old_dbuf_state(state); - if (INTEL_GEN(dev_priv) >= 11 && slices_union != hw_enabled_slices) - gen9_dbuf_slices_update(dev_priv, slices_union); + if (!new_dbuf_state || + new_dbuf_state->enabled_slices == old_dbuf_state->enabled_slices) + return; + + WARN_ON(!new_dbuf_state->base.changed); + + gen9_dbuf_slices_update(dev_priv, + old_dbuf_state->enabled_slices | + new_dbuf_state->enabled_slices); } static void icl_dbuf_slice_post_update(struct intel_atomic_state *state) { struct drm_i915_private *dev_priv = to_i915(state->base.dev); - u8 hw_enabled_slices = dev_priv->enabled_dbuf_slices_mask; - u8 required_slices = state->enabled_dbuf_slices_mask; + const struct intel_dbuf_state *new_dbuf_state = + intel_atomic_get_new_dbuf_state(state); + const struct intel_dbuf_state *old_dbuf_state = + intel_atomic_get_old_dbuf_state(state); - if (INTEL_GEN(dev_priv) >= 11 && required_slices != hw_enabled_slices) - gen9_dbuf_slices_update(dev_priv, required_slices); + if (!new_dbuf_state || + new_dbuf_state->enabled_slices 
== old_dbuf_state->enabled_slices) + return; + + WARN_ON(!new_dbuf_state->base.changed); + + gen9_dbuf_slices_update(dev_priv, + new_dbuf_state->enabled_slices); } static void skl_commit_modeset_enables(struct intel_atomic_state *state) @@ -15467,9 +15485,7 @@ static void intel_atomic_commit_tail(struct intel_atomic_state *state) if (state->modeset) intel_encoders_update_prepare(state); - /* Enable all new slices, we might need */ - if (state->modeset) - icl_dbuf_slice_pre_update(state); + icl_dbuf_slice_pre_update(state); /* Now enable the clocks, plane, pipe, and connectors that we set up. */ dev_priv->display.commit_modeset_enables(state); @@ -15524,9 +15540,7 @@ static void intel_atomic_commit_tail(struct intel_atomic_state *state) dev_priv->display.optimize_watermarks(state, crtc); } - /* Disable all slices, we don't need */ - if (state->modeset) - icl_dbuf_slice_post_update(state); + icl_dbuf_slice_post_update(state); for_each_oldnew_intel_crtc_in_state(state, crtc, old_crtc_state, new_crtc_state, i) { intel_post_plane_update(state, crtc); @@ -17420,10 +17434,14 @@ void intel_modeset_init_hw(struct drm_i915_private *i915) { struct intel_cdclk_state *cdclk_state = to_intel_cdclk_state(i915->cdclk.obj.state); + struct intel_dbuf_state *dbuf_state = + to_intel_dbuf_state(i915->dbuf.obj.state); intel_update_cdclk(i915); intel_dump_cdclk_config(&i915->cdclk.hw, "Current CDCLK"); cdclk_state->logical = cdclk_state->actual = i915->cdclk.hw; + + dbuf_state->enabled_slices = i915->dbuf.enabled_slices; } static int sanitize_watermarks_add_affected(struct drm_atomic_state *state) @@ -17712,6 +17730,10 @@ int intel_modeset_init_noirq(struct drm_i915_private *i915) if (ret) return ret; + ret = intel_dbuf_init(i915); + if (ret) + return ret; + ret = intel_bw_init(i915); if (ret) return ret; @@ -18228,6 +18250,8 @@ static void intel_modeset_readout_hw_state(struct drm_device *dev) struct drm_i915_private *dev_priv = to_i915(dev); struct intel_cdclk_state *cdclk_state = 
to_intel_cdclk_state(dev_priv->cdclk.obj.state); + struct intel_dbuf_state *dbuf_state = + to_intel_dbuf_state(dev_priv->dbuf.obj.state); enum pipe pipe; struct intel_crtc *crtc; struct intel_encoder *encoder; @@ -18258,7 +18282,8 @@ static void intel_modeset_readout_hw_state(struct drm_device *dev) enableddisabled(crtc_state->hw.active)); } - dev_priv->active_pipes = cdclk_state->active_pipes = active_pipes; + dev_priv->active_pipes = cdclk_state->active_pipes = + dbuf_state->active_pipes = active_pipes; readout_plane_state(dev_priv); diff --git a/drivers/gpu/drm/i915/display/intel_display_power.c b/drivers/gpu/drm/i915/display/intel_display_power.c index a3e581947bec..7a8213993110 100644 --- a/drivers/gpu/drm/i915/display/intel_display_power.c +++ b/drivers/gpu/drm/i915/display/intel_display_power.c @@ -1161,7 +1161,7 @@ static bool gen9_dc_off_power_well_enabled(struct drm_i915_private *dev_priv, static void gen9_assert_dbuf_enabled(struct drm_i915_private *dev_priv) { u8 hw_enabled_dbuf_slices = intel_enabled_dbuf_slices_mask(dev_priv); - u8 enabled_dbuf_slices = dev_priv->enabled_dbuf_slices_mask; + u8 enabled_dbuf_slices = dev_priv->dbuf.enabled_slices; drm_WARN(&dev_priv->drm, hw_enabled_dbuf_slices != enabled_dbuf_slices, @@ -4539,14 +4539,14 @@ void gen9_dbuf_slices_update(struct drm_i915_private *dev_priv, for (slice = DBUF_S1; slice < num_slices; slice++) gen9_dbuf_slice_set(dev_priv, slice, req_slices & BIT(slice)); - dev_priv->enabled_dbuf_slices_mask = req_slices; + dev_priv->dbuf.enabled_slices = req_slices; mutex_unlock(&power_domains->lock); } static void gen9_dbuf_enable(struct drm_i915_private *dev_priv) { - dev_priv->enabled_dbuf_slices_mask = + dev_priv->dbuf.enabled_slices = intel_enabled_dbuf_slices_mask(dev_priv); /* @@ -4554,7 +4554,7 @@ static void gen9_dbuf_enable(struct drm_i915_private *dev_priv) * figure out later which slices we have and what we need. 
*/ gen9_dbuf_slices_update(dev_priv, BIT(DBUF_S1) | - dev_priv->enabled_dbuf_slices_mask); + dev_priv->dbuf.enabled_slices); } static void gen9_dbuf_disable(struct drm_i915_private *dev_priv) diff --git a/drivers/gpu/drm/i915/display/intel_display_types.h b/drivers/gpu/drm/i915/display/intel_display_types.h index 6a27e72ccf01..323f8da7f698 100644 --- a/drivers/gpu/drm/i915/display/intel_display_types.h +++ b/drivers/gpu/drm/i915/display/intel_display_types.h @@ -479,16 +479,6 @@ struct intel_atomic_state { bool dpll_set, modeset; - /* - * Does this transaction change the pipes that are active? This mask - * tracks which CRTC's have changed their active state at the end of - * the transaction (not counting the temporary disable during modesets). - * This mask should only be non-zero when intel_state->modeset is true, - * but the converse is not necessarily true; simply changing a mode may - * not flip the final active status of any CRTC's - */ - u8 active_pipe_changes; - u8 active_pipes; struct intel_shared_dpll_state shared_dpll[I915_NUM_PLLS]; @@ -506,9 +496,6 @@ struct intel_atomic_state { */ bool global_state_changed; - /* Number of enabled DBuf slices */ - u8 enabled_dbuf_slices_mask; - struct i915_sw_fence commit_ready; struct llist_node freed; diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index e993c64a02b0..a28cfe1741f8 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -947,6 +947,13 @@ struct drm_i915_private { struct intel_global_obj obj; } cdclk; + struct { + /* The current hardware dbuf configuration */ + u8 enabled_slices; + + struct intel_global_obj obj; + } dbuf; + /** * wq - Driver workqueue for GEM. * @@ -1123,12 +1130,12 @@ struct drm_i915_private { * Set during HW readout of watermarks/DDB. Some platforms * need to know when we're still using BIOS-provided values * (which we don't fully trust). + * + * FIXME get rid of this. 
*/ bool distrust_bios_wm; } wm; - u8 enabled_dbuf_slices_mask; /* GEN11 has configurable 2 slices */ - struct dram_info { bool valid; bool is_16gb_dimm; diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 5c47b893e7b2..cb57786fdc9f 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -4040,7 +4040,7 @@ static u16 intel_get_ddb_size(struct drm_i915_private *dev_priv) static u8 skl_compute_dbuf_slices(const struct intel_crtc_state *crtc_state, u8 active_pipes); -static void +static int skl_ddb_get_pipe_allocation_limits(struct drm_i915_private *dev_priv, const struct intel_crtc_state *crtc_state, const u64 total_data_rate, @@ -4053,30 +4053,29 @@ skl_ddb_get_pipe_allocation_limits(struct drm_i915_private *dev_priv, const struct intel_crtc *crtc; u32 pipe_width = 0, total_width_in_range = 0, width_before_pipe_in_range = 0; enum pipe for_pipe = to_intel_crtc(for_crtc)->pipe; + struct intel_dbuf_state *new_dbuf_state = + intel_atomic_get_new_dbuf_state(intel_state); + const struct intel_dbuf_state *old_dbuf_state = + intel_atomic_get_old_dbuf_state(intel_state); + u8 active_pipes = new_dbuf_state->active_pipes; u16 ddb_size; u32 ddb_range_size; u32 i; u32 dbuf_slice_mask; - u32 active_pipes; u32 offset; u32 slice_size; u32 total_slice_mask; u32 start, end; - - if (drm_WARN_ON(&dev_priv->drm, !state) || !crtc_state->hw.active) { - alloc->start = 0; - alloc->end = 0; - *num_active = hweight8(dev_priv->active_pipes); - return; - } - - if (intel_state->active_pipe_changes) - active_pipes = intel_state->active_pipes; - else - active_pipes = dev_priv->active_pipes; + int ret; *num_active = hweight8(active_pipes); + if (!crtc_state->hw.active) { + alloc->start = 0; + alloc->end = 0; + return 0; + } + ddb_size = intel_get_ddb_size(dev_priv); slice_size = ddb_size / INTEL_INFO(dev_priv)->num_supported_dbuf_slices; @@ -4089,13 +4088,16 @@ skl_ddb_get_pipe_allocation_limits(struct drm_i915_private *dev_priv, * that 
changes the active CRTC list or do modeset would need to * grab _all_ crtc locks, including the one we currently hold. */ - if (!intel_state->active_pipe_changes && !intel_state->modeset) { + if (old_dbuf_state->active_pipes == new_dbuf_state->active_pipes && + !dev_priv->wm.distrust_bios_wm) { /* * alloc may be cleared by clear_intel_crtc_state, * copy from old state to be sure + * + * FIXME get rid of this mess */ *alloc = to_intel_crtc_state(for_crtc->state)->wm.skl.ddb; - return; + return 0; } /* @@ -4174,7 +4176,13 @@ skl_ddb_get_pipe_allocation_limits(struct drm_i915_private *dev_priv, * FIXME: For now we always enable slice S1 as per * the Bspec display initialization sequence. */ - intel_state->enabled_dbuf_slices_mask = total_slice_mask | BIT(DBUF_S1); + new_dbuf_state->enabled_slices = total_slice_mask | BIT(DBUF_S1); + + if (old_dbuf_state->enabled_slices != new_dbuf_state->enabled_slices) { + ret = intel_atomic_serialize_global_state(&new_dbuf_state->base); + if (ret) + return ret; + } start = ddb_range_size * width_before_pipe_in_range / total_width_in_range; end = ddb_range_size * @@ -4185,9 +4193,8 @@ skl_ddb_get_pipe_allocation_limits(struct drm_i915_private *dev_priv, DRM_DEBUG_KMS("Pipe %d ddb %d-%d\n", for_pipe, alloc->start, alloc->end); - DRM_DEBUG_KMS("Enabled ddb slices mask %x num supported %d\n", - intel_state->enabled_dbuf_slices_mask, - INTEL_INFO(dev_priv)->num_supported_dbuf_slices); + + return 0; } static int skl_compute_wm_params(const struct intel_crtc_state *crtc_state, @@ -4310,8 +4317,8 @@ void skl_pipe_ddb_get_hw_state(struct intel_crtc *crtc, void skl_ddb_get_hw_state(struct drm_i915_private *dev_priv) { - dev_priv->enabled_dbuf_slices_mask = - intel_enabled_dbuf_slices_mask(dev_priv); + dev_priv->dbuf.enabled_slices = + intel_enabled_dbuf_slices_mask(dev_priv); } /* @@ -4758,6 +4765,7 @@ skl_allocate_pipe_ddb(struct intel_crtc_state *crtc_state) u64 uv_plane_data_rate[I915_MAX_PLANES] = {}; u32 blocks; int level; + int ret; /* 
Clear the partitioning for disabled planes. */ memset(crtc_state->wm.skl.plane_ddb_y, 0, sizeof(crtc_state->wm.skl.plane_ddb_y)); @@ -4778,8 +4786,12 @@ skl_allocate_pipe_ddb(struct intel_crtc_state *crtc_state) plane_data_rate, uv_plane_data_rate); - skl_ddb_get_pipe_allocation_limits(dev_priv, crtc_state, total_data_rate, - alloc, &num_active); + ret = skl_ddb_get_pipe_allocation_limits(dev_priv, crtc_state, + total_data_rate, + alloc, &num_active); + if (ret) + return ret; + alloc_size = skl_ddb_entry_size(alloc); if (alloc_size == 0) return 0; @@ -5700,14 +5712,11 @@ skl_ddb_add_affected_planes(const struct intel_crtc_state *old_crtc_state, static int skl_compute_ddb(struct intel_atomic_state *state) { - struct drm_i915_private *dev_priv = to_i915(state->base.dev); struct intel_crtc_state *old_crtc_state; struct intel_crtc_state *new_crtc_state; struct intel_crtc *crtc; int ret, i; - state->enabled_dbuf_slices_mask = dev_priv->enabled_dbuf_slices_mask; - for_each_oldnew_intel_crtc_in_state(state, crtc, old_crtc_state, new_crtc_state, i) { ret = skl_allocate_pipe_ddb(new_crtc_state); @@ -5855,7 +5864,8 @@ skl_print_wm_changes(struct intel_atomic_state *state) } } -static int intel_add_all_pipes(struct intel_atomic_state *state) +static int intel_add_affected_pipes(struct intel_atomic_state *state, + u8 pipe_mask) { struct drm_i915_private *dev_priv = to_i915(state->base.dev); struct intel_crtc *crtc; @@ -5863,6 +5873,9 @@ static int intel_add_all_pipes(struct intel_atomic_state *state) for_each_intel_crtc(&dev_priv->drm, crtc) { struct intel_crtc_state *crtc_state; + if ((pipe_mask & BIT(crtc->pipe)) == 0) + continue; + crtc_state = intel_atomic_get_crtc_state(&state->base, crtc); if (IS_ERR(crtc_state)) return PTR_ERR(crtc_state); @@ -5875,49 +5888,54 @@ static int skl_ddb_add_affected_pipes(struct intel_atomic_state *state) { struct drm_i915_private *dev_priv = to_i915(state->base.dev); - int ret; + struct intel_crtc_state *crtc_state; + struct intel_crtc 
*crtc; + int i, ret; - /* - * If this is our first atomic update following hardware readout, - * we can't trust the DDB that the BIOS programmed for us. Let's - * pretend that all pipes switched active status so that we'll - * ensure a full DDB recompute. - */ if (dev_priv->wm.distrust_bios_wm) { - ret = drm_modeset_lock(&dev_priv->drm.mode_config.connection_mutex, - state->base.acquire_ctx); + /* + * skl_ddb_get_pipe_allocation_limits() currently requires + * all active pipes to be included in the state so that + * it can redistribute the dbuf among them, and it really + * wants to recompute things when distrust_bios_wm is set + * so we add all the pipes to the state. + */ + ret = intel_add_affected_pipes(state, ~0); if (ret) return ret; - - state->active_pipe_changes = INTEL_INFO(dev_priv)->pipe_mask; - - /* - * We usually only initialize state->active_pipes if we - * we're doing a modeset; make sure this field is always - * initialized during the sanitization process that happens - * on the first commit too. - */ - if (!state->modeset) - state->active_pipes = dev_priv->active_pipes; } - /* - * If the modeset changes which CRTC's are active, we need to - * recompute the DDB allocation for *all* active pipes, even - * those that weren't otherwise being modified in any way by this - * atomic commit. Due to the shrinking of the per-pipe allocations - * when new active CRTC's are added, it's possible for a pipe that - * we were already using and aren't changing at all here to suddenly - * become invalid if its DDB needs exceeds its new allocation. - * - * Note that if we wind up doing a full DDB recompute, we can't let - * any other display updates race with this transaction, so we need - * to grab the lock on *all* CRTC's. 
- */ - if (state->active_pipe_changes || state->modeset) { - ret = intel_add_all_pipes(state); + for_each_new_intel_crtc_in_state(state, crtc, crtc_state, i) { + struct intel_dbuf_state *new_dbuf_state; + const struct intel_dbuf_state *old_dbuf_state; + + new_dbuf_state = intel_atomic_get_dbuf_state(state); + if (IS_ERR(new_dbuf_state)) + return ret; + + old_dbuf_state = intel_atomic_get_old_dbuf_state(state); + + new_dbuf_state->active_pipes = + intel_calc_active_pipes(state, old_dbuf_state->active_pipes); + + if (old_dbuf_state->active_pipes == new_dbuf_state->active_pipes) + break; + + ret = intel_atomic_lock_global_state(&new_dbuf_state->base); if (ret) return ret; + + /* + * skl_ddb_get_pipe_allocation_limits() currently requires + * all active pipes to be included in the state so that + * it can redistribute the dbuf among them. + */ + ret = intel_add_affected_pipes(state, + new_dbuf_state->active_pipes); + if (ret) + return ret; + + break; } return 0; @@ -7746,3 +7764,52 @@ void intel_pm_setup(struct drm_i915_private *dev_priv) dev_priv->runtime_pm.suspended = false; atomic_set(&dev_priv->runtime_pm.wakeref_count, 0); } + +static struct intel_global_state *intel_dbuf_duplicate_state(struct intel_global_obj *obj) +{ + struct intel_dbuf_state *dbuf_state; + + dbuf_state = kmemdup(obj->state, sizeof(*dbuf_state), GFP_KERNEL); + if (!dbuf_state) + return NULL; + + return &dbuf_state->base; +} + +static void intel_dbuf_destroy_state(struct intel_global_obj *obj, + struct intel_global_state *state) +{ + kfree(state); +} + +static const struct intel_global_state_funcs intel_dbuf_funcs = { + .atomic_duplicate_state = intel_dbuf_duplicate_state, + .atomic_destroy_state = intel_dbuf_destroy_state, +}; + +struct intel_dbuf_state * +intel_atomic_get_dbuf_state(struct intel_atomic_state *state) +{ + struct drm_i915_private *dev_priv = to_i915(state->base.dev); + struct intel_global_state *dbuf_state; + + dbuf_state = intel_atomic_get_global_obj_state(state, 
&dev_priv->dbuf.obj); + if (IS_ERR(dbuf_state)) + return ERR_CAST(dbuf_state); + + return to_intel_dbuf_state(dbuf_state); +} + +int intel_dbuf_init(struct drm_i915_private *dev_priv) +{ + struct intel_dbuf_state *dbuf_state; + + dbuf_state = kzalloc(sizeof(*dbuf_state), GFP_KERNEL); + if (!dbuf_state) + return -ENOMEM; + + intel_atomic_global_obj_init(dev_priv, &dev_priv->dbuf.obj, + &dbuf_state->base, &intel_dbuf_funcs); + + return 0; +} diff --git a/drivers/gpu/drm/i915/intel_pm.h b/drivers/gpu/drm/i915/intel_pm.h index 614ac7f8d4cc..9f75ac4c2bd1 100644 --- a/drivers/gpu/drm/i915/intel_pm.h +++ b/drivers/gpu/drm/i915/intel_pm.h @@ -8,6 +8,8 @@ #include +#include "display/intel_global_state.h" + #include "i915_reg.h" #include "display/intel_bw.h" @@ -63,4 +65,24 @@ void intel_enable_ipc(struct drm_i915_private *dev_priv); bool intel_set_memory_cxsr(struct drm_i915_private *dev_priv, bool enable); +struct intel_dbuf_state { + struct intel_global_state base; + + u8 enabled_slices; + u8 active_pipes; +}; + +int intel_dbuf_init(struct drm_i915_private *dev_priv); + +struct intel_dbuf_state * +intel_atomic_get_dbuf_state(struct intel_atomic_state *state); + +#define to_intel_dbuf_state(x) container_of((x), struct intel_dbuf_state, base) +#define intel_atomic_get_old_dbuf_state(state) \ + to_intel_dbuf_state(intel_atomic_get_old_global_obj_state(state, &to_i915(state->base.dev)->dbuf.obj)) +#define intel_atomic_get_new_dbuf_state(state) \ + to_intel_dbuf_state(intel_atomic_get_new_global_obj_state(state, &to_i915(state->base.dev)->dbuf.obj)) + +int intel_dbuf_init(struct drm_i915_private *dev_priv); + #endif /* __INTEL_PM_H__ */ From 0cde0e0ff5f5ebd27507069250728c763c14ac81 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 25 Feb 2020 19:11:14 +0200 Subject: [PATCH 011/222] drm/i915: Nuke skl_ddb_get_hw_state() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit skl_ddb_get_hw_state() is redundant and 
kinda called in the wrong spot anyway. Just kill it. Cc: Stanislav Lisovskiy Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20200225171125.28885-10-ville.syrjala@linux.intel.com Reviewed-by: Stanislav Lisovskiy --- drivers/gpu/drm/i915/intel_pm.c | 7 ------- drivers/gpu/drm/i915/intel_pm.h | 1 - 2 files changed, 8 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index cb57786fdc9f..a92d57d9b759 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -4315,12 +4315,6 @@ void skl_pipe_ddb_get_hw_state(struct intel_crtc *crtc, intel_display_power_put(dev_priv, power_domain, wakeref); } -void skl_ddb_get_hw_state(struct drm_i915_private *dev_priv) -{ - dev_priv->dbuf.enabled_slices = - intel_enabled_dbuf_slices_mask(dev_priv); -} - /* * Determines the downscale amount of a plane for the purposes of watermark calculations. * The bspec defines downscale amount as: @@ -6181,7 +6175,6 @@ void skl_wm_get_hw_state(struct drm_i915_private *dev_priv) struct intel_crtc *crtc; struct intel_crtc_state *crtc_state; - skl_ddb_get_hw_state(dev_priv); for_each_intel_crtc(&dev_priv->drm, crtc) { crtc_state = to_intel_crtc_state(crtc->base.state); diff --git a/drivers/gpu/drm/i915/intel_pm.h b/drivers/gpu/drm/i915/intel_pm.h index 9f75ac4c2bd1..3fcc9b6e2cbf 100644 --- a/drivers/gpu/drm/i915/intel_pm.h +++ b/drivers/gpu/drm/i915/intel_pm.h @@ -39,7 +39,6 @@ u8 intel_enabled_dbuf_slices_mask(struct drm_i915_private *dev_priv); void skl_pipe_ddb_get_hw_state(struct intel_crtc *crtc, struct skl_ddb_entry *ddb_y, struct skl_ddb_entry *ddb_uv); -void skl_ddb_get_hw_state(struct drm_i915_private *dev_priv); void skl_pipe_wm_get_hw_state(struct intel_crtc *crtc, struct skl_pipe_wm *out); void g4x_wm_sanitize(struct drm_i915_private *dev_priv); From c7c0e7ebe4d9963573f81399374e4e95f37fd8e3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 25 Feb 2020 19:11:15
+0200 Subject: [PATCH 012/222] drm/i915: Move the dbuf pre/post plane update MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Encapsulate the dbuf state more by moving the pre/post plane functions out from intel_display.c. We stick them into intel_pm.c since that's where the rest of the code lives for now. Eventually we should add a new file for this stuff at which point we also need to decide if it makes sense to even split the wm code from the ddb code, or to keep them together. Cc: Stanislav Lisovskiy Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20200225171125.28885-11-ville.syrjala@linux.intel.com Reviewed-by: Stanislav Lisovskiy --- drivers/gpu/drm/i915/display/intel_display.c | 41 +------------------- drivers/gpu/drm/i915/intel_pm.c | 37 ++++++++++++++++++ drivers/gpu/drm/i915/intel_pm.h | 2 + 3 files changed, 41 insertions(+), 39 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 1ed0cec540db..3da4491bcd2b 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -15208,43 +15208,6 @@ static void intel_commit_modeset_enables(struct intel_atomic_state *state) } } -static void icl_dbuf_slice_pre_update(struct intel_atomic_state *state) -{ - struct drm_i915_private *dev_priv = to_i915(state->base.dev); - const struct intel_dbuf_state *new_dbuf_state = - intel_atomic_get_new_dbuf_state(state); - const struct intel_dbuf_state *old_dbuf_state = - intel_atomic_get_old_dbuf_state(state); - - if (!new_dbuf_state || - new_dbuf_state->enabled_slices == old_dbuf_state->enabled_slices) - return; - - WARN_ON(!new_dbuf_state->base.changed); - - gen9_dbuf_slices_update(dev_priv, - old_dbuf_state->enabled_slices | - new_dbuf_state->enabled_slices); -} - -static void icl_dbuf_slice_post_update(struct intel_atomic_state *state) -{ - struct drm_i915_private *dev_priv = 
to_i915(state->base.dev); - const struct intel_dbuf_state *new_dbuf_state = - intel_atomic_get_new_dbuf_state(state); - const struct intel_dbuf_state *old_dbuf_state = - intel_atomic_get_old_dbuf_state(state); - - if (!new_dbuf_state || - new_dbuf_state->enabled_slices == old_dbuf_state->enabled_slices) - return; - - WARN_ON(!new_dbuf_state->base.changed); - - gen9_dbuf_slices_update(dev_priv, - new_dbuf_state->enabled_slices); -} - static void skl_commit_modeset_enables(struct intel_atomic_state *state) { struct drm_i915_private *dev_priv = to_i915(state->base.dev); @@ -15485,7 +15448,7 @@ static void intel_atomic_commit_tail(struct intel_atomic_state *state) if (state->modeset) intel_encoders_update_prepare(state); - icl_dbuf_slice_pre_update(state); + intel_dbuf_pre_plane_update(state); /* Now enable the clocks, plane, pipe, and connectors that we set up. */ dev_priv->display.commit_modeset_enables(state); @@ -15540,7 +15503,7 @@ static void intel_atomic_commit_tail(struct intel_atomic_state *state) dev_priv->display.optimize_watermarks(state, crtc); } - icl_dbuf_slice_post_update(state); + intel_dbuf_post_plane_update(state); for_each_oldnew_intel_crtc_in_state(state, crtc, old_crtc_state, new_crtc_state, i) { intel_post_plane_update(state, crtc); diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index a92d57d9b759..d40d22eb65da 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -7806,3 +7806,40 @@ int intel_dbuf_init(struct drm_i915_private *dev_priv) return 0; } + +void intel_dbuf_pre_plane_update(struct intel_atomic_state *state) +{ + struct drm_i915_private *dev_priv = to_i915(state->base.dev); + const struct intel_dbuf_state *new_dbuf_state = + intel_atomic_get_new_dbuf_state(state); + const struct intel_dbuf_state *old_dbuf_state = + intel_atomic_get_old_dbuf_state(state); + + if (!new_dbuf_state || + new_dbuf_state->enabled_slices == old_dbuf_state->enabled_slices) + return; + + 
WARN_ON(!new_dbuf_state->base.changed); + + gen9_dbuf_slices_update(dev_priv, + old_dbuf_state->enabled_slices | + new_dbuf_state->enabled_slices); +} + +void intel_dbuf_post_plane_update(struct intel_atomic_state *state) +{ + struct drm_i915_private *dev_priv = to_i915(state->base.dev); + const struct intel_dbuf_state *new_dbuf_state = + intel_atomic_get_new_dbuf_state(state); + const struct intel_dbuf_state *old_dbuf_state = + intel_atomic_get_old_dbuf_state(state); + + if (!new_dbuf_state || + new_dbuf_state->enabled_slices == old_dbuf_state->enabled_slices) + return; + + WARN_ON(!new_dbuf_state->base.changed); + + gen9_dbuf_slices_update(dev_priv, + new_dbuf_state->enabled_slices); +} diff --git a/drivers/gpu/drm/i915/intel_pm.h b/drivers/gpu/drm/i915/intel_pm.h index 3fcc9b6e2cbf..6636d2a057cd 100644 --- a/drivers/gpu/drm/i915/intel_pm.h +++ b/drivers/gpu/drm/i915/intel_pm.h @@ -83,5 +83,7 @@ intel_atomic_get_dbuf_state(struct intel_atomic_state *state); to_intel_dbuf_state(intel_atomic_get_new_global_obj_state(state, &to_i915(state->base.dev)->dbuf.obj)) int intel_dbuf_init(struct drm_i915_private *dev_priv); +void intel_dbuf_pre_plane_update(struct intel_atomic_state *state); +void intel_dbuf_post_plane_update(struct intel_atomic_state *state); #endif /* __INTEL_PM_H__ */ From 70b1a26f299c729cc1a5099374cc02568b05ec7d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 25 Feb 2020 19:11:16 +0200 Subject: [PATCH 013/222] drm/i915: Clean up dbuf debugs during .atomic_check() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Combine the two per-pipe dbuf debugs into one, and use the canonical [CRTC:%d:%s] style to identify the crtc. Also use the same style as the plane code uses for the ddb start/end, and prefix bitmask properly with 0x to make it clear they are in fact bitmasks. 
The "how many total slices we are going to use" debug we move to outside the crtc loop so it gets printed only once at the end. Cc: Stanislav Lisovskiy Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20200225171125.28885-12-ville.syrjala@linux.intel.com Reviewed-by: Stanislav Lisovskiy --- drivers/gpu/drm/i915/intel_pm.c | 26 +++++++++++++++++++------- 1 file changed, 19 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index d40d22eb65da..a21e36ed1a77 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -4105,10 +4105,6 @@ skl_ddb_get_pipe_allocation_limits(struct drm_i915_private *dev_priv, */ dbuf_slice_mask = skl_compute_dbuf_slices(crtc_state, active_pipes); - DRM_DEBUG_KMS("DBuf slice mask %x pipe %c active pipes %x\n", - dbuf_slice_mask, - pipe_name(for_pipe), active_pipes); - /* * Figure out at which DBuf slice we start, i.e if we start at Dbuf S2 * and slice size is 1024, the offset would be 1024 @@ -4191,8 +4187,10 @@ skl_ddb_get_pipe_allocation_limits(struct drm_i915_private *dev_priv, alloc->start = offset + start; alloc->end = offset + end; - DRM_DEBUG_KMS("Pipe %d ddb %d-%d\n", for_pipe, - alloc->start, alloc->end); + drm_dbg_kms(&dev_priv->drm, + "[CRTC:%d:%s] dbuf slices 0x%x, ddb (%d - %d), active pipes 0x%x\n", + for_crtc->base.id, for_crtc->name, + dbuf_slice_mask, alloc->start, alloc->end, active_pipes); return 0; } @@ -5706,7 +5704,10 @@ skl_ddb_add_affected_planes(const struct intel_crtc_state *old_crtc_state, static int skl_compute_ddb(struct intel_atomic_state *state) { - struct intel_crtc_state *old_crtc_state; + struct drm_i915_private *dev_priv = to_i915(state->base.dev); + const struct intel_dbuf_state *old_dbuf_state; + const struct intel_dbuf_state *new_dbuf_state; + const struct intel_crtc_state *old_crtc_state; struct intel_crtc_state *new_crtc_state; struct intel_crtc *crtc; int ret, i; @@ -5723,6 +5724,17 @@ 
skl_compute_ddb(struct intel_atomic_state *state) return ret; } + old_dbuf_state = intel_atomic_get_old_dbuf_state(state); + new_dbuf_state = intel_atomic_get_new_dbuf_state(state); + + if (new_dbuf_state && + new_dbuf_state->enabled_slices != old_dbuf_state->enabled_slices) + drm_dbg_kms(&dev_priv->drm, + "Enabled dbuf slices 0x%x -> 0x%x (out of %d dbuf slices)\n", + old_dbuf_state->enabled_slices, + new_dbuf_state->enabled_slices, + INTEL_INFO(dev_priv)->num_supported_dbuf_slices); + return 0; } From 552e01f6639a82acc80eacd4493fff93a2a57256 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 15 May 2020 21:00:31 +0100 Subject: [PATCH 014/222] drm/i915/gem: Retry faulthandlers on ENOSPC As we no longer use the shmemfs allocation directly, we do not expect to receive -ENOSPC from a backing store allocation. The potential sources for -ENOSPC are then our own internal eviction code, so the choice is either to kill the potential application with SIGBUS or to retry the faulthandler. In this patch we retry the fault handler, but since this is a should never happen condition, it is arguable that we gather up copious debug and kill the application. At worst, we cause an interruptible busy-wait, stalling the application -- all causes should be transient and the system should eventually recover. A small stall is hopefully a better outcome than random oomkiller. 
Signed-off-by: Chris Wilson Cc: Matthew Auld Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20200515200031.12034-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gem/i915_gem_mman.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/i915_gem_mman.c index 70f5f82da288..9d306dc9849d 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c @@ -216,12 +216,12 @@ static vm_fault_t i915_error_to_vmf_fault(int err) case -ENXIO: /* unable to access backing store (on device) */ return VM_FAULT_SIGBUS; - case -ENOSPC: /* shmemfs allocation failure */ case -ENOMEM: /* our allocation failure */ return VM_FAULT_OOM; case 0: case -EAGAIN: + case -ENOSPC: /* transient failure to evict? */ case -ERESTARTSYS: case -EINTR: case -EBUSY: From 993fa32eb3d5ffb79e86a770ca982eb9c9f54011 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Fri, 15 May 2020 19:35:45 -0700 Subject: [PATCH 015/222] drm/i915: Mark check_shadow_context_ppgtt as maybe unused When CONFIG_DRM_I915_DEBUG_GEM is not set, clang warns: drivers/gpu/drm/i915/gvt/scheduler.c:884:1: warning: function 'check_shadow_context_ppgtt' is not needed and will not be emitted [-Wunneeded-internal-declaration] check_shadow_context_ppgtt(struct execlist_ring_context *c, struct intel_vgpu_mm *m) ^ 1 warning generated. This warning is similar to -Wunused-function but rather than warning that the function is completely unused, it warns that it is used in some expression within the file but that expression will be evaluated to a constant or be optimized away in the final assembly, essentially making it appear used when it really isn't. Usually, this happens when a function or variable is only used in sizeof, where it will appear to be used but will be evaluated at compile time and not be required to be emitted.
In this case, the function is only used in GEM_BUG_ON, which is defined as BUILD_BUG_ON_INVALID, which intentionally follows this pattern. To fix this warning, add __maybe_unused to make it clear that this is intentional depending on the configuration. Fixes: bec3df930fbd ("drm/i915/gvt: Support PPGTT table load command") Link: https://github.com/ClangBuiltLinux/linux/issues/1027 Signed-off-by: Nathan Chancellor Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20200516023545.3332334-1-natechancellor@gmail.com --- drivers/gpu/drm/i915/gvt/scheduler.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c index c00189432b58..3a9bd8e4d8db 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.c +++ b/drivers/gpu/drm/i915/gvt/scheduler.c @@ -876,7 +876,7 @@ static void update_guest_pdps(struct intel_vgpu *vgpu, gpa + i * 8, &pdp[7 - i], 4); } -static bool +static __maybe_unused bool check_shadow_context_ppgtt(struct execlist_ring_context *c, struct intel_vgpu_mm *m) { if (m->ppgtt_mm.root_entry_type == GTT_TYPE_PPGTT_ROOT_L4_ENTRY) { From 67a64e51ba9266e64ae705f4dc9a244ad6da4376 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 18 May 2020 11:29:11 +0100 Subject: [PATCH 016/222] drm/i915/selftests: Refactor sibling selection Tvrtko spotted that some selftests were using 'break' not 'continue', which will fail for discontiguous engine layouts such as on Icelake (which may have vcs0 and vcs2). 
Reported-by: Tvrtko Ursulin Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20200518102911.3463-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/selftest_lrc.c | 68 ++++++++++---------------- 1 file changed, 27 insertions(+), 41 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c index 824f99c4cc7c..94854a467e66 100644 --- a/drivers/gpu/drm/i915/gt/selftest_lrc.c +++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c @@ -3600,13 +3600,30 @@ out: return err; } +static unsigned int select_siblings(struct intel_gt *gt, + unsigned int class, + struct intel_engine_cs **siblings) +{ + unsigned int n = 0; + unsigned int inst; + + for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) { + if (!gt->engine_class[class][inst]) + continue; + + siblings[n++] = gt->engine_class[class][inst]; + } + + return n; +} + static int live_virtual_engine(void *arg) { struct intel_gt *gt = arg; struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1]; struct intel_engine_cs *engine; enum intel_engine_id id; - unsigned int class, inst; + unsigned int class; int err; if (intel_uc_uses_guc_submission(>->uc)) @@ -3624,13 +3641,7 @@ static int live_virtual_engine(void *arg) for (class = 0; class <= MAX_ENGINE_CLASS; class++) { int nsibling, n; - nsibling = 0; - for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) { - if (!gt->engine_class[class][inst]) - continue; - - siblings[nsibling++] = gt->engine_class[class][inst]; - } + nsibling = select_siblings(gt, class, siblings); if (nsibling < 2) continue; @@ -3739,7 +3750,7 @@ static int live_virtual_mask(void *arg) { struct intel_gt *gt = arg; struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1]; - unsigned int class, inst; + unsigned int class; int err; if (intel_uc_uses_guc_submission(>->uc)) @@ -3748,13 +3759,7 @@ static int live_virtual_mask(void *arg) for (class = 0; class <= MAX_ENGINE_CLASS; class++) { unsigned 
int nsibling; - nsibling = 0; - for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) { - if (!gt->engine_class[class][inst]) - break; - - siblings[nsibling++] = gt->engine_class[class][inst]; - } + nsibling = select_siblings(gt, class, siblings); if (nsibling < 2) continue; @@ -3876,7 +3881,7 @@ static int live_virtual_preserved(void *arg) { struct intel_gt *gt = arg; struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1]; - unsigned int class, inst; + unsigned int class; /* * Check that the context image retains non-privileged (user) registers @@ -3894,13 +3899,7 @@ static int live_virtual_preserved(void *arg) for (class = 0; class <= MAX_ENGINE_CLASS; class++) { int nsibling, err; - nsibling = 0; - for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) { - if (!gt->engine_class[class][inst]) - continue; - - siblings[nsibling++] = gt->engine_class[class][inst]; - } + nsibling = select_siblings(gt, class, siblings); if (nsibling < 2) continue; @@ -4111,7 +4110,7 @@ static int live_virtual_bond(void *arg) }; struct intel_gt *gt = arg; struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1]; - unsigned int class, inst; + unsigned int class; int err; if (intel_uc_uses_guc_submission(>->uc)) @@ -4121,14 +4120,7 @@ static int live_virtual_bond(void *arg) const struct phase *p; int nsibling; - nsibling = 0; - for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) { - if (!gt->engine_class[class][inst]) - break; - - GEM_BUG_ON(nsibling == ARRAY_SIZE(siblings)); - siblings[nsibling++] = gt->engine_class[class][inst]; - } + nsibling = select_siblings(gt, class, siblings); if (nsibling < 2) continue; @@ -4266,7 +4258,7 @@ static int live_virtual_reset(void *arg) { struct intel_gt *gt = arg; struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1]; - unsigned int class, inst; + unsigned int class; /* * Check that we handle a reset event within a virtual engine. 
@@ -4284,13 +4276,7 @@ static int live_virtual_reset(void *arg) for (class = 0; class <= MAX_ENGINE_CLASS; class++) { int nsibling, err; - nsibling = 0; - for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) { - if (!gt->engine_class[class][inst]) - continue; - - siblings[nsibling++] = gt->engine_class[class][inst]; - } + nsibling = select_siblings(gt, class, siblings); if (nsibling < 2) continue; From f5f7e790a53d7982c6224d3dbdcd7f35409a5eae Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 18 May 2020 09:14:35 +0100 Subject: [PATCH 017/222] drm/i915/gt: Reuse the tasklet priority for virtual as their siblings In order to keep all the tasklets in the same execution lists and so fifo ordered, be consistent and use the same priority for all. Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20200518081440.17948-3-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/intel_lrc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index 87e6c5bdd2dc..d7ef3f8640d2 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -1403,7 +1403,7 @@ static void kick_siblings(struct i915_request *rq, struct intel_context *ce) struct i915_request *next = READ_ONCE(ve->request); if (next && next->execution_mask & ~rq->execution_mask) - tasklet_schedule(&ve->base.execlists.tasklet); + tasklet_hi_schedule(&ve->base.execlists.tasklet); } static inline void @@ -5560,7 +5560,7 @@ static void virtual_submit_request(struct i915_request *rq) GEM_BUG_ON(!list_empty(virtual_queue(ve))); list_move_tail(&rq->sched.link, virtual_queue(ve)); - tasklet_schedule(&ve->base.execlists.tasklet); + tasklet_hi_schedule(&ve->base.execlists.tasklet); } spin_unlock_irqrestore(&ve->base.active.lock, flags); From b6a13a386e288495233245b31e0fb362b9331f17 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= 
Date: Mon, 18 May 2020 15:13:54 +0300 Subject: [PATCH 018/222] drm/i915: Fix dbuf slice mask when turning off all the pipes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The current dbuf slice computation only happens when there are active pipes. If we are turning off all the pipes we just leave the dbuf slice mask at its previous value, which may be something other than BIT(S1). If runtime PM kicks in it will however turn off everything but S1. Then on the next atomic commit (if the new dbuf slice mask matches the stale value we left behind) the code will not turn on the other slices we now need. This will lead to underruns as the planes are trying to use a dbuf slice that's not powered up. To work around this let's just explicitly set the dbuf slice mask to BIT(S1) when we are turning off all the pipes. Really the code should just calculate this stuff the same way regardless of whether the pipes are on or off, but we're not quite there yet (need a bit more work on the dbuf state for that).
v2: Let's not put the fix into dead code Cc: Stanislav Lisovskiy Acked-by: Chris Wilson Fixes: 3cf43cdc63fb ("drm/i915: Introduce proper dbuf state") Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20200518121354.20401-1-ville.syrjala@linux.intel.com --- drivers/gpu/drm/i915/intel_pm.c | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index a21e36ed1a77..0082582d8352 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -4764,6 +4764,30 @@ skl_allocate_pipe_ddb(struct intel_crtc_state *crtc_state) memset(crtc_state->wm.skl.plane_ddb_uv, 0, sizeof(crtc_state->wm.skl.plane_ddb_uv)); if (!crtc_state->hw.active) { + struct intel_atomic_state *state = + to_intel_atomic_state(crtc_state->uapi.state); + struct intel_dbuf_state *new_dbuf_state = + intel_atomic_get_new_dbuf_state(state); + const struct intel_dbuf_state *old_dbuf_state = + intel_atomic_get_old_dbuf_state(state); + + /* + * FIXME hack to make sure we compute this sensibly when + * turning off all the pipes. Otherwise we leave it at + * whatever we had previously, and then runtime PM will + * mess it up by turning off all but S1. Remove this + * once the dbuf state computation flow becomes sane. 
+ */ + if (new_dbuf_state->active_pipes == 0) { + new_dbuf_state->enabled_slices = BIT(DBUF_S1); + + if (old_dbuf_state->enabled_slices != new_dbuf_state->enabled_slices) { + ret = intel_atomic_serialize_global_state(&new_dbuf_state->base); + if (ret) + return ret; + } + } + alloc->start = alloc->end = 0; return 0; } From cba597ac45170469a9a60c06d20d64b5af5dc48c Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sat, 16 May 2020 20:09:40 +0100 Subject: [PATCH 019/222] drm/i915/display: Return error from dbuf allocation failure MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit drivers/gpu/drm/i915/intel_pm.c:5920 skl_ddb_add_affected_pipes() error: uninitialized symbol 'ret'. Fixes: 3cf43cdc63fb ("drm/i915: Introduce proper dbuf state") Signed-off-by: Chris Wilson Cc: Stanislav Lisovskiy Cc: Ville Syrjälä Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20200516190940.12675-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_pm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 0082582d8352..5ebc1585622e 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -5941,7 +5941,7 @@ skl_ddb_add_affected_pipes(struct intel_atomic_state *state) new_dbuf_state = intel_atomic_get_dbuf_state(state); if (IS_ERR(new_dbuf_state)) - return ret; + return PTR_ERR(new_dbuf_state); old_dbuf_state = intel_atomic_get_old_dbuf_state(state); From 6f62bda1eaa51240cfaba326443e91dfb52fbf2a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Thu, 14 May 2020 15:38:36 +0300 Subject: [PATCH 020/222] drm/i915: Fix 400 MHz FSB readout on elk MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Looks like elk redefines some of the CLKCFG FSB values to make room for 400 MHz FSB. 
The setting overlaps with one of the 266MHz settings (which is even documented in the ctg docs, and confirmed to be correct on my ctg). So we limit the special case to elk only. Though it might also be that we have some kind of desktop vs. mobile difference going on here (as eg. both g35 and elk use 0x0 for the 266 MHz setting, vs. 0x6 used by ctg). The g35 doesn't let me select 400MHz for the FSB strap so I can't confirm which way it would go here. But anyways as it seems only elk has the 400MHz option we shouldn't lose anything by limiting the special case to it alone. My earlier experiments on this appear to have been nonsense as the comment I added claims that FSB strap of 400MHz results in a value of 0x4, but I've now retested it and I definitely get a value of 0x6 instead. So let's remove that bogus comment. v2: s/_ELK/_ALT/ in the define in anticipation of a full mobile vs. desktop CLKCFG split Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20200514123838.3017-1-ville.syrjala@linux.intel.com Acked-by: Chris Wilson --- drivers/gpu/drm/i915/display/intel_cdclk.c | 9 +++++++-- drivers/gpu/drm/i915/i915_reg.h | 6 +----- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_cdclk.c b/drivers/gpu/drm/i915/display/intel_cdclk.c index 979a0241fdcb..c17cf611625c 100644 --- a/drivers/gpu/drm/i915/display/intel_cdclk.c +++ b/drivers/gpu/drm/i915/display/intel_cdclk.c @@ -2705,8 +2705,13 @@ static int g4x_hrawclk(struct drm_i915_private *dev_priv) u32 clkcfg; /* hrawclock is 1/4 the FSB frequency */ - clkcfg = intel_de_read(dev_priv, CLKCFG); - switch (clkcfg & CLKCFG_FSB_MASK) { + clkcfg = intel_de_read(dev_priv, CLKCFG) & CLKCFG_FSB_MASK; + + /* ELK seems to redefine some of the values */ + if (IS_G45(dev_priv) && clkcfg == CLKCFG_FSB_1600_ALT) + return 400000; + + switch (clkcfg) { case CLKCFG_FSB_400: return 100000; case CLKCFG_FSB_533: diff --git a/drivers/gpu/drm/i915/i915_reg.h
b/drivers/gpu/drm/i915/i915_reg.h index 6c076a24eb82..10187780e06c 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -3775,12 +3775,8 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define CLKCFG_FSB_1067 (6 << 0) /* hrawclk 266 */ #define CLKCFG_FSB_1067_ALT (0 << 0) /* hrawclk 266 */ #define CLKCFG_FSB_1333 (7 << 0) /* hrawclk 333 */ -/* - * Note that on at least on ELK the below value is reported for both - * 333 and 400 MHz BIOS FSB setting, but given that the gmch datasheet - * lists only 200/266/333 MHz FSB as supported let's decode it as 333 MHz. - */ #define CLKCFG_FSB_1333_ALT (4 << 0) /* hrawclk 333 */ +#define CLKCFG_FSB_1600_ALT (6 << 0) /* hrawclk 400 */ #define CLKCFG_FSB_MASK (7 << 0) #define CLKCFG_MEM_533 (1 << 4) #define CLKCFG_MEM_667 (2 << 4) From 42ab330530b47b44070af592e2f84a2a0b61532e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Thu, 14 May 2020 15:38:37 +0300 Subject: [PATCH 021/222] drm/i915: Document our lackluster FSB frequency readout MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Document the fact that we aren't reading out the actual FSB frequency but rather just the state of the FSB straps. Some BIOSen allow you to configure the two independently. So if someone sets the two up in an inconsistent manner we'll get the wrong answer here and thus will end up with incorrect aux/pps clock dividers. Alas, proper docs are no longer around so we can't do any better. 
Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20200514123838.3017-2-ville.syrjala@linux.intel.com Reviewed-by: Chris Wilson --- drivers/gpu/drm/i915/display/intel_cdclk.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/intel_cdclk.c b/drivers/gpu/drm/i915/display/intel_cdclk.c index c17cf611625c..d57dfec7e9a5 100644 --- a/drivers/gpu/drm/i915/display/intel_cdclk.c +++ b/drivers/gpu/drm/i915/display/intel_cdclk.c @@ -2704,7 +2704,16 @@ static int g4x_hrawclk(struct drm_i915_private *dev_priv) { u32 clkcfg; - /* hrawclock is 1/4 the FSB frequency */ + /* + * hrawclock is 1/4 the FSB frequency + * + * Note that this only reads the state of the FSB + * straps, not the actual FSB frequency. Some BIOSen + * let you configure each independently. Ideally we'd + * read out the actual FSB frequency but sadly we + * don't know which registers have that information, + * and all the relevant docs have gone to bit heaven :( + */ clkcfg = intel_de_read(dev_priv, CLKCFG) & CLKCFG_FSB_MASK; /* ELK seems to redefine some of the values */ From 488e017904f9a04d3b771a0bd70be97da57a834d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Thu, 14 May 2020 15:38:38 +0300 Subject: [PATCH 022/222] drm/i915: Read out hrawclk on all gen3+ platforms MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit I've checked a bunch of gen3/4 machines and all seem to have consistent FSB frequency information in the CLKCFG register. So let's read out hrawclk on all gen3+ machines. Although apart from g4x/pnv aux/pps dividers we only really need this for i965g/gm cs timestamp increment. The CLKCFG memory clock values seem less consistent but we don't care about those here. For posterity here's a list of CLKCFG vs.
FSB dumps from a bunch of machines (only missing lpt for a full set): machine CLKCFG FSB alv1 0x00001411 533 alv2 0x00000420 400 (Chris) gdg1 0x20000022 800 gdg2 0x20000022 800 cst 0x00010043 666 blb 0x00002034 1333 pnv1 0x00000423 666 pnv2 0x00000433 666 965gm 0x00004342 800 946gz 0x00000022 800 965g 0x00000422 800 g35 0x00000430 1066 0x00000434 1333 ctg1 0x00644056 1066 ctg2 0x00644066 1066 elk1 0x00012420 1066 0x00012424 1333 0x00012436 1600 0x00012422 800 elk2 0x00012040 1066 For the mobile parts the chipset docs generally have these documented to some degree (alv being the exception). The two settings w/o any evidence are 0x5=400MHz on desktop and 0x7=1333MHz on mobile. Though the mobile 1333MHz case probably doesn't even exist since ctg is only documented to go up to 1066MHz. v2: Fix 400mhz readout for Chris's alv/celeron machine Do a clean mobile vs. desktop split since that's really what seems to be going on Cc: Chris Wilson Cc: Lionel Landwerlin Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20200514123838.3017-3-ville.syrjala@linux.intel.com Acked-by: Chris Wilson --- drivers/gpu/drm/i915/display/intel_cdclk.c | 64 ++++++++++++++-------- drivers/gpu/drm/i915/i915_reg.h | 3 +- 2 files changed, 42 insertions(+), 25 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_cdclk.c b/drivers/gpu/drm/i915/display/intel_cdclk.c index d57dfec7e9a5..9419a4724357 100644 --- a/drivers/gpu/drm/i915/display/intel_cdclk.c +++ b/drivers/gpu/drm/i915/display/intel_cdclk.c @@ -2700,7 +2700,7 @@ static int vlv_hrawclk(struct drm_i915_private *dev_priv) CCK_DISPLAY_REF_CLOCK_CONTROL); } -static int g4x_hrawclk(struct drm_i915_private *dev_priv) +static int i9xx_hrawclk(struct drm_i915_private *dev_priv) { u32 clkcfg; @@ -2716,27 +2716,43 @@ static int g4x_hrawclk(struct drm_i915_private *dev_priv) */ clkcfg = intel_de_read(dev_priv, CLKCFG) & CLKCFG_FSB_MASK; - /* ELK seems to redefine some of the values */ - if (IS_G45(dev_priv) &&
clkcfg == CLKCFG_FSB_1600_ALT) - return 400000; - - switch (clkcfg) { - case CLKCFG_FSB_400: - return 100000; - case CLKCFG_FSB_533: - return 133333; - case CLKCFG_FSB_667: - return 166667; - case CLKCFG_FSB_800: - return 200000; - case CLKCFG_FSB_1067: - case CLKCFG_FSB_1067_ALT: - return 266667; - case CLKCFG_FSB_1333: - case CLKCFG_FSB_1333_ALT: - return 333333; - default: - return 133333; + if (IS_MOBILE(dev_priv)) { + switch (clkcfg) { + case CLKCFG_FSB_400: + return 100000; + case CLKCFG_FSB_533: + return 133333; + case CLKCFG_FSB_667: + return 166667; + case CLKCFG_FSB_800: + return 200000; + case CLKCFG_FSB_1067: + return 266667; + case CLKCFG_FSB_1333: + return 333333; + default: + MISSING_CASE(clkcfg); + return 133333; + } + } else { + switch (clkcfg) { + case CLKCFG_FSB_400_ALT: + return 100000; + case CLKCFG_FSB_533: + return 133333; + case CLKCFG_FSB_667: + return 166667; + case CLKCFG_FSB_800: + return 200000; + case CLKCFG_FSB_1067_ALT: + return 266667; + case CLKCFG_FSB_1333_ALT: + return 333333; + case CLKCFG_FSB_1600_ALT: + return 400000; + default: + return 133333; + } } } @@ -2757,8 +2773,8 @@ u32 intel_read_rawclk(struct drm_i915_private *dev_priv) freq = pch_rawclk(dev_priv); else if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) freq = vlv_hrawclk(dev_priv); - else if (IS_G4X(dev_priv) || IS_PINEVIEW(dev_priv)) - freq = g4x_hrawclk(dev_priv); + else if (INTEL_GEN(dev_priv) >= 3) + freq = i9xx_hrawclk(dev_priv); else /* no rawclk on other platforms, or no need to know it */ return 0; diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 10187780e06c..f774ec2bcc99 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -3768,7 +3768,8 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) /* Clocking configuration register */ #define CLKCFG _MMIO(MCHBAR_MIRROR_BASE + 0xc00) -#define CLKCFG_FSB_400 (5 << 0) /* hrawclk 100 */ +#define CLKCFG_FSB_400 (0 << 0) /* hrawclk 100 */ 
+#define CLKCFG_FSB_400_ALT (5 << 0) /* hrawclk 100 */ #define CLKCFG_FSB_533 (1 << 0) /* hrawclk 133 */ #define CLKCFG_FSB_667 (3 << 0) /* hrawclk 166 */ #define CLKCFG_FSB_800 (2 << 0) /* hrawclk 200 */ From ae9b6cfe1352da25931bce3ea4acfd4dc1ac8a85 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Thu, 14 May 2020 23:45:53 +0300 Subject: [PATCH 023/222] drm/i915: Fix AUX power domain toggling across TypeC mode resets MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Make sure to select the port's AUX power domain while holding the TC port lock. The domain depends on the port's current TC mode, which may get changed under us if we're not holding the lock. This was left out from commit 8c10e2262663 ("drm/i915: Keep the TypeC port mode fixed for detect/AUX transfers") Cc: # v5.4+ Signed-off-by: Imre Deak Reviewed-by: José Roberto de Souza Link: https://patchwork.freedesktop.org/patch/msgid/20200514204553.27193-1-imre.deak@intel.com --- drivers/gpu/drm/i915/display/intel_dp.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index 408c3c1c5e81..40d42dcff0b7 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -1359,8 +1359,7 @@ intel_dp_aux_xfer(struct intel_dp *intel_dp, bool is_tc_port = intel_phy_is_tc(i915, phy); i915_reg_t ch_ctl, ch_data[5]; u32 aux_clock_divider; - enum intel_display_power_domain aux_domain = - intel_aux_power_domain(intel_dig_port); + enum intel_display_power_domain aux_domain; intel_wakeref_t aux_wakeref; intel_wakeref_t pps_wakeref; int i, ret, recv_bytes; @@ -1375,6 +1374,8 @@ intel_dp_aux_xfer(struct intel_dp *intel_dp, if (is_tc_port) intel_tc_port_lock(intel_dig_port); + aux_domain = intel_aux_power_domain(intel_dig_port); + aux_wakeref = intel_display_power_get(i915, aux_domain); pps_wakeref = pps_lock(intel_dp); From 
4c1ccdf721128a4e8299786d586d145164d3cfcd Mon Sep 17 00:00:00 2001 From: Pankaj Bharadiya Date: Mon, 4 May 2020 23:45:52 +0530 Subject: [PATCH 024/222] drm/i915/display/display_power: Prefer drm_WARN_ON over WARN_ON struct drm_device specific drm_WARN* macros include device information in the backtrace, so we know what device the warnings originate from. Prefer drm_WARN_ON over WARN_ON at places where struct i915_power_domains struct is available. Conversion is done with below sementic patch: @@ identifier func, T; @@ func(struct i915_power_domains *T,...) { + struct drm_i915_private *i915 = container_of(T, struct drm_i915_private, power_domains); <+... -WARN_ON( +drm_WARN_ON(&i915->drm, ...) ...+> } changes since v1: - Fix commit subject (Jani) Signed-off-by: Pankaj Bharadiya Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20200504181600.18503-2-pankaj.laxminarayan.bharadiya@intel.com --- .../drm/i915/display/intel_display_power.c | 35 +++++++++++++------ 1 file changed, 24 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_display_power.c b/drivers/gpu/drm/i915/display/intel_display_power.c index 7a8213993110..7762e5d0e1b0 100644 --- a/drivers/gpu/drm/i915/display/intel_display_power.c +++ b/drivers/gpu/drm/i915/display/intel_display_power.c @@ -1943,22 +1943,29 @@ static u64 __async_put_domains_mask(struct i915_power_domains *power_domains) static bool assert_async_put_domain_masks_disjoint(struct i915_power_domains *power_domains) { - return !WARN_ON(power_domains->async_put_domains[0] & - power_domains->async_put_domains[1]); + struct drm_i915_private *i915 = container_of(power_domains, + struct drm_i915_private, + power_domains); + return !drm_WARN_ON(&i915->drm, power_domains->async_put_domains[0] & + power_domains->async_put_domains[1]); } static bool __async_put_domains_state_ok(struct i915_power_domains *power_domains) { + struct drm_i915_private *i915 = container_of(power_domains, + struct 
drm_i915_private, + power_domains); enum intel_display_power_domain domain; bool err = false; err |= !assert_async_put_domain_masks_disjoint(power_domains); - err |= WARN_ON(!!power_domains->async_put_wakeref != - !!__async_put_domains_mask(power_domains)); + err |= drm_WARN_ON(&i915->drm, !!power_domains->async_put_wakeref != + !!__async_put_domains_mask(power_domains)); for_each_power_domain(domain, __async_put_domains_mask(power_domains)) - err |= WARN_ON(power_domains->domain_use_count[domain] != 1); + err |= drm_WARN_ON(&i915->drm, + power_domains->domain_use_count[domain] != 1); return !err; } @@ -2200,11 +2207,14 @@ static void queue_async_put_domains_work(struct i915_power_domains *power_domains, intel_wakeref_t wakeref) { - WARN_ON(power_domains->async_put_wakeref); + struct drm_i915_private *i915 = container_of(power_domains, + struct drm_i915_private, + power_domains); + drm_WARN_ON(&i915->drm, power_domains->async_put_wakeref); power_domains->async_put_wakeref = wakeref; - WARN_ON(!queue_delayed_work(system_unbound_wq, - &power_domains->async_put_work, - msecs_to_jiffies(100))); + drm_WARN_ON(&i915->drm, !queue_delayed_work(system_unbound_wq, + &power_domains->async_put_work, + msecs_to_jiffies(100))); } static void @@ -4365,6 +4375,9 @@ __set_power_wells(struct i915_power_domains *power_domains, const struct i915_power_well_desc *power_well_descs, int power_well_count) { + struct drm_i915_private *i915 = container_of(power_domains, + struct drm_i915_private, + power_domains); u64 power_well_ids = 0; int i; @@ -4384,8 +4397,8 @@ __set_power_wells(struct i915_power_domains *power_domains, if (id == DISP_PW_ID_NONE) continue; - WARN_ON(id >= sizeof(power_well_ids) * 8); - WARN_ON(power_well_ids & BIT_ULL(id)); + drm_WARN_ON(&i915->drm, id >= sizeof(power_well_ids) * 8); + drm_WARN_ON(&i915->drm, power_well_ids & BIT_ULL(id)); power_well_ids |= BIT_ULL(id); } From 4f3604821974a600d493c88ccdd5db0bcc89041d Mon Sep 17 00:00:00 2001 From: Pankaj Bharadiya 
Date: Mon, 4 May 2020 23:45:53 +0530 Subject: [PATCH 025/222] drm/i915/display/dp: Prefer drm_WARN* over WARN* struct drm_device specific drm_WARN* macros include device information in the backtrace, so we know what device the warnings originate from. Prefer drm_WARN* over WARN* at places where struct intel_dp or struct drm_i915_private pointer is available. Conversion is done with the semantic patch below: @rule1@ identifier func, T; @@ func(...) { ... struct drm_i915_private *T = ...; <+... ( -WARN_ON( +drm_WARN_ON(&T->drm, ...) | -WARN_ON_ONCE( +drm_WARN_ON_ONCE(&T->drm, ...) ) ...+> } @rule2@ identifier func, T; @@ func(struct drm_i915_private *T,...) { <+... ( -WARN_ON( +drm_WARN_ON(&T->drm, ...) | -WARN_ON_ONCE( +drm_WARN_ON_ONCE(&T->drm, ...) ) ...+> } @rule3@ identifier func, T; @@ func(struct intel_dp *T,...) { + struct drm_i915_private *i915 = dp_to_i915(T); <+... ( -WARN_ON( +drm_WARN_ON(&i915->drm, ...) | -WARN_ON_ONCE( +drm_WARN_ON_ONCE(&i915->drm, ...) ) ...+> } @rule4@ identifier func, T; @@ func(...) { ... struct intel_dp *T = ...; + struct drm_i915_private *i915 = dp_to_i915(T); <+... ( -WARN_ON( +drm_WARN_ON(&i915->drm, ...) | -WARN_ON_ONCE( +drm_WARN_ON_ONCE(&i915->drm, ...)
) ...+> } Signed-off-by: Pankaj Bharadiya Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20200504181600.18503-3-pankaj.laxminarayan.bharadiya@intel.com --- drivers/gpu/drm/i915/display/intel_dp.c | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index 40d42dcff0b7..1768731678a1 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -409,7 +409,10 @@ static int intel_dp_rate_index(const int *rates, int len, int rate) static void intel_dp_set_common_rates(struct intel_dp *intel_dp) { - WARN_ON(!intel_dp->num_source_rates || !intel_dp->num_sink_rates); + struct drm_i915_private *i915 = dp_to_i915(intel_dp); + + drm_WARN_ON(&i915->drm, + !intel_dp->num_source_rates || !intel_dp->num_sink_rates); intel_dp->num_common_rates = intersect_rates(intel_dp->source_rates, intel_dp->num_source_rates, @@ -418,7 +421,7 @@ static void intel_dp_set_common_rates(struct intel_dp *intel_dp) intel_dp->common_rates); /* Paranoia, there should always be something in common. */ - if (WARN_ON(intel_dp->num_common_rates == 0)) { + if (drm_WARN_ON(&i915->drm, intel_dp->num_common_rates == 0)) { intel_dp->common_rates[0] = 162000; intel_dp->num_common_rates = 1; } @@ -1555,6 +1558,7 @@ static ssize_t intel_dp_aux_transfer(struct drm_dp_aux *aux, struct drm_dp_aux_msg *msg) { struct intel_dp *intel_dp = container_of(aux, struct intel_dp, aux); + struct drm_i915_private *i915 = dp_to_i915(intel_dp); u8 txbuf[20], rxbuf[20]; size_t txsize, rxsize; int ret; @@ -1568,10 +1572,10 @@ intel_dp_aux_transfer(struct drm_dp_aux *aux, struct drm_dp_aux_msg *msg) txsize = msg->size ? 
HEADER_SIZE + msg->size : BARE_ADDRESS_SIZE; rxsize = 2; /* 0 or 1 data bytes */ - if (WARN_ON(txsize > 20)) + if (drm_WARN_ON(&i915->drm, txsize > 20)) return -E2BIG; - WARN_ON(!msg->buffer != !msg->size); + drm_WARN_ON(&i915->drm, !msg->buffer != !msg->size); if (msg->buffer) memcpy(txbuf + HEADER_SIZE, msg->buffer, msg->size); @@ -1596,7 +1600,7 @@ intel_dp_aux_transfer(struct drm_dp_aux *aux, struct drm_dp_aux_msg *msg) txsize = msg->size ? HEADER_SIZE : BARE_ADDRESS_SIZE; rxsize = msg->size + 1; - if (WARN_ON(rxsize > 20)) + if (drm_WARN_ON(&i915->drm, rxsize > 20)) return -E2BIG; ret = intel_dp_aux_xfer(intel_dp, txbuf, txsize, @@ -1871,10 +1875,11 @@ static void intel_dp_print_rates(struct intel_dp *intel_dp) int intel_dp_max_link_rate(struct intel_dp *intel_dp) { + struct drm_i915_private *i915 = dp_to_i915(intel_dp); int len; len = intel_dp_common_len_rate_limit(intel_dp, intel_dp->max_link_rate); - if (WARN_ON(len <= 0)) + if (drm_WARN_ON(&i915->drm, len <= 0)) return 162000; return intel_dp->common_rates[len - 1]; @@ -1882,10 +1887,11 @@ intel_dp_max_link_rate(struct intel_dp *intel_dp) int intel_dp_rate_select(struct intel_dp *intel_dp, int rate) { + struct drm_i915_private *i915 = dp_to_i915(intel_dp); int i = intel_dp_rate_index(intel_dp->sink_rates, intel_dp->num_sink_rates, rate); - if (WARN_ON(i < 0)) + if (drm_WARN_ON(&i915->drm, i < 0)) i = 0; return i; @@ -5601,7 +5607,7 @@ intel_dp_check_mst_status(struct intel_dp *intel_dp) if (!intel_dp->is_mst) return -EINVAL; - WARN_ON_ONCE(intel_dp->active_mst_links < 0); + drm_WARN_ON_ONCE(&i915->drm, intel_dp->active_mst_links < 0); for (;;) { u8 esi[DP_DPRX_ESI_LEN] = {}; @@ -5963,7 +5969,7 @@ intel_dp_detect_dpcd(struct intel_dp *intel_dp) u8 *dpcd = intel_dp->dpcd; u8 type; - if (WARN_ON(intel_dp_is_edp(intel_dp))) + if (drm_WARN_ON(&i915->drm, intel_dp_is_edp(intel_dp))) return connector_status_connected; if (lspcon->active) From cb7cbb4b2aeca663f7dfca95649d90f292866dc6 Mon Sep 17 00:00:00 2001 From: 
Pankaj Bharadiya Date: Mon, 4 May 2020 23:45:54 +0530 Subject: [PATCH 026/222] drm/i915/display/sdvo: Prefer drm_WARN* over WARN* struct drm_device specific drm_WARN* macros include device information in the backtrace, so we know what device the warnings originate from. Prefer drm_WARN* over WARN* calls. changes since v1: - Added dev_priv local variable and used it in drm_WARN* calls (Jani) Signed-off-by: Pankaj Bharadiya Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20200504181600.18503-4-pankaj.laxminarayan.bharadiya@intel.com --- drivers/gpu/drm/i915/display/intel_sdvo.c | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_sdvo.c b/drivers/gpu/drm/i915/display/intel_sdvo.c index bc6c26818e15..773523dcd107 100644 --- a/drivers/gpu/drm/i915/display/intel_sdvo.c +++ b/drivers/gpu/drm/i915/display/intel_sdvo.c @@ -411,6 +411,7 @@ static const char *sdvo_cmd_name(u8 cmd) static void intel_sdvo_debug_write(struct intel_sdvo *intel_sdvo, u8 cmd, const void *args, int args_len) { + struct drm_i915_private *dev_priv = to_i915(intel_sdvo->base.base.dev); const char *cmd_name; int i, pos = 0; char buffer[64]; @@ -431,7 +432,7 @@ static void intel_sdvo_debug_write(struct intel_sdvo *intel_sdvo, u8 cmd, else BUF_PRINT("(%02X)", cmd); - WARN_ON(pos >= sizeof(buffer) - 1); + drm_WARN_ON(&dev_priv->drm, pos >= sizeof(buffer) - 1); #undef BUF_PRINT DRM_DEBUG_KMS("%s: W: %02X %s\n", SDVO_NAME(intel_sdvo), cmd, buffer); @@ -533,6 +534,7 @@ static bool intel_sdvo_write_cmd(struct intel_sdvo *intel_sdvo, u8 cmd, static bool intel_sdvo_read_response(struct intel_sdvo *intel_sdvo, void *response, int response_len) { + struct drm_i915_private *dev_priv = to_i915(intel_sdvo->base.base.dev); const char *cmd_status; u8 retry = 15; /* 5 quick checks, followed by 10 long checks */ u8 status; @@ -597,7 +599,7 @@ static bool intel_sdvo_read_response(struct intel_sdvo *intel_sdvo, 
BUF_PRINT(" %02X", ((u8 *)response)[i]); } - WARN_ON(pos >= sizeof(buffer) - 1); + drm_WARN_ON(&dev_priv->drm, pos >= sizeof(buffer) - 1); #undef BUF_PRINT DRM_DEBUG_KMS("%s: R: %s\n", SDVO_NAME(intel_sdvo), buffer); @@ -1081,6 +1083,7 @@ static bool intel_sdvo_compute_avi_infoframe(struct intel_sdvo *intel_sdvo, struct intel_crtc_state *crtc_state, struct drm_connector_state *conn_state) { + struct drm_i915_private *dev_priv = to_i915(intel_sdvo->base.base.dev); struct hdmi_avi_infoframe *frame = &crtc_state->infoframes.avi.avi; const struct drm_display_mode *adjusted_mode = &crtc_state->hw.adjusted_mode; @@ -1106,7 +1109,7 @@ static bool intel_sdvo_compute_avi_infoframe(struct intel_sdvo *intel_sdvo, HDMI_QUANTIZATION_RANGE_FULL); ret = hdmi_avi_infoframe_check(frame); - if (WARN_ON(ret)) + if (drm_WARN_ON(&dev_priv->drm, ret)) return false; return true; @@ -1115,6 +1118,7 @@ static bool intel_sdvo_compute_avi_infoframe(struct intel_sdvo *intel_sdvo, static bool intel_sdvo_set_avi_infoframe(struct intel_sdvo *intel_sdvo, const struct intel_crtc_state *crtc_state) { + struct drm_i915_private *dev_priv = to_i915(intel_sdvo->base.base.dev); u8 sdvo_data[HDMI_INFOFRAME_SIZE(AVI)]; const union hdmi_infoframe *frame = &crtc_state->infoframes.avi; ssize_t len; @@ -1123,11 +1127,12 @@ static bool intel_sdvo_set_avi_infoframe(struct intel_sdvo *intel_sdvo, intel_hdmi_infoframe_enable(HDMI_INFOFRAME_TYPE_AVI)) == 0) return true; - if (WARN_ON(frame->any.type != HDMI_INFOFRAME_TYPE_AVI)) + if (drm_WARN_ON(&dev_priv->drm, + frame->any.type != HDMI_INFOFRAME_TYPE_AVI)) return false; len = hdmi_infoframe_pack_only(frame, sdvo_data, sizeof(sdvo_data)); - if (WARN_ON(len < 0)) + if (drm_WARN_ON(&dev_priv->drm, len < 0)) return false; return intel_sdvo_write_infoframe(intel_sdvo, SDVO_HBUF_INDEX_AVI_IF, @@ -1237,6 +1242,7 @@ intel_sdvo_get_preferred_input_mode(struct intel_sdvo *intel_sdvo, static void i9xx_adjust_sdvo_tv_clock(struct intel_crtc_state *pipe_config) { + struct 
drm_i915_private *dev_priv = to_i915(pipe_config->uapi.crtc->dev); unsigned dotclock = pipe_config->port_clock; struct dpll *clock = &pipe_config->dpll; @@ -1257,7 +1263,8 @@ static void i9xx_adjust_sdvo_tv_clock(struct intel_crtc_state *pipe_config) clock->m1 = 12; clock->m2 = 8; } else { - WARN(1, "SDVO TV clock out of range: %i\n", dotclock); + drm_WARN(&dev_priv->drm, 1, + "SDVO TV clock out of range: %i\n", dotclock); } pipe_config->clock_set = true; @@ -2293,7 +2300,7 @@ intel_sdvo_connector_atomic_get_property(struct drm_connector *connector, return 0; } - WARN_ON(1); + drm_WARN_ON(connector->dev, 1); *val = 0; } else if (property == intel_sdvo_connector->top || property == intel_sdvo_connector->bottom) From ef3929b64b9a1100a839ef06a68bb931d7303bfb Mon Sep 17 00:00:00 2001 From: Pankaj Bharadiya Date: Mon, 4 May 2020 23:45:55 +0530 Subject: [PATCH 027/222] drm/i915/display/tc: Prefer drm_WARN_ON over WARN_ON struct drm_device specific drm_WARN* macros include device information in the backtrace, so we know what device the warnings originate from. Prefer drm_WARN_ON over WARN_ON. Conversion is done with below sementic patch: @@ identifier func, T; @@ func(...) { ... struct drm_i915_private *T = ...; <+... -WARN_ON( +drm_WARN_ON(&T->drm, ...) ...+> } @@ identifier func, T; @@ func(struct intel_digital_port *T,...) { +struct drm_i915_private *i915 = to_i915(T->base.base.dev); <+... -WARN_ON( +drm_WARN_ON(&i915->drm, ...) 
...+> } changes since v1: - Add i915 local variable and use it in drm_WARN_ON (Jani) Signed-off-by: Pankaj Bharadiya Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20200504181600.18503-5-pankaj.laxminarayan.bharadiya@intel.com --- drivers/gpu/drm/i915/display/intel_tc.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_tc.c b/drivers/gpu/drm/i915/display/intel_tc.c index b161c15baf86..5b5dc86a5737 100644 --- a/drivers/gpu/drm/i915/display/intel_tc.c +++ b/drivers/gpu/drm/i915/display/intel_tc.c @@ -360,12 +360,12 @@ static void icl_tc_phy_connect(struct intel_digital_port *dig_port, } if (!icl_tc_phy_set_safe_mode(dig_port, false) && - !WARN_ON(dig_port->tc_legacy_port)) + !drm_WARN_ON(&i915->drm, dig_port->tc_legacy_port)) goto out_set_tbt_alt_mode; max_lanes = intel_tc_port_fia_max_lane_count(dig_port); if (dig_port->tc_legacy_port) { - WARN_ON(max_lanes != 4); + drm_WARN_ON(&i915->drm, max_lanes != 4); dig_port->tc_mode = TC_PORT_LEGACY; return; @@ -445,18 +445,20 @@ static bool icl_tc_phy_is_connected(struct intel_digital_port *dig_port) static enum tc_port_mode intel_tc_port_get_current_mode(struct intel_digital_port *dig_port) { + struct drm_i915_private *i915 = to_i915(dig_port->base.base.dev); u32 live_status_mask = tc_port_live_status_mask(dig_port); bool in_safe_mode = icl_tc_phy_is_in_safe_mode(dig_port); enum tc_port_mode mode; - if (in_safe_mode || WARN_ON(!icl_tc_phy_status_complete(dig_port))) + if (in_safe_mode || + drm_WARN_ON(&i915->drm, !icl_tc_phy_status_complete(dig_port))) return TC_PORT_TBT_ALT; mode = dig_port->tc_legacy_port ? 
TC_PORT_LEGACY : TC_PORT_DP_ALT; if (live_status_mask) { enum tc_port_mode live_mode = fls(live_status_mask) - 1; - if (!WARN_ON(live_mode == TC_PORT_TBT_ALT)) + if (!drm_WARN_ON(&i915->drm, live_mode == TC_PORT_TBT_ALT)) mode = live_mode; } @@ -505,7 +507,9 @@ static void intel_tc_port_link_init_refcount(struct intel_digital_port *dig_port, int refcount) { - WARN_ON(dig_port->tc_link_refcount); + struct drm_i915_private *i915 = to_i915(dig_port->base.base.dev); + + drm_WARN_ON(&i915->drm, dig_port->tc_link_refcount); dig_port->tc_link_refcount = refcount; } From 6db20e27f6885aa71ed454f0e67cdfbf8637452f Mon Sep 17 00:00:00 2001 From: Pankaj Bharadiya Date: Mon, 4 May 2020 23:45:56 +0530 Subject: [PATCH 028/222] drm/i915/gem: Prefer drm_WARN* over WARN* struct drm_device specific drm_WARN* macros include device information in the backtrace, so we know what device the warnings originate from. Prefer drm_WARN* over WARN* at places where struct drm_device pointer can be extracted. Signed-off-by: Pankaj Bharadiya Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20200504181600.18503-6-pankaj.laxminarayan.bharadiya@intel.com --- drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c | 2 +- drivers/gpu/drm/i915/gem/i915_gem_phys.c | 3 ++- drivers/gpu/drm/i915/gem/i915_gem_userptr.c | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index c0d59d48e198..e4fb6c372537 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -1626,7 +1626,7 @@ eb_relocate_entry(struct i915_execbuffer *eb, err = i915_vma_bind(target->vma, target->vma->obj->cache_level, PIN_GLOBAL, NULL); - if (WARN_ONCE(err, + if (drm_WARN_ONCE(&i915->drm, err, "Unexpected failure to bind target VMA!")) return err; } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_phys.c b/drivers/gpu/drm/i915/gem/i915_gem_phys.c index 
698e22420dc5..da845ff0cacc 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_phys.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_phys.c @@ -29,7 +29,8 @@ static int i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj) void *dst; int i; - if (WARN_ON(i915_gem_object_needs_bit17_swizzle(obj))) + if (drm_WARN_ON(obj->base.dev, + i915_gem_object_needs_bit17_swizzle(obj))) return -EINVAL; /* diff --git a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c index 7ffd7afeb7a5..8b0708708671 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c @@ -235,7 +235,7 @@ i915_gem_userptr_init__mmu_notifier(struct drm_i915_gem_object *obj, if (flags & I915_USERPTR_UNSYNCHRONIZED) return capable(CAP_SYS_ADMIN) ? 0 : -EPERM; - if (WARN_ON(obj->userptr.mm == NULL)) + if (drm_WARN_ON(obj->base.dev, obj->userptr.mm == NULL)) return -EINVAL; mn = i915_mmu_notifier_find(obj->userptr.mm); From 7bda10095aaa7ab45249812c4f6b8ea55c7a45a2 Mon Sep 17 00:00:00 2001 From: Pankaj Bharadiya Date: Mon, 4 May 2020 23:45:57 +0530 Subject: [PATCH 029/222] drm/i915/i915_drv: Prefer drm_WARN_ON over WARN_ON struct drm_device specific drm_WARN* macros include device information in the backtrace, so we know what device the warnings originate from. Prefer drm_WARN_ON over WARN_ON. 
changes since v1: - Add parentheses around the dev_priv macro argument (Jani) Signed-off-by: Pankaj Bharadiya Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20200504181600.18503-7-pankaj.laxminarayan.bharadiya@intel.com --- drivers/gpu/drm/i915/i915_drv.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index a28cfe1741f8..0abbefa457f8 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1662,7 +1662,8 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915, #define HAS_DISPLAY(dev_priv) (INTEL_INFO(dev_priv)->pipe_mask != 0) /* Only valid when HAS_DISPLAY() is true */ -#define INTEL_DISPLAY_ENABLED(dev_priv) (WARN_ON(!HAS_DISPLAY(dev_priv)), !i915_modparams.disable_display) +#define INTEL_DISPLAY_ENABLED(dev_priv) \ + (drm_WARN_ON(&(dev_priv)->drm, !HAS_DISPLAY(dev_priv)), !i915_modparams.disable_display) static inline bool intel_vtd_active(void) { From bf07f6ebffefce2bbf3c318f9ce2f987774ea983 Mon Sep 17 00:00:00 2001 From: Pankaj Bharadiya Date: Mon, 4 May 2020 23:45:58 +0530 Subject: [PATCH 030/222] drm/i915/pmu: Prefer drm_WARN_ON over WARN_ON struct drm_device specific drm_WARN* macros include device information in the backtrace, so we know what device the warnings originate from. Prefer drm_WARN_ON over WARN_ON. 
Signed-off-by: Pankaj Bharadiya Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20200504181600.18503-8-pankaj.laxminarayan.bharadiya@intel.com --- drivers/gpu/drm/i915/i915_pmu.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c index e991a707bdb7..f6f44ad5e335 100644 --- a/drivers/gpu/drm/i915/i915_pmu.c +++ b/drivers/gpu/drm/i915/i915_pmu.c @@ -441,7 +441,11 @@ static u64 count_interrupts(struct drm_i915_private *i915) static void i915_pmu_event_destroy(struct perf_event *event) { - WARN_ON(event->parent); + struct drm_i915_private *i915 = + container_of(event->pmu, typeof(*i915), pmu.base); + + drm_WARN_ON(&i915->drm, event->parent); + module_put(THIS_MODULE); } @@ -1058,8 +1062,10 @@ static int i915_pmu_register_cpuhp_state(struct i915_pmu *pmu) static void i915_pmu_unregister_cpuhp_state(struct i915_pmu *pmu) { - WARN_ON(pmu->cpuhp.slot == CPUHP_INVALID); - WARN_ON(cpuhp_state_remove_instance(pmu->cpuhp.slot, &pmu->cpuhp.node)); + struct drm_i915_private *i915 = container_of(pmu, typeof(*i915), pmu); + + drm_WARN_ON(&i915->drm, pmu->cpuhp.slot == CPUHP_INVALID); + drm_WARN_ON(&i915->drm, cpuhp_state_remove_instance(pmu->cpuhp.slot, &pmu->cpuhp.node)); cpuhp_remove_multi_state(pmu->cpuhp.slot); pmu->cpuhp.slot = CPUHP_INVALID; } From 19edeb388e9f5a0ba505bbf53059dc5a856a0c77 Mon Sep 17 00:00:00 2001 From: Pankaj Bharadiya Date: Mon, 4 May 2020 23:45:59 +0530 Subject: [PATCH 031/222] drm/i915/pm: Prefer drm_WARN_ON over WARN_ON struct drm_device specific drm_WARN* macros include device information in the backtrace, so we know what device the warnings originate from. Prefer drm_WARN_ON over WARN_ON. Conversion is done with the semantic patch below: @@ identifier func, T; @@ func(...) { ... struct intel_crtc *T = ...; +struct drm_i915_private *dev_priv = to_i915(T->base.dev); <+... -WARN_ON( +drm_WARN_ON(&dev_priv->drm, ...) 
...+> } @@ identifier func, T; @@ func(struct intel_crtc_state *T,...) { +struct drm_i915_private *dev_priv = to_i915(T->uapi.crtc->dev); <+... -WARN_ON( +drm_WARN_ON(&dev_priv->drm, ...) ...+> } changes since v1: - Added dev_priv local variable and used it in drm_WARN_ON calls (Jani) Signed-off-by: Pankaj Bharadiya Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20200504181600.18503-9-pankaj.laxminarayan.bharadiya@intel.com --- drivers/gpu/drm/i915/intel_pm.c | 61 ++++++++++++++++++++------------- 1 file changed, 37 insertions(+), 24 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 5ebc1585622e..4d885ef0bac5 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -1437,6 +1437,7 @@ static int g4x_compute_pipe_wm(struct intel_crtc_state *crtc_state) static int g4x_compute_intermediate_wm(struct intel_crtc_state *new_crtc_state) { struct intel_crtc *crtc = to_intel_crtc(new_crtc_state->uapi.crtc); + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); struct g4x_wm_state *intermediate = &new_crtc_state->wm.g4x.intermediate; const struct g4x_wm_state *optimal = &new_crtc_state->wm.g4x.optimal; struct intel_atomic_state *intel_state = @@ -1465,8 +1466,8 @@ static int g4x_compute_intermediate_wm(struct intel_crtc_state *new_crtc_state) max(optimal->wm.plane[plane_id], active->wm.plane[plane_id]); - WARN_ON(intermediate->wm.plane[plane_id] > - g4x_plane_fifo_size(plane_id, G4X_WM_LEVEL_NORMAL)); + drm_WARN_ON(&dev_priv->drm, intermediate->wm.plane[plane_id] > + g4x_plane_fifo_size(plane_id, G4X_WM_LEVEL_NORMAL)); } intermediate->sr.plane = max(optimal->sr.plane, @@ -1483,21 +1484,25 @@ static int g4x_compute_intermediate_wm(struct intel_crtc_state *new_crtc_state) intermediate->hpll.fbc = max(optimal->hpll.fbc, active->hpll.fbc); - WARN_ON((intermediate->sr.plane > - g4x_plane_fifo_size(PLANE_PRIMARY, G4X_WM_LEVEL_SR) || - intermediate->sr.cursor > - 
g4x_plane_fifo_size(PLANE_CURSOR, G4X_WM_LEVEL_SR)) && - intermediate->cxsr); - WARN_ON((intermediate->sr.plane > - g4x_plane_fifo_size(PLANE_PRIMARY, G4X_WM_LEVEL_HPLL) || - intermediate->sr.cursor > - g4x_plane_fifo_size(PLANE_CURSOR, G4X_WM_LEVEL_HPLL)) && - intermediate->hpll_en); + drm_WARN_ON(&dev_priv->drm, + (intermediate->sr.plane > + g4x_plane_fifo_size(PLANE_PRIMARY, G4X_WM_LEVEL_SR) || + intermediate->sr.cursor > + g4x_plane_fifo_size(PLANE_CURSOR, G4X_WM_LEVEL_SR)) && + intermediate->cxsr); + drm_WARN_ON(&dev_priv->drm, + (intermediate->sr.plane > + g4x_plane_fifo_size(PLANE_PRIMARY, G4X_WM_LEVEL_HPLL) || + intermediate->sr.cursor > + g4x_plane_fifo_size(PLANE_CURSOR, G4X_WM_LEVEL_HPLL)) && + intermediate->hpll_en); - WARN_ON(intermediate->sr.fbc > g4x_fbc_fifo_size(1) && - intermediate->fbc_en && intermediate->cxsr); - WARN_ON(intermediate->hpll.fbc > g4x_fbc_fifo_size(2) && - intermediate->fbc_en && intermediate->hpll_en); + drm_WARN_ON(&dev_priv->drm, + intermediate->sr.fbc > g4x_fbc_fifo_size(1) && + intermediate->fbc_en && intermediate->cxsr); + drm_WARN_ON(&dev_priv->drm, + intermediate->hpll.fbc > g4x_fbc_fifo_size(2) && + intermediate->fbc_en && intermediate->hpll_en); out: /* @@ -1681,6 +1686,7 @@ static bool vlv_need_sprite0_fifo_workaround(unsigned int active_planes) static int vlv_compute_fifo(struct intel_crtc_state *crtc_state) { struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); const struct g4x_pipe_wm *raw = &crtc_state->wm.vlv.raw[VLV_WM_LEVEL_PM2]; struct vlv_fifo_state *fifo_state = &crtc_state->wm.vlv.fifo_state; @@ -1749,11 +1755,11 @@ static int vlv_compute_fifo(struct intel_crtc_state *crtc_state) fifo_left -= plane_extra; } - WARN_ON(active_planes != 0 && fifo_left != 0); + drm_WARN_ON(&dev_priv->drm, active_planes != 0 && fifo_left != 0); /* give it all to the first plane if none are active */ if (active_planes == 0) { - WARN_ON(fifo_left != 
fifo_size); + drm_WARN_ON(&dev_priv->drm, fifo_left != fifo_size); fifo_state->plane[PLANE_PRIMARY] = fifo_left; } @@ -4333,11 +4339,13 @@ static uint_fixed_16_16_t skl_plane_downscale_amount(const struct intel_crtc_state *crtc_state, const struct intel_plane_state *plane_state) { + struct drm_i915_private *dev_priv = to_i915(crtc_state->uapi.crtc->dev); u32 src_w, src_h, dst_w, dst_h; uint_fixed_16_16_t fp_w_ratio, fp_h_ratio; uint_fixed_16_16_t downscale_h, downscale_w; - if (WARN_ON(!intel_wm_plane_visible(crtc_state, plane_state))) + if (drm_WARN_ON(&dev_priv->drm, + !intel_wm_plane_visible(crtc_state, plane_state))) return u32_to_fixed16(0); /* @@ -5031,6 +5039,7 @@ skl_wm_method2(u32 pixel_rate, u32 pipe_htotal, u32 latency, static uint_fixed_16_16_t intel_get_linetime_us(const struct intel_crtc_state *crtc_state) { + struct drm_i915_private *dev_priv = to_i915(crtc_state->uapi.crtc->dev); u32 pixel_rate; u32 crtc_htotal; uint_fixed_16_16_t linetime_us; @@ -5040,7 +5049,7 @@ intel_get_linetime_us(const struct intel_crtc_state *crtc_state) pixel_rate = crtc_state->pixel_rate; - if (WARN_ON(pixel_rate == 0)) + if (drm_WARN_ON(&dev_priv->drm, pixel_rate == 0)) return u32_to_fixed16(0); crtc_htotal = crtc_state->hw.adjusted_mode.crtc_htotal; @@ -5053,11 +5062,13 @@ static u32 skl_adjusted_plane_pixel_rate(const struct intel_crtc_state *crtc_state, const struct intel_plane_state *plane_state) { + struct drm_i915_private *dev_priv = to_i915(crtc_state->uapi.crtc->dev); u64 adjusted_pixel_rate; uint_fixed_16_16_t downscale_amount; /* Shouldn't reach here on disabled planes... 
*/ - if (WARN_ON(!intel_wm_plane_visible(crtc_state, plane_state))) + if (drm_WARN_ON(&dev_priv->drm, + !intel_wm_plane_visible(crtc_state, plane_state))) return 0; /* @@ -5493,6 +5504,7 @@ static int skl_build_plane_wm(struct intel_crtc_state *crtc_state, static int icl_build_plane_wm(struct intel_crtc_state *crtc_state, const struct intel_plane_state *plane_state) { + struct drm_i915_private *dev_priv = to_i915(crtc_state->uapi.crtc->dev); enum plane_id plane_id = to_intel_plane(plane_state->uapi.plane)->id; int ret; @@ -5504,9 +5516,10 @@ static int icl_build_plane_wm(struct intel_crtc_state *crtc_state, const struct drm_framebuffer *fb = plane_state->hw.fb; enum plane_id y_plane_id = plane_state->planar_linked_plane->id; - WARN_ON(!intel_wm_plane_visible(crtc_state, plane_state)); - WARN_ON(!fb->format->is_yuv || - fb->format->num_planes == 1); + drm_WARN_ON(&dev_priv->drm, + !intel_wm_plane_visible(crtc_state, plane_state)); + drm_WARN_ON(&dev_priv->drm, !fb->format->is_yuv || + fb->format->num_planes == 1); ret = skl_build_plane_wm_single(crtc_state, plane_state, y_plane_id, 0); From 649c10ff85b94f7685c3a478e7788018a360f6e6 Mon Sep 17 00:00:00 2001 From: Pankaj Bharadiya Date: Mon, 4 May 2020 23:46:00 +0530 Subject: [PATCH 032/222] drm/i915/runtime_pm: Prefer drm_WARN* over WARN* struct drm_device specific drm_WARN* macros include device information in the backtrace, so we know what device the warnings originate from. Prefer drm_WARN* over WARN*. Conversion is done with below semantic patch: @@ identifier func, T; @@ func(struct intel_runtime_pm *T,...) { + struct drm_i915_private *i915 = container_of(T, struct drm_i915_private, runtime_pm); <+... ( -WARN( +drm_WARN(&i915->drm, ...) | -WARN_ON( +drm_WARN_ON(&i915->drm, ...) | -WARN_ONCE( +drm_WARN_ONCE(&i915->drm, ...) | -WARN_ON_ONCE( +drm_WARN_ON_ONCE(&i915->drm, ...) 
) ...+> } Signed-off-by: Pankaj Bharadiya Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20200504181600.18503-10-pankaj.laxminarayan.bharadiya@intel.com --- drivers/gpu/drm/i915/intel_runtime_pm.c | 39 ++++++++++++++++++------- 1 file changed, 28 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c index ad719c9602af..31ccd0559c55 100644 --- a/drivers/gpu/drm/i915/intel_runtime_pm.c +++ b/drivers/gpu/drm/i915/intel_runtime_pm.c @@ -116,6 +116,9 @@ track_intel_runtime_pm_wakeref(struct intel_runtime_pm *rpm) static void untrack_intel_runtime_pm_wakeref(struct intel_runtime_pm *rpm, depot_stack_handle_t stack) { + struct drm_i915_private *i915 = container_of(rpm, + struct drm_i915_private, + runtime_pm); unsigned long flags, n; bool found = false; @@ -134,9 +137,9 @@ static void untrack_intel_runtime_pm_wakeref(struct intel_runtime_pm *rpm, } spin_unlock_irqrestore(&rpm->debug.lock, flags); - if (WARN(!found, - "Unmatched wakeref (tracking %lu), count %u\n", - rpm->debug.count, atomic_read(&rpm->wakeref_count))) { + if (drm_WARN(&i915->drm, !found, + "Unmatched wakeref (tracking %lu), count %u\n", + rpm->debug.count, atomic_read(&rpm->wakeref_count))) { char *buf; buf = kmalloc(PAGE_SIZE, GFP_NOWAIT | __GFP_NOWARN); @@ -355,10 +358,14 @@ intel_runtime_pm_release(struct intel_runtime_pm *rpm, int wakelock) static intel_wakeref_t __intel_runtime_pm_get(struct intel_runtime_pm *rpm, bool wakelock) { + struct drm_i915_private *i915 = container_of(rpm, + struct drm_i915_private, + runtime_pm); int ret; ret = pm_runtime_get_sync(rpm->kdev); - WARN_ONCE(ret < 0, "pm_runtime_get_sync() failed: %d\n", ret); + drm_WARN_ONCE(&i915->drm, ret < 0, + "pm_runtime_get_sync() failed: %d\n", ret); intel_runtime_pm_acquire(rpm, wakelock); @@ -539,6 +546,9 @@ void intel_runtime_pm_put(struct intel_runtime_pm *rpm, intel_wakeref_t wref) */ void intel_runtime_pm_enable(struct 
intel_runtime_pm *rpm) { + struct drm_i915_private *i915 = container_of(rpm, + struct drm_i915_private, + runtime_pm); struct device *kdev = rpm->kdev; /* @@ -565,7 +575,8 @@ void intel_runtime_pm_enable(struct intel_runtime_pm *rpm) pm_runtime_dont_use_autosuspend(kdev); ret = pm_runtime_get_sync(kdev); - WARN(ret < 0, "pm_runtime_get_sync() failed: %d\n", ret); + drm_WARN(&i915->drm, ret < 0, + "pm_runtime_get_sync() failed: %d\n", ret); } else { pm_runtime_use_autosuspend(kdev); } @@ -580,11 +591,14 @@ void intel_runtime_pm_enable(struct intel_runtime_pm *rpm) void intel_runtime_pm_disable(struct intel_runtime_pm *rpm) { + struct drm_i915_private *i915 = container_of(rpm, + struct drm_i915_private, + runtime_pm); struct device *kdev = rpm->kdev; /* Transfer rpm ownership back to core */ - WARN(pm_runtime_get_sync(kdev) < 0, - "Failed to pass rpm ownership back to core\n"); + drm_WARN(&i915->drm, pm_runtime_get_sync(kdev) < 0, + "Failed to pass rpm ownership back to core\n"); pm_runtime_dont_use_autosuspend(kdev); @@ -594,12 +608,15 @@ void intel_runtime_pm_disable(struct intel_runtime_pm *rpm) void intel_runtime_pm_driver_release(struct intel_runtime_pm *rpm) { + struct drm_i915_private *i915 = container_of(rpm, + struct drm_i915_private, + runtime_pm); int count = atomic_read(&rpm->wakeref_count); - WARN(count, - "i915 raw-wakerefs=%d wakelocks=%d on cleanup\n", - intel_rpm_raw_wakeref_count(count), - intel_rpm_wakelock_count(count)); + drm_WARN(&i915->drm, count, + "i915 raw-wakerefs=%d wakelocks=%d on cleanup\n", + intel_rpm_raw_wakeref_count(count), + intel_rpm_wakelock_count(count)); untrack_all_intel_runtime_pm_wakerefs(rpm); } From 25c26f18ea796a56830c2e356f2b3e0c929b0a6f Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 19 May 2020 14:08:02 +0100 Subject: [PATCH 033/222] drm/i915/selftests: Measure dispatch latency A useful metric of the system's health is how fast we can tell the GPU to do various actions, so measure our latency. 
v2: Refactor all the instruction building into emitters. v3: Make the error handling, if not perfect, at least consistent. Signed-off-by: Chris Wilson Cc: Mika Kuoppala Cc: Joonas Lahtinen Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20200519130802.4067-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/selftests/i915_request.c | 823 ++++++++++++++++++ 1 file changed, 823 insertions(+) diff --git a/drivers/gpu/drm/i915/selftests/i915_request.c b/drivers/gpu/drm/i915/selftests/i915_request.c index 6014e8dfcbb1..92c628f18c60 100644 --- a/drivers/gpu/drm/i915/selftests/i915_request.c +++ b/drivers/gpu/drm/i915/selftests/i915_request.c @@ -24,16 +24,20 @@ #include #include +#include #include "gem/i915_gem_pm.h" #include "gem/selftests/mock_context.h" +#include "gt/intel_engine_heartbeat.h" #include "gt/intel_engine_pm.h" #include "gt/intel_engine_user.h" #include "gt/intel_gt.h" +#include "gt/intel_gt_requests.h" #include "i915_random.h" #include "i915_selftest.h" +#include "igt_flush_test.h" #include "igt_live_test.h" #include "igt_spinner.h" #include "lib_sw_fence.h" @@ -1524,6 +1528,824 @@ struct perf_series { struct intel_context *ce[]; }; +static int cmp_u32(const void *A, const void *B) +{ + const u32 *a = A, *b = B; + + return *a - *b; +} + +static u32 trifilter(u32 *a) +{ + u64 sum; + +#define TF_COUNT 5 + sort(a, TF_COUNT, sizeof(*a), cmp_u32, NULL); + + sum = mul_u32_u32(a[2], 2); + sum += a[1]; + sum += a[3]; + + GEM_BUG_ON(sum > U32_MAX); + return sum; +#define TF_BIAS 2 +} + +static u64 cycles_to_ns(struct intel_engine_cs *engine, u32 cycles) +{ + u64 ns = i915_cs_timestamp_ticks_to_ns(engine->i915, cycles); + + return DIV_ROUND_CLOSEST(ns, 1 << TF_BIAS); +} + +static u32 *emit_timestamp_store(u32 *cs, struct intel_context *ce, u32 offset) +{ + *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT; + *cs++ = i915_mmio_reg_offset(RING_TIMESTAMP((ce->engine->mmio_base))); + *cs++ = offset; + *cs++ = 0; + + return cs; +} + +static 
u32 *emit_store_dw(u32 *cs, u32 offset, u32 value) +{ + *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; + *cs++ = offset; + *cs++ = 0; + *cs++ = value; + + return cs; +} + +static u32 *emit_semaphore_poll(u32 *cs, u32 mode, u32 value, u32 offset) +{ + *cs++ = MI_SEMAPHORE_WAIT | + MI_SEMAPHORE_GLOBAL_GTT | + MI_SEMAPHORE_POLL | + mode; + *cs++ = value; + *cs++ = offset; + *cs++ = 0; + + return cs; +} + +static u32 *emit_semaphore_poll_until(u32 *cs, u32 offset, u32 value) +{ + return emit_semaphore_poll(cs, MI_SEMAPHORE_SAD_EQ_SDD, value, offset); +} + +static void semaphore_set(u32 *sema, u32 value) +{ + WRITE_ONCE(*sema, value); + wmb(); /* flush the update to the cache, and beyond */ +} + +static u32 *hwsp_scratch(const struct intel_context *ce) +{ + return memset32(ce->engine->status_page.addr + 1000, 0, 21); +} + +static u32 hwsp_offset(const struct intel_context *ce, u32 *dw) +{ + return (i915_ggtt_offset(ce->engine->status_page.vma) + + offset_in_page(dw)); +} + +static int measure_semaphore_response(struct intel_context *ce) +{ + u32 *sema = hwsp_scratch(ce); + const u32 offset = hwsp_offset(ce, sema); + u32 elapsed[TF_COUNT], cycles; + struct i915_request *rq; + u32 *cs; + int err; + int i; + + /* + * Measure how many cycles it takes for the HW to detect the change + * in a semaphore value. 
+ * + * A: read CS_TIMESTAMP from CPU + * poke semaphore + * B: read CS_TIMESTAMP on GPU + * + * Semaphore latency: B - A + */ + + semaphore_set(sema, -1); + + rq = i915_request_create(ce); + if (IS_ERR(rq)) + return PTR_ERR(rq); + + cs = intel_ring_begin(rq, 4 + 12 * ARRAY_SIZE(elapsed)); + if (IS_ERR(cs)) { + i915_request_add(rq); + err = PTR_ERR(cs); + goto err; + } + + cs = emit_store_dw(cs, offset, 0); + for (i = 1; i <= ARRAY_SIZE(elapsed); i++) { + cs = emit_semaphore_poll_until(cs, offset, i); + cs = emit_timestamp_store(cs, ce, offset + i * sizeof(u32)); + cs = emit_store_dw(cs, offset, 0); + } + + intel_ring_advance(rq, cs); + i915_request_add(rq); + + if (wait_for(READ_ONCE(*sema) == 0, 50)) { + err = -EIO; + goto err; + } + + for (i = 1; i <= ARRAY_SIZE(elapsed); i++) { + preempt_disable(); + cycles = ENGINE_READ_FW(ce->engine, RING_TIMESTAMP); + semaphore_set(sema, i); + preempt_enable(); + + if (wait_for(READ_ONCE(*sema) == 0, 50)) { + err = -EIO; + goto err; + } + + elapsed[i - 1] = sema[i] - cycles; + } + + cycles = trifilter(elapsed); + pr_info("%s: semaphore response %d cycles, %lluns\n", + ce->engine->name, cycles >> TF_BIAS, + cycles_to_ns(ce->engine, cycles)); + + return intel_gt_wait_for_idle(ce->engine->gt, HZ); + +err: + intel_gt_set_wedged(ce->engine->gt); + return err; +} + +static int measure_idle_dispatch(struct intel_context *ce) +{ + u32 *sema = hwsp_scratch(ce); + const u32 offset = hwsp_offset(ce, sema); + u32 elapsed[TF_COUNT], cycles; + u32 *cs; + int err; + int i; + + /* + * Measure how long it takes for us to submit a request while the + * engine is idle, but is resting in our context. 
+ * + * A: read CS_TIMESTAMP from CPU + * submit request + * B: read CS_TIMESTAMP on GPU + * + * Submission latency: B - A + */ + + for (i = 0; i < ARRAY_SIZE(elapsed); i++) { + struct i915_request *rq; + + err = intel_gt_wait_for_idle(ce->engine->gt, HZ / 2); + if (err) + return err; + + rq = i915_request_create(ce); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto err; + } + + cs = intel_ring_begin(rq, 4); + if (IS_ERR(cs)) { + i915_request_add(rq); + err = PTR_ERR(cs); + goto err; + } + + cs = emit_timestamp_store(cs, ce, offset + i * sizeof(u32)); + + intel_ring_advance(rq, cs); + + preempt_disable(); + local_bh_disable(); + elapsed[i] = ENGINE_READ_FW(ce->engine, RING_TIMESTAMP); + i915_request_add(rq); + local_bh_enable(); + preempt_enable(); + } + + err = intel_gt_wait_for_idle(ce->engine->gt, HZ / 2); + if (err) + goto err; + + for (i = 0; i < ARRAY_SIZE(elapsed); i++) + elapsed[i] = sema[i] - elapsed[i]; + + cycles = trifilter(elapsed); + pr_info("%s: idle dispatch latency %d cycles, %lluns\n", + ce->engine->name, cycles >> TF_BIAS, + cycles_to_ns(ce->engine, cycles)); + + return intel_gt_wait_for_idle(ce->engine->gt, HZ); + +err: + intel_gt_set_wedged(ce->engine->gt); + return err; +} + +static int measure_busy_dispatch(struct intel_context *ce) +{ + u32 *sema = hwsp_scratch(ce); + const u32 offset = hwsp_offset(ce, sema); + u32 elapsed[TF_COUNT + 1], cycles; + u32 *cs; + int err; + int i; + + /* + * Measure how long it takes for us to submit a request while the + * engine is busy, polling on a semaphore in our context. With + * direct submission, this will include the cost of a lite restore. 
+ * + * A: read CS_TIMESTAMP from CPU + * submit request + * B: read CS_TIMESTAMP on GPU + * + * Submission latency: B - A + */ + + for (i = 1; i <= ARRAY_SIZE(elapsed); i++) { + struct i915_request *rq; + + rq = i915_request_create(ce); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto err; + } + + cs = intel_ring_begin(rq, 12); + if (IS_ERR(cs)) { + i915_request_add(rq); + err = PTR_ERR(cs); + goto err; + } + + cs = emit_store_dw(cs, offset + i * sizeof(u32), -1); + cs = emit_semaphore_poll_until(cs, offset, i); + cs = emit_timestamp_store(cs, ce, offset + i * sizeof(u32)); + + intel_ring_advance(rq, cs); + + if (i > 1 && wait_for(READ_ONCE(sema[i - 1]), 500)) { + err = -EIO; + goto err; + } + + preempt_disable(); + local_bh_disable(); + elapsed[i - 1] = ENGINE_READ_FW(ce->engine, RING_TIMESTAMP); + i915_request_add(rq); + local_bh_enable(); + semaphore_set(sema, i - 1); + preempt_enable(); + } + + wait_for(READ_ONCE(sema[i - 1]), 500); + semaphore_set(sema, i - 1); + + for (i = 1; i <= TF_COUNT; i++) { + GEM_BUG_ON(sema[i] == -1); + elapsed[i - 1] = sema[i] - elapsed[i]; + } + + cycles = trifilter(elapsed); + pr_info("%s: busy dispatch latency %d cycles, %lluns\n", + ce->engine->name, cycles >> TF_BIAS, + cycles_to_ns(ce->engine, cycles)); + + return intel_gt_wait_for_idle(ce->engine->gt, HZ); + +err: + intel_gt_set_wedged(ce->engine->gt); + return err; +} + +static int plug(struct intel_engine_cs *engine, u32 *sema, u32 mode, int value) +{ + const u32 offset = + i915_ggtt_offset(engine->status_page.vma) + + offset_in_page(sema); + struct i915_request *rq; + u32 *cs; + + rq = i915_request_create(engine->kernel_context); + if (IS_ERR(rq)) + return PTR_ERR(rq); + + cs = intel_ring_begin(rq, 4); + if (IS_ERR(cs)) { + i915_request_add(rq); + return PTR_ERR(cs); + } + + cs = emit_semaphore_poll(cs, mode, value, offset); + + intel_ring_advance(rq, cs); + i915_request_add(rq); + + return 0; +} + +static int measure_inter_request(struct intel_context *ce) +{ + u32 *sema = 
hwsp_scratch(ce); + const u32 offset = hwsp_offset(ce, sema); + u32 elapsed[TF_COUNT + 1], cycles; + struct i915_sw_fence *submit; + int i, err; + + /* + * Measure how long it takes to advance from one request into the + * next. Between each request we flush the GPU caches to memory, + * update the breadcrumbs, and then invalidate those caches. + * We queue up all the requests to be submitted in one batch so + * it should be one set of contiguous measurements. + * + * A: read CS_TIMESTAMP on GPU + * advance request + * B: read CS_TIMESTAMP on GPU + * + * Request latency: B - A + */ + + err = plug(ce->engine, sema, MI_SEMAPHORE_SAD_NEQ_SDD, 0); + if (err) + return err; + + submit = heap_fence_create(GFP_KERNEL); + if (!submit) { + semaphore_set(sema, 1); + return -ENOMEM; + } + + intel_engine_flush_submission(ce->engine); + for (i = 1; i <= ARRAY_SIZE(elapsed); i++) { + struct i915_request *rq; + u32 *cs; + + rq = i915_request_create(ce); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto err_submit; + } + + err = i915_sw_fence_await_sw_fence_gfp(&rq->submit, + submit, + GFP_KERNEL); + if (err < 0) { + i915_request_add(rq); + goto err_submit; + } + + cs = intel_ring_begin(rq, 4); + if (IS_ERR(cs)) { + i915_request_add(rq); + err = PTR_ERR(cs); + goto err_submit; + } + + cs = emit_timestamp_store(cs, ce, offset + i * sizeof(u32)); + + intel_ring_advance(rq, cs); + i915_request_add(rq); + } + local_bh_disable(); + i915_sw_fence_commit(submit); + local_bh_enable(); + intel_engine_flush_submission(ce->engine); + heap_fence_put(submit); + + semaphore_set(sema, 1); + err = intel_gt_wait_for_idle(ce->engine->gt, HZ / 2); + if (err) + goto err; + + for (i = 1; i <= TF_COUNT; i++) + elapsed[i - 1] = sema[i + 1] - sema[i]; + + cycles = trifilter(elapsed); + pr_info("%s: inter-request latency %d cycles, %lluns\n", + ce->engine->name, cycles >> TF_BIAS, + cycles_to_ns(ce->engine, cycles)); + + return intel_gt_wait_for_idle(ce->engine->gt, HZ); + +err_submit: + 
i915_sw_fence_commit(submit); + heap_fence_put(submit); + semaphore_set(sema, 1); +err: + intel_gt_set_wedged(ce->engine->gt); + return err; +} + +static int measure_context_switch(struct intel_context *ce) +{ + u32 *sema = hwsp_scratch(ce); + const u32 offset = hwsp_offset(ce, sema); + struct i915_request *fence = NULL; + u32 elapsed[TF_COUNT + 1], cycles; + int i, j, err; + u32 *cs; + + /* + * Measure how long it takes to advance from one request in one + * context to a request in another context. This allows us to + * measure how long the context save/restore take, along with all + * the inter-context setup we require. + * + * A: read CS_TIMESTAMP on GPU + * switch context + * B: read CS_TIMESTAMP on GPU + * + * Context switch latency: B - A + */ + + err = plug(ce->engine, sema, MI_SEMAPHORE_SAD_NEQ_SDD, 0); + if (err) + return err; + + for (i = 1; i <= ARRAY_SIZE(elapsed); i++) { + struct intel_context *arr[] = { + ce, ce->engine->kernel_context + }; + u32 addr = offset + ARRAY_SIZE(arr) * i * sizeof(u32); + + for (j = 0; j < ARRAY_SIZE(arr); j++) { + struct i915_request *rq; + + rq = i915_request_create(arr[j]); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto err_fence; + } + + if (fence) { + err = i915_request_await_dma_fence(rq, + &fence->fence); + if (err) { + i915_request_add(rq); + goto err_fence; + } + } + + cs = intel_ring_begin(rq, 4); + if (IS_ERR(cs)) { + i915_request_add(rq); + err = PTR_ERR(cs); + goto err_fence; + } + + cs = emit_timestamp_store(cs, ce, addr); + addr += sizeof(u32); + + intel_ring_advance(rq, cs); + + i915_request_put(fence); + fence = i915_request_get(rq); + + i915_request_add(rq); + } + } + i915_request_put(fence); + intel_engine_flush_submission(ce->engine); + + semaphore_set(sema, 1); + err = intel_gt_wait_for_idle(ce->engine->gt, HZ / 2); + if (err) + goto err; + + for (i = 1; i <= TF_COUNT; i++) + elapsed[i - 1] = sema[2 * i + 2] - sema[2 * i + 1]; + + cycles = trifilter(elapsed); + pr_info("%s: context switch latency %d 
cycles, %lluns\n", + ce->engine->name, cycles >> TF_BIAS, + cycles_to_ns(ce->engine, cycles)); + + return intel_gt_wait_for_idle(ce->engine->gt, HZ); + +err_fence: + i915_request_put(fence); + semaphore_set(sema, 1); +err: + intel_gt_set_wedged(ce->engine->gt); + return err; +} + +static int measure_preemption(struct intel_context *ce) +{ + u32 *sema = hwsp_scratch(ce); + const u32 offset = hwsp_offset(ce, sema); + u32 elapsed[TF_COUNT], cycles; + u32 *cs; + int err; + int i; + + /* + * We measure two latencies while triggering preemption. The first + * latency is how long it takes for us to submit a preempting request. + * The second latency is how it takes for us to return from the + * preemption back to the original context. + * + * A: read CS_TIMESTAMP from CPU + * submit preemption + * B: read CS_TIMESTAMP on GPU (in preempting context) + * context switch + * C: read CS_TIMESTAMP on GPU (in original context) + * + * Preemption dispatch latency: B - A + * Preemption switch latency: C - B + */ + + if (!intel_engine_has_preemption(ce->engine)) + return 0; + + for (i = 1; i <= ARRAY_SIZE(elapsed); i++) { + u32 addr = offset + 2 * i * sizeof(u32); + struct i915_request *rq; + + rq = i915_request_create(ce); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto err; + } + + cs = intel_ring_begin(rq, 12); + if (IS_ERR(cs)) { + i915_request_add(rq); + err = PTR_ERR(cs); + goto err; + } + + cs = emit_store_dw(cs, addr, -1); + cs = emit_semaphore_poll_until(cs, offset, i); + cs = emit_timestamp_store(cs, ce, addr + sizeof(u32)); + + intel_ring_advance(rq, cs); + i915_request_add(rq); + + if (wait_for(READ_ONCE(sema[2 * i]) == -1, 500)) { + err = -EIO; + goto err; + } + + rq = i915_request_create(ce->engine->kernel_context); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto err; + } + + cs = intel_ring_begin(rq, 8); + if (IS_ERR(cs)) { + i915_request_add(rq); + err = PTR_ERR(cs); + goto err; + } + + cs = emit_timestamp_store(cs, ce, addr); + cs = emit_store_dw(cs, offset, i); + 
+ intel_ring_advance(rq, cs); + rq->sched.attr.priority = I915_PRIORITY_BARRIER; + + elapsed[i - 1] = ENGINE_READ_FW(ce->engine, RING_TIMESTAMP); + i915_request_add(rq); + } + + if (wait_for(READ_ONCE(sema[2 * i - 2]) != -1, 500)) { + err = -EIO; + goto err; + } + + for (i = 1; i <= TF_COUNT; i++) + elapsed[i - 1] = sema[2 * i + 0] - elapsed[i - 1]; + + cycles = trifilter(elapsed); + pr_info("%s: preemption dispatch latency %d cycles, %lluns\n", + ce->engine->name, cycles >> TF_BIAS, + cycles_to_ns(ce->engine, cycles)); + + for (i = 1; i <= TF_COUNT; i++) + elapsed[i - 1] = sema[2 * i + 1] - sema[2 * i + 0]; + + cycles = trifilter(elapsed); + pr_info("%s: preemption switch latency %d cycles, %lluns\n", + ce->engine->name, cycles >> TF_BIAS, + cycles_to_ns(ce->engine, cycles)); + + return intel_gt_wait_for_idle(ce->engine->gt, HZ); + +err: + intel_gt_set_wedged(ce->engine->gt); + return err; +} + +struct signal_cb { + struct dma_fence_cb base; + bool seen; +}; + +static void signal_cb(struct dma_fence *fence, struct dma_fence_cb *cb) +{ + struct signal_cb *s = container_of(cb, typeof(*s), base); + + smp_store_mb(s->seen, true); /* be safe, be strong */ +} + +static int measure_completion(struct intel_context *ce) +{ + u32 *sema = hwsp_scratch(ce); + const u32 offset = hwsp_offset(ce, sema); + u32 elapsed[TF_COUNT], cycles; + u32 *cs; + int err; + int i; + + /* + * Measure how long it takes for the signal (interrupt) to be + * sent from the GPU to be processed by the CPU. 
+ * + * A: read CS_TIMESTAMP on GPU + * signal + * B: read CS_TIMESTAMP from CPU + * + * Completion latency: B - A + */ + + for (i = 1; i <= ARRAY_SIZE(elapsed); i++) { + struct signal_cb cb = { .seen = false }; + struct i915_request *rq; + + rq = i915_request_create(ce); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto err; + } + + cs = intel_ring_begin(rq, 12); + if (IS_ERR(cs)) { + i915_request_add(rq); + err = PTR_ERR(cs); + goto err; + } + + cs = emit_store_dw(cs, offset + i * sizeof(u32), -1); + cs = emit_semaphore_poll_until(cs, offset, i); + cs = emit_timestamp_store(cs, ce, offset + i * sizeof(u32)); + + intel_ring_advance(rq, cs); + + dma_fence_add_callback(&rq->fence, &cb.base, signal_cb); + + local_bh_disable(); + i915_request_add(rq); + local_bh_enable(); + + if (wait_for(READ_ONCE(sema[i]) == -1, 50)) { + err = -EIO; + goto err; + } + + preempt_disable(); + semaphore_set(sema, i); + while (!READ_ONCE(cb.seen)) + cpu_relax(); + + elapsed[i - 1] = ENGINE_READ_FW(ce->engine, RING_TIMESTAMP); + preempt_enable(); + } + + err = intel_gt_wait_for_idle(ce->engine->gt, HZ / 2); + if (err) + goto err; + + for (i = 0; i < ARRAY_SIZE(elapsed); i++) { + GEM_BUG_ON(sema[i + 1] == -1); + elapsed[i] = elapsed[i] - sema[i + 1]; + } + + cycles = trifilter(elapsed); + pr_info("%s: completion latency %d cycles, %lluns\n", + ce->engine->name, cycles >> TF_BIAS, + cycles_to_ns(ce->engine, cycles)); + + return intel_gt_wait_for_idle(ce->engine->gt, HZ); + +err: + intel_gt_set_wedged(ce->engine->gt); + return err; +} + +static void rps_pin(struct intel_gt *gt) +{ + /* Pin the frequency to max */ + atomic_inc(&gt->rps.num_waiters); + intel_uncore_forcewake_get(gt->uncore, FORCEWAKE_ALL); + + mutex_lock(&gt->rps.lock); + intel_rps_set(&gt->rps, gt->rps.max_freq); + mutex_unlock(&gt->rps.lock); +} + +static void rps_unpin(struct intel_gt *gt) +{ + intel_uncore_forcewake_put(gt->uncore, FORCEWAKE_ALL); + atomic_dec(&gt->rps.num_waiters); +} + +static void engine_heartbeat_disable(struct
intel_engine_cs *engine) +{ + engine->props.heartbeat_interval_ms = 0; + + intel_engine_pm_get(engine); + intel_engine_park_heartbeat(engine); +} + +static void engine_heartbeat_enable(struct intel_engine_cs *engine) +{ + intel_engine_pm_put(engine); + + engine->props.heartbeat_interval_ms = + engine->defaults.heartbeat_interval_ms; +} + +static int perf_request_latency(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct intel_engine_cs *engine; + struct pm_qos_request qos; + int err = 0; + + if (INTEL_GEN(i915) < 8) /* per-engine CS timestamp, semaphores */ + return 0; + + cpu_latency_qos_add_request(&qos, 0); /* disable cstates */ + + for_each_uabi_engine(engine, i915) { + struct intel_context *ce; + + ce = intel_context_create(engine); + if (IS_ERR(ce)) + goto out; + + err = intel_context_pin(ce); + if (err) { + intel_context_put(ce); + goto out; + } + + engine_heartbeat_disable(engine); + rps_pin(engine->gt); + + if (err == 0) + err = measure_semaphore_response(ce); + if (err == 0) + err = measure_idle_dispatch(ce); + if (err == 0) + err = measure_busy_dispatch(ce); + if (err == 0) + err = measure_inter_request(ce); + if (err == 0) + err = measure_context_switch(ce); + if (err == 0) + err = measure_preemption(ce); + if (err == 0) + err = measure_completion(ce); + + rps_unpin(engine->gt); + engine_heartbeat_enable(engine); + + intel_context_unpin(ce); + intel_context_put(ce); + if (err) + goto out; + } + +out: + if (igt_flush_test(i915)) + err = -EIO; + + cpu_latency_qos_remove_request(&qos); + return err; +} + static int s_sync0(void *arg) { struct perf_series *ps = arg; @@ -2042,6 +2864,7 @@ static int perf_parallel_engines(void *arg) int i915_request_perf_selftests(struct drm_i915_private *i915) { static const struct i915_subtest tests[] = { + SUBTEST(perf_request_latency), SUBTEST(perf_series_engines), SUBTEST(perf_parallel_engines), }; From b86fc6e5e89e5645b43f57171c26740ef38f9f4a Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 19 May 2020 
07:31:12 +0100 Subject: [PATCH 034/222] drm/i915: Don't set queue-priority hint when suppressing the reschedule We recorded the execlists->queue_priority_hint update for the inflight request without kicking the tasklet. The next submitted request then failed to be scheduled as it had a lower priority than the hint, leaving the HW running with only the inflight request. Fixes: 6cebcf746f3f ("drm/i915: Tweak scheduler's kick_submission()") Signed-off-by: Chris Wilson Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20200519063123.20673-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_scheduler.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c index f4ea318781f0..cbb880b10c65 100644 --- a/drivers/gpu/drm/i915/i915_scheduler.c +++ b/drivers/gpu/drm/i915/i915_scheduler.c @@ -209,14 +209,6 @@ static void kick_submission(struct intel_engine_cs *engine, if (!inflight) goto unlock; - ENGINE_TRACE(engine, - "bumping queue-priority-hint:%d for rq:%llx:%lld, inflight:%llx:%lld prio %d\n", - prio, - rq->fence.context, rq->fence.seqno, - inflight->fence.context, inflight->fence.seqno, - inflight->sched.attr.priority); - engine->execlists.queue_priority_hint = prio; - /* * If we are already the currently executing context, don't * bother evaluating if we should preempt ourselves.
@@ -224,6 +216,14 @@ static void kick_submission(struct intel_engine_cs *engine, if (inflight->context == rq->context) goto unlock; + ENGINE_TRACE(engine, + "bumping queue-priority-hint:%d for rq:%llx:%lld, inflight:%llx:%lld prio %d\n", + prio, + rq->fence.context, rq->fence.seqno, + inflight->fence.context, inflight->fence.seqno, + inflight->sched.attr.priority); + + engine->execlists.queue_priority_hint = prio; if (need_preempt(prio, rq_prio(inflight))) tasklet_hi_schedule(&engine->execlists.tasklet); From ad6586850b6d718b1245bad6702028c1c773cfb9 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 19 May 2020 07:31:13 +0100 Subject: [PATCH 035/222] drm/i915/selftests: Change priority overflow detection Check for integer overflow in the priority chain, rather than against a type-constricted max-priority check. Signed-off-by: Chris Wilson Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20200519063123.20673-2-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/selftest_lrc.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c index 94854a467e66..3e042fa4b94b 100644 --- a/drivers/gpu/drm/i915/gt/selftest_lrc.c +++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c @@ -2735,12 +2735,12 @@ static int live_preempt_gang(void *arg) /* Submit each spinner at increasing priority */ engine->schedule(rq, &attr); + if (prio < attr.priority) + break; + if (prio <= I915_PRIORITY_MAX) continue; - if (prio > (INT_MAX >> I915_USER_PRIORITY_SHIFT)) - break; - if (__igt_timeout(end_time, NULL)) break; } while (1); From 3a230a554dbbc6cd5016cf1b56ee77cfcd48c7d8 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 19 May 2020 07:31:14 +0100 Subject: [PATCH 036/222] drm/i915/selftests: Restore to default heartbeat Since we temporarily disable the heartbeat and restore back to the default value, we can use the stored defaults on the engine and avoid using a 
local. Signed-off-by: Chris Wilson Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20200519063123.20673-3-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/selftest_hangcheck.c | 25 +++---- drivers/gpu/drm/i915/gt/selftest_lrc.c | 67 +++++++------------ drivers/gpu/drm/i915/gt/selftest_rps.c | 69 ++++++++------------ drivers/gpu/drm/i915/gt/selftest_timeline.c | 15 ++--- 4 files changed, 67 insertions(+), 109 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c index 2b2efff6e19d..4aa4cc917d8b 100644 --- a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c +++ b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c @@ -310,22 +310,20 @@ static bool wait_until_running(struct hang *h, struct i915_request *rq) 1000)); } -static void engine_heartbeat_disable(struct intel_engine_cs *engine, - unsigned long *saved) +static void engine_heartbeat_disable(struct intel_engine_cs *engine) { - *saved = engine->props.heartbeat_interval_ms; engine->props.heartbeat_interval_ms = 0; intel_engine_pm_get(engine); intel_engine_park_heartbeat(engine); } -static void engine_heartbeat_enable(struct intel_engine_cs *engine, - unsigned long saved) +static void engine_heartbeat_enable(struct intel_engine_cs *engine) { intel_engine_pm_put(engine); - engine->props.heartbeat_interval_ms = saved; + engine->props.heartbeat_interval_ms = + engine->defaults.heartbeat_interval_ms; } static int igt_hang_sanitycheck(void *arg) @@ -473,7 +471,6 @@ static int igt_reset_nop_engine(void *arg) for_each_engine(engine, gt, id) { unsigned int reset_count, reset_engine_count, count; struct intel_context *ce; - unsigned long heartbeat; IGT_TIMEOUT(end_time); int err; @@ -485,7 +482,7 @@ static int igt_reset_nop_engine(void *arg) reset_engine_count = i915_reset_engine_count(global, engine); count = 0; - engine_heartbeat_disable(engine, &heartbeat); + engine_heartbeat_disable(engine); set_bit(I915_RESET_ENGINE + id, 
>->reset.flags); do { int i; @@ -529,7 +526,7 @@ static int igt_reset_nop_engine(void *arg) } } while (time_before(jiffies, end_time)); clear_bit(I915_RESET_ENGINE + id, >->reset.flags); - engine_heartbeat_enable(engine, heartbeat); + engine_heartbeat_enable(engine); pr_info("%s(%s): %d resets\n", __func__, engine->name, count); @@ -564,7 +561,6 @@ static int __igt_reset_engine(struct intel_gt *gt, bool active) for_each_engine(engine, gt, id) { unsigned int reset_count, reset_engine_count; - unsigned long heartbeat; IGT_TIMEOUT(end_time); if (active && !intel_engine_can_store_dword(engine)) @@ -580,7 +576,7 @@ static int __igt_reset_engine(struct intel_gt *gt, bool active) reset_count = i915_reset_count(global); reset_engine_count = i915_reset_engine_count(global, engine); - engine_heartbeat_disable(engine, &heartbeat); + engine_heartbeat_disable(engine); set_bit(I915_RESET_ENGINE + id, >->reset.flags); do { if (active) { @@ -632,7 +628,7 @@ static int __igt_reset_engine(struct intel_gt *gt, bool active) } } while (time_before(jiffies, end_time)); clear_bit(I915_RESET_ENGINE + id, >->reset.flags); - engine_heartbeat_enable(engine, heartbeat); + engine_heartbeat_enable(engine); if (err) break; @@ -789,7 +785,6 @@ static int __igt_reset_engines(struct intel_gt *gt, struct active_engine threads[I915_NUM_ENGINES] = {}; unsigned long device = i915_reset_count(global); unsigned long count = 0, reported; - unsigned long heartbeat; IGT_TIMEOUT(end_time); if (flags & TEST_ACTIVE && @@ -832,7 +827,7 @@ static int __igt_reset_engines(struct intel_gt *gt, yield(); /* start all threads before we begin */ - engine_heartbeat_disable(engine, &heartbeat); + engine_heartbeat_disable(engine); set_bit(I915_RESET_ENGINE + id, >->reset.flags); do { struct i915_request *rq = NULL; @@ -906,7 +901,7 @@ static int __igt_reset_engines(struct intel_gt *gt, } } while (time_before(jiffies, end_time)); clear_bit(I915_RESET_ENGINE + id, >->reset.flags); - engine_heartbeat_enable(engine, 
heartbeat); + engine_heartbeat_enable(engine); pr_info("i915_reset_engine(%s:%s): %lu resets\n", engine->name, test_name, count); diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c index 3e042fa4b94b..b71f04db9c6e 100644 --- a/drivers/gpu/drm/i915/gt/selftest_lrc.c +++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c @@ -51,22 +51,20 @@ static struct i915_vma *create_scratch(struct intel_gt *gt) return vma; } -static void engine_heartbeat_disable(struct intel_engine_cs *engine, - unsigned long *saved) +static void engine_heartbeat_disable(struct intel_engine_cs *engine) { - *saved = engine->props.heartbeat_interval_ms; engine->props.heartbeat_interval_ms = 0; intel_engine_pm_get(engine); intel_engine_park_heartbeat(engine); } -static void engine_heartbeat_enable(struct intel_engine_cs *engine, - unsigned long saved) +static void engine_heartbeat_enable(struct intel_engine_cs *engine) { intel_engine_pm_put(engine); - engine->props.heartbeat_interval_ms = saved; + engine->props.heartbeat_interval_ms = + engine->defaults.heartbeat_interval_ms; } static bool is_active(struct i915_request *rq) @@ -224,7 +222,6 @@ static int live_unlite_restore(struct intel_gt *gt, int prio) struct intel_context *ce[2] = {}; struct i915_request *rq[2]; struct igt_live_test t; - unsigned long saved; int n; if (prio && !intel_engine_has_preemption(engine)) @@ -237,7 +234,7 @@ static int live_unlite_restore(struct intel_gt *gt, int prio) err = -EIO; break; } - engine_heartbeat_disable(engine, &saved); + engine_heartbeat_disable(engine); for (n = 0; n < ARRAY_SIZE(ce); n++) { struct intel_context *tmp; @@ -345,7 +342,7 @@ err_ce: intel_context_put(ce[n]); } - engine_heartbeat_enable(engine, saved); + engine_heartbeat_enable(engine); if (igt_live_test_end(&t)) err = -EIO; if (err) @@ -466,7 +463,6 @@ static int live_hold_reset(void *arg) for_each_engine(engine, gt, id) { struct intel_context *ce; - unsigned long heartbeat; struct i915_request *rq; ce = 
intel_context_create(engine); @@ -475,7 +471,7 @@ static int live_hold_reset(void *arg) break; } - engine_heartbeat_disable(engine, &heartbeat); + engine_heartbeat_disable(engine); rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK); if (IS_ERR(rq)) { @@ -535,7 +531,7 @@ static int live_hold_reset(void *arg) i915_request_put(rq); out: - engine_heartbeat_enable(engine, heartbeat); + engine_heartbeat_enable(engine); intel_context_put(ce); if (err) break; @@ -580,10 +576,9 @@ static int live_error_interrupt(void *arg) for_each_engine(engine, gt, id) { const struct error_phase *p; - unsigned long heartbeat; int err = 0; - engine_heartbeat_disable(engine, &heartbeat); + engine_heartbeat_disable(engine); for (p = phases; p->error[0] != GOOD; p++) { struct i915_request *client[ARRAY_SIZE(phases->error)]; @@ -682,7 +677,7 @@ out: } } - engine_heartbeat_enable(engine, heartbeat); + engine_heartbeat_enable(engine); if (err) { intel_gt_set_wedged(gt); return err; @@ -895,16 +890,14 @@ static int live_timeslice_preempt(void *arg) enum intel_engine_id id; for_each_engine(engine, gt, id) { - unsigned long saved; - if (!intel_engine_has_preemption(engine)) continue; memset(vaddr, 0, PAGE_SIZE); - engine_heartbeat_disable(engine, &saved); + engine_heartbeat_disable(engine); err = slice_semaphore_queue(engine, vma, count); - engine_heartbeat_enable(engine, saved); + engine_heartbeat_enable(engine); if (err) goto err_pin; @@ -1009,7 +1002,6 @@ static int live_timeslice_rewind(void *arg) enum { X = 1, Z, Y }; struct i915_request *rq[3] = {}; struct intel_context *ce; - unsigned long heartbeat; unsigned long timeslice; int i, err = 0; u32 *slot; @@ -1028,7 +1020,7 @@ static int live_timeslice_rewind(void *arg) * Expect execution/evaluation order XZY */ - engine_heartbeat_disable(engine, &heartbeat); + engine_heartbeat_disable(engine); timeslice = xchg(&engine->props.timeslice_duration_ms, 1); slot = memset32(engine->status_page.addr + 1000, 0, 4); @@ -1122,7 +1114,7 @@ err: 
wmb(); engine->props.timeslice_duration_ms = timeslice; - engine_heartbeat_enable(engine, heartbeat); + engine_heartbeat_enable(engine); for (i = 0; i < 3; i++) i915_request_put(rq[i]); if (igt_flush_test(gt->i915)) @@ -1202,12 +1194,11 @@ static int live_timeslice_queue(void *arg) .priority = I915_USER_PRIORITY(I915_PRIORITY_MAX), }; struct i915_request *rq, *nop; - unsigned long saved; if (!intel_engine_has_preemption(engine)) continue; - engine_heartbeat_disable(engine, &saved); + engine_heartbeat_disable(engine); memset(vaddr, 0, PAGE_SIZE); /* ELSP[0]: semaphore wait */ @@ -1284,7 +1275,7 @@ static int live_timeslice_queue(void *arg) err_rq: i915_request_put(rq); err_heartbeat: - engine_heartbeat_enable(engine, saved); + engine_heartbeat_enable(engine); if (err) break; } @@ -4145,7 +4136,6 @@ static int reset_virtual_engine(struct intel_gt *gt, { struct intel_engine_cs *engine; struct intel_context *ve; - unsigned long *heartbeat; struct igt_spinner spin; struct i915_request *rq; unsigned int n; @@ -4157,15 +4147,9 @@ static int reset_virtual_engine(struct intel_gt *gt, * descendents are not executed while the capture is in progress. 
*/ - heartbeat = kmalloc_array(nsibling, sizeof(*heartbeat), GFP_KERNEL); - if (!heartbeat) + if (igt_spinner_init(&spin, gt)) return -ENOMEM; - if (igt_spinner_init(&spin, gt)) { - err = -ENOMEM; - goto out_free; - } - ve = intel_execlists_create_virtual(siblings, nsibling); if (IS_ERR(ve)) { err = PTR_ERR(ve); @@ -4173,7 +4157,7 @@ static int reset_virtual_engine(struct intel_gt *gt, } for (n = 0; n < nsibling; n++) - engine_heartbeat_disable(siblings[n], &heartbeat[n]); + engine_heartbeat_disable(siblings[n]); rq = igt_spinner_create_request(&spin, ve, MI_ARB_CHECK); if (IS_ERR(rq)) { @@ -4244,13 +4228,11 @@ out_rq: i915_request_put(rq); out_heartbeat: for (n = 0; n < nsibling; n++) - engine_heartbeat_enable(siblings[n], heartbeat[n]); + engine_heartbeat_enable(siblings[n]); intel_context_put(ve); out_spin: igt_spinner_fini(&spin); -out_free: - kfree(heartbeat); return err; } @@ -4918,9 +4900,7 @@ static int live_lrc_gpr(void *arg) return PTR_ERR(scratch); for_each_engine(engine, gt, id) { - unsigned long heartbeat; - - engine_heartbeat_disable(engine, &heartbeat); + engine_heartbeat_disable(engine); err = __live_lrc_gpr(engine, scratch, false); if (err) @@ -4931,7 +4911,7 @@ static int live_lrc_gpr(void *arg) goto err; err: - engine_heartbeat_enable(engine, heartbeat); + engine_heartbeat_enable(engine); if (igt_flush_test(gt->i915)) err = -EIO; if (err) @@ -5078,10 +5058,9 @@ static int live_lrc_timestamp(void *arg) */ for_each_engine(data.engine, gt, id) { - unsigned long heartbeat; int i, err = 0; - engine_heartbeat_disable(data.engine, &heartbeat); + engine_heartbeat_disable(data.engine); for (i = 0; i < ARRAY_SIZE(data.ce); i++) { struct intel_context *tmp; @@ -5114,7 +5093,7 @@ static int live_lrc_timestamp(void *arg) } err: - engine_heartbeat_enable(data.engine, heartbeat); + engine_heartbeat_enable(data.engine); for (i = 0; i < ARRAY_SIZE(data.ce); i++) { if (!data.ce[i]) break; diff --git a/drivers/gpu/drm/i915/gt/selftest_rps.c 
b/drivers/gpu/drm/i915/gt/selftest_rps.c index 6275d69aa9cc..5049c3dd08a6 100644 --- a/drivers/gpu/drm/i915/gt/selftest_rps.c +++ b/drivers/gpu/drm/i915/gt/selftest_rps.c @@ -20,24 +20,20 @@ /* Try to isolate the impact of cstates from determing frequency response */ #define CPU_LATENCY 0 /* -1 to disable pm_qos, 0 to disable cstates */ -static unsigned long engine_heartbeat_disable(struct intel_engine_cs *engine) +static void engine_heartbeat_disable(struct intel_engine_cs *engine) { - unsigned long old; - - old = fetch_and_zero(&engine->props.heartbeat_interval_ms); + engine->props.heartbeat_interval_ms = 0; intel_engine_pm_get(engine); intel_engine_park_heartbeat(engine); - - return old; } -static void engine_heartbeat_enable(struct intel_engine_cs *engine, - unsigned long saved) +static void engine_heartbeat_enable(struct intel_engine_cs *engine) { intel_engine_pm_put(engine); - engine->props.heartbeat_interval_ms = saved; + engine->props.heartbeat_interval_ms = + engine->defaults.heartbeat_interval_ms; } static void dummy_rps_work(struct work_struct *wrk) @@ -246,7 +242,6 @@ int live_rps_clock_interval(void *arg) intel_gt_check_clock_frequency(gt); for_each_engine(engine, gt, id) { - unsigned long saved_heartbeat; struct i915_request *rq; u32 cycles; u64 dt; @@ -254,13 +249,13 @@ int live_rps_clock_interval(void *arg) if (!intel_engine_can_store_dword(engine)) continue; - saved_heartbeat = engine_heartbeat_disable(engine); + engine_heartbeat_disable(engine); rq = igt_spinner_create_request(&spin, engine->kernel_context, MI_NOOP); if (IS_ERR(rq)) { - engine_heartbeat_enable(engine, saved_heartbeat); + engine_heartbeat_enable(engine); err = PTR_ERR(rq); break; } @@ -271,7 +266,7 @@ int live_rps_clock_interval(void *arg) pr_err("%s: RPS spinner did not start\n", engine->name); igt_spinner_end(&spin); - engine_heartbeat_enable(engine, saved_heartbeat); + engine_heartbeat_enable(engine); intel_gt_set_wedged(engine->gt); err = -EIO; break; @@ -327,7 +322,7 @@ int 
live_rps_clock_interval(void *arg) intel_uncore_forcewake_put(gt->uncore, FORCEWAKE_ALL); igt_spinner_end(&spin); - engine_heartbeat_enable(engine, saved_heartbeat); + engine_heartbeat_enable(engine); if (err == 0) { u64 time = intel_gt_pm_interval_to_ns(gt, cycles); @@ -405,7 +400,6 @@ int live_rps_control(void *arg) intel_gt_pm_get(gt); for_each_engine(engine, gt, id) { - unsigned long saved_heartbeat; struct i915_request *rq; ktime_t min_dt, max_dt; int f, limit; @@ -414,7 +408,7 @@ int live_rps_control(void *arg) if (!intel_engine_can_store_dword(engine)) continue; - saved_heartbeat = engine_heartbeat_disable(engine); + engine_heartbeat_disable(engine); rq = igt_spinner_create_request(&spin, engine->kernel_context, @@ -430,7 +424,7 @@ int live_rps_control(void *arg) pr_err("%s: RPS spinner did not start\n", engine->name); igt_spinner_end(&spin); - engine_heartbeat_enable(engine, saved_heartbeat); + engine_heartbeat_enable(engine); intel_gt_set_wedged(engine->gt); err = -EIO; break; @@ -440,7 +434,7 @@ int live_rps_control(void *arg) pr_err("%s: could not set minimum frequency [%x], only %x!\n", engine->name, rps->min_freq, read_cagf(rps)); igt_spinner_end(&spin); - engine_heartbeat_enable(engine, saved_heartbeat); + engine_heartbeat_enable(engine); show_pstate_limits(rps); err = -EINVAL; break; @@ -457,7 +451,7 @@ int live_rps_control(void *arg) pr_err("%s: could not restore minimum frequency [%x], only %x!\n", engine->name, rps->min_freq, read_cagf(rps)); igt_spinner_end(&spin); - engine_heartbeat_enable(engine, saved_heartbeat); + engine_heartbeat_enable(engine); show_pstate_limits(rps); err = -EINVAL; break; @@ -472,7 +466,7 @@ int live_rps_control(void *arg) min_dt = ktime_sub(ktime_get(), min_dt); igt_spinner_end(&spin); - engine_heartbeat_enable(engine, saved_heartbeat); + engine_heartbeat_enable(engine); pr_info("%s: range:[%x:%uMHz, %x:%uMHz] limit:[%x:%uMHz], %x:%x response %lluns:%lluns\n", engine->name, @@ -635,7 +629,6 @@ int 
live_rps_frequency_cs(void *arg) rps->work.func = dummy_rps_work; for_each_engine(engine, gt, id) { - unsigned long saved_heartbeat; struct i915_request *rq; struct i915_vma *vma; u32 *cancel, *cntr; @@ -644,14 +637,14 @@ int live_rps_frequency_cs(void *arg) int freq; } min, max; - saved_heartbeat = engine_heartbeat_disable(engine); + engine_heartbeat_disable(engine); vma = create_spin_counter(engine, engine->kernel_context->vm, false, &cancel, &cntr); if (IS_ERR(vma)) { err = PTR_ERR(vma); - engine_heartbeat_enable(engine, saved_heartbeat); + engine_heartbeat_enable(engine); break; } @@ -732,7 +725,7 @@ err_vma: i915_vma_unpin(vma); i915_vma_put(vma); - engine_heartbeat_enable(engine, saved_heartbeat); + engine_heartbeat_enable(engine); if (igt_flush_test(gt->i915)) err = -EIO; if (err) @@ -778,7 +771,6 @@ int live_rps_frequency_srm(void *arg) rps->work.func = dummy_rps_work; for_each_engine(engine, gt, id) { - unsigned long saved_heartbeat; struct i915_request *rq; struct i915_vma *vma; u32 *cancel, *cntr; @@ -787,14 +779,14 @@ int live_rps_frequency_srm(void *arg) int freq; } min, max; - saved_heartbeat = engine_heartbeat_disable(engine); + engine_heartbeat_disable(engine); vma = create_spin_counter(engine, engine->kernel_context->vm, true, &cancel, &cntr); if (IS_ERR(vma)) { err = PTR_ERR(vma); - engine_heartbeat_enable(engine, saved_heartbeat); + engine_heartbeat_enable(engine); break; } @@ -874,7 +866,7 @@ err_vma: i915_vma_unpin(vma); i915_vma_put(vma); - engine_heartbeat_enable(engine, saved_heartbeat); + engine_heartbeat_enable(engine); if (igt_flush_test(gt->i915)) err = -EIO; if (err) @@ -1066,16 +1058,14 @@ int live_rps_interrupt(void *arg) for_each_engine(engine, gt, id) { /* Keep the engine busy with a spinner; expect an UP! 
*/ if (pm_events & GEN6_PM_RP_UP_THRESHOLD) { - unsigned long saved_heartbeat; - intel_gt_pm_wait_for_idle(engine->gt); GEM_BUG_ON(intel_rps_is_active(rps)); - saved_heartbeat = engine_heartbeat_disable(engine); + engine_heartbeat_disable(engine); err = __rps_up_interrupt(rps, engine, &spin); - engine_heartbeat_enable(engine, saved_heartbeat); + engine_heartbeat_enable(engine); if (err) goto out; @@ -1084,15 +1074,13 @@ int live_rps_interrupt(void *arg) /* Keep the engine awake but idle and check for DOWN */ if (pm_events & GEN6_PM_RP_DOWN_THRESHOLD) { - unsigned long saved_heartbeat; - - saved_heartbeat = engine_heartbeat_disable(engine); + engine_heartbeat_disable(engine); intel_rc6_disable(&gt->rc6); err = __rps_down_interrupt(rps, engine); intel_rc6_enable(&gt->rc6); - engine_heartbeat_enable(engine, saved_heartbeat); + engine_heartbeat_enable(engine); if (err) goto out; } @@ -1168,7 +1156,6 @@ int live_rps_power(void *arg) rps->work.func = dummy_rps_work; for_each_engine(engine, gt, id) { - unsigned long saved_heartbeat; struct i915_request *rq; struct { u64 power; @@ -1178,13 +1165,13 @@ int live_rps_power(void *arg) if (!intel_engine_can_store_dword(engine)) continue; - saved_heartbeat = engine_heartbeat_disable(engine); + engine_heartbeat_disable(engine); rq = igt_spinner_create_request(&spin, engine->kernel_context, MI_NOOP); if (IS_ERR(rq)) { - engine_heartbeat_enable(engine, saved_heartbeat); + engine_heartbeat_enable(engine); err = PTR_ERR(rq); break; } @@ -1195,7 +1182,7 @@ int live_rps_power(void *arg) pr_err("%s: RPS spinner did not start\n", engine->name); igt_spinner_end(&spin); - engine_heartbeat_enable(engine, saved_heartbeat); + engine_heartbeat_enable(engine); intel_gt_set_wedged(engine->gt); err = -EIO; break; @@ -1208,7 +1195,7 @@ int live_rps_power(void *arg) min.power = measure_power_at(rps, &min.freq); igt_spinner_end(&spin); - engine_heartbeat_enable(engine, saved_heartbeat); + engine_heartbeat_enable(engine); pr_info("%s: min:%llumW @ %uMHz,
max:%llumW @ %uMHz\n", engine->name, diff --git a/drivers/gpu/drm/i915/gt/selftest_timeline.c b/drivers/gpu/drm/i915/gt/selftest_timeline.c index c2578a0f2f14..ef1c35073dc0 100644 --- a/drivers/gpu/drm/i915/gt/selftest_timeline.c +++ b/drivers/gpu/drm/i915/gt/selftest_timeline.c @@ -751,22 +751,20 @@ out_free: return err; } -static void engine_heartbeat_disable(struct intel_engine_cs *engine, - unsigned long *saved) +static void engine_heartbeat_disable(struct intel_engine_cs *engine) { - *saved = engine->props.heartbeat_interval_ms; engine->props.heartbeat_interval_ms = 0; intel_engine_pm_get(engine); intel_engine_park_heartbeat(engine); } -static void engine_heartbeat_enable(struct intel_engine_cs *engine, - unsigned long saved) +static void engine_heartbeat_enable(struct intel_engine_cs *engine) { intel_engine_pm_put(engine); - engine->props.heartbeat_interval_ms = saved; + engine->props.heartbeat_interval_ms = + engine->defaults.heartbeat_interval_ms; } static int live_hwsp_rollover_kernel(void *arg) @@ -785,10 +783,9 @@ static int live_hwsp_rollover_kernel(void *arg) struct intel_context *ce = engine->kernel_context; struct intel_timeline *tl = ce->timeline; struct i915_request *rq[3] = {}; - unsigned long heartbeat; int i; - engine_heartbeat_disable(engine, &heartbeat); + engine_heartbeat_disable(engine); if (intel_gt_wait_for_idle(gt, HZ / 2)) { err = -EIO; goto out; @@ -839,7 +836,7 @@ static int live_hwsp_rollover_kernel(void *arg) out: for (i = 0; i < ARRAY_SIZE(rq); i++) i915_request_put(rq[i]); - engine_heartbeat_enable(engine, heartbeat); + engine_heartbeat_enable(engine); if (err) break; } From f73fbb5a59a7060f81afe4f924a69ab6bff0c46d Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 19 May 2020 07:31:15 +0100 Subject: [PATCH 037/222] drm/i915/selftests: Check for an initial-breadcrumb in wait_for_submit() When we look at i915_request_is_started() we must be careful in case we are using a request that does not have the initial-breadcrumb and 
instead the is-started is being compared against the end of the previous request. This will make wait_for_submit() declare that a request has been already submitted too early. Signed-off-by: Chris Wilson Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20200519063123.20673-4-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/selftest_lrc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c index b71f04db9c6e..f6949cd55e92 100644 --- a/drivers/gpu/drm/i915/gt/selftest_lrc.c +++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c @@ -75,7 +75,7 @@ static bool is_active(struct i915_request *rq) if (i915_request_on_hold(rq)) return true; - if (i915_request_started(rq)) + if (i915_request_has_initial_breadcrumb(rq) && i915_request_started(rq)) return true; return false; From 4fe13f28d66ab568bb7303130b79347aa47f7ceb Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 19 May 2020 14:20:44 +0100 Subject: [PATCH 038/222] drm/i915/selftests: Add tests for timeslicing virtual engines Make sure that we can execute a virtual request on an already busy engine, and conversely that we can execute a normal request if the engines are already fully occupied by virtual requests. 
Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20200519132046.22443-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/selftest_lrc.c | 200 ++++++++++++++++++++++++- 1 file changed, 197 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c index f6949cd55e92..ef38dd52945c 100644 --- a/drivers/gpu/drm/i915/gt/selftest_lrc.c +++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c @@ -3591,9 +3591,11 @@ out: return err; } -static unsigned int select_siblings(struct intel_gt *gt, - unsigned int class, - struct intel_engine_cs **siblings) +static unsigned int +__select_siblings(struct intel_gt *gt, + unsigned int class, + struct intel_engine_cs **siblings, + bool (*filter)(const struct intel_engine_cs *)) { unsigned int n = 0; unsigned int inst; @@ -3602,12 +3604,23 @@ static unsigned int select_siblings(struct intel_gt *gt, if (!gt->engine_class[class][inst]) continue; + if (filter && !filter(gt->engine_class[class][inst])) + continue; + siblings[n++] = gt->engine_class[class][inst]; } return n; } +static unsigned int +select_siblings(struct intel_gt *gt, + unsigned int class, + struct intel_engine_cs **siblings) +{ + return __select_siblings(gt, class, siblings, NULL); +} + static int live_virtual_engine(void *arg) { struct intel_gt *gt = arg; @@ -3762,6 +3775,186 @@ static int live_virtual_mask(void *arg) return 0; } +static long slice_timeout(struct intel_engine_cs *engine) +{ + long timeout; + + /* Enough time for a timeslice to kick in, and kick out */ + timeout = 2 * msecs_to_jiffies_timeout(timeslice(engine)); + + /* Enough time for the nop request to complete */ + timeout += HZ / 5; + + return timeout; +} + +static int slicein_virtual_engine(struct intel_gt *gt, + struct intel_engine_cs **siblings, + unsigned int nsibling) +{ + const long timeout = slice_timeout(siblings[0]); + struct intel_context *ce; + struct i915_request *rq; + struct 
igt_spinner spin; + unsigned int n; + int err = 0; + + /* + * Virtual requests must take part in timeslicing on the target engines. + */ + + if (igt_spinner_init(&spin, gt)) + return -ENOMEM; + + for (n = 0; n < nsibling; n++) { + ce = intel_context_create(siblings[n]); + if (IS_ERR(ce)) { + err = PTR_ERR(ce); + goto out; + } + + rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK); + intel_context_put(ce); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto out; + } + + i915_request_add(rq); + } + + ce = intel_execlists_create_virtual(siblings, nsibling); + if (IS_ERR(ce)) { + err = PTR_ERR(ce); + goto out; + } + + rq = intel_context_create_request(ce); + intel_context_put(ce); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto out; + } + + i915_request_get(rq); + i915_request_add(rq); + if (i915_request_wait(rq, 0, timeout) < 0) { + GEM_TRACE_ERR("%s(%s) failed to slice in virtual request\n", + __func__, rq->engine->name); + GEM_TRACE_DUMP(); + intel_gt_set_wedged(gt); + err = -EIO; + } + i915_request_put(rq); + +out: + igt_spinner_end(&spin); + if (igt_flush_test(gt->i915)) + err = -EIO; + igt_spinner_fini(&spin); + return err; +} + +static int sliceout_virtual_engine(struct intel_gt *gt, + struct intel_engine_cs **siblings, + unsigned int nsibling) +{ + const long timeout = slice_timeout(siblings[0]); + struct intel_context *ce; + struct i915_request *rq; + struct igt_spinner spin; + unsigned int n; + int err = 0; + + /* + * Virtual requests must allow others a fair timeslice. 
+ */ + + if (igt_spinner_init(&spin, gt)) + return -ENOMEM; + + /* XXX We do not handle oversubscription and fairness with normal rq */ + for (n = 0; n < nsibling; n++) { + ce = intel_execlists_create_virtual(siblings, nsibling); + if (IS_ERR(ce)) { + err = PTR_ERR(ce); + goto out; + } + + rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK); + intel_context_put(ce); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto out; + } + + i915_request_add(rq); + } + + for (n = 0; !err && n < nsibling; n++) { + ce = intel_context_create(siblings[n]); + if (IS_ERR(ce)) { + err = PTR_ERR(ce); + goto out; + } + + rq = intel_context_create_request(ce); + intel_context_put(ce); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto out; + } + + i915_request_get(rq); + i915_request_add(rq); + if (i915_request_wait(rq, 0, timeout) < 0) { + GEM_TRACE_ERR("%s(%s) failed to slice out virtual request\n", + __func__, siblings[n]->name); + GEM_TRACE_DUMP(); + intel_gt_set_wedged(gt); + err = -EIO; + } + i915_request_put(rq); + } + +out: + igt_spinner_end(&spin); + if (igt_flush_test(gt->i915)) + err = -EIO; + igt_spinner_fini(&spin); + return err; +} + +static int live_virtual_slice(void *arg) +{ + struct intel_gt *gt = arg; + struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1]; + unsigned int class; + int err; + + if (intel_uc_uses_guc_submission(&gt->uc)) + return 0; + + for (class = 0; class <= MAX_ENGINE_CLASS; class++) { + unsigned int nsibling; + + nsibling = __select_siblings(gt, class, siblings, + intel_engine_has_timeslices); + if (nsibling < 2) + continue; + + err = slicein_virtual_engine(gt, siblings, nsibling); + if (err) + return err; + + err = sliceout_virtual_engine(gt, siblings, nsibling); + if (err) + return err; + } + + return 0; +} + static int preserved_virtual_engine(struct intel_gt *gt, struct intel_engine_cs **siblings, unsigned int nsibling) @@ -4297,6 +4490,7 @@ int intel_execlists_live_selftests(struct drm_i915_private *i915) SUBTEST(live_virtual_engine),
SUBTEST(live_virtual_mask), SUBTEST(live_virtual_preserved), + SUBTEST(live_virtual_slice), SUBTEST(live_virtual_bond), SUBTEST(live_virtual_reset), }; From 1ee05f9e6d32a43c029d31958ff91e5ca186ad4d Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 19 May 2020 14:20:45 +0100 Subject: [PATCH 039/222] drm/i915/gt: Kick virtual siblings on timeslice out If we decide to timeslice out the current virtual request, we will unsubmit it while it is still busy (ve->context.inflight == sibling[0]). If the virtual tasklet and then the other sibling tasklets run before we completely schedule out the active virtual request for the preemption, those other tasklets will see that the virtual request is still inflight on sibling[0] and leave it be. Therefore when we finally schedule-out the virtual request and if we see that we have passed it back to the virtual engine, reschedule the virtual tasklet so that it may be resubmitted on any of the siblings. Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20200519132046.22443-2-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/intel_lrc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index d7ef3f8640d2..7ee89d58258a 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -1402,7 +1402,7 @@ static void kick_siblings(struct i915_request *rq, struct intel_context *ce) struct virtual_engine *ve = container_of(ce, typeof(*ve), context); struct i915_request *next = READ_ONCE(ve->request); - if (next && next->execution_mask & ~rq->execution_mask) + if (next == rq || (next && next->execution_mask & ~rq->execution_mask)) tasklet_hi_schedule(&ve->base.execlists.tasklet); } From 6ad249ba59badc7ff157d4db1f835748f0e2c9b6 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 19 May 2020 14:20:46 +0100 Subject: [PATCH 040/222] drm/i915/gt: Incorporate the
virtual engine into timeslicing It was quite the oversight to only factor in the normal queue to decide the timeslicing switch priority. By leaving out the next virtual request from the priority decision, we would not timeslice the current engine if there was an available virtual request. Testcase: igt/gem_exec_balancer/sliced Fixes: 3df2deed411e ("drm/i915/execlists: Enable timeslice on partial virtual engine dequeue") Signed-off-by: Chris Wilson Cc: Mika Kuoppala Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20200519132046.22443-3-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/intel_lrc.c | 30 +++++++++++++++++++++++------ 1 file changed, 24 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index 7ee89d58258a..de5be57ed6d2 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -1895,7 +1895,8 @@ static void defer_active(struct intel_engine_cs *engine) static bool need_timeslice(const struct intel_engine_cs *engine, - const struct i915_request *rq) + const struct i915_request *rq, + const struct rb_node *rb) { int hint; @@ -1903,6 +1904,24 @@ need_timeslice(const struct intel_engine_cs *engine, return false; hint = engine->execlists.queue_priority_hint; + + if (rb) { + const struct virtual_engine *ve = + rb_entry(rb, typeof(*ve), nodes[engine->id].rb); + const struct intel_engine_cs *inflight = + intel_context_inflight(&ve->context); + + if (!inflight || inflight == engine) { + struct i915_request *next; + + rcu_read_lock(); + next = READ_ONCE(ve->request); + if (next) + hint = max(hint, rq_prio(next)); + rcu_read_unlock(); + } + } + if (!list_is_last(&rq->sched.link, &engine->active.requests)) hint = max(hint, rq_prio(list_next_entry(rq, sched.link))); @@ -1977,10 +1996,9 @@ static void set_timeslice(struct intel_engine_cs *engine) set_timer_ms(&engine->execlists.timer, duration); } -static void 
start_timeslice(struct intel_engine_cs *engine) +static void start_timeslice(struct intel_engine_cs *engine, int prio) { struct intel_engine_execlists *execlists = &engine->execlists; - const int prio = queue_prio(execlists); unsigned long duration; if (!intel_engine_has_timeslices(engine)) @@ -2140,7 +2158,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) __unwind_incomplete_requests(engine); last = NULL; - } else if (need_timeslice(engine, last) && + } else if (need_timeslice(engine, last, rb) && timeslice_expired(execlists, last)) { if (i915_request_completed(last)) { tasklet_hi_schedule(&execlists->tasklet); @@ -2188,7 +2206,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) * Even if ELSP[1] is occupied and not worthy * of timeslices, our queue might be. */ - start_timeslice(engine); + start_timeslice(engine, queue_prio(execlists)); return; } } @@ -2223,7 +2241,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) if (last && !can_merge_rq(last, rq)) { spin_unlock(&ve->base.active.lock); - start_timeslice(engine); + start_timeslice(engine, rq_prio(rq)); return; /* leave this for another sibling */ } From 123f62de419f2a49449629ef822ed2c393a4781c Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Mon, 4 May 2020 15:52:06 -0700 Subject: [PATCH 041/222] drm/i915/rkl: Add RKL platform info and PCI ids Introduce the basic platform definition, macros, and PCI IDs. 
Bspec: 44501 Cc: Lucas De Marchi Cc: Caz Yokoyama Cc: Aditya Swarup Signed-off-by: Matt Roper Acked-by: Caz Yokoyama Reviewed-by: Anusha Srivatsa Signed-off-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20200504225227.464666-2-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/i915_drv.h | 8 ++++++++ drivers/gpu/drm/i915/i915_pci.c | 10 ++++++++++ drivers/gpu/drm/i915/intel_device_info.c | 1 + drivers/gpu/drm/i915/intel_device_info.h | 1 + include/drm/i915_pciids.h | 9 +++++++++ 5 files changed, 29 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 0abbefa457f8..1fd7fdbed553 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1414,6 +1414,7 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915, #define IS_ICELAKE(dev_priv) IS_PLATFORM(dev_priv, INTEL_ICELAKE) #define IS_ELKHARTLAKE(dev_priv) IS_PLATFORM(dev_priv, INTEL_ELKHARTLAKE) #define IS_TIGERLAKE(dev_priv) IS_PLATFORM(dev_priv, INTEL_TIGERLAKE) +#define IS_ROCKETLAKE(dev_priv) IS_PLATFORM(dev_priv, INTEL_ROCKETLAKE) #define IS_HSW_EARLY_SDV(dev_priv) (IS_HASWELL(dev_priv) && \ (INTEL_DEVID(dev_priv) & 0xFF00) == 0x0C00) #define IS_BDW_ULT(dev_priv) \ @@ -1527,6 +1528,13 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915, #define IS_TGL_REVID(p, since, until) \ (IS_TIGERLAKE(p) && IS_REVID(p, since, until)) +#define RKL_REVID_A0 0x0 +#define RKL_REVID_B0 0x1 +#define RKL_REVID_C0 0x4 + +#define IS_RKL_REVID(p, since, until) \ + (IS_ROCKETLAKE(p) && IS_REVID(p, since, until)) + #define IS_LP(dev_priv) (INTEL_INFO(dev_priv)->is_lp) #define IS_GEN9_LP(dev_priv) (IS_GEN(dev_priv, 9) && IS_LP(dev_priv)) #define IS_GEN9_BC(dev_priv) (IS_GEN(dev_priv, 9) && !IS_LP(dev_priv)) diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index 193048ce3c3a..eb6d4a0c9196 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -863,6 +863,15 @@ static const struct 
intel_device_info tgl_info = { BIT(RCS0) | BIT(BCS0) | BIT(VECS0) | BIT(VCS0) | BIT(VCS2), }; +static const struct intel_device_info rkl_info = { + GEN12_FEATURES, + PLATFORM(INTEL_ROCKETLAKE), + .pipe_mask = BIT(PIPE_A) | BIT(PIPE_B) | BIT(PIPE_C), + .require_force_probe = 1, + .engine_mask = + BIT(RCS0) | BIT(BCS0) | BIT(VECS0) | BIT(VCS0), +}; + #define GEN12_DGFX_FEATURES \ GEN12_FEATURES, \ .is_dgfx = 1 @@ -941,6 +950,7 @@ static const struct pci_device_id pciidlist[] = { INTEL_ICL_11_IDS(&icl_info), INTEL_EHL_IDS(&ehl_info), INTEL_TGL_12_IDS(&tgl_info), + INTEL_RKL_IDS(&rkl_info), {0, 0, 0} }; MODULE_DEVICE_TABLE(pci, pciidlist); diff --git a/drivers/gpu/drm/i915/intel_device_info.c b/drivers/gpu/drm/i915/intel_device_info.c index 8a635bd4d5d8..e5e6836f8fa0 100644 --- a/drivers/gpu/drm/i915/intel_device_info.c +++ b/drivers/gpu/drm/i915/intel_device_info.c @@ -61,6 +61,7 @@ static const char * const platform_names[] = { PLATFORM_NAME(ICELAKE), PLATFORM_NAME(ELKHARTLAKE), PLATFORM_NAME(TIGERLAKE), + PLATFORM_NAME(ROCKETLAKE), }; #undef PLATFORM_NAME diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h index 62e03ffa377e..c912acd06109 100644 --- a/drivers/gpu/drm/i915/intel_device_info.h +++ b/drivers/gpu/drm/i915/intel_device_info.h @@ -80,6 +80,7 @@ enum intel_platform { INTEL_ELKHARTLAKE, /* gen12 */ INTEL_TIGERLAKE, + INTEL_ROCKETLAKE, INTEL_MAX_PLATFORMS }; diff --git a/include/drm/i915_pciids.h b/include/drm/i915_pciids.h index 662d8351c87a..bc989de2aac2 100644 --- a/include/drm/i915_pciids.h +++ b/include/drm/i915_pciids.h @@ -605,4 +605,13 @@ INTEL_VGA_DEVICE(0x9AD9, info), \ INTEL_VGA_DEVICE(0x9AF8, info) +/* RKL */ +#define INTEL_RKL_IDS(info) \ + INTEL_VGA_DEVICE(0x4C80, info), \ + INTEL_VGA_DEVICE(0x4C8A, info), \ + INTEL_VGA_DEVICE(0x4C8B, info), \ + INTEL_VGA_DEVICE(0x4C8C, info), \ + INTEL_VGA_DEVICE(0x4C90, info), \ + INTEL_VGA_DEVICE(0x4C9A, info) + #endif /* _I915_PCIIDS_H */ From 
d727e0b5a2fccf7d938f7465620e9f76a732d498 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Mon, 4 May 2020 15:52:08 -0700 Subject: [PATCH 042/222] drm/i915/rkl: Re-use TGL GuC/HuC firmware RKL uses the same GuC and HuC as TGL and should load the same firmwares. Bspec: 50668 Cc: Anusha Srivatsa Signed-off-by: Matt Roper Reviewed-by: Anusha Srivatsa Signed-off-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20200504225227.464666-4-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c index e1caae93996d..9b6218128d09 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c @@ -47,8 +47,11 @@ void intel_uc_fw_change_status(struct intel_uc_fw *uc_fw, * TGL 35.2 is interface-compatible with 33.0 for previous Gens. The deltas * between 33.0 and 35.2 are only related to new additions to support new Gen12 * features. + * + * Note that RKL uses the same firmware as TGL. 
*/ #define INTEL_UC_FIRMWARE_DEFS(fw_def, guc_def, huc_def) \ + fw_def(ROCKETLAKE, 0, guc_def(tgl, 35, 2, 0), huc_def(tgl, 7, 0, 12)) \ fw_def(TIGERLAKE, 0, guc_def(tgl, 35, 2, 0), huc_def(tgl, 7, 0, 12)) \ fw_def(ELKHARTLAKE, 0, guc_def(ehl, 33, 0, 4), huc_def(ehl, 9, 0, 0)) \ fw_def(ICELAKE, 0, guc_def(icl, 33, 0, 0), huc_def(icl, 9, 0, 0)) \ From 3a303a3570d7265f7dba7b8a55bbd364afa6507f Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Mon, 4 May 2020 15:52:09 -0700 Subject: [PATCH 043/222] drm/i915/rkl: Load DMC firmware for Rocket Lake Cc: Anusha Srivatsa Signed-off-by: Matt Roper Reviewed-by: Anusha Srivatsa Signed-off-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20200504225227.464666-5-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/display/intel_csr.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/intel_csr.c b/drivers/gpu/drm/i915/display/intel_csr.c index 3112572cfb7d..319932b03e88 100644 --- a/drivers/gpu/drm/i915/display/intel_csr.c +++ b/drivers/gpu/drm/i915/display/intel_csr.c @@ -40,6 +40,10 @@ #define GEN12_CSR_MAX_FW_SIZE ICL_CSR_MAX_FW_SIZE +#define RKL_CSR_PATH "i915/rkl_dmc_ver2_01.bin" +#define RKL_CSR_VERSION_REQUIRED CSR_VERSION(2, 1) +MODULE_FIRMWARE(RKL_CSR_PATH); + #define TGL_CSR_PATH "i915/tgl_dmc_ver2_06.bin" #define TGL_CSR_VERSION_REQUIRED CSR_VERSION(2, 6) #define TGL_CSR_MAX_FW_SIZE 0x6000 @@ -682,7 +686,11 @@ void intel_csr_ucode_init(struct drm_i915_private *dev_priv) */ intel_csr_runtime_pm_get(dev_priv); - if (INTEL_GEN(dev_priv) >= 12) { + if (IS_ROCKETLAKE(dev_priv)) { + csr->fw_path = RKL_CSR_PATH; + csr->required_version = RKL_CSR_VERSION_REQUIRED; + csr->max_fw_size = GEN12_CSR_MAX_FW_SIZE; + } else if (INTEL_GEN(dev_priv) >= 12) { csr->fw_path = TGL_CSR_PATH; csr->required_version = TGL_CSR_VERSION_REQUIRED; /* Allow to load fw via parameter using the last known size */ From a09e89e9782747f79abc342f3409fef9f23eae9a Mon Sep 17 00:00:00 
2001 From: Matt Roper Date: Mon, 4 May 2020 15:52:10 -0700 Subject: [PATCH 044/222] drm/i915/rkl: Add PCH support Rocket Lake can pair with either TGP or CMP. Cc: Lucas De Marchi Signed-off-by: Matt Roper Reviewed-by: Lucas De Marchi Signed-off-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20200504225227.464666-6-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/intel_pch.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_pch.c b/drivers/gpu/drm/i915/intel_pch.c index 20ab9a5023b5..102b03d24f90 100644 --- a/drivers/gpu/drm/i915/intel_pch.c +++ b/drivers/gpu/drm/i915/intel_pch.c @@ -88,7 +88,8 @@ intel_pch_type(const struct drm_i915_private *dev_priv, unsigned short id) case INTEL_PCH_CMP_DEVICE_ID_TYPE: case INTEL_PCH_CMP2_DEVICE_ID_TYPE: drm_dbg_kms(&dev_priv->drm, "Found Comet Lake PCH (CMP)\n"); - drm_WARN_ON(&dev_priv->drm, !IS_COFFEELAKE(dev_priv)); + drm_WARN_ON(&dev_priv->drm, !IS_COFFEELAKE(dev_priv) && + !IS_ROCKETLAKE(dev_priv)); /* CometPoint is CNP Compatible */ return PCH_CNP; case INTEL_PCH_CMP_V_DEVICE_ID_TYPE: @@ -107,7 +108,8 @@ intel_pch_type(const struct drm_i915_private *dev_priv, unsigned short id) case INTEL_PCH_TGP_DEVICE_ID_TYPE: case INTEL_PCH_TGP2_DEVICE_ID_TYPE: drm_dbg_kms(&dev_priv->drm, "Found Tiger Lake LP PCH\n"); - drm_WARN_ON(&dev_priv->drm, !IS_TIGERLAKE(dev_priv)); + drm_WARN_ON(&dev_priv->drm, !IS_TIGERLAKE(dev_priv) && + !IS_ROCKETLAKE(dev_priv)); return PCH_TGP; case INTEL_PCH_JSP_DEVICE_ID_TYPE: case INTEL_PCH_JSP2_DEVICE_ID_TYPE: @@ -141,7 +143,7 @@ intel_virt_detect_pch(const struct drm_i915_private *dev_priv) * make an educated guess as to which PCH is really there. 
*/ - if (IS_TIGERLAKE(dev_priv)) + if (IS_TIGERLAKE(dev_priv) || IS_ROCKETLAKE(dev_priv)) id = INTEL_PCH_TGP_DEVICE_ID_TYPE; else if (IS_ELKHARTLAKE(dev_priv)) id = INTEL_PCH_MCC_DEVICE_ID_TYPE; From f2c1061a3677b400a945d9238f17bf33d669acff Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 20 May 2020 08:30:48 +0100 Subject: [PATCH 045/222] drm/i915/gt: Remove errant assertion in __intel_context_do_pin This assertion was removed in commit b412c63f1cba ("drm/i915/gt: Report context-is-closed prior to pinning"), but accidentally restored by a cherry-pick into drm-next and now has percolated back to drm-intel-next-queued. Fixes: 2e46a2a0b014 ("drm/i915: Use explicit flag to mark unreachable intel_context") Fixes: 2b703bbda271 ("Merge drm/drm-next into drm-intel-next-queued") References: b412c63f1cba ("drm/i915/gt: Report context-is-closed prior to pinning") Signed-off-by: Chris Wilson Cc: Rodrigo Vivi Cc: Joonas Lahtinen Reviewed-by: Joonas Lahtinen Link: https://patchwork.freedesktop.org/patch/msgid/20200520073048.2394034-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/intel_context.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c index 74ddb49b2941..e4aece20bc80 100644 --- a/drivers/gpu/drm/i915/gt/intel_context.c +++ b/drivers/gpu/drm/i915/gt/intel_context.c @@ -97,8 +97,6 @@ int __intel_context_do_pin(struct intel_context *ce) { int err; - GEM_BUG_ON(intel_context_is_closed(ce)); - if (unlikely(!test_bit(CONTEXT_ALLOC_BIT, &ce->flags))) { err = intel_context_alloc_state(ce); if (err) From efbee021ad02f786106c1ef1a5b89fd9045283cc Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Mon, 4 May 2020 15:52:07 -0700 Subject: [PATCH 046/222] x86/gpu: add RKL stolen memory support RKL re-uses the same stolen memory registers as TGL and ICL. 
Bspec: 52055 Bspec: 49589 Bspec: 49636 Cc: Lucas De Marchi Signed-off-by: Matt Roper Reviewed-by: Anusha Srivatsa Acked-by: Borislav Petkov Signed-off-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20200504225227.464666-3-matthew.d.roper@intel.com --- arch/x86/kernel/early-quirks.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c index 2f9ec14be3b1..a4b5af03dcc1 100644 --- a/arch/x86/kernel/early-quirks.c +++ b/arch/x86/kernel/early-quirks.c @@ -550,6 +550,7 @@ static const struct pci_device_id intel_early_ids[] __initconst = { INTEL_ICL_11_IDS(&gen11_early_ops), INTEL_EHL_IDS(&gen11_early_ops), INTEL_TGL_12_IDS(&gen11_early_ops), + INTEL_RKL_IDS(&gen11_early_ops), }; struct resource intel_graphics_stolen_res __ro_after_init = DEFINE_RES_MEM(0, 0); From affd7bb6169ef72985fa657b05c519f730d260f7 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Mon, 4 May 2020 15:52:11 -0700 Subject: [PATCH 047/222] drm/i915/rkl: Update memory bandwidth parameters MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The RKL platform has different memory characteristics from past platforms. Update the values used by our memory bandwidth calculations accordingly. 
Bspec: 53998 Cc: James Ausmus Signed-off-by: Matt Roper Reviewed-by: Ville Syrjälä Signed-off-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20200504225227.464666-7-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/display/intel_bw.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/intel_bw.c b/drivers/gpu/drm/i915/display/intel_bw.c index fef04e2d954e..98bbe719cf4f 100644 --- a/drivers/gpu/drm/i915/display/intel_bw.c +++ b/drivers/gpu/drm/i915/display/intel_bw.c @@ -199,6 +199,12 @@ static const struct intel_sa_info tgl_sa_info = { .displayrtids = 256, }; +static const struct intel_sa_info rkl_sa_info = { + .deburst = 16, + .deprogbwlimit = 20, /* GB/s */ + .displayrtids = 128, +}; + static int icl_get_bw_info(struct drm_i915_private *dev_priv, const struct intel_sa_info *sa) { struct intel_qgv_info qi = {}; @@ -309,7 +315,9 @@ void intel_bw_init_hw(struct drm_i915_private *dev_priv) if (!HAS_DISPLAY(dev_priv)) return; - if (IS_GEN(dev_priv, 12)) + if (IS_ROCKETLAKE(dev_priv)) + icl_get_bw_info(dev_priv, &rkl_sa_info); + else if (IS_GEN(dev_priv, 12)) icl_get_bw_info(dev_priv, &tgl_sa_info); else if (IS_GEN(dev_priv, 11)) icl_get_bw_info(dev_priv, &icl_sa_info); From 99e2d8bcb88763fe265ca0e99708eb55e1d44455 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Mon, 4 May 2020 15:52:12 -0700 Subject: [PATCH 048/222] drm/i915/rkl: Limit number of universal planes to 5 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit RKL only has five universal planes, plus a cursor. Since the bottom-most universal plane is considered the primary plane, set the number of sprites available on this platform to 4. In general, the plane capabilities of the remaining planes stay the same as TGL. However the NV12 Y-plane support moves down to the new top two planes and now only the bottom three planes can be used for NV12 UV. 
Bspec: 49181 Bspec: 49251 Cc: Ville Syrjälä Signed-off-by: Matt Roper Reviewed-by: Ville Syrjälä Signed-off-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20200504225227.464666-8-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/display/intel_display.c | 6 +++++- drivers/gpu/drm/i915/display/intel_sprite.c | 17 ++++++++++++++++- drivers/gpu/drm/i915/display/intel_sprite.h | 11 ++--------- drivers/gpu/drm/i915/i915_irq.c | 4 +++- drivers/gpu/drm/i915/i915_reg.h | 5 +++++ drivers/gpu/drm/i915/intel_device_info.c | 5 ++++- 6 files changed, 35 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 3da4491bcd2b..f22d11f127c8 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -12505,7 +12505,7 @@ static int icl_check_nv12_planes(struct intel_crtc_state *crtc_state) continue; for_each_intel_plane_on_crtc(&dev_priv->drm, crtc, linked) { - if (!icl_is_nv12_y_plane(linked->id)) + if (!icl_is_nv12_y_plane(dev_priv, linked->id)) continue; if (crtc_state->active_planes & BIT(linked->id)) @@ -12551,6 +12551,10 @@ static int icl_check_nv12_planes(struct intel_crtc_state *crtc_state) plane_state->cus_ctl |= PLANE_CUS_PLANE_7; else if (linked->id == PLANE_SPRITE4) plane_state->cus_ctl |= PLANE_CUS_PLANE_6; + else if (linked->id == PLANE_SPRITE3) + plane_state->cus_ctl |= PLANE_CUS_PLANE_5_RKL; + else if (linked->id == PLANE_SPRITE2) + plane_state->cus_ctl |= PLANE_CUS_PLANE_4_RKL; else MISSING_CASE(linked->id); } diff --git a/drivers/gpu/drm/i915/display/intel_sprite.c b/drivers/gpu/drm/i915/display/intel_sprite.c index 0000ec7055f7..571c36f929bd 100644 --- a/drivers/gpu/drm/i915/display/intel_sprite.c +++ b/drivers/gpu/drm/i915/display/intel_sprite.c @@ -333,6 +333,21 @@ int intel_plane_check_src_coordinates(struct intel_plane_state *plane_state) return 0; } +static u8 icl_nv12_y_plane_mask(struct 
drm_i915_private *i915) +{ + if (IS_ROCKETLAKE(i915)) + return BIT(PLANE_SPRITE2) | BIT(PLANE_SPRITE3); + else + return BIT(PLANE_SPRITE4) | BIT(PLANE_SPRITE5); +} + +bool icl_is_nv12_y_plane(struct drm_i915_private *dev_priv, + enum plane_id plane_id) +{ + return INTEL_GEN(dev_priv) >= 11 && + icl_nv12_y_plane_mask(dev_priv) & BIT(plane_id); +} + bool icl_is_hdr_plane(struct drm_i915_private *dev_priv, enum plane_id plane_id) { return INTEL_GEN(dev_priv) >= 11 && @@ -3003,7 +3018,7 @@ static const u32 *icl_get_plane_formats(struct drm_i915_private *dev_priv, if (icl_is_hdr_plane(dev_priv, plane_id)) { *num_formats = ARRAY_SIZE(icl_hdr_plane_formats); return icl_hdr_plane_formats; - } else if (icl_is_nv12_y_plane(plane_id)) { + } else if (icl_is_nv12_y_plane(dev_priv, plane_id)) { *num_formats = ARRAY_SIZE(icl_sdr_y_plane_formats); return icl_sdr_y_plane_formats; } else { diff --git a/drivers/gpu/drm/i915/display/intel_sprite.h b/drivers/gpu/drm/i915/display/intel_sprite.h index 5eeaa92420d1..cd2104ba1ca1 100644 --- a/drivers/gpu/drm/i915/display/intel_sprite.h +++ b/drivers/gpu/drm/i915/display/intel_sprite.h @@ -32,21 +32,14 @@ struct intel_plane * skl_universal_plane_create(struct drm_i915_private *dev_priv, enum pipe pipe, enum plane_id plane_id); -static inline bool icl_is_nv12_y_plane(enum plane_id id) -{ - /* Don't need to do a gen check, these planes are only available on gen11 */ - if (id == PLANE_SPRITE4 || id == PLANE_SPRITE5) - return true; - - return false; -} - static inline u8 icl_hdr_plane_mask(void) { return BIT(PLANE_PRIMARY) | BIT(PLANE_SPRITE0) | BIT(PLANE_SPRITE1); } +bool icl_is_nv12_y_plane(struct drm_i915_private *dev_priv, + enum plane_id plane_id); bool icl_is_hdr_plane(struct drm_i915_private *dev_priv, enum plane_id plane_id); int ivb_plane_min_cdclk(const struct intel_crtc_state *crtc_state, diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 4dc601dffc08..95996db46939 100644 --- 
a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -2254,7 +2254,9 @@ static u32 gen8_de_port_aux_mask(struct drm_i915_private *dev_priv) static u32 gen8_de_pipe_fault_mask(struct drm_i915_private *dev_priv) { - if (INTEL_GEN(dev_priv) >= 11) + if (IS_ROCKETLAKE(dev_priv)) + return RKL_DE_PIPE_IRQ_FAULT_ERRORS; + else if (INTEL_GEN(dev_priv) >= 11) return GEN11_DE_PIPE_IRQ_FAULT_ERRORS; else if (INTEL_GEN(dev_priv) >= 9) return GEN9_DE_PIPE_IRQ_FAULT_ERRORS; diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index f774ec2bcc99..e9d50fe0f375 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -6912,6 +6912,8 @@ enum { #define _PLANE_CUS_CTL_1_A 0x701c8 #define _PLANE_CUS_CTL_2_A 0x702c8 #define PLANE_CUS_ENABLE (1 << 31) +#define PLANE_CUS_PLANE_4_RKL (0 << 30) +#define PLANE_CUS_PLANE_5_RKL (1 << 30) #define PLANE_CUS_PLANE_6 (0 << 30) #define PLANE_CUS_PLANE_7 (1 << 30) #define PLANE_CUS_HPHASE_SIGN_NEGATIVE (1 << 19) @@ -7578,6 +7580,9 @@ enum { GEN11_PIPE_PLANE7_FAULT | \ GEN11_PIPE_PLANE6_FAULT | \ GEN11_PIPE_PLANE5_FAULT) +#define RKL_DE_PIPE_IRQ_FAULT_ERRORS \ + (GEN9_DE_PIPE_IRQ_FAULT_ERRORS | \ + GEN11_PIPE_PLANE5_FAULT) #define GEN8_DE_PORT_ISR _MMIO(0x44440) #define GEN8_DE_PORT_IMR _MMIO(0x44444) diff --git a/drivers/gpu/drm/i915/intel_device_info.c b/drivers/gpu/drm/i915/intel_device_info.c index e5e6836f8fa0..c245c10c9bee 100644 --- a/drivers/gpu/drm/i915/intel_device_info.c +++ b/drivers/gpu/drm/i915/intel_device_info.c @@ -934,7 +934,10 @@ void intel_device_info_runtime_init(struct drm_i915_private *dev_priv) BUILD_BUG_ON(BITS_PER_TYPE(intel_engine_mask_t) < I915_NUM_ENGINES); - if (INTEL_GEN(dev_priv) >= 11) + if (IS_ROCKETLAKE(dev_priv)) + for_each_pipe(dev_priv, pipe) + runtime->num_sprites[pipe] = 4; + else if (INTEL_GEN(dev_priv) >= 11) for_each_pipe(dev_priv, pipe) runtime->num_sprites[pipe] = 6; else if (IS_GEN(dev_priv, 10) || IS_GEMINILAKE(dev_priv)) From 
93e2323b5c85a2b7ac4260d720de26ca5d5ad796 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Mon, 4 May 2020 15:52:13 -0700 Subject: [PATCH 049/222] drm/i915/rkl: Add power well support RKL power wells are similar to TGL power wells, but have some important differences: * PG1 now has pipe A's VDSC (rather than sticking it in PG2) * PG2 no longer exists * DDI-C (aka TC-1) moves from PG1 -> PG3 * PG5 no longer exists due to the lack of a fourth pipe Also note that what we refer to as 'DDI-C' and 'DDI-D' need to actually be programmed as TC-1 and TC-2 even though this platform doesn't have TC outputs. Bspec: 49234 Cc: Imre Deak Cc: Lucas De Marchi Cc: Anshuman Gupta Signed-off-by: Matt Roper Reviewed-by: Anshuman Gupta Signed-off-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20200504225227.464666-9-matthew.d.roper@intel.com --- .../drm/i915/display/intel_display_power.c | 185 +++++++++++++++++- drivers/gpu/drm/i915/display/intel_vdsc.c | 4 +- 2 files changed, 186 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_display_power.c b/drivers/gpu/drm/i915/display/intel_display_power.c index 7762e5d0e1b0..72312b67b57a 100644 --- a/drivers/gpu/drm/i915/display/intel_display_power.c +++ b/drivers/gpu/drm/i915/display/intel_display_power.c @@ -2923,6 +2923,53 @@ void intel_display_power_put(struct drm_i915_private *dev_priv, BIT_ULL(POWER_DOMAIN_AUX_I_TBT) | \ BIT_ULL(POWER_DOMAIN_TC_COLD_OFF)) +#define RKL_PW_4_POWER_DOMAINS ( \ + BIT_ULL(POWER_DOMAIN_PIPE_C) | \ + BIT_ULL(POWER_DOMAIN_PIPE_C_PANEL_FITTER) | \ + BIT_ULL(POWER_DOMAIN_TRANSCODER_C) | \ + BIT_ULL(POWER_DOMAIN_INIT)) + +#define RKL_PW_3_POWER_DOMAINS ( \ + RKL_PW_4_POWER_DOMAINS | \ + BIT_ULL(POWER_DOMAIN_PIPE_B) | \ + BIT_ULL(POWER_DOMAIN_PIPE_B_PANEL_FITTER) | \ + BIT_ULL(POWER_DOMAIN_AUDIO) | \ + BIT_ULL(POWER_DOMAIN_VGA) | \ + BIT_ULL(POWER_DOMAIN_TRANSCODER_B) | \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_D_LANES) | \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_E_LANES) | \ + 
BIT_ULL(POWER_DOMAIN_AUX_D) | \ + BIT_ULL(POWER_DOMAIN_AUX_E) | \ + BIT_ULL(POWER_DOMAIN_INIT)) + +/* + * There is no PW_2/PG_2 on RKL. + * + * RKL PW_1/PG_1 domains (under HW/DMC control): + * - DBUF function (note: registers are in PW0) + * - PIPE_A and its planes and VDSC/joining, except VGA + * - transcoder A + * - DDI_A and DDI_B + * - FBC + * + * RKL PW_0/PG_0 domains (under HW/DMC control): + * - PCI + * - clocks except port PLL + * - shared functions: + * * interrupts except pipe interrupts + * * MBus except PIPE_MBUS_DBOX_CTL + * * DBUF registers + * - central power except FBC + * - top-level GTC (DDI-level GTC is in the well associated with the DDI) + */ + +#define RKL_DISPLAY_DC_OFF_POWER_DOMAINS ( \ + RKL_PW_3_POWER_DOMAINS | \ + BIT_ULL(POWER_DOMAIN_MODESET) | \ + BIT_ULL(POWER_DOMAIN_AUX_A) | \ + BIT_ULL(POWER_DOMAIN_AUX_B) | \ + BIT_ULL(POWER_DOMAIN_INIT)) + static const struct i915_power_well_ops i9xx_always_on_power_well_ops = { .sync_hw = i9xx_power_well_sync_hw_noop, .enable = i9xx_always_on_power_well_noop, @@ -4293,6 +4340,140 @@ static const struct i915_power_well_desc tgl_power_wells[] = { }, }; +static const struct i915_power_well_desc rkl_power_wells[] = { + { + .name = "always-on", + .always_on = true, + .domains = POWER_DOMAIN_MASK, + .ops = &i9xx_always_on_power_well_ops, + .id = DISP_PW_ID_NONE, + }, + { + .name = "power well 1", + /* Handled by the DMC firmware */ + .always_on = true, + .domains = 0, + .ops = &hsw_power_well_ops, + .id = SKL_DISP_PW_1, + { + .hsw.regs = &hsw_power_well_regs, + .hsw.idx = ICL_PW_CTL_IDX_PW_1, + .hsw.has_fuses = true, + }, + }, + { + .name = "DC off", + .domains = RKL_DISPLAY_DC_OFF_POWER_DOMAINS, + .ops = &gen9_dc_off_power_well_ops, + .id = SKL_DISP_DC_OFF, + }, + { + .name = "power well 3", + .domains = RKL_PW_3_POWER_DOMAINS, + .ops = &hsw_power_well_ops, + .id = ICL_DISP_PW_3, + { + .hsw.regs = &hsw_power_well_regs, + .hsw.idx = ICL_PW_CTL_IDX_PW_3, + .hsw.irq_pipe_mask = BIT(PIPE_B), + .hsw.has_vga 
= true, + .hsw.has_fuses = true, + }, + }, + { + .name = "power well 4", + .domains = RKL_PW_4_POWER_DOMAINS, + .ops = &hsw_power_well_ops, + .id = DISP_PW_ID_NONE, + { + .hsw.regs = &hsw_power_well_regs, + .hsw.idx = ICL_PW_CTL_IDX_PW_4, + .hsw.has_fuses = true, + .hsw.irq_pipe_mask = BIT(PIPE_C), + } + }, + { + .name = "DDI A IO", + .domains = ICL_DDI_IO_A_POWER_DOMAINS, + .ops = &hsw_power_well_ops, + .id = DISP_PW_ID_NONE, + { + .hsw.regs = &icl_ddi_power_well_regs, + .hsw.idx = ICL_PW_CTL_IDX_DDI_A, + } + }, + { + .name = "DDI B IO", + .domains = ICL_DDI_IO_B_POWER_DOMAINS, + .ops = &hsw_power_well_ops, + .id = DISP_PW_ID_NONE, + { + .hsw.regs = &icl_ddi_power_well_regs, + .hsw.idx = ICL_PW_CTL_IDX_DDI_B, + } + }, + { + .name = "DDI D TC1 IO", + .domains = TGL_DDI_IO_D_TC1_POWER_DOMAINS, + .ops = &hsw_power_well_ops, + .id = DISP_PW_ID_NONE, + { + .hsw.regs = &icl_ddi_power_well_regs, + .hsw.idx = TGL_PW_CTL_IDX_DDI_TC1, + }, + }, + { + .name = "DDI E TC2 IO", + .domains = TGL_DDI_IO_E_TC2_POWER_DOMAINS, + .ops = &hsw_power_well_ops, + .id = DISP_PW_ID_NONE, + { + .hsw.regs = &icl_ddi_power_well_regs, + .hsw.idx = TGL_PW_CTL_IDX_DDI_TC2, + }, + }, + { + .name = "AUX A", + .domains = ICL_AUX_A_IO_POWER_DOMAINS, + .ops = &icl_aux_power_well_ops, + .id = DISP_PW_ID_NONE, + { + .hsw.regs = &icl_aux_power_well_regs, + .hsw.idx = ICL_PW_CTL_IDX_AUX_A, + }, + }, + { + .name = "AUX B", + .domains = ICL_AUX_B_IO_POWER_DOMAINS, + .ops = &icl_aux_power_well_ops, + .id = DISP_PW_ID_NONE, + { + .hsw.regs = &icl_aux_power_well_regs, + .hsw.idx = ICL_PW_CTL_IDX_AUX_B, + }, + }, + { + .name = "AUX D TC1", + .domains = TGL_AUX_D_TC1_IO_POWER_DOMAINS, + .ops = &icl_aux_power_well_ops, + .id = DISP_PW_ID_NONE, + { + .hsw.regs = &icl_aux_power_well_regs, + .hsw.idx = TGL_PW_CTL_IDX_AUX_TC1, + }, + }, + { + .name = "AUX E TC2", + .domains = TGL_AUX_E_TC2_IO_POWER_DOMAINS, + .ops = &icl_aux_power_well_ops, + .id = DISP_PW_ID_NONE, + { + .hsw.regs = &icl_aux_power_well_regs, + 
.hsw.idx = TGL_PW_CTL_IDX_AUX_TC2, + }, + }, +}; + static int sanitize_disable_power_well_option(const struct drm_i915_private *dev_priv, int disable_power_well) @@ -4441,7 +4622,9 @@ int intel_power_domains_init(struct drm_i915_private *dev_priv) * The enabling order will be from lower to higher indexed wells, * the disabling order is reversed. */ - if (IS_GEN(dev_priv, 12)) { + if (IS_ROCKETLAKE(dev_priv)) { + err = set_power_wells(power_domains, rkl_power_wells); + } else if (IS_GEN(dev_priv, 12)) { err = set_power_wells(power_domains, tgl_power_wells); } else if (IS_GEN(dev_priv, 11)) { err = set_power_wells(power_domains, icl_power_wells); diff --git a/drivers/gpu/drm/i915/display/intel_vdsc.c b/drivers/gpu/drm/i915/display/intel_vdsc.c index 95ad87d4ccb3..d145fe2bed81 100644 --- a/drivers/gpu/drm/i915/display/intel_vdsc.c +++ b/drivers/gpu/drm/i915/display/intel_vdsc.c @@ -476,13 +476,13 @@ intel_dsc_power_domain(const struct intel_crtc_state *crtc_state) * POWER_DOMAIN_TRANSCODER_VDSC_PW2 power domain in two cases: * * - ICL eDP/DSI transcoder - * - TGL pipe A + * - Gen12+ (except RKL) pipe A * * For any other pipe, VDSC/joining uses the power well associated with * the pipe in use. Hence another reference on the pipe power domain * will suffice. (Except no VDSC/joining on ICL pipe A.) */ - if (INTEL_GEN(i915) >= 12 && pipe == PIPE_A) + if (INTEL_GEN(i915) >= 12 && !IS_ROCKETLAKE(i915) && pipe == PIPE_A) return POWER_DOMAIN_TRANSCODER_VDSC_PW2; else if (is_pipe_dsc(crtc_state)) return POWER_DOMAIN_PIPE(pipe); From 01f953e78e23d2c48fab96bc6f246af0c33464e8 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Mon, 4 May 2020 15:52:15 -0700 Subject: [PATCH 050/222] drm/i915/rkl: RKL only uses PHY_MISC for PHY's A and B Since the number of platforms with this restriction are growing, let's separate out the platform logic into a has_phy_misc() function. 
Bspec: 50107 Signed-off-by: Matt Roper Reviewed-by: Anusha Srivatsa Signed-off-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20200504225227.464666-11-matthew.d.roper@intel.com --- .../gpu/drm/i915/display/intel_combo_phy.c | 30 +++++++++++-------- 1 file changed, 17 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_combo_phy.c b/drivers/gpu/drm/i915/display/intel_combo_phy.c index 9ff05ec12115..43d8784f6fa0 100644 --- a/drivers/gpu/drm/i915/display/intel_combo_phy.c +++ b/drivers/gpu/drm/i915/display/intel_combo_phy.c @@ -181,11 +181,25 @@ static void cnl_combo_phys_uninit(struct drm_i915_private *dev_priv) intel_de_write(dev_priv, CHICKEN_MISC_2, val); } +static bool has_phy_misc(struct drm_i915_private *i915, enum phy phy) +{ + /* + * Some platforms only expect PHY_MISC to be programmed for PHY-A and + * PHY-B and may not even have instances of the register for the + * other combo PHY's. + */ + if (IS_ELKHARTLAKE(i915) || + IS_ROCKETLAKE(i915)) + return phy < PHY_C; + + return true; +} + static bool icl_combo_phy_enabled(struct drm_i915_private *dev_priv, enum phy phy) { /* The PHY C added by EHL has no PHY_MISC register */ - if (IS_ELKHARTLAKE(dev_priv) && phy == PHY_C) + if (!has_phy_misc(dev_priv, phy)) return intel_de_read(dev_priv, ICL_PORT_COMP_DW0(phy)) & COMP_INIT; else return !(intel_de_read(dev_priv, ICL_PHY_MISC(phy)) & @@ -317,12 +331,7 @@ static void icl_combo_phys_init(struct drm_i915_private *dev_priv) continue; } - /* - * Although EHL adds a combo PHY C, there's no PHY_MISC - * register for it and no need to program the - * DE_IO_COMP_PWR_DOWN setting on PHY C. 
- */ - if (IS_ELKHARTLAKE(dev_priv) && phy == PHY_C) + if (!has_phy_misc(dev_priv, phy)) goto skip_phy_misc; /* @@ -376,12 +385,7 @@ static void icl_combo_phys_uninit(struct drm_i915_private *dev_priv) "Combo PHY %c HW state changed unexpectedly\n", phy_name(phy)); - /* - * Although EHL adds a combo PHY C, there's no PHY_MISC - * register for it and no need to program the - * DE_IO_COMP_PWR_DOWN setting on PHY C. - */ - if (IS_ELKHARTLAKE(dev_priv) && phy == PHY_C) + if (!has_phy_misc(dev_priv, phy)) goto skip_phy_misc; val = intel_de_read(dev_priv, ICL_PHY_MISC(phy)); From 537af0b0caf4b18bf0414ff5ffb4d3d86633403c Mon Sep 17 00:00:00 2001 From: Matt Atwood Date: Tue, 19 May 2020 09:25:34 -0700 Subject: [PATCH 051/222] drm/i915/ehl: Wa_22010271021 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reflect recent Bspec changes. Bspec: 33451 Signed-off-by: Matt Atwood Signed-off-by: José Roberto de Souza Reviewed-by: Swathi Dhanavanthri Link: https://patchwork.freedesktop.org/patch/msgid/20200519162534.10035-1-matthew.s.atwood@intel.com --- drivers/gpu/drm/i915/gt/intel_workarounds.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index 90a2b9e399b0..fa1e15657663 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -1484,6 +1484,12 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) wa_write_or(wal, GEN7_FF_THREAD_MODE, GEN12_FF_TESSELATION_DOP_GATE_DISABLE); + + /* Wa_22010271021:ehl */ + if (IS_ELKHARTLAKE(i915)) + wa_masked_en(wal, + GEN9_CS_DEBUG_MODE1, + FF_DOP_CLOCK_GATE_DISABLE); } if (IS_GEN_RANGE(i915, 9, 12)) { From 1a5392479207a9e514c111ebb881b677bda48c20 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 21 May 2020 08:10:59 +0100 Subject: [PATCH 052/222] drm/i915/selftests: Measure CS_TIMESTAMP MIME-Version: 1.0 Content-Type: 
text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Count the number of CS_TIMESTAMP ticks and check that it matches our expectations. Signed-off-by: Chris Wilson Cc: Ville Syrjälä Reviewed-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20200521071059.31726-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/selftest_gt_pm.c | 132 +++++++++++++++++++++++ 1 file changed, 132 insertions(+) diff --git a/drivers/gpu/drm/i915/gt/selftest_gt_pm.c b/drivers/gpu/drm/i915/gt/selftest_gt_pm.c index 242181a5214c..6180a47c1b51 100644 --- a/drivers/gpu/drm/i915/gt/selftest_gt_pm.c +++ b/drivers/gpu/drm/i915/gt/selftest_gt_pm.c @@ -5,10 +5,141 @@ * Copyright © 2019 Intel Corporation */ +#include <linux/sort.h> + +#include "intel_gt_clock_utils.h" + #include "selftest_llc.h" #include "selftest_rc6.h" #include "selftest_rps.h" +static int cmp_u64(const void *A, const void *B) +{ + const u64 *a = A, *b = B; + + if (*a < *b) + return -1; + else if (*a > *b) + return 1; + else + return 0; +} + +static int cmp_u32(const void *A, const void *B) +{ + const u32 *a = A, *b = B; + + if (*a < *b) + return -1; + else if (*a > *b) + return 1; + else + return 0; +} + +static void measure_clocks(struct intel_engine_cs *engine, + u32 *out_cycles, ktime_t *out_dt) +{ + ktime_t dt[5]; + u32 cycles[5]; + int i; + + for (i = 0; i < 5; i++) { + preempt_disable(); + cycles[i] = -ENGINE_READ_FW(engine, RING_TIMESTAMP); + dt[i] = ktime_get(); + + udelay(1000); + + dt[i] = ktime_sub(ktime_get(), dt[i]); + cycles[i] += ENGINE_READ_FW(engine, RING_TIMESTAMP); + preempt_enable(); + } + + /* Use the median of both cycle/dt; close enough */ + sort(cycles, 5, sizeof(*cycles), cmp_u32, NULL); + *out_cycles = (cycles[1] + 2 * cycles[2] + cycles[3]) / 4; + + sort(dt, 5, sizeof(*dt), cmp_u64, NULL); + *out_dt = div_u64(dt[1] + 2 * dt[2] + dt[3], 4); +} + +static int live_gt_clocks(void *arg) +{ + struct intel_gt *gt = arg; + struct intel_engine_cs *engine; + enum intel_engine_id id; + int err = 0; + + if
(!RUNTIME_INFO(gt->i915)->cs_timestamp_frequency_hz) { /* unknown */ + pr_info("CS_TIMESTAMP frequency unknown\n"); + return 0; + } + + if (INTEL_GEN(gt->i915) < 4) /* Any CS_TIMESTAMP? */ + return 0; + + if (IS_GEN(gt->i915, 5)) + /* + * XXX CS_TIMESTAMP low dword is dysfunctional? + * + * Ville's experiments indicate the high dword still works, + * but at a correspondingly reduced frequency. + */ + return 0; + + if (IS_GEN(gt->i915, 4)) + /* + * XXX CS_TIMESTAMP appears gibberish + * + * Ville's experiments indicate that it mostly appears 'stuck' + * in that we see the register report the same cycle count + * for a couple of reads. + */ + return 0; + + intel_gt_pm_get(gt); + intel_uncore_forcewake_get(gt->uncore, FORCEWAKE_ALL); + + for_each_engine(engine, gt, id) { + u32 cycles; + u32 expected; + u64 time; + u64 dt; + + if (INTEL_GEN(engine->i915) < 7 && engine->id != RCS0) + continue; + + measure_clocks(engine, &cycles, &dt); + + time = i915_cs_timestamp_ticks_to_ns(engine->i915, cycles); + expected = i915_cs_timestamp_ns_to_ticks(engine->i915, dt); + + pr_info("%s: TIMESTAMP %d cycles [%lldns] in %lldns [%d cycles], using CS clock frequency of %uKHz\n", + engine->name, cycles, time, dt, expected, + RUNTIME_INFO(engine->i915)->cs_timestamp_frequency_hz / 1000); + + if (9 * time < 8 * dt || 8 * time > 9 * dt) { + pr_err("%s: CS ticks did not match walltime!\n", + engine->name); + err = -EINVAL; + break; + } + + if (9 * expected < 8 * cycles || 8 * expected > 9 * cycles) { + pr_err("%s: walltime did not match CS ticks!\n", + engine->name); + err = -EINVAL; + break; + } + } + + intel_uncore_forcewake_put(gt->uncore, FORCEWAKE_ALL); + intel_gt_pm_put(gt); + + return err; +} + static int live_gt_resume(void *arg) { struct intel_gt *gt = arg; @@ -52,6 +183,7 @@ static int live_gt_resume(void *arg) int intel_gt_pm_live_selftests(struct drm_i915_private *i915) { static const struct i915_subtest tests[] = { + SUBTEST(live_gt_clocks), SUBTEST(live_rc6_manual), 
SUBTEST(live_rps_clock_interval), SUBTEST(live_rps_control), From 4f44afe85243f5d97db2dd14528870c09b3cd887 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 21 May 2020 13:43:04 +0100 Subject: [PATCH 053/222] drm/i915/selftests: Flush the submission, not cancel it! Use intel_engine_flush_submission() when we want to ensure that the tasklet is run. tasklet_kill(), while it may ensure that an ongoing tasklet is completed, also prevents the tasklet from running if it's already scheduled and hasn't yet been run. Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/1874 Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20200521124304.3157692-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/selftest_lrc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c index ef38dd52945c..66f710b1b61e 100644 --- a/drivers/gpu/drm/i915/gt/selftest_lrc.c +++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c @@ -332,7 +332,7 @@ static int live_unlite_restore(struct intel_gt *gt, int prio) i915_request_put(rq[0]); err_ce: - tasklet_kill(&engine->execlists.tasklet); /* flush submission */ + intel_engine_flush_submission(engine); igt_spinner_end(&spin); for (n = 0; n < ARRAY_SIZE(ce); n++) { if (IS_ERR_OR_NULL(ce[n])) From 0eb670aac27b1d615004c29efec595616e3e091a Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 21 May 2020 15:06:16 +0100 Subject: [PATCH 054/222] drm/i915: Disable semaphore inter-engine sync without timeslicing Since the removal of the no-semaphore boosting, we rely on timeslicing to reorder passed inter-dependency hogs across the engines. However, we require preemption to support timeslicing into user payloads, and not all machine support preemption so we do not universally enable timeslicing, even when it would correctly preempt our own inter-engine semaphores. 
Since timeslicing and semaphore priority deboosting is now disabled on Broadwell/Braswell, we have to follow suit and not use semaphores. Testcase: igt/gem_exec_schedule/semaphore-codependency # bdw/bsw Fixes: 18e4af04d218 ("drm/i915: Drop no-semaphore boosting") Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Cc: Mika Kuoppala Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20200521140617.30015-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gem/i915_gem_context.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index 900ea8b7fc8f..f5d59d18cd5b 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -230,7 +230,7 @@ static void intel_context_set_gem(struct intel_context *ce, ce->timeline = intel_timeline_get(ctx->timeline); if (ctx->sched.priority >= I915_PRIORITY_NORMAL && - intel_engine_has_semaphores(ce->engine)) + intel_engine_has_timeslices(ce->engine)) __set_bit(CONTEXT_USE_SEMAPHORES, &ce->flags); } @@ -1969,7 +1969,7 @@ static int __apply_priority(struct intel_context *ce, void *arg) { struct i915_gem_context *ctx = arg; - if (!intel_engine_has_semaphores(ce->engine)) + if (!intel_engine_has_timeslices(ce->engine)) return 0; if (ctx->sched.priority >= I915_PRIORITY_NORMAL) From 32a4605b38c30689a6a18f3f4c7d3133ac9d3277 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 21 May 2020 15:06:17 +0100 Subject: [PATCH 055/222] drm/i915: Avoid using rq->engine after free during i915_fence_release In order to be valid to dereference during the i915_fence_release, after retiring the fence and releasing its references, we assume that rq->engine can only be a real engine (that stays intact until the device is shut down after all fences have been flushed).
However, due to a quirk of preempt-to-busy, we may retire a request that still belongs to a virtual engine and so eventually free it with rq->engine being invalid. To avoid dereferencing that invalid engine, we look at the execution_mask which if it indicates it may be executed on more than one engine, we know it originated on a virtual engine and may still be on one. Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/1906 Fixes: 43acd6516ca9 ("drm/i915: Keep a per-engine request pool") Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20200521140617.30015-2-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_request.c | 35 +++++++++++++++++++++++++++-- 1 file changed, 33 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 526c1e9acbd5..c282719ad3ac 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -121,8 +121,39 @@ static void i915_fence_release(struct dma_fence *fence) i915_sw_fence_fini(&rq->submit); i915_sw_fence_fini(&rq->semaphore); - /* Keep one request on each engine for reserved use under mempressure */ - if (!cmpxchg(&rq->engine->request_pool, NULL, rq)) + /* + * Keep one request on each engine for reserved use under mempressure + * + * We do not hold a reference to the engine here and so have to be + * very careful in what rq->engine we poke. The virtual engine is + * referenced via the rq->context and we released that ref during + * i915_request_retire(), ergo we must not dereference a virtual + * engine here. Not that we would want to, as the only consumer of + * the reserved engine->request_pool is the power management parking, + * which must-not-fail, and that is only run on the physical engines. 
+ * + * Since the request must have been executed to have completed, + * we know that it will have been processed by the HW and will + * not be unsubmitted again, so rq->engine and rq->execution_mask + * at this point is stable. rq->execution_mask will be a single + * bit if the last and _only_ engine it could execute on was a + * physical engine, if it's multiple bits then it started on and + * could still be on a virtual engine. Thus if the mask is not a + * power-of-two we assume that rq->engine may still be a virtual + * engine and so a dangling invalid pointer that we cannot dereference + * + * For example, consider the flow of a bonded request through a virtual + * engine. The request is created with a wide engine mask (all engines + * that we might execute on). On processing the bond, the request mask + * is reduced to one or more engines. If the request is subsequently + * bound to a single engine, it will then be constrained to only + * execute on that engine and never returned to the virtual engine + * after timeslicing away, see __unwind_incomplete_requests(). Thus we + * know that if the rq->execution_mask is a single bit, rq->engine + * can be a physical engine with the exact corresponding mask. + */ + if (is_power_of_2(rq->execution_mask) && + !cmpxchg(&rq->engine->request_pool, NULL, rq)) return; kmem_cache_free(global.slab_requests, rq); From aedbe0a1af585edc91221890f3a2a9ea2a319336 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 21 May 2020 15:49:49 +0100 Subject: [PATCH 056/222] drm/i915: Remove PIN_UPDATE for i915_vma_pin As we no longer use PIN_UPDATE (since commit 7d0aa0db4375 ("drm/i915/gem: Unbind all current vma on changing cache-level")) we can remove PIN_UPDATE itself. The benefit is just in simplifying the vma bind.
Signed-off-by: Chris Wilson Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20200521144949.25357-1-chris@chris-wilson.co.uk --- .../gpu/drm/i915/gem/selftests/huge_pages.c | 142 ------------------ drivers/gpu/drm/i915/i915_gem_gtt.h | 1 - drivers/gpu/drm/i915/i915_vma.c | 9 +- 3 files changed, 3 insertions(+), 149 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c index c9988b6d5c88..a0ed2fab0ff3 100644 --- a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c +++ b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c @@ -1409,147 +1409,6 @@ out: return err; } -static int igt_ppgtt_pin_update(void *arg) -{ - struct i915_gem_context *ctx = arg; - struct drm_i915_private *dev_priv = ctx->i915; - unsigned long supported = INTEL_INFO(dev_priv)->page_sizes; - struct drm_i915_gem_object *obj; - struct i915_gem_engines_iter it; - struct i915_address_space *vm; - struct intel_context *ce; - struct i915_vma *vma; - unsigned int flags = PIN_USER | PIN_OFFSET_FIXED; - unsigned int n; - int first, last; - int err = 0; - - /* - * Make sure there's no funny business when doing a PIN_UPDATE -- in the - * past we had a subtle issue with being able to incorrectly do multiple - * alloc va ranges on the same object when doing a PIN_UPDATE, which - * resulted in some pretty nasty bugs, though only when using - * huge-gtt-pages. 
- */ - - vm = i915_gem_context_get_vm_rcu(ctx); - if (!i915_vm_is_4lvl(vm)) { - pr_info("48b PPGTT not supported, skipping\n"); - goto out_vm; - } - - first = ilog2(I915_GTT_PAGE_SIZE_64K); - last = ilog2(I915_GTT_PAGE_SIZE_2M); - - for_each_set_bit_from(first, &supported, last + 1) { - unsigned int page_size = BIT(first); - - obj = i915_gem_object_create_internal(dev_priv, page_size); - if (IS_ERR(obj)) { - err = PTR_ERR(obj); - goto out_vm; - } - - vma = i915_vma_instance(obj, vm, NULL); - if (IS_ERR(vma)) { - err = PTR_ERR(vma); - goto out_put; - } - - err = i915_vma_pin(vma, SZ_2M, 0, flags); - if (err) - goto out_put; - - if (vma->page_sizes.sg < page_size) { - pr_info("Unable to allocate page-size %x, finishing test early\n", - page_size); - goto out_unpin; - } - - err = igt_check_page_sizes(vma); - if (err) - goto out_unpin; - - if (vma->page_sizes.gtt != page_size) { - dma_addr_t addr = i915_gem_object_get_dma_address(obj, 0); - - /* - * The only valid reason for this to ever fail would be - * if the dma-mapper screwed us over when we did the - * dma_map_sg(), since it has the final say over the dma - * address. 
- */ - if (IS_ALIGNED(addr, page_size)) { - pr_err("page_sizes.gtt=%u, expected=%u\n", - vma->page_sizes.gtt, page_size); - err = -EINVAL; - } else { - pr_info("dma address misaligned, finishing test early\n"); - } - - goto out_unpin; - } - - err = i915_vma_bind(vma, I915_CACHE_NONE, PIN_UPDATE, NULL); - if (err) - goto out_unpin; - - i915_vma_unpin(vma); - i915_gem_object_put(obj); - } - - obj = i915_gem_object_create_internal(dev_priv, PAGE_SIZE); - if (IS_ERR(obj)) { - err = PTR_ERR(obj); - goto out_vm; - } - - vma = i915_vma_instance(obj, vm, NULL); - if (IS_ERR(vma)) { - err = PTR_ERR(vma); - goto out_put; - } - - err = i915_vma_pin(vma, 0, 0, flags); - if (err) - goto out_put; - - /* - * Make sure we don't end up with something like where the pde is still - * pointing to the 2M page, and the pt we just filled-in is dangling -- - * we can check this by writing to the first page where it would then - * land in the now stale 2M page. - */ - - n = 0; - for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) { - if (!intel_engine_can_store_dword(ce->engine)) - continue; - - err = gpu_write(ce, vma, n++, 0xdeadbeaf); - if (err) - break; - } - i915_gem_context_unlock_engines(ctx); - if (err) - goto out_unpin; - - while (n--) { - err = cpu_check(obj, n, 0xdeadbeaf); - if (err) - goto out_unpin; - } - -out_unpin: - i915_vma_unpin(vma); -out_put: - i915_gem_object_put(obj); -out_vm: - i915_vm_put(vm); - - return err; -} - static int igt_tmpfs_fallback(void *arg) { struct i915_gem_context *ctx = arg; @@ -1760,7 +1619,6 @@ int i915_gem_huge_page_live_selftests(struct drm_i915_private *i915) { static const struct i915_subtest tests[] = { SUBTEST(igt_shrink_thp), - SUBTEST(igt_ppgtt_pin_update), SUBTEST(igt_tmpfs_fallback), SUBTEST(igt_ppgtt_smoke_huge), SUBTEST(igt_ppgtt_sanity_check), diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index f6226df9f972..c9b0ee5e1d23 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ 
b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -42,7 +42,6 @@ int i915_gem_gtt_insert(struct i915_address_space *vm, #define PIN_OFFSET_BIAS BIT_ULL(6) #define PIN_OFFSET_FIXED BIT_ULL(7) -#define PIN_UPDATE BIT_ULL(9) #define PIN_GLOBAL BIT_ULL(10) /* I915_VMA_GLOBAL_BIND */ #define PIN_USER BIT_ULL(11) /* I915_VMA_LOCAL_BIND */ diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index fc14ebf9a0b7..22198b758459 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -397,17 +397,15 @@ int i915_vma_bind(struct i915_vma *vma, vma_flags = atomic_read(&vma->flags); vma_flags &= I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND; - if (flags & PIN_UPDATE) - bind_flags |= vma_flags; - else - bind_flags &= ~vma_flags; + + bind_flags &= ~vma_flags; if (bind_flags == 0) return 0; GEM_BUG_ON(!vma->pages); trace_i915_vma_bind(vma, bind_flags); - if (work && (bind_flags & ~vma_flags) & vma->vm->bind_async_flags) { + if (work && bind_flags & vma->vm->bind_async_flags) { struct dma_fence *prev; work->vma = vma; @@ -868,7 +866,6 @@ int i915_vma_pin(struct i915_vma *vma, u64 size, u64 alignment, u64 flags) BUILD_BUG_ON(PIN_GLOBAL != I915_VMA_GLOBAL_BIND); BUILD_BUG_ON(PIN_USER != I915_VMA_LOCAL_BIND); - GEM_BUG_ON(flags & PIN_UPDATE); GEM_BUG_ON(!(flags & (PIN_USER | PIN_GLOBAL))); /* First try and grab the pin without rebinding the vma */ From c93b9b2c79296e7ea0fe31f1eafda20bb288dafd Mon Sep 17 00:00:00 2001 From: Stanislav Lisovskiy Date: Tue, 19 May 2020 16:11:11 +0300 Subject: [PATCH 057/222] drm/i915: Decouple cdclk calculation from modeset checks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We need to calculate cdclk after watermarks/ddb has been calculated as with recent hw CDCLK needs to be adjusted accordingly to DBuf requirements, which is not possible with current code organization. 
Setting CDCLK according to DBuf BW requirements and not just rejecting if it doesn't satisfy BW requirements, will allow us to save power when it is possible and gain additional bandwidth when it's needed - i.e. boosting both our power management and performance capabilities. This patch is preparation for that, first we now extract modeset calculation from modeset checks, in order to call it after wm/ddb has been calculated. v2: - Extract only intel_modeset_calc_cdclk from intel_modeset_checks (Ville Syrjälä) v3: - Clear plls after intel_modeset_calc_cdclk v4: - Added r-b from previous revision to commit message Reviewed-by: Ville Syrjälä Signed-off-by: Stanislav Lisovskiy Signed-off-by: Manasi Navare Link: https://patchwork.freedesktop.org/patch/msgid/20200519131117.17190-2-stanislav.lisovskiy@intel.com --- drivers/gpu/drm/i915/display/intel_display.c | 22 +++++++++++--------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index f22d11f127c8..58e2c067d0ad 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -14567,12 +14567,6 @@ static int intel_modeset_checks(struct intel_atomic_state *state) return ret; } - ret = intel_modeset_calc_cdclk(state); - if (ret) - return ret; - - intel_modeset_clear_plls(state); - if (IS_HASWELL(dev_priv)) return hsw_mode_set_planes_workaround(state); @@ -14904,10 +14898,6 @@ static int intel_atomic_check(struct drm_device *dev, goto fail; } - ret = intel_atomic_check_crtcs(state); - if (ret) - goto fail; - intel_fbc_choose_crtc(dev_priv, state); ret = calc_watermark_data(state); if (ret) @@ -14917,6 +14907,18 @@ static int intel_atomic_check(struct drm_device *dev, if (ret) goto fail; + if (any_ms) { + ret = intel_modeset_calc_cdclk(state); + if (ret) + return ret; + + intel_modeset_clear_plls(state); + } + + ret = intel_atomic_check_crtcs(state); + if (ret) + goto fail;
+ for_each_oldnew_intel_crtc_in_state(state, crtc, old_crtc_state, new_crtc_state, i) { if (!needs_modeset(new_crtc_state) && From 4f0b4352bd26eff85f972ccf12bcdf9236a42175 Mon Sep 17 00:00:00 2001 From: Stanislav Lisovskiy Date: Tue, 19 May 2020 16:11:12 +0300 Subject: [PATCH 058/222] drm/i915: Extract cdclk requirements checking to separate function In Gen11+ whenever we might exceed DBuf bandwidth we might need to recalculate CDCLK which DBuf bandwidth is scaled with. Total Dbuf bw used might change based on particular plane needs. Thus to calculate if cdclk needs to be changed it is not enough anymore to check plane configuration and plane min cdclk, per DBuf bw can be calculated only after wm/ddb calculation is done and all required planes are added into the state. In order to keep all min_cdclk related checks in one place let's extract it into separate function, checking and modifying any_ms. Reviewed-by: Manasi Navare Signed-off-by: Stanislav Lisovskiy Signed-off-by: Manasi Navare Link: https://patchwork.freedesktop.org/patch/msgid/20200519131117.17190-3-stanislav.lisovskiy@intel.com --- drivers/gpu/drm/i915/display/intel_display.c | 30 ++++++++++++++------ 1 file changed, 22 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 58e2c067d0ad..e2f9ec8dae31 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -14646,8 +14646,7 @@ static bool active_planes_affects_min_cdclk(struct drm_i915_private *dev_priv) IS_IVYBRIDGE(dev_priv); } -static int intel_atomic_check_planes(struct intel_atomic_state *state, - bool *need_cdclk_calc) +static int intel_atomic_check_planes(struct intel_atomic_state *state) { struct drm_i915_private *dev_priv = to_i915(state->base.dev); struct intel_crtc_state *old_crtc_state, *new_crtc_state; @@ -14697,6 +14696,22 @@ static int intel_atomic_check_planes(struct intel_atomic_state *state, 
return ret; } + return 0; +} + +static int intel_atomic_check_cdclk(struct intel_atomic_state *state, + bool *need_cdclk_calc) +{ + struct intel_cdclk_state *new_cdclk_state; + int i; + struct intel_plane_state *plane_state; + struct intel_plane *plane; + int ret; + + new_cdclk_state = intel_atomic_get_new_cdclk_state(state); + if (new_cdclk_state && new_cdclk_state->force_min_cdclk_changed) + *need_cdclk_calc = true; + /* * active_planes bitmask has been updated, and potentially * affected planes are part of the state. We can now @@ -14759,7 +14774,6 @@ static int intel_atomic_check(struct drm_device *dev, struct drm_i915_private *dev_priv = to_i915(dev); struct intel_atomic_state *state = to_intel_atomic_state(_state); struct intel_crtc_state *old_crtc_state, *new_crtc_state; - struct intel_cdclk_state *new_cdclk_state; struct intel_crtc *crtc; int ret, i; bool any_ms = false; @@ -14870,14 +14884,10 @@ static int intel_atomic_check(struct drm_device *dev, if (ret) goto fail; - ret = intel_atomic_check_planes(state, &any_ms); + ret = intel_atomic_check_planes(state); if (ret) goto fail; - new_cdclk_state = intel_atomic_get_new_cdclk_state(state); - if (new_cdclk_state && new_cdclk_state->force_min_cdclk_changed) - any_ms = true; - /* * distrust_bios_wm will force a full dbuf recomputation * but the hardware state will only get updated accordingly @@ -14907,6 +14917,10 @@ static int intel_atomic_check(struct drm_device *dev, if (ret) goto fail; + ret = intel_atomic_check_cdclk(state, &any_ms); + if (ret) + goto fail; + if (any_ms) { ret = intel_modeset_calc_cdclk(state); if (ret) From 9877c37e05c8428ed80cc9652d41322ff5fa59ed Mon Sep 17 00:00:00 2001 From: Stanislav Lisovskiy Date: Wed, 20 May 2020 17:58:27 +0300 Subject: [PATCH 059/222] drm/i915: Check plane configuration properly Checking with hweight8 if plane configuration had changed seems to be wrong as different plane configs can result in a same hamming weight. So lets check the bitmask itself. 
v2: Fixed "from" field which got corrupted for some weird reason Reviewed-by: Manasi Navare Signed-off-by: Stanislav Lisovskiy Signed-off-by: Manasi Navare Link: https://patchwork.freedesktop.org/patch/msgid/20200520145827.15887-1-stanislav.lisovskiy@intel.com --- drivers/gpu/drm/i915/display/intel_display.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index e2f9ec8dae31..19543045266f 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -14688,7 +14688,13 @@ static int intel_atomic_check_planes(struct intel_atomic_state *state) old_active_planes = old_crtc_state->active_planes & ~BIT(PLANE_CURSOR); new_active_planes = new_crtc_state->active_planes & ~BIT(PLANE_CURSOR); - if (hweight8(old_active_planes) == hweight8(new_active_planes)) + /* + * Not only the number of planes, but if the plane configuration had + * changed might already mean we need to recompute min CDCLK, + * because different planes might consume different amount of Dbuf bandwidth + * according to formula: Bw per plane = Pixel rate * bpp * pipe/plane scale factor + */ + if (old_active_planes == new_active_planes) continue; ret = intel_crtc_add_planes_to_state(state, crtc, new_active_planes); From cf129762ba15ae645cbfb4fc916f24e5538b9cee Mon Sep 17 00:00:00 2001 From: Stanislav Lisovskiy Date: Wed, 20 May 2020 17:59:45 +0300 Subject: [PATCH 060/222] drm/i915: Plane configuration affects CDCLK in Gen11+ So let's support it.
v2: - Fixed "from" field which got corrupted for some weird reason Reviewed-by: Manasi Navare Signed-off-by: Stanislav Lisovskiy Signed-off-by: Manasi Navare Link: https://patchwork.freedesktop.org/patch/msgid/20200520145945.15997-1-stanislav.lisovskiy@intel.com --- drivers/gpu/drm/i915/display/intel_display.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 19543045266f..e1e6ec38f83b 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -14643,7 +14643,7 @@ static bool active_planes_affects_min_cdclk(struct drm_i915_private *dev_priv) /* See {hsw,vlv,ivb}_plane_ratio() */ return IS_BROADWELL(dev_priv) || IS_HASWELL(dev_priv) || IS_CHERRYVIEW(dev_priv) || IS_VALLEYVIEW(dev_priv) || - IS_IVYBRIDGE(dev_priv); + IS_IVYBRIDGE(dev_priv) || (INTEL_GEN(dev_priv) >= 11); } static int intel_atomic_check_planes(struct intel_atomic_state *state) From 8435576b3f69451127fd6149affb94741a82293e Mon Sep 17 00:00:00 2001 From: Stanislav Lisovskiy Date: Tue, 19 May 2020 16:11:15 +0300 Subject: [PATCH 061/222] drm/i915: Introduce for_each_dbuf_slice_in_mask macro We quite often need now to iterate only particular dbuf slices in mask, whether they are active or related to particular crtc. v2: - Minor code refactoring v3: - Use enum for max slices instead of macro Let's make our life a bit easier and use a macro for that. 
Reviewed-by: Manasi Navare Signed-off-by: Stanislav Lisovskiy Signed-off-by: Manasi Navare Link: https://patchwork.freedesktop.org/patch/msgid/20200519131117.17190-6-stanislav.lisovskiy@intel.com --- drivers/gpu/drm/i915/display/intel_display.h | 7 +++++++ drivers/gpu/drm/i915/display/intel_display_power.h | 1 + 2 files changed, 8 insertions(+) diff --git a/drivers/gpu/drm/i915/display/intel_display.h b/drivers/gpu/drm/i915/display/intel_display.h index efb4da205ea2..b7a6d56bac5f 100644 --- a/drivers/gpu/drm/i915/display/intel_display.h +++ b/drivers/gpu/drm/i915/display/intel_display.h @@ -187,6 +187,13 @@ enum plane_id { for ((__p) = PLANE_PRIMARY; (__p) < I915_MAX_PLANES; (__p)++) \ for_each_if((__crtc)->plane_ids_mask & BIT(__p)) +#define for_each_dbuf_slice_in_mask(__slice, __mask) \ + for ((__slice) = DBUF_S1; (__slice) < I915_MAX_DBUF_SLICES; (__slice)++) \ + for_each_if((BIT(__slice)) & (__mask)) + +#define for_each_dbuf_slice(__slice) \ + for_each_dbuf_slice_in_mask(__slice, BIT(I915_MAX_DBUF_SLICES) - 1) + enum port { PORT_NONE = -1, diff --git a/drivers/gpu/drm/i915/display/intel_display_power.h b/drivers/gpu/drm/i915/display/intel_display_power.h index dc766af41e9b..54c20c76057e 100644 --- a/drivers/gpu/drm/i915/display/intel_display_power.h +++ b/drivers/gpu/drm/i915/display/intel_display_power.h @@ -314,6 +314,7 @@ intel_display_power_put_async(struct drm_i915_private *i915, enum dbuf_slice { DBUF_S1, DBUF_S2, + I915_MAX_DBUF_SLICES }; void gen9_dbuf_slices_update(struct drm_i915_private *dev_priv, From cd19154608610ab4cdd6c039e9214b8dd281845c Mon Sep 17 00:00:00 2001 From: Stanislav Lisovskiy Date: Wed, 20 May 2020 18:00:58 +0300 Subject: [PATCH 062/222] drm/i915: Adjust CDCLK accordingly to our DBuf bw needs According to BSpec max BW per slice is calculated using formula Max BW = CDCLK * 64. 
Currently when calculating min CDCLK we account only for per-plane requirements; however, in order to avoid FIFO underruns we need to estimate the accumulated BW consumed by all planes (DDB entries, basically) residing on that particular DBuf slice. This will allow us to put CDCLK lower and save power when we don't need that much bandwidth, or gain additional performance once plane consumption grows. v2: - Fix long line warning - Limited new DBuf bw checks to only gens >= 11 v3: - Let's track used DBuf bw per slice and per crtc in bw state (or maybe in DBuf state in the future), that way we don't need to have all crtcs in state, but only when we detect that we are actually going to change cdclk, just the same way as we do with other stuff, i.e. intel_atomic_serialize_global_state and co. Just as per Ville's paradigm. - Made dbuf bw calculation procedure look nicer by introducing for_each_dbuf_slice_in_mask - we often will now need to iterate slices using mask. - According to experimental results CDCLK * 64 accounts for overall bandwidth across all dbufs, not per dbuf. v4: - Fixed missing const (Ville) - Removed spurious whitespaces (Ville) - Fixed local variable init (reduced scope where not needed) - Added some comments about data rate for planar formats - Changed struct intel_crtc_bw to intel_dbuf_bw - Moved dbuf bw calculation to intel_compute_min_cdclk (Ville) v5: - Removed unneeded macro v6: - Prevent too frequent CDCLK switching back and forth: Always switch to higher CDCLK when needed to prevent bandwidth issues, however don't switch to lower CDCLK earlier than once in 30 minutes in order to prevent constant modeset blinking. We could of course not switch back at all, however this is bad from a power consumption point of view. v7: - Fixed to track cdclk using bw_state, a modeset will now be triggered only when a CDCLK change is really needed. v8: - Lock global state if bw_state->min_cdclk is changed.
- Try getting bw_state only if there are crtcs in the commit (need to have read-locked global state) v9: - Do not do Dbuf bw check for gens < 9 - triggers WARN as ddb_size is 0. v10: - Lock global state for older gens as well. v11: - Define new bw_calc_min_cdclk hook, instead of using a condition(Manasi Navare) v12: - Fixed rebase conflict v13: - Added spaces after declarations to make checkpatch happy. Signed-off-by: Stanislav Lisovskiy Reviewed-by: Manasi Navare Signed-off-by: Manasi Navare Link: https://patchwork.freedesktop.org/patch/msgid/20200520150058.16123-1-stanislav.lisovskiy@intel.com --- drivers/gpu/drm/i915/display/intel_bw.c | 121 ++++++++++++++++++- drivers/gpu/drm/i915/display/intel_bw.h | 10 ++ drivers/gpu/drm/i915/display/intel_cdclk.c | 28 ++++- drivers/gpu/drm/i915/display/intel_cdclk.h | 1 - drivers/gpu/drm/i915/display/intel_display.c | 39 +++++- drivers/gpu/drm/i915/i915_drv.h | 1 + drivers/gpu/drm/i915/intel_pm.c | 31 ++++- drivers/gpu/drm/i915/intel_pm.h | 4 + 8 files changed, 220 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_bw.c b/drivers/gpu/drm/i915/display/intel_bw.c index 98bbe719cf4f..8d2f58e39595 100644 --- a/drivers/gpu/drm/i915/display/intel_bw.c +++ b/drivers/gpu/drm/i915/display/intel_bw.c @@ -6,11 +6,12 @@ #include #include "intel_bw.h" +#include "intel_pm.h" #include "intel_display_types.h" #include "intel_sideband.h" #include "intel_atomic.h" #include "intel_pm.h" - +#include "intel_cdclk.h" /* Parameters for Qclk Geyserville (QGV) */ struct intel_qgv_point { @@ -351,7 +352,6 @@ static unsigned int intel_bw_crtc_data_rate(const struct intel_crtc_state *crtc_ return data_rate; } - void intel_bw_crtc_update(struct intel_bw_state *bw_state, const struct intel_crtc_state *crtc_state) { @@ -428,6 +428,123 @@ intel_atomic_get_bw_state(struct intel_atomic_state *state) return to_intel_bw_state(bw_state); } +int skl_bw_calc_min_cdclk(struct intel_atomic_state *state) +{ + struct drm_i915_private 
*dev_priv = to_i915(state->base.dev); + int i; + const struct intel_crtc_state *crtc_state; + struct intel_crtc *crtc; + int max_bw = 0; + int slice_id; + struct intel_bw_state *new_bw_state = NULL; + struct intel_bw_state *old_bw_state = NULL; + + for_each_new_intel_crtc_in_state(state, crtc, crtc_state, i) { + enum plane_id plane_id; + struct intel_dbuf_bw *crtc_bw; + + new_bw_state = intel_atomic_get_bw_state(state); + if (IS_ERR(new_bw_state)) + return PTR_ERR(new_bw_state); + + crtc_bw = &new_bw_state->dbuf_bw[crtc->pipe]; + + memset(&crtc_bw->used_bw, 0, sizeof(crtc_bw->used_bw)); + + for_each_plane_id_on_crtc(crtc, plane_id) { + const struct skl_ddb_entry *plane_alloc = + &crtc_state->wm.skl.plane_ddb_y[plane_id]; + const struct skl_ddb_entry *uv_plane_alloc = + &crtc_state->wm.skl.plane_ddb_uv[plane_id]; + unsigned int data_rate = crtc_state->data_rate[plane_id]; + unsigned int dbuf_mask = 0; + + dbuf_mask |= skl_ddb_dbuf_slice_mask(dev_priv, plane_alloc); + dbuf_mask |= skl_ddb_dbuf_slice_mask(dev_priv, uv_plane_alloc); + + /* + * FIXME: To calculate that more properly we probably need to + * to split per plane data_rate into data_rate_y and data_rate_uv + * for multiplanar formats in order not to get accounted those twice + * if they happen to reside on different slices. + * However for pre-icl this would work anyway because we have only single + * slice and for icl+ uv plane has non-zero data rate. + * So in worst case those calculation are a bit pessimistic, which + * shouldn't pose any significant problem anyway. + */ + for_each_dbuf_slice_in_mask(slice_id, dbuf_mask) + crtc_bw->used_bw[slice_id] += data_rate; + } + + for_each_dbuf_slice(slice_id) { + /* + * Current experimental observations show that contrary to BSpec + * we get underruns once we exceed 64 * CDCLK for slices in total. + * As a temporary measure in order not to keep CDCLK bumped up all the + * time we calculate CDCLK according to this formula for overall bw + * consumed by slices. 
+ */ + max_bw += crtc_bw->used_bw[slice_id]; + } + + new_bw_state->min_cdclk = max_bw / 64; + + old_bw_state = intel_atomic_get_old_bw_state(state); + } + + if (!old_bw_state) + return 0; + + if (new_bw_state->min_cdclk != old_bw_state->min_cdclk) { + int ret = intel_atomic_lock_global_state(&new_bw_state->base); + + if (ret) + return ret; + } + + return 0; +} + +int intel_bw_calc_min_cdclk(struct intel_atomic_state *state) +{ + int i; + const struct intel_crtc_state *crtc_state; + struct intel_crtc *crtc; + int min_cdclk = 0; + struct intel_bw_state *new_bw_state = NULL; + struct intel_bw_state *old_bw_state = NULL; + + for_each_new_intel_crtc_in_state(state, crtc, crtc_state, i) { + struct intel_cdclk_state *cdclk_state; + + new_bw_state = intel_atomic_get_bw_state(state); + if (IS_ERR(new_bw_state)) + return PTR_ERR(new_bw_state); + + cdclk_state = intel_atomic_get_cdclk_state(state); + if (IS_ERR(cdclk_state)) + return PTR_ERR(cdclk_state); + + min_cdclk = max(cdclk_state->min_cdclk[crtc->pipe], min_cdclk); + + new_bw_state->min_cdclk = min_cdclk; + + old_bw_state = intel_atomic_get_old_bw_state(state); + } + + if (!old_bw_state) + return 0; + + if (new_bw_state->min_cdclk != old_bw_state->min_cdclk) { + int ret = intel_atomic_lock_global_state(&new_bw_state->base); + + if (ret) + return ret; + } + + return 0; +} + int intel_bw_atomic_check(struct intel_atomic_state *state) { struct drm_i915_private *dev_priv = to_i915(state->base.dev); diff --git a/drivers/gpu/drm/i915/display/intel_bw.h b/drivers/gpu/drm/i915/display/intel_bw.h index bbcaaa73ec1b..af1a981dec84 100644 --- a/drivers/gpu/drm/i915/display/intel_bw.h +++ b/drivers/gpu/drm/i915/display/intel_bw.h @@ -10,13 +10,19 @@ #include "intel_display.h" #include "intel_global_state.h" +#include "intel_display_power.h" struct drm_i915_private; struct intel_atomic_state; struct intel_crtc_state; +struct intel_dbuf_bw { + int used_bw[I915_MAX_DBUF_SLICES]; +}; + struct intel_bw_state { struct intel_global_state 
base; + struct intel_dbuf_bw dbuf_bw[I915_MAX_PIPES]; /* * Contains a bit mask, used to determine, whether correspondent @@ -36,6 +42,8 @@ struct intel_bw_state { /* bitmask of active pipes */ u8 active_pipes; + + int min_cdclk; }; #define to_intel_bw_state(x) container_of((x), struct intel_bw_state, base) @@ -56,5 +64,7 @@ void intel_bw_crtc_update(struct intel_bw_state *bw_state, const struct intel_crtc_state *crtc_state); int icl_pcode_restrict_qgv_points(struct drm_i915_private *dev_priv, u32 points_mask); +int intel_bw_calc_min_cdclk(struct intel_atomic_state *state); +int skl_bw_calc_min_cdclk(struct intel_atomic_state *state); #endif /* __INTEL_BW_H__ */ diff --git a/drivers/gpu/drm/i915/display/intel_cdclk.c b/drivers/gpu/drm/i915/display/intel_cdclk.c index 9419a4724357..4c38d20db9c3 100644 --- a/drivers/gpu/drm/i915/display/intel_cdclk.c +++ b/drivers/gpu/drm/i915/display/intel_cdclk.c @@ -21,10 +21,12 @@ * DEALINGS IN THE SOFTWARE. */ +#include #include "intel_atomic.h" #include "intel_cdclk.h" #include "intel_display_types.h" #include "intel_sideband.h" +#include "intel_bw.h" /** * DOC: CDCLK / RAWCLK @@ -2093,11 +2095,9 @@ int intel_crtc_compute_min_cdclk(const struct intel_crtc_state *crtc_state) static int intel_compute_min_cdclk(struct intel_cdclk_state *cdclk_state) { struct intel_atomic_state *state = cdclk_state->base.state; - struct drm_i915_private *dev_priv = to_i915(state->base.dev); struct intel_crtc *crtc; struct intel_crtc_state *crtc_state; int min_cdclk, i; - enum pipe pipe; for_each_new_intel_crtc_in_state(state, crtc, crtc_state, i) { int ret; @@ -2117,8 +2117,18 @@ static int intel_compute_min_cdclk(struct intel_cdclk_state *cdclk_state) } min_cdclk = cdclk_state->force_min_cdclk; - for_each_pipe(dev_priv, pipe) - min_cdclk = max(cdclk_state->min_cdclk[pipe], min_cdclk); + + for_each_new_intel_crtc_in_state(state, crtc, crtc_state, i) { + struct intel_bw_state *bw_state; + + min_cdclk = max(cdclk_state->min_cdclk[crtc->pipe], 
min_cdclk); + + bw_state = intel_atomic_get_bw_state(state); + if (IS_ERR(bw_state)) + return PTR_ERR(bw_state); + + min_cdclk = max(bw_state->min_cdclk, min_cdclk); + } return min_cdclk; } @@ -2790,25 +2800,30 @@ void intel_init_cdclk_hooks(struct drm_i915_private *dev_priv) { if (INTEL_GEN(dev_priv) >= 12) { dev_priv->display.set_cdclk = bxt_set_cdclk; + dev_priv->display.bw_calc_min_cdclk = skl_bw_calc_min_cdclk; dev_priv->display.modeset_calc_cdclk = bxt_modeset_calc_cdclk; dev_priv->display.calc_voltage_level = tgl_calc_voltage_level; dev_priv->cdclk.table = icl_cdclk_table; } else if (IS_ELKHARTLAKE(dev_priv)) { dev_priv->display.set_cdclk = bxt_set_cdclk; + dev_priv->display.bw_calc_min_cdclk = skl_bw_calc_min_cdclk; dev_priv->display.modeset_calc_cdclk = bxt_modeset_calc_cdclk; dev_priv->display.calc_voltage_level = ehl_calc_voltage_level; dev_priv->cdclk.table = icl_cdclk_table; } else if (INTEL_GEN(dev_priv) >= 11) { dev_priv->display.set_cdclk = bxt_set_cdclk; + dev_priv->display.bw_calc_min_cdclk = skl_bw_calc_min_cdclk; dev_priv->display.modeset_calc_cdclk = bxt_modeset_calc_cdclk; dev_priv->display.calc_voltage_level = icl_calc_voltage_level; dev_priv->cdclk.table = icl_cdclk_table; } else if (IS_CANNONLAKE(dev_priv)) { + dev_priv->display.bw_calc_min_cdclk = skl_bw_calc_min_cdclk; dev_priv->display.set_cdclk = bxt_set_cdclk; dev_priv->display.modeset_calc_cdclk = bxt_modeset_calc_cdclk; dev_priv->display.calc_voltage_level = cnl_calc_voltage_level; dev_priv->cdclk.table = cnl_cdclk_table; } else if (IS_GEN9_LP(dev_priv)) { + dev_priv->display.bw_calc_min_cdclk = skl_bw_calc_min_cdclk; dev_priv->display.set_cdclk = bxt_set_cdclk; dev_priv->display.modeset_calc_cdclk = bxt_modeset_calc_cdclk; dev_priv->display.calc_voltage_level = bxt_calc_voltage_level; @@ -2817,18 +2832,23 @@ void intel_init_cdclk_hooks(struct drm_i915_private *dev_priv) else dev_priv->cdclk.table = bxt_cdclk_table; } else if (IS_GEN9_BC(dev_priv)) { + 
dev_priv->display.bw_calc_min_cdclk = skl_bw_calc_min_cdclk; dev_priv->display.set_cdclk = skl_set_cdclk; dev_priv->display.modeset_calc_cdclk = skl_modeset_calc_cdclk; } else if (IS_BROADWELL(dev_priv)) { + dev_priv->display.bw_calc_min_cdclk = intel_bw_calc_min_cdclk; dev_priv->display.set_cdclk = bdw_set_cdclk; dev_priv->display.modeset_calc_cdclk = bdw_modeset_calc_cdclk; } else if (IS_CHERRYVIEW(dev_priv)) { + dev_priv->display.bw_calc_min_cdclk = intel_bw_calc_min_cdclk; dev_priv->display.set_cdclk = chv_set_cdclk; dev_priv->display.modeset_calc_cdclk = vlv_modeset_calc_cdclk; } else if (IS_VALLEYVIEW(dev_priv)) { + dev_priv->display.bw_calc_min_cdclk = intel_bw_calc_min_cdclk; dev_priv->display.set_cdclk = vlv_set_cdclk; dev_priv->display.modeset_calc_cdclk = vlv_modeset_calc_cdclk; } else { + dev_priv->display.bw_calc_min_cdclk = intel_bw_calc_min_cdclk; dev_priv->display.modeset_calc_cdclk = fixed_modeset_calc_cdclk; } diff --git a/drivers/gpu/drm/i915/display/intel_cdclk.h b/drivers/gpu/drm/i915/display/intel_cdclk.h index 5731806e4cee..d62e11d620c0 100644 --- a/drivers/gpu/drm/i915/display/intel_cdclk.h +++ b/drivers/gpu/drm/i915/display/intel_cdclk.h @@ -7,7 +7,6 @@ #define __INTEL_CDCLK_H__ #include - #include "i915_drv.h" #include "intel_display.h" #include "intel_global_state.h" diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index e1e6ec38f83b..c8993c51c935 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -14708,16 +14708,14 @@ static int intel_atomic_check_planes(struct intel_atomic_state *state) static int intel_atomic_check_cdclk(struct intel_atomic_state *state, bool *need_cdclk_calc) { - struct intel_cdclk_state *new_cdclk_state; + struct drm_i915_private *dev_priv = to_i915(state->base.dev); int i; struct intel_plane_state *plane_state; struct intel_plane *plane; int ret; - - new_cdclk_state = 
intel_atomic_get_new_cdclk_state(state); - if (new_cdclk_state && new_cdclk_state->force_min_cdclk_changed) - *need_cdclk_calc = true; - + struct intel_cdclk_state *new_cdclk_state; + struct intel_crtc_state *new_crtc_state; + struct intel_crtc *crtc; /* * active_planes bitmask has been updated, and potentially * affected planes are part of the state. We can now @@ -14729,6 +14727,35 @@ static int intel_atomic_check_cdclk(struct intel_atomic_state *state, return ret; } + new_cdclk_state = intel_atomic_get_new_cdclk_state(state); + + if (new_cdclk_state && new_cdclk_state->force_min_cdclk_changed) + *need_cdclk_calc = true; + + ret = dev_priv->display.bw_calc_min_cdclk(state); + if (ret) + return ret; + + if (!new_cdclk_state) + return 0; + + for_each_new_intel_crtc_in_state(state, crtc, new_crtc_state, i) { + struct intel_bw_state *bw_state; + int min_cdclk = 0; + + min_cdclk = max(new_cdclk_state->min_cdclk[crtc->pipe], min_cdclk); + + bw_state = intel_atomic_get_bw_state(state); + if (IS_ERR(bw_state)) + return PTR_ERR(bw_state); + + /* + * Currently do this change only if we need to increase + */ + if (bw_state->min_cdclk > min_cdclk) + *need_cdclk_calc = true; + } + return 0; } diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 1fd7fdbed553..7436bd9f7f20 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -273,6 +273,7 @@ struct drm_i915_display_funcs { void (*set_cdclk)(struct drm_i915_private *dev_priv, const struct intel_cdclk_config *cdclk_config, enum pipe pipe); + int (*bw_calc_min_cdclk)(struct intel_atomic_state *state); int (*get_fifo_size)(struct drm_i915_private *dev_priv, enum i9xx_plane_id i9xx_plane); int (*compute_pipe_wm)(struct intel_crtc_state *crtc_state); diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 4d885ef0bac5..937d91c28de9 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -4031,10 +4031,9 @@ 
icl_get_first_dbuf_slice_offset(u32 dbuf_slice_mask, return offset; } -static u16 intel_get_ddb_size(struct drm_i915_private *dev_priv) +u16 intel_get_ddb_size(struct drm_i915_private *dev_priv) { u16 ddb_size = INTEL_INFO(dev_priv)->ddb_size; - drm_WARN_ON(&dev_priv->drm, ddb_size == 0); if (INTEL_GEN(dev_priv) < 11) @@ -4043,6 +4042,34 @@ static u16 intel_get_ddb_size(struct drm_i915_private *dev_priv) return ddb_size; } +u32 skl_ddb_dbuf_slice_mask(struct drm_i915_private *dev_priv, + const struct skl_ddb_entry *entry) +{ + u32 slice_mask = 0; + u16 ddb_size = intel_get_ddb_size(dev_priv); + u16 num_supported_slices = INTEL_INFO(dev_priv)->num_supported_dbuf_slices; + u16 slice_size = ddb_size / num_supported_slices; + u16 start_slice; + u16 end_slice; + + if (!skl_ddb_entry_size(entry)) + return 0; + + start_slice = entry->start / slice_size; + end_slice = (entry->end - 1) / slice_size; + + /* + * Per plane DDB entry can in a really worst case be on multiple slices + * but single entry is anyway contigious. 
+ */ + while (start_slice <= end_slice) { + slice_mask |= BIT(start_slice); + start_slice++; + } + + return slice_mask; +} + static u8 skl_compute_dbuf_slices(const struct intel_crtc_state *crtc_state, u8 active_pipes); diff --git a/drivers/gpu/drm/i915/intel_pm.h b/drivers/gpu/drm/i915/intel_pm.h index 6636d2a057cd..d665bf77ae80 100644 --- a/drivers/gpu/drm/i915/intel_pm.h +++ b/drivers/gpu/drm/i915/intel_pm.h @@ -39,6 +39,10 @@ u8 intel_enabled_dbuf_slices_mask(struct drm_i915_private *dev_priv); void skl_pipe_ddb_get_hw_state(struct intel_crtc *crtc, struct skl_ddb_entry *ddb_y, struct skl_ddb_entry *ddb_uv); +void skl_ddb_get_hw_state(struct drm_i915_private *dev_priv); +u16 intel_get_ddb_size(struct drm_i915_private *dev_priv); +u32 skl_ddb_dbuf_slice_mask(struct drm_i915_private *dev_priv, + const struct skl_ddb_entry *entry); void skl_pipe_wm_get_hw_state(struct intel_crtc *crtc, struct skl_pipe_wm *out); void g4x_wm_sanitize(struct drm_i915_private *dev_priv); From 82ea174dc5425d4e85e25d0c4ba961a2e494392a Mon Sep 17 00:00:00 2001 From: Stanislav Lisovskiy Date: Tue, 19 May 2020 16:11:17 +0300 Subject: [PATCH 063/222] drm/i915: Remove unneeded hack now for CDCLK No need to bump up CDCLK now, as it is now correctly calculated, accounting for DBuf BW as BSpec says. 
Reviewed-by: Manasi Navare Signed-off-by: Stanislav Lisovskiy Signed-off-by: Manasi Navare Link: https://patchwork.freedesktop.org/patch/msgid/20200519131117.17190-8-stanislav.lisovskiy@intel.com --- drivers/gpu/drm/i915/display/intel_cdclk.c | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_cdclk.c b/drivers/gpu/drm/i915/display/intel_cdclk.c index 4c38d20db9c3..dda9bae02a86 100644 --- a/drivers/gpu/drm/i915/display/intel_cdclk.c +++ b/drivers/gpu/drm/i915/display/intel_cdclk.c @@ -2070,18 +2070,6 @@ int intel_crtc_compute_min_cdclk(const struct intel_crtc_state *crtc_state) /* Account for additional needs from the planes */ min_cdclk = max(intel_planes_min_cdclk(crtc_state), min_cdclk); - /* - * HACK. Currently for TGL platforms we calculate - * min_cdclk initially based on pixel_rate divided - * by 2, accounting for also plane requirements, - * however in some cases the lowest possible CDCLK - * doesn't work and causing the underruns. - * Explicitly stating here that this seems to be currently - * rather a Hack, than final solution. 
- */ - if (IS_TIGERLAKE(dev_priv)) - min_cdclk = max(min_cdclk, (int)crtc_state->pixel_rate); - if (min_cdclk > dev_priv->max_cdclk_freq) { drm_dbg_kms(&dev_priv->drm, "required cdclk (%d kHz) exceeds max (%d kHz)\n", From 2992b543b8f4df60a1009dfac9390a141b6fd6c4 Mon Sep 17 00:00:00 2001 From: Swathi Dhanavanthri Date: Wed, 20 May 2020 23:44:48 -0700 Subject: [PATCH 064/222] drm/i915/ehl: Extend w/a 14010685332 to JSP/MCC MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is a permanent w/a for JSL/EHL. This is to be applied to the PCH types on JSL/EHL, i.e. JSP/MCC. Bspec: 52888 v2: Fixed the wrong usage of logical OR(ville) v3: Removed extra braces, changed the check(jose) Signed-off-by: Swathi Dhanavanthri Reviewed-by: José Roberto de Souza Signed-off-by: José Roberto de Souza Link: https://patchwork.freedesktop.org/patch/msgid/20200521064448.29522-1-swathi.dhanavanthri@intel.com --- drivers/gpu/drm/i915/i915_irq.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 95996db46939..efdd4c7b8e92 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -2904,8 +2904,10 @@ static void gen11_display_irq_reset(struct drm_i915_private *dev_priv) if (INTEL_PCH_TYPE(dev_priv) >= PCH_ICP) GEN3_IRQ_RESET(uncore, SDE); - /* Wa_14010685332:icl */ - if (INTEL_PCH_TYPE(dev_priv) == PCH_ICP) { + /* Wa_14010685332:icl,jsl,ehl */ + if (INTEL_PCH_TYPE(dev_priv) == PCH_ICP || + INTEL_PCH_TYPE(dev_priv) == PCH_JSP || + INTEL_PCH_TYPE(dev_priv) == PCH_MCC) { intel_uncore_rmw(uncore, SOUTH_CHICKEN1, SBCLK_RUN_REFCLK_DIS, SBCLK_RUN_REFCLK_DIS); intel_uncore_rmw(uncore, SOUTH_CHICKEN1, From b975abbd382fe442713a4c233549abb90e57c22b Mon Sep 17 00:00:00 2001 From: Qiushi Wu Date: Fri, 22 May 2020 09:34:51 +0100 Subject: [PATCH 065/222] agp/intel: Fix a memory leak on module initialisation failure In intel_gtt_setup_scratch_page(),
pointer "page" is not released if pci_dma_mapping_error() returns an error, leading to a memory leak on module initialisation failure. Simply fix this issue by freeing "page" before returning. Fixes: 0e87d2b06cb46 ("intel-gtt: initialize our own scratch page") Signed-off-by: Qiushi Wu Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20200522083451.7448-1-chris@chris-wilson.co.uk --- drivers/char/agp/intel-gtt.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/char/agp/intel-gtt.c b/drivers/char/agp/intel-gtt.c index 4b34a5195c65..5bfdf222d5f9 100644 --- a/drivers/char/agp/intel-gtt.c +++ b/drivers/char/agp/intel-gtt.c @@ -304,8 +304,10 @@ static int intel_gtt_setup_scratch_page(void) if (intel_private.needs_dmar) { dma_addr = pci_map_page(intel_private.pcidev, page, 0, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL); - if (pci_dma_mapping_error(intel_private.pcidev, dma_addr)) + if (pci_dma_mapping_error(intel_private.pcidev, dma_addr)) { + __free_page(page); return -EINVAL; + } intel_private.scratch_page_dma = dma_addr; } else From cac91e671ad5dc86ff71e81f5c1ec0ac149c32b9 Mon Sep 17 00:00:00 2001 From: Stanislav Lisovskiy Date: Fri, 22 May 2020 16:18:43 +0300 Subject: [PATCH 066/222] drm/i915: Fix includes and local vars order Removed duplicate include and fixed comment > 80 chars.
v2: Added newline after system include and between functions Reviewed-by: Chris Wilson Signed-off-by: Stanislav Lisovskiy Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20200522131843.20477-1-stanislav.lisovskiy@intel.com --- drivers/gpu/drm/i915/display/intel_bw.c | 46 ++++++++++++---------- drivers/gpu/drm/i915/display/intel_bw.h | 2 +- drivers/gpu/drm/i915/display/intel_cdclk.c | 3 +- drivers/gpu/drm/i915/display/intel_cdclk.h | 1 + drivers/gpu/drm/i915/intel_pm.c | 2 +- drivers/gpu/drm/i915/intel_pm.h | 2 +- 6 files changed, 31 insertions(+), 25 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_bw.c b/drivers/gpu/drm/i915/display/intel_bw.c index 8d2f58e39595..a79bd7aeb03b 100644 --- a/drivers/gpu/drm/i915/display/intel_bw.c +++ b/drivers/gpu/drm/i915/display/intel_bw.c @@ -5,13 +5,12 @@ #include -#include "intel_bw.h" -#include "intel_pm.h" -#include "intel_display_types.h" -#include "intel_sideband.h" #include "intel_atomic.h" -#include "intel_pm.h" +#include "intel_bw.h" #include "intel_cdclk.h" +#include "intel_display_types.h" +#include "intel_pm.h" +#include "intel_sideband.h" /* Parameters for Qclk Geyserville (QGV) */ struct intel_qgv_point { @@ -352,6 +351,7 @@ static unsigned int intel_bw_crtc_data_rate(const struct intel_crtc_state *crtc_ return data_rate; } + void intel_bw_crtc_update(struct intel_bw_state *bw_state, const struct intel_crtc_state *crtc_state) { @@ -431,13 +431,13 @@ intel_atomic_get_bw_state(struct intel_atomic_state *state) int skl_bw_calc_min_cdclk(struct intel_atomic_state *state) { struct drm_i915_private *dev_priv = to_i915(state->base.dev); - int i; + struct intel_bw_state *new_bw_state = NULL; + struct intel_bw_state *old_bw_state = NULL; const struct intel_crtc_state *crtc_state; struct intel_crtc *crtc; int max_bw = 0; int slice_id; - struct intel_bw_state *new_bw_state = NULL; - struct intel_bw_state *old_bw_state = NULL; + int i; for_each_new_intel_crtc_in_state(state, crtc, 
crtc_state, i) { enum plane_id plane_id; @@ -463,14 +463,17 @@ int skl_bw_calc_min_cdclk(struct intel_atomic_state *state) dbuf_mask |= skl_ddb_dbuf_slice_mask(dev_priv, uv_plane_alloc); /* - * FIXME: To calculate that more properly we probably need to - * to split per plane data_rate into data_rate_y and data_rate_uv - * for multiplanar formats in order not to get accounted those twice - * if they happen to reside on different slices. - * However for pre-icl this would work anyway because we have only single - * slice and for icl+ uv plane has non-zero data rate. - * So in worst case those calculation are a bit pessimistic, which - * shouldn't pose any significant problem anyway. + * FIXME: To calculate that more properly we probably + * need to to split per plane data_rate into data_rate_y + * and data_rate_uv for multiplanar formats in order not + * to get accounted those twice if they happen to reside + * on different slices. + * However for pre-icl this would work anyway because + * we have only single slice and for icl+ uv plane has + * non-zero data rate. + * So in worst case those calculation are a bit + * pessimistic, which shouldn't pose any significant + * problem anyway. */ for_each_dbuf_slice_in_mask(slice_id, dbuf_mask) crtc_bw->used_bw[slice_id] += data_rate; @@ -478,11 +481,12 @@ int skl_bw_calc_min_cdclk(struct intel_atomic_state *state) for_each_dbuf_slice(slice_id) { /* - * Current experimental observations show that contrary to BSpec - * we get underruns once we exceed 64 * CDCLK for slices in total. - * As a temporary measure in order not to keep CDCLK bumped up all the - * time we calculate CDCLK according to this formula for overall bw - * consumed by slices. + * Current experimental observations show that contrary + * to BSpec we get underruns once we exceed 64 * CDCLK + * for slices in total. 
+ * As a temporary measure in order not to keep CDCLK + * bumped up all the time we calculate CDCLK according + * to this formula for overall bw consumed by slices. */ max_bw += crtc_bw->used_bw[slice_id]; } diff --git a/drivers/gpu/drm/i915/display/intel_bw.h b/drivers/gpu/drm/i915/display/intel_bw.h index af1a981dec84..46c6eecbd917 100644 --- a/drivers/gpu/drm/i915/display/intel_bw.h +++ b/drivers/gpu/drm/i915/display/intel_bw.h @@ -9,8 +9,8 @@ #include #include "intel_display.h" -#include "intel_global_state.h" #include "intel_display_power.h" +#include "intel_global_state.h" struct drm_i915_private; struct intel_atomic_state; diff --git a/drivers/gpu/drm/i915/display/intel_cdclk.c b/drivers/gpu/drm/i915/display/intel_cdclk.c index dda9bae02a86..f9b0fc7317de 100644 --- a/drivers/gpu/drm/i915/display/intel_cdclk.c +++ b/drivers/gpu/drm/i915/display/intel_cdclk.c @@ -22,11 +22,12 @@ */ #include + #include "intel_atomic.h" +#include "intel_bw.h" #include "intel_cdclk.h" #include "intel_display_types.h" #include "intel_sideband.h" -#include "intel_bw.h" /** * DOC: CDCLK / RAWCLK diff --git a/drivers/gpu/drm/i915/display/intel_cdclk.h b/drivers/gpu/drm/i915/display/intel_cdclk.h index d62e11d620c0..5731806e4cee 100644 --- a/drivers/gpu/drm/i915/display/intel_cdclk.h +++ b/drivers/gpu/drm/i915/display/intel_cdclk.h @@ -7,6 +7,7 @@ #define __INTEL_CDCLK_H__ #include + #include "i915_drv.h" #include "intel_display.h" #include "intel_global_state.h" diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 937d91c28de9..b134a1b9d738 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -33,6 +33,7 @@ #include #include "display/intel_atomic.h" +#include "display/intel_bw.h" #include "display/intel_display_types.h" #include "display/intel_fbc.h" #include "display/intel_sprite.h" @@ -43,7 +44,6 @@ #include "i915_fixed.h" #include "i915_irq.h" #include "i915_trace.h" -#include "display/intel_bw.h" #include 
"intel_pm.h" #include "intel_sideband.h" #include "../../../platform/x86/intel_ips.h" diff --git a/drivers/gpu/drm/i915/intel_pm.h b/drivers/gpu/drm/i915/intel_pm.h index d665bf77ae80..a2473594c2db 100644 --- a/drivers/gpu/drm/i915/intel_pm.h +++ b/drivers/gpu/drm/i915/intel_pm.h @@ -8,10 +8,10 @@ #include +#include "display/intel_bw.h" #include "display/intel_global_state.h" #include "i915_reg.h" -#include "display/intel_bw.h" struct drm_device; struct drm_i915_private; From 957ad9a02be6faa87594c58ac09460cd3d190d0e Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 22 May 2020 14:27:06 +0100 Subject: [PATCH 067/222] drm/i915/gem: Avoid iterating an empty list Our __sgt_iter assumes that the scattergather list has at least one element. But during construction we may fail in allocating the first page, and so mark the first element as the terminator. This is unexpected! [22555.524752] RIP: 0010:shmem_get_pages+0x506/0x710 [i915] [22555.524759] Code: 49 8b 2c 24 31 c0 66 89 44 24 40 48 85 ed 0f 84 62 01 00 00 4c 8b 75 00 8b 5d 08 44 8b 7d 0c 48 8b 0d 7e 34 07 e2 49 83 e6 fc <49> 8b 16 41 01 df 48 89 cf 48 89 d0 48 c1 e8 2d 48 85 c9 0f 84 c8 [22555.524765] RSP: 0018:ffffc9000053f9d0 EFLAGS: 00010246 [22555.524770] RAX: 0000000000000000 RBX: 0000000000000000 RCX: ffff8881ffffa000 [22555.524774] RDX: fffffffffffffff4 RSI: ffffffffffffffff RDI: ffffffff821efe00 [22555.524778] RBP: ffff8881b099ab00 R08: 0000000000000000 R09: 00000000fffffff4 [22555.524782] R10: 0000000000000002 R11: 00000000ffec0a02 R12: ffff8881cd3c8d60 [22555.524786] R13: 00000000fffffff4 R14: 0000000000000000 R15: 0000000000000000 [22555.524790] FS: 00007f4fbeb9b9c0(0000) GS:ffff8881f8580000(0000) knlGS:0000000000000000 [22555.524795] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [22555.524799] CR2: 0000000000000000 CR3: 00000001ec7f0004 CR4: 00000000001606e0 [22555.524803] Call Trace: [22555.524919] __i915_gem_object_get_pages+0x4f/0x60 [i915] Fixes: 85d1225ec066 ("drm/i915: Introduce & use new 
lightweight SGL iterators") Signed-off-by: Chris Wilson Cc: Matthew Auld Cc: Tvrtko Ursulin Cc: # v4.8+ Reviewed-by: Matthew Auld Reviewed-by: Maciej Patelczyk Link: https://patchwork.freedesktop.org/patch/msgid/20200522132706.5133-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gem/i915_gem_shmem.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c index 5d5d7eef3f43..7aff3514d97a 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c @@ -39,7 +39,6 @@ static int shmem_get_pages(struct drm_i915_gem_object *obj) unsigned long last_pfn = 0; /* suppress gcc warning */ unsigned int max_segment = i915_sg_segment_size(); unsigned int sg_page_sizes; - struct pagevec pvec; gfp_t noreclaim; int ret; @@ -192,13 +191,17 @@ err_sg: sg_mark_end(sg); err_pages: mapping_clear_unevictable(mapping); - pagevec_init(&pvec); - for_each_sgt_page(page, sgt_iter, st) { - if (!pagevec_add(&pvec, page)) + if (sg != st->sgl) { + struct pagevec pvec; + + pagevec_init(&pvec); + for_each_sgt_page(page, sgt_iter, st) { + if (!pagevec_add(&pvec, page)) + check_release_pagevec(&pvec); + } + if (pagevec_count(&pvec)) check_release_pagevec(&pvec); } - if (pagevec_count(&pvec)) - check_release_pagevec(&pvec); sg_free_table(st); kfree(st); From afeda4f3b1c88f9e8721b4d4cf48af6f72c6b732 Mon Sep 17 00:00:00 2001 From: Animesh Manna Date: Wed, 20 May 2020 18:37:37 +0530 Subject: [PATCH 068/222] drm/i915/dsb: Pre allocate and late cleanup of cmd buffer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pre-allocate command buffer in atomic_commit using intel_dsb_prepare function which also includes pinning and map in cpu domain. No functional change is dsb write/commit functions. Now dsb get/put function is removed and ref-count mechanism is not needed. 
The following dsb APIs are added to do the respective jobs mentioned below. intel_dsb_prepare - Allocate, pin and map the buffer. intel_dsb_cleanup - Unpin and release the gem object. RFC: Initial patch for design review. v2: included _init() part in _prepare(). [Daniel, Ville] v3: dsb_cleanup called after cleanup_planes. [Daniel] v4: dsb structure is moved to intel_crtc_state from intel_crtc. [Maarten] v5: dsb get/put/ref-count mechanism removed. [Maarten] v6: Based on review feedback following changes are added, - replaced intel_dsb structure by pointer in intel_crtc_state. [Maarten] - passing intel_crtc_state to dsb-api to simplify the code. [Maarten] - few dsb functions prototype modified to simplify code. v7: added few cosmetic changes suggested by Jani and null check for crtc_state in dsb_cleanup removed as suggested by Maarten. v8: changed the function parameter to intel_crtc_state* of ivb_load_lut_ext_max() from intel_crtc. [Maarten] v9: error handling improved in _write() and prepare(). [Maarten] Cc: Maarten Lankhorst Cc: Ville Syrjälä Cc: Jani Nikula Cc: Daniel Vetter Acked-by: Daniel Vetter Reviewed-by: Maarten Lankhorst Signed-off-by: Animesh Manna Signed-off-by: Uma Shankar Link: https://patchwork.freedesktop.org/patch/msgid/20200520130737.11240-1-animesh.manna@intel.com --- drivers/gpu/drm/i915/display/intel_atomic.c | 3 + drivers/gpu/drm/i915/display/intel_color.c | 66 ++--- drivers/gpu/drm/i915/display/intel_display.c | 58 +++- .../drm/i915/display/intel_display_types.h | 6 +- drivers/gpu/drm/i915/display/intel_dsb.c | 250 ++++++++---------- drivers/gpu/drm/i915/display/intel_dsb.h | 17 +- 6 files changed, 206 insertions(+), 194 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_atomic.c b/drivers/gpu/drm/i915/display/intel_atomic.c index d043057d2fa0..3cb866f22e74 100644 --- a/drivers/gpu/drm/i915/display/intel_atomic.c +++ b/drivers/gpu/drm/i915/display/intel_atomic.c @@ -252,6 +252,7 @@ intel_crtc_duplicate_state(struct drm_crtc *crtc)
crtc_state->wm.need_postvbl_update = false; crtc_state->fb_bits = 0; crtc_state->update_planes = 0; + crtc_state->dsb = NULL; return &crtc_state->uapi; } @@ -292,6 +293,8 @@ intel_crtc_destroy_state(struct drm_crtc *crtc, { struct intel_crtc_state *crtc_state = to_intel_crtc_state(state); + drm_WARN_ON(crtc->dev, crtc_state->dsb); + __drm_atomic_helper_crtc_destroy_state(&crtc_state->uapi); intel_crtc_free_hw_state(crtc_state); kfree(crtc_state); diff --git a/drivers/gpu/drm/i915/display/intel_color.c b/drivers/gpu/drm/i915/display/intel_color.c index 98ece9cd7cdd..945bb03bdd4d 100644 --- a/drivers/gpu/drm/i915/display/intel_color.c +++ b/drivers/gpu/drm/i915/display/intel_color.c @@ -714,16 +714,16 @@ static void bdw_load_lut_10(struct intel_crtc *crtc, intel_de_write(dev_priv, PREC_PAL_INDEX(pipe), 0); } -static void ivb_load_lut_ext_max(struct intel_crtc *crtc) +static void ivb_load_lut_ext_max(const struct intel_crtc_state *crtc_state) { + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); - struct intel_dsb *dsb = intel_dsb_get(crtc); enum pipe pipe = crtc->pipe; /* Program the max register to clamp values > 1.0. */ - intel_dsb_reg_write(dsb, PREC_PAL_EXT_GC_MAX(pipe, 0), 1 << 16); - intel_dsb_reg_write(dsb, PREC_PAL_EXT_GC_MAX(pipe, 1), 1 << 16); - intel_dsb_reg_write(dsb, PREC_PAL_EXT_GC_MAX(pipe, 2), 1 << 16); + intel_dsb_reg_write(crtc_state, PREC_PAL_EXT_GC_MAX(pipe, 0), 1 << 16); + intel_dsb_reg_write(crtc_state, PREC_PAL_EXT_GC_MAX(pipe, 1), 1 << 16); + intel_dsb_reg_write(crtc_state, PREC_PAL_EXT_GC_MAX(pipe, 2), 1 << 16); /* * Program the gc max 2 register to clamp values > 1.0. 
@@ -731,15 +731,13 @@ static void ivb_load_lut_ext_max(struct intel_crtc *crtc) * from 3.0 to 7.0 */ if (INTEL_GEN(dev_priv) >= 10 || IS_GEMINILAKE(dev_priv)) { - intel_dsb_reg_write(dsb, PREC_PAL_EXT2_GC_MAX(pipe, 0), + intel_dsb_reg_write(crtc_state, PREC_PAL_EXT2_GC_MAX(pipe, 0), 1 << 16); - intel_dsb_reg_write(dsb, PREC_PAL_EXT2_GC_MAX(pipe, 1), + intel_dsb_reg_write(crtc_state, PREC_PAL_EXT2_GC_MAX(pipe, 1), 1 << 16); - intel_dsb_reg_write(dsb, PREC_PAL_EXT2_GC_MAX(pipe, 2), + intel_dsb_reg_write(crtc_state, PREC_PAL_EXT2_GC_MAX(pipe, 2), 1 << 16); } - - intel_dsb_put(dsb); } static void ivb_load_luts(const struct intel_crtc_state *crtc_state) @@ -753,7 +751,7 @@ static void ivb_load_luts(const struct intel_crtc_state *crtc_state) } else if (crtc_state->gamma_mode == GAMMA_MODE_MODE_SPLIT) { ivb_load_lut_10(crtc, degamma_lut, PAL_PREC_SPLIT_MODE | PAL_PREC_INDEX_VALUE(0)); - ivb_load_lut_ext_max(crtc); + ivb_load_lut_ext_max(crtc_state); ivb_load_lut_10(crtc, gamma_lut, PAL_PREC_SPLIT_MODE | PAL_PREC_INDEX_VALUE(512)); } else { @@ -761,7 +759,7 @@ static void ivb_load_luts(const struct intel_crtc_state *crtc_state) ivb_load_lut_10(crtc, blob, PAL_PREC_INDEX_VALUE(0)); - ivb_load_lut_ext_max(crtc); + ivb_load_lut_ext_max(crtc_state); } } @@ -776,7 +774,7 @@ static void bdw_load_luts(const struct intel_crtc_state *crtc_state) } else if (crtc_state->gamma_mode == GAMMA_MODE_MODE_SPLIT) { bdw_load_lut_10(crtc, degamma_lut, PAL_PREC_SPLIT_MODE | PAL_PREC_INDEX_VALUE(0)); - ivb_load_lut_ext_max(crtc); + ivb_load_lut_ext_max(crtc_state); bdw_load_lut_10(crtc, gamma_lut, PAL_PREC_SPLIT_MODE | PAL_PREC_INDEX_VALUE(512)); } else { @@ -784,7 +782,7 @@ static void bdw_load_luts(const struct intel_crtc_state *crtc_state) bdw_load_lut_10(crtc, blob, PAL_PREC_INDEX_VALUE(0)); - ivb_load_lut_ext_max(crtc); + ivb_load_lut_ext_max(crtc_state); } } @@ -877,7 +875,7 @@ static void glk_load_luts(const struct intel_crtc_state *crtc_state) ilk_load_lut_8(crtc, gamma_lut); } else { 
bdw_load_lut_10(crtc, gamma_lut, PAL_PREC_INDEX_VALUE(0)); - ivb_load_lut_ext_max(crtc); + ivb_load_lut_ext_max(crtc_state); } } @@ -900,14 +898,12 @@ icl_load_gcmax(const struct intel_crtc_state *crtc_state, const struct drm_color_lut *color) { struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); - struct intel_dsb *dsb = intel_dsb_get(crtc); enum pipe pipe = crtc->pipe; /* FIXME LUT entries are 16 bit only, so we can prog 0xFFFF max */ - intel_dsb_reg_write(dsb, PREC_PAL_GC_MAX(pipe, 0), color->red); - intel_dsb_reg_write(dsb, PREC_PAL_GC_MAX(pipe, 1), color->green); - intel_dsb_reg_write(dsb, PREC_PAL_GC_MAX(pipe, 2), color->blue); - intel_dsb_put(dsb); + intel_dsb_reg_write(crtc_state, PREC_PAL_GC_MAX(pipe, 0), color->red); + intel_dsb_reg_write(crtc_state, PREC_PAL_GC_MAX(pipe, 1), color->green); + intel_dsb_reg_write(crtc_state, PREC_PAL_GC_MAX(pipe, 2), color->blue); } static void @@ -916,7 +912,6 @@ icl_program_gamma_superfine_segment(const struct intel_crtc_state *crtc_state) struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); const struct drm_property_blob *blob = crtc_state->hw.gamma_lut; const struct drm_color_lut *lut = blob->data; - struct intel_dsb *dsb = intel_dsb_get(crtc); enum pipe pipe = crtc->pipe; int i; @@ -927,19 +922,17 @@ icl_program_gamma_superfine_segment(const struct intel_crtc_state *crtc_state) * 9 entries, corresponding to values 0, 1/(8 * 128 * 256), * 2/(8 * 128 * 256) ... 8/(8 * 128 * 256). 
*/ - intel_dsb_reg_write(dsb, PREC_PAL_MULTI_SEG_INDEX(pipe), + intel_dsb_reg_write(crtc_state, PREC_PAL_MULTI_SEG_INDEX(pipe), PAL_PREC_AUTO_INCREMENT); for (i = 0; i < 9; i++) { const struct drm_color_lut *entry = &lut[i]; - intel_dsb_indexed_reg_write(dsb, PREC_PAL_MULTI_SEG_DATA(pipe), + intel_dsb_indexed_reg_write(crtc_state, PREC_PAL_MULTI_SEG_DATA(pipe), ilk_lut_12p4_ldw(entry)); - intel_dsb_indexed_reg_write(dsb, PREC_PAL_MULTI_SEG_DATA(pipe), + intel_dsb_indexed_reg_write(crtc_state, PREC_PAL_MULTI_SEG_DATA(pipe), ilk_lut_12p4_udw(entry)); } - - intel_dsb_put(dsb); } static void @@ -949,7 +942,6 @@ icl_program_gamma_multi_segment(const struct intel_crtc_state *crtc_state) const struct drm_property_blob *blob = crtc_state->hw.gamma_lut; const struct drm_color_lut *lut = blob->data; const struct drm_color_lut *entry; - struct intel_dsb *dsb = intel_dsb_get(crtc); enum pipe pipe = crtc->pipe; int i; @@ -963,12 +955,13 @@ icl_program_gamma_multi_segment(const struct intel_crtc_state *crtc_state) * PAL_PREC_INDEX[0] and PAL_PREC_INDEX[1] map to seg2[1], * seg2[0] being unused by the hardware. 
*/ - intel_dsb_reg_write(dsb, PREC_PAL_INDEX(pipe), PAL_PREC_AUTO_INCREMENT); + intel_dsb_reg_write(crtc_state, PREC_PAL_INDEX(pipe), + PAL_PREC_AUTO_INCREMENT); for (i = 1; i < 257; i++) { entry = &lut[i * 8]; - intel_dsb_indexed_reg_write(dsb, PREC_PAL_DATA(pipe), + intel_dsb_indexed_reg_write(crtc_state, PREC_PAL_DATA(pipe), ilk_lut_12p4_ldw(entry)); - intel_dsb_indexed_reg_write(dsb, PREC_PAL_DATA(pipe), + intel_dsb_indexed_reg_write(crtc_state, PREC_PAL_DATA(pipe), ilk_lut_12p4_udw(entry)); } @@ -986,24 +979,22 @@ icl_program_gamma_multi_segment(const struct intel_crtc_state *crtc_state) */ for (i = 0; i < 256; i++) { entry = &lut[i * 8 * 128]; - intel_dsb_indexed_reg_write(dsb, PREC_PAL_DATA(pipe), + intel_dsb_indexed_reg_write(crtc_state, PREC_PAL_DATA(pipe), ilk_lut_12p4_ldw(entry)); - intel_dsb_indexed_reg_write(dsb, PREC_PAL_DATA(pipe), + intel_dsb_indexed_reg_write(crtc_state, PREC_PAL_DATA(pipe), ilk_lut_12p4_udw(entry)); } /* The last entry in the LUT is to be programmed in GCMAX */ entry = &lut[256 * 8 * 128]; icl_load_gcmax(crtc_state, entry); - ivb_load_lut_ext_max(crtc); - intel_dsb_put(dsb); + ivb_load_lut_ext_max(crtc_state); } static void icl_load_luts(const struct intel_crtc_state *crtc_state) { const struct drm_property_blob *gamma_lut = crtc_state->hw.gamma_lut; struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); - struct intel_dsb *dsb = intel_dsb_get(crtc); if (crtc_state->hw.degamma_lut) glk_load_degamma_lut(crtc_state); @@ -1018,11 +1009,10 @@ static void icl_load_luts(const struct intel_crtc_state *crtc_state) break; default: bdw_load_lut_10(crtc, gamma_lut, PAL_PREC_INDEX_VALUE(0)); - ivb_load_lut_ext_max(crtc); + ivb_load_lut_ext_max(crtc_state); } - intel_dsb_commit(dsb); - intel_dsb_put(dsb); + intel_dsb_commit(crtc_state); } static u32 chv_cgm_degamma_ldw(const struct drm_color_lut *color) diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 
c8993c51c935..9f1464624336 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -14996,8 +14996,24 @@ static int intel_atomic_check(struct drm_device *dev, static int intel_atomic_prepare_commit(struct intel_atomic_state *state) { - return drm_atomic_helper_prepare_planes(state->base.dev, - &state->base); + struct intel_crtc_state *crtc_state; + struct intel_crtc *crtc; + int i, ret; + + ret = drm_atomic_helper_prepare_planes(state->base.dev, &state->base); + if (ret < 0) + return ret; + + for_each_new_intel_crtc_in_state(state, crtc, crtc_state, i) { + bool mode_changed = needs_modeset(crtc_state); + + if (mode_changed || crtc_state->update_pipe || + crtc_state->uapi.color_mgmt_changed) { + intel_dsb_prepare(crtc_state); + } + } + + return 0; } u32 intel_crtc_get_vblank_counter(struct intel_crtc *crtc) @@ -15427,15 +15443,27 @@ static void intel_atomic_commit_fence_wait(struct intel_atomic_state *intel_stat &wait_reset); } +static void intel_cleanup_dsbs(struct intel_atomic_state *state) +{ + struct intel_crtc_state *old_crtc_state, *new_crtc_state; + struct intel_crtc *crtc; + int i; + + for_each_oldnew_intel_crtc_in_state(state, crtc, old_crtc_state, + new_crtc_state, i) + intel_dsb_cleanup(old_crtc_state); +} + static void intel_atomic_cleanup_work(struct work_struct *work) { - struct drm_atomic_state *state = - container_of(work, struct drm_atomic_state, commit_work); - struct drm_i915_private *i915 = to_i915(state->dev); + struct intel_atomic_state *state = + container_of(work, struct intel_atomic_state, base.commit_work); + struct drm_i915_private *i915 = to_i915(state->base.dev); - drm_atomic_helper_cleanup_planes(&i915->drm, state); - drm_atomic_helper_commit_cleanup_done(state); - drm_atomic_state_put(state); + intel_cleanup_dsbs(state); + drm_atomic_helper_cleanup_planes(&i915->drm, &state->base); + drm_atomic_helper_commit_cleanup_done(&state->base); + drm_atomic_state_put(&state->base); 
intel_atomic_helper_free_state(i915); } @@ -15565,6 +15593,13 @@ static void intel_atomic_commit_tail(struct intel_atomic_state *state) modeset_put_power_domains(dev_priv, put_domains[i]); intel_modeset_verify_crtc(crtc, state, old_crtc_state, new_crtc_state); + + /* + * DSB cleanup is done in cleanup_work aligning with framebuffer + * cleanup. So copy and reset the dsb structure to sync with + * commit_done and later do dsb cleanup in cleanup_work. + */ + old_crtc_state->dsb = fetch_and_zero(&new_crtc_state->dsb); } /* Underruns don't always raise interrupts, so check manually */ @@ -15714,8 +15749,15 @@ static int intel_atomic_commit(struct drm_device *dev, intel_atomic_swap_global_state(state); if (ret) { + struct intel_crtc_state *new_crtc_state; + struct intel_crtc *crtc; + int i; + i915_sw_fence_commit(&state->commit_ready); + for_each_new_intel_crtc_in_state(state, crtc, new_crtc_state, i) + intel_dsb_cleanup(new_crtc_state); + drm_atomic_helper_cleanup_planes(dev, &state->base); intel_runtime_pm_put(&dev_priv->runtime_pm, state->wakeref); return ret; diff --git a/drivers/gpu/drm/i915/display/intel_display_types.h b/drivers/gpu/drm/i915/display/intel_display_types.h index 323f8da7f698..b24266c624fa 100644 --- a/drivers/gpu/drm/i915/display/intel_display_types.h +++ b/drivers/gpu/drm/i915/display/intel_display_types.h @@ -1067,6 +1067,9 @@ struct intel_crtc_state { /* Only valid on TGL+ */ enum transcoder mst_master_transcoder; + + /* For DSB related info */ + struct intel_dsb *dsb; }; enum intel_pipe_crc_source { @@ -1136,9 +1139,6 @@ struct intel_crtc { /* scalers available on this crtc */ int num_scalers; - /* per pipe DSB related info */ - struct intel_dsb dsb; - #ifdef CONFIG_DEBUG_FS struct intel_pipe_crc pipe_crc; #endif diff --git a/drivers/gpu/drm/i915/display/intel_dsb.c b/drivers/gpu/drm/i915/display/intel_dsb.c index 29fec6a92d17..475106e91fa7 100644 --- a/drivers/gpu/drm/i915/display/intel_dsb.c +++ b/drivers/gpu/drm/i915/display/intel_dsb.c @@ 
-34,152 +34,52 @@ #define DSB_BYTE_EN_SHIFT 20 #define DSB_REG_VALUE_MASK 0xfffff -static bool is_dsb_busy(struct intel_dsb *dsb) +static bool is_dsb_busy(struct drm_i915_private *i915, enum pipe pipe, + enum dsb_id id) { - struct intel_crtc *crtc = container_of(dsb, typeof(*crtc), dsb); - struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); - enum pipe pipe = crtc->pipe; - - return DSB_STATUS & intel_de_read(dev_priv, DSB_CTRL(pipe, dsb->id)); + return DSB_STATUS & intel_de_read(i915, DSB_CTRL(pipe, id)); } -static bool intel_dsb_enable_engine(struct intel_dsb *dsb) +static bool intel_dsb_enable_engine(struct drm_i915_private *i915, + enum pipe pipe, enum dsb_id id) { - struct intel_crtc *crtc = container_of(dsb, typeof(*crtc), dsb); - struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); - enum pipe pipe = crtc->pipe; u32 dsb_ctrl; - dsb_ctrl = intel_de_read(dev_priv, DSB_CTRL(pipe, dsb->id)); + dsb_ctrl = intel_de_read(i915, DSB_CTRL(pipe, id)); if (DSB_STATUS & dsb_ctrl) { - drm_dbg_kms(&dev_priv->drm, "DSB engine is busy.\n"); + drm_dbg_kms(&i915->drm, "DSB engine is busy.\n"); return false; } dsb_ctrl |= DSB_ENABLE; - intel_de_write(dev_priv, DSB_CTRL(pipe, dsb->id), dsb_ctrl); + intel_de_write(i915, DSB_CTRL(pipe, id), dsb_ctrl); - intel_de_posting_read(dev_priv, DSB_CTRL(pipe, dsb->id)); + intel_de_posting_read(i915, DSB_CTRL(pipe, id)); return true; } -static bool intel_dsb_disable_engine(struct intel_dsb *dsb) +static bool intel_dsb_disable_engine(struct drm_i915_private *i915, + enum pipe pipe, enum dsb_id id) { - struct intel_crtc *crtc = container_of(dsb, typeof(*crtc), dsb); - struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); - enum pipe pipe = crtc->pipe; u32 dsb_ctrl; - dsb_ctrl = intel_de_read(dev_priv, DSB_CTRL(pipe, dsb->id)); + dsb_ctrl = intel_de_read(i915, DSB_CTRL(pipe, id)); if (DSB_STATUS & dsb_ctrl) { - drm_dbg_kms(&dev_priv->drm, "DSB engine is busy.\n"); + drm_dbg_kms(&i915->drm, "DSB engine is busy.\n"); return 
false; } dsb_ctrl &= ~DSB_ENABLE; - intel_de_write(dev_priv, DSB_CTRL(pipe, dsb->id), dsb_ctrl); + intel_de_write(i915, DSB_CTRL(pipe, id), dsb_ctrl); - intel_de_posting_read(dev_priv, DSB_CTRL(pipe, dsb->id)); + intel_de_posting_read(i915, DSB_CTRL(pipe, id)); return true; } -/** - * intel_dsb_get() - Allocate DSB context and return a DSB instance. - * @crtc: intel_crtc structure to get pipe info. - * - * This function provides handle of a DSB instance, for the further DSB - * operations. - * - * Returns: address of Intel_dsb instance requested for. - * Failure: Returns the same DSB instance, but without a command buffer. - */ - -struct intel_dsb * -intel_dsb_get(struct intel_crtc *crtc) -{ - struct drm_device *dev = crtc->base.dev; - struct drm_i915_private *i915 = to_i915(dev); - struct intel_dsb *dsb = &crtc->dsb; - struct drm_i915_gem_object *obj; - struct i915_vma *vma; - u32 *buf; - intel_wakeref_t wakeref; - - if (!HAS_DSB(i915)) - return dsb; - - if (dsb->refcount++ != 0) - return dsb; - - wakeref = intel_runtime_pm_get(&i915->runtime_pm); - - obj = i915_gem_object_create_internal(i915, DSB_BUF_SIZE); - if (IS_ERR(obj)) { - drm_err(&i915->drm, "Gem object creation failed\n"); - goto out; - } - - vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, 0); - if (IS_ERR(vma)) { - drm_err(&i915->drm, "Vma creation failed\n"); - i915_gem_object_put(obj); - goto out; - } - - buf = i915_gem_object_pin_map(vma->obj, I915_MAP_WC); - if (IS_ERR(buf)) { - drm_err(&i915->drm, "Command buffer creation failed\n"); - goto out; - } - - dsb->id = DSB1; - dsb->vma = vma; - dsb->cmd_buf = buf; - -out: - /* - * On error dsb->cmd_buf will continue to be NULL, making the writes - * pass-through. Leave the dangling ref to be removed later by the - * corresponding intel_dsb_put(): the important error message will - * already be logged above. - */ - - intel_runtime_pm_put(&i915->runtime_pm, wakeref); - - return dsb; -} - -/** - * intel_dsb_put() - To destroy DSB context. 
- * @dsb: intel_dsb structure. - * - * This function destroys the DSB context allocated by a dsb_get(), by - * unpinning and releasing the VMA object associated with it. - */ - -void intel_dsb_put(struct intel_dsb *dsb) -{ - struct intel_crtc *crtc = container_of(dsb, typeof(*crtc), dsb); - struct drm_i915_private *i915 = to_i915(crtc->base.dev); - - if (!HAS_DSB(i915)) - return; - - if (drm_WARN_ON(&i915->drm, dsb->refcount == 0)) - return; - - if (--dsb->refcount == 0) { - i915_vma_unpin_and_release(&dsb->vma, I915_VMA_RELEASE_MAP); - dsb->cmd_buf = NULL; - dsb->free_pos = 0; - dsb->ins_start_offset = 0; - } -} - /** * intel_dsb_indexed_reg_write() -Write to the DSB context for auto * increment register. - * @dsb: intel_dsb structure. + * @crtc_state: intel_crtc_state structure * @reg: register address. * @val: value. * @@ -189,19 +89,20 @@ void intel_dsb_put(struct intel_dsb *dsb) * is done through mmio write. */ -void intel_dsb_indexed_reg_write(struct intel_dsb *dsb, i915_reg_t reg, - u32 val) +void intel_dsb_indexed_reg_write(const struct intel_crtc_state *crtc_state, + i915_reg_t reg, u32 val) { - struct intel_crtc *crtc = container_of(dsb, typeof(*crtc), dsb); + struct intel_dsb *dsb = crtc_state->dsb; + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); - u32 *buf = dsb->cmd_buf; + u32 *buf; u32 reg_val; - if (!buf) { + if (!dsb) { intel_de_write(dev_priv, reg, val); return; } - + buf = dsb->cmd_buf; if (drm_WARN_ON(&dev_priv->drm, dsb->free_pos >= DSB_BUF_SIZE)) { drm_dbg_kms(&dev_priv->drm, "DSB buffer overflow\n"); return; @@ -256,7 +157,7 @@ void intel_dsb_indexed_reg_write(struct intel_dsb *dsb, i915_reg_t reg, /** * intel_dsb_reg_write() -Write to the DSB context for normal * register. - * @dsb: intel_dsb structure. + * @crtc_state: intel_crtc_state structure * @reg: register address. * @val: value. 
* @@ -265,17 +166,19 @@ void intel_dsb_indexed_reg_write(struct intel_dsb *dsb, i915_reg_t reg, * and rest all erroneous condition register programming is done * through mmio write. */ -void intel_dsb_reg_write(struct intel_dsb *dsb, i915_reg_t reg, u32 val) +void intel_dsb_reg_write(const struct intel_crtc_state *crtc_state, + i915_reg_t reg, u32 val) { - struct intel_crtc *crtc = container_of(dsb, typeof(*crtc), dsb); + struct intel_dsb *dsb = crtc_state->dsb; + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); u32 *buf = dsb->cmd_buf; - if (!buf) { + if (!dsb) { intel_de_write(dev_priv, reg, val); return; } - + buf = dsb->cmd_buf; if (drm_WARN_ON(&dev_priv->drm, dsb->free_pos >= DSB_BUF_SIZE)) { drm_dbg_kms(&dev_priv->drm, "DSB buffer overflow\n"); return; @@ -290,26 +193,27 @@ void intel_dsb_reg_write(struct intel_dsb *dsb, i915_reg_t reg, u32 val) /** * intel_dsb_commit() - Trigger workload execution of DSB. - * @dsb: intel_dsb structure. + * @crtc_state: intel_crtc_state structure * * This function is used to do actual write to hardware using DSB. * On errors, fall back to MMIO. Also this function help to reset the context. 
*/ -void intel_dsb_commit(struct intel_dsb *dsb) +void intel_dsb_commit(const struct intel_crtc_state *crtc_state) { - struct intel_crtc *crtc = container_of(dsb, typeof(*crtc), dsb); + struct intel_dsb *dsb = crtc_state->dsb; + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); struct drm_device *dev = crtc->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); enum pipe pipe = crtc->pipe; u32 tail; - if (!dsb->free_pos) + if (!(dsb && dsb->free_pos)) return; - if (!intel_dsb_enable_engine(dsb)) + if (!intel_dsb_enable_engine(dev_priv, pipe, dsb->id)) goto reset; - if (is_dsb_busy(dsb)) { + if (is_dsb_busy(dev_priv, pipe, dsb->id)) { drm_err(&dev_priv->drm, "HEAD_PTR write failed - dsb engine is busy.\n"); goto reset; @@ -322,7 +226,7 @@ void intel_dsb_commit(struct intel_dsb *dsb) memset(&dsb->cmd_buf[dsb->free_pos], 0, (tail - dsb->free_pos * 4)); - if (is_dsb_busy(dsb)) { + if (is_dsb_busy(dev_priv, pipe, dsb->id)) { drm_err(&dev_priv->drm, "TAIL_PTR write failed - dsb engine is busy.\n"); goto reset; @@ -332,7 +236,7 @@ void intel_dsb_commit(struct intel_dsb *dsb) i915_ggtt_offset(dsb->vma), tail); intel_de_write(dev_priv, DSB_TAIL(pipe, dsb->id), i915_ggtt_offset(dsb->vma) + tail); - if (wait_for(!is_dsb_busy(dsb), 1)) { + if (wait_for(!is_dsb_busy(dev_priv, pipe, dsb->id), 1)) { drm_err(&dev_priv->drm, "Timed out waiting for DSB workload completion.\n"); goto reset; @@ -341,5 +245,79 @@ void intel_dsb_commit(struct intel_dsb *dsb) reset: dsb->free_pos = 0; dsb->ins_start_offset = 0; - intel_dsb_disable_engine(dsb); + intel_dsb_disable_engine(dev_priv, pipe, dsb->id); +} + +/** + * intel_dsb_prepare() - Allocate, pin and map the DSB command buffer. + * @crtc_state: intel_crtc_state structure to prepare associated dsb instance. + * + * This function prepare the command buffer which is used to store dsb + * instructions with data. 
+ */ +void intel_dsb_prepare(struct intel_crtc_state *crtc_state) +{ + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); + struct drm_i915_private *i915 = to_i915(crtc->base.dev); + struct intel_dsb *dsb; + struct drm_i915_gem_object *obj; + struct i915_vma *vma; + u32 *buf; + intel_wakeref_t wakeref; + + if (!HAS_DSB(i915)) + return; + + dsb = kmalloc(sizeof(*dsb), GFP_KERNEL); + + wakeref = intel_runtime_pm_get(&i915->runtime_pm); + + obj = i915_gem_object_create_internal(i915, DSB_BUF_SIZE); + if (IS_ERR(obj)) { + drm_err(&i915->drm, "Gem object creation failed\n"); + kfree(dsb); + goto out; + } + + vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, 0); + if (IS_ERR(vma)) { + drm_err(&i915->drm, "Vma creation failed\n"); + i915_gem_object_put(obj); + kfree(dsb); + goto out; + } + + buf = i915_gem_object_pin_map(vma->obj, I915_MAP_WC); + if (IS_ERR(buf)) { + drm_err(&i915->drm, "Command buffer creation failed\n"); + i915_vma_unpin_and_release(&vma, I915_VMA_RELEASE_MAP); + kfree(dsb); + goto out; + } + + dsb->id = DSB1; + dsb->vma = vma; + dsb->cmd_buf = buf; + dsb->free_pos = 0; + dsb->ins_start_offset = 0; + crtc_state->dsb = dsb; +out: + intel_runtime_pm_put(&i915->runtime_pm, wakeref); +} + +/** + * intel_dsb_cleanup() - To cleanup DSB context. + * @crtc_state: intel_crtc_state structure to cleanup associated dsb instance. + * + * This function cleanup the DSB context by unpinning and releasing + * the VMA object associated with it. 
+ */ +void intel_dsb_cleanup(struct intel_crtc_state *crtc_state) +{ + if (!crtc_state->dsb) + return; + + i915_vma_unpin_and_release(&crtc_state->dsb->vma, I915_VMA_RELEASE_MAP); + kfree(crtc_state->dsb); + crtc_state->dsb = NULL; } diff --git a/drivers/gpu/drm/i915/display/intel_dsb.h b/drivers/gpu/drm/i915/display/intel_dsb.h index 395ef9ce558e..654a11f24b80 100644 --- a/drivers/gpu/drm/i915/display/intel_dsb.h +++ b/drivers/gpu/drm/i915/display/intel_dsb.h @@ -10,7 +10,7 @@ #include "i915_reg.h" -struct intel_crtc; +struct intel_crtc_state; struct i915_vma; enum dsb_id { @@ -22,7 +22,6 @@ enum dsb_id { }; struct intel_dsb { - long refcount; enum dsb_id id; u32 *cmd_buf; struct i915_vma *vma; @@ -41,12 +40,12 @@ struct intel_dsb { u32 ins_start_offset; }; -struct intel_dsb * -intel_dsb_get(struct intel_crtc *crtc); -void intel_dsb_put(struct intel_dsb *dsb); -void intel_dsb_reg_write(struct intel_dsb *dsb, i915_reg_t reg, u32 val); -void intel_dsb_indexed_reg_write(struct intel_dsb *dsb, i915_reg_t reg, - u32 val); -void intel_dsb_commit(struct intel_dsb *dsb); +void intel_dsb_prepare(struct intel_crtc_state *crtc_state); +void intel_dsb_cleanup(struct intel_crtc_state *crtc_state); +void intel_dsb_reg_write(const struct intel_crtc_state *crtc_state, + i915_reg_t reg, u32 val); +void intel_dsb_indexed_reg_write(const struct intel_crtc_state *crtc_state, + i915_reg_t reg, u32 val); +void intel_dsb_commit(const struct intel_crtc_state *crtc_state); #endif From 1d93949eb4f870bd189b3e2a435afc327b45dc3a Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 25 May 2020 15:19:56 +0100 Subject: [PATCH 069/222] drm/i915/gt: Cancel the flush worker more thoroughly Since the worker may rearm, we currently are only guaranteed to flush the work if we cancel the timer. If the work was running at the time we try and cancel it, we will wait for it to complete, but it may leave items in the pool and requeue the work. 
If we rearrange the immediate discard of the pool then cancel the work, we know that the work cannot rearm and so our flush will be final. <0> [314.146044] i915_mod-1321 2.... 299799443us : intel_gt_fini_buffer_pool: intel_gt_fini_buffer_pool:227 GEM_BUG_ON(!list_empty(&pool->cache_list[n])) Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/1920 Signed-off-by: Chris Wilson Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20200525141957.3061-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.c b/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.c index 1495054a4305..418ae184cecf 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.c @@ -212,8 +212,9 @@ void intel_gt_flush_buffer_pool(struct intel_gt *gt) { struct intel_gt_buffer_pool *pool = &gt->buffer_pool; - if (cancel_delayed_work_sync(&pool->work)) + do { pool_free_imm(pool); + } while (cancel_delayed_work_sync(&pool->work)); } void intel_gt_fini_buffer_pool(struct intel_gt *gt) From ea97c4ca54e45c9458f73e9dbef66439c2ae11f8 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 25 May 2020 15:19:57 +0100 Subject: [PATCH 070/222] drm/i915/gem: Suppress some random warnings Leave the error propagation in place, but limit the warnings to only show up in CI if the unlikely errors are hit.
Signed-off-by: Chris Wilson Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20200525141957.3061-2-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c | 3 +-- drivers/gpu/drm/i915/gem/i915_gem_phys.c | 3 +-- drivers/gpu/drm/i915/gem/i915_gem_shmem.c | 3 +-- drivers/gpu/drm/i915/gem/i915_gem_userptr.c | 2 +- 4 files changed, 4 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index e4fb6c372537..219a36995b96 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -1626,8 +1626,7 @@ eb_relocate_entry(struct i915_execbuffer *eb, err = i915_vma_bind(target->vma, target->vma->obj->cache_level, PIN_GLOBAL, NULL); - if (drm_WARN_ONCE(&i915->drm, err, - "Unexpected failure to bind target VMA!")) + if (err) return err; } } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_phys.c b/drivers/gpu/drm/i915/gem/i915_gem_phys.c index da845ff0cacc..021c747b5387 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_phys.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_phys.c @@ -29,8 +29,7 @@ static int i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj) void *dst; int i; - if (drm_WARN_ON(obj->base.dev, - i915_gem_object_needs_bit17_swizzle(obj))) + if (GEM_WARN_ON(i915_gem_object_needs_bit17_swizzle(obj))) return -EINVAL; /* diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c index 7aff3514d97a..7cf8548ff708 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c @@ -147,8 +147,7 @@ rebuild_st: last_pfn = page_to_pfn(page); /* Check that the i965g/gm workaround works. 
*/ - drm_WARN_ON(&i915->drm, - (gfp & __GFP_DMA32) && (last_pfn >= 0x00100000UL)); + GEM_BUG_ON(gfp & __GFP_DMA32 && last_pfn >= 0x00100000UL); } if (sg) { /* loop terminated early; short sg table */ sg_page_sizes |= sg->length; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c index 8b0708708671..2226146b01c9 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c @@ -235,7 +235,7 @@ i915_gem_userptr_init__mmu_notifier(struct drm_i915_gem_object *obj, if (flags & I915_USERPTR_UNSYNCHRONIZED) return capable(CAP_SYS_ADMIN) ? 0 : -EPERM; - if (drm_WARN_ON(obj->base.dev, obj->userptr.mm == NULL)) + if (GEM_WARN_ON(!obj->userptr.mm)) return -EINVAL; mn = i915_mmu_notifier_find(obj->userptr.mm); From bf0840cdb3043ebfa40ac28e19be2886efcd5886 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 25 May 2020 08:53:36 +0100 Subject: [PATCH 071/222] drm/i915/gt: Stop cross-polluting PIN_GLOBAL with PIN_USER with no-ppgtt In order to keep userptr distinct from ggtt mmaps in the eyes of lockdep, we need to avoid marking those userptr vma as PIN_GLOBAL. (So long as we comply with only using them as local PIN_USER!) 
References: https://gitlab.freedesktop.org/drm/intel/-/issues/1880 Signed-off-by: Chris Wilson Acked-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20200525075347.582-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/intel_ggtt.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c index 66165b10256e..8c275f8588c3 100644 --- a/drivers/gpu/drm/i915/gt/intel_ggtt.c +++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c @@ -424,22 +424,17 @@ static int ggtt_bind_vma(struct i915_vma *vma, struct drm_i915_gem_object *obj = vma->obj; u32 pte_flags; + if (i915_vma_is_bound(vma, ~flags & I915_VMA_BIND_MASK)) + return 0; + /* Applicable to VLV (gen8+ do not support RO in the GGTT) */ pte_flags = 0; if (i915_gem_object_is_readonly(obj)) pte_flags |= PTE_READ_ONLY; vma->vm->insert_entries(vma->vm, vma, cache_level, pte_flags); - vma->page_sizes.gtt = I915_GTT_PAGE_SIZE; - /* - * Without aliasing PPGTT there's no difference between - * GLOBAL/LOCAL_BIND, it's all the same ptes. Hence unconditionally - * upgrade to both bound if we bind either to avoid double-binding. 
- */ - atomic_or(I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND, &vma->flags); - return 0; } From 6f081dbfdd4e54d50161c437f2cea176384501d9 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 25 May 2020 00:39:00 +0100 Subject: [PATCH 072/222] drm/i915/display: Fix early deref of 'dsb' MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit drivers/gpu/drm/i915/display/intel_dsb.c:177 intel_dsb_reg_write() warn: variable dereferenced before check 'dsb' (see line 175) Fixes: afeda4f3b1c8 ("drm/i915/dsb: Pre allocate and late cleanup of cmd buffer") Signed-off-by: Chris Wilson Cc: Ville Syrjälä Cc: Animesh Manna Cc: Uma Shankar Reviewed-by: Animesh Manna Link: https://patchwork.freedesktop.org/patch/msgid/20200524233900.25598-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/display/intel_dsb.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_dsb.c b/drivers/gpu/drm/i915/display/intel_dsb.c index 475106e91fa7..24e6d63e2d47 100644 --- a/drivers/gpu/drm/i915/display/intel_dsb.c +++ b/drivers/gpu/drm/i915/display/intel_dsb.c @@ -169,15 +169,17 @@ void intel_dsb_indexed_reg_write(const struct intel_crtc_state *crtc_state, void intel_dsb_reg_write(const struct intel_crtc_state *crtc_state, i915_reg_t reg, u32 val) { - struct intel_dsb *dsb = crtc_state->dsb; struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); - u32 *buf = dsb->cmd_buf; + struct intel_dsb *dsb; + u32 *buf; + dsb = crtc_state->dsb; if (!dsb) { intel_de_write(dev_priv, reg, val); return; } + buf = dsb->cmd_buf; if (drm_WARN_ON(&dev_priv->drm, dsb->free_pos >= DSB_BUF_SIZE)) { drm_dbg_kms(&dev_priv->drm, "DSB buffer overflow\n"); From 7c4541a37bbbf83c0f16f779e85eb61d9348ed29 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 25 May 2020 16:14:59 +0100 Subject: [PATCH 073/222] drm/i915/gt: Force the GT reset on shutdown Before we return control 
to the system, and letting it reuse all the pages being accessed by HW, we must disable the HW. At the moment, we dare not reset the GPU if it will clobber the display, but once we know the display has been disabled, we can proceed with the reset as we shutdown the module. We know the next user must reinitialise the HW for their purpose. Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/489 Signed-off-by: Chris Wilson Cc: stable@kernel.org Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20200525151459.12083-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/intel_gt.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c index f069551e412f..ebc29b6ee86c 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.c +++ b/drivers/gpu/drm/i915/gt/intel_gt.c @@ -616,6 +616,11 @@ void intel_gt_driver_unregister(struct intel_gt *gt) void intel_gt_driver_release(struct intel_gt *gt) { struct i915_address_space *vm; + intel_wakeref_t wakeref; + + /* Scrub all HW state upon release */ + with_intel_runtime_pm(gt->uncore->rpm, wakeref) + __intel_gt_reset(gt, ALL_ENGINES); vm = fetch_and_zero(&gt->vm); if (vm) /* FIXME being called twice on error paths :( */ From 9ae6c4ef7b62e619f1c49dcc0ceb2b2de2a29f04 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 25 May 2020 08:53:39 +0100 Subject: [PATCH 074/222] drm/i915/execlists: Shortcircuit queue_prio() for no internal levels If there are no internal levels and the user priority-shift is zero, we can help the compiler eliminate some dead code: Function old new delta start_timeslice 169 154 -15 __execlists_submission_tasklet 4696 4659 -37 Signed-off-by: Chris Wilson Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20200525075347.582-4-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/intel_lrc.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c
b/drivers/gpu/drm/i915/gt/intel_lrc.c index de5be57ed6d2..3214a4ecc31a 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -446,6 +446,9 @@ static int queue_prio(const struct intel_engine_execlists *execlists) * we have to flip the index value to become priority. */ p = to_priolist(rb); + if (!I915_USER_PRIORITY_SHIFT) + return p->priority; + return ((p->priority + 1) << I915_USER_PRIORITY_SHIFT) - ffs(p->used); } From fc0e127022162653195788680973eefe72806595 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 26 May 2020 12:20:51 +0100 Subject: [PATCH 075/222] drm/i915: Improve execute_cb struct packing Reduce the irq_work llist for attaching the callbacks to the signal for both smaller structs (two fewer pointers!) and simpler [debug] code: Function old new delta irq_execute_cb 35 34 -1 __igt_breadcrumbs_smoketest 1684 1682 -2 i915_request_retire 2003 1996 -7 __i915_request_create 1047 1040 -7 __notify_execute_cb 135 126 -9 __i915_request_ctor 188 178 -10 __await_execution.part.constprop 451 440 -11 igt_wait_request 924 714 -210 One minor artifact is that the order of cb execution is reversed. No current use cases are affected by that change.
Signed-off-by: Chris Wilson Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20200526112051.10229-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_request.c | 23 ++++++++++++++--------- drivers/gpu/drm/i915/i915_request.h | 2 +- 2 files changed, 15 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index c282719ad3ac..877eaaccba81 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -42,7 +42,6 @@ #include "intel_pm.h" struct execute_cb { - struct list_head link; struct irq_work work; struct i915_sw_fence *fence; void (*hook)(struct i915_request *rq, struct dma_fence *signal); @@ -189,14 +188,14 @@ static void irq_execute_cb_hook(struct irq_work *wrk) static void __notify_execute_cb(struct i915_request *rq) { - struct execute_cb *cb; + struct execute_cb *cb, *cn; lockdep_assert_held(&rq->lock); - if (list_empty(&rq->execute_cb)) + if (llist_empty(&rq->execute_cb)) return; - list_for_each_entry(cb, &rq->execute_cb, link) + llist_for_each_entry_safe(cb, cn, rq->execute_cb.first, work.llnode) irq_work_queue(&cb->work); /* @@ -209,7 +208,7 @@ static void __notify_execute_cb(struct i915_request *rq) * preempt-to-idle cycle on the target engine, all the while the * master execute_cb may refire. 
*/ - INIT_LIST_HEAD(&rq->execute_cb); + init_llist_head(&rq->execute_cb); } static inline void @@ -327,7 +326,7 @@ bool i915_request_retire(struct i915_request *rq) set_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags); __notify_execute_cb(rq); } - GEM_BUG_ON(!list_empty(&rq->execute_cb)); + GEM_BUG_ON(!llist_empty(&rq->execute_cb)); spin_unlock_irq(&rq->lock); remove_from_client(rq); @@ -357,6 +356,12 @@ void i915_request_retire_upto(struct i915_request *rq) } while (i915_request_retire(tmp) && tmp != rq); } +static void __llist_add(struct llist_node *node, struct llist_head *head) +{ + node->next = head->first; + head->first = node; +} + static int __await_execution(struct i915_request *rq, struct i915_request *signal, @@ -395,7 +400,7 @@ __await_execution(struct i915_request *rq, i915_sw_fence_complete(cb->fence); kmem_cache_free(global.slab_execute_cbs, cb); } else { - list_add_tail(&cb->link, &signal->execute_cb); + __llist_add(&cb->work.llnode, &signal->execute_cb); } spin_unlock_irq(&signal->lock); @@ -704,7 +709,7 @@ static void __i915_request_ctor(void *arg) rq->file_priv = NULL; rq->capture_list = NULL; - INIT_LIST_HEAD(&rq->execute_cb); + init_llist_head(&rq->execute_cb); } struct i915_request * @@ -794,7 +799,7 @@ __i915_request_create(struct intel_context *ce, gfp_t gfp) rq->batch = NULL; GEM_BUG_ON(rq->file_priv); GEM_BUG_ON(rq->capture_list); - GEM_BUG_ON(!list_empty(&rq->execute_cb)); + GEM_BUG_ON(!llist_empty(&rq->execute_cb)); /* * Reserve space in the ring buffer for all the commands required to diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h index 8ec7ee4dbadc..5d4709a3dace 100644 --- a/drivers/gpu/drm/i915/i915_request.h +++ b/drivers/gpu/drm/i915/i915_request.h @@ -214,7 +214,7 @@ struct i915_request { ktime_t emitted; } duration; }; - struct list_head execute_cb; + struct llist_head execute_cb; struct i915_sw_fence semaphore; /* From 22da5d846d54dd13183b57874b9d5611d583d7c8 Mon Sep 17 00:00:00 2001 From: 
Chris Wilson Date: Mon, 25 May 2020 13:49:12 +0100 Subject: [PATCH 076/222] drm/i915/display: Only query DP state of a DDI encoder MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Avoid a NULL dereference for a mismatched encoder type, hit when probing state for all encoders. This is a band aid to prevent the OOPS as the right fix is "probably to swap the psr vs infoframes.enable checks, or outright disappear from this function" (Ville). Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/1892 Signed-off-by: Chris Wilson Acked-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20200525124912.16019-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/display/intel_dp.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index 1768731678a1..4749d2fe2324 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -5212,6 +5212,9 @@ void intel_read_dp_sdp(struct intel_encoder *encoder, struct intel_crtc_state *crtc_state, unsigned int type) { + if (encoder->type != INTEL_OUTPUT_DDI) + return; + switch (type) { case DP_SDP_VSC: intel_read_dp_vsc_sdp(encoder, crtc_state, From ffb0c600c240103f6f34e07892a7e0a75502b243 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 26 May 2020 10:07:52 +0100 Subject: [PATCH 077/222] drm/i915: Reorder await_execution before await_request Reorder the code so that we can reuse the await_execution from a special case in await_request in the next patch. 
Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20200526090753.11329-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_request.c | 284 ++++++++++++++-------------- 1 file changed, 142 insertions(+), 142 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 877eaaccba81..e64d82f7c830 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -1058,148 +1058,6 @@ await_fence: I915_FENCE_GFP); } -static int -i915_request_await_request(struct i915_request *to, struct i915_request *from) -{ - int ret; - - GEM_BUG_ON(to == from); - GEM_BUG_ON(to->timeline == from->timeline); - - if (i915_request_completed(from)) { - i915_sw_fence_set_error_once(&to->submit, from->fence.error); - return 0; - } - - if (to->engine->schedule) { - ret = i915_sched_node_add_dependency(&to->sched, - &from->sched, - I915_DEPENDENCY_EXTERNAL); - if (ret < 0) - return ret; - } - - if (to->engine == from->engine) - ret = i915_sw_fence_await_sw_fence_gfp(&to->submit, - &from->submit, - I915_FENCE_GFP); - else - ret = emit_semaphore_wait(to, from, I915_FENCE_GFP); - if (ret < 0) - return ret; - - return 0; -} - -static void mark_external(struct i915_request *rq) -{ - /* - * The downside of using semaphores is that we lose metadata passing - * along the signaling chain. This is particularly nasty when we - * need to pass along a fatal error such as EFAULT or EDEADLK. For - * fatal errors we want to scrub the request before it is executed, - * which means that we cannot preload the request onto HW and have - * it wait upon a semaphore. 
- */ - rq->sched.flags |= I915_SCHED_HAS_EXTERNAL_CHAIN; -} - -static int -__i915_request_await_external(struct i915_request *rq, struct dma_fence *fence) -{ - mark_external(rq); - return i915_sw_fence_await_dma_fence(&rq->submit, fence, - i915_fence_context_timeout(rq->i915, - fence->context), - I915_FENCE_GFP); -} - -static int -i915_request_await_external(struct i915_request *rq, struct dma_fence *fence) -{ - struct dma_fence *iter; - int err = 0; - - if (!to_dma_fence_chain(fence)) - return __i915_request_await_external(rq, fence); - - dma_fence_chain_for_each(iter, fence) { - struct dma_fence_chain *chain = to_dma_fence_chain(iter); - - if (!dma_fence_is_i915(chain->fence)) { - err = __i915_request_await_external(rq, iter); - break; - } - - err = i915_request_await_dma_fence(rq, chain->fence); - if (err < 0) - break; - } - - dma_fence_put(iter); - return err; -} - -int -i915_request_await_dma_fence(struct i915_request *rq, struct dma_fence *fence) -{ - struct dma_fence **child = &fence; - unsigned int nchild = 1; - int ret; - - /* - * Note that if the fence-array was created in signal-on-any mode, - * we should *not* decompose it into its individual fences. However, - * we don't currently store which mode the fence-array is operating - * in. Fortunately, the only user of signal-on-any is private to - * amdgpu and we should not see any incoming fence-array from - * sync-file being in signal-on-any mode. - */ - if (dma_fence_is_array(fence)) { - struct dma_fence_array *array = to_dma_fence_array(fence); - - child = array->fences; - nchild = array->num_fences; - GEM_BUG_ON(!nchild); - } - - do { - fence = *child++; - if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) { - i915_sw_fence_set_error_once(&rq->submit, fence->error); - continue; - } - - /* - * Requests on the same timeline are explicitly ordered, along - * with their dependencies, by i915_request_add() which ensures - * that requests are submitted in-order through each ring. 
- */ - if (fence->context == rq->fence.context) - continue; - - /* Squash repeated waits to the same timelines */ - if (fence->context && - intel_timeline_sync_is_later(i915_request_timeline(rq), - fence)) - continue; - - if (dma_fence_is_i915(fence)) - ret = i915_request_await_request(rq, to_request(fence)); - else - ret = i915_request_await_external(rq, fence); - if (ret < 0) - return ret; - - /* Record the latest fence used against each timeline */ - if (fence->context) - intel_timeline_sync_set(i915_request_timeline(rq), - fence); - } while (--nchild); - - return 0; -} - static bool intel_timeline_sync_has_start(struct intel_timeline *tl, struct dma_fence *fence) { @@ -1287,6 +1145,55 @@ __i915_request_await_execution(struct i915_request *to, &from->fence); } +static void mark_external(struct i915_request *rq) +{ + /* + * The downside of using semaphores is that we lose metadata passing + * along the signaling chain. This is particularly nasty when we + * need to pass along a fatal error such as EFAULT or EDEADLK. For + * fatal errors we want to scrub the request before it is executed, + * which means that we cannot preload the request onto HW and have + * it wait upon a semaphore. 
+ */ + rq->sched.flags |= I915_SCHED_HAS_EXTERNAL_CHAIN; +} + +static int +__i915_request_await_external(struct i915_request *rq, struct dma_fence *fence) +{ + mark_external(rq); + return i915_sw_fence_await_dma_fence(&rq->submit, fence, + i915_fence_context_timeout(rq->i915, + fence->context), + I915_FENCE_GFP); +} + +static int +i915_request_await_external(struct i915_request *rq, struct dma_fence *fence) +{ + struct dma_fence *iter; + int err = 0; + + if (!to_dma_fence_chain(fence)) + return __i915_request_await_external(rq, fence); + + dma_fence_chain_for_each(iter, fence) { + struct dma_fence_chain *chain = to_dma_fence_chain(iter); + + if (!dma_fence_is_i915(chain->fence)) { + err = __i915_request_await_external(rq, iter); + break; + } + + err = i915_request_await_dma_fence(rq, chain->fence); + if (err < 0) + break; + } + + dma_fence_put(iter); + return err; +} + int i915_request_await_execution(struct i915_request *rq, struct dma_fence *fence, @@ -1335,6 +1242,99 @@ i915_request_await_execution(struct i915_request *rq, return 0; } +static int +i915_request_await_request(struct i915_request *to, struct i915_request *from) +{ + int ret; + + GEM_BUG_ON(to == from); + GEM_BUG_ON(to->timeline == from->timeline); + + if (i915_request_completed(from)) { + i915_sw_fence_set_error_once(&to->submit, from->fence.error); + return 0; + } + + if (to->engine->schedule) { + ret = i915_sched_node_add_dependency(&to->sched, + &from->sched, + I915_DEPENDENCY_EXTERNAL); + if (ret < 0) + return ret; + } + + if (to->engine == READ_ONCE(from->engine)) + ret = i915_sw_fence_await_sw_fence_gfp(&to->submit, + &from->submit, + I915_FENCE_GFP); + else + ret = emit_semaphore_wait(to, from, I915_FENCE_GFP); + if (ret < 0) + return ret; + + return 0; +} + +int +i915_request_await_dma_fence(struct i915_request *rq, struct dma_fence *fence) +{ + struct dma_fence **child = &fence; + unsigned int nchild = 1; + int ret; + + /* + * Note that if the fence-array was created in signal-on-any mode, 
+ * we should *not* decompose it into its individual fences. However, + * we don't currently store which mode the fence-array is operating + * in. Fortunately, the only user of signal-on-any is private to + * amdgpu and we should not see any incoming fence-array from + * sync-file being in signal-on-any mode. + */ + if (dma_fence_is_array(fence)) { + struct dma_fence_array *array = to_dma_fence_array(fence); + + child = array->fences; + nchild = array->num_fences; + GEM_BUG_ON(!nchild); + } + + do { + fence = *child++; + if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) { + i915_sw_fence_set_error_once(&rq->submit, fence->error); + continue; + } + + /* + * Requests on the same timeline are explicitly ordered, along + * with their dependencies, by i915_request_add() which ensures + * that requests are submitted in-order through each ring. + */ + if (fence->context == rq->fence.context) + continue; + + /* Squash repeated waits to the same timelines */ + if (fence->context && + intel_timeline_sync_is_later(i915_request_timeline(rq), + fence)) + continue; + + if (dma_fence_is_i915(fence)) + ret = i915_request_await_request(rq, to_request(fence)); + else + ret = i915_request_await_external(rq, fence); + if (ret < 0) + return ret; + + /* Record the latest fence used against each timeline */ + if (fence->context) + intel_timeline_sync_set(i915_request_timeline(rq), + fence); + } while (--nchild); + + return 0; +} + /** * i915_request_await_object - set this request to (async) wait upon a bo * @to: request we are wishing to use From 511b6d9aed417739b6aa49d0b6b4354ad21020f1 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 26 May 2020 10:07:53 +0100 Subject: [PATCH 078/222] drm/i915/gt: Do not schedule normal requests immediately along virtual When we push a virtual request onto the HW, we update the rq->engine to point to the physical engine. 
A request that is then submitted by the user that waits upon the virtual engine, but along the physical engine in use, will then see that it is due to be submitted to the same engine and take a shortcut (and be queued without waiting for the completion fence). However, the virtual request may be preempted (either by higher priority users, or by timeslicing) and removed from the physical engine to be migrated over to one of its siblings. The dependent normal request however is oblivious to the removal of the virtual request and remains queued to execute on HW, believing that once it reaches the head of its queue all of its predecessors will have completed executing! v2: Beware restriction of signal->execution_mask prior to submission. Fixes: 6d06779e8672 ("drm/i915: Load balancing across a virtual engine") Testcase: igt/gem_exec_balancer/sliced Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Cc: # v5.3+ Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20200526090753.11329-2-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_request.c | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index e64d82f7c830..0d810a62ff46 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -1242,6 +1242,25 @@ i915_request_await_execution(struct i915_request *rq, return 0; } +static int +await_request_submit(struct i915_request *to, struct i915_request *from) +{ + /* + * If we are waiting on a virtual engine, then it may be + * constrained to execute on a single engine *prior* to submission. + * When it is submitted, it will be first submitted to the virtual + * engine and then passed to the physical engine. We cannot allow + * the waiter to be submitted immediately to the physical engine + * as it may then bypass the virtual request. 
+ */ + if (to->engine == READ_ONCE(from->engine)) + return i915_sw_fence_await_sw_fence_gfp(&to->submit, + &from->submit, + I915_FENCE_GFP); + else + return __i915_request_await_execution(to, from, NULL); +} + static int i915_request_await_request(struct i915_request *to, struct i915_request *from) { @@ -1263,10 +1282,8 @@ i915_request_await_request(struct i915_request *to, struct i915_request *from) return ret; } - if (to->engine == READ_ONCE(from->engine)) - ret = i915_sw_fence_await_sw_fence_gfp(&to->submit, - &from->submit, - I915_FENCE_GFP); + if (is_power_of_2(to->execution_mask | READ_ONCE(from->execution_mask))) + ret = await_request_submit(to, from); else ret = emit_semaphore_wait(to, from, I915_FENCE_GFP); if (ret < 0) From 0109a16ef391b2ebfbfdf08250c1dfb5dbf83d1e Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 26 May 2020 16:07:39 +0100 Subject: [PATCH 079/222] drm/i915/gt: Clear LOCAL_BIND from shared GGTT on resume We only restore GLOBAL binds upon resume as we expect these to be pinned for use by HW, whereas the LOCAL binds can be recreated on demand once userspace is resumed. For the LOCAL bind to be recreated in the global GTT (for old systems without ppgtt), we need to clear its presence flag on deciding not to restore the mapping upon resume. 
Fixes: bf0840cdb304 ("drm/i915/gt: Stop cross-polluting PIN_GLOBAL with PIN_USER with no-ppgtt") Signed-off-by: Chris Wilson Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20200526150739.26147-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/intel_ggtt.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c index 8c275f8588c3..317172ad5ef3 100644 --- a/drivers/gpu/drm/i915/gt/intel_ggtt.c +++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c @@ -1161,6 +1161,11 @@ void i915_ggtt_disable_guc(struct i915_ggtt *ggtt) ggtt->invalidate(ggtt); } +static unsigned int clear_bind(struct i915_vma *vma) +{ + return atomic_fetch_and(~I915_VMA_BIND_MASK, &vma->flags); +} + void i915_ggtt_resume(struct i915_ggtt *ggtt) { struct i915_vma *vma; @@ -1179,10 +1184,9 @@ void i915_ggtt_resume(struct i915_ggtt *ggtt) list_for_each_entry(vma, &ggtt->vm.bound_list, vm_link) { struct drm_i915_gem_object *obj = vma->obj; - if (!i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND)) + if (!(clear_bind(vma) & I915_VMA_GLOBAL_BIND)) continue; - clear_bit(I915_VMA_GLOBAL_BIND_BIT, __i915_vma_flags(vma)); WARN_ON(i915_vma_bind(vma, obj ? obj->cache_level : 0, PIN_GLOBAL, NULL)); From 6ec81b82732e2b4a5ac0853fd33919ff1ca94238 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 27 May 2020 16:05:08 +0200 Subject: [PATCH 080/222] drm/i915/pmu: avoid an maybe-uninitialized warning Conditional spinlocks make it hard for gcc and for lockdep to follow the code flow. 
This one causes a warning with at least gcc-9 and higher: In file included from include/linux/irq.h:14, from drivers/gpu/drm/i915/i915_pmu.c:7: drivers/gpu/drm/i915/i915_pmu.c: In function 'i915_sample': include/linux/spinlock.h:289:3: error: 'flags' may be used uninitialized in this function [-Werror=maybe-uninitialized] 289 | _raw_spin_unlock_irqrestore(lock, flags); \ | ^~~~~~~~~~~~~~~~~~~~~~~~~~~ drivers/gpu/drm/i915/i915_pmu.c:288:17: note: 'flags' was declared here 288 | unsigned long flags; | ^~~~~ Split out the part between the locks into a separate function for readability and to let the compiler figure out what the logic actually is. Fixes: d79e1bd676f0 ("drm/i915/pmu: Only use exclusive mmio access for gen7") Signed-off-by: Arnd Bergmann Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20200527140526.1458215-1-arnd@arndb.de --- drivers/gpu/drm/i915/i915_pmu.c | 84 ++++++++++++++++----------------- 1 file changed, 42 insertions(+), 42 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c index f6f44ad5e335..802837de1767 100644 --- a/drivers/gpu/drm/i915/i915_pmu.c +++ b/drivers/gpu/drm/i915/i915_pmu.c @@ -269,12 +269,48 @@ static bool exclusive_mmio_access(const struct drm_i915_private *i915) return IS_GEN(i915, 7); } +static void engine_sample(struct intel_engine_cs *engine, unsigned int period_ns) +{ + struct intel_engine_pmu *pmu = &engine->pmu; + bool busy; + u32 val; + + val = ENGINE_READ_FW(engine, RING_CTL); + if (val == 0) /* powerwell off => engine idle */ + return; + + if (val & RING_WAIT) + add_sample(&pmu->sample[I915_SAMPLE_WAIT], period_ns); + if (val & RING_WAIT_SEMAPHORE) + add_sample(&pmu->sample[I915_SAMPLE_SEMA], period_ns); + + /* No need to sample when busy stats are supported. */ + if (intel_engine_supports_stats(engine)) + return; + + /* + * While waiting on a semaphore or event, MI_MODE reports the + * ring as idle. 
However, previously using the seqno, and with + * execlists sampling, we account for the ring waiting as the + * engine being busy. Therefore, we record the sample as being + * busy if either waiting or !idle. + */ + busy = val & (RING_WAIT_SEMAPHORE | RING_WAIT); + if (!busy) { + val = ENGINE_READ_FW(engine, RING_MI_MODE); + busy = !(val & MODE_IDLE); + } + if (busy) + add_sample(&pmu->sample[I915_SAMPLE_BUSY], period_ns); +} + static void engines_sample(struct intel_gt *gt, unsigned int period_ns) { struct drm_i915_private *i915 = gt->i915; struct intel_engine_cs *engine; enum intel_engine_id id; + unsigned long flags; if ((i915->pmu.enable & ENGINE_SAMPLE_MASK) == 0) return; @@ -283,53 +319,17 @@ engines_sample(struct intel_gt *gt, unsigned int period_ns) return; for_each_engine(engine, gt, id) { - struct intel_engine_pmu *pmu = &engine->pmu; - spinlock_t *mmio_lock; - unsigned long flags; - bool busy; - u32 val; - if (!intel_engine_pm_get_if_awake(engine)) continue; - mmio_lock = NULL; - if (exclusive_mmio_access(i915)) - mmio_lock = &engine->uncore->lock; - - if (unlikely(mmio_lock)) - spin_lock_irqsave(mmio_lock, flags); - - val = ENGINE_READ_FW(engine, RING_CTL); - if (val == 0) /* powerwell off => engine idle */ - goto skip; - - if (val & RING_WAIT) - add_sample(&pmu->sample[I915_SAMPLE_WAIT], period_ns); - if (val & RING_WAIT_SEMAPHORE) - add_sample(&pmu->sample[I915_SAMPLE_SEMA], period_ns); - - /* No need to sample when busy stats are supported. */ - if (intel_engine_supports_stats(engine)) - goto skip; - - /* - * While waiting on a semaphore or event, MI_MODE reports the - * ring as idle. However, previously using the seqno, and with - * execlists sampling, we account for the ring waiting as the - * engine being busy. Therefore, we record the sample as being - * busy if either waiting or !idle. 
- */ - busy = val & (RING_WAIT_SEMAPHORE | RING_WAIT); - if (!busy) { - val = ENGINE_READ_FW(engine, RING_MI_MODE); - busy = !(val & MODE_IDLE); + if (exclusive_mmio_access(i915)) { + spin_lock_irqsave(&engine->uncore->lock, flags); + engine_sample(engine, period_ns); + spin_unlock_irqrestore(&engine->uncore->lock, flags); + } else { + engine_sample(engine, period_ns); } - if (busy) - add_sample(&pmu->sample[I915_SAMPLE_BUSY], period_ns); -skip: - if (unlikely(mmio_lock)) - spin_unlock_irqrestore(mmio_lock, flags); intel_engine_pm_put_async(engine); } } From cc649a9eafc1ef5c40db023084cb94422d08aa84 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 27 May 2020 16:05:09 +0200 Subject: [PATCH 081/222] drm/i915: work around false-positive maybe-uninitialized warning gcc-9 gets confused by the code flow in check_dirty_whitelist: drivers/gpu/drm/i915/gt/selftest_workarounds.c: In function 'check_dirty_whitelist': drivers/gpu/drm/i915/gt/selftest_workarounds.c:492:17: error: 'rsvd' may be used uninitialized in this function [-Werror=maybe-uninitialized] I could not figure out a good way to do this in a way that gcc understands better, so initialize the variable to zero, as last resort. 
Fixes: aee20aaed887 ("drm/i915: Implement read-only support in whitelist selftest") Signed-off-by: Arnd Bergmann Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20200527140526.1458215-2-arnd@arndb.de --- drivers/gpu/drm/i915/gt/selftest_workarounds.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/i915/gt/selftest_workarounds.c b/drivers/gpu/drm/i915/gt/selftest_workarounds.c index 5ed323254ee1..32785463ec9e 100644 --- a/drivers/gpu/drm/i915/gt/selftest_workarounds.c +++ b/drivers/gpu/drm/i915/gt/selftest_workarounds.c @@ -623,6 +623,8 @@ err_request: err = -EINVAL; goto out_unpin; } + } else { + rsvd = 0; } expect = results[0]; From b72f02d78e4f257761ed003444ae52083f962076 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 27 May 2020 17:24:18 +0100 Subject: [PATCH 082/222] drm/i915/gt: Prevent timeslicing into unpreemptable requests We have a I915_REQUEST_NOPREEMPT flag that we set when we must prevent the HW from preempting during the course of this request. We need to honour this flag and protect the HW even if we have a heartbeat request, or other maximum priority barrier, pending. As such, restrict the timeslicing check to avoid preempting into the topmost priority band, leaving the unpreemptable requests in blissful peace running uninterrupted on the HW. v2: Set the I915_PRIORITY_BARRIER to be less than I915_PRIORITY_UNPREEMPTABLE so that we never submit a request (heartbeat or barrier) that can legitimately preempt the current non-preemptable request. 
Fixes: 2a98f4e65bba ("drm/i915: add infrastructure to hold off preemption on a request") Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20200527162418.24755-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/intel_lrc.c | 1 + drivers/gpu/drm/i915/gt/selftest_lrc.c | 118 ++++++++++++++++++++- drivers/gpu/drm/i915/i915_priolist_types.h | 2 +- 3 files changed, 119 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index 3214a4ecc31a..69fff36ec0cc 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -1928,6 +1928,7 @@ need_timeslice(const struct intel_engine_cs *engine, if (!list_is_last(&rq->sched.link, &engine->active.requests)) hint = max(hint, rq_prio(list_next_entry(rq, sched.link))); + GEM_BUG_ON(hint >= I915_PRIORITY_UNPREEMPTABLE); return hint >= effective_prio(rq); } diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c index 66f710b1b61e..3e35a45d6218 100644 --- a/drivers/gpu/drm/i915/gt/selftest_lrc.c +++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c @@ -823,7 +823,7 @@ slice_semaphore_queue(struct intel_engine_cs *outer, } } - err = release_queue(outer, vma, n, INT_MAX); + err = release_queue(outer, vma, n, I915_PRIORITY_BARRIER); if (err) goto out; @@ -1289,6 +1289,121 @@ err_obj: return err; } +static int live_timeslice_nopreempt(void *arg) +{ + struct intel_gt *gt = arg; + struct intel_engine_cs *engine; + enum intel_engine_id id; + struct igt_spinner spin; + int err = 0; + + /* + * We should not timeslice into a request that is marked with + * I915_REQUEST_NOPREEMPT. 
+ */ + if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION)) + return 0; + + if (igt_spinner_init(&spin, gt)) + return -ENOMEM; + + for_each_engine(engine, gt, id) { + struct intel_context *ce; + struct i915_request *rq; + unsigned long timeslice; + + if (!intel_engine_has_preemption(engine)) + continue; + + ce = intel_context_create(engine); + if (IS_ERR(ce)) { + err = PTR_ERR(ce); + break; + } + + engine_heartbeat_disable(engine); + timeslice = xchg(&engine->props.timeslice_duration_ms, 1); + + /* Create an unpreemptible spinner */ + + rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK); + intel_context_put(ce); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto out_heartbeat; + } + + i915_request_get(rq); + i915_request_add(rq); + + if (!igt_wait_for_spinner(&spin, rq)) { + i915_request_put(rq); + err = -ETIME; + goto out_spin; + } + + set_bit(I915_FENCE_FLAG_NOPREEMPT, &rq->fence.flags); + i915_request_put(rq); + + /* Followed by a maximum priority barrier (heartbeat) */ + + ce = intel_context_create(engine); + if (IS_ERR(ce)) { + err = PTR_ERR(rq); + goto out_spin; + } + + rq = intel_context_create_request(ce); + intel_context_put(ce); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto out_spin; + } + + rq->sched.attr.priority = I915_PRIORITY_BARRIER; + i915_request_get(rq); + i915_request_add(rq); + + /* + * Wait until the barrier is in ELSP, and we know timeslicing + * will have been activated. + */ + if (wait_for_submit(engine, rq, HZ / 2)) { + i915_request_put(rq); + err = -ETIME; + goto out_spin; + } + + /* + * Since the ELSP[0] request is unpreemptible, it should not + * allow the maximum priority barrier through. Wait long + * enough to see if it is timesliced in by mistake. 
+ */ + if (i915_request_wait(rq, 0, timeslice_threshold(engine)) >= 0) { + pr_err("%s: I915_PRIORITY_BARRIER request completed, bypassing no-preempt request\n", + engine->name); + err = -EINVAL; + } + i915_request_put(rq); + +out_spin: + igt_spinner_end(&spin); +out_heartbeat: + xchg(&engine->props.timeslice_duration_ms, timeslice); + engine_heartbeat_enable(engine); + if (err) + break; + + if (igt_flush_test(gt->i915)) { + err = -EIO; + break; + } + } + + igt_spinner_fini(&spin); + return err; +} + static int live_busywait_preempt(void *arg) { struct intel_gt *gt = arg; @@ -4475,6 +4590,7 @@ int intel_execlists_live_selftests(struct drm_i915_private *i915) SUBTEST(live_timeslice_preempt), SUBTEST(live_timeslice_rewind), SUBTEST(live_timeslice_queue), + SUBTEST(live_timeslice_nopreempt), SUBTEST(live_busywait_preempt), SUBTEST(live_preempt), SUBTEST(live_late_preempt), diff --git a/drivers/gpu/drm/i915/i915_priolist_types.h b/drivers/gpu/drm/i915/i915_priolist_types.h index 5003a71113cb..8aa7866ec6b6 100644 --- a/drivers/gpu/drm/i915/i915_priolist_types.h +++ b/drivers/gpu/drm/i915/i915_priolist_types.h @@ -42,7 +42,7 @@ enum { * active request. */ #define I915_PRIORITY_UNPREEMPTABLE INT_MAX -#define I915_PRIORITY_BARRIER INT_MAX +#define I915_PRIORITY_BARRIER (I915_PRIORITY_UNPREEMPTABLE - 1) struct i915_priolist { struct list_head requests[I915_PRIORITY_COUNT]; From dc6cd912c7cd83ec9859429c552b2986c0386b90 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 28 May 2020 16:04:52 +0100 Subject: [PATCH 083/222] drm/i915/gt: Restore both GGTT bindings on resume We should be able to skip restoring LOCAL (user) binds within the GGTT on resume and let them be restored upon demand. However, our consistency checks demand that the bind flags match the node state, and we cannot simply clear the flags, we need to evict as well. For now, make sure we restore the bind flags exactly upon resume. 
Fixes: 0109a16ef391 ("drm/i915/gt: Clear LOCAL_BIND from shared GGTT on resume") Fixes: bf0840cdb304 ("drm/i915/gt: Stop cross-polluting PIN_GLOBAL with PIN_USER with no-ppgtt") Signed-off-by: Chris Wilson Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20200528150452.7880-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/intel_ggtt.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c index 317172ad5ef3..ffe285b0b3bd 100644 --- a/drivers/gpu/drm/i915/gt/intel_ggtt.c +++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c @@ -1183,13 +1183,11 @@ void i915_ggtt_resume(struct i915_ggtt *ggtt) /* clflush objects bound into the GGTT and rebind them. */ list_for_each_entry(vma, &ggtt->vm.bound_list, vm_link) { struct drm_i915_gem_object *obj = vma->obj; - - if (!(clear_bind(vma) & I915_VMA_GLOBAL_BIND)) - continue; + unsigned int was_bound = clear_bind(vma); WARN_ON(i915_vma_bind(vma, obj ? obj->cache_level : 0, - PIN_GLOBAL, NULL)); + was_bound, NULL)); if (obj) { /* only used during resume => exclusive access */ flush |= fetch_and_zero(&obj->write_domain); obj->read_domains |= I915_GEM_DOMAIN_GTT; From bffa18dd0bca90112746bafd333386c71fe55efe Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 28 May 2020 09:24:27 +0100 Subject: [PATCH 084/222] drm/i915/gt: Remove local entries from GGTT on suspend Across suspend/resume, we clear the entire GGTT and rebuild from scratch. In particular, we want to only preserve the global entries for use by the HW, and delay reinstating the local binds until required by the user. This means that we can evict any local binds in the global GTT, saving any time in preserving their state, as they will be rebound on demand. 
References: https://gitlab.freedesktop.org/drm/intel/-/issues/1947 Signed-off-by: Chris Wilson Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20200528082427.21402-2-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/intel_ggtt.c | 23 ++++++++++- drivers/gpu/drm/i915/i915_vma.c | 59 +++++++++++++++------------- drivers/gpu/drm/i915/i915_vma.h | 1 + 3 files changed, 54 insertions(+), 29 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c index ffe285b0b3bd..323c328d444a 100644 --- a/drivers/gpu/drm/i915/gt/intel_ggtt.c +++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c @@ -108,13 +108,32 @@ static bool needs_idle_maps(struct drm_i915_private *i915) void i915_ggtt_suspend(struct i915_ggtt *ggtt) { - struct i915_vma *vma; + struct i915_vma *vma, *vn; + int open; - list_for_each_entry(vma, &ggtt->vm.bound_list, vm_link) + mutex_lock(&ggtt->vm.mutex); + + /* Skip rewriting PTE on VMA unbind. */ + open = atomic_xchg(&ggtt->vm.open, 0); + + list_for_each_entry_safe(vma, vn, &ggtt->vm.bound_list, vm_link) { + GEM_BUG_ON(!drm_mm_node_allocated(&vma->node)); i915_vma_wait_for_bind(vma); + if (i915_vma_is_pinned(vma)) + continue; + + if (!i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND)) { + __i915_vma_evict(vma); + drm_mm_remove_node(&vma->node); + } + } + ggtt->vm.clear_range(&ggtt->vm, 0, ggtt->vm.total); ggtt->invalidate(ggtt); + atomic_set(&ggtt->vm.open, open); + + mutex_unlock(&ggtt->vm.mutex); intel_gt_check_and_clear_faults(ggtt->vm.gt); } diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index 22198b758459..9b30ddc49e4b 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -1229,31 +1229,9 @@ int i915_vma_move_to_active(struct i915_vma *vma, return 0; } -int __i915_vma_unbind(struct i915_vma *vma) +void __i915_vma_evict(struct i915_vma *vma) { - int ret; - - lockdep_assert_held(&vma->vm->mutex); - - if (i915_vma_is_pinned(vma)) { - 
vma_print_allocator(vma, "is pinned"); - return -EAGAIN; - } - - /* - * After confirming that no one else is pinning this vma, wait for - * any laggards who may have crept in during the wait (through - * a residual pin skipping the vm->mutex) to complete. - */ - ret = i915_vma_sync(vma); - if (ret) - return ret; - - if (!drm_mm_node_allocated(&vma->node)) - return 0; - GEM_BUG_ON(i915_vma_is_pinned(vma)); - GEM_BUG_ON(i915_vma_is_active(vma)); if (i915_vma_is_map_and_fenceable(vma)) { /* Force a pagefault for domain tracking on next user access */ @@ -1292,6 +1270,33 @@ int __i915_vma_unbind(struct i915_vma *vma) i915_vma_detach(vma); vma_unbind_pages(vma); +} + +int __i915_vma_unbind(struct i915_vma *vma) +{ + int ret; + + lockdep_assert_held(&vma->vm->mutex); + + if (!drm_mm_node_allocated(&vma->node)) + return 0; + + if (i915_vma_is_pinned(vma)) { + vma_print_allocator(vma, "is pinned"); + return -EAGAIN; + } + + /* + * After confirming that no one else is pinning this vma, wait for + * any laggards who may have crept in during the wait (through + * a residual pin skipping the vm->mutex) to complete. 
+ */ + ret = i915_vma_sync(vma); + if (ret) + return ret; + + GEM_BUG_ON(i915_vma_is_active(vma)); + __i915_vma_evict(vma); drm_mm_remove_node(&vma->node); /* pairs with i915_vma_release() */ return 0; @@ -1303,13 +1308,13 @@ int i915_vma_unbind(struct i915_vma *vma) intel_wakeref_t wakeref = 0; int err; - if (!drm_mm_node_allocated(&vma->node)) - return 0; - /* Optimistic wait before taking the mutex */ err = i915_vma_sync(vma); if (err) - goto out_rpm; + return err; + + if (!drm_mm_node_allocated(&vma->node)) + return 0; if (i915_vma_is_pinned(vma)) { vma_print_allocator(vma, "is pinned"); diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h index 8ad1daabcd58..d0d01f909548 100644 --- a/drivers/gpu/drm/i915/i915_vma.h +++ b/drivers/gpu/drm/i915/i915_vma.h @@ -203,6 +203,7 @@ bool i915_vma_misplaced(const struct i915_vma *vma, u64 size, u64 alignment, u64 flags); void __i915_vma_set_map_and_fenceable(struct i915_vma *vma); void i915_vma_revoke_mmap(struct i915_vma *vma); +void __i915_vma_evict(struct i915_vma *vma); int __i915_vma_unbind(struct i915_vma *vma); int __must_check i915_vma_unbind(struct i915_vma *vma); void i915_vma_unlink_ctx(struct i915_vma *vma); From ba03a63d76ac8131fad58c34fb793d18b0a8964c Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 28 May 2020 08:41:00 +0100 Subject: [PATCH 085/222] drm/i915/gt: Don't declare hangs if engine is stalled If the ring submission is stalled on an external request, nothing can be submitted, not even the heartbeat in the kernel context. Since nothing is running, resetting the engine/device does not unblock the system and is pointless. We can see if the heartbeat is supposed to be running before declaring foul. 
Signed-off-by: Chris Wilson Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20200528074109.28235-2-chris@chris-wilson.co.uk --- .../gpu/drm/i915/gt/intel_engine_heartbeat.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c index 5136c8bf112d..f67ad937eefb 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c @@ -48,8 +48,10 @@ static void show_heartbeat(const struct i915_request *rq, struct drm_printer p = drm_debug_printer("heartbeat"); intel_engine_dump(engine, &p, - "%s heartbeat {prio:%d} not ticking\n", + "%s heartbeat {seqno:%llx:%lld, prio:%d} not ticking\n", engine->name, + rq->fence.context, + rq->fence.seqno, rq->sched.attr.priority); } @@ -76,8 +78,19 @@ static void heartbeat(struct work_struct *wrk) goto out; if (engine->heartbeat.systole) { - if (engine->schedule && - rq->sched.attr.priority < I915_PRIORITY_BARRIER) { + if (!i915_sw_fence_signaled(&rq->submit)) { + /* + * Not yet submitted, system is stalled. + * + * This more often happens for ring submission, + * where all contexts are funnelled into a common + * ringbuffer. If one context is blocked on an + * external fence, not only is it not submitted, + * but all other contexts, including the kernel + * context are stuck waiting for the signal. + */ + } else if (engine->schedule && + rq->sched.attr.priority < I915_PRIORITY_BARRIER) { /* * Gradually raise the priority of the heartbeat to * give high priority work [which presumably desires From 2010b7f0a8521fa7463056c8f077277a33a45c5f Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 28 May 2020 21:57:27 +0100 Subject: [PATCH 086/222] drm/i915/gt: Start timeslice on partial submission We may choose to only submit ELSP[0], even though we have sufficient requests to fill the whole ELSP. 
Normally, we only start timeslicing if we fill more than one port, but in this case we need to start timeslicing for the queue that we choose not to submit. Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20200528205727.20309-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/intel_lrc.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index 69fff36ec0cc..6fc0966b75ff 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -2358,8 +2358,10 @@ static void execlists_dequeue(struct intel_engine_cs *engine) if (last->context == rq->context) goto done; - if (i915_request_has_sentinel(last)) + if (i915_request_has_sentinel(last)) { + start_timeslice(engine, rq_prio(rq)); goto done; + } /* * If GVT overrides us we only ever submit From af157b7611a21a33a5cd5b3065c6776f73ea91f9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Wed, 29 Apr 2020 13:39:04 +0300 Subject: [PATCH 087/222] drm/i915: Stop using mode->private_flags MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace the use of mode->private_flags with a truly private bitmask in our own crtc state. We also need a copy in the crtc itself so the vblank code can get at it. We already have scanline_offset in there for a similar reason, as well as the vblank->hwmode which is assigned via drm_calc_timestamping_constants(). Fortunately we now have a nice place for doing the crtc_state->crtc copy in intel_crtc_update_active_timings() which gets called both for modesets and init/resume readout. The one slightly iffy spot is the INHERITED flag which we want to preserve until userspace/fb_helper does the first proper commit after actually calling .detect() on the connectors. 
Otherwise we don't have the full sink capabilities (audio,infoframes,etc.) when .compute_config() gets called and thus we will fail to enable those features when the first userspace commit happens. The only internal commit we do prior to that should be from intel_initial_commit() and there we can simply preserve the INHERITED flag from the readout. v2: Deal with INHERITED in sanitize_watermarks() as well CC: Sam Ravnborg Cc: Daniel Vetter Cc: Emil Velikov Reviewed-by: Daniel Vetter Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20200429103904.11727-1-ville.syrjala@linux.intel.com --- drivers/gpu/drm/i915/display/icl_dsi.c | 13 ++---- drivers/gpu/drm/i915/display/intel_atomic.c | 1 + drivers/gpu/drm/i915/display/intel_display.c | 40 ++++++++++++++----- .../drm/i915/display/intel_display_types.h | 9 ++++- drivers/gpu/drm/i915/display/intel_tv.c | 4 +- drivers/gpu/drm/i915/display/vlv_dsi.c | 6 +-- drivers/gpu/drm/i915/i915_irq.c | 4 +- 7 files changed, 49 insertions(+), 28 deletions(-) diff --git a/drivers/gpu/drm/i915/display/icl_dsi.c b/drivers/gpu/drm/i915/display/icl_dsi.c index 4fec5bd64920..25200f289e6e 100644 --- a/drivers/gpu/drm/i915/display/icl_dsi.c +++ b/drivers/gpu/drm/i915/display/icl_dsi.c @@ -1469,8 +1469,7 @@ static void gen11_dsi_get_config(struct intel_encoder *encoder, pipe_config->pipe_bpp = bdw_get_pipemisc_bpp(crtc); if (gen11_dsi_is_periodic_cmd_mode(intel_dsi)) - pipe_config->hw.adjusted_mode.private_flags |= - I915_MODE_FLAG_DSI_PERIODIC_CMD_MODE; + pipe_config->mode_flags |= I915_MODE_FLAG_DSI_PERIODIC_CMD_MODE; } static int gen11_dsi_dsc_compute_config(struct intel_encoder *encoder, @@ -1558,10 +1557,6 @@ static int gen11_dsi_compute_config(struct intel_encoder *encoder, pipe_config->port_clock = afe_clk(encoder, pipe_config) / 5; - /* We would not operate in periodic command mode */ - pipe_config->hw.adjusted_mode.private_flags &= - ~I915_MODE_FLAG_DSI_PERIODIC_CMD_MODE; - /* * In case of TE GATE cmd 
mode, we * receive TE from the slave if @@ -1569,14 +1564,14 @@ static int gen11_dsi_compute_config(struct intel_encoder *encoder, */ if (is_cmd_mode(intel_dsi)) { if (intel_dsi->ports == (BIT(PORT_B) | BIT(PORT_A))) - pipe_config->hw.adjusted_mode.private_flags |= + pipe_config->mode_flags |= I915_MODE_FLAG_DSI_USE_TE1 | I915_MODE_FLAG_DSI_USE_TE0; else if (intel_dsi->ports == BIT(PORT_B)) - pipe_config->hw.adjusted_mode.private_flags |= + pipe_config->mode_flags |= I915_MODE_FLAG_DSI_USE_TE1; else - pipe_config->hw.adjusted_mode.private_flags |= + pipe_config->mode_flags |= I915_MODE_FLAG_DSI_USE_TE0; } diff --git a/drivers/gpu/drm/i915/display/intel_atomic.c b/drivers/gpu/drm/i915/display/intel_atomic.c index 3cb866f22e74..20b73d57aa25 100644 --- a/drivers/gpu/drm/i915/display/intel_atomic.c +++ b/drivers/gpu/drm/i915/display/intel_atomic.c @@ -253,6 +253,7 @@ intel_crtc_duplicate_state(struct drm_crtc *crtc) crtc_state->fb_bits = 0; crtc_state->update_planes = 0; crtc_state->dsb = NULL; + crtc_state->mode_flags &= ~I915_MODE_FLAG_INHERITED; return &crtc_state->uapi; } diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 9f1464624336..b62a88da909a 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -6425,7 +6425,7 @@ static bool hsw_post_update_enable_ips(const struct intel_crtc_state *old_crtc_s * forcibly enable IPS on the first fastset. 
*/ if (new_crtc_state->update_pipe && - old_crtc_state->hw.adjusted_mode.private_flags & I915_MODE_FLAG_INHERITED) + old_crtc_state->mode_flags & I915_MODE_FLAG_INHERITED) return true; return !old_crtc_state->ips_enabled; @@ -13605,8 +13605,8 @@ intel_pipe_config_compare(const struct intel_crtc_state *current_config, bool ret = true; u32 bp_gamma = 0; bool fixup_inherited = fastset && - (current_config->hw.mode.private_flags & I915_MODE_FLAG_INHERITED) && - !(pipe_config->hw.mode.private_flags & I915_MODE_FLAG_INHERITED); + (current_config->mode_flags & I915_MODE_FLAG_INHERITED) && + !(pipe_config->mode_flags & I915_MODE_FLAG_INHERITED); if (fixup_inherited && !fastboot_enabled(dev_priv)) { drm_dbg_kms(&dev_priv->drm, @@ -14414,6 +14414,8 @@ intel_crtc_update_active_timings(const struct intel_crtc_state *crtc_state) drm_calc_timestamping_constants(&crtc->base, adjusted_mode); + crtc->mode_flags = crtc_state->mode_flags; + /* * The scanline counter increments at the leading edge of hsync. * @@ -14814,8 +14816,7 @@ static int intel_atomic_check(struct drm_device *dev, /* Catch I915_MODE_FLAG_INHERITED */ for_each_oldnew_intel_crtc_in_state(state, crtc, old_crtc_state, new_crtc_state, i) { - if (new_crtc_state->uapi.mode.private_flags != - old_crtc_state->uapi.mode.private_flags) + if (new_crtc_state->mode_flags != old_crtc_state->mode_flags) new_crtc_state->uapi.mode_changed = true; } @@ -15185,7 +15186,7 @@ static void intel_update_crtc(struct intel_atomic_state *state, * of enabling them on the CRTC's first fastset. 
*/ if (new_crtc_state->update_pipe && !modeset && - old_crtc_state->hw.mode.private_flags & I915_MODE_FLAG_INHERITED) + old_crtc_state->mode_flags & I915_MODE_FLAG_INHERITED) intel_crtc_arm_fifo_underrun(crtc, new_crtc_state); } @@ -17505,14 +17506,22 @@ void intel_modeset_init_hw(struct drm_i915_private *i915) static int sanitize_watermarks_add_affected(struct drm_atomic_state *state) { struct drm_plane *plane; - struct drm_crtc *crtc; + struct intel_crtc *crtc; - drm_for_each_crtc(crtc, state->dev) { - struct drm_crtc_state *crtc_state; + for_each_intel_crtc(state->dev, crtc) { + struct intel_crtc_state *crtc_state; - crtc_state = drm_atomic_get_crtc_state(state, crtc); + crtc_state = intel_atomic_get_crtc_state(state, crtc); if (IS_ERR(crtc_state)) return PTR_ERR(crtc_state); + + if (crtc_state->hw.active) { + /* + * Preserve the inherited flag to avoid + * taking the full modeset path. + */ + crtc_state->mode_flags |= I915_MODE_FLAG_INHERITED; + } } drm_for_each_plane(plane, state->dev) { @@ -17654,6 +17663,15 @@ retry: } if (crtc_state->hw.active) { + /* + * We've not yet detected sink capabilities + * (audio,infoframes,etc.) and thus we don't want to + * force a full state recomputation yet. We want that to + * happen only for the first real commit from userspace. + * So preserve the inherited flag for the time being. + */ + crtc_state->mode_flags |= I915_MODE_FLAG_INHERITED; + ret = drm_atomic_add_affected_planes(state, &crtc->base); if (ret) goto out; @@ -18432,7 +18450,7 @@ static void intel_modeset_readout_hw_state(struct drm_device *dev) * set a flag to indicate that a full recalculation is * needed on the next commit. 
*/ - mode->private_flags = I915_MODE_FLAG_INHERITED; + crtc_state->mode_flags |= I915_MODE_FLAG_INHERITED; intel_crtc_compute_pixel_rate(crtc_state); diff --git a/drivers/gpu/drm/i915/display/intel_display_types.h b/drivers/gpu/drm/i915/display/intel_display_types.h index b24266c624fa..b0346f672dbf 100644 --- a/drivers/gpu/drm/i915/display/intel_display_types.h +++ b/drivers/gpu/drm/i915/display/intel_display_types.h @@ -630,7 +630,7 @@ struct intel_crtc_scaler_state { int scaler_id; }; -/* drm_mode->private_flags */ +/* {crtc,crtc_state}->mode_flags */ #define I915_MODE_FLAG_INHERITED (1<<0) /* Flag to get scanline using frame time stamps */ #define I915_MODE_FLAG_GET_SCANLINE_FROM_TIMESTAMP (1<<1) @@ -943,6 +943,9 @@ struct intel_crtc_state { /* Used by SDVO (and if we ever fix it, HDMI). */ unsigned pixel_multiplier; + /* I915_MODE_FLAG_* */ + u8 mode_flags; + u8 lane_count; /* @@ -1108,6 +1111,10 @@ struct intel_crtc { */ bool active; u8 plane_ids_mask; + + /* I915_MODE_FLAG_* */ + u8 mode_flags; + unsigned long long enabled_power_domains; struct intel_overlay *overlay; diff --git a/drivers/gpu/drm/i915/display/intel_tv.c b/drivers/gpu/drm/i915/display/intel_tv.c index fbe12aad7d58..48093f19ec22 100644 --- a/drivers/gpu/drm/i915/display/intel_tv.c +++ b/drivers/gpu/drm/i915/display/intel_tv.c @@ -1161,7 +1161,7 @@ intel_tv_get_config(struct intel_encoder *encoder, /* pixel counter doesn't work on i965gm TV output */ if (IS_I965GM(dev_priv)) - adjusted_mode->private_flags |= + pipe_config->mode_flags |= I915_MODE_FLAG_USE_SCANLINE_COUNTER; } @@ -1331,7 +1331,7 @@ intel_tv_compute_config(struct intel_encoder *encoder, /* pixel counter doesn't work on i965gm TV output */ if (IS_I965GM(dev_priv)) - adjusted_mode->private_flags |= + pipe_config->mode_flags |= I915_MODE_FLAG_USE_SCANLINE_COUNTER; return 0; diff --git a/drivers/gpu/drm/i915/display/vlv_dsi.c b/drivers/gpu/drm/i915/display/vlv_dsi.c index f582ab52f0b0..052e0b31a2da 100644 --- 
a/drivers/gpu/drm/i915/display/vlv_dsi.c +++ b/drivers/gpu/drm/i915/display/vlv_dsi.c @@ -298,7 +298,7 @@ static int intel_dsi_compute_config(struct intel_encoder *encoder, if (IS_GEN9_LP(dev_priv)) { /* Enable Frame time stamp based scanline reporting */ - adjusted_mode->private_flags |= + pipe_config->mode_flags |= I915_MODE_FLAG_GET_SCANLINE_FROM_TIMESTAMP; /* Dual link goes to DSI transcoder A. */ @@ -1097,8 +1097,8 @@ static void bxt_dsi_get_pipe_config(struct intel_encoder *encoder, pipe_config->pipe_bpp = bdw_get_pipemisc_bpp(crtc); /* Enable Frame time stamo based scanline reporting */ - adjusted_mode->private_flags |= - I915_MODE_FLAG_GET_SCANLINE_FROM_TIMESTAMP; + pipe_config->mode_flags |= + I915_MODE_FLAG_GET_SCANLINE_FROM_TIMESTAMP; /* In terms of pixels */ adjusted_mode->crtc_hdisplay = diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index efdd4c7b8e92..63579ab71cf6 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -777,7 +777,7 @@ static int __intel_get_crtc_scanline(struct intel_crtc *crtc) vblank = &crtc->base.dev->vblank[drm_crtc_index(&crtc->base)]; mode = &vblank->hwmode; - if (mode->private_flags & I915_MODE_FLAG_GET_SCANLINE_FROM_TIMESTAMP) + if (crtc->mode_flags & I915_MODE_FLAG_GET_SCANLINE_FROM_TIMESTAMP) return __intel_get_crtc_scanline_from_timestamp(crtc); vtotal = mode->crtc_vtotal; @@ -836,7 +836,7 @@ static bool i915_get_crtc_scanoutpos(struct drm_crtc *_crtc, unsigned long irqflags; bool use_scanline_counter = INTEL_GEN(dev_priv) >= 5 || IS_G4X(dev_priv) || IS_GEN(dev_priv, 2) || - mode->private_flags & I915_MODE_FLAG_USE_SCANLINE_COUNTER; + crtc->mode_flags & I915_MODE_FLAG_USE_SCANLINE_COUNTER; if (drm_WARN_ON(&dev_priv->drm, !mode->crtc_clock)) { drm_dbg(&dev_priv->drm, From a227569d1f04da3ff4a07fd75c5a86c0ce92a7b9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Wed, 29 Apr 2020 13:39:36 +0300 Subject: [PATCH 088/222] drm/i915: Replace 
I915_MODE_FLAG_INHERITED with a boolean MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There's no reason for I915_MODE_FLAG_INHERITED to exist as a flag anymore. Just make it a boolean. v2: Deal with sanitize_watermarks() CC: Sam Ravnborg Cc: Daniel Vetter Cc: Emil Velikov Reviewed-by: Daniel Vetter Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20200429103936.11850-1-ville.syrjala@linux.intel.com --- drivers/gpu/drm/i915/display/intel_atomic.c | 2 +- drivers/gpu/drm/i915/display/intel_display.c | 17 +++++++---------- .../gpu/drm/i915/display/intel_display_types.h | 2 +- 3 files changed, 9 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_atomic.c b/drivers/gpu/drm/i915/display/intel_atomic.c index 20b73d57aa25..630f49b7aa01 100644 --- a/drivers/gpu/drm/i915/display/intel_atomic.c +++ b/drivers/gpu/drm/i915/display/intel_atomic.c @@ -249,11 +249,11 @@ intel_crtc_duplicate_state(struct drm_crtc *crtc) crtc_state->update_wm_post = false; crtc_state->fifo_changed = false; crtc_state->preload_luts = false; + crtc_state->inherited = false; crtc_state->wm.need_postvbl_update = false; crtc_state->fb_bits = 0; crtc_state->update_planes = 0; crtc_state->dsb = NULL; - crtc_state->mode_flags &= ~I915_MODE_FLAG_INHERITED; return &crtc_state->uapi; } diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index b62a88da909a..0b5c15027f3b 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -6424,8 +6424,7 @@ static bool hsw_post_update_enable_ips(const struct intel_crtc_state *old_crtc_s * We can't read out IPS on broadwell, assume the worst and * forcibly enable IPS on the first fastset. 
*/ - if (new_crtc_state->update_pipe && - old_crtc_state->mode_flags & I915_MODE_FLAG_INHERITED) + if (new_crtc_state->update_pipe && old_crtc_state->inherited) return true; return !old_crtc_state->ips_enabled; @@ -13605,8 +13604,7 @@ intel_pipe_config_compare(const struct intel_crtc_state *current_config, bool ret = true; u32 bp_gamma = 0; bool fixup_inherited = fastset && - (current_config->mode_flags & I915_MODE_FLAG_INHERITED) && - !(pipe_config->mode_flags & I915_MODE_FLAG_INHERITED); + current_config->inherited && !pipe_config->inherited; if (fixup_inherited && !fastboot_enabled(dev_priv)) { drm_dbg_kms(&dev_priv->drm, @@ -14813,10 +14811,9 @@ static int intel_atomic_check(struct drm_device *dev, int ret, i; bool any_ms = false; - /* Catch I915_MODE_FLAG_INHERITED */ for_each_oldnew_intel_crtc_in_state(state, crtc, old_crtc_state, new_crtc_state, i) { - if (new_crtc_state->mode_flags != old_crtc_state->mode_flags) + if (new_crtc_state->inherited != old_crtc_state->inherited) new_crtc_state->uapi.mode_changed = true; } @@ -15186,7 +15183,7 @@ static void intel_update_crtc(struct intel_atomic_state *state, * of enabling them on the CRTC's first fastset. */ if (new_crtc_state->update_pipe && !modeset && - old_crtc_state->mode_flags & I915_MODE_FLAG_INHERITED) + old_crtc_state->inherited) intel_crtc_arm_fifo_underrun(crtc, new_crtc_state); } @@ -17520,7 +17517,7 @@ static int sanitize_watermarks_add_affected(struct drm_atomic_state *state) * Preserve the inherited flag to avoid * taking the full modeset path. */ - crtc_state->mode_flags |= I915_MODE_FLAG_INHERITED; + crtc_state->inherited = true; } } @@ -17670,7 +17667,7 @@ retry: * happen only for the first real commit from userspace. * So preserve the inherited flag for the time being. 
*/ - crtc_state->mode_flags |= I915_MODE_FLAG_INHERITED; + crtc_state->inherited = true; ret = drm_atomic_add_affected_planes(state, &crtc->base); if (ret) @@ -18450,7 +18447,7 @@ static void intel_modeset_readout_hw_state(struct drm_device *dev) * set a flag to indicate that a full recalculation is * needed on the next commit. */ - crtc_state->mode_flags |= I915_MODE_FLAG_INHERITED; + crtc_state->inherited = true; intel_crtc_compute_pixel_rate(crtc_state); diff --git a/drivers/gpu/drm/i915/display/intel_display_types.h b/drivers/gpu/drm/i915/display/intel_display_types.h index b0346f672dbf..363d30ceafce 100644 --- a/drivers/gpu/drm/i915/display/intel_display_types.h +++ b/drivers/gpu/drm/i915/display/intel_display_types.h @@ -631,7 +631,6 @@ struct intel_crtc_scaler_state { }; /* {crtc,crtc_state}->mode_flags */ -#define I915_MODE_FLAG_INHERITED (1<<0) /* Flag to get scanline using frame time stamps */ #define I915_MODE_FLAG_GET_SCANLINE_FROM_TIMESTAMP (1<<1) /* Flag to use the scanline counter instead of the pixel counter */ @@ -828,6 +827,7 @@ struct intel_crtc_state { bool update_wm_pre, update_wm_post; /* watermarks are updated */ bool fifo_changed; /* FIFO split is changed */ bool preload_luts; + bool inherited; /* state inherited from BIOS? */ /* Pipe source size (ie. panel fitter input size) * All planes will be positioned inside this space, From 98b7067a17ba745fb6dac9669e26bae1960d283c Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 29 May 2020 09:58:08 +0100 Subject: [PATCH 089/222] drm/i915: Add a few asserts around handling of i915_request_is_active() Let's assert that we only call the execute callbacks on making the request active, and that we do not execute the request without calling the callbacks. 
Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20200529085809.23691-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_request.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 0d810a62ff46..e5aba6824e26 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -192,6 +192,7 @@ static void __notify_execute_cb(struct i915_request *rq) lockdep_assert_held(&rq->lock); + GEM_BUG_ON(!i915_request_is_active(rq)); if (llist_empty(&rq->execute_cb)) return; @@ -518,15 +519,15 @@ xfer: /* We may be recursing from the signal callback of another i915 fence */ if (!test_and_set_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags)) { list_move_tail(&request->sched.link, &engine->active.requests); clear_bit(I915_FENCE_FLAG_PQUEUE, &request->fence.flags); + __notify_execute_cb(request); } + GEM_BUG_ON(!llist_empty(&request->execute_cb)); if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags) && !test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &request->fence.flags) && !i915_request_enable_breadcrumb(request)) intel_engine_signal_breadcrumbs(engine); - __notify_execute_cb(request); - spin_unlock(&request->lock); return result; From b55230e5e800868961fc271b26d9ce53ae1f691e Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 29 May 2020 15:39:26 +0100 Subject: [PATCH 090/222] drm/i915: Check for awaits on still currently executing requests With the advent of preempt-to-busy, a request may still be on the GPU as we unwind. And in the case of an unpreemptible [due to HW] request, that request will remain indefinitely on the GPU even though we have returned it back to our submission queue, and cleared the active bit.
We only run the execution callbacks on transferring the request from our submission queue to the execution queue, but if this is a bonded request that the HW is waiting for, we will not submit it (as we wait for a fresh execution) even though it is still being executed. As we know that there are always preemption points between requests, we know that only the currently executing request may be still active even though we have cleared the flag. However, we do not precisely know which request is in ELSP[0] due to a delay in processing events, and furthermore we only store the last request in a context in our state tracker. Fixes: 22b7a426bbe1 ("drm/i915/execlists: Preempt-to-busy") Testcase: igt/gem_exec_balancer/bonded-dual Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20200529143926.3245-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_request.c | 49 ++++++++++++++++++++++++++++- 1 file changed, 48 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index e5aba6824e26..c5d7220de529 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -363,6 +363,53 @@ static void __llist_add(struct llist_node *node, struct llist_head *head) head->first = node; } +static struct i915_request * const * +__engine_active(struct intel_engine_cs *engine) +{ + return READ_ONCE(engine->execlists.active); +} + +static bool __request_in_flight(const struct i915_request *signal) +{ + struct i915_request * const *port, *rq; + bool inflight = false; + + if (!i915_request_is_ready(signal)) + return false; + + /* + * Even if we have unwound the request, it may still be on + * the GPU (preempt-to-busy). If that request is inside an + * unpreemptible critical section, it will not be removed. 
Some + * GPU functions may even be stuck waiting for the paired request + * (__await_execution) to be submitted and cannot be preempted + * until the bond is executing. + * + * As we know that there are always preemption points between + * requests, we know that only the currently executing request + * may be still active even though we have cleared the flag. + * However, we can't rely on our tracking of ELSP[0] to known + * which request is currently active and so maybe stuck, as + * the tracking maybe an event behind. Instead assume that + * if the context is still inflight, then it is still active + * even if the active flag has been cleared. + */ + if (!intel_context_inflight(signal->context)) + return false; + + rcu_read_lock(); + for (port = __engine_active(signal->engine); (rq = *port); port++) { + if (rq->context == signal->context) { + inflight = i915_seqno_passed(rq->fence.seqno, + signal->fence.seqno); + break; + } + } + rcu_read_unlock(); + + return inflight; +} + static int __await_execution(struct i915_request *rq, struct i915_request *signal, @@ -393,7 +440,7 @@ __await_execution(struct i915_request *rq, } spin_lock_irq(&signal->lock); - if (i915_request_is_active(signal)) { + if (i915_request_is_active(signal) || __request_in_flight(signal)) { if (hook) { hook(rq, &signal->fence); i915_request_put(signal); From ee3fab5b32c0ac81f03d5af0f9646d1741169da5 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 29 May 2020 19:32:03 +0100 Subject: [PATCH 091/222] drm/i915/gem: Taint all shrinkable object locks If we declare that an object type is shrinkable (any that we can reclaim to recover system pages), make sure we taint the object mutex so that lockdep expects us to use it within fs_reclaim. lockdep will then complain the first time we try to allocate while holding the plain mutex, as doing so invites potential recursion. 
Signed-off-by: Chris Wilson Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20200529183204.16850-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gem/i915_gem_object.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c index 99356c00c19e..21635dd415a3 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c @@ -72,6 +72,10 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj, obj->mm.madv = I915_MADV_WILLNEED; INIT_RADIX_TREE(&obj->mm.get_page.radix, GFP_KERNEL | __GFP_NOWARN); mutex_init(&obj->mm.get_page.lock); + + if (IS_ENABLED(CONFIG_LOCKDEP) && i915_gem_object_is_shrinkable(obj)) + i915_gem_shrinker_taints_mutex(to_i915(obj->base.dev), + &obj->mm.lock); } /** From 7d192daa73d9f377fadd2f8ac78146b41685b789 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 29 May 2020 19:32:04 +0100 Subject: [PATCH 092/222] drm/i915/gem: Give each object class a friendly name Name the object classes and their offspring for easier lockdep debugging. 
Signed-off-by: Chris Wilson Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20200529183204.16850-2-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c | 1 + drivers/gpu/drm/i915/gem/i915_gem_internal.c | 1 + drivers/gpu/drm/i915/gem/i915_gem_lmem.c | 1 + drivers/gpu/drm/i915/gem/i915_gem_object.c | 2 +- drivers/gpu/drm/i915/gem/i915_gem_object_types.h | 2 ++ drivers/gpu/drm/i915/gem/i915_gem_phys.c | 1 + drivers/gpu/drm/i915/gem/i915_gem_shmem.c | 1 + drivers/gpu/drm/i915/gem/i915_gem_stolen.c | 1 + drivers/gpu/drm/i915/gem/i915_gem_userptr.c | 1 + drivers/gpu/drm/i915/gem/selftests/huge_gem_object.c | 1 + drivers/gpu/drm/i915/gem/selftests/huge_pages.c | 3 +++ drivers/gpu/drm/i915/gvt/dmabuf.c | 1 + drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 1 + drivers/gpu/drm/i915/selftests/mock_region.c | 1 + 14 files changed, 17 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c index 7db5a793739d..2679380159fc 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c @@ -217,6 +217,7 @@ static void i915_gem_object_put_pages_dmabuf(struct drm_i915_gem_object *obj, } static const struct drm_i915_gem_object_ops i915_gem_object_dmabuf_ops = { + .name = "i915_gem_object_dmabuf", .get_pages = i915_gem_object_get_pages_dmabuf, .put_pages = i915_gem_object_put_pages_dmabuf, }; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_internal.c b/drivers/gpu/drm/i915/gem/i915_gem_internal.c index cbbff81aa0af..ad22f42541bd 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_internal.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_internal.c @@ -137,6 +137,7 @@ static void i915_gem_object_put_pages_internal(struct drm_i915_gem_object *obj, } static const struct drm_i915_gem_object_ops i915_gem_object_internal_ops = { + .name = "i915_gem_object_internal", .flags = I915_GEM_OBJECT_HAS_STRUCT_PAGE | I915_GEM_OBJECT_IS_SHRINKABLE, 
.get_pages = i915_gem_object_get_pages_internal, diff --git a/drivers/gpu/drm/i915/gem/i915_gem_lmem.c b/drivers/gpu/drm/i915/gem/i915_gem_lmem.c index 70543c83df06..932ee21e6609 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_lmem.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_lmem.c @@ -9,6 +9,7 @@ #include "i915_drv.h" const struct drm_i915_gem_object_ops i915_gem_lmem_obj_ops = { + .name = "i915_gem_object_lmem", .flags = I915_GEM_OBJECT_HAS_IOMEM, .get_pages = i915_gem_object_get_pages_buddy, diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c index 21635dd415a3..b6ec5b50d93b 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c @@ -53,7 +53,7 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj, const struct drm_i915_gem_object_ops *ops, struct lock_class_key *key) { - __mutex_init(&obj->mm.lock, "obj->mm.lock", key); + __mutex_init(&obj->mm.lock, ops->name ?: "obj->mm.lock", key); spin_lock_init(&obj->vma.lock); INIT_LIST_HEAD(&obj->vma.list); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h index 54ee658bb168..b1f82a11aef2 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h @@ -61,6 +61,8 @@ struct drm_i915_gem_object_ops { int (*dmabuf_export)(struct drm_i915_gem_object *obj); void (*release)(struct drm_i915_gem_object *obj); + + const char *name; /* friendly name for debug, e.g. 
lockdep classes */ }; enum i915_mmap_type { diff --git a/drivers/gpu/drm/i915/gem/i915_gem_phys.c b/drivers/gpu/drm/i915/gem/i915_gem_phys.c index 021c747b5387..f4277afb89eb 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_phys.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_phys.c @@ -142,6 +142,7 @@ static void phys_release(struct drm_i915_gem_object *obj) } static const struct drm_i915_gem_object_ops i915_gem_phys_ops = { + .name = "i915_gem_object_phys", .get_pages = i915_gem_object_get_pages_phys, .put_pages = i915_gem_object_put_pages_phys, diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c index 7cf8548ff708..38113d3c0138 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c @@ -429,6 +429,7 @@ static void shmem_release(struct drm_i915_gem_object *obj) } const struct drm_i915_gem_object_ops i915_gem_shmem_ops = { + .name = "i915_gem_object_shmem", .flags = I915_GEM_OBJECT_HAS_STRUCT_PAGE | I915_GEM_OBJECT_IS_SHRINKABLE, diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c index dc250278bd2c..e0f21f12d3ce 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c @@ -566,6 +566,7 @@ i915_gem_object_release_stolen(struct drm_i915_gem_object *obj) } static const struct drm_i915_gem_object_ops i915_gem_object_stolen_ops = { + .name = "i915_gem_object_stolen", .get_pages = i915_gem_object_get_pages_stolen, .put_pages = i915_gem_object_put_pages_stolen, .release = i915_gem_object_release_stolen, diff --git a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c index 2226146b01c9..2adc0ea429fb 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c @@ -700,6 +700,7 @@ i915_gem_userptr_dmabuf_export(struct drm_i915_gem_object *obj) } static const struct drm_i915_gem_object_ops i915_gem_userptr_ops = 
{ + .name = "i915_gem_object_userptr", .flags = I915_GEM_OBJECT_HAS_STRUCT_PAGE | I915_GEM_OBJECT_IS_SHRINKABLE | I915_GEM_OBJECT_NO_MMAP | diff --git a/drivers/gpu/drm/i915/gem/selftests/huge_gem_object.c b/drivers/gpu/drm/i915/gem/selftests/huge_gem_object.c index 2b46c6530da9..a768ec61e966 100644 --- a/drivers/gpu/drm/i915/gem/selftests/huge_gem_object.c +++ b/drivers/gpu/drm/i915/gem/selftests/huge_gem_object.c @@ -88,6 +88,7 @@ static void huge_put_pages(struct drm_i915_gem_object *obj, } static const struct drm_i915_gem_object_ops huge_ops = { + .name = "huge-gem", .flags = I915_GEM_OBJECT_HAS_STRUCT_PAGE, .get_pages = huge_get_pages, .put_pages = huge_put_pages, diff --git a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c index a0ed2fab0ff3..8291ede6902c 100644 --- a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c +++ b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c @@ -139,6 +139,7 @@ static void put_huge_pages(struct drm_i915_gem_object *obj, } static const struct drm_i915_gem_object_ops huge_page_ops = { + .name = "huge-gem", .flags = I915_GEM_OBJECT_HAS_STRUCT_PAGE | I915_GEM_OBJECT_IS_SHRINKABLE, .get_pages = get_huge_pages, @@ -283,12 +284,14 @@ static void fake_put_huge_pages(struct drm_i915_gem_object *obj, } static const struct drm_i915_gem_object_ops fake_ops = { + .name = "fake-gem", .flags = I915_GEM_OBJECT_IS_SHRINKABLE, .get_pages = fake_get_huge_pages, .put_pages = fake_put_huge_pages, }; static const struct drm_i915_gem_object_ops fake_ops_single = { + .name = "fake-gem", .flags = I915_GEM_OBJECT_IS_SHRINKABLE, .get_pages = fake_get_huge_pages_single, .put_pages = fake_put_huge_pages, diff --git a/drivers/gpu/drm/i915/gvt/dmabuf.c b/drivers/gpu/drm/i915/gvt/dmabuf.c index 37fc460414a8..c3eb3838fe88 100644 --- a/drivers/gpu/drm/i915/gvt/dmabuf.c +++ b/drivers/gpu/drm/i915/gvt/dmabuf.c @@ -198,6 +198,7 @@ static void vgpu_gem_release(struct drm_i915_gem_object *gem_obj) } static const struct 
drm_i915_gem_object_ops intel_vgpu_gem_ops = { + .name = "i915_gem_object_vgpu", .flags = I915_GEM_OBJECT_IS_PROXY, .get_pages = vgpu_gem_get_pages, .put_pages = vgpu_gem_put_pages, diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c index 2e471500a646..0016ffc7d914 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c @@ -97,6 +97,7 @@ static void fake_put_pages(struct drm_i915_gem_object *obj, } static const struct drm_i915_gem_object_ops fake_ops = { + .name = "fake-gem", .flags = I915_GEM_OBJECT_IS_SHRINKABLE, .get_pages = fake_get_pages, .put_pages = fake_put_pages, diff --git a/drivers/gpu/drm/i915/selftests/mock_region.c b/drivers/gpu/drm/i915/selftests/mock_region.c index b2ad41c27e67..09660f5a0a4c 100644 --- a/drivers/gpu/drm/i915/selftests/mock_region.c +++ b/drivers/gpu/drm/i915/selftests/mock_region.c @@ -9,6 +9,7 @@ #include "mock_region.h" static const struct drm_i915_gem_object_ops mock_region_obj_ops = { + .name = "mock-region", .get_pages = i915_gem_object_get_pages_buddy, .put_pages = i915_gem_object_put_pages_buddy, .release = i915_gem_object_release_memory_region, From a0196dd686a292248507aaa78e95e6dc5f2eaaaa Mon Sep 17 00:00:00 2001 From: Kishore Kadiyala Date: Mon, 1 Jun 2020 13:05:44 +0530 Subject: [PATCH 093/222] drm/i915: Add Plane color encoding support for YCBCR_BT2020 Currently the plane property doesn't have support for YCBCR_BT2020, which enables the corresponding color conversion mode on plane CSC. Enabling the plane property for the planes for GLK & ICL+ platforms. Also as per spec, update the Plane Color CSC from YUV601_TO_RGB709 to YUV601_TO_RGB601. V2: Enabling support for YCBCR_BT2020 for HDR planes on platforms GLK & ICL V3: Refined the condition check to handle GLK & ICL+ HDR planes Also added BT2020 handling in glk_plane_color_ctl.
V4: Combine If-else into single If V5: Drop the checking for HDR planes and enable YCBCR_BT2020 for platforms GLK & ICL+. V6: As per Spec, update PLANE_COLOR_CSC_MODE_YUV601_TO_RGB709 to PLANE_COLOR_CSC_MODE_YUV601_TO_RGB601 as per Ville's feedback. V7: Rebased Cc: Ville Syrjala Cc: Jani Nikula Reviewed-by: Uma Shankar Signed-off-by: Kishore Kadiyala Signed-off-by: Uma Shankar Link: https://patchwork.freedesktop.org/patch/msgid/20200601073544.11291-1-kishore.kadiyala@intel.com --- drivers/gpu/drm/i915/display/intel_display.c | 15 +++++++++++---- drivers/gpu/drm/i915/display/intel_sprite.c | 9 +++++++-- drivers/gpu/drm/i915/i915_reg.h | 2 +- 3 files changed, 19 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 0b5c15027f3b..0b0faf96495c 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -4812,11 +4812,18 @@ u32 glk_plane_color_ctl(const struct intel_crtc_state *crtc_state, plane_color_ctl |= glk_plane_color_ctl_alpha(plane_state); if (fb->format->is_yuv && !icl_is_hdr_plane(dev_priv, plane->id)) { - if (plane_state->hw.color_encoding == DRM_COLOR_YCBCR_BT709) + switch (plane_state->hw.color_encoding) { + case DRM_COLOR_YCBCR_BT709: plane_color_ctl |= PLANE_COLOR_CSC_MODE_YUV709_TO_RGB709; - else - plane_color_ctl |= PLANE_COLOR_CSC_MODE_YUV601_TO_RGB709; - + break; + case DRM_COLOR_YCBCR_BT2020: + plane_color_ctl |= + PLANE_COLOR_CSC_MODE_YUV2020_TO_RGB2020; + break; + default: + plane_color_ctl |= + PLANE_COLOR_CSC_MODE_YUV601_TO_RGB601; + } if (plane_state->hw.color_range == DRM_COLOR_YCBCR_FULL_RANGE) plane_color_ctl |= PLANE_COLOR_YUV_RANGE_CORRECTION_DISABLE; } else if (fb->format->is_yuv) { diff --git a/drivers/gpu/drm/i915/display/intel_sprite.c b/drivers/gpu/drm/i915/display/intel_sprite.c index 571c36f929bd..3cd461bf9131 100644 --- a/drivers/gpu/drm/i915/display/intel_sprite.c +++ 
b/drivers/gpu/drm/i915/display/intel_sprite.c @@ -3061,6 +3061,7 @@ skl_universal_plane_create(struct drm_i915_private *dev_priv, struct intel_plane *plane; enum drm_plane_type plane_type; unsigned int supported_rotations; + unsigned int supported_csc; const u64 *modifiers; const u32 *formats; int num_formats; @@ -3135,9 +3136,13 @@ skl_universal_plane_create(struct drm_i915_private *dev_priv, DRM_MODE_ROTATE_0, supported_rotations); + supported_csc = BIT(DRM_COLOR_YCBCR_BT601) | BIT(DRM_COLOR_YCBCR_BT709); + + if (INTEL_GEN(dev_priv) >= 10 || IS_GEMINILAKE(dev_priv)) + supported_csc |= BIT(DRM_COLOR_YCBCR_BT2020); + drm_plane_create_color_properties(&plane->base, - BIT(DRM_COLOR_YCBCR_BT601) | - BIT(DRM_COLOR_YCBCR_BT709), + supported_csc, BIT(DRM_COLOR_YCBCR_LIMITED_RANGE) | BIT(DRM_COLOR_YCBCR_FULL_RANGE), DRM_COLOR_YCBCR_BT709, diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index e9d50fe0f375..578cfe11cbb9 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -6932,7 +6932,7 @@ enum { #define PLANE_COLOR_INPUT_CSC_ENABLE (1 << 20) /* ICL+ */ #define PLANE_COLOR_PIPE_CSC_ENABLE (1 << 23) /* Pre-ICL */ #define PLANE_COLOR_CSC_MODE_BYPASS (0 << 17) -#define PLANE_COLOR_CSC_MODE_YUV601_TO_RGB709 (1 << 17) +#define PLANE_COLOR_CSC_MODE_YUV601_TO_RGB601 (1 << 17) #define PLANE_COLOR_CSC_MODE_YUV709_TO_RGB709 (2 << 17) #define PLANE_COLOR_CSC_MODE_YUV2020_TO_RGB2020 (3 << 17) #define PLANE_COLOR_CSC_MODE_RGB709_TO_RGB2020 (4 << 17) From 0b0b25490a61a510fc0afc8421a3398f78784969 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 1 Jun 2020 08:24:11 +0100 Subject: [PATCH 094/222] drm/i915: Handle very early engine initialisation failure If we fail during engine setup, we may leave some engines not yet setup. During the error cleanup, we have to be careful not to try and use the uninitialised engines before discarding them.
[ 16.136152] RIP: 0010:__flush_work+0x198/0x1b0 [ 16.136168] Code: ff ff 8b 0b 48 8b 53 08 83 e1 08 48 0f ba 2b 03 80 c9 f0 e9 63 ff ff ff 0f 0b 48 83 c4 48 44 89 f0 5b 5d 41 5c 41 5d 41 5e c3 <0f> 0b 45 31 f6 e9 62 ff ff ff 66 66 2e 0f 1f 84 00 00 00 00 00 0f [ 16.136186] RSP: 0018:ffffc900003bb928 EFLAGS: 00010246 [ 16.136201] RAX: 0000000000000000 RBX: ffff88844f392168 RCX: 0000000000000000 [ 16.136216] RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffff88844f392168 [ 16.136231] RBP: ffff88844f392130 R08: 0000000000000000 R09: 0000000000000001 [ 16.136246] R10: ffff888441e31e40 R11: ffff88845e329c70 R12: ffff88844f796988 [ 16.136261] R13: ffff888441e4fb80 R14: 0000000000000001 R15: ffff88844f790000 [ 16.136388] FS: 00007fecbd208880(0000) GS:ffff88845e380000(0000) knlGS:0000000000000000 [ 16.136405] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 16.136420] CR2: 00007ff3ce748f90 CR3: 0000000457a6a001 CR4: 00000000000606e0 [ 16.136437] Call Trace: [ 16.136456] ? try_to_del_timer_sync+0x3a/0x50 [ 16.136529] intel_wakeref_wait_for_idle+0x87/0xb0 [i915] [ 16.136606] ? 
intel_engines_release+0x68/0xc0 [i915] [ 16.136680] intel_engines_release+0x49/0xc0 [i915] [ 16.136757] intel_gt_init+0x2f4/0x5e0 [i915] Signed-off-by: Chris Wilson Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20200601072446.19548-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/intel_engine_cs.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index da5b61085257..c8c14981eb5d 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -414,12 +414,12 @@ void intel_engines_release(struct intel_gt *gt) /* Decouple the backend; but keep the layout for late GPU resets */ for_each_engine(engine, gt, id) { - intel_wakeref_wait_for_idle(&engine->wakeref); - GEM_BUG_ON(intel_engine_pm_is_awake(engine)); - if (!engine->release) continue; + intel_wakeref_wait_for_idle(&engine->wakeref); + GEM_BUG_ON(intel_engine_pm_is_awake(engine)); + engine->release(engine); engine->release = NULL; From 03c10f472ae931e0d28c90f70e22aa5efeaa8183 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 1 Jun 2020 08:24:23 +0100 Subject: [PATCH 095/222] drm/i915: Relinquish forcewake immediately after manual grouping Our forcewake utilisation is split into categories: automatic and manual. Around bare register reads, we look up the right forcewake domain and automatically acquire and release [upon a timer] the forcewake domain. For other access, where we know we require the forcewake across a group of register reads, we manually acquire the forcewake domain and release it at the end. Again, this currently arms the domain timer for a later release. However, looking at some energy utilisation profiles, we have tried to avoid using forcewake [and rely on the natural wake up to post register updates] because even keeping the fw active for a brief period contributes to a significant power draw [i.e.
when the gpu is sleeping with rc6 at high clocks]. But as it turns out, not posting the writes immediately also has unintended consequences, such as not reducing the clocks and so conserving power while busy. As a compromise, let us only arm the domain timer for automatic forcewake usage around bare register access, but immediately release the forcewake when manually acquired by intel_uncore_forcewake_get/_put. The corollary to this is that we may instead have to take forcewake more often, and so incur a latency penalty in doing so. For Sandybridge this was significant, and even on the latest machines, taking forcewake at interrupt frequency is a huge impact. [So we don't do that anymore! Hopefully, this will spare us from still needing the mitigation of the timer for steady state execution.] Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Cc: Mika Kuoppala Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20200601072446.19548-13-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_uncore.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index a61cb8ca4d50..7d6b9ae7403c 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -709,7 +709,7 @@ static void __intel_uncore_forcewake_put(struct intel_uncore *uncore, continue; } - fw_domain_arm_timer(domain); + uncore->funcs.force_wake_put(uncore, domain->mask); } } From f8c86ffa2800adc80adc679c84c45e0c6b027374 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Wed, 27 May 2020 23:02:45 +0300 Subject: [PATCH 096/222] drm/i915: Fix global state use-after-frees with a refcount MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit While the current locking/serialization of the global state suffices for protecting the obj->state access and the actual hardware reprogramming, we do have a problem with accessing 
the old/new states during nonblocking commits. The state computation and swap will be protected by the crtc locks, but the commit_tails can finish out of order, thus also causing the atomic states to be cleaned up out of order. This would mean the commit that started first but finished last has had its new state freed as the no-longer-needed old state by the other commit. To fix this let's just refcount the states. obj->state amounts to one reference, and the intel_atomic_state holds extra references to both its new and old global obj states. Fixes: 0ef1905ecf2e ("drm/i915: Introduce better global state handling") Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20200527200245.13184-1-ville.syrjala@linux.intel.com Reviewed-by: Stanislav Lisovskiy --- .../gpu/drm/i915/display/intel_global_state.c | 45 ++++++++++++++++--- .../gpu/drm/i915/display/intel_global_state.h | 3 ++ 2 files changed, 42 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_global_state.c b/drivers/gpu/drm/i915/display/intel_global_state.c index 212d4ee68205..7a19215ad844 100644 --- a/drivers/gpu/drm/i915/display/intel_global_state.c +++ b/drivers/gpu/drm/i915/display/intel_global_state.c @@ -10,6 +10,28 @@ #include "intel_display_types.h" #include "intel_global_state.h" +static void __intel_atomic_global_state_free(struct kref *kref) +{ + struct intel_global_state *obj_state = + container_of(kref, struct intel_global_state, ref); + struct intel_global_obj *obj = obj_state->obj; + + obj->funcs->atomic_destroy_state(obj, obj_state); +} + +static void intel_atomic_global_state_put(struct intel_global_state *obj_state) +{ + kref_put(&obj_state->ref, __intel_atomic_global_state_free); +} + +static struct intel_global_state * +intel_atomic_global_state_get(struct intel_global_state *obj_state) +{ + kref_get(&obj_state->ref); + + return obj_state; +} + void intel_atomic_global_obj_init(struct drm_i915_private *dev_priv, struct intel_global_obj 
*obj, struct intel_global_state *state, @@ -17,6 +39,10 @@ void intel_atomic_global_obj_init(struct drm_i915_private *dev_priv, { memset(obj, 0, sizeof(*obj)); + state->obj = obj; + + kref_init(&state->ref); + obj->state = state; obj->funcs = funcs; list_add_tail(&obj->head, &dev_priv->global_obj_list); @@ -28,7 +54,9 @@ void intel_atomic_global_obj_cleanup(struct drm_i915_private *dev_priv) list_for_each_entry_safe(obj, next, &dev_priv->global_obj_list, head) { list_del(&obj->head); - obj->funcs->atomic_destroy_state(obj, obj->state); + + drm_WARN_ON(&dev_priv->drm, kref_read(&obj->state->ref) != 1); + intel_atomic_global_state_put(obj->state); } } @@ -97,10 +125,14 @@ intel_atomic_get_global_obj_state(struct intel_atomic_state *state, if (!obj_state) return ERR_PTR(-ENOMEM); + obj_state->obj = obj; obj_state->changed = false; + kref_init(&obj_state->ref); + state->global_objs[index].state = obj_state; - state->global_objs[index].old_state = obj->state; + state->global_objs[index].old_state = + intel_atomic_global_state_get(obj->state); state->global_objs[index].new_state = obj_state; state->global_objs[index].ptr = obj; obj_state->state = state; @@ -163,7 +195,9 @@ void intel_atomic_swap_global_state(struct intel_atomic_state *state) new_obj_state->state = NULL; state->global_objs[i].state = old_obj_state; - obj->state = new_obj_state; + + intel_atomic_global_state_put(obj->state); + obj->state = intel_atomic_global_state_get(new_obj_state); } } @@ -172,10 +206,9 @@ void intel_atomic_clear_global_state(struct intel_atomic_state *state) int i; for (i = 0; i < state->num_global_objs; i++) { - struct intel_global_obj *obj = state->global_objs[i].ptr; + intel_atomic_global_state_put(state->global_objs[i].old_state); + intel_atomic_global_state_put(state->global_objs[i].new_state); - obj->funcs->atomic_destroy_state(obj, - state->global_objs[i].state); state->global_objs[i].ptr = NULL; state->global_objs[i].state = NULL; state->global_objs[i].old_state = NULL; diff 
--git a/drivers/gpu/drm/i915/display/intel_global_state.h b/drivers/gpu/drm/i915/display/intel_global_state.h index e6163a469029..1f16fa3073c9 100644 --- a/drivers/gpu/drm/i915/display/intel_global_state.h +++ b/drivers/gpu/drm/i915/display/intel_global_state.h @@ -6,6 +6,7 @@ #ifndef __INTEL_GLOBAL_STATE_H__ #define __INTEL_GLOBAL_STATE_H__ +#include <linux/kref.h> #include <linux/list.h> struct drm_i915_private; @@ -54,7 +55,9 @@ struct intel_global_obj { for_each_if(obj) struct intel_global_state { + struct intel_global_obj *obj; struct intel_atomic_state *state; + struct kref ref; bool changed; }; From c48a798a74476877477d7597726308cc2c8e9e07 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 1 Jun 2020 15:03:55 +0100 Subject: [PATCH 097/222] drm/i915: Trim the ironlake+ irq handler Ever noticed that our interrupt handlers are where we spend most of our time on a busy system? In part this is unavoidable as each interrupt requires to poll and reset several registers, but we can try and do so as efficiently as possible. Function old new delta ilk_irq_handler 2317 2156 -161 v2: Restore the irqreturn_t ret Function old new delta ilk_irq_handler.cold 63 72 +9 ilk_irq_handler 2221 2080 -141 A slight improvement in the baseline overnight as well!
Signed-off-by: Chris Wilson Cc: Mika Kuoppala Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20200601140355.20243-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_irq.c | 57 +++++++++++++++++---------------- 1 file changed, 29 insertions(+), 28 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 63579ab71cf6..490574669eaa 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -2097,67 +2097,68 @@ static void ivb_display_irq_handler(struct drm_i915_private *dev_priv, */ static irqreturn_t ilk_irq_handler(int irq, void *arg) { - struct drm_i915_private *dev_priv = arg; + struct drm_i915_private *i915 = arg; + void __iomem * const regs = i915->uncore.regs; u32 de_iir, gt_iir, de_ier, sde_ier = 0; irqreturn_t ret = IRQ_NONE; - if (!intel_irqs_enabled(dev_priv)) + if (unlikely(!intel_irqs_enabled(i915))) return IRQ_NONE; /* IRQs are synced during runtime_suspend, we don't require a wakeref */ - disable_rpm_wakeref_asserts(&dev_priv->runtime_pm); + disable_rpm_wakeref_asserts(&i915->runtime_pm); /* disable master interrupt before clearing iir */ - de_ier = I915_READ(DEIER); - I915_WRITE(DEIER, de_ier & ~DE_MASTER_IRQ_CONTROL); + de_ier = raw_reg_read(regs, DEIER); + raw_reg_write(regs, DEIER, de_ier & ~DE_MASTER_IRQ_CONTROL); /* Disable south interrupts. We'll only write to SDEIIR once, so further * interrupts will will be stored on its back queue, and then we'll be * able to process them after we restore SDEIER (as soon as we restore * it, we'll get an interrupt if SDEIIR still has something to process * due to its back queue). 
*/ - if (!HAS_PCH_NOP(dev_priv)) { - sde_ier = I915_READ(SDEIER); - I915_WRITE(SDEIER, 0); + if (!HAS_PCH_NOP(i915)) { + sde_ier = raw_reg_read(regs, SDEIER); + raw_reg_write(regs, SDEIER, 0); } /* Find, clear, then process each source of interrupt */ - gt_iir = I915_READ(GTIIR); + gt_iir = raw_reg_read(regs, GTIIR); if (gt_iir) { - I915_WRITE(GTIIR, gt_iir); - ret = IRQ_HANDLED; - if (INTEL_GEN(dev_priv) >= 6) - gen6_gt_irq_handler(&dev_priv->gt, gt_iir); + raw_reg_write(regs, GTIIR, gt_iir); + if (INTEL_GEN(i915) >= 6) + gen6_gt_irq_handler(&i915->gt, gt_iir); else - gen5_gt_irq_handler(&dev_priv->gt, gt_iir); + gen5_gt_irq_handler(&i915->gt, gt_iir); + ret = IRQ_HANDLED; } - de_iir = I915_READ(DEIIR); + de_iir = raw_reg_read(regs, DEIIR); if (de_iir) { - I915_WRITE(DEIIR, de_iir); - ret = IRQ_HANDLED; - if (INTEL_GEN(dev_priv) >= 7) - ivb_display_irq_handler(dev_priv, de_iir); + raw_reg_write(regs, DEIIR, de_iir); + if (INTEL_GEN(i915) >= 7) + ivb_display_irq_handler(i915, de_iir); else - ilk_display_irq_handler(dev_priv, de_iir); + ilk_display_irq_handler(i915, de_iir); + ret = IRQ_HANDLED; } - if (INTEL_GEN(dev_priv) >= 6) { - u32 pm_iir = I915_READ(GEN6_PMIIR); + if (INTEL_GEN(i915) >= 6) { + u32 pm_iir = raw_reg_read(regs, GEN6_PMIIR); if (pm_iir) { - I915_WRITE(GEN6_PMIIR, pm_iir); + raw_reg_write(regs, GEN6_PMIIR, pm_iir); + gen6_rps_irq_handler(&i915->gt.rps, pm_iir); ret = IRQ_HANDLED; - gen6_rps_irq_handler(&dev_priv->gt.rps, pm_iir); } } - I915_WRITE(DEIER, de_ier); - if (!HAS_PCH_NOP(dev_priv)) - I915_WRITE(SDEIER, sde_ier); + raw_reg_write(regs, DEIER, de_ier); + if (sde_ier) + raw_reg_write(regs, SDEIER, sde_ier); /* IRQs are synced during runtime_suspend, we don't require a wakeref */ - enable_rpm_wakeref_asserts(&dev_priv->runtime_pm); + enable_rpm_wakeref_asserts(&i915->runtime_pm); return ret; } From 250a353cd85f2d0b06ca73bd20accbe58bea8d82 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Roberto=20de=20Souza?= Date: Fri, 29 May 2020 
16:27:57 -0700 Subject: [PATCH 098/222] drm/i915/tgl: Update TC DP vswing table MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Small updates in dkl_de_emphasis_control field. BSpec: 49292 Signed-off-by: José Roberto de Souza Reviewed-by: Khaled Almahallawy Link: https://patchwork.freedesktop.org/patch/msgid/20200529232757.37832-1-jose.souza@intel.com --- drivers/gpu/drm/i915/display/intel_ddi.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c index aa22465bb56e..cd211f48c401 100644 --- a/drivers/gpu/drm/i915/display/intel_ddi.c +++ b/drivers/gpu/drm/i915/display/intel_ddi.c @@ -639,11 +639,11 @@ struct tgl_dkl_phy_ddi_buf_trans { static const struct tgl_dkl_phy_ddi_buf_trans tgl_dkl_phy_dp_ddi_trans[] = { /* VS pre-emp Non-trans mV Pre-emph dB */ { 0x7, 0x0, 0x00 }, /* 0 0 400mV 0 dB */ - { 0x5, 0x0, 0x03 }, /* 0 1 400mV 3.5 dB */ - { 0x2, 0x0, 0x0b }, /* 0 2 400mV 6 dB */ + { 0x5, 0x0, 0x05 }, /* 0 1 400mV 3.5 dB */ + { 0x2, 0x0, 0x0B }, /* 0 2 400mV 6 dB */ { 0x0, 0x0, 0x19 }, /* 0 3 400mV 9.5 dB */ { 0x5, 0x0, 0x00 }, /* 1 0 600mV 0 dB */ - { 0x2, 0x0, 0x03 }, /* 1 1 600mV 3.5 dB */ + { 0x2, 0x0, 0x08 }, /* 1 1 600mV 3.5 dB */ { 0x0, 0x0, 0x14 }, /* 1 2 600mV 6 dB */ { 0x2, 0x0, 0x00 }, /* 2 0 800mV 0 dB */ { 0x0, 0x0, 0x0B }, /* 2 1 800mV 3.5 dB */ From f9496520df11de00fbafc3cbd693b9570d600ab3 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 1 Jun 2020 17:19:42 +0100 Subject: [PATCH 099/222] drm/i915: Whitelist context-local timestamp in the gen9 cmdparser Allow batch buffers to read their own _local_ cumulative HW runtime of their logical context. 
Fixes: 0f2f39758341 ("drm/i915: Add gen9 BCS cmdparsing") Signed-off-by: Chris Wilson Cc: Mika Kuoppala Cc: <stable@vger.kernel.org> # v5.4+ Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20200601161942.30854-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_cmd_parser.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c index 189b573d02be..372354d33f55 100644 --- a/drivers/gpu/drm/i915/i915_cmd_parser.c +++ b/drivers/gpu/drm/i915/i915_cmd_parser.c @@ -572,6 +572,9 @@ struct drm_i915_reg_descriptor { #define REG32(_reg, ...) \ { .addr = (_reg), __VA_ARGS__ } +#define REG32_IDX(_reg, idx) \ + { .addr = _reg(idx) } + /* * Convenience macro for adding 64-bit registers. * @@ -669,6 +672,7 @@ static const struct drm_i915_reg_descriptor gen9_blt_regs[] = { REG64_IDX(RING_TIMESTAMP, BSD_RING_BASE), REG32(BCS_SWCTRL), REG64_IDX(RING_TIMESTAMP, BLT_RING_BASE), + REG32_IDX(RING_CTX_TIMESTAMP, BLT_RING_BASE), REG64_IDX(BCS_GPR, 0), REG64_IDX(BCS_GPR, 1), REG64_IDX(BCS_GPR, 2), From c1f8587870602274e1de97aca89361cf91bc12d2 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 1 Jun 2020 08:24:12 +0100 Subject: [PATCH 100/222] drm/i915/gt: Split low level gen2-7 CS emitters Pull the routines for writing CS packets out of intel_ring_submission into their own files. These are low level operations for building CS instructions, rather than the logic for filling the global ring buffer with requests, and we will want to reuse them outside of this context.
Signed-off-by: Chris Wilson Acked-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20200601072446.19548-2-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/Makefile | 2 + drivers/gpu/drm/i915/gt/gen2_engine_cs.c | 340 +++++++ drivers/gpu/drm/i915/gt/gen2_engine_cs.h | 38 + drivers/gpu/drm/i915/gt/gen6_engine_cs.c | 455 ++++++++++ drivers/gpu/drm/i915/gt/gen6_engine_cs.h | 39 + drivers/gpu/drm/i915/gt/intel_engine.h | 1 - .../gpu/drm/i915/gt/intel_ring_submission.c | 832 +----------------- 7 files changed, 901 insertions(+), 806 deletions(-) create mode 100644 drivers/gpu/drm/i915/gt/gen2_engine_cs.c create mode 100644 drivers/gpu/drm/i915/gt/gen2_engine_cs.h create mode 100644 drivers/gpu/drm/i915/gt/gen6_engine_cs.c create mode 100644 drivers/gpu/drm/i915/gt/gen6_engine_cs.h diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index b0da6ea6e3f1..41a27fd5dbc7 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -78,6 +78,8 @@ gt-y += \ gt/debugfs_engines.o \ gt/debugfs_gt.o \ gt/debugfs_gt_pm.o \ + gt/gen2_engine_cs.o \ + gt/gen6_engine_cs.o \ gt/gen6_ppgtt.o \ gt/gen7_renderclear.o \ gt/gen8_ppgtt.o \ diff --git a/drivers/gpu/drm/i915/gt/gen2_engine_cs.c b/drivers/gpu/drm/i915/gt/gen2_engine_cs.c new file mode 100644 index 000000000000..8d2e85081247 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/gen2_engine_cs.c @@ -0,0 +1,340 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2020 Intel Corporation + */ + +#include "gen2_engine_cs.h" +#include "i915_drv.h" +#include "intel_engine.h" +#include "intel_gpu_commands.h" +#include "intel_gt.h" +#include "intel_gt_irq.h" +#include "intel_ring.h" + +int gen2_emit_flush(struct i915_request *rq, u32 mode) +{ + unsigned int num_store_dw; + u32 cmd, *cs; + + cmd = MI_FLUSH; + num_store_dw = 0; + if (mode & EMIT_INVALIDATE) + cmd |= MI_READ_FLUSH; + if (mode & EMIT_FLUSH) + num_store_dw = 4; + + cs = intel_ring_begin(rq, 2 + 3 * num_store_dw); + if 
(IS_ERR(cs)) + return PTR_ERR(cs); + + *cs++ = cmd; + while (num_store_dw--) { + *cs++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL; + *cs++ = intel_gt_scratch_offset(rq->engine->gt, + INTEL_GT_SCRATCH_FIELD_DEFAULT); + *cs++ = 0; + } + *cs++ = MI_FLUSH | MI_NO_WRITE_FLUSH; + + intel_ring_advance(rq, cs); + + return 0; +} + +int gen4_emit_flush_rcs(struct i915_request *rq, u32 mode) +{ + u32 cmd, *cs; + int i; + + /* + * read/write caches: + * + * I915_GEM_DOMAIN_RENDER is always invalidated, but is + * only flushed if MI_NO_WRITE_FLUSH is unset. On 965, it is + * also flushed at 2d versus 3d pipeline switches. + * + * read-only caches: + * + * I915_GEM_DOMAIN_SAMPLER is flushed on pre-965 if + * MI_READ_FLUSH is set, and is always flushed on 965. + * + * I915_GEM_DOMAIN_COMMAND may not exist? + * + * I915_GEM_DOMAIN_INSTRUCTION, which exists on 965, is + * invalidated when MI_EXE_FLUSH is set. + * + * I915_GEM_DOMAIN_VERTEX, which exists on 965, is + * invalidated with every MI_FLUSH. + * + * TLBs: + * + * On 965, TLBs associated with I915_GEM_DOMAIN_COMMAND + * and I915_GEM_DOMAIN_CPU in are invalidated at PTE write and + * I915_GEM_DOMAIN_RENDER and I915_GEM_DOMAIN_SAMPLER + * are flushed at any MI_FLUSH. + */ + + cmd = MI_FLUSH; + if (mode & EMIT_INVALIDATE) { + cmd |= MI_EXE_FLUSH; + if (IS_G4X(rq->i915) || IS_GEN(rq->i915, 5)) + cmd |= MI_INVALIDATE_ISP; + } + + i = 2; + if (mode & EMIT_INVALIDATE) + i += 20; + + cs = intel_ring_begin(rq, i); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + *cs++ = cmd; + + /* + * A random delay to let the CS invalidate take effect? Without this + * delay, the GPU relocation path fails as the CS does not see + * the updated contents. Just as important, if we apply the flushes + * to the EMIT_FLUSH branch (i.e. immediately after the relocation + * write and before the invalidate on the next batch), the relocations + * still fail. 
This implies that is a delay following invalidation + * that is required to reset the caches as opposed to a delay to + * ensure the memory is written. + */ + if (mode & EMIT_INVALIDATE) { + *cs++ = GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE; + *cs++ = intel_gt_scratch_offset(rq->engine->gt, + INTEL_GT_SCRATCH_FIELD_DEFAULT) | + PIPE_CONTROL_GLOBAL_GTT; + *cs++ = 0; + *cs++ = 0; + + for (i = 0; i < 12; i++) + *cs++ = MI_FLUSH; + + *cs++ = GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE; + *cs++ = intel_gt_scratch_offset(rq->engine->gt, + INTEL_GT_SCRATCH_FIELD_DEFAULT) | + PIPE_CONTROL_GLOBAL_GTT; + *cs++ = 0; + *cs++ = 0; + } + + *cs++ = cmd; + + intel_ring_advance(rq, cs); + + return 0; +} + +int gen4_emit_flush_vcs(struct i915_request *rq, u32 mode) +{ + u32 *cs; + + cs = intel_ring_begin(rq, 2); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + *cs++ = MI_FLUSH; + *cs++ = MI_NOOP; + intel_ring_advance(rq, cs); + + return 0; +} + +u32 *gen3_emit_breadcrumb(struct i915_request *rq, u32 *cs) +{ + GEM_BUG_ON(i915_request_active_timeline(rq)->hwsp_ggtt != rq->engine->status_page.vma); + GEM_BUG_ON(offset_in_page(i915_request_active_timeline(rq)->hwsp_offset) != I915_GEM_HWS_SEQNO_ADDR); + + *cs++ = MI_FLUSH; + + *cs++ = MI_STORE_DWORD_INDEX; + *cs++ = I915_GEM_HWS_SEQNO_ADDR; + *cs++ = rq->fence.seqno; + + *cs++ = MI_USER_INTERRUPT; + *cs++ = MI_NOOP; + + rq->tail = intel_ring_offset(rq, cs); + assert_ring_tail_valid(rq->ring, rq->tail); + + return cs; +} + +#define GEN5_WA_STORES 8 /* must be at least 1! 
*/ +u32 *gen5_emit_breadcrumb(struct i915_request *rq, u32 *cs) +{ + int i; + + GEM_BUG_ON(i915_request_active_timeline(rq)->hwsp_ggtt != rq->engine->status_page.vma); + GEM_BUG_ON(offset_in_page(i915_request_active_timeline(rq)->hwsp_offset) != I915_GEM_HWS_SEQNO_ADDR); + + *cs++ = MI_FLUSH; + + BUILD_BUG_ON(GEN5_WA_STORES < 1); + for (i = 0; i < GEN5_WA_STORES; i++) { + *cs++ = MI_STORE_DWORD_INDEX; + *cs++ = I915_GEM_HWS_SEQNO_ADDR; + *cs++ = rq->fence.seqno; + } + + *cs++ = MI_USER_INTERRUPT; + + rq->tail = intel_ring_offset(rq, cs); + assert_ring_tail_valid(rq->ring, rq->tail); + + return cs; +} +#undef GEN5_WA_STORES + +/* Just userspace ABI convention to limit the wa batch bo to a resonable size */ +#define I830_BATCH_LIMIT SZ_256K +#define I830_TLB_ENTRIES (2) +#define I830_WA_SIZE max(I830_TLB_ENTRIES * SZ_4K, I830_BATCH_LIMIT) +int i830_emit_bb_start(struct i915_request *rq, + u64 offset, u32 len, + unsigned int dispatch_flags) +{ + u32 *cs, cs_offset = + intel_gt_scratch_offset(rq->engine->gt, + INTEL_GT_SCRATCH_FIELD_DEFAULT); + + GEM_BUG_ON(rq->engine->gt->scratch->size < I830_WA_SIZE); + + cs = intel_ring_begin(rq, 6); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + /* Evict the invalid PTE TLBs */ + *cs++ = COLOR_BLT_CMD | BLT_WRITE_RGBA; + *cs++ = BLT_DEPTH_32 | BLT_ROP_COLOR_COPY | 4096; + *cs++ = I830_TLB_ENTRIES << 16 | 4; /* load each page */ + *cs++ = cs_offset; + *cs++ = 0xdeadbeef; + *cs++ = MI_NOOP; + intel_ring_advance(rq, cs); + + if ((dispatch_flags & I915_DISPATCH_PINNED) == 0) { + if (len > I830_BATCH_LIMIT) + return -ENOSPC; + + cs = intel_ring_begin(rq, 6 + 2); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + /* + * Blit the batch (which has now all relocs applied) to the + * stable batch scratch bo area (so that the CS never + * stumbles over its tlb invalidation bug) ... 
+ */ + *cs++ = SRC_COPY_BLT_CMD | BLT_WRITE_RGBA | (6 - 2); + *cs++ = BLT_DEPTH_32 | BLT_ROP_SRC_COPY | 4096; + *cs++ = DIV_ROUND_UP(len, 4096) << 16 | 4096; + *cs++ = cs_offset; + *cs++ = 4096; + *cs++ = offset; + + *cs++ = MI_FLUSH; + *cs++ = MI_NOOP; + intel_ring_advance(rq, cs); + + /* ... and execute it. */ + offset = cs_offset; + } + + if (!(dispatch_flags & I915_DISPATCH_SECURE)) + offset |= MI_BATCH_NON_SECURE; + + cs = intel_ring_begin(rq, 2); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + *cs++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT; + *cs++ = offset; + intel_ring_advance(rq, cs); + + return 0; +} + +int gen3_emit_bb_start(struct i915_request *rq, + u64 offset, u32 len, + unsigned int dispatch_flags) +{ + u32 *cs; + + if (!(dispatch_flags & I915_DISPATCH_SECURE)) + offset |= MI_BATCH_NON_SECURE; + + cs = intel_ring_begin(rq, 2); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + *cs++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT; + *cs++ = offset; + intel_ring_advance(rq, cs); + + return 0; +} + +int gen4_emit_bb_start(struct i915_request *rq, + u64 offset, u32 length, + unsigned int dispatch_flags) +{ + u32 security; + u32 *cs; + + security = MI_BATCH_NON_SECURE_I965; + if (dispatch_flags & I915_DISPATCH_SECURE) + security = 0; + + cs = intel_ring_begin(rq, 2); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + *cs++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT | security; + *cs++ = offset; + intel_ring_advance(rq, cs); + + return 0; +} + +void gen2_irq_enable(struct intel_engine_cs *engine) +{ + struct drm_i915_private *i915 = engine->i915; + + i915->irq_mask &= ~engine->irq_enable_mask; + intel_uncore_write16(&i915->uncore, GEN2_IMR, i915->irq_mask); + ENGINE_POSTING_READ16(engine, RING_IMR); +} + +void gen2_irq_disable(struct intel_engine_cs *engine) +{ + struct drm_i915_private *i915 = engine->i915; + + i915->irq_mask |= engine->irq_enable_mask; + intel_uncore_write16(&i915->uncore, GEN2_IMR, i915->irq_mask); +} + +void gen3_irq_enable(struct intel_engine_cs *engine) +{ + 
engine->i915->irq_mask &= ~engine->irq_enable_mask; + intel_uncore_write(engine->uncore, GEN2_IMR, engine->i915->irq_mask); + intel_uncore_posting_read_fw(engine->uncore, GEN2_IMR); +} + +void gen3_irq_disable(struct intel_engine_cs *engine) +{ + engine->i915->irq_mask |= engine->irq_enable_mask; + intel_uncore_write(engine->uncore, GEN2_IMR, engine->i915->irq_mask); +} + +void gen5_irq_enable(struct intel_engine_cs *engine) +{ + gen5_gt_enable_irq(engine->gt, engine->irq_enable_mask); +} + +void gen5_irq_disable(struct intel_engine_cs *engine) +{ + gen5_gt_disable_irq(engine->gt, engine->irq_enable_mask); +} diff --git a/drivers/gpu/drm/i915/gt/gen2_engine_cs.h b/drivers/gpu/drm/i915/gt/gen2_engine_cs.h new file mode 100644 index 000000000000..a5cd64a65c9e --- /dev/null +++ b/drivers/gpu/drm/i915/gt/gen2_engine_cs.h @@ -0,0 +1,38 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2020 Intel Corporation + */ + +#ifndef __GEN2_ENGINE_CS_H__ +#define __GEN2_ENGINE_CS_H__ + +#include + +struct i915_request; +struct intel_engine_cs; + +int gen2_emit_flush(struct i915_request *rq, u32 mode); +int gen4_emit_flush_rcs(struct i915_request *rq, u32 mode); +int gen4_emit_flush_vcs(struct i915_request *rq, u32 mode); + +u32 *gen3_emit_breadcrumb(struct i915_request *rq, u32 *cs); +u32 *gen5_emit_breadcrumb(struct i915_request *rq, u32 *cs); + +int i830_emit_bb_start(struct i915_request *rq, + u64 offset, u32 len, + unsigned int dispatch_flags); +int gen3_emit_bb_start(struct i915_request *rq, + u64 offset, u32 len, + unsigned int dispatch_flags); +int gen4_emit_bb_start(struct i915_request *rq, + u64 offset, u32 length, + unsigned int dispatch_flags); + +void gen2_irq_enable(struct intel_engine_cs *engine); +void gen2_irq_disable(struct intel_engine_cs *engine); +void gen3_irq_enable(struct intel_engine_cs *engine); +void gen3_irq_disable(struct intel_engine_cs *engine); +void gen5_irq_enable(struct intel_engine_cs *engine); +void gen5_irq_disable(struct 
intel_engine_cs *engine); + +#endif /* __GEN2_ENGINE_CS_H__ */ diff --git a/drivers/gpu/drm/i915/gt/gen6_engine_cs.c b/drivers/gpu/drm/i915/gt/gen6_engine_cs.c new file mode 100644 index 000000000000..ce38d1bcaba3 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/gen6_engine_cs.c @@ -0,0 +1,455 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2020 Intel Corporation + */ + +#include "gen6_engine_cs.h" +#include "intel_engine.h" +#include "intel_gpu_commands.h" +#include "intel_gt.h" +#include "intel_gt_irq.h" +#include "intel_gt_pm_irq.h" +#include "intel_ring.h" + +#define HWS_SCRATCH_ADDR (I915_GEM_HWS_SCRATCH * sizeof(u32)) + +/* + * Emits a PIPE_CONTROL with a non-zero post-sync operation, for + * implementing two workarounds on gen6. From section 1.4.7.1 + * "PIPE_CONTROL" of the Sandy Bridge PRM volume 2 part 1: + * + * [DevSNB-C+{W/A}] Before any depth stall flush (including those + * produced by non-pipelined state commands), software needs to first + * send a PIPE_CONTROL with no bits set except Post-Sync Operation != + * 0. + * + * [Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache Flush Enable + * =1, a PIPE_CONTROL with any non-zero post-sync-op is required. + * + * And the workaround for these two requires this workaround first: + * + * [Dev-SNB{W/A}]: Pipe-control with CS-stall bit set must be sent + * BEFORE the pipe-control with a post-sync op and no write-cache + * flushes. + * + * And this last workaround is tricky because of the requirements on + * that bit. From section 1.4.7.2.3 "Stall" of the Sandy Bridge PRM + * volume 2 part 1: + * + * "1 of the following must also be set: + * - Render Target Cache Flush Enable ([12] of DW1) + * - Depth Cache Flush Enable ([0] of DW1) + * - Stall at Pixel Scoreboard ([1] of DW1) + * - Depth Stall ([13] of DW1) + * - Post-Sync Operation ([13] of DW1) + * - Notify Enable ([8] of DW1)" + * + * The cache flushes require the workaround flush that triggered this + * one, so we can't use it. 
Depth stall would trigger the same. + * Post-sync nonzero is what triggered this second workaround, so we + * can't use that one either. Notify enable is IRQs, which aren't + * really our business. That leaves only stall at scoreboard. + */ +static int +gen6_emit_post_sync_nonzero_flush(struct i915_request *rq) +{ + u32 scratch_addr = + intel_gt_scratch_offset(rq->engine->gt, + INTEL_GT_SCRATCH_FIELD_RENDER_FLUSH); + u32 *cs; + + cs = intel_ring_begin(rq, 6); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + *cs++ = GFX_OP_PIPE_CONTROL(5); + *cs++ = PIPE_CONTROL_CS_STALL | PIPE_CONTROL_STALL_AT_SCOREBOARD; + *cs++ = scratch_addr | PIPE_CONTROL_GLOBAL_GTT; + *cs++ = 0; /* low dword */ + *cs++ = 0; /* high dword */ + *cs++ = MI_NOOP; + intel_ring_advance(rq, cs); + + cs = intel_ring_begin(rq, 6); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + *cs++ = GFX_OP_PIPE_CONTROL(5); + *cs++ = PIPE_CONTROL_QW_WRITE; + *cs++ = scratch_addr | PIPE_CONTROL_GLOBAL_GTT; + *cs++ = 0; + *cs++ = 0; + *cs++ = MI_NOOP; + intel_ring_advance(rq, cs); + + return 0; +} + +int gen6_emit_flush_rcs(struct i915_request *rq, u32 mode) +{ + u32 scratch_addr = + intel_gt_scratch_offset(rq->engine->gt, + INTEL_GT_SCRATCH_FIELD_RENDER_FLUSH); + u32 *cs, flags = 0; + int ret; + + /* Force SNB workarounds for PIPE_CONTROL flushes */ + ret = gen6_emit_post_sync_nonzero_flush(rq); + if (ret) + return ret; + + /* + * Just flush everything. Experiments have shown that reducing the + * number of bits based on the write domains has little performance + * impact. And when rearranging requests, the order of flushes is + * unknown. + */ + if (mode & EMIT_FLUSH) { + flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH; + flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH; + /* + * Ensure that any following seqno writes only happen + * when the render cache is indeed flushed. 
+ */ + flags |= PIPE_CONTROL_CS_STALL; + } + if (mode & EMIT_INVALIDATE) { + flags |= PIPE_CONTROL_TLB_INVALIDATE; + flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE; + flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE; + flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE; + flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE; + flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE; + /* + * TLB invalidate requires a post-sync write. + */ + flags |= PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_CS_STALL; + } + + cs = intel_ring_begin(rq, 4); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + *cs++ = GFX_OP_PIPE_CONTROL(4); + *cs++ = flags; + *cs++ = scratch_addr | PIPE_CONTROL_GLOBAL_GTT; + *cs++ = 0; + intel_ring_advance(rq, cs); + + return 0; +} + +u32 *gen6_emit_breadcrumb_rcs(struct i915_request *rq, u32 *cs) +{ + /* First we do the gen6_emit_post_sync_nonzero_flush w/a */ + *cs++ = GFX_OP_PIPE_CONTROL(4); + *cs++ = PIPE_CONTROL_CS_STALL | PIPE_CONTROL_STALL_AT_SCOREBOARD; + *cs++ = 0; + *cs++ = 0; + + *cs++ = GFX_OP_PIPE_CONTROL(4); + *cs++ = PIPE_CONTROL_QW_WRITE; + *cs++ = intel_gt_scratch_offset(rq->engine->gt, + INTEL_GT_SCRATCH_FIELD_DEFAULT) | + PIPE_CONTROL_GLOBAL_GTT; + *cs++ = 0; + + /* Finally we can flush and with it emit the breadcrumb */ + *cs++ = GFX_OP_PIPE_CONTROL(4); + *cs++ = (PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH | + PIPE_CONTROL_DEPTH_CACHE_FLUSH | + PIPE_CONTROL_DC_FLUSH_ENABLE | + PIPE_CONTROL_QW_WRITE | + PIPE_CONTROL_CS_STALL); + *cs++ = i915_request_active_timeline(rq)->hwsp_offset | + PIPE_CONTROL_GLOBAL_GTT; + *cs++ = rq->fence.seqno; + + *cs++ = MI_USER_INTERRUPT; + *cs++ = MI_NOOP; + + rq->tail = intel_ring_offset(rq, cs); + assert_ring_tail_valid(rq->ring, rq->tail); + + return cs; +} + +static int mi_flush_dw(struct i915_request *rq, u32 flags) +{ + u32 cmd, *cs; + + cs = intel_ring_begin(rq, 4); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + cmd = MI_FLUSH_DW; + + /* + * We always require a command barrier so that subsequent + * commands, such as breadcrumb interrupts, 
are strictly ordered + * wrt the contents of the write cache being flushed to memory + * (and thus being coherent from the CPU). + */ + cmd |= MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW; + + /* + * Bspec vol 1c.3 - blitter engine command streamer: + * "If ENABLED, all TLBs will be invalidated once the flush + * operation is complete. This bit is only valid when the + * Post-Sync Operation field is a value of 1h or 3h." + */ + cmd |= flags; + + *cs++ = cmd; + *cs++ = HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT; + *cs++ = 0; + *cs++ = MI_NOOP; + + intel_ring_advance(rq, cs); + + return 0; +} + +static int gen6_flush_dw(struct i915_request *rq, u32 mode, u32 invflags) +{ + return mi_flush_dw(rq, mode & EMIT_INVALIDATE ? invflags : 0); +} + +int gen6_emit_flush_xcs(struct i915_request *rq, u32 mode) +{ + return gen6_flush_dw(rq, mode, MI_INVALIDATE_TLB); +} + +int gen6_emit_flush_vcs(struct i915_request *rq, u32 mode) +{ + return gen6_flush_dw(rq, mode, MI_INVALIDATE_TLB | MI_INVALIDATE_BSD); +} + +int gen6_emit_bb_start(struct i915_request *rq, + u64 offset, u32 len, + unsigned int dispatch_flags) +{ + u32 security; + u32 *cs; + + security = MI_BATCH_NON_SECURE_I965; + if (dispatch_flags & I915_DISPATCH_SECURE) + security = 0; + + cs = intel_ring_begin(rq, 2); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + cs = __gen6_emit_bb_start(cs, offset, security); + intel_ring_advance(rq, cs); + + return 0; +} + +int +hsw_emit_bb_start(struct i915_request *rq, + u64 offset, u32 len, + unsigned int dispatch_flags) +{ + u32 security; + u32 *cs; + + security = MI_BATCH_PPGTT_HSW | MI_BATCH_NON_SECURE_HSW; + if (dispatch_flags & I915_DISPATCH_SECURE) + security = 0; + + cs = intel_ring_begin(rq, 2); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + cs = __gen6_emit_bb_start(cs, offset, security); + intel_ring_advance(rq, cs); + + return 0; +} + +static int gen7_stall_cs(struct i915_request *rq) +{ + u32 *cs; + + cs = intel_ring_begin(rq, 4); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + 
*cs++ = GFX_OP_PIPE_CONTROL(4); + *cs++ = PIPE_CONTROL_CS_STALL | PIPE_CONTROL_STALL_AT_SCOREBOARD; + *cs++ = 0; + *cs++ = 0; + intel_ring_advance(rq, cs); + + return 0; +} + +int gen7_emit_flush_rcs(struct i915_request *rq, u32 mode) +{ + u32 scratch_addr = + intel_gt_scratch_offset(rq->engine->gt, + INTEL_GT_SCRATCH_FIELD_RENDER_FLUSH); + u32 *cs, flags = 0; + + /* + * Ensure that any following seqno writes only happen when the render + * cache is indeed flushed. + * + * Workaround: 4th PIPE_CONTROL command (except the ones with only + * read-cache invalidate bits set) must have the CS_STALL bit set. We + * don't try to be clever and just set it unconditionally. + */ + flags |= PIPE_CONTROL_CS_STALL; + + /* + * CS_STALL suggests at least a post-sync write. + */ + flags |= PIPE_CONTROL_QW_WRITE; + flags |= PIPE_CONTROL_GLOBAL_GTT_IVB; + + /* + * Just flush everything. Experiments have shown that reducing the + * number of bits based on the write domains has little performance + * impact. + */ + if (mode & EMIT_FLUSH) { + flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH; + flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH; + flags |= PIPE_CONTROL_DC_FLUSH_ENABLE; + flags |= PIPE_CONTROL_FLUSH_ENABLE; + } + if (mode & EMIT_INVALIDATE) { + flags |= PIPE_CONTROL_TLB_INVALIDATE; + flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE; + flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE; + flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE; + flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE; + flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE; + flags |= PIPE_CONTROL_MEDIA_STATE_CLEAR; + + /* + * Workaround: we must issue a pipe_control with CS-stall bit + * set before a pipe_control command that has the state cache + * invalidate bit set. 
+ */ + gen7_stall_cs(rq); + } + + cs = intel_ring_begin(rq, 4); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + *cs++ = GFX_OP_PIPE_CONTROL(4); + *cs++ = flags; + *cs++ = scratch_addr; + *cs++ = 0; + intel_ring_advance(rq, cs); + + return 0; +} + +u32 *gen7_emit_breadcrumb_rcs(struct i915_request *rq, u32 *cs) +{ + *cs++ = GFX_OP_PIPE_CONTROL(4); + *cs++ = (PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH | + PIPE_CONTROL_DEPTH_CACHE_FLUSH | + PIPE_CONTROL_DC_FLUSH_ENABLE | + PIPE_CONTROL_FLUSH_ENABLE | + PIPE_CONTROL_QW_WRITE | + PIPE_CONTROL_GLOBAL_GTT_IVB | + PIPE_CONTROL_CS_STALL); + *cs++ = i915_request_active_timeline(rq)->hwsp_offset; + *cs++ = rq->fence.seqno; + + *cs++ = MI_USER_INTERRUPT; + *cs++ = MI_NOOP; + + rq->tail = intel_ring_offset(rq, cs); + assert_ring_tail_valid(rq->ring, rq->tail); + + return cs; +} + +u32 *gen6_emit_breadcrumb_xcs(struct i915_request *rq, u32 *cs) +{ + GEM_BUG_ON(i915_request_active_timeline(rq)->hwsp_ggtt != rq->engine->status_page.vma); + GEM_BUG_ON(offset_in_page(i915_request_active_timeline(rq)->hwsp_offset) != I915_GEM_HWS_SEQNO_ADDR); + + *cs++ = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_DW_STORE_INDEX; + *cs++ = I915_GEM_HWS_SEQNO_ADDR | MI_FLUSH_DW_USE_GTT; + *cs++ = rq->fence.seqno; + + *cs++ = MI_USER_INTERRUPT; + + rq->tail = intel_ring_offset(rq, cs); + assert_ring_tail_valid(rq->ring, rq->tail); + + return cs; +} + +#define GEN7_XCS_WA 32 +u32 *gen7_emit_breadcrumb_xcs(struct i915_request *rq, u32 *cs) +{ + int i; + + GEM_BUG_ON(i915_request_active_timeline(rq)->hwsp_ggtt != rq->engine->status_page.vma); + GEM_BUG_ON(offset_in_page(i915_request_active_timeline(rq)->hwsp_offset) != I915_GEM_HWS_SEQNO_ADDR); + + *cs++ = MI_FLUSH_DW | MI_INVALIDATE_TLB | + MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_DW_STORE_INDEX; + *cs++ = I915_GEM_HWS_SEQNO_ADDR | MI_FLUSH_DW_USE_GTT; + *cs++ = rq->fence.seqno; + + for (i = 0; i < GEN7_XCS_WA; i++) { + *cs++ = MI_STORE_DWORD_INDEX; + *cs++ = I915_GEM_HWS_SEQNO_ADDR; + *cs++ = rq->fence.seqno; + } 
+ + *cs++ = MI_FLUSH_DW; + *cs++ = 0; + *cs++ = 0; + + *cs++ = MI_USER_INTERRUPT; + *cs++ = MI_NOOP; + + rq->tail = intel_ring_offset(rq, cs); + assert_ring_tail_valid(rq->ring, rq->tail); + + return cs; +} +#undef GEN7_XCS_WA + +void gen6_irq_enable(struct intel_engine_cs *engine) +{ + ENGINE_WRITE(engine, RING_IMR, + ~(engine->irq_enable_mask | engine->irq_keep_mask)); + + /* Flush/delay to ensure the RING_IMR is active before the GT IMR */ + ENGINE_POSTING_READ(engine, RING_IMR); + + gen5_gt_enable_irq(engine->gt, engine->irq_enable_mask); +} + +void gen6_irq_disable(struct intel_engine_cs *engine) +{ + ENGINE_WRITE(engine, RING_IMR, ~engine->irq_keep_mask); + gen5_gt_disable_irq(engine->gt, engine->irq_enable_mask); +} + +void hsw_irq_enable_vecs(struct intel_engine_cs *engine) +{ + ENGINE_WRITE(engine, RING_IMR, ~engine->irq_enable_mask); + + /* Flush/delay to ensure the RING_IMR is active before the GT IMR */ + ENGINE_POSTING_READ(engine, RING_IMR); + + gen6_gt_pm_unmask_irq(engine->gt, engine->irq_enable_mask); +} + +void hsw_irq_disable_vecs(struct intel_engine_cs *engine) +{ + ENGINE_WRITE(engine, RING_IMR, ~0); + gen6_gt_pm_mask_irq(engine->gt, engine->irq_enable_mask); +} diff --git a/drivers/gpu/drm/i915/gt/gen6_engine_cs.h b/drivers/gpu/drm/i915/gt/gen6_engine_cs.h new file mode 100644 index 000000000000..76c6bc9f3bde --- /dev/null +++ b/drivers/gpu/drm/i915/gt/gen6_engine_cs.h @@ -0,0 +1,39 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2020 Intel Corporation + */ + +#ifndef __GEN6_ENGINE_CS_H__ +#define __GEN6_ENGINE_CS_H__ + +#include + +#include "intel_gpu_commands.h" + +struct i915_request; +struct intel_engine_cs; + +int gen6_emit_flush_rcs(struct i915_request *rq, u32 mode); +int gen6_emit_flush_vcs(struct i915_request *rq, u32 mode); +int gen6_emit_flush_xcs(struct i915_request *rq, u32 mode); +u32 *gen6_emit_breadcrumb_rcs(struct i915_request *rq, u32 *cs); +u32 *gen6_emit_breadcrumb_xcs(struct i915_request *rq, u32 *cs); + +int 
gen7_emit_flush_rcs(struct i915_request *rq, u32 mode); +u32 *gen7_emit_breadcrumb_rcs(struct i915_request *rq, u32 *cs); +u32 *gen7_emit_breadcrumb_xcs(struct i915_request *rq, u32 *cs); + +int gen6_emit_bb_start(struct i915_request *rq, + u64 offset, u32 len, + unsigned int dispatch_flags); +int hsw_emit_bb_start(struct i915_request *rq, + u64 offset, u32 len, + unsigned int dispatch_flags); + +void gen6_irq_enable(struct intel_engine_cs *engine); +void gen6_irq_disable(struct intel_engine_cs *engine); + +void hsw_irq_enable_vecs(struct intel_engine_cs *engine); +void hsw_irq_disable_vecs(struct intel_engine_cs *engine); + +#endif /* __GEN6_ENGINE_CS_H__ */ diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h index 9bf6d4989968..791897f8d847 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine.h +++ b/drivers/gpu/drm/i915/gt/intel_engine.h @@ -187,7 +187,6 @@ intel_write_status_page(struct intel_engine_cs *engine, int reg, u32 value) #define I915_GEM_HWS_SEQNO 0x40 #define I915_GEM_HWS_SEQNO_ADDR (I915_GEM_HWS_SEQNO * sizeof(u32)) #define I915_GEM_HWS_SCRATCH 0x80 -#define I915_GEM_HWS_SCRATCH_ADDR (I915_GEM_HWS_SCRATCH * sizeof(u32)) #define I915_HWS_CSB_BUF0_INDEX 0x10 #define I915_HWS_CSB_WRITE_INDEX 0x1f diff --git a/drivers/gpu/drm/i915/gt/intel_ring_submission.c b/drivers/gpu/drm/i915/gt/intel_ring_submission.c index ca7286e58409..96881cd8b17b 100644 --- a/drivers/gpu/drm/i915/gt/intel_ring_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_ring_submission.c @@ -27,21 +27,15 @@ * */ -#include - -#include "gem/i915_gem_context.h" - +#include "gen2_engine_cs.h" +#include "gen6_engine_cs.h" #include "gen6_ppgtt.h" #include "gen7_renderclear.h" #include "i915_drv.h" -#include "i915_trace.h" #include "intel_context.h" #include "intel_gt.h" -#include "intel_gt_irq.h" -#include "intel_gt_pm_irq.h" #include "intel_reset.h" #include "intel_ring.h" -#include "intel_workarounds.h" #include "shmem_utils.h" /* Rough estimate of the 
typical request size, performing a flush, @@ -49,436 +43,6 @@ */ #define LEGACY_REQUEST_SIZE 200 -static int -gen2_render_ring_flush(struct i915_request *rq, u32 mode) -{ - unsigned int num_store_dw; - u32 cmd, *cs; - - cmd = MI_FLUSH; - num_store_dw = 0; - if (mode & EMIT_INVALIDATE) - cmd |= MI_READ_FLUSH; - if (mode & EMIT_FLUSH) - num_store_dw = 4; - - cs = intel_ring_begin(rq, 2 + 3 * num_store_dw); - if (IS_ERR(cs)) - return PTR_ERR(cs); - - *cs++ = cmd; - while (num_store_dw--) { - *cs++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL; - *cs++ = intel_gt_scratch_offset(rq->engine->gt, - INTEL_GT_SCRATCH_FIELD_DEFAULT); - *cs++ = 0; - } - *cs++ = MI_FLUSH | MI_NO_WRITE_FLUSH; - - intel_ring_advance(rq, cs); - - return 0; -} - -static int -gen4_render_ring_flush(struct i915_request *rq, u32 mode) -{ - u32 cmd, *cs; - int i; - - /* - * read/write caches: - * - * I915_GEM_DOMAIN_RENDER is always invalidated, but is - * only flushed if MI_NO_WRITE_FLUSH is unset. On 965, it is - * also flushed at 2d versus 3d pipeline switches. - * - * read-only caches: - * - * I915_GEM_DOMAIN_SAMPLER is flushed on pre-965 if - * MI_READ_FLUSH is set, and is always flushed on 965. - * - * I915_GEM_DOMAIN_COMMAND may not exist? - * - * I915_GEM_DOMAIN_INSTRUCTION, which exists on 965, is - * invalidated when MI_EXE_FLUSH is set. - * - * I915_GEM_DOMAIN_VERTEX, which exists on 965, is - * invalidated with every MI_FLUSH. - * - * TLBs: - * - * On 965, TLBs associated with I915_GEM_DOMAIN_COMMAND - * and I915_GEM_DOMAIN_CPU in are invalidated at PTE write and - * I915_GEM_DOMAIN_RENDER and I915_GEM_DOMAIN_SAMPLER - * are flushed at any MI_FLUSH. 
- */ - - cmd = MI_FLUSH; - if (mode & EMIT_INVALIDATE) { - cmd |= MI_EXE_FLUSH; - if (IS_G4X(rq->i915) || IS_GEN(rq->i915, 5)) - cmd |= MI_INVALIDATE_ISP; - } - - i = 2; - if (mode & EMIT_INVALIDATE) - i += 20; - - cs = intel_ring_begin(rq, i); - if (IS_ERR(cs)) - return PTR_ERR(cs); - - *cs++ = cmd; - - /* - * A random delay to let the CS invalidate take effect? Without this - * delay, the GPU relocation path fails as the CS does not see - * the updated contents. Just as important, if we apply the flushes - * to the EMIT_FLUSH branch (i.e. immediately after the relocation - * write and before the invalidate on the next batch), the relocations - * still fail. This implies that is a delay following invalidation - * that is required to reset the caches as opposed to a delay to - * ensure the memory is written. - */ - if (mode & EMIT_INVALIDATE) { - *cs++ = GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE; - *cs++ = intel_gt_scratch_offset(rq->engine->gt, - INTEL_GT_SCRATCH_FIELD_DEFAULT) | - PIPE_CONTROL_GLOBAL_GTT; - *cs++ = 0; - *cs++ = 0; - - for (i = 0; i < 12; i++) - *cs++ = MI_FLUSH; - - *cs++ = GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE; - *cs++ = intel_gt_scratch_offset(rq->engine->gt, - INTEL_GT_SCRATCH_FIELD_DEFAULT) | - PIPE_CONTROL_GLOBAL_GTT; - *cs++ = 0; - *cs++ = 0; - } - - *cs++ = cmd; - - intel_ring_advance(rq, cs); - - return 0; -} - -/* - * Emits a PIPE_CONTROL with a non-zero post-sync operation, for - * implementing two workarounds on gen6. From section 1.4.7.1 - * "PIPE_CONTROL" of the Sandy Bridge PRM volume 2 part 1: - * - * [DevSNB-C+{W/A}] Before any depth stall flush (including those - * produced by non-pipelined state commands), software needs to first - * send a PIPE_CONTROL with no bits set except Post-Sync Operation != - * 0. - * - * [Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache Flush Enable - * =1, a PIPE_CONTROL with any non-zero post-sync-op is required. 
- * - * And the workaround for these two requires this workaround first: - * - * [Dev-SNB{W/A}]: Pipe-control with CS-stall bit set must be sent - * BEFORE the pipe-control with a post-sync op and no write-cache - * flushes. - * - * And this last workaround is tricky because of the requirements on - * that bit. From section 1.4.7.2.3 "Stall" of the Sandy Bridge PRM - * volume 2 part 1: - * - * "1 of the following must also be set: - * - Render Target Cache Flush Enable ([12] of DW1) - * - Depth Cache Flush Enable ([0] of DW1) - * - Stall at Pixel Scoreboard ([1] of DW1) - * - Depth Stall ([13] of DW1) - * - Post-Sync Operation ([13] of DW1) - * - Notify Enable ([8] of DW1)" - * - * The cache flushes require the workaround flush that triggered this - * one, so we can't use it. Depth stall would trigger the same. - * Post-sync nonzero is what triggered this second workaround, so we - * can't use that one either. Notify enable is IRQs, which aren't - * really our business. That leaves only stall at scoreboard. 
- */ -static int -gen6_emit_post_sync_nonzero_flush(struct i915_request *rq) -{ - u32 scratch_addr = - intel_gt_scratch_offset(rq->engine->gt, - INTEL_GT_SCRATCH_FIELD_RENDER_FLUSH); - u32 *cs; - - cs = intel_ring_begin(rq, 6); - if (IS_ERR(cs)) - return PTR_ERR(cs); - - *cs++ = GFX_OP_PIPE_CONTROL(5); - *cs++ = PIPE_CONTROL_CS_STALL | PIPE_CONTROL_STALL_AT_SCOREBOARD; - *cs++ = scratch_addr | PIPE_CONTROL_GLOBAL_GTT; - *cs++ = 0; /* low dword */ - *cs++ = 0; /* high dword */ - *cs++ = MI_NOOP; - intel_ring_advance(rq, cs); - - cs = intel_ring_begin(rq, 6); - if (IS_ERR(cs)) - return PTR_ERR(cs); - - *cs++ = GFX_OP_PIPE_CONTROL(5); - *cs++ = PIPE_CONTROL_QW_WRITE; - *cs++ = scratch_addr | PIPE_CONTROL_GLOBAL_GTT; - *cs++ = 0; - *cs++ = 0; - *cs++ = MI_NOOP; - intel_ring_advance(rq, cs); - - return 0; -} - -static int -gen6_render_ring_flush(struct i915_request *rq, u32 mode) -{ - u32 scratch_addr = - intel_gt_scratch_offset(rq->engine->gt, - INTEL_GT_SCRATCH_FIELD_RENDER_FLUSH); - u32 *cs, flags = 0; - int ret; - - /* Force SNB workarounds for PIPE_CONTROL flushes */ - ret = gen6_emit_post_sync_nonzero_flush(rq); - if (ret) - return ret; - - /* Just flush everything. Experiments have shown that reducing the - * number of bits based on the write domains has little performance - * impact. - */ - if (mode & EMIT_FLUSH) { - flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH; - flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH; - /* - * Ensure that any following seqno writes only happen - * when the render cache is indeed flushed. - */ - flags |= PIPE_CONTROL_CS_STALL; - } - if (mode & EMIT_INVALIDATE) { - flags |= PIPE_CONTROL_TLB_INVALIDATE; - flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE; - flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE; - flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE; - flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE; - flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE; - /* - * TLB invalidate requires a post-sync write. 
- */ - flags |= PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_CS_STALL; - } - - cs = intel_ring_begin(rq, 4); - if (IS_ERR(cs)) - return PTR_ERR(cs); - - *cs++ = GFX_OP_PIPE_CONTROL(4); - *cs++ = flags; - *cs++ = scratch_addr | PIPE_CONTROL_GLOBAL_GTT; - *cs++ = 0; - intel_ring_advance(rq, cs); - - return 0; -} - -static u32 *gen6_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs) -{ - /* First we do the gen6_emit_post_sync_nonzero_flush w/a */ - *cs++ = GFX_OP_PIPE_CONTROL(4); - *cs++ = PIPE_CONTROL_CS_STALL | PIPE_CONTROL_STALL_AT_SCOREBOARD; - *cs++ = 0; - *cs++ = 0; - - *cs++ = GFX_OP_PIPE_CONTROL(4); - *cs++ = PIPE_CONTROL_QW_WRITE; - *cs++ = intel_gt_scratch_offset(rq->engine->gt, - INTEL_GT_SCRATCH_FIELD_DEFAULT) | - PIPE_CONTROL_GLOBAL_GTT; - *cs++ = 0; - - /* Finally we can flush and with it emit the breadcrumb */ - *cs++ = GFX_OP_PIPE_CONTROL(4); - *cs++ = (PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH | - PIPE_CONTROL_DEPTH_CACHE_FLUSH | - PIPE_CONTROL_DC_FLUSH_ENABLE | - PIPE_CONTROL_QW_WRITE | - PIPE_CONTROL_CS_STALL); - *cs++ = i915_request_active_timeline(rq)->hwsp_offset | - PIPE_CONTROL_GLOBAL_GTT; - *cs++ = rq->fence.seqno; - - *cs++ = MI_USER_INTERRUPT; - *cs++ = MI_NOOP; - - rq->tail = intel_ring_offset(rq, cs); - assert_ring_tail_valid(rq->ring, rq->tail); - - return cs; -} - -static int -gen7_render_ring_cs_stall_wa(struct i915_request *rq) -{ - u32 *cs; - - cs = intel_ring_begin(rq, 4); - if (IS_ERR(cs)) - return PTR_ERR(cs); - - *cs++ = GFX_OP_PIPE_CONTROL(4); - *cs++ = PIPE_CONTROL_CS_STALL | PIPE_CONTROL_STALL_AT_SCOREBOARD; - *cs++ = 0; - *cs++ = 0; - intel_ring_advance(rq, cs); - - return 0; -} - -static int -gen7_render_ring_flush(struct i915_request *rq, u32 mode) -{ - u32 scratch_addr = - intel_gt_scratch_offset(rq->engine->gt, - INTEL_GT_SCRATCH_FIELD_RENDER_FLUSH); - u32 *cs, flags = 0; - - /* - * Ensure that any following seqno writes only happen when the render - * cache is indeed flushed. 
- * - * Workaround: 4th PIPE_CONTROL command (except the ones with only - * read-cache invalidate bits set) must have the CS_STALL bit set. We - * don't try to be clever and just set it unconditionally. - */ - flags |= PIPE_CONTROL_CS_STALL; - - /* - * CS_STALL suggests at least a post-sync write. - */ - flags |= PIPE_CONTROL_QW_WRITE; - flags |= PIPE_CONTROL_GLOBAL_GTT_IVB; - - /* Just flush everything. Experiments have shown that reducing the - * number of bits based on the write domains has little performance - * impact. - */ - if (mode & EMIT_FLUSH) { - flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH; - flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH; - flags |= PIPE_CONTROL_DC_FLUSH_ENABLE; - flags |= PIPE_CONTROL_FLUSH_ENABLE; - } - if (mode & EMIT_INVALIDATE) { - flags |= PIPE_CONTROL_TLB_INVALIDATE; - flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE; - flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE; - flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE; - flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE; - flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE; - flags |= PIPE_CONTROL_MEDIA_STATE_CLEAR; - - /* Workaround: we must issue a pipe_control with CS-stall bit - * set before a pipe_control command that has the state cache - * invalidate bit set. 
*/ - gen7_render_ring_cs_stall_wa(rq); - } - - cs = intel_ring_begin(rq, 4); - if (IS_ERR(cs)) - return PTR_ERR(cs); - - *cs++ = GFX_OP_PIPE_CONTROL(4); - *cs++ = flags; - *cs++ = scratch_addr; - *cs++ = 0; - intel_ring_advance(rq, cs); - - return 0; -} - -static u32 *gen7_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs) -{ - *cs++ = GFX_OP_PIPE_CONTROL(4); - *cs++ = (PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH | - PIPE_CONTROL_DEPTH_CACHE_FLUSH | - PIPE_CONTROL_DC_FLUSH_ENABLE | - PIPE_CONTROL_FLUSH_ENABLE | - PIPE_CONTROL_QW_WRITE | - PIPE_CONTROL_GLOBAL_GTT_IVB | - PIPE_CONTROL_CS_STALL); - *cs++ = i915_request_active_timeline(rq)->hwsp_offset; - *cs++ = rq->fence.seqno; - - *cs++ = MI_USER_INTERRUPT; - *cs++ = MI_NOOP; - - rq->tail = intel_ring_offset(rq, cs); - assert_ring_tail_valid(rq->ring, rq->tail); - - return cs; -} - -static u32 *gen6_xcs_emit_breadcrumb(struct i915_request *rq, u32 *cs) -{ - GEM_BUG_ON(i915_request_active_timeline(rq)->hwsp_ggtt != rq->engine->status_page.vma); - GEM_BUG_ON(offset_in_page(i915_request_active_timeline(rq)->hwsp_offset) != I915_GEM_HWS_SEQNO_ADDR); - - *cs++ = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_DW_STORE_INDEX; - *cs++ = I915_GEM_HWS_SEQNO_ADDR | MI_FLUSH_DW_USE_GTT; - *cs++ = rq->fence.seqno; - - *cs++ = MI_USER_INTERRUPT; - - rq->tail = intel_ring_offset(rq, cs); - assert_ring_tail_valid(rq->ring, rq->tail); - - return cs; -} - -#define GEN7_XCS_WA 32 -static u32 *gen7_xcs_emit_breadcrumb(struct i915_request *rq, u32 *cs) -{ - int i; - - GEM_BUG_ON(i915_request_active_timeline(rq)->hwsp_ggtt != rq->engine->status_page.vma); - GEM_BUG_ON(offset_in_page(i915_request_active_timeline(rq)->hwsp_offset) != I915_GEM_HWS_SEQNO_ADDR); - - *cs++ = MI_FLUSH_DW | MI_INVALIDATE_TLB | - MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_DW_STORE_INDEX; - *cs++ = I915_GEM_HWS_SEQNO_ADDR | MI_FLUSH_DW_USE_GTT; - *cs++ = rq->fence.seqno; - - for (i = 0; i < GEN7_XCS_WA; i++) { - *cs++ = MI_STORE_DWORD_INDEX; - *cs++ = 
I915_GEM_HWS_SEQNO_ADDR; - *cs++ = rq->fence.seqno; - } - - *cs++ = MI_FLUSH_DW; - *cs++ = 0; - *cs++ = 0; - - *cs++ = MI_USER_INTERRUPT; - *cs++ = MI_NOOP; - - rq->tail = intel_ring_offset(rq, cs); - assert_ring_tail_valid(rq->ring, rq->tail); - - return cs; -} -#undef GEN7_XCS_WA - static void set_hwstam(struct intel_engine_cs *engine, u32 mask) { /* @@ -918,255 +482,6 @@ static void i9xx_submit_request(struct i915_request *request) intel_ring_set_tail(request->ring, request->tail)); } -static u32 *i9xx_emit_breadcrumb(struct i915_request *rq, u32 *cs) -{ - GEM_BUG_ON(i915_request_active_timeline(rq)->hwsp_ggtt != rq->engine->status_page.vma); - GEM_BUG_ON(offset_in_page(i915_request_active_timeline(rq)->hwsp_offset) != I915_GEM_HWS_SEQNO_ADDR); - - *cs++ = MI_FLUSH; - - *cs++ = MI_STORE_DWORD_INDEX; - *cs++ = I915_GEM_HWS_SEQNO_ADDR; - *cs++ = rq->fence.seqno; - - *cs++ = MI_USER_INTERRUPT; - *cs++ = MI_NOOP; - - rq->tail = intel_ring_offset(rq, cs); - assert_ring_tail_valid(rq->ring, rq->tail); - - return cs; -} - -#define GEN5_WA_STORES 8 /* must be at least 1! 
*/ -static u32 *gen5_emit_breadcrumb(struct i915_request *rq, u32 *cs) -{ - int i; - - GEM_BUG_ON(i915_request_active_timeline(rq)->hwsp_ggtt != rq->engine->status_page.vma); - GEM_BUG_ON(offset_in_page(i915_request_active_timeline(rq)->hwsp_offset) != I915_GEM_HWS_SEQNO_ADDR); - - *cs++ = MI_FLUSH; - - BUILD_BUG_ON(GEN5_WA_STORES < 1); - for (i = 0; i < GEN5_WA_STORES; i++) { - *cs++ = MI_STORE_DWORD_INDEX; - *cs++ = I915_GEM_HWS_SEQNO_ADDR; - *cs++ = rq->fence.seqno; - } - - *cs++ = MI_USER_INTERRUPT; - - rq->tail = intel_ring_offset(rq, cs); - assert_ring_tail_valid(rq->ring, rq->tail); - - return cs; -} -#undef GEN5_WA_STORES - -static void -gen5_irq_enable(struct intel_engine_cs *engine) -{ - gen5_gt_enable_irq(engine->gt, engine->irq_enable_mask); -} - -static void -gen5_irq_disable(struct intel_engine_cs *engine) -{ - gen5_gt_disable_irq(engine->gt, engine->irq_enable_mask); -} - -static void -i9xx_irq_enable(struct intel_engine_cs *engine) -{ - engine->i915->irq_mask &= ~engine->irq_enable_mask; - intel_uncore_write(engine->uncore, GEN2_IMR, engine->i915->irq_mask); - intel_uncore_posting_read_fw(engine->uncore, GEN2_IMR); -} - -static void -i9xx_irq_disable(struct intel_engine_cs *engine) -{ - engine->i915->irq_mask |= engine->irq_enable_mask; - intel_uncore_write(engine->uncore, GEN2_IMR, engine->i915->irq_mask); -} - -static void -i8xx_irq_enable(struct intel_engine_cs *engine) -{ - struct drm_i915_private *i915 = engine->i915; - - i915->irq_mask &= ~engine->irq_enable_mask; - intel_uncore_write16(&i915->uncore, GEN2_IMR, i915->irq_mask); - ENGINE_POSTING_READ16(engine, RING_IMR); -} - -static void -i8xx_irq_disable(struct intel_engine_cs *engine) -{ - struct drm_i915_private *i915 = engine->i915; - - i915->irq_mask |= engine->irq_enable_mask; - intel_uncore_write16(&i915->uncore, GEN2_IMR, i915->irq_mask); -} - -static int -bsd_ring_flush(struct i915_request *rq, u32 mode) -{ - u32 *cs; - - cs = intel_ring_begin(rq, 2); - if (IS_ERR(cs)) - return 
PTR_ERR(cs); - - *cs++ = MI_FLUSH; - *cs++ = MI_NOOP; - intel_ring_advance(rq, cs); - return 0; -} - -static void -gen6_irq_enable(struct intel_engine_cs *engine) -{ - ENGINE_WRITE(engine, RING_IMR, - ~(engine->irq_enable_mask | engine->irq_keep_mask)); - - /* Flush/delay to ensure the RING_IMR is active before the GT IMR */ - ENGINE_POSTING_READ(engine, RING_IMR); - - gen5_gt_enable_irq(engine->gt, engine->irq_enable_mask); -} - -static void -gen6_irq_disable(struct intel_engine_cs *engine) -{ - ENGINE_WRITE(engine, RING_IMR, ~engine->irq_keep_mask); - gen5_gt_disable_irq(engine->gt, engine->irq_enable_mask); -} - -static void -hsw_vebox_irq_enable(struct intel_engine_cs *engine) -{ - ENGINE_WRITE(engine, RING_IMR, ~engine->irq_enable_mask); - - /* Flush/delay to ensure the RING_IMR is active before the GT IMR */ - ENGINE_POSTING_READ(engine, RING_IMR); - - gen6_gt_pm_unmask_irq(engine->gt, engine->irq_enable_mask); -} - -static void -hsw_vebox_irq_disable(struct intel_engine_cs *engine) -{ - ENGINE_WRITE(engine, RING_IMR, ~0); - gen6_gt_pm_mask_irq(engine->gt, engine->irq_enable_mask); -} - -static int -i965_emit_bb_start(struct i915_request *rq, - u64 offset, u32 length, - unsigned int dispatch_flags) -{ - u32 *cs; - - cs = intel_ring_begin(rq, 2); - if (IS_ERR(cs)) - return PTR_ERR(cs); - - *cs++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT | (dispatch_flags & - I915_DISPATCH_SECURE ? 
0 : MI_BATCH_NON_SECURE_I965); - *cs++ = offset; - intel_ring_advance(rq, cs); - - return 0; -} - -/* Just userspace ABI convention to limit the wa batch bo to a resonable size */ -#define I830_BATCH_LIMIT SZ_256K -#define I830_TLB_ENTRIES (2) -#define I830_WA_SIZE max(I830_TLB_ENTRIES*4096, I830_BATCH_LIMIT) -static int -i830_emit_bb_start(struct i915_request *rq, - u64 offset, u32 len, - unsigned int dispatch_flags) -{ - u32 *cs, cs_offset = - intel_gt_scratch_offset(rq->engine->gt, - INTEL_GT_SCRATCH_FIELD_DEFAULT); - - GEM_BUG_ON(rq->engine->gt->scratch->size < I830_WA_SIZE); - - cs = intel_ring_begin(rq, 6); - if (IS_ERR(cs)) - return PTR_ERR(cs); - - /* Evict the invalid PTE TLBs */ - *cs++ = COLOR_BLT_CMD | BLT_WRITE_RGBA; - *cs++ = BLT_DEPTH_32 | BLT_ROP_COLOR_COPY | 4096; - *cs++ = I830_TLB_ENTRIES << 16 | 4; /* load each page */ - *cs++ = cs_offset; - *cs++ = 0xdeadbeef; - *cs++ = MI_NOOP; - intel_ring_advance(rq, cs); - - if ((dispatch_flags & I915_DISPATCH_PINNED) == 0) { - if (len > I830_BATCH_LIMIT) - return -ENOSPC; - - cs = intel_ring_begin(rq, 6 + 2); - if (IS_ERR(cs)) - return PTR_ERR(cs); - - /* Blit the batch (which has now all relocs applied) to the - * stable batch scratch bo area (so that the CS never - * stumbles over its tlb invalidation bug) ... - */ - *cs++ = SRC_COPY_BLT_CMD | BLT_WRITE_RGBA | (6 - 2); - *cs++ = BLT_DEPTH_32 | BLT_ROP_SRC_COPY | 4096; - *cs++ = DIV_ROUND_UP(len, 4096) << 16 | 4096; - *cs++ = cs_offset; - *cs++ = 4096; - *cs++ = offset; - - *cs++ = MI_FLUSH; - *cs++ = MI_NOOP; - intel_ring_advance(rq, cs); - - /* ... and execute it. */ - offset = cs_offset; - } - - cs = intel_ring_begin(rq, 2); - if (IS_ERR(cs)) - return PTR_ERR(cs); - - *cs++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT; - *cs++ = offset | (dispatch_flags & I915_DISPATCH_SECURE ? 
0 : - MI_BATCH_NON_SECURE); - intel_ring_advance(rq, cs); - - return 0; -} - -static int -i915_emit_bb_start(struct i915_request *rq, - u64 offset, u32 len, - unsigned int dispatch_flags) -{ - u32 *cs; - - cs = intel_ring_begin(rq, 2); - if (IS_ERR(cs)) - return PTR_ERR(cs); - - *cs++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT; - *cs++ = offset | (dispatch_flags & I915_DISPATCH_SECURE ? 0 : - MI_BATCH_NON_SECURE); - intel_ring_advance(rq, cs); - - return 0; -} - static void __ring_context_fini(struct intel_context *ce) { i915_vma_put(ce->state); @@ -1704,99 +1019,6 @@ static void gen6_bsd_submit_request(struct i915_request *request) intel_uncore_forcewake_put(uncore, FORCEWAKE_ALL); } -static int mi_flush_dw(struct i915_request *rq, u32 flags) -{ - u32 cmd, *cs; - - cs = intel_ring_begin(rq, 4); - if (IS_ERR(cs)) - return PTR_ERR(cs); - - cmd = MI_FLUSH_DW; - - /* - * We always require a command barrier so that subsequent - * commands, such as breadcrumb interrupts, are strictly ordered - * wrt the contents of the write cache being flushed to memory - * (and thus being coherent from the CPU). - */ - cmd |= MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW; - - /* - * Bspec vol 1c.3 - blitter engine command streamer: - * "If ENABLED, all TLBs will be invalidated once the flush - * operation is complete. This bit is only valid when the - * Post-Sync Operation field is a value of 1h or 3h." - */ - cmd |= flags; - - *cs++ = cmd; - *cs++ = I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT; - *cs++ = 0; - *cs++ = MI_NOOP; - - intel_ring_advance(rq, cs); - - return 0; -} - -static int gen6_flush_dw(struct i915_request *rq, u32 mode, u32 invflags) -{ - return mi_flush_dw(rq, mode & EMIT_INVALIDATE ? 
invflags : 0); -} - -static int gen6_bsd_ring_flush(struct i915_request *rq, u32 mode) -{ - return gen6_flush_dw(rq, mode, MI_INVALIDATE_TLB | MI_INVALIDATE_BSD); -} - -static int -hsw_emit_bb_start(struct i915_request *rq, - u64 offset, u32 len, - unsigned int dispatch_flags) -{ - u32 *cs; - - cs = intel_ring_begin(rq, 2); - if (IS_ERR(cs)) - return PTR_ERR(cs); - - *cs++ = MI_BATCH_BUFFER_START | (dispatch_flags & I915_DISPATCH_SECURE ? - 0 : MI_BATCH_PPGTT_HSW | MI_BATCH_NON_SECURE_HSW); - /* bit0-7 is the length on GEN6+ */ - *cs++ = offset; - intel_ring_advance(rq, cs); - - return 0; -} - -static int -gen6_emit_bb_start(struct i915_request *rq, - u64 offset, u32 len, - unsigned int dispatch_flags) -{ - u32 *cs; - - cs = intel_ring_begin(rq, 2); - if (IS_ERR(cs)) - return PTR_ERR(cs); - - *cs++ = MI_BATCH_BUFFER_START | (dispatch_flags & I915_DISPATCH_SECURE ? - 0 : MI_BATCH_NON_SECURE_I965); - /* bit0-7 is the length on GEN6+ */ - *cs++ = offset; - intel_ring_advance(rq, cs); - - return 0; -} - -/* Blitter support (SandyBridge+) */ - -static int gen6_ring_flush(struct i915_request *rq, u32 mode) -{ - return gen6_flush_dw(rq, mode, MI_INVALIDATE_TLB); -} - static void i9xx_set_default_submission(struct intel_engine_cs *engine) { engine->submit_request = i9xx_submit_request; @@ -1843,11 +1065,11 @@ static void setup_irq(struct intel_engine_cs *engine) engine->irq_enable = gen5_irq_enable; engine->irq_disable = gen5_irq_disable; } else if (INTEL_GEN(i915) >= 3) { - engine->irq_enable = i9xx_irq_enable; - engine->irq_disable = i9xx_irq_disable; + engine->irq_enable = gen3_irq_enable; + engine->irq_disable = gen3_irq_disable; } else { - engine->irq_enable = i8xx_irq_enable; - engine->irq_disable = i8xx_irq_disable; + engine->irq_enable = gen2_irq_enable; + engine->irq_disable = gen2_irq_disable; } } @@ -1874,7 +1096,7 @@ static void setup_common(struct intel_engine_cs *engine) * equivalent to our next initial bread so we can elide * engine->emit_init_breadcrumb(). 
*/ - engine->emit_fini_breadcrumb = i9xx_emit_breadcrumb; + engine->emit_fini_breadcrumb = gen3_emit_breadcrumb; if (IS_GEN(i915, 5)) engine->emit_fini_breadcrumb = gen5_emit_breadcrumb; @@ -1883,11 +1105,11 @@ static void setup_common(struct intel_engine_cs *engine) if (INTEL_GEN(i915) >= 6) engine->emit_bb_start = gen6_emit_bb_start; else if (INTEL_GEN(i915) >= 4) - engine->emit_bb_start = i965_emit_bb_start; + engine->emit_bb_start = gen4_emit_bb_start; else if (IS_I830(i915) || IS_I845G(i915)) engine->emit_bb_start = i830_emit_bb_start; else - engine->emit_bb_start = i915_emit_bb_start; + engine->emit_bb_start = gen3_emit_bb_start; } static void setup_rcs(struct intel_engine_cs *engine) @@ -1900,18 +1122,18 @@ static void setup_rcs(struct intel_engine_cs *engine) engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT; if (INTEL_GEN(i915) >= 7) { - engine->emit_flush = gen7_render_ring_flush; - engine->emit_fini_breadcrumb = gen7_rcs_emit_breadcrumb; + engine->emit_flush = gen7_emit_flush_rcs; + engine->emit_fini_breadcrumb = gen7_emit_breadcrumb_rcs; } else if (IS_GEN(i915, 6)) { - engine->emit_flush = gen6_render_ring_flush; - engine->emit_fini_breadcrumb = gen6_rcs_emit_breadcrumb; + engine->emit_flush = gen6_emit_flush_rcs; + engine->emit_fini_breadcrumb = gen6_emit_breadcrumb_rcs; } else if (IS_GEN(i915, 5)) { - engine->emit_flush = gen4_render_ring_flush; + engine->emit_flush = gen4_emit_flush_rcs; } else { if (INTEL_GEN(i915) < 4) - engine->emit_flush = gen2_render_ring_flush; + engine->emit_flush = gen2_emit_flush; else - engine->emit_flush = gen4_render_ring_flush; + engine->emit_flush = gen4_emit_flush_rcs; engine->irq_enable_mask = I915_USER_INTERRUPT; } @@ -1929,15 +1151,15 @@ static void setup_vcs(struct intel_engine_cs *engine) /* gen6 bsd needs a special wa for tail updates */ if (IS_GEN(i915, 6)) engine->set_default_submission = gen6_bsd_set_default_submission; - engine->emit_flush = gen6_bsd_ring_flush; + engine->emit_flush = gen6_emit_flush_vcs; 
engine->irq_enable_mask = GT_BSD_USER_INTERRUPT; if (IS_GEN(i915, 6)) - engine->emit_fini_breadcrumb = gen6_xcs_emit_breadcrumb; + engine->emit_fini_breadcrumb = gen6_emit_breadcrumb_xcs; else - engine->emit_fini_breadcrumb = gen7_xcs_emit_breadcrumb; + engine->emit_fini_breadcrumb = gen7_emit_breadcrumb_xcs; } else { - engine->emit_flush = bsd_ring_flush; + engine->emit_flush = gen4_emit_flush_vcs; if (IS_GEN(i915, 5)) engine->irq_enable_mask = ILK_BSD_USER_INTERRUPT; else @@ -1949,13 +1171,13 @@ static void setup_bcs(struct intel_engine_cs *engine) { struct drm_i915_private *i915 = engine->i915; - engine->emit_flush = gen6_ring_flush; + engine->emit_flush = gen6_emit_flush_xcs; engine->irq_enable_mask = GT_BLT_USER_INTERRUPT; if (IS_GEN(i915, 6)) - engine->emit_fini_breadcrumb = gen6_xcs_emit_breadcrumb; + engine->emit_fini_breadcrumb = gen6_emit_breadcrumb_xcs; else - engine->emit_fini_breadcrumb = gen7_xcs_emit_breadcrumb; + engine->emit_fini_breadcrumb = gen7_emit_breadcrumb_xcs; } static void setup_vecs(struct intel_engine_cs *engine) @@ -1964,12 +1186,12 @@ static void setup_vecs(struct intel_engine_cs *engine) GEM_BUG_ON(INTEL_GEN(i915) < 7); - engine->emit_flush = gen6_ring_flush; + engine->emit_flush = gen6_emit_flush_xcs; engine->irq_enable_mask = PM_VEBOX_USER_INTERRUPT; - engine->irq_enable = hsw_vebox_irq_enable; - engine->irq_disable = hsw_vebox_irq_disable; + engine->irq_enable = hsw_irq_enable_vecs; + engine->irq_disable = hsw_irq_disable_vecs; - engine->emit_fini_breadcrumb = gen7_xcs_emit_breadcrumb; + engine->emit_fini_breadcrumb = gen7_emit_breadcrumb_xcs; } static int gen7_ctx_switch_bb_setup(struct intel_engine_cs * const engine, From 4e408a720a0530ae880378cb5830e7e812fefa4b Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 1 Jun 2020 08:24:13 +0100 Subject: [PATCH 101/222] drm/i915/gt: Move legacy context wa to intel_workarounds Use the central mechanism for recording and verifying that we restore the w/a for the older devices as well. 
Signed-off-by: Chris Wilson Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20200601072446.19548-3-chris@chris-wilson.co.uk --- .../gpu/drm/i915/gt/intel_ring_submission.c | 28 ----------------- drivers/gpu/drm/i915/gt/intel_workarounds.c | 31 +++++++++++++++++++ 2 files changed, 31 insertions(+), 28 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_ring_submission.c b/drivers/gpu/drm/i915/gt/intel_ring_submission.c index 96881cd8b17b..d9c1701061b9 100644 --- a/drivers/gpu/drm/i915/gt/intel_ring_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_ring_submission.c @@ -429,32 +429,6 @@ static void reset_finish(struct intel_engine_cs *engine) { } -static int rcs_resume(struct intel_engine_cs *engine) -{ - struct drm_i915_private *i915 = engine->i915; - struct intel_uncore *uncore = engine->uncore; - - /* - * Disable CONSTANT_BUFFER before it is loaded from the context - * image. For as it is loaded, it is executed and the stored - * address may no longer be valid, leading to a GPU hang. - * - * This imposes the requirement that userspace reload their - * CONSTANT_BUFFER on every batch, fortunately a requirement - * they are already accustomed to from before contexts were - * enabled. 
- */ - if (IS_GEN(i915, 4)) - intel_uncore_write(uncore, ECOSKPD, - _MASKED_BIT_ENABLE(ECO_CONSTANT_BUFFER_SR_DISABLE)); - - if (IS_GEN_RANGE(i915, 6, 7)) - intel_uncore_write(uncore, INSTPM, - _MASKED_BIT_ENABLE(INSTPM_FORCE_ORDERING)); - - return xcs_resume(engine); -} - static void reset_cancel(struct intel_engine_cs *engine) { struct i915_request *request; @@ -1139,8 +1113,6 @@ static void setup_rcs(struct intel_engine_cs *engine) if (IS_HASWELL(i915)) engine->emit_bb_start = hsw_emit_bb_start; - - engine->resume = rcs_resume; } static void setup_vcs(struct intel_engine_cs *engine) diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index fa1e15657663..94d66a9d760d 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -199,6 +199,18 @@ wa_masked_dis(struct i915_wa_list *wal, i915_reg_t reg, u32 val) #define WA_SET_FIELD_MASKED(addr, mask, value) \ wa_write_masked_or(wal, (addr), 0, _MASKED_FIELD((mask), (value))) +static void gen6_ctx_workarounds_init(struct intel_engine_cs *engine, + struct i915_wa_list *wal) +{ + WA_SET_BIT_MASKED(INSTPM, INSTPM_FORCE_ORDERING); +} + +static void gen7_ctx_workarounds_init(struct intel_engine_cs *engine, + struct i915_wa_list *wal) +{ + WA_SET_BIT_MASKED(INSTPM, INSTPM_FORCE_ORDERING); +} + static void gen8_ctx_workarounds_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) { @@ -638,6 +650,10 @@ __intel_engine_init_ctx_wa(struct intel_engine_cs *engine, chv_ctx_workarounds_init(engine, wal); else if (IS_BROADWELL(i915)) bdw_ctx_workarounds_init(engine, wal); + else if (IS_GEN(i915, 7)) + gen7_ctx_workarounds_init(engine, wal); + else if (IS_GEN(i915, 6)) + gen6_ctx_workarounds_init(engine, wal); else if (INTEL_GEN(i915) < 8) return; else @@ -1583,6 +1599,21 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) 0, _MASKED_BIT_ENABLE(VS_TIMER_DISPATCH), /* XXX bit doesn't stick on 
Broadwater */ IS_I965G(i915) ? 0 : VS_TIMER_DISPATCH); + + if (IS_GEN(i915, 4)) + /* + * Disable CONSTANT_BUFFER before it is loaded from the context + * image. For as it is loaded, it is executed and the stored + * address may no longer be valid, leading to a GPU hang. + * + * This imposes the requirement that userspace reload their + * CONSTANT_BUFFER on every batch, fortunately a requirement + * they are already accustomed to from before contexts were + * enabled. + */ + wa_add(wal, ECOSKPD, + 0, _MASKED_BIT_ENABLE(ECO_CONSTANT_BUFFER_SR_DISABLE), + 0 /* XXX bit doesn't stick on Broadwater */); } static void From dbf4081ffb68c0d9b518a34c715a8d8681658411 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Tue, 2 Jun 2020 00:55:08 +0300 Subject: [PATCH 102/222] drm/i915/params: don't expose inject_probe_failure in debugfs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The parameter only makes sense as a module parameter. Fixes: c43c5a8818d4 ("drm/i915/params: add i915 parameters to debugfs") Cc: Juha-Pekka Heikkilä Cc: Venkata Sandeep Dhanalakota Reviewed-by: Juha-Pekka Heikkila Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20200601215510.18379-1-jani.nikula@intel.com --- drivers/gpu/drm/i915/i915_params.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_params.h b/drivers/gpu/drm/i915/i915_params.h index 45323732f099..4f21bfffbf0e 100644 --- a/drivers/gpu/drm/i915/i915_params.h +++ b/drivers/gpu/drm/i915/i915_params.h @@ -64,7 +64,7 @@ struct drm_printer; param(int, mmio_debug, -IS_ENABLED(CONFIG_DRM_I915_DEBUG_MMIO), 0600) \ param(int, edp_vswing, 0, 0400) \ param(unsigned int, reset, 3, 0600) \ - param(unsigned int, inject_probe_failure, 0, 0600) \ + param(unsigned int, inject_probe_failure, 0, 0) \ param(int, fastboot, -1, 0600) \ param(int, enable_dpcd_backlight, -1, 0600) \ param(char *, force_probe, CONFIG_DRM_I915_FORCE_PROBE, 0400) \
From f322e851f20e534cf5305332a9ad5eefadb55d56 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Tue, 2 Jun 2020 00:55:09 +0300 Subject: [PATCH 103/222] drm/i915/params: fix i915.fake_lmem_start module param sysfs permissions fake_lmem_start does not need to be mutable via module param sysfs. It's only used during driver probe. Fixes: 1629224324b6 ("drm/i915/lmem: add the fake lmem region") Cc: Matthew Auld Cc: Joonas Lahtinen Cc: Chris Wilson Reviewed-by: Rodrigo Vivi Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20200601215510.18379-2-jani.nikula@intel.com --- drivers/gpu/drm/i915/i915_params.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_params.c b/drivers/gpu/drm/i915/i915_params.c index add00ec1f787..a3dde770226d 100644 --- a/drivers/gpu/drm/i915/i915_params.c +++ b/drivers/gpu/drm/i915/i915_params.c @@ -173,7 +173,7 @@ i915_param_named(enable_gvt, bool, 0400, #endif #if IS_ENABLED(CONFIG_DRM_I915_UNSTABLE_FAKE_LMEM) -i915_param_named_unsafe(fake_lmem_start, ulong, 0600, +i915_param_named_unsafe(fake_lmem_start, ulong, 0400, "Fake LMEM start offset (default: 0)"); #endif From db80066cf33c3a50abb2418beab9b042c6f9a44b Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Tue, 2 Jun 2020 00:55:10 +0300 Subject: [PATCH 104/222] drm/i915/params: prevent changing module params runtime MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Only support runtime changes through the debugfs. i915.verbose_state_checks remains an exception, and is not exposed via debugfs. This depends on IGT having been updated to use the debugfs for modifying the parameters. 
Cc: Juha-Pekka Heikkilä Cc: Venkata Sandeep Dhanalakota Reviewed-by: Juha-Pekka Heikkila Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20200601215510.18379-3-jani.nikula@intel.com --- drivers/gpu/drm/i915/i915_params.c | 38 +++++++++++++++++++----------- 1 file changed, 24 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_params.c b/drivers/gpu/drm/i915/i915_params.c index a3dde770226d..ace44ad7e6df 100644 --- a/drivers/gpu/drm/i915/i915_params.c +++ b/drivers/gpu/drm/i915/i915_params.c @@ -40,6 +40,15 @@ struct i915_params i915_modparams __read_mostly = { #undef MEMBER }; +/* + * Note: As a rule, keep module parameter sysfs permissions read-only + * 0400. Runtime changes are only supported through i915 debugfs. + * + * For any exceptions requiring write access and runtime changes through module + * parameter sysfs, prevent debugfs file creation by setting the parameter's + * debugfs mode to 0. + */ + i915_param_named(modeset, int, 0400, "Use kernel modesetting [KMS] (0=disable, " "1=on, -1=force vga console preference [default])"); @@ -49,7 +58,7 @@ i915_param_named_unsafe(enable_dc, int, 0400, "(-1=auto [default]; 0=disable; 1=up to DC5; 2=up to DC6; " "3=up to DC5 with DC3CO; 4=up to DC6 with DC3CO)"); -i915_param_named_unsafe(enable_fbc, int, 0600, +i915_param_named_unsafe(enable_fbc, int, 0400, "Enable frame buffer compression for power savings " "(default: -1 (use per-chip default))"); @@ -57,7 +66,7 @@ i915_param_named_unsafe(lvds_channel_mode, int, 0400, "Specify LVDS channel mode " "(0=probe BIOS [default], 1=single-channel, 2=dual-channel)"); -i915_param_named_unsafe(panel_use_ssc, int, 0600, +i915_param_named_unsafe(panel_use_ssc, int, 0400, "Use Spread Spectrum Clock with panels [LVDS/eDP] " "(default: auto from VBT)"); @@ -65,25 +74,25 @@ i915_param_named_unsafe(vbt_sdvo_panel_type, int, 0400, "Override/Ignore selection of SDVO panel mode in the VBT " "(-2=ignore, -1=auto [default], index in VBT BIOS 
table)"); -i915_param_named_unsafe(reset, int, 0600, +i915_param_named_unsafe(reset, int, 0400, "Attempt GPU resets (0=disabled, 1=full gpu reset, 2=engine reset [default])"); i915_param_named_unsafe(vbt_firmware, charp, 0400, "Load VBT from specified file under /lib/firmware"); #if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR) -i915_param_named(error_capture, bool, 0600, +i915_param_named(error_capture, bool, 0400, "Record the GPU state following a hang. " "This information in /sys/class/drm/card/error is vital for " "triaging and debugging hangs."); #endif -i915_param_named_unsafe(enable_hangcheck, bool, 0600, +i915_param_named_unsafe(enable_hangcheck, bool, 0400, "Periodically check GPU activity for detecting hangs. " "WARNING: Disabling this can cause system wide hangs. " "(default: true)"); -i915_param_named_unsafe(enable_psr, int, 0600, +i915_param_named_unsafe(enable_psr, int, 0400, "Enable PSR " "(0=disabled, 1=enabled) " "Default: -1 (use per-chip default)"); @@ -96,22 +105,22 @@ i915_param_named_unsafe(disable_power_well, int, 0400, "Disable display power wells when possible " "(-1=auto [default], 0=power wells always on, 1=power wells disabled when possible)"); -i915_param_named_unsafe(enable_ips, int, 0600, "Enable IPS (default: true)"); +i915_param_named_unsafe(enable_ips, int, 0400, "Enable IPS (default: true)"); -i915_param_named(fastboot, int, 0600, +i915_param_named(fastboot, int, 0400, "Try to skip unnecessary mode sets at boot time " "(0=disabled, 1=enabled) " "Default: -1 (use per-chip default)"); -i915_param_named_unsafe(load_detect_test, bool, 0600, +i915_param_named_unsafe(load_detect_test, bool, 0400, "Force-enable the VGA load detect code for testing (default:false). " "For developers only."); -i915_param_named_unsafe(force_reset_modeset_test, bool, 0600, +i915_param_named_unsafe(force_reset_modeset_test, bool, 0400, "Force a modeset during gpu reset for testing (default:false). 
" "For developers only."); -i915_param_named_unsafe(invert_brightness, int, 0600, +i915_param_named_unsafe(invert_brightness, int, 0400, "Invert backlight brightness " "(-1 force normal, 0 machine defaults, 1 force inversion), please " "report PCI device ID, subsystem vendor and subsystem device ID " @@ -121,10 +130,11 @@ i915_param_named_unsafe(invert_brightness, int, 0600, i915_param_named(disable_display, bool, 0400, "Disable display (default: false)"); -i915_param_named(mmio_debug, int, 0600, +i915_param_named(mmio_debug, int, 0400, "Enable the MMIO debug code for the first N failures (default: off). " "This may negatively affect performance."); +/* Special case writable file */ i915_param_named(verbose_state_checks, bool, 0600, "Enable verbose logs (ie. WARN_ON()) in case of unexpected hw state conditions."); @@ -155,7 +165,7 @@ i915_param_named_unsafe(huc_firmware_path, charp, 0400, i915_param_named_unsafe(dmc_firmware_path, charp, 0400, "DMC firmware path to use instead of the default one"); -i915_param_named_unsafe(enable_dp_mst, bool, 0600, +i915_param_named_unsafe(enable_dp_mst, bool, 0400, "Enable multi-stream transport (MST) for new DisplayPort sinks. 
(default: true)"); #if IS_ENABLED(CONFIG_DRM_I915_DEBUG) @@ -163,7 +173,7 @@ i915_param_named_unsafe(inject_probe_failure, uint, 0400, "Force an error after a number of failure check points (0:disabled (default), N:force failure at the Nth failure check point)"); #endif -i915_param_named(enable_dpcd_backlight, int, 0600, +i915_param_named(enable_dpcd_backlight, int, 0400, "Enable support for DPCD backlight control" "(-1=use per-VBT LFP backlight type setting [default], 0=disabled, 1=enabled)"); From d161306161f8cae29e5672c4fd1c42cf7e9ae95a Mon Sep 17 00:00:00 2001 From: Vivek Kasireddy Date: Fri, 22 May 2020 13:26:30 -0700 Subject: [PATCH 105/222] drm/i915/dsi: Don't forget to clean up the connector on error (v2) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If an error is encountered during the DSI initialization setup, the drm connector object also needs to be cleaned up along with the encoder. The error can happen due to a missing mode in the VBT or for other reasons. v2: Rephrase the commit message to make it more clear. 
Cc: Jani Nikula Cc: Vandita Kulkarni Signed-off-by: Vivek Kasireddy Reviewed-by: José Roberto de Souza Signed-off-by: José Roberto de Souza Link: https://patchwork.freedesktop.org/patch/msgid/20200522202630.7604-1-vivek.kasireddy@intel.com --- drivers/gpu/drm/i915/display/icl_dsi.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/i915/display/icl_dsi.c b/drivers/gpu/drm/i915/display/icl_dsi.c index 25200f289e6e..8c55f5bee9ab 100644 --- a/drivers/gpu/drm/i915/display/icl_dsi.c +++ b/drivers/gpu/drm/i915/display/icl_dsi.c @@ -1949,6 +1949,7 @@ void icl_dsi_init(struct drm_i915_private *dev_priv) return; err: + drm_connector_cleanup(connector); drm_encoder_cleanup(&encoder->base); kfree(intel_dsi); kfree(intel_connector); From c95ebab1c7c4c400bb2922b8e9d443747cd645b5 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 2 Jun 2020 16:48:39 +0100 Subject: [PATCH 106/222] drm/i915/selftests: Ignore autoincrementing timestamp on verifying whitelists MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As a timestamp will automatically update itself, it will not hold only contents we write into it, and will change from the baseline value making us suspect that our writes are landing. As this confuses us and we would need more careful treatment to detect invalid stores into the timestamp, skip it when verifying the whitelists. 
Signed-off-by: Chris Wilson Reviewed-by: José Roberto de Souza Link: https://patchwork.freedesktop.org/patch/msgid/20200602154839.6902-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/selftest_workarounds.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/drivers/gpu/drm/i915/gt/selftest_workarounds.c b/drivers/gpu/drm/i915/gt/selftest_workarounds.c index 32785463ec9e..febc9e6692ba 100644 --- a/drivers/gpu/drm/i915/gt/selftest_workarounds.c +++ b/drivers/gpu/drm/i915/gt/selftest_workarounds.c @@ -417,6 +417,20 @@ static bool wo_register(struct intel_engine_cs *engine, u32 reg) return false; } +static bool timestamp(const struct intel_engine_cs *engine, u32 reg) +{ + reg = (reg - engine->mmio_base) & ~RING_FORCE_TO_NONPRIV_ACCESS_MASK; + switch (reg) { + case 0x358: + case 0x35c: + case 0x3a8: + return true; + + default: + return false; + } +} + static bool ro_register(u32 reg) { if ((reg & RING_FORCE_TO_NONPRIV_ACCESS_MASK) == @@ -497,6 +511,9 @@ static int check_dirty_whitelist(struct intel_context *ce) if (wo_register(engine, reg)) continue; + if (timestamp(engine, reg)) + continue; /* timestamps are expected to autoincrement */ + ro_reg = ro_register(reg); /* Clear non priv flags */ From 5f4ae2704d59ee02b6e6d1a20e0ecfd273ae758c Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 2 Jun 2020 15:05:40 +0100 Subject: [PATCH 107/222] drm/i915: Identify Cometlake platform MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cometlake is a small refresh of Coffeelake, but since we have found out a difference in the platforms, we need to identify them as separate platforms. Since we previously took Coffeelake/Cometlake as identical, update all IS_COFFEELAKE() to also include IS_COMETLAKE(). 
Signed-off-by: Chris Wilson Reviewed-by: José Roberto de Souza Link: https://patchwork.freedesktop.org/patch/msgid/20200602140541.5481-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/display/intel_csr.c | 4 ++- drivers/gpu/drm/i915/display/intel_ddi.c | 34 +++++++++++++------ drivers/gpu/drm/i915/display/intel_hdcp.c | 7 ++-- drivers/gpu/drm/i915/gt/intel_workarounds.c | 18 +++++++---- drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c | 2 +- drivers/gpu/drm/i915/gvt/display.c | 30 +++++++++++------ drivers/gpu/drm/i915/gvt/edid.c | 2 +- drivers/gpu/drm/i915/gvt/handlers.c | 17 ++++++---- drivers/gpu/drm/i915/i915_drv.h | 9 ++++++ drivers/gpu/drm/i915/i915_pci.c | 22 ++++++++++--- drivers/gpu/drm/i915/intel_device_info.c | 1 + drivers/gpu/drm/i915/intel_device_info.h | 1 + drivers/gpu/drm/i915/intel_gvt.c | 2 ++ drivers/gpu/drm/i915/intel_pch.c | 36 ++++++++++++++------- drivers/gpu/drm/i915/intel_pm.c | 10 ++++-- 15 files changed, 140 insertions(+), 55 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_csr.c b/drivers/gpu/drm/i915/display/intel_csr.c index 319932b03e88..9843c9af6c13 100644 --- a/drivers/gpu/drm/i915/display/intel_csr.c +++ b/drivers/gpu/drm/i915/display/intel_csr.c @@ -707,7 +707,9 @@ void intel_csr_ucode_init(struct drm_i915_private *dev_priv) csr->fw_path = GLK_CSR_PATH; csr->required_version = GLK_CSR_VERSION_REQUIRED; csr->max_fw_size = GLK_CSR_MAX_FW_SIZE; - } else if (IS_KABYLAKE(dev_priv) || IS_COFFEELAKE(dev_priv)) { + } else if (IS_KABYLAKE(dev_priv) || + IS_COFFEELAKE(dev_priv) || + IS_COMETLAKE(dev_priv)) { csr->fw_path = KBL_CSR_PATH; csr->required_version = KBL_CSR_VERSION_REQUIRED; csr->max_fw_size = KBL_CSR_MAX_FW_SIZE; diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c index cd211f48c401..bb8107ab5a51 100644 --- a/drivers/gpu/drm/i915/display/intel_ddi.c +++ b/drivers/gpu/drm/i915/display/intel_ddi.c @@ -722,10 +722,14 @@ skl_get_buf_trans_dp(struct drm_i915_private *dev_priv, int 
*n_entries) static const struct ddi_buf_trans * kbl_get_buf_trans_dp(struct drm_i915_private *dev_priv, int *n_entries) { - if (IS_KBL_ULX(dev_priv) || IS_CFL_ULX(dev_priv)) { + if (IS_KBL_ULX(dev_priv) || + IS_CFL_ULX(dev_priv) || + IS_CML_ULX(dev_priv)) { *n_entries = ARRAY_SIZE(kbl_y_ddi_translations_dp); return kbl_y_ddi_translations_dp; - } else if (IS_KBL_ULT(dev_priv) || IS_CFL_ULT(dev_priv)) { + } else if (IS_KBL_ULT(dev_priv) || + IS_CFL_ULT(dev_priv) || + IS_CML_ULT(dev_priv)) { *n_entries = ARRAY_SIZE(kbl_u_ddi_translations_dp); return kbl_u_ddi_translations_dp; } else { @@ -738,12 +742,16 @@ static const struct ddi_buf_trans * skl_get_buf_trans_edp(struct drm_i915_private *dev_priv, int *n_entries) { if (dev_priv->vbt.edp.low_vswing) { - if (IS_SKL_ULX(dev_priv) || IS_KBL_ULX(dev_priv) || - IS_CFL_ULX(dev_priv)) { + if (IS_SKL_ULX(dev_priv) || + IS_KBL_ULX(dev_priv) || + IS_CFL_ULX(dev_priv) || + IS_CML_ULX(dev_priv)) { *n_entries = ARRAY_SIZE(skl_y_ddi_translations_edp); return skl_y_ddi_translations_edp; - } else if (IS_SKL_ULT(dev_priv) || IS_KBL_ULT(dev_priv) || - IS_CFL_ULT(dev_priv)) { + } else if (IS_SKL_ULT(dev_priv) || + IS_KBL_ULT(dev_priv) || + IS_CFL_ULT(dev_priv) || + IS_CML_ULT(dev_priv)) { *n_entries = ARRAY_SIZE(skl_u_ddi_translations_edp); return skl_u_ddi_translations_edp; } else { @@ -752,7 +760,9 @@ skl_get_buf_trans_edp(struct drm_i915_private *dev_priv, int *n_entries) } } - if (IS_KABYLAKE(dev_priv) || IS_COFFEELAKE(dev_priv)) + if (IS_KABYLAKE(dev_priv) || + IS_COFFEELAKE(dev_priv) || + IS_COMETLAKE(dev_priv)) return kbl_get_buf_trans_dp(dev_priv, n_entries); else return skl_get_buf_trans_dp(dev_priv, n_entries); @@ -761,8 +771,10 @@ skl_get_buf_trans_edp(struct drm_i915_private *dev_priv, int *n_entries) static const struct ddi_buf_trans * skl_get_buf_trans_hdmi(struct drm_i915_private *dev_priv, int *n_entries) { - if (IS_SKL_ULX(dev_priv) || IS_KBL_ULX(dev_priv) || - IS_CFL_ULX(dev_priv)) { + if (IS_SKL_ULX(dev_priv) || + 
IS_KBL_ULX(dev_priv) || + IS_CFL_ULX(dev_priv) || + IS_CML_ULX(dev_priv)) { *n_entries = ARRAY_SIZE(skl_y_ddi_translations_hdmi); return skl_y_ddi_translations_hdmi; } else { @@ -784,7 +796,9 @@ static const struct ddi_buf_trans * intel_ddi_get_buf_trans_dp(struct drm_i915_private *dev_priv, enum port port, int *n_entries) { - if (IS_KABYLAKE(dev_priv) || IS_COFFEELAKE(dev_priv)) { + if (IS_KABYLAKE(dev_priv) || + IS_COFFEELAKE(dev_priv) || + IS_COMETLAKE(dev_priv)) { const struct ddi_buf_trans *ddi_translations = kbl_get_buf_trans_dp(dev_priv, n_entries); *n_entries = skl_buf_trans_num_entries(port, *n_entries); diff --git a/drivers/gpu/drm/i915/display/intel_hdcp.c b/drivers/gpu/drm/i915/display/intel_hdcp.c index 2cbc4619b4ce..815b054bb167 100644 --- a/drivers/gpu/drm/i915/display/intel_hdcp.c +++ b/drivers/gpu/drm/i915/display/intel_hdcp.c @@ -1923,8 +1923,11 @@ static bool is_hdcp2_supported(struct drm_i915_private *dev_priv) if (!IS_ENABLED(CONFIG_INTEL_MEI_HDCP)) return false; - return (INTEL_GEN(dev_priv) >= 10 || IS_GEMINILAKE(dev_priv) || - IS_KABYLAKE(dev_priv) || IS_COFFEELAKE(dev_priv)); + return (INTEL_GEN(dev_priv) >= 10 || + IS_GEMINILAKE(dev_priv) || + IS_KABYLAKE(dev_priv) || + IS_COFFEELAKE(dev_priv) || + IS_COMETLAKE(dev_priv)); } void intel_hdcp_component_init(struct drm_i915_private *dev_priv) diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index 94d66a9d760d..6e1accbcc045 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -361,7 +361,10 @@ static void gen9_ctx_workarounds_init(struct intel_engine_cs *engine, HDC_FORCE_NON_COHERENT); /* WaDisableSamplerPowerBypassForSOPingPong:skl,bxt,kbl,cfl */ - if (IS_SKYLAKE(i915) || IS_KABYLAKE(i915) || IS_COFFEELAKE(i915)) + if (IS_SKYLAKE(i915) || + IS_KABYLAKE(i915) || + IS_COFFEELAKE(i915) || + IS_COMETLAKE(i915)) WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3, GEN8_SAMPLER_POWER_BYPASS_DIS); @@ 
-636,7 +639,7 @@ __intel_engine_init_ctx_wa(struct intel_engine_cs *engine, icl_ctx_workarounds_init(engine, wal); else if (IS_CANNONLAKE(i915)) cnl_ctx_workarounds_init(engine, wal); - else if (IS_COFFEELAKE(i915)) + else if (IS_COFFEELAKE(i915) || IS_COMETLAKE(i915)) cfl_ctx_workarounds_init(engine, wal); else if (IS_GEMINILAKE(i915)) glk_ctx_workarounds_init(engine, wal); @@ -706,7 +709,7 @@ static void gen9_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) { /* WaDisableKillLogic:bxt,skl,kbl */ - if (!IS_COFFEELAKE(i915)) + if (!IS_COFFEELAKE(i915) && !IS_COMETLAKE(i915)) wa_write_or(wal, GAM_ECOCHK, ECOCHK_DIS_TLB); @@ -969,7 +972,7 @@ gt_init_workarounds(struct drm_i915_private *i915, struct i915_wa_list *wal) icl_gt_workarounds_init(i915, wal); else if (IS_CANNONLAKE(i915)) cnl_gt_workarounds_init(i915, wal); - else if (IS_COFFEELAKE(i915)) + else if (IS_COFFEELAKE(i915) || IS_COMETLAKE(i915)) cfl_gt_workarounds_init(i915, wal); else if (IS_GEMINILAKE(i915)) glk_gt_workarounds_init(i915, wal); @@ -1304,7 +1307,7 @@ void intel_engine_init_whitelist(struct intel_engine_cs *engine) icl_whitelist_build(engine); else if (IS_CANNONLAKE(i915)) cnl_whitelist_build(engine); - else if (IS_COFFEELAKE(i915)) + else if (IS_COFFEELAKE(i915) || IS_COMETLAKE(i915)) cfl_whitelist_build(engine); else if (IS_GEMINILAKE(i915)) glk_whitelist_build(engine); @@ -1515,7 +1518,10 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) GEN9_FFSC_PERCTX_PREEMPT_CTRL); } - if (IS_SKYLAKE(i915) || IS_KABYLAKE(i915) || IS_COFFEELAKE(i915)) { + if (IS_SKYLAKE(i915) || + IS_KABYLAKE(i915) || + IS_COFFEELAKE(i915) || + IS_COMETLAKE(i915)) { /* WaEnableGapsTsvCreditFix:skl,kbl,cfl */ wa_write_or(wal, GEN8_GARBCNTL, diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c index 9b6218128d09..e75be3999358 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c 
@@ -55,7 +55,7 @@ void intel_uc_fw_change_status(struct intel_uc_fw *uc_fw, fw_def(TIGERLAKE, 0, guc_def(tgl, 35, 2, 0), huc_def(tgl, 7, 0, 12)) \ fw_def(ELKHARTLAKE, 0, guc_def(ehl, 33, 0, 4), huc_def(ehl, 9, 0, 0)) \ fw_def(ICELAKE, 0, guc_def(icl, 33, 0, 0), huc_def(icl, 9, 0, 0)) \ - fw_def(COFFEELAKE, 5, guc_def(cml, 33, 0, 0), huc_def(cml, 4, 0, 0)) \ + fw_def(COMETLAKE, 5, guc_def(cml, 33, 0, 0), huc_def(cml, 4, 0, 0)) \ fw_def(COFFEELAKE, 0, guc_def(kbl, 33, 0, 0), huc_def(kbl, 4, 0, 0)) \ fw_def(GEMINILAKE, 0, guc_def(glk, 33, 0, 0), huc_def(glk, 4, 0, 0)) \ fw_def(KABYLAKE, 0, guc_def(kbl, 33, 0, 0), huc_def(kbl, 4, 0, 0)) \ diff --git a/drivers/gpu/drm/i915/gvt/display.c b/drivers/gpu/drm/i915/gvt/display.c index a83df2f84eb9..776a73a19503 100644 --- a/drivers/gpu/drm/i915/gvt/display.c +++ b/drivers/gpu/drm/i915/gvt/display.c @@ -199,8 +199,10 @@ static void emulate_monitor_status_change(struct intel_vgpu *vgpu) SDE_PORTC_HOTPLUG_CPT | SDE_PORTD_HOTPLUG_CPT); - if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv) || - IS_COFFEELAKE(dev_priv)) { + if (IS_SKYLAKE(dev_priv) || + IS_KABYLAKE(dev_priv) || + IS_COFFEELAKE(dev_priv) || + IS_COMETLAKE(dev_priv)) { vgpu_vreg_t(vgpu, SDEISR) &= ~(SDE_PORTA_HOTPLUG_SPT | SDE_PORTE_HOTPLUG_SPT); vgpu_vreg_t(vgpu, SKL_FUSE_STATUS) |= @@ -275,8 +277,10 @@ static void emulate_monitor_status_change(struct intel_vgpu *vgpu) vgpu_vreg_t(vgpu, SFUSE_STRAP) |= SFUSE_STRAP_DDID_DETECTED; } - if ((IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv) || - IS_COFFEELAKE(dev_priv)) && + if ((IS_SKYLAKE(dev_priv) || + IS_KABYLAKE(dev_priv) || + IS_COFFEELAKE(dev_priv) || + IS_COMETLAKE(dev_priv)) && intel_vgpu_has_monitor_on_port(vgpu, PORT_E)) { vgpu_vreg_t(vgpu, SDEISR) |= SDE_PORTE_HOTPLUG_SPT; } @@ -459,8 +463,10 @@ void intel_vgpu_emulate_hotplug(struct intel_vgpu *vgpu, bool connected) struct drm_i915_private *i915 = vgpu->gvt->gt->i915; /* TODO: add more platforms support */ - if (IS_SKYLAKE(i915) || IS_KABYLAKE(i915) || - 
IS_COFFEELAKE(i915)) { + if (IS_SKYLAKE(i915) || + IS_KABYLAKE(i915) || + IS_COFFEELAKE(i915) || + IS_COMETLAKE(i915)) { if (connected) { vgpu_vreg_t(vgpu, SFUSE_STRAP) |= SFUSE_STRAP_DDID_DETECTED; @@ -488,8 +494,10 @@ void intel_vgpu_clean_display(struct intel_vgpu *vgpu) { struct drm_i915_private *dev_priv = vgpu->gvt->gt->i915; - if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv) || - IS_COFFEELAKE(dev_priv)) + if (IS_SKYLAKE(dev_priv) || + IS_KABYLAKE(dev_priv) || + IS_COFFEELAKE(dev_priv) || + IS_COMETLAKE(dev_priv)) clean_virtual_dp_monitor(vgpu, PORT_D); else clean_virtual_dp_monitor(vgpu, PORT_B); @@ -512,8 +520,10 @@ int intel_vgpu_init_display(struct intel_vgpu *vgpu, u64 resolution) intel_vgpu_init_i2c_edid(vgpu); - if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv) || - IS_COFFEELAKE(dev_priv)) + if (IS_SKYLAKE(dev_priv) || + IS_KABYLAKE(dev_priv) || + IS_COFFEELAKE(dev_priv) || + IS_COMETLAKE(dev_priv)) return setup_virtual_dp_monitor(vgpu, PORT_D, GVT_DP_D, resolution); else diff --git a/drivers/gpu/drm/i915/gvt/edid.c b/drivers/gpu/drm/i915/gvt/edid.c index 190651df5db1..22247805c345 100644 --- a/drivers/gpu/drm/i915/gvt/edid.c +++ b/drivers/gpu/drm/i915/gvt/edid.c @@ -149,7 +149,7 @@ static int gmbus0_mmio_write(struct intel_vgpu *vgpu, if (IS_BROXTON(i915)) port = bxt_get_port_from_gmbus0(pin_select); - else if (IS_COFFEELAKE(i915)) + else if (IS_COFFEELAKE(i915) || IS_COMETLAKE(i915)) port = cnp_get_port_from_gmbus0(pin_select); else port = get_port_from_gmbus0(pin_select); diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c index 3e88e3b5c43a..26cae4846c82 100644 --- a/drivers/gpu/drm/i915/gvt/handlers.c +++ b/drivers/gpu/drm/i915/gvt/handlers.c @@ -59,7 +59,7 @@ unsigned long intel_gvt_get_device_type(struct intel_gvt *gvt) return D_KBL; else if (IS_BROXTON(i915)) return D_BXT; - else if (IS_COFFEELAKE(i915)) + else if (IS_COFFEELAKE(i915) || IS_COMETLAKE(i915)) return D_CFL; return 0; @@ -1435,7 +1435,8 @@ 
static int mailbox_write(struct intel_vgpu *vgpu, unsigned int offset, case GEN9_PCODE_READ_MEM_LATENCY: if (IS_SKYLAKE(vgpu->gvt->gt->i915) || IS_KABYLAKE(vgpu->gvt->gt->i915) || - IS_COFFEELAKE(vgpu->gvt->gt->i915)) { + IS_COFFEELAKE(vgpu->gvt->gt->i915) || + IS_COMETLAKE(vgpu->gvt->gt->i915)) { /** * "Read memory latency" command on gen9. * Below memory latency values are read @@ -1460,7 +1461,8 @@ static int mailbox_write(struct intel_vgpu *vgpu, unsigned int offset, case SKL_PCODE_CDCLK_CONTROL: if (IS_SKYLAKE(vgpu->gvt->gt->i915) || IS_KABYLAKE(vgpu->gvt->gt->i915) || - IS_COFFEELAKE(vgpu->gvt->gt->i915)) + IS_COFFEELAKE(vgpu->gvt->gt->i915) || + IS_COMETLAKE(vgpu->gvt->gt->i915)) *data0 = SKL_CDCLK_READY_FOR_CHANGE; break; case GEN6_PCODE_READ_RC6VIDS: @@ -1722,7 +1724,8 @@ static int ring_mode_mmio_write(struct intel_vgpu *vgpu, unsigned int offset, int ret; (*(u32 *)p_data) &= ~_MASKED_BIT_ENABLE(1); - if (IS_COFFEELAKE(vgpu->gvt->gt->i915)) + if (IS_COFFEELAKE(vgpu->gvt->gt->i915) || + IS_COMETLAKE(vgpu->gvt->gt->i915)) (*(u32 *)p_data) &= ~_MASKED_BIT_ENABLE(2); write_vreg(vgpu, offset, p_data, bytes); @@ -1731,7 +1734,8 @@ static int ring_mode_mmio_write(struct intel_vgpu *vgpu, unsigned int offset, return 0; } - if (IS_COFFEELAKE(vgpu->gvt->gt->i915) && + if ((IS_COFFEELAKE(vgpu->gvt->gt->i915) || + IS_COMETLAKE(vgpu->gvt->gt->i915)) && data & _MASKED_BIT_ENABLE(2)) { enter_failsafe_mode(vgpu, GVT_FAILSAFE_UNSUPPORTED_GUEST); return 0; @@ -3393,7 +3397,8 @@ int intel_gvt_setup_mmio_info(struct intel_gvt *gvt) goto err; } else if (IS_SKYLAKE(i915) || IS_KABYLAKE(i915) || - IS_COFFEELAKE(i915)) { + IS_COFFEELAKE(i915) || + IS_COMETLAKE(i915)) { ret = init_bdw_mmio_info(gvt); if (ret) goto err; diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 7436bd9f7f20..f1078e79cf6f 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1411,6 +1411,7 @@ IS_SUBPLATFORM(const struct drm_i915_private 
*i915, #define IS_KABYLAKE(dev_priv) IS_PLATFORM(dev_priv, INTEL_KABYLAKE) #define IS_GEMINILAKE(dev_priv) IS_PLATFORM(dev_priv, INTEL_GEMINILAKE) #define IS_COFFEELAKE(dev_priv) IS_PLATFORM(dev_priv, INTEL_COFFEELAKE) +#define IS_COMETLAKE(dev_priv) IS_PLATFORM(dev_priv, INTEL_COMETLAKE) #define IS_CANNONLAKE(dev_priv) IS_PLATFORM(dev_priv, INTEL_CANNONLAKE) #define IS_ICELAKE(dev_priv) IS_PLATFORM(dev_priv, INTEL_ICELAKE) #define IS_ELKHARTLAKE(dev_priv) IS_PLATFORM(dev_priv, INTEL_ELKHARTLAKE) @@ -1459,6 +1460,14 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915, INTEL_INFO(dev_priv)->gt == 2) #define IS_CFL_GT3(dev_priv) (IS_COFFEELAKE(dev_priv) && \ INTEL_INFO(dev_priv)->gt == 3) + +#define IS_CML_ULT(dev_priv) \ + IS_SUBPLATFORM(dev_priv, INTEL_COMETLAKE, INTEL_SUBPLATFORM_ULT) +#define IS_CML_ULX(dev_priv) \ + IS_SUBPLATFORM(dev_priv, INTEL_COMETLAKE, INTEL_SUBPLATFORM_ULX) +#define IS_CML_GT2(dev_priv) (IS_COMETLAKE(dev_priv) && \ + INTEL_INFO(dev_priv)->gt == 2) + #define IS_CNL_WITH_PORT_F(dev_priv) \ IS_SUBPLATFORM(dev_priv, INTEL_CANNONLAKE, INTEL_SUBPLATFORM_PORTF) #define IS_ICL_WITH_PORT_F(dev_priv) \ diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index eb6d4a0c9196..e6054b166fe8 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -766,6 +766,20 @@ static const struct intel_device_info cfl_gt3_info = { BIT(RCS0) | BIT(VCS0) | BIT(BCS0) | BIT(VECS0) | BIT(VCS1), }; +#define CML_PLATFORM \ + GEN9_FEATURES, \ + PLATFORM(INTEL_COMETLAKE) + +static const struct intel_device_info cml_gt1_info = { + CML_PLATFORM, + .gt = 1, +}; + +static const struct intel_device_info cml_gt2_info = { + CML_PLATFORM, + .gt = 2, +}; + #define GEN10_FEATURES \ GEN9_FEATURES, \ GEN(10), \ @@ -942,10 +956,10 @@ static const struct pci_device_id pciidlist[] = { INTEL_WHL_U_GT2_IDS(&cfl_gt2_info), INTEL_AML_CFL_GT2_IDS(&cfl_gt2_info), INTEL_WHL_U_GT3_IDS(&cfl_gt3_info), - INTEL_CML_GT1_IDS(&cfl_gt1_info), - 
INTEL_CML_GT2_IDS(&cfl_gt2_info), - INTEL_CML_U_GT1_IDS(&cfl_gt1_info), - INTEL_CML_U_GT2_IDS(&cfl_gt2_info), + INTEL_CML_GT1_IDS(&cml_gt1_info), + INTEL_CML_GT2_IDS(&cml_gt2_info), + INTEL_CML_U_GT1_IDS(&cml_gt1_info), + INTEL_CML_U_GT2_IDS(&cml_gt2_info), INTEL_CNL_IDS(&cnl_info), INTEL_ICL_11_IDS(&icl_info), INTEL_EHL_IDS(&ehl_info), diff --git a/drivers/gpu/drm/i915/intel_device_info.c b/drivers/gpu/drm/i915/intel_device_info.c index c245c10c9bee..544ac61fbc36 100644 --- a/drivers/gpu/drm/i915/intel_device_info.c +++ b/drivers/gpu/drm/i915/intel_device_info.c @@ -57,6 +57,7 @@ static const char * const platform_names[] = { PLATFORM_NAME(KABYLAKE), PLATFORM_NAME(GEMINILAKE), PLATFORM_NAME(COFFEELAKE), + PLATFORM_NAME(COMETLAKE), PLATFORM_NAME(CANNONLAKE), PLATFORM_NAME(ICELAKE), PLATFORM_NAME(ELKHARTLAKE), diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h index c912acd06109..3613c04904e0 100644 --- a/drivers/gpu/drm/i915/intel_device_info.h +++ b/drivers/gpu/drm/i915/intel_device_info.h @@ -73,6 +73,7 @@ enum intel_platform { INTEL_KABYLAKE, INTEL_GEMINILAKE, INTEL_COFFEELAKE, + INTEL_COMETLAKE, /* gen10 */ INTEL_CANNONLAKE, /* gen11 */ diff --git a/drivers/gpu/drm/i915/intel_gvt.c b/drivers/gpu/drm/i915/intel_gvt.c index 21b91313cc5d..dd8981340d6e 100644 --- a/drivers/gpu/drm/i915/intel_gvt.c +++ b/drivers/gpu/drm/i915/intel_gvt.c @@ -52,6 +52,8 @@ static bool is_supported_device(struct drm_i915_private *dev_priv) return true; if (IS_COFFEELAKE(dev_priv)) return true; + if (IS_COMETLAKE(dev_priv)) + return true; return false; } diff --git a/drivers/gpu/drm/i915/intel_pch.c b/drivers/gpu/drm/i915/intel_pch.c index 102b03d24f90..c668e99eb2e4 100644 --- a/drivers/gpu/drm/i915/intel_pch.c +++ b/drivers/gpu/drm/i915/intel_pch.c @@ -64,37 +64,49 @@ intel_pch_type(const struct drm_i915_private *dev_priv, unsigned short id) case INTEL_PCH_SPT_LP_DEVICE_ID_TYPE: drm_dbg_kms(&dev_priv->drm, "Found SunrisePoint LP PCH\n"); 
drm_WARN_ON(&dev_priv->drm, - !IS_SKYLAKE(dev_priv) && !IS_KABYLAKE(dev_priv) && - !IS_COFFEELAKE(dev_priv)); + !IS_SKYLAKE(dev_priv) && + !IS_KABYLAKE(dev_priv) && + !IS_COFFEELAKE(dev_priv) && + !IS_COMETLAKE(dev_priv)); return PCH_SPT; case INTEL_PCH_KBP_DEVICE_ID_TYPE: drm_dbg_kms(&dev_priv->drm, "Found Kaby Lake PCH (KBP)\n"); drm_WARN_ON(&dev_priv->drm, - !IS_SKYLAKE(dev_priv) && !IS_KABYLAKE(dev_priv) && - !IS_COFFEELAKE(dev_priv)); + !IS_SKYLAKE(dev_priv) && + !IS_KABYLAKE(dev_priv) && + !IS_COFFEELAKE(dev_priv) && + !IS_COMETLAKE(dev_priv)); /* KBP is SPT compatible */ return PCH_SPT; case INTEL_PCH_CNP_DEVICE_ID_TYPE: drm_dbg_kms(&dev_priv->drm, "Found Cannon Lake PCH (CNP)\n"); - drm_WARN_ON(&dev_priv->drm, !IS_CANNONLAKE(dev_priv) && - !IS_COFFEELAKE(dev_priv)); + drm_WARN_ON(&dev_priv->drm, + !IS_CANNONLAKE(dev_priv) && + !IS_COFFEELAKE(dev_priv) && + !IS_COMETLAKE(dev_priv)); return PCH_CNP; case INTEL_PCH_CNP_LP_DEVICE_ID_TYPE: drm_dbg_kms(&dev_priv->drm, "Found Cannon Lake LP PCH (CNP-LP)\n"); - drm_WARN_ON(&dev_priv->drm, !IS_CANNONLAKE(dev_priv) && - !IS_COFFEELAKE(dev_priv)); + drm_WARN_ON(&dev_priv->drm, + !IS_CANNONLAKE(dev_priv) && + !IS_COFFEELAKE(dev_priv) && + !IS_COMETLAKE(dev_priv)); return PCH_CNP; case INTEL_PCH_CMP_DEVICE_ID_TYPE: case INTEL_PCH_CMP2_DEVICE_ID_TYPE: drm_dbg_kms(&dev_priv->drm, "Found Comet Lake PCH (CMP)\n"); - drm_WARN_ON(&dev_priv->drm, !IS_COFFEELAKE(dev_priv) && + drm_WARN_ON(&dev_priv->drm, + !IS_COFFEELAKE(dev_priv) && + !IS_COMETLAKE(dev_priv) && !IS_ROCKETLAKE(dev_priv)); /* CometPoint is CNP Compatible */ return PCH_CNP; case INTEL_PCH_CMP_V_DEVICE_ID_TYPE: drm_dbg_kms(&dev_priv->drm, "Found Comet Lake V PCH (CMP-V)\n"); - drm_WARN_ON(&dev_priv->drm, !IS_COFFEELAKE(dev_priv)); + drm_WARN_ON(&dev_priv->drm, + !IS_COFFEELAKE(dev_priv) && + !IS_COMETLAKE(dev_priv)); /* Comet Lake V PCH is based on KBP, which is SPT compatible */ return PCH_SPT; case INTEL_PCH_ICP_DEVICE_ID_TYPE: @@ -149,7 +161,9 @@ 
intel_virt_detect_pch(const struct drm_i915_private *dev_priv) id = INTEL_PCH_MCC_DEVICE_ID_TYPE; else if (IS_ICELAKE(dev_priv)) id = INTEL_PCH_ICP_DEVICE_ID_TYPE; - else if (IS_CANNONLAKE(dev_priv) || IS_COFFEELAKE(dev_priv)) + else if (IS_CANNONLAKE(dev_priv) || + IS_COFFEELAKE(dev_priv) || + IS_COMETLAKE(dev_priv)) id = INTEL_PCH_CNP_DEVICE_ID_TYPE; else if (IS_KABYLAKE(dev_priv) || IS_SKYLAKE(dev_priv)) id = INTEL_PCH_SPT_DEVICE_ID_TYPE; diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index b134a1b9d738..26b670fa3f88 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -5256,7 +5256,9 @@ static void skl_compute_plane_wm(const struct intel_crtc_state *crtc_state, * WaIncreaseLatencyIPCEnabled: kbl,cfl * Display WA #1141: kbl,cfl */ - if ((IS_KABYLAKE(dev_priv) || IS_COFFEELAKE(dev_priv)) && + if ((IS_KABYLAKE(dev_priv) || + IS_COFFEELAKE(dev_priv) || + IS_COMETLAKE(dev_priv)) && dev_priv->ipc_enabled) latency += 4; @@ -6822,7 +6824,9 @@ static bool intel_can_enable_ipc(struct drm_i915_private *dev_priv) return false; /* Display WA #1141: SKL:all KBL:all CFL */ - if (IS_KABYLAKE(dev_priv) || IS_COFFEELAKE(dev_priv)) + if (IS_KABYLAKE(dev_priv) || + IS_COFFEELAKE(dev_priv) || + IS_COMETLAKE(dev_priv)) return dev_priv->dram_info.symmetric_memory; return true; @@ -7703,7 +7707,7 @@ void intel_init_clock_gating_hooks(struct drm_i915_private *dev_priv) dev_priv->display.init_clock_gating = icl_init_clock_gating; else if (IS_CANNONLAKE(dev_priv)) dev_priv->display.init_clock_gating = cnl_init_clock_gating; - else if (IS_COFFEELAKE(dev_priv)) + else if (IS_COFFEELAKE(dev_priv) || IS_COMETLAKE(dev_priv)) dev_priv->display.init_clock_gating = cfl_init_clock_gating; else if (IS_SKYLAKE(dev_priv)) dev_priv->display.init_clock_gating = skl_init_clock_gating; From dbc7e72897a4e565a69b6d3533c87cfeec4a2831 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 2 Jun 2020 16:48:39 +0100 Subject: [PATCH 108/222] 
drm/i915/gt: Make the CTX_TIMESTAMP readable on !rcs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For reasons that be, the HW only allows userspace to read its own CTX_TIMESTAMP [context local HW runtime] on rcs. Make it available for all by adding it to the whitelists. v2: The change took effect from Cometlake. Signed-off-by: Chris Wilson Reviewed-by: José Roberto de Souza Link: https://patchwork.freedesktop.org/patch/msgid/20200602154839.6902-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/intel_workarounds.c | 25 ++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index 6e1accbcc045..0731bbcef06c 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -1206,6 +1206,18 @@ static void cfl_whitelist_build(struct intel_engine_cs *engine) RING_FORCE_TO_NONPRIV_RANGE_4); } +static void cml_whitelist_build(struct intel_engine_cs *engine) +{ + struct i915_wa_list *w = &engine->whitelist; + + if (engine->class != RENDER_CLASS) + whitelist_reg_ext(w, + RING_CTX_TIMESTAMP(engine->mmio_base), + RING_FORCE_TO_NONPRIV_ACCESS_RD); + + cfl_whitelist_build(engine); +} + static void cnl_whitelist_build(struct intel_engine_cs *engine) { struct i915_wa_list *w = &engine->whitelist; @@ -1256,9 +1268,15 @@ static void icl_whitelist_build(struct intel_engine_cs *engine) /* hucStatus2RegOffset */ whitelist_reg_ext(w, _MMIO(0x23B0 + engine->mmio_base), RING_FORCE_TO_NONPRIV_ACCESS_RD); + whitelist_reg_ext(w, + RING_CTX_TIMESTAMP(engine->mmio_base), + RING_FORCE_TO_NONPRIV_ACCESS_RD); break; default: + whitelist_reg_ext(w, + RING_CTX_TIMESTAMP(engine->mmio_base), + RING_FORCE_TO_NONPRIV_ACCESS_RD); break; } } @@ -1290,6 +1308,9 @@ static void tgl_whitelist_build(struct intel_engine_cs *engine) whitelist_reg(w, HIZ_CHICKEN); break; default: + whitelist_reg_ext(w, + 
RING_CTX_TIMESTAMP(engine->mmio_base), + RING_FORCE_TO_NONPRIV_ACCESS_RD); break; } } @@ -1307,7 +1328,9 @@ void intel_engine_init_whitelist(struct intel_engine_cs *engine) icl_whitelist_build(engine); else if (IS_CANNONLAKE(i915)) cnl_whitelist_build(engine); - else if (IS_COFFEELAKE(i915) || IS_COMETLAKE(i915)) + else if (IS_COMETLAKE(i915)) + cml_whitelist_build(engine); + else if (IS_COFFEELAKE(i915)) cfl_whitelist_build(engine); else if (IS_GEMINILAKE(i915)) glk_whitelist_build(engine); From 6783ebda63c53d477ee8707a6e14bb09d48680de Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 3 Jun 2020 11:46:57 +0100 Subject: [PATCH 109/222] drm/i915/gt: Suppress the error message for GT init failure on error injection If we injected an error (such as pretending the GuC firmware was broken), then suppress the error message as it is expected and our CI complains if it sees any *ERROR*. Signed-off-by: Chris Wilson Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20200603104657.25651-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/intel_gt_pm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_pm.c index 6bdb434a442d..f1d5333f9456 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.c @@ -214,8 +214,8 @@ int intel_gt_resume(struct intel_gt *gt) /* Only when the HW is re-initialised, can we replay the requests */ err = intel_gt_init_hw(gt); if (err) { - drm_err(&gt->i915->drm, - "Failed to initialize GPU, declaring it wedged!\n"); + i915_probe_error(gt->i915, + "Failed to initialize GPU, declaring it wedged!\n"); goto err_wedged; } From 5a833995364141a3306e234500800277aea52334 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 2 Jun 2020 23:09:53 +0100 Subject: [PATCH 110/222] drm/i915: Drop i915_request.i915 backpointer We infrequently use the direct i915 backpointer from the i915_request, so do we 
really need to waste the space in the struct for it? 8 bytes from the most frequently allocated struct vs an 3 bytes and pointer chasing in using rq->engine->i915? Signed-off-by: Chris Wilson Reviewed-by: Akeem G Abodunrin Link: https://patchwork.freedesktop.org/patch/msgid/20200602220953.21178-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c | 4 ++-- drivers/gpu/drm/i915/gt/gen2_engine_cs.c | 2 +- drivers/gpu/drm/i915/gt/intel_context_sseu.c | 2 +- drivers/gpu/drm/i915/gt/intel_engine_cs.c | 6 ++---- drivers/gpu/drm/i915/gt/intel_lrc.c | 6 +++--- drivers/gpu/drm/i915/gt/intel_ring_submission.c | 6 +++--- drivers/gpu/drm/i915/gt/intel_workarounds.c | 4 ++-- drivers/gpu/drm/i915/gt/selftest_engine_cs.c | 2 +- drivers/gpu/drm/i915/gt/selftest_mocs.c | 2 +- drivers/gpu/drm/i915/gt/selftest_rc6.c | 9 ++++----- drivers/gpu/drm/i915/gt/selftest_timeline.c | 4 ++-- drivers/gpu/drm/i915/gvt/scheduler.c | 4 ++-- drivers/gpu/drm/i915/i915_request.c | 12 ++++++------ drivers/gpu/drm/i915/i915_request.h | 3 --- drivers/gpu/drm/i915/i915_trace.h | 10 +++++----- drivers/gpu/drm/i915/selftests/i915_perf.c | 2 +- drivers/gpu/drm/i915/selftests/igt_spinner.c | 14 +++++++------- 17 files changed, 43 insertions(+), 49 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index 219a36995b96..02a5c0ce39ca 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -1910,8 +1910,8 @@ static int i915_reset_gen7_sol_offsets(struct i915_request *rq) u32 *cs; int i; - if (!IS_GEN(rq->i915, 7) || rq->engine->id != RCS0) { - drm_dbg(&rq->i915->drm, "sol reset is gen7/rcs only\n"); + if (!IS_GEN(rq->engine->i915, 7) || rq->engine->id != RCS0) { + drm_dbg(&rq->engine->i915->drm, "sol reset is gen7/rcs only\n"); return -EINVAL; } diff --git a/drivers/gpu/drm/i915/gt/gen2_engine_cs.c b/drivers/gpu/drm/i915/gt/gen2_engine_cs.c index 
8d2e85081247..3fb0dc1fb910 100644 --- a/drivers/gpu/drm/i915/gt/gen2_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/gen2_engine_cs.c @@ -77,7 +77,7 @@ int gen4_emit_flush_rcs(struct i915_request *rq, u32 mode) cmd = MI_FLUSH; if (mode & EMIT_INVALIDATE) { cmd |= MI_EXE_FLUSH; - if (IS_G4X(rq->i915) || IS_GEN(rq->i915, 5)) + if (IS_G4X(rq->engine->i915) || IS_GEN(rq->engine->i915, 5)) cmd |= MI_INVALIDATE_ISP; } diff --git a/drivers/gpu/drm/i915/gt/intel_context_sseu.c b/drivers/gpu/drm/i915/gt/intel_context_sseu.c index 487299cb91f2..27ae48049239 100644 --- a/drivers/gpu/drm/i915/gt/intel_context_sseu.c +++ b/drivers/gpu/drm/i915/gt/intel_context_sseu.c @@ -30,7 +30,7 @@ static int gen8_emit_rpcs_config(struct i915_request *rq, *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; *cs++ = lower_32_bits(offset); *cs++ = upper_32_bits(offset); - *cs++ = intel_sseu_make_rpcs(rq->i915, &sseu); + *cs++ = intel_sseu_make_rpcs(rq->engine->i915, &sseu); intel_ring_advance(rq, cs); diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index c8c14981eb5d..e37490d459c2 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -661,7 +661,6 @@ static int measure_breadcrumb_dw(struct intel_context *ce) if (!frame) return -ENOMEM; - frame->rq.i915 = engine->i915; frame->rq.engine = engine; frame->rq.context = ce; rcu_assign_pointer(frame->rq.timeline, ce->timeline); @@ -1192,8 +1191,7 @@ bool intel_engine_can_store_dword(struct intel_engine_cs *engine) } } -static int print_sched_attr(struct drm_i915_private *i915, - const struct i915_sched_attr *attr, +static int print_sched_attr(const struct i915_sched_attr *attr, char *buf, int x, int len) { if (attr->priority == I915_PRIORITY_INVALID) @@ -1213,7 +1211,7 @@ static void print_request(struct drm_printer *m, char buf[80] = ""; int x = 0; - x = print_sched_attr(rq->i915, &rq->sched.attr, buf, x, sizeof(buf)); + x = print_sched_attr(&rq->sched.attr, 
buf, x, sizeof(buf)); drm_printf(m, "%s %llx:%llx%s%s %s @ %dms: %s\n", prefix, diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index 6fc0966b75ff..aac8da18694f 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -3533,7 +3533,7 @@ static int emit_pdps(struct i915_request *rq) int err, i; u32 *cs; - GEM_BUG_ON(intel_vgpu_active(rq->i915)); + GEM_BUG_ON(intel_vgpu_active(rq->engine->i915)); /* * Beware ye of the dragons, this sequence is magic! @@ -4512,11 +4512,11 @@ static int gen8_emit_flush_render(struct i915_request *request, * On GEN9: before VF_CACHE_INVALIDATE we need to emit a NULL * pipe control. */ - if (IS_GEN(request->i915, 9)) + if (IS_GEN(request->engine->i915, 9)) vf_flush_wa = true; /* WaForGAMHang:kbl */ - if (IS_KBL_REVID(request->i915, 0, KBL_REVID_B0)) + if (IS_KBL_REVID(request->engine->i915, 0, KBL_REVID_B0)) dc_flush_wa = true; } diff --git a/drivers/gpu/drm/i915/gt/intel_ring_submission.c b/drivers/gpu/drm/i915/gt/intel_ring_submission.c index d9c1701061b9..68a08486fc87 100644 --- a/drivers/gpu/drm/i915/gt/intel_ring_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_ring_submission.c @@ -645,8 +645,8 @@ static inline int mi_set_context(struct i915_request *rq, struct intel_context *ce, u32 flags) { - struct drm_i915_private *i915 = rq->i915; struct intel_engine_cs *engine = rq->engine; + struct drm_i915_private *i915 = engine->i915; enum intel_engine_id id; const int num_engines = IS_HASWELL(i915) ? 
RUNTIME_INFO(i915)->num_engines - 1 : 0; @@ -760,7 +760,7 @@ static inline int mi_set_context(struct i915_request *rq, static int remap_l3_slice(struct i915_request *rq, int slice) { - u32 *cs, *remap_info = rq->i915->l3_parity.remap_info[slice]; + u32 *cs, *remap_info = rq->engine->i915->l3_parity.remap_info[slice]; int i; if (!remap_info) @@ -871,7 +871,7 @@ static int switch_context(struct i915_request *rq) void **residuals = NULL; int ret; - GEM_BUG_ON(HAS_EXECLISTS(rq->i915)); + GEM_BUG_ON(HAS_EXECLISTS(engine->i915)); if (engine->wa_ctx.vma && ce != engine->kernel_context) { if (engine->wa_ctx.vma->private != ce) { diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index 0731bbcef06c..30cd798b9664 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -1757,7 +1757,7 @@ wa_list_srm(struct i915_request *rq, const struct i915_wa_list *wal, struct i915_vma *vma) { - struct drm_i915_private *i915 = rq->i915; + struct drm_i915_private *i915 = rq->engine->i915; unsigned int i, count = 0; const struct i915_wa *wa; u32 srm, *cs; @@ -1846,7 +1846,7 @@ static int engine_wa_list_verify(struct intel_context *ce, err = 0; for (i = 0, wa = wal->list; i < wal->count; i++, wa++) { - if (mcr_range(rq->i915, i915_mmio_reg_offset(wa->reg))) + if (mcr_range(rq->engine->i915, i915_mmio_reg_offset(wa->reg))) continue; if (!wa_verify(wa, results[i], wal->name, from)) diff --git a/drivers/gpu/drm/i915/gt/selftest_engine_cs.c b/drivers/gpu/drm/i915/gt/selftest_engine_cs.c index f88e445a1cae..729c3c7b11e2 100644 --- a/drivers/gpu/drm/i915/gt/selftest_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/selftest_engine_cs.c @@ -49,7 +49,7 @@ static int write_timestamp(struct i915_request *rq, int slot) return PTR_ERR(cs); cmd = MI_STORE_REGISTER_MEM | MI_USE_GGTT; - if (INTEL_GEN(rq->i915) >= 8) + if (INTEL_GEN(rq->engine->i915) >= 8) cmd++; *cs++ = cmd; *cs++ = 
i915_mmio_reg_offset(RING_TIMESTAMP(rq->engine->mmio_base)); diff --git a/drivers/gpu/drm/i915/gt/selftest_mocs.c b/drivers/gpu/drm/i915/gt/selftest_mocs.c index 8831ffee2061..7bae64018ad9 100644 --- a/drivers/gpu/drm/i915/gt/selftest_mocs.c +++ b/drivers/gpu/drm/i915/gt/selftest_mocs.c @@ -143,7 +143,7 @@ static int read_mocs_table(struct i915_request *rq, { u32 addr; - if (HAS_GLOBAL_MOCS_REGISTERS(rq->i915)) + if (HAS_GLOBAL_MOCS_REGISTERS(rq->engine->i915)) addr = global_mocs_offset(); else addr = mocs_offset(rq->engine); diff --git a/drivers/gpu/drm/i915/gt/selftest_rc6.c b/drivers/gpu/drm/i915/gt/selftest_rc6.c index 2dc460624bbc..3c8434846fa1 100644 --- a/drivers/gpu/drm/i915/gt/selftest_rc6.c +++ b/drivers/gpu/drm/i915/gt/selftest_rc6.c @@ -132,7 +132,7 @@ static const u32 *__live_rc6_ctx(struct intel_context *ce) } cmd = MI_STORE_REGISTER_MEM | MI_USE_GGTT; - if (INTEL_GEN(rq->i915) >= 8) + if (INTEL_GEN(rq->engine->i915) >= 8) cmd++; *cs++ = cmd; @@ -197,10 +197,10 @@ int live_rc6_ctx_wa(void *arg) int pass; for (pass = 0; pass < 2; pass++) { + struct i915_gpu_error *error = &gt->i915->gpu_error; struct intel_context *ce; unsigned int resets = - i915_reset_engine_count(&gt->i915->gpu_error, - engine); + i915_reset_engine_count(error, engine); const u32 *res; /* Use a sacrifical context */ @@ -230,8 +230,7 @@ int live_rc6_ctx_wa(void *arg) engine->name, READ_ONCE(*res)); if (resets != - i915_reset_engine_count(&gt->i915->gpu_error, - engine)) { + i915_reset_engine_count(error, engine)) { pr_err("%s: GPU reset required\n", engine->name); add_taint_for_CI(TAINT_WARN); diff --git a/drivers/gpu/drm/i915/gt/selftest_timeline.c b/drivers/gpu/drm/i915/gt/selftest_timeline.c index ef1c35073dc0..b2aad7ef046a 100644 --- a/drivers/gpu/drm/i915/gt/selftest_timeline.c +++ b/drivers/gpu/drm/i915/gt/selftest_timeline.c @@ -426,12 +426,12 @@ static int emit_ggtt_store_dw(struct i915_request *rq, u32 addr, u32 value) if (IS_ERR(cs)) return PTR_ERR(cs); - if (INTEL_GEN(rq->i915) 
>= 8) { + if (INTEL_GEN(rq->engine->i915) >= 8) { *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; *cs++ = addr; *cs++ = 0; *cs++ = value; - } else if (INTEL_GEN(rq->i915) >= 4) { + } else if (INTEL_GEN(rq->engine->i915) >= 4) { *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; *cs++ = 0; *cs++ = addr; diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c index 3a9bd8e4d8db..1e4dd4544dcf 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.c +++ b/drivers/gpu/drm/i915/gvt/scheduler.c @@ -348,7 +348,7 @@ static int copy_workload_to_ring_buffer(struct intel_vgpu_workload *workload) u32 *cs; int err; - if (IS_GEN(req->i915, 9) && is_inhibit_context(req->context)) + if (IS_GEN(req->engine->i915, 9) && is_inhibit_context(req->context)) intel_vgpu_restore_inhibit_context(vgpu, req); /* @@ -935,7 +935,7 @@ static void update_guest_context(struct intel_vgpu_workload *workload) context_page_num = rq->engine->context_size; context_page_num = context_page_num >> PAGE_SHIFT; - if (IS_BROADWELL(rq->i915) && rq->engine->id == RCS0) + if (IS_BROADWELL(rq->engine->i915) && rq->engine->id == RCS0) context_page_num = 19; context_base = (void *) ctx->lrc_reg_state - diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index c5d7220de529..3bb7320249ae 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -56,7 +56,7 @@ static struct i915_global_request { static const char *i915_fence_get_driver_name(struct dma_fence *fence) { - return dev_name(to_request(fence)->i915->drm.dev); + return dev_name(to_request(fence)->engine->i915->drm.dev); } static const char *i915_fence_get_timeline_name(struct dma_fence *fence) @@ -812,7 +812,6 @@ __i915_request_create(struct intel_context *ce, gfp_t gfp) } } - rq->i915 = ce->engine->i915; rq->context = ce; rq->engine = ce->engine; rq->ring = ce->ring; @@ -1011,12 +1010,12 @@ __emit_semaphore_wait(struct i915_request *to, struct i915_request *from, u32 
seqno) { - const int has_token = INTEL_GEN(to->i915) >= 12; + const int has_token = INTEL_GEN(to->engine->i915) >= 12; u32 hwsp_offset; int len, err; u32 *cs; - GEM_BUG_ON(INTEL_GEN(to->i915) < 8); + GEM_BUG_ON(INTEL_GEN(to->engine->i915) < 8); GEM_BUG_ON(i915_request_has_initial_breadcrumb(to)); /* We need to pin the signaler's HWSP until we are finished reading. */ @@ -1211,7 +1210,7 @@ __i915_request_await_external(struct i915_request *rq, struct dma_fence *fence) { mark_external(rq); return i915_sw_fence_await_dma_fence(&rq->submit, fence, - i915_fence_context_timeout(rq->i915, + i915_fence_context_timeout(rq->engine->i915, fence->context), I915_FENCE_GFP); } @@ -1782,7 +1781,8 @@ long i915_request_wait(struct i915_request *rq, * (bad for battery). */ if (flags & I915_WAIT_PRIORITY) { - if (!i915_request_started(rq) && INTEL_GEN(rq->i915) >= 6) + if (!i915_request_started(rq) && + INTEL_GEN(rq->engine->i915) >= 6) intel_rps_boost(rq); } diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h index 5d4709a3dace..118ab6650d1f 100644 --- a/drivers/gpu/drm/i915/i915_request.h +++ b/drivers/gpu/drm/i915/i915_request.h @@ -162,9 +162,6 @@ struct i915_request { struct dma_fence fence; spinlock_t lock; - /** On Which ring this request was generated */ - struct drm_i915_private *i915; - /** * Context and ring buffer related to this request * Contexts are refcounted, so when this request is associated with a diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h index bc854ad60954..a4addcc64978 100644 --- a/drivers/gpu/drm/i915/i915_trace.h +++ b/drivers/gpu/drm/i915/i915_trace.h @@ -735,7 +735,7 @@ TRACE_EVENT(i915_request_queue, ), TP_fast_assign( - __entry->dev = rq->i915->drm.primary->index; + __entry->dev = rq->engine->i915->drm.primary->index; __entry->class = rq->engine->uabi_class; __entry->instance = rq->engine->uabi_instance; __entry->ctx = rq->fence.context; @@ -761,7 +761,7 @@ 
DECLARE_EVENT_CLASS(i915_request, ), TP_fast_assign( - __entry->dev = rq->i915->drm.primary->index; + __entry->dev = rq->engine->i915->drm.primary->index; __entry->class = rq->engine->uabi_class; __entry->instance = rq->engine->uabi_instance; __entry->ctx = rq->fence.context; @@ -804,7 +804,7 @@ TRACE_EVENT(i915_request_in, ), TP_fast_assign( - __entry->dev = rq->i915->drm.primary->index; + __entry->dev = rq->engine->i915->drm.primary->index; __entry->class = rq->engine->uabi_class; __entry->instance = rq->engine->uabi_instance; __entry->ctx = rq->fence.context; @@ -833,7 +833,7 @@ TRACE_EVENT(i915_request_out, ), TP_fast_assign( - __entry->dev = rq->i915->drm.primary->index; + __entry->dev = rq->engine->i915->drm.primary->index; __entry->class = rq->engine->uabi_class; __entry->instance = rq->engine->uabi_instance; __entry->ctx = rq->fence.context; @@ -895,7 +895,7 @@ TRACE_EVENT(i915_request_wait_begin, * less desirable. */ TP_fast_assign( - __entry->dev = rq->i915->drm.primary->index; + __entry->dev = rq->engine->i915->drm.primary->index; __entry->class = rq->engine->uabi_class; __entry->instance = rq->engine->uabi_instance; __entry->ctx = rq->fence.context; diff --git a/drivers/gpu/drm/i915/selftests/i915_perf.c b/drivers/gpu/drm/i915/selftests/i915_perf.c index 8eb3108f1767..be54570c407c 100644 --- a/drivers/gpu/drm/i915/selftests/i915_perf.c +++ b/drivers/gpu/drm/i915/selftests/i915_perf.c @@ -162,7 +162,7 @@ static int write_timestamp(struct i915_request *rq, int slot) return PTR_ERR(cs); len = 5; - if (INTEL_GEN(rq->i915) >= 8) + if (INTEL_GEN(rq->engine->i915) >= 8) len++; *cs++ = GFX_OP_PIPE_CONTROL(len); diff --git a/drivers/gpu/drm/i915/selftests/igt_spinner.c b/drivers/gpu/drm/i915/selftests/igt_spinner.c index e35ba5f9e73f..699bfe0328fb 100644 --- a/drivers/gpu/drm/i915/selftests/igt_spinner.c +++ b/drivers/gpu/drm/i915/selftests/igt_spinner.c @@ -134,15 +134,15 @@ igt_spinner_create_request(struct igt_spinner *spin, batch = spin->batch; - if 
(INTEL_GEN(rq->i915) >= 8) { + if (INTEL_GEN(rq->engine->i915) >= 8) { *batch++ = MI_STORE_DWORD_IMM_GEN4; *batch++ = lower_32_bits(hws_address(hws, rq)); *batch++ = upper_32_bits(hws_address(hws, rq)); - } else if (INTEL_GEN(rq->i915) >= 6) { + } else if (INTEL_GEN(rq->engine->i915) >= 6) { *batch++ = MI_STORE_DWORD_IMM_GEN4; *batch++ = 0; *batch++ = hws_address(hws, rq); - } else if (INTEL_GEN(rq->i915) >= 4) { + } else if (INTEL_GEN(rq->engine->i915) >= 4) { *batch++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; *batch++ = 0; *batch++ = hws_address(hws, rq); @@ -154,11 +154,11 @@ igt_spinner_create_request(struct igt_spinner *spin, *batch++ = arbitration_command; - if (INTEL_GEN(rq->i915) >= 8) + if (INTEL_GEN(rq->engine->i915) >= 8) *batch++ = MI_BATCH_BUFFER_START | BIT(8) | 1; - else if (IS_HASWELL(rq->i915)) + else if (IS_HASWELL(rq->engine->i915)) *batch++ = MI_BATCH_BUFFER_START | MI_BATCH_PPGTT_HSW; - else if (INTEL_GEN(rq->i915) >= 6) + else if (INTEL_GEN(rq->engine->i915) >= 6) *batch++ = MI_BATCH_BUFFER_START; else *batch++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT; @@ -176,7 +176,7 @@ igt_spinner_create_request(struct igt_spinner *spin, } flags = 0; - if (INTEL_GEN(rq->i915) <= 5) + if (INTEL_GEN(rq->engine->i915) <= 5) flags |= I915_DISPATCH_SECURE; err = engine->emit_bb_start(rq, vma->node.start, PAGE_SIZE, flags); From d1d08a4994a8142e1c47ad28949035c507293b03 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 12 May 2020 20:41:39 +0300 Subject: [PATCH 111/222] drm/i915: Fix cpt/ppt max pre-emphasis MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit cpt/ppt support pre-emphasis level 3. Let's actually declare support for it, instead of clamping things to level 2. Also tweak the if-ladder in intel_dp_voltage_max() to match intel_dp_pre_emphasis_max() to make it easier to compare them. 
Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20200512174145.3186-2-ville.syrjala@linux.intel.com Reviewed-by: José Roberto de Souza --- drivers/gpu/drm/i915/display/intel_dp.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index 4749d2fe2324..338f6d046de3 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -4000,12 +4000,11 @@ intel_dp_voltage_max(struct intel_dp *intel_dp) if (HAS_DDI(dev_priv)) return intel_ddi_dp_voltage_max(encoder); - else if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) + else if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv) || + (HAS_PCH_CPT(dev_priv) && port != PORT_A)) return DP_TRAIN_VOLTAGE_SWING_LEVEL_3; else if (IS_IVYBRIDGE(dev_priv) && port == PORT_A) return DP_TRAIN_VOLTAGE_SWING_LEVEL_2; - else if (HAS_PCH_CPT(dev_priv) && port != PORT_A) - return DP_TRAIN_VOLTAGE_SWING_LEVEL_3; else return DP_TRAIN_VOLTAGE_SWING_LEVEL_2; } @@ -4019,7 +4018,8 @@ intel_dp_pre_emphasis_max(struct intel_dp *intel_dp, u8 voltage_swing) if (HAS_DDI(dev_priv)) { return intel_ddi_dp_pre_emphasis_max(encoder, voltage_swing); - } else if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) { + } else if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv) || + (HAS_PCH_CPT(dev_priv) && port != PORT_A)) { switch (voltage_swing & DP_TRAIN_VOLTAGE_SWING_MASK) { case DP_TRAIN_VOLTAGE_SWING_LEVEL_0: return DP_TRAIN_PRE_EMPH_LEVEL_3; From da882e6bb968b04195c32072376c4206f64037ef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 12 May 2020 20:41:40 +0300 Subject: [PATCH 112/222] drm/i915: Fix ibx max vswing/preemph MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit IBX supports vswing level 3 and pre-emphasis level 3. Don't limit it to level 2 for those. 
Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20200512174145.3186-3-ville.syrjala@linux.intel.com Reviewed-by: José Roberto de Souza --- drivers/gpu/drm/i915/display/intel_dp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index 338f6d046de3..67cb712f2d8f 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -4001,7 +4001,7 @@ intel_dp_voltage_max(struct intel_dp *intel_dp) if (HAS_DDI(dev_priv)) return intel_ddi_dp_voltage_max(encoder); else if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv) || - (HAS_PCH_CPT(dev_priv) && port != PORT_A)) + (HAS_PCH_SPLIT(dev_priv) && port != PORT_A)) return DP_TRAIN_VOLTAGE_SWING_LEVEL_3; else if (IS_IVYBRIDGE(dev_priv) && port == PORT_A) return DP_TRAIN_VOLTAGE_SWING_LEVEL_2; @@ -4019,7 +4019,7 @@ intel_dp_pre_emphasis_max(struct intel_dp *intel_dp, u8 voltage_swing) if (HAS_DDI(dev_priv)) { return intel_ddi_dp_pre_emphasis_max(encoder, voltage_swing); } else if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv) || - (HAS_PCH_CPT(dev_priv) && port != PORT_A)) { + (HAS_PCH_SPLIT(dev_priv) && port != PORT_A)) { switch (voltage_swing & DP_TRAIN_VOLTAGE_SWING_MASK) { case DP_TRAIN_VOLTAGE_SWING_LEVEL_0: return DP_TRAIN_PRE_EMPH_LEVEL_3; From 33520eae45f1c97670c8de7adec1cb5ab6a94416 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 12 May 2020 20:41:41 +0300 Subject: [PATCH 113/222] drm/i915: Fix ivb cpu edp vswing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit According to the DP spec supporting vswing 1 + preemph 2 is mandatory. We don't have the hw settings for that though. 
In order to pretend to follow the DP spec let's just select vswing 0 + preemph 2 in this case (the DP spec says to use the requested preemph in preference to the vswing when the requested values aren't supported). Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20200512174145.3186-4-ville.syrjala@linux.intel.com Reviewed-by: José Roberto de Souza --- drivers/gpu/drm/i915/display/intel_dp.c | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index 67cb712f2d8f..04fc07016c1a 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -4003,8 +4003,6 @@ intel_dp_voltage_max(struct intel_dp *intel_dp) else if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv) || (HAS_PCH_SPLIT(dev_priv) && port != PORT_A)) return DP_TRAIN_VOLTAGE_SWING_LEVEL_3; - else if (IS_IVYBRIDGE(dev_priv) && port == PORT_A) - return DP_TRAIN_VOLTAGE_SWING_LEVEL_2; else return DP_TRAIN_VOLTAGE_SWING_LEVEL_2; } @@ -4031,16 +4029,6 @@ intel_dp_pre_emphasis_max(struct intel_dp *intel_dp, u8 voltage_swing) default: return DP_TRAIN_PRE_EMPH_LEVEL_0; } - } else if (IS_IVYBRIDGE(dev_priv) && port == PORT_A) { - switch (voltage_swing & DP_TRAIN_VOLTAGE_SWING_MASK) { - case DP_TRAIN_VOLTAGE_SWING_LEVEL_0: - return DP_TRAIN_PRE_EMPH_LEVEL_2; - case DP_TRAIN_VOLTAGE_SWING_LEVEL_1: - case DP_TRAIN_VOLTAGE_SWING_LEVEL_2: - return DP_TRAIN_PRE_EMPH_LEVEL_1; - default: - return DP_TRAIN_PRE_EMPH_LEVEL_0; - } } else { switch (voltage_swing & DP_TRAIN_VOLTAGE_SWING_MASK) { case DP_TRAIN_VOLTAGE_SWING_LEVEL_0: @@ -4336,6 +4324,7 @@ static u32 ivb_cpu_edp_signal_levels(u8 train_set) case DP_TRAIN_VOLTAGE_SWING_LEVEL_0 | DP_TRAIN_PRE_EMPH_LEVEL_1: return EDP_LINK_TRAIN_400MV_3_5DB_IVB; case DP_TRAIN_VOLTAGE_SWING_LEVEL_0 | DP_TRAIN_PRE_EMPH_LEVEL_2: + case DP_TRAIN_VOLTAGE_SWING_LEVEL_1 | DP_TRAIN_PRE_EMPH_LEVEL_2: return 
EDP_LINK_TRAIN_400MV_6DB_IVB; case DP_TRAIN_VOLTAGE_SWING_LEVEL_1 | DP_TRAIN_PRE_EMPH_LEVEL_0: From 53de0a20c8cdfa4416a53818c822c68796e5f266 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 12 May 2020 20:41:42 +0300 Subject: [PATCH 114/222] drm/i915: Add {preemph,voltage}_max() vfuncs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Different platforms have different max vswing/preemph settings. Turn that into a pair of vfuncs so we can decouple intel_dp.c and intel_ddi.c further. Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20200512174145.3186-5-ville.syrjala@linux.intel.com Reviewed-by: Manasi Navare --- drivers/gpu/drm/i915/display/intel_ddi.c | 21 ++---- drivers/gpu/drm/i915/display/intel_ddi.h | 3 - .../drm/i915/display/intel_display_types.h | 3 + drivers/gpu/drm/i915/display/intel_dp.c | 69 ++++++------------- drivers/gpu/drm/i915/display/intel_dp.h | 4 -- .../drm/i915/display/intel_dp_link_training.c | 20 +++++- 6 files changed, 50 insertions(+), 70 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c index bb8107ab5a51..236f3762b6f9 100644 --- a/drivers/gpu/drm/i915/display/intel_ddi.c +++ b/drivers/gpu/drm/i915/display/intel_ddi.c @@ -2109,10 +2109,10 @@ static void bxt_ddi_vswing_sequence(struct intel_encoder *encoder, ddi_translations[level].deemphasis); } -u8 intel_ddi_dp_voltage_max(struct intel_encoder *encoder) +static u8 intel_ddi_dp_voltage_max(struct intel_dp *intel_dp) { + struct intel_encoder *encoder = &dp_to_dig_port(intel_dp)->base; struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct intel_dp *intel_dp = enc_to_intel_dp(encoder); enum port port = encoder->port; enum phy phy = intel_port_to_phy(dev_priv, port); int n_entries; @@ -2165,19 +2165,9 @@ u8 intel_ddi_dp_voltage_max(struct intel_encoder *encoder) * used on all DDI platforms. 
Should that change we need to * rethink this code. */ -u8 intel_ddi_dp_pre_emphasis_max(struct intel_encoder *encoder, u8 voltage_swing) +static u8 intel_ddi_dp_preemph_max(struct intel_dp *intel_dp) { - switch (voltage_swing & DP_TRAIN_VOLTAGE_SWING_MASK) { - case DP_TRAIN_VOLTAGE_SWING_LEVEL_0: - return DP_TRAIN_PRE_EMPH_LEVEL_3; - case DP_TRAIN_VOLTAGE_SWING_LEVEL_1: - return DP_TRAIN_PRE_EMPH_LEVEL_2; - case DP_TRAIN_VOLTAGE_SWING_LEVEL_2: - return DP_TRAIN_PRE_EMPH_LEVEL_1; - case DP_TRAIN_VOLTAGE_SWING_LEVEL_3: - default: - return DP_TRAIN_PRE_EMPH_LEVEL_0; - } + return DP_TRAIN_PRE_EMPH_LEVEL_3; } static void cnl_ddi_vswing_program(struct intel_encoder *encoder, @@ -4535,6 +4525,9 @@ intel_ddi_init_dp_connector(struct intel_digital_port *intel_dig_port) else intel_dig_port->dp.set_signal_levels = hsw_set_signal_levels; + intel_dig_port->dp.voltage_max = intel_ddi_dp_voltage_max; + intel_dig_port->dp.preemph_max = intel_ddi_dp_preemph_max; + if (INTEL_GEN(dev_priv) < 12) { intel_dig_port->dp.regs.dp_tp_ctl = DP_TP_CTL(port); intel_dig_port->dp.regs.dp_tp_status = DP_TP_STATUS(port); diff --git a/drivers/gpu/drm/i915/display/intel_ddi.h b/drivers/gpu/drm/i915/display/intel_ddi.h index fbdf8ddde486..077e9dbbe367 100644 --- a/drivers/gpu/drm/i915/display/intel_ddi.h +++ b/drivers/gpu/drm/i915/display/intel_ddi.h @@ -42,9 +42,6 @@ void intel_ddi_compute_min_voltage_level(struct drm_i915_private *dev_priv, struct intel_crtc_state *crtc_state); u32 bxt_signal_levels(struct intel_dp *intel_dp); u32 ddi_signal_levels(struct intel_dp *intel_dp); -u8 intel_ddi_dp_voltage_max(struct intel_encoder *encoder); -u8 intel_ddi_dp_pre_emphasis_max(struct intel_encoder *encoder, - u8 voltage_swing); int intel_ddi_toggle_hdcp_signalling(struct intel_encoder *intel_encoder, bool enable); void icl_sanitize_encoder_pll_mapping(struct intel_encoder *encoder); diff --git a/drivers/gpu/drm/i915/display/intel_display_types.h b/drivers/gpu/drm/i915/display/intel_display_types.h index 
363d30ceafce..76a49eac7305 100644 --- a/drivers/gpu/drm/i915/display/intel_display_types.h +++ b/drivers/gpu/drm/i915/display/intel_display_types.h @@ -1367,6 +1367,9 @@ struct intel_dp { void (*set_idle_link_train)(struct intel_dp *intel_dp); void (*set_signal_levels)(struct intel_dp *intel_dp); + u8 (*preemph_max)(struct intel_dp *intel_dp); + u8 (*voltage_max)(struct intel_dp *intel_dp); + /* Displayport compliance testing */ struct intel_dp_compliance compliance; diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index 04fc07016c1a..707791489122 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -3990,58 +3990,24 @@ intel_dp_get_link_status(struct intel_dp *intel_dp, u8 link_status[DP_LINK_STATU DP_LINK_STATUS_SIZE) == DP_LINK_STATUS_SIZE; } -/* These are source-specific values. */ -u8 -intel_dp_voltage_max(struct intel_dp *intel_dp) +static u8 intel_dp_voltage_max_2(struct intel_dp *intel_dp) { - struct drm_i915_private *dev_priv = dp_to_i915(intel_dp); - struct intel_encoder *encoder = &dp_to_dig_port(intel_dp)->base; - enum port port = encoder->port; - - if (HAS_DDI(dev_priv)) - return intel_ddi_dp_voltage_max(encoder); - else if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv) || - (HAS_PCH_SPLIT(dev_priv) && port != PORT_A)) - return DP_TRAIN_VOLTAGE_SWING_LEVEL_3; - else - return DP_TRAIN_VOLTAGE_SWING_LEVEL_2; + return DP_TRAIN_VOLTAGE_SWING_LEVEL_2; } -u8 -intel_dp_pre_emphasis_max(struct intel_dp *intel_dp, u8 voltage_swing) +static u8 intel_dp_voltage_max_3(struct intel_dp *intel_dp) { - struct drm_i915_private *dev_priv = dp_to_i915(intel_dp); - struct intel_encoder *encoder = &dp_to_dig_port(intel_dp)->base; - enum port port = encoder->port; + return DP_TRAIN_VOLTAGE_SWING_LEVEL_3; +} - if (HAS_DDI(dev_priv)) { - return intel_ddi_dp_pre_emphasis_max(encoder, voltage_swing); - } else if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv) || - 
(HAS_PCH_SPLIT(dev_priv) && port != PORT_A)) { - switch (voltage_swing & DP_TRAIN_VOLTAGE_SWING_MASK) { - case DP_TRAIN_VOLTAGE_SWING_LEVEL_0: - return DP_TRAIN_PRE_EMPH_LEVEL_3; - case DP_TRAIN_VOLTAGE_SWING_LEVEL_1: - return DP_TRAIN_PRE_EMPH_LEVEL_2; - case DP_TRAIN_VOLTAGE_SWING_LEVEL_2: - return DP_TRAIN_PRE_EMPH_LEVEL_1; - case DP_TRAIN_VOLTAGE_SWING_LEVEL_3: - default: - return DP_TRAIN_PRE_EMPH_LEVEL_0; - } - } else { - switch (voltage_swing & DP_TRAIN_VOLTAGE_SWING_MASK) { - case DP_TRAIN_VOLTAGE_SWING_LEVEL_0: - return DP_TRAIN_PRE_EMPH_LEVEL_2; - case DP_TRAIN_VOLTAGE_SWING_LEVEL_1: - return DP_TRAIN_PRE_EMPH_LEVEL_2; - case DP_TRAIN_VOLTAGE_SWING_LEVEL_2: - return DP_TRAIN_PRE_EMPH_LEVEL_1; - case DP_TRAIN_VOLTAGE_SWING_LEVEL_3: - default: - return DP_TRAIN_PRE_EMPH_LEVEL_0; - } - } +static u8 intel_dp_pre_empemph_max_2(struct intel_dp *intel_dp) +{ + return DP_TRAIN_PRE_EMPH_LEVEL_2; +} + +static u8 intel_dp_pre_empemph_max_3(struct intel_dp *intel_dp) +{ + return DP_TRAIN_PRE_EMPH_LEVEL_3; } static void vlv_set_signal_levels(struct intel_dp *intel_dp) @@ -8355,6 +8321,15 @@ bool intel_dp_init(struct drm_i915_private *dev_priv, else intel_dig_port->dp.set_signal_levels = g4x_set_signal_levels; + if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv) || + (HAS_PCH_SPLIT(dev_priv) && port != PORT_A)) { + intel_dig_port->dp.preemph_max = intel_dp_pre_empemph_max_3; + intel_dig_port->dp.voltage_max = intel_dp_voltage_max_3; + } else { + intel_dig_port->dp.preemph_max = intel_dp_pre_empemph_max_2; + intel_dig_port->dp.voltage_max = intel_dp_voltage_max_2; + } + intel_dig_port->dp.output_reg = output_reg; intel_dig_port->max_lanes = 4; intel_dig_port->dp.regs.dp_tp_ctl = DP_TP_CTL(port); diff --git a/drivers/gpu/drm/i915/display/intel_dp.h b/drivers/gpu/drm/i915/display/intel_dp.h index 1702959ca079..0a8950f744f6 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.h +++ b/drivers/gpu/drm/i915/display/intel_dp.h @@ -92,10 +92,6 @@ 
intel_dp_program_link_training_pattern(struct intel_dp *intel_dp, void intel_dp_set_signal_levels(struct intel_dp *intel_dp); void intel_dp_set_idle_link_train(struct intel_dp *intel_dp); -u8 -intel_dp_voltage_max(struct intel_dp *intel_dp); -u8 -intel_dp_pre_emphasis_max(struct intel_dp *intel_dp, u8 voltage_swing); void intel_dp_compute_rate(struct intel_dp *intel_dp, int port_clock, u8 *link_bw, u8 *rate_select); bool intel_dp_source_supports_hbr2(struct intel_dp *intel_dp); diff --git a/drivers/gpu/drm/i915/display/intel_dp_link_training.c b/drivers/gpu/drm/i915/display/intel_dp_link_training.c index e4f1843170b7..171d9e842fc0 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_link_training.c +++ b/drivers/gpu/drm/i915/display/intel_dp_link_training.c @@ -34,6 +34,21 @@ intel_dp_dump_link_status(const u8 link_status[DP_LINK_STATUS_SIZE]) link_status[3], link_status[4], link_status[5]); } +static u8 dp_pre_emphasis_max(u8 voltage_swing) +{ + switch (voltage_swing & DP_TRAIN_VOLTAGE_SWING_MASK) { + case DP_TRAIN_VOLTAGE_SWING_LEVEL_0: + return DP_TRAIN_PRE_EMPH_LEVEL_3; + case DP_TRAIN_VOLTAGE_SWING_LEVEL_1: + return DP_TRAIN_PRE_EMPH_LEVEL_2; + case DP_TRAIN_VOLTAGE_SWING_LEVEL_2: + return DP_TRAIN_PRE_EMPH_LEVEL_1; + case DP_TRAIN_VOLTAGE_SWING_LEVEL_3: + default: + return DP_TRAIN_PRE_EMPH_LEVEL_0; + } +} + void intel_dp_get_adjust_train(struct intel_dp *intel_dp, const u8 link_status[DP_LINK_STATUS_SIZE]) { @@ -53,11 +68,12 @@ void intel_dp_get_adjust_train(struct intel_dp *intel_dp, p = this_p; } - voltage_max = intel_dp_voltage_max(intel_dp); + voltage_max = intel_dp->voltage_max(intel_dp); if (v >= voltage_max) v = voltage_max | DP_TRAIN_MAX_SWING_REACHED; - preemph_max = intel_dp_pre_emphasis_max(intel_dp, v); + preemph_max = min(intel_dp->preemph_max(intel_dp), + dp_pre_emphasis_max(v)); if (p >= preemph_max) p = preemph_max | DP_TRAIN_MAX_PRE_EMPHASIS_REACHED; From f6adb5f061954f0f859e47117665554504f7ec80 Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 12 May 2020 20:41:43 +0300 Subject: [PATCH 115/222] drm/i915: Reverse preemph vs. voltage swing preference MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The DP spec says: "When the combination of the requested pre-emphasis level and voltage swing exceeds the capability of a DPTX, the DPTX shall set the pre-emphasis level according to the request and use the highest voltage swing it can output with the given pre-emphasis level." and "When a DPTX reads a request beyond the limits of this Standard, the DPTX shall set the pre-emphasis level according to the request and set the highest voltage swing level it can output with the given pre-emphasis level. If a DPTX is requested for 9.5dB of pre-emphasis level (may be supported for a DPTX) and cannot support that level, it shall set the pre-emphasis level to the next highest level, 6dB." Ie. we should first validate the pre-emphasis, and then select the appropriate vswing for it. 
Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20200512174145.3186-6-ville.syrjala@linux.intel.com Reviewed-by: Manasi Navare --- .../drm/i915/display/intel_dp_link_training.c | 32 +++++++++---------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_dp_link_training.c b/drivers/gpu/drm/i915/display/intel_dp_link_training.c index 171d9e842fc0..573f93779449 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_link_training.c +++ b/drivers/gpu/drm/i915/display/intel_dp_link_training.c @@ -34,18 +34,18 @@ intel_dp_dump_link_status(const u8 link_status[DP_LINK_STATUS_SIZE]) link_status[3], link_status[4], link_status[5]); } -static u8 dp_pre_emphasis_max(u8 voltage_swing) +static u8 dp_voltage_max(u8 preemph) { - switch (voltage_swing & DP_TRAIN_VOLTAGE_SWING_MASK) { - case DP_TRAIN_VOLTAGE_SWING_LEVEL_0: - return DP_TRAIN_PRE_EMPH_LEVEL_3; - case DP_TRAIN_VOLTAGE_SWING_LEVEL_1: - return DP_TRAIN_PRE_EMPH_LEVEL_2; - case DP_TRAIN_VOLTAGE_SWING_LEVEL_2: - return DP_TRAIN_PRE_EMPH_LEVEL_1; - case DP_TRAIN_VOLTAGE_SWING_LEVEL_3: + switch (preemph & DP_TRAIN_PRE_EMPHASIS_MASK) { + case DP_TRAIN_PRE_EMPH_LEVEL_0: + return DP_TRAIN_VOLTAGE_SWING_LEVEL_3; + case DP_TRAIN_PRE_EMPH_LEVEL_1: + return DP_TRAIN_VOLTAGE_SWING_LEVEL_2; + case DP_TRAIN_PRE_EMPH_LEVEL_2: + return DP_TRAIN_VOLTAGE_SWING_LEVEL_1; + case DP_TRAIN_PRE_EMPH_LEVEL_3: default: - return DP_TRAIN_PRE_EMPH_LEVEL_0; + return DP_TRAIN_VOLTAGE_SWING_LEVEL_0; } } @@ -68,15 +68,15 @@ void intel_dp_get_adjust_train(struct intel_dp *intel_dp, p = this_p; } - voltage_max = intel_dp->voltage_max(intel_dp); - if (v >= voltage_max) - v = voltage_max | DP_TRAIN_MAX_SWING_REACHED; - - preemph_max = min(intel_dp->preemph_max(intel_dp), - dp_pre_emphasis_max(v)); + preemph_max = intel_dp->preemph_max(intel_dp); if (p >= preemph_max) p = preemph_max | DP_TRAIN_MAX_PRE_EMPHASIS_REACHED; + voltage_max = min(intel_dp->voltage_max(intel_dp), + 
dp_voltage_max(p)); + if (v >= voltage_max) + v = voltage_max | DP_TRAIN_MAX_SWING_REACHED; + for (lane = 0; lane < 4; lane++) intel_dp->train_set[lane] = v | p; } From e2db55244e0ff2c03a864a49276754ad840e338f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 12 May 2020 20:41:45 +0300 Subject: [PATCH 116/222] drm/i915: Replace some hand rolled max()s MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use max() instead of hand rolling it. Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20200512174145.3186-8-ville.syrjala@linux.intel.com Reviewed-by: Manasi Navare --- drivers/gpu/drm/i915/display/intel_dp_link_training.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_dp_link_training.c b/drivers/gpu/drm/i915/display/intel_dp_link_training.c index 573f93779449..b9e4ee2dbddc 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_link_training.c +++ b/drivers/gpu/drm/i915/display/intel_dp_link_training.c @@ -59,13 +59,8 @@ void intel_dp_get_adjust_train(struct intel_dp *intel_dp, u8 preemph_max; for (lane = 0; lane < intel_dp->lane_count; lane++) { - u8 this_v = drm_dp_get_adjust_request_voltage(link_status, lane); - u8 this_p = drm_dp_get_adjust_request_pre_emphasis(link_status, lane); - - if (this_v > v) - v = this_v; - if (this_p > p) - p = this_p; + v = max(v, drm_dp_get_adjust_request_voltage(link_status, lane)); + p = max(p, drm_dp_get_adjust_request_pre_emphasis(link_status, lane)); } preemph_max = intel_dp->preemph_max(intel_dp); From 34becfdb945a5eb819b7c8e4f0ec5cc5952ec68f Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Tue, 2 Jun 2020 18:11:26 +0300 Subject: [PATCH 117/222] drm/i915/params: fix i915.reset module param type The reset member in i915_params was previously changed to unsigned, but this failed to change the actual module parameter. 
Fixes: aae970d8454b ("drm/i915: Mark i915.reset as unsigned") Cc: Chris Wilson Cc: Mika Kuoppala Reviewed-by: Chris Wilson Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20200602151126.25626-1-jani.nikula@intel.com --- drivers/gpu/drm/i915/i915_params.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_params.c b/drivers/gpu/drm/i915/i915_params.c index ace44ad7e6df..fd3b14caf4ce 100644 --- a/drivers/gpu/drm/i915/i915_params.c +++ b/drivers/gpu/drm/i915/i915_params.c @@ -74,7 +74,7 @@ i915_param_named_unsafe(vbt_sdvo_panel_type, int, 0400, "Override/Ignore selection of SDVO panel mode in the VBT " "(-2=ignore, -1=auto [default], index in VBT BIOS table)"); -i915_param_named_unsafe(reset, int, 0400, +i915_param_named_unsafe(reset, uint, 0400, "Attempt GPU resets (0=disabled, 1=full gpu reset, 2=engine reset [default])"); i915_param_named_unsafe(vbt_firmware, charp, 0400, From d61345f342981f31022f56277cf2826c8d28ae7e Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 4 Jun 2020 13:36:41 +0100 Subject: [PATCH 118/222] drm/i915/selftests: Exercise all copy engines with the blt routines Just to remove an obnoxious HAS_ENGINE(), and in the process make the code agnostic to the availability of any particular engine by making it exercise any and all such engines declared on the system.
Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Cc: Matthew Auld Cc: Daniele Ceraolo Spurio Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20200604123641.767-1-chris@chris-wilson.co.uk --- .../i915/gem/selftests/i915_gem_client_blt.c | 3 - .../i915/gem/selftests/i915_gem_object_blt.c | 55 ++++++++++++------- .../gpu/drm/i915/gem/selftests/mock_context.c | 37 +++++++++++++ .../gpu/drm/i915/gem/selftests/mock_context.h | 4 ++ drivers/gpu/drm/i915/i915_drv.h | 5 ++ 5 files changed, 80 insertions(+), 24 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c index 8fe3ad2ee34e..299c29e9ad86 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c @@ -702,8 +702,5 @@ int i915_gem_client_blt_live_selftests(struct drm_i915_private *i915) if (intel_gt_is_wedged(&i915->gt)) return 0; - if (!HAS_ENGINE(i915, BCS0)) - return 0; - return i915_live_subtests(tests, i915); } diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_object_blt.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_object_blt.c index 31549ad83fa6..23b6e11bbc3e 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_object_blt.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_object_blt.c @@ -193,7 +193,7 @@ err_src: } struct igt_thread_arg { - struct drm_i915_private *i915; + struct intel_engine_cs *engine; struct i915_gem_context *ctx; struct file *file; struct rnd_state prng; @@ -203,7 +203,7 @@ struct igt_thread_arg { static int igt_fill_blt_thread(void *arg) { struct igt_thread_arg *thread = arg; - struct drm_i915_private *i915 = thread->i915; + struct intel_engine_cs *engine = thread->engine; struct rnd_state *prng = &thread->prng; struct drm_i915_gem_object *obj; struct i915_gem_context *ctx; @@ -215,7 +215,7 @@ static int igt_fill_blt_thread(void *arg) ctx = thread->ctx; if (!ctx) { - ctx = 
live_context(i915, thread->file); + ctx = live_context_for_engine(engine, thread->file); if (IS_ERR(ctx)) return PTR_ERR(ctx); @@ -223,7 +223,7 @@ static int igt_fill_blt_thread(void *arg) ctx->sched.priority = I915_USER_PRIORITY(prio); } - ce = i915_gem_context_get_engine(ctx, BCS0); + ce = i915_gem_context_get_engine(ctx, 0); GEM_BUG_ON(IS_ERR(ce)); /* @@ -256,7 +256,7 @@ static int igt_fill_blt_thread(void *arg) pr_debug("%s with phys_sz= %x, sz=%x, val=%x\n", __func__, phys_sz, sz, val); - obj = huge_gem_object(i915, phys_sz, sz); + obj = huge_gem_object(engine->i915, phys_sz, sz); if (IS_ERR(obj)) { err = PTR_ERR(obj); goto err_flush; @@ -321,7 +321,7 @@ err_flush: static int igt_copy_blt_thread(void *arg) { struct igt_thread_arg *thread = arg; - struct drm_i915_private *i915 = thread->i915; + struct intel_engine_cs *engine = thread->engine; struct rnd_state *prng = &thread->prng; struct drm_i915_gem_object *src, *dst; struct i915_gem_context *ctx; @@ -333,7 +333,7 @@ static int igt_copy_blt_thread(void *arg) ctx = thread->ctx; if (!ctx) { - ctx = live_context(i915, thread->file); + ctx = live_context_for_engine(engine, thread->file); if (IS_ERR(ctx)) return PTR_ERR(ctx); @@ -341,7 +341,7 @@ static int igt_copy_blt_thread(void *arg) ctx->sched.priority = I915_USER_PRIORITY(prio); } - ce = i915_gem_context_get_engine(ctx, BCS0); + ce = i915_gem_context_get_engine(ctx, 0); GEM_BUG_ON(IS_ERR(ce)); /* @@ -374,7 +374,7 @@ static int igt_copy_blt_thread(void *arg) pr_debug("%s with phys_sz= %x, sz=%x, val=%x\n", __func__, phys_sz, sz, val); - src = huge_gem_object(i915, phys_sz, sz); + src = huge_gem_object(engine->i915, phys_sz, sz); if (IS_ERR(src)) { err = PTR_ERR(src); goto err_flush; @@ -394,7 +394,7 @@ static int igt_copy_blt_thread(void *arg) if (!(src->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ)) src->cache_dirty = true; - dst = huge_gem_object(i915, phys_sz, sz); + dst = huge_gem_object(engine->i915, phys_sz, sz); if (IS_ERR(dst)) { err = 
PTR_ERR(dst); goto err_put_src; @@ -456,7 +456,7 @@ err_flush: return err; } -static int igt_threaded_blt(struct drm_i915_private *i915, +static int igt_threaded_blt(struct intel_engine_cs *engine, int (*blt_fn)(void *arg), unsigned int flags) #define SINGLE_CTX BIT(0) @@ -477,14 +477,14 @@ static int igt_threaded_blt(struct drm_i915_private *i915, if (!thread) goto out_tsk; - thread[0].file = mock_file(i915); + thread[0].file = mock_file(engine->i915); if (IS_ERR(thread[0].file)) { err = PTR_ERR(thread[0].file); goto out_thread; } if (flags & SINGLE_CTX) { - thread[0].ctx = live_context(i915, thread[0].file); + thread[0].ctx = live_context_for_engine(engine, thread[0].file); if (IS_ERR(thread[0].ctx)) { err = PTR_ERR(thread[0].ctx); goto out_file; @@ -492,7 +492,7 @@ static int igt_threaded_blt(struct drm_i915_private *i915, } for (i = 0; i < n_cpus; ++i) { - thread[i].i915 = i915; + thread[i].engine = engine; thread[i].file = thread[0].file; thread[i].ctx = thread[0].ctx; thread[i].n_cpus = n_cpus; @@ -532,24 +532,40 @@ out_tsk: return err; } +static int test_copy_engines(struct drm_i915_private *i915, + int (*fn)(void *arg), + unsigned int flags) +{ + struct intel_engine_cs *engine; + int ret; + + for_each_uabi_class_engine(engine, I915_ENGINE_CLASS_COPY, i915) { + ret = igt_threaded_blt(engine, fn, flags); + if (ret) + return ret; + } + + return 0; +} + static int igt_fill_blt(void *arg) { - return igt_threaded_blt(arg, igt_fill_blt_thread, 0); + return test_copy_engines(arg, igt_fill_blt_thread, 0); } static int igt_fill_blt_ctx0(void *arg) { - return igt_threaded_blt(arg, igt_fill_blt_thread, SINGLE_CTX); + return test_copy_engines(arg, igt_fill_blt_thread, SINGLE_CTX); } static int igt_copy_blt(void *arg) { - return igt_threaded_blt(arg, igt_copy_blt_thread, 0); + return test_copy_engines(arg, igt_copy_blt_thread, 0); } static int igt_copy_blt_ctx0(void *arg) { - return igt_threaded_blt(arg, igt_copy_blt_thread, SINGLE_CTX); + return test_copy_engines(arg, 
igt_copy_blt_thread, SINGLE_CTX); } int i915_gem_object_blt_live_selftests(struct drm_i915_private *i915) @@ -564,9 +580,6 @@ int i915_gem_object_blt_live_selftests(struct drm_i915_private *i915) if (intel_gt_is_wedged(&i915->gt)) return 0; - if (!HAS_ENGINE(i915, BCS0)) - return 0; - return i915_live_subtests(tests, i915); } diff --git a/drivers/gpu/drm/i915/gem/selftests/mock_context.c b/drivers/gpu/drm/i915/gem/selftests/mock_context.c index e7e3c620f542..aa0d06cf1903 100644 --- a/drivers/gpu/drm/i915/gem/selftests/mock_context.c +++ b/drivers/gpu/drm/i915/gem/selftests/mock_context.c @@ -99,6 +99,43 @@ err_ctx: return ERR_PTR(err); } +struct i915_gem_context * +live_context_for_engine(struct intel_engine_cs *engine, struct file *file) +{ + struct i915_gem_engines *engines; + struct i915_gem_context *ctx; + struct intel_context *ce; + + engines = alloc_engines(1); + if (!engines) + return ERR_PTR(-ENOMEM); + + ctx = live_context(engine->i915, file); + if (IS_ERR(ctx)) { + __free_engines(engines, 0); + return ctx; + } + + ce = intel_context_create(engine); + if (IS_ERR(ce)) { + __free_engines(engines, 0); + return ERR_CAST(ce); + } + + intel_context_set_gem(ce, ctx); + engines->engines[0] = ce; + engines->num_engines = 1; + + mutex_lock(&ctx->engines_mutex); + i915_gem_context_set_user_engines(ctx); + engines = rcu_replace_pointer(ctx->engines, engines, 1); + mutex_unlock(&ctx->engines_mutex); + + engines_idle_release(ctx, engines); + + return ctx; +} + struct i915_gem_context * kernel_context(struct drm_i915_private *i915) { diff --git a/drivers/gpu/drm/i915/gem/selftests/mock_context.h b/drivers/gpu/drm/i915/gem/selftests/mock_context.h index fb83d2f09212..2a6121d33352 100644 --- a/drivers/gpu/drm/i915/gem/selftests/mock_context.h +++ b/drivers/gpu/drm/i915/gem/selftests/mock_context.h @@ -9,6 +9,7 @@ struct file; struct drm_i915_private; +struct intel_engine_cs; void mock_init_contexts(struct drm_i915_private *i915); @@ -21,6 +22,9 @@ void 
mock_context_close(struct i915_gem_context *ctx); struct i915_gem_context * live_context(struct drm_i915_private *i915, struct file *file); +struct i915_gem_context * +live_context_for_engine(struct intel_engine_cs *engine, struct file *file); + struct i915_gem_context *kernel_context(struct drm_i915_private *i915); void kernel_context_close(struct i915_gem_context *ctx); diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index f1078e79cf6f..472f60122fcd 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1261,6 +1261,11 @@ static inline struct drm_i915_private *pdev_to_i915(struct pci_dev *pdev) (engine__); \ (engine__) = rb_to_uabi_engine(rb_next(&(engine__)->uabi_node))) +#define for_each_uabi_class_engine(engine__, class__, i915__) \ + for ((engine__) = intel_engine_lookup_user((i915__), (class__), 0); \ + (engine__) && (engine__)->uabi_class == (class__); \ + (engine__) = rb_to_uabi_engine(rb_next(&(engine__)->uabi_node))) + #define I915_GTT_OFFSET_NONE ((u32)-1) /* From 84f9cbf335809412704f99b5fb9b737ef7cb8e89 Mon Sep 17 00:00:00 2001 From: Clint Taylor Date: Wed, 3 Jun 2020 15:11:50 -0700 Subject: [PATCH 119/222] drm/i915/tgl: Implement WA_16011163337 Set GS Timer to 224. Combine with Wa_1604555607 due to register FF_MODE2 not being able to be read. 
V2: Math issue fixed Cc: Chris Wilson Cc: Caz Yokoyama Cc: Matt Atwood Signed-off-by: Clint Taylor Acked-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20200603221150.14745-1-clinton.a.taylor@intel.com --- drivers/gpu/drm/i915/gt/intel_workarounds.c | 11 +++++++---- drivers/gpu/drm/i915/i915_reg.h | 2 ++ 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index 30cd798b9664..3eec31c5a714 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -609,11 +609,14 @@ static void tgl_ctx_workarounds_init(struct intel_engine_cs *engine, * Wa_1604555607:gen12 and Wa_1608008084:gen12 * FF_MODE2 register will return the wrong value when read. The default * value for this register is zero for all fields and there are no bit - * masks. So instead of doing a RMW we should just write the TDS timer - * value for Wa_1604555607. + * masks. So instead of doing a RMW we should just write the GS Timer + * and TDS timer values for Wa_1604555607 and Wa_16011163337. 
*/ - wa_add(wal, FF_MODE2, FF_MODE2_TDS_TIMER_MASK, - FF_MODE2_TDS_TIMER_128, 0); + wa_add(wal, + FF_MODE2, + FF_MODE2_GS_TIMER_MASK | FF_MODE2_TDS_TIMER_MASK, + FF_MODE2_GS_TIMER_224 | FF_MODE2_TDS_TIMER_128, + 0); /* WaDisableGPGPUMidThreadPreemption:tgl */ WA_SET_FIELD_MASKED(GEN8_CS_CHICKEN1, diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 578cfe11cbb9..96d351fbeebb 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -8004,6 +8004,8 @@ enum { #define PER_PIXEL_ALPHA_BYPASS_EN (1 << 7) #define FF_MODE2 _MMIO(0x6604) +#define FF_MODE2_GS_TIMER_MASK REG_GENMASK(31, 24) +#define FF_MODE2_GS_TIMER_224 REG_FIELD_PREP(FF_MODE2_GS_TIMER_MASK, 224) #define FF_MODE2_TDS_TIMER_MASK REG_GENMASK(23, 16) #define FF_MODE2_TDS_TIMER_128 REG_FIELD_PREP(FF_MODE2_TDS_TIMER_MASK, 4) From 57a78ca4eceab1ecb0299fba8a10211289329889 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 4 Jun 2020 11:37:30 +0100 Subject: [PATCH 120/222] drm/i915/gem: Mark the buffer pool as active for the cmdparser If the execbuf is interrupted after building the cmdparser pipeline, and before we commit to submitting the request to HW, we would attempt to clean up the cmdparser early. While we held active references to the vma being parsed and constructed, we did not hold an active reference for the buffer pool itself. The result was that an interrupted execbuf could still have run the cmdparser pipeline, but since the buffer pool was idle, its target vma could have been recycled. Note this problem only occurs if the cmdparser is running async due to pipelined waits on busy fences, and the execbuf is interrupted. 
Fixes: 686c7c35abc2 ("drm/i915/gem: Asynchronous cmdparser") Fixes: 16e87459673a ("drm/i915/gt: Move the batch buffer pool from the engine to the gt") Signed-off-by: Chris Wilson Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20200604103751.18816-1-chris@chris-wilson.co.uk --- .../gpu/drm/i915/gem/i915_gem_execbuffer.c | 56 ++++++++++++++++--- 1 file changed, 48 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index 02a5c0ce39ca..340e7f108baf 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -1987,6 +1987,38 @@ static const struct dma_fence_work_ops eb_parse_ops = { .release = __eb_parse_release, }; +static inline int +__parser_mark_active(struct i915_vma *vma, + struct intel_timeline *tl, + struct dma_fence *fence) +{ + struct intel_gt_buffer_pool_node *node = vma->private; + + return i915_active_ref(&node->active, tl, fence); +} + +static int +parser_mark_active(struct eb_parse_work *pw, struct intel_timeline *tl) +{ + int err; + + mutex_lock(&tl->mutex); + + err = __parser_mark_active(pw->shadow, tl, &pw->base.dma); + if (err) + goto unlock; + + if (pw->trampoline) { + err = __parser_mark_active(pw->trampoline, tl, &pw->base.dma); + if (err) + goto unlock; + } + +unlock: + mutex_unlock(&tl->mutex); + return err; +} + static int eb_parse_pipeline(struct i915_execbuffer *eb, struct i915_vma *shadow, struct i915_vma *trampoline) @@ -2021,20 +2053,25 @@ static int eb_parse_pipeline(struct i915_execbuffer *eb, pw->shadow = shadow; pw->trampoline = trampoline; + /* Mark active refs early for this worker, in case we get interrupted */ + err = parser_mark_active(pw, eb->context->timeline); + if (err) + goto err_commit; + err = dma_resv_lock_interruptible(pw->batch->resv, NULL); if (err) - goto err_trampoline; + goto err_commit; err = dma_resv_reserve_shared(pw->batch->resv, 1); if (err) 
- goto err_batch_unlock; + goto err_commit_unlock; /* Wait for all writes (and relocs) into the batch to complete */ err = i915_sw_fence_await_reservation(&pw->base.chain, pw->batch->resv, NULL, false, 0, I915_FENCE_GFP); if (err < 0) - goto err_batch_unlock; + goto err_commit_unlock; /* Keep the batch alive and unwritten as we parse */ dma_resv_add_shared_fence(pw->batch->resv, &pw->base.dma); @@ -2049,11 +2086,13 @@ static int eb_parse_pipeline(struct i915_execbuffer *eb, dma_fence_work_commit_imm(&pw->base); return 0; -err_batch_unlock: +err_commit_unlock: dma_resv_unlock(pw->batch->resv); -err_trampoline: - if (trampoline) - i915_active_release(&trampoline->active); +err_commit: + i915_sw_fence_set_error_once(&pw->base.chain, err); + dma_fence_work_commit_imm(&pw->base); + return err; + err_shadow: i915_active_release(&shadow->active); err_batch: @@ -2099,6 +2138,7 @@ static int eb_parse(struct i915_execbuffer *eb) goto err; } i915_gem_object_set_readonly(shadow->obj); + shadow->private = pool; trampoline = NULL; if (CMDPARSER_USES_GGTT(eb->i915)) { @@ -2112,6 +2152,7 @@ static int eb_parse(struct i915_execbuffer *eb) shadow = trampoline; goto err_shadow; } + shadow->private = pool; eb->batch_flags |= I915_DISPATCH_SECURE; } @@ -2128,7 +2169,6 @@ static int eb_parse(struct i915_execbuffer *eb) eb->trampoline = trampoline; eb->batch_start_offset = 0; - shadow->private = pool; return 0; err_trampoline: From f4bb45f727341126aa81d8ac2f3e45c7029fe448 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 4 Jun 2020 14:59:38 +0100 Subject: [PATCH 121/222] drm/i915: Trim set_timer_ms() intervals Use the plain msec_to_jiffies() rather than the _timeout variant so we round down and do not add an extra jiffy to our interval. For example, with timeslicing we do not want to err on the longer side as any fairness depends on catching hogging contexts on the GPU. Bring on CFS. 
Signed-off-by: Chris Wilson Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20200604135938.3975-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/selftest_lrc.c | 29 +++++++++++--------------- drivers/gpu/drm/i915/i915_utils.c | 2 +- 2 files changed, 13 insertions(+), 18 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c index 3e35a45d6218..67d74e6432a8 100644 --- a/drivers/gpu/drm/i915/gt/selftest_lrc.c +++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c @@ -1140,9 +1140,17 @@ static struct i915_request *nop_request(struct intel_engine_cs *engine) return rq; } -static long timeslice_threshold(const struct intel_engine_cs *engine) +static long slice_timeout(struct intel_engine_cs *engine) { - return 2 * msecs_to_jiffies_timeout(timeslice(engine)) + 1; + long timeout; + + /* Enough time for a timeslice to kick in, and kick out */ + timeout = 2 * msecs_to_jiffies_timeout(timeslice(engine)); + + /* Enough time for the nop request to complete */ + timeout += HZ / 5; + + return timeout + 1; } static int live_timeslice_queue(void *arg) @@ -1260,7 +1268,7 @@ static int live_timeslice_queue(void *arg) } /* Timeslice every jiffy, so within 2 we should signal */ - if (i915_request_wait(rq, 0, timeslice_threshold(engine)) < 0) { + if (i915_request_wait(rq, 0, slice_timeout(engine)) < 0) { struct drm_printer p = drm_info_printer(gt->i915->drm.dev); @@ -1379,7 +1387,7 @@ static int live_timeslice_nopreempt(void *arg) * allow the maximum priority barrier through. Wait long * enough to see if it is timesliced in by mistake. 
*/ - if (i915_request_wait(rq, 0, timeslice_threshold(engine)) >= 0) { + if (i915_request_wait(rq, 0, slice_timeout(engine)) >= 0) { pr_err("%s: I915_PRIORITY_BARRIER request completed, bypassing no-preempt request\n", engine->name); err = -EINVAL; @@ -3890,19 +3898,6 @@ static int live_virtual_mask(void *arg) return 0; } -static long slice_timeout(struct intel_engine_cs *engine) -{ - long timeout; - - /* Enough time for a timeslice to kick in, and kick out */ - timeout = 2 * msecs_to_jiffies_timeout(timeslice(engine)); - - /* Enough time for the nop request to complete */ - timeout += HZ / 5; - - return timeout; -} - static int slicein_virtual_engine(struct intel_gt *gt, struct intel_engine_cs **siblings, unsigned int nsibling) diff --git a/drivers/gpu/drm/i915/i915_utils.c b/drivers/gpu/drm/i915/i915_utils.c index e28eae4a8f70..f42a9e9a0b4f 100644 --- a/drivers/gpu/drm/i915/i915_utils.c +++ b/drivers/gpu/drm/i915/i915_utils.c @@ -91,7 +91,7 @@ void set_timer_ms(struct timer_list *t, unsigned long timeout) return; } - timeout = msecs_to_jiffies_timeout(timeout); + timeout = msecs_to_jiffies(timeout); /* * Paranoia to make sure the compiler computes the timeout before From ac533c56b7ba4a2ce2f6fe83c7a4bb672f452f1f Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 4 Jun 2020 16:31:45 +0100 Subject: [PATCH 122/222] drm/i915/gt: Track if an engine requires forcewake w/a Sometimes an engine might need to keep forcewake active while it is busy submitting requests for a particular workaround. Track such nuisance with engine->fw_domain. 
Signed-off-by: Chris Wilson Reviewed-by: Mika Kuoppala Cc: Venkata Sandeep Dhanalakota Link: https://patchwork.freedesktop.org/patch/msgid/20200604153145.21068-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/intel_engine_types.h | 11 +++++++++++ drivers/gpu/drm/i915/gt/intel_lrc.c | 4 ++++ 2 files changed, 15 insertions(+) diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h index 2b6cdf47d428..073c3769e8cc 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h @@ -24,6 +24,7 @@ #include "i915_selftest.h" #include "intel_sseu.h" #include "intel_timeline_types.h" +#include "intel_uncore.h" #include "intel_wakeref.h" #include "intel_workarounds_types.h" @@ -313,6 +314,16 @@ struct intel_engine_cs { u32 context_size; u32 mmio_base; + /* + * Some w/a require forcewake to be held (which prevents RC6) while + * a particular engine is active. If so, we set fw_domain to which + * domains need to be held for the duration of request activity, + * and 0 if none. We try to limit the duration of the hold as much + * as possible. 
+ */ + enum forcewake_domains fw_domain; + atomic_t fw_active; + unsigned long context_tag; struct rb_node uabi_node; diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index aac8da18694f..33b7173b7195 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -1373,6 +1373,8 @@ __execlists_schedule_in(struct i915_request *rq) ce->lrc.ccid |= engine->execlists.ccid; __intel_gt_pm_get(engine->gt); + if (engine->fw_domain && !atomic_fetch_inc(&engine->fw_active)) + intel_uncore_forcewake_get(engine->uncore, engine->fw_domain); execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_IN); intel_engine_context_in(engine); @@ -1441,6 +1443,8 @@ __execlists_schedule_out(struct i915_request *rq, intel_context_update_runtime(ce); intel_engine_context_out(engine); execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT); + if (engine->fw_domain && !atomic_dec_return(&engine->fw_active)) + intel_uncore_forcewake_put(engine->uncore, engine->fw_domain); intel_gt_pm_put_async(engine->gt); /* From e95e79749b32b83b4941f28294204be1be3fa3a8 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Wed, 3 Jun 2020 14:15:15 -0700 Subject: [PATCH 123/222] drm/i915/rkl: Set transcoder mask properly MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Although we properly captured RKL's three pipes in the device info structure, we forgot to make the corresponding update to the transcoder mask. Set this field so that our transcoder loops will operate properly. 
Fixes: 123f62de419f ("drm/i915/rkl: Add RKL platform info and PCI ids") Signed-off-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20200603211529.3005059-2-matthew.d.roper@intel.com Reviewed-by: Ville Syrjälä --- drivers/gpu/drm/i915/i915_pci.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index e6054b166fe8..54b258a030fd 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -881,6 +881,8 @@ static const struct intel_device_info rkl_info = { GEN12_FEATURES, PLATFORM(INTEL_ROCKETLAKE), .pipe_mask = BIT(PIPE_A) | BIT(PIPE_B) | BIT(PIPE_C), + .cpu_transcoder_mask = BIT(TRANSCODER_A) | BIT(TRANSCODER_B) | + BIT(TRANSCODER_C), .require_force_probe = 1, .engine_mask = BIT(RCS0) | BIT(BCS0) | BIT(VECS0) | BIT(VCS0), From 4628142aeccca6e37d8c3dfc9ce8be65512a324f Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Wed, 3 Jun 2020 14:15:20 -0700 Subject: [PATCH 124/222] drm/i915/rkl: provide port/phy mapping for vbt MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit RKL uses the DDI A, DDI B, DDI USBC1, DDI USBC2 from the DE point of view, so all DDI/pipe/transcoder registers use these indexes to refer to them. Combo phy and IO functions follow another namespace that we keep as "enum phy". The VBT in theory would use the DE point of view, but that does not happen in practice. Provide a table to convert the child devices to the "correct" port numbering we use.
Now this is the output we get while reading the VBT: DDIA: [drm:intel_bios_port_aux_ch [i915]] using AUX A for port A (VBT) [drm:intel_dp_init_connector [i915]] Adding DP connector on [ENCODER:275:DDI A] [drm:intel_hdmi_init_connector [i915]] Adding HDMI connector on [ENCODER:275:DDI A] [drm:intel_hdmi_init_connector [i915]] Using DDC pin 0x1 for port A (VBT) DDIB: [drm:intel_bios_port_aux_ch [i915]] using AUX B for port B (platform default) [drm:intel_hdmi_init_connector [i915]] Adding HDMI connector on [ENCODER:291:DDI B] [drm:intel_hdmi_init_connector [i915]] Using DDC pin 0x2 for port B (VBT) DDI USBC1: [drm:intel_bios_port_aux_ch [i915]] using AUX D for port D (VBT) [drm:intel_dp_init_connector [i915]] Adding DP connector on [ENCODER:295:DDI D] [drm:intel_hdmi_init_connector [i915]] Adding HDMI connector on [ENCODER:295:DDI D] [drm:intel_hdmi_init_connector [i915]] Using DDC pin 0x3 for port D (VBT) DDI USBC2: [drm:intel_bios_port_aux_ch [i915]] using AUX E for port E (VBT) [drm:intel_dp_init_connector [i915]] Adding DP connector on [ENCODER:306:DDI E] [drm:intel_hdmi_init_connector [i915]] Adding HDMI connector on [ENCODER:306:DDI E] [drm:intel_hdmi_init_connector [i915]] Using DDC pin 0x9 for port E (VBT) Cc: Clinton Taylor Cc: Aditya Swarup Signed-off-by: Lucas De Marchi Signed-off-by: Matt Roper Reviewed-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20200603211529.3005059-7-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/display/intel_bios.c | 72 ++++++++++++++++------- 1 file changed, 51 insertions(+), 21 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_bios.c b/drivers/gpu/drm/i915/display/intel_bios.c index 839124647202..4f1a72a90b8f 100644 --- a/drivers/gpu/drm/i915/display/intel_bios.c +++ b/drivers/gpu/drm/i915/display/intel_bios.c @@ -1619,30 +1619,18 @@ static u8 map_ddc_pin(struct drm_i915_private *dev_priv, u8 vbt_pin) return 0; } -static enum port dvo_port_to_port(u8 dvo_port) +static enum port 
__dvo_port_to_port(int n_ports, int n_dvo, + const int port_mapping[][3], u8 dvo_port) { - /* - * Each DDI port can have more than one value on the "DVO Port" field, - * so look for all the possible values for each port. - */ - static const int dvo_ports[][3] = { - [PORT_A] = { DVO_PORT_HDMIA, DVO_PORT_DPA, -1}, - [PORT_B] = { DVO_PORT_HDMIB, DVO_PORT_DPB, -1}, - [PORT_C] = { DVO_PORT_HDMIC, DVO_PORT_DPC, -1}, - [PORT_D] = { DVO_PORT_HDMID, DVO_PORT_DPD, -1}, - [PORT_E] = { DVO_PORT_CRT, DVO_PORT_HDMIE, DVO_PORT_DPE}, - [PORT_F] = { DVO_PORT_HDMIF, DVO_PORT_DPF, -1}, - [PORT_G] = { DVO_PORT_HDMIG, DVO_PORT_DPG, -1}, - }; enum port port; int i; - for (port = PORT_A; port < ARRAY_SIZE(dvo_ports); port++) { - for (i = 0; i < ARRAY_SIZE(dvo_ports[port]); i++) { - if (dvo_ports[port][i] == -1) + for (port = PORT_A; port < n_ports; port++) { + for (i = 0; i < n_dvo; i++) { + if (port_mapping[port][i] == -1) break; - if (dvo_port == dvo_ports[port][i]) + if (dvo_port == port_mapping[port][i]) return port; } } @@ -1650,6 +1638,48 @@ static enum port dvo_port_to_port(u8 dvo_port) return PORT_NONE; } +static enum port dvo_port_to_port(struct drm_i915_private *dev_priv, + u8 dvo_port) +{ + /* + * Each DDI port can have more than one value on the "DVO Port" field, + * so look for all the possible values for each port. + */ + static const int port_mapping[][3] = { + [PORT_A] = { DVO_PORT_HDMIA, DVO_PORT_DPA, -1 }, + [PORT_B] = { DVO_PORT_HDMIB, DVO_PORT_DPB, -1 }, + [PORT_C] = { DVO_PORT_HDMIC, DVO_PORT_DPC, -1 }, + [PORT_D] = { DVO_PORT_HDMID, DVO_PORT_DPD, -1 }, + [PORT_E] = { DVO_PORT_CRT, DVO_PORT_HDMIE, -1 }, + [PORT_F] = { DVO_PORT_HDMIF, DVO_PORT_DPF, -1 }, + [PORT_G] = { DVO_PORT_HDMIG, DVO_PORT_DPG, -1 }, + }; + /* + * Bspec lists the ports as A, B, C, D - however internally in our + * driver we keep them as PORT_A, PORT_B, PORT_D and PORT_E so the + * registers in Display Engine match the right offsets. 
Apply the + * mapping here to translate from VBT to internal convention. + */ + static const int rkl_port_mapping[][3] = { + [PORT_A] = { DVO_PORT_HDMIA, DVO_PORT_DPA, -1 }, + [PORT_B] = { DVO_PORT_HDMIB, DVO_PORT_DPB, -1 }, + [PORT_C] = { -1 }, + [PORT_D] = { DVO_PORT_HDMIC, DVO_PORT_DPC, -1 }, + [PORT_E] = { DVO_PORT_HDMID, DVO_PORT_DPD, -1 }, + }; + + if (IS_ROCKETLAKE(dev_priv)) + return __dvo_port_to_port(ARRAY_SIZE(rkl_port_mapping), + ARRAY_SIZE(rkl_port_mapping[0]), + rkl_port_mapping, + dvo_port); + else + return __dvo_port_to_port(ARRAY_SIZE(port_mapping), + ARRAY_SIZE(port_mapping[0]), + port_mapping, + dvo_port); +} + static void parse_ddi_port(struct drm_i915_private *dev_priv, struct display_device_data *devdata, u8 bdb_version) @@ -1659,7 +1689,7 @@ static void parse_ddi_port(struct drm_i915_private *dev_priv, bool is_dvi, is_hdmi, is_dp, is_edp, is_crt; enum port port; - port = dvo_port_to_port(child->dvo_port); + port = dvo_port_to_port(dev_priv, child->dvo_port); if (port == PORT_NONE) return; @@ -2603,10 +2633,10 @@ enum aux_ch intel_bios_port_aux_ch(struct drm_i915_private *dev_priv, aux_ch = AUX_CH_B; break; case DP_AUX_C: - aux_ch = AUX_CH_C; + aux_ch = IS_ROCKETLAKE(dev_priv) ? AUX_CH_D : AUX_CH_C; break; case DP_AUX_D: - aux_ch = AUX_CH_D; + aux_ch = IS_ROCKETLAKE(dev_priv) ? AUX_CH_E : AUX_CH_D; break; case DP_AUX_E: aux_ch = AUX_CH_E; From cd0a89527d06f19e9e6eb7efce81c629c4b76b43 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Wed, 3 Jun 2020 14:15:22 -0700 Subject: [PATCH 125/222] drm/i915/rkl: Add DDC pin mapping The pin mapping for the final two outputs varies according to which PCH is present on the platform: with TGP the pins are remapped into the TC range, whereas with CMP they stay in the traditional combo output range. 
Bspec: 49181 Cc: Aditya Swarup Signed-off-by: Matt Roper Reviewed-by: Anusha Srivatsa Link: https://patchwork.freedesktop.org/patch/msgid/20200603211529.3005059-9-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/display/intel_hdmi.c | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/intel_hdmi.c b/drivers/gpu/drm/i915/display/intel_hdmi.c index 010f37240710..a31a98d26882 100644 --- a/drivers/gpu/drm/i915/display/intel_hdmi.c +++ b/drivers/gpu/drm/i915/display/intel_hdmi.c @@ -3082,6 +3082,24 @@ static u8 mcc_port_to_ddc_pin(struct drm_i915_private *dev_priv, enum port port) return ddc_pin; } +static u8 rkl_port_to_ddc_pin(struct drm_i915_private *dev_priv, enum port port) +{ + enum phy phy = intel_port_to_phy(dev_priv, port); + + WARN_ON(port == PORT_C); + + /* + * Pin mapping for RKL depends on which PCH is present. With TGP, the + * final two outputs use type-c pins, even though they're actually + * combo outputs. With CMP, the traditional DDI A-D pins are used for + * all outputs. 
+ */ + if (INTEL_PCH_TYPE(dev_priv) >= PCH_TGP && phy >= PHY_C) + return GMBUS_PIN_9_TC1_ICP + phy - PHY_C; + + return GMBUS_PIN_1_BXT + phy; +} + static u8 g4x_port_to_ddc_pin(struct drm_i915_private *dev_priv, enum port port) { @@ -3119,7 +3137,9 @@ static u8 intel_hdmi_ddc_pin(struct intel_encoder *encoder) return ddc_pin; } - if (HAS_PCH_MCC(dev_priv)) + if (IS_ROCKETLAKE(dev_priv)) + ddc_pin = rkl_port_to_ddc_pin(dev_priv, port); + else if (HAS_PCH_MCC(dev_priv)) ddc_pin = mcc_port_to_ddc_pin(dev_priv, port); else if (INTEL_PCH_TYPE(dev_priv) >= PCH_ICP) ddc_pin = icl_port_to_ddc_pin(dev_priv, port); From b8226d62e77620d372f6eb8c34b51798f3962414 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Wed, 3 Jun 2020 14:15:25 -0700 Subject: [PATCH 126/222] drm/i915/rkl: Handle comp master/slave relationships for PHYs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Certain combo PHYs act as a compensation master to other PHYs and need to be initialized with a special irefgen bit in the PORT_COMP_DW8 register. Previously PHY A was the only compensation master (for PHYs B & C), but RKL adds a fourth PHY which is slaved to PHY C instead. 
Bspec: 49291 Cc: Lucas De Marchi Cc: José Roberto de Souza Cc: Aditya Swarup Signed-off-by: Matt Roper Reviewed-by: Anusha Srivatsa Link: https://patchwork.freedesktop.org/patch/msgid/20200603211529.3005059-12-matthew.d.roper@intel.com --- .../gpu/drm/i915/display/intel_combo_phy.c | 25 +++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_combo_phy.c b/drivers/gpu/drm/i915/display/intel_combo_phy.c index 43d8784f6fa0..77b04bb3ec62 100644 --- a/drivers/gpu/drm/i915/display/intel_combo_phy.c +++ b/drivers/gpu/drm/i915/display/intel_combo_phy.c @@ -234,6 +234,27 @@ static bool ehl_vbt_ddi_d_present(struct drm_i915_private *i915) return false; } +static bool phy_is_master(struct drm_i915_private *dev_priv, enum phy phy) +{ + /* + * Certain PHYs are connected to compensation resistors and act + * as masters to other PHYs. + * + * ICL,TGL: + * A(master) -> B(slave), C(slave) + * RKL: + * A(master) -> B(slave) + * C(master) -> D(slave) + * + * We must set the IREFGEN bit for any PHY acting as a master + * to another PHY. 
+ */ + if (IS_ROCKETLAKE(dev_priv) && phy == PHY_C) + return true; + + return phy == PHY_A; +} + static bool icl_combo_phy_verify_state(struct drm_i915_private *dev_priv, enum phy phy) { @@ -245,7 +266,7 @@ static bool icl_combo_phy_verify_state(struct drm_i915_private *dev_priv, ret = cnl_verify_procmon_ref_values(dev_priv, phy); - if (phy == PHY_A) { + if (phy_is_master(dev_priv, phy)) { ret &= check_phy_reg(dev_priv, phy, ICL_PORT_COMP_DW8(phy), IREFGEN, IREFGEN); @@ -356,7 +377,7 @@ static void icl_combo_phys_init(struct drm_i915_private *dev_priv) skip_phy_misc: cnl_set_procmon_ref_values(dev_priv, phy); - if (phy == PHY_A) { + if (phy_is_master(dev_priv, phy)) { val = intel_de_read(dev_priv, ICL_PORT_COMP_DW8(phy)); val |= IREFGEN; intel_de_write(dev_priv, ICL_PORT_COMP_DW8(phy), val); From 19aefbc778b8b8e87c2d31be9736c634f0ea95a8 Mon Sep 17 00:00:00 2001 From: Stanislav Lisovskiy Date: Mon, 1 Jun 2020 20:30:58 +0300 Subject: [PATCH 127/222] drm/i915: Fix wrong CDCLK adjustment changes Previous patch didn't take into account all pipes but only those in state, which could cause wrong CDCLK conclusions and calculations. Also there was a severe issue with min_cdclk being assigned to 0 every compare cycle. Too bad this was found by me only after merge. This could be also causing the issues in test, however not clear - anyway marking this as fixing the "Adjust CDCLK accordingly to our DBuf bw needs". v2: - s/pipe/crtc->pipe/ - save a bit of instructions by skipping inactive pipes, without getting 0 DBuf slice mask for it.
Signed-off-by: Stanislav Lisovskiy Fixes: cd1915460861 ("drm/i915: Adjust CDCLK accordingly to our DBuf bw needs") Reviewed-by: Manasi Navare Signed-off-by: Manasi Navare Link: https://patchwork.freedesktop.org/patch/msgid/20200601173058.5084-1-stanislav.lisovskiy@intel.com --- drivers/gpu/drm/i915/display/intel_bw.c | 52 +++++++++++++------- drivers/gpu/drm/i915/display/intel_cdclk.c | 19 ++++--- drivers/gpu/drm/i915/display/intel_display.c | 30 +++++------ 3 files changed, 57 insertions(+), 44 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_bw.c b/drivers/gpu/drm/i915/display/intel_bw.c index a79bd7aeb03b..bd060404d249 100644 --- a/drivers/gpu/drm/i915/display/intel_bw.c +++ b/drivers/gpu/drm/i915/display/intel_bw.c @@ -437,6 +437,7 @@ int skl_bw_calc_min_cdclk(struct intel_atomic_state *state) struct intel_crtc *crtc; int max_bw = 0; int slice_id; + enum pipe pipe; int i; for_each_new_intel_crtc_in_state(state, crtc, crtc_state, i) { @@ -447,10 +448,15 @@ int skl_bw_calc_min_cdclk(struct intel_atomic_state *state) if (IS_ERR(new_bw_state)) return PTR_ERR(new_bw_state); + old_bw_state = intel_atomic_get_old_bw_state(state); + crtc_bw = &new_bw_state->dbuf_bw[crtc->pipe]; memset(&crtc_bw->used_bw, 0, sizeof(crtc_bw->used_bw)); + if (!crtc_state->hw.active) + continue; + for_each_plane_id_on_crtc(crtc, plane_id) { const struct skl_ddb_entry *plane_alloc = &crtc_state->wm.skl.plane_ddb_y[plane_id]; @@ -478,6 +484,15 @@ int skl_bw_calc_min_cdclk(struct intel_atomic_state *state) for_each_dbuf_slice_in_mask(slice_id, dbuf_mask) crtc_bw->used_bw[slice_id] += data_rate; } + } + + if (!old_bw_state) + return 0; + + for_each_pipe(dev_priv, pipe) { + struct intel_dbuf_bw *crtc_bw; + + crtc_bw = &new_bw_state->dbuf_bw[pipe]; for_each_dbuf_slice(slice_id) { /* @@ -490,14 +505,9 @@ int skl_bw_calc_min_cdclk(struct intel_atomic_state *state) */ max_bw += crtc_bw->used_bw[slice_id]; } - - new_bw_state->min_cdclk = max_bw / 64; - - old_bw_state = 
intel_atomic_get_old_bw_state(state); } - if (!old_bw_state) - return 0; + new_bw_state->min_cdclk = max_bw / 64; if (new_bw_state->min_cdclk != old_bw_state->min_cdclk) { int ret = intel_atomic_lock_global_state(&new_bw_state->base); @@ -511,34 +521,38 @@ int skl_bw_calc_min_cdclk(struct intel_atomic_state *state) int intel_bw_calc_min_cdclk(struct intel_atomic_state *state) { - int i; + struct drm_i915_private *dev_priv = to_i915(state->base.dev); + struct intel_bw_state *new_bw_state = NULL; + struct intel_bw_state *old_bw_state = NULL; const struct intel_crtc_state *crtc_state; struct intel_crtc *crtc; int min_cdclk = 0; - struct intel_bw_state *new_bw_state = NULL; - struct intel_bw_state *old_bw_state = NULL; + enum pipe pipe; + int i; for_each_new_intel_crtc_in_state(state, crtc, crtc_state, i) { - struct intel_cdclk_state *cdclk_state; - new_bw_state = intel_atomic_get_bw_state(state); if (IS_ERR(new_bw_state)) return PTR_ERR(new_bw_state); - cdclk_state = intel_atomic_get_cdclk_state(state); - if (IS_ERR(cdclk_state)) - return PTR_ERR(cdclk_state); - - min_cdclk = max(cdclk_state->min_cdclk[crtc->pipe], min_cdclk); - - new_bw_state->min_cdclk = min_cdclk; - old_bw_state = intel_atomic_get_old_bw_state(state); } if (!old_bw_state) return 0; + for_each_pipe(dev_priv, pipe) { + struct intel_cdclk_state *cdclk_state; + + cdclk_state = intel_atomic_get_new_cdclk_state(state); + if (!cdclk_state) + return 0; + + min_cdclk = max(cdclk_state->min_cdclk[pipe], min_cdclk); + } + + new_bw_state->min_cdclk = min_cdclk; + if (new_bw_state->min_cdclk != old_bw_state->min_cdclk) { int ret = intel_atomic_lock_global_state(&new_bw_state->base); diff --git a/drivers/gpu/drm/i915/display/intel_cdclk.c b/drivers/gpu/drm/i915/display/intel_cdclk.c index f9b0fc7317de..08468b121d02 100644 --- a/drivers/gpu/drm/i915/display/intel_cdclk.c +++ b/drivers/gpu/drm/i915/display/intel_cdclk.c @@ -2084,9 +2084,12 @@ int intel_crtc_compute_min_cdclk(const struct intel_crtc_state 
*crtc_state) static int intel_compute_min_cdclk(struct intel_cdclk_state *cdclk_state) { struct intel_atomic_state *state = cdclk_state->base.state; + struct drm_i915_private *dev_priv = to_i915(state->base.dev); + struct intel_bw_state *bw_state = NULL; struct intel_crtc *crtc; struct intel_crtc_state *crtc_state; int min_cdclk, i; + enum pipe pipe; for_each_new_intel_crtc_in_state(state, crtc, crtc_state, i) { int ret; @@ -2095,6 +2098,10 @@ static int intel_compute_min_cdclk(struct intel_cdclk_state *cdclk_state) if (min_cdclk < 0) return min_cdclk; + bw_state = intel_atomic_get_bw_state(state); + if (IS_ERR(bw_state)) + return PTR_ERR(bw_state); + if (cdclk_state->min_cdclk[i] == min_cdclk) continue; @@ -2106,15 +2113,11 @@ static int intel_compute_min_cdclk(struct intel_cdclk_state *cdclk_state) } min_cdclk = cdclk_state->force_min_cdclk; + for_each_pipe(dev_priv, pipe) { + min_cdclk = max(cdclk_state->min_cdclk[pipe], min_cdclk); - for_each_new_intel_crtc_in_state(state, crtc, crtc_state, i) { - struct intel_bw_state *bw_state; - - min_cdclk = max(cdclk_state->min_cdclk[crtc->pipe], min_cdclk); - - bw_state = intel_atomic_get_bw_state(state); - if (IS_ERR(bw_state)) - return PTR_ERR(bw_state); + if (!bw_state) + continue; min_cdclk = max(bw_state->min_cdclk, min_cdclk); } diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 0b0faf96495c..43de656f7fa6 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -14716,13 +14716,14 @@ static int intel_atomic_check_cdclk(struct intel_atomic_state *state, bool *need_cdclk_calc) { struct drm_i915_private *dev_priv = to_i915(state->base.dev); - int i; - struct intel_plane_state *plane_state; - struct intel_plane *plane; - int ret; struct intel_cdclk_state *new_cdclk_state; - struct intel_crtc_state *new_crtc_state; - struct intel_crtc *crtc; + struct intel_plane_state *plane_state; + struct intel_bw_state 
*new_bw_state; + struct intel_plane *plane; + int min_cdclk = 0; + enum pipe pipe; + int ret; + int i; /* * active_planes bitmask has been updated, and potentially * affected planes are part of the state. We can now @@ -14743,23 +14744,18 @@ static int intel_atomic_check_cdclk(struct intel_atomic_state *state, if (ret) return ret; - if (!new_cdclk_state) + new_bw_state = intel_atomic_get_new_bw_state(state); + + if (!new_cdclk_state || !new_bw_state) return 0; - for_each_new_intel_crtc_in_state(state, crtc, new_crtc_state, i) { - struct intel_bw_state *bw_state; - int min_cdclk = 0; - - min_cdclk = max(new_cdclk_state->min_cdclk[crtc->pipe], min_cdclk); - - bw_state = intel_atomic_get_bw_state(state); - if (IS_ERR(bw_state)) - return PTR_ERR(bw_state); + for_each_pipe(dev_priv, pipe) { + min_cdclk = max(new_cdclk_state->min_cdclk[pipe], min_cdclk); /* * Currently do this change only if we need to increase */ - if (bw_state->min_cdclk > min_cdclk) + if (new_bw_state->min_cdclk > min_cdclk) *need_cdclk_calc = true; } From 9fa6769952ee14250bb7107a2ec66062d2ccae1e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Roberto=20de=20Souza?= Date: Tue, 2 Jun 2020 13:54:24 -0700 Subject: [PATCH 128/222] drm/i915/tgl: Add HBR and HBR2+ voltage swing table MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As latest update we have now 2 voltage swing tables for DP over DKL PHY with only one difference in Level 0 pre-emphasis 3. So with 2 tables for DP is time to have one single function to return all DKL voltage swing tables. 
BSpec: 49292 Cc: Khaled Almahallawy Signed-off-by: José Roberto de Souza Tested-by: Khaled Almahallawy Reviewed-by: Khaled Almahallawy Link: https://patchwork.freedesktop.org/patch/msgid/20200602205424.138143-1-jose.souza@intel.com --- drivers/gpu/drm/i915/display/intel_ddi.c | 50 ++++++++++++++++++++---- 1 file changed, 42 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c index 236f3762b6f9..96eaa4b39c68 100644 --- a/drivers/gpu/drm/i915/display/intel_ddi.c +++ b/drivers/gpu/drm/i915/display/intel_ddi.c @@ -641,6 +641,20 @@ static const struct tgl_dkl_phy_ddi_buf_trans tgl_dkl_phy_dp_ddi_trans[] = { { 0x7, 0x0, 0x00 }, /* 0 0 400mV 0 dB */ { 0x5, 0x0, 0x05 }, /* 0 1 400mV 3.5 dB */ { 0x2, 0x0, 0x0B }, /* 0 2 400mV 6 dB */ + { 0x0, 0x0, 0x18 }, /* 0 3 400mV 9.5 dB */ + { 0x5, 0x0, 0x00 }, /* 1 0 600mV 0 dB */ + { 0x2, 0x0, 0x08 }, /* 1 1 600mV 3.5 dB */ + { 0x0, 0x0, 0x14 }, /* 1 2 600mV 6 dB */ + { 0x2, 0x0, 0x00 }, /* 2 0 800mV 0 dB */ + { 0x0, 0x0, 0x0B }, /* 2 1 800mV 3.5 dB */ + { 0x0, 0x0, 0x00 }, /* 3 0 1200mV 0 dB HDMI default */ +}; + +static const struct tgl_dkl_phy_ddi_buf_trans tgl_dkl_phy_dp_ddi_trans_hbr2[] = { + /* VS pre-emp Non-trans mV Pre-emph dB */ + { 0x7, 0x0, 0x00 }, /* 0 0 400mV 0 dB */ + { 0x5, 0x0, 0x05 }, /* 0 1 400mV 3.5 dB */ + { 0x2, 0x0, 0x0B }, /* 0 2 400mV 6 dB */ { 0x0, 0x0, 0x19 }, /* 0 3 400mV 9.5 dB */ { 0x5, 0x0, 0x00 }, /* 1 0 600mV 0 dB */ { 0x2, 0x0, 0x08 }, /* 1 1 600mV 3.5 dB */ @@ -1028,6 +1042,22 @@ tgl_get_combo_buf_trans(struct drm_i915_private *dev_priv, int type, int rate, return tgl_combo_phy_ddi_translations_dp_hbr; } +static const struct tgl_dkl_phy_ddi_buf_trans * +tgl_get_dkl_buf_trans(struct drm_i915_private *dev_priv, int type, int rate, + int *n_entries) +{ + if (type == INTEL_OUTPUT_HDMI) { + *n_entries = ARRAY_SIZE(tgl_dkl_phy_hdmi_ddi_trans); + return tgl_dkl_phy_hdmi_ddi_trans; + } else if (rate > 270000) { + *n_entries = 
ARRAY_SIZE(tgl_dkl_phy_dp_ddi_trans_hbr2); + return tgl_dkl_phy_dp_ddi_trans_hbr2; + } + + *n_entries = ARRAY_SIZE(tgl_dkl_phy_dp_ddi_trans); + return tgl_dkl_phy_dp_ddi_trans; +} + static int intel_ddi_hdmi_level(struct intel_encoder *encoder) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); @@ -1039,7 +1069,8 @@ static int intel_ddi_hdmi_level(struct intel_encoder *encoder) tgl_get_combo_buf_trans(dev_priv, INTEL_OUTPUT_HDMI, 0, &n_entries); else - n_entries = ARRAY_SIZE(tgl_dkl_phy_hdmi_ddi_trans); + tgl_get_dkl_buf_trans(dev_priv, INTEL_OUTPUT_HDMI, 0, + &n_entries); default_entry = n_entries - 1; } else if (INTEL_GEN(dev_priv) == 11) { if (intel_phy_is_combo(dev_priv, phy)) @@ -2122,7 +2153,8 @@ static u8 intel_ddi_dp_voltage_max(struct intel_dp *intel_dp) tgl_get_combo_buf_trans(dev_priv, encoder->type, intel_dp->link_rate, &n_entries); else - n_entries = ARRAY_SIZE(tgl_dkl_phy_dp_ddi_trans); + tgl_get_dkl_buf_trans(dev_priv, encoder->type, + intel_dp->link_rate, &n_entries); } else if (INTEL_GEN(dev_priv) == 11) { if (IS_ELKHARTLAKE(dev_priv)) ehl_get_combo_buf_trans(dev_priv, encoder->type, @@ -2589,15 +2621,17 @@ tgl_dkl_phy_ddi_vswing_sequence(struct intel_encoder *encoder, int link_clock, enum tc_port tc_port = intel_port_to_tc(dev_priv, encoder->port); const struct tgl_dkl_phy_ddi_buf_trans *ddi_translations; u32 n_entries, val, ln, dpcnt_mask, dpcnt_val; + int rate = 0; - if (encoder->type == INTEL_OUTPUT_HDMI) { - n_entries = ARRAY_SIZE(tgl_dkl_phy_hdmi_ddi_trans); - ddi_translations = tgl_dkl_phy_hdmi_ddi_trans; - } else { - n_entries = ARRAY_SIZE(tgl_dkl_phy_dp_ddi_trans); - ddi_translations = tgl_dkl_phy_dp_ddi_trans; + if (encoder->type != INTEL_OUTPUT_HDMI) { + struct intel_dp *intel_dp = enc_to_intel_dp(encoder); + + rate = intel_dp->link_rate; } + ddi_translations = tgl_get_dkl_buf_trans(dev_priv, encoder->type, rate, + &n_entries); + if (level >= n_entries) level = n_entries - 1; From aefaa1f452ab2bb933265e1fe6ea25337f08c5ce 
Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Wed, 3 Jun 2020 14:15:19 -0700 Subject: [PATCH 129/222] drm/i915/rkl: Setup ports/phys MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit RKL uses DDI's A, B, TC1, and TC2 which need to map to combo PHY's A-D. Bspec: 49181 Cc: Imre Deak Cc: Aditya Swarup Cc: Lucas De Marchi Signed-off-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20200603211529.3005059-6-matthew.d.roper@intel.com Reviewed-by: Ville Syrjälä --- drivers/gpu/drm/i915/display/intel_display.c | 34 ++++++++++++-------- drivers/gpu/drm/i915/i915_reg.h | 4 ++- 2 files changed, 24 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 43de656f7fa6..3e6ef5bf1284 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -7218,30 +7218,33 @@ bool intel_phy_is_combo(struct drm_i915_private *dev_priv, enum phy phy) { if (phy == PHY_NONE) return false; - - if (IS_ELKHARTLAKE(dev_priv)) + else if (IS_ROCKETLAKE(dev_priv)) + return phy <= PHY_D; + else if (IS_ELKHARTLAKE(dev_priv)) return phy <= PHY_C; - - if (INTEL_GEN(dev_priv) >= 11) + else if (INTEL_GEN(dev_priv) >= 11) return phy <= PHY_B; - - return false; + else + return false; } bool intel_phy_is_tc(struct drm_i915_private *dev_priv, enum phy phy) { - if (INTEL_GEN(dev_priv) >= 12) + if (IS_ROCKETLAKE(dev_priv)) + return false; + else if (INTEL_GEN(dev_priv) >= 12) return phy >= PHY_D && phy <= PHY_I; - - if (INTEL_GEN(dev_priv) >= 11 && !IS_ELKHARTLAKE(dev_priv)) + else if (INTEL_GEN(dev_priv) >= 11 && !IS_ELKHARTLAKE(dev_priv)) return phy >= PHY_C && phy <= PHY_F; - - return false; + else + return false; } enum phy intel_port_to_phy(struct drm_i915_private *i915, enum port port) { - if (IS_ELKHARTLAKE(i915) && port == PORT_D) + if (IS_ROCKETLAKE(i915) && port >= PORT_D) + return (enum phy)port - 1; + else if 
(IS_ELKHARTLAKE(i915) && port == PORT_D) return PHY_A; return (enum phy)port; @@ -16818,7 +16821,12 @@ static void intel_setup_outputs(struct drm_i915_private *dev_priv) if (!HAS_DISPLAY(dev_priv) || !INTEL_DISPLAY_ENABLED(dev_priv)) return; - if (INTEL_GEN(dev_priv) >= 12) { + if (IS_ROCKETLAKE(dev_priv)) { + intel_ddi_init(dev_priv, PORT_A); + intel_ddi_init(dev_priv, PORT_B); + intel_ddi_init(dev_priv, PORT_D); /* DDI TC1 */ + intel_ddi_init(dev_priv, PORT_E); /* DDI TC2 */ + } else if (INTEL_GEN(dev_priv) >= 12) { intel_ddi_init(dev_priv, PORT_A); intel_ddi_init(dev_priv, PORT_B); intel_ddi_init(dev_priv, PORT_D); diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 96d351fbeebb..814a70945468 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -1869,9 +1869,11 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define _ICL_COMBOPHY_A 0x162000 #define _ICL_COMBOPHY_B 0x6C000 #define _EHL_COMBOPHY_C 0x160000 +#define _RKL_COMBOPHY_D 0x161000 #define _ICL_COMBOPHY(phy) _PICK(phy, _ICL_COMBOPHY_A, \ _ICL_COMBOPHY_B, \ - _EHL_COMBOPHY_C) + _EHL_COMBOPHY_C, \ + _RKL_COMBOPHY_D) /* CNL/ICL Port CL_DW registers */ #define _ICL_PORT_CL_DW(dw, phy) (_ICL_COMBOPHY(phy) + \ From 562ddcb7485432b2526fe6e158f7ef58631e4d99 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Wed, 3 Jun 2020 14:15:23 -0700 Subject: [PATCH 130/222] drm/i915/rkl: Don't try to access transcoder D MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There are a couple places in our driver that loop over transcoders A..D for gen11+; since RKL only has three pipes/transcoders, this can lead to unclaimed register reads/writes. We should add checks for transcoder existence where appropriate. v2: Move one transcoder check that wound up in the wrong function after conflict resolution. It belongs in bdw_get_trans_port_sync_config rather than bxt_get_dsi_transcoder_state. 
v3: Switch loops to use for_each_cpu_transcoder_masked() since this iterator already checks the platform's transcoder mask for us. (Ville) Cc: Aditya Swarup Cc: Ville Syrjälä Signed-off-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20200603211529.3005059-10-matthew.d.roper@intel.com Reviewed-by: Ville Syrjälä --- drivers/gpu/drm/i915/i915_irq.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 490574669eaa..8e823ba25f5f 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -2872,13 +2872,15 @@ static void gen11_display_irq_reset(struct drm_i915_private *dev_priv) { struct intel_uncore *uncore = &dev_priv->uncore; enum pipe pipe; + u32 trans_mask = BIT(TRANSCODER_A) | BIT(TRANSCODER_B) | + BIT(TRANSCODER_C) | BIT(TRANSCODER_D); intel_uncore_write(uncore, GEN11_DISPLAY_INT_CTL, 0); if (INTEL_GEN(dev_priv) >= 12) { enum transcoder trans; - for (trans = TRANSCODER_A; trans <= TRANSCODER_D; trans++) { + for_each_cpu_transcoder_masked(dev_priv, trans, trans_mask) { enum intel_display_power_domain domain; domain = POWER_DOMAIN_TRANSCODER(trans); @@ -3400,6 +3402,8 @@ static void gen8_de_irq_postinstall(struct drm_i915_private *dev_priv) u32 de_port_masked = gen8_de_port_aux_mask(dev_priv); u32 de_port_enables; u32 de_misc_masked = GEN8_DE_EDP_PSR; + u32 trans_mask = BIT(TRANSCODER_A) | BIT(TRANSCODER_B) | + BIT(TRANSCODER_C) | BIT(TRANSCODER_D); enum pipe pipe; if (INTEL_GEN(dev_priv) <= 10) @@ -3420,7 +3424,7 @@ static void gen8_de_irq_postinstall(struct drm_i915_private *dev_priv) if (INTEL_GEN(dev_priv) >= 12) { enum transcoder trans; - for (trans = TRANSCODER_A; trans <= TRANSCODER_D; trans++) { + for_each_cpu_transcoder_masked(dev_priv, trans, trans_mask) { enum intel_display_power_domain domain; domain = POWER_DOMAIN_TRANSCODER(trans); From 24d2fc3d530e779c0a7a3dc00d58746cba93e5c8 Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?Jos=C3=A9=20Roberto=20de=20Souza?= Date: Wed, 3 Jun 2020 14:15:28 -0700 Subject: [PATCH 131/222] drm/i915/rkl: Disable PSR2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit RKL doesn't have PSR2 HW tracking, it was replaced by software/manual tracking. The driver is required to track the areas that needs update and program hardware to send selective updates. So until the software tracking is implemented, PSR2 needs to be disabled for platforms without PSR2 HW tracking. BSpec: 50422 BSpec: 50424 Cc: Dhinakaran Pandiyan Cc: Rodrigo Vivi Signed-off-by: José Roberto de Souza Signed-off-by: Matt Roper Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20200603211529.3005059-15-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/display/intel_psr.c | 15 +++++++++++++++ drivers/gpu/drm/i915/i915_drv.h | 2 ++ drivers/gpu/drm/i915/i915_pci.c | 3 +++ drivers/gpu/drm/i915/intel_device_info.h | 1 + 4 files changed, 21 insertions(+) diff --git a/drivers/gpu/drm/i915/display/intel_psr.c b/drivers/gpu/drm/i915/display/intel_psr.c index b7a2c102648a..714c590b39f5 100644 --- a/drivers/gpu/drm/i915/display/intel_psr.c +++ b/drivers/gpu/drm/i915/display/intel_psr.c @@ -646,6 +646,21 @@ static bool intel_psr2_config_valid(struct intel_dp *intel_dp, return false; } + /* + * Some platforms lack PSR2 HW tracking and instead require manual + * tracking by software. In this case, the driver is required to track + * the areas that need updates and program hardware to send selective + * updates. + * + * So until the software tracking is implemented, PSR2 needs to be + * disabled for platforms without PSR2 HW tracking. + */ + if (!HAS_PSR_HW_TRACKING(dev_priv)) { + drm_dbg_kms(&dev_priv->drm, + "No PSR2 HW tracking in the platform\n"); + return false; + } + /* * DSC and PSR2 cannot be enabled simultaneously. 
If a requested * resolution requires DSC to be enabled, priority is given to DSC diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 472f60122fcd..7a9ea43bab66 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1643,6 +1643,8 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915, #define HAS_DDI(dev_priv) (INTEL_INFO(dev_priv)->display.has_ddi) #define HAS_FPGA_DBG_UNCLAIMED(dev_priv) (INTEL_INFO(dev_priv)->has_fpga_dbg) #define HAS_PSR(dev_priv) (INTEL_INFO(dev_priv)->display.has_psr) +#define HAS_PSR_HW_TRACKING(dev_priv) \ + (INTEL_INFO(dev_priv)->display.has_psr_hw_tracking) #define HAS_TRANSCODER(dev_priv, trans) ((INTEL_INFO(dev_priv)->cpu_transcoder_mask & BIT(trans)) != 0) #define HAS_RC6(dev_priv) (INTEL_INFO(dev_priv)->has_rc6) diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index 54b258a030fd..8d0212d65828 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -536,6 +536,7 @@ static const struct intel_device_info vlv_info = { .display.has_ddi = 1, \ .has_fpga_dbg = 1, \ .display.has_psr = 1, \ + .display.has_psr_hw_tracking = 1, \ .display.has_dp_mst = 1, \ .has_rc6p = 0 /* RC6p removed-by HSW */, \ HSW_PIPE_OFFSETS, \ @@ -690,6 +691,7 @@ static const struct intel_device_info skl_gt4_info = { .display.has_fbc = 1, \ .display.has_hdcp = 1, \ .display.has_psr = 1, \ + .display.has_psr_hw_tracking = 1, \ .has_runtime_pm = 1, \ .display.has_csr = 1, \ .has_rc6 = 1, \ @@ -884,6 +886,7 @@ static const struct intel_device_info rkl_info = { .cpu_transcoder_mask = BIT(TRANSCODER_A) | BIT(TRANSCODER_B) | BIT(TRANSCODER_C), .require_force_probe = 1, + .display.has_psr_hw_tracking = 0, .engine_mask = BIT(RCS0) | BIT(BCS0) | BIT(VECS0) | BIT(VCS0), }; diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h index 3613c04904e0..34dbffd65bad 100644 --- a/drivers/gpu/drm/i915/intel_device_info.h +++ 
b/drivers/gpu/drm/i915/intel_device_info.h @@ -148,6 +148,7 @@ enum intel_ppgtt_type { func(has_modular_fia); \ func(has_overlay); \ func(has_psr); \ + func(has_psr_hw_tracking); \ func(overlay_needs_physical); \ func(supports_tv); From 2d3879950f8ac1eb5638958a01ff0abeba5427d9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Roberto=20de=20Souza?= Date: Wed, 20 May 2020 14:27:56 -0700 Subject: [PATCH 132/222] drm/i915: Add psr_safest_params MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This parameter is meant to be used when PSR issues are found as some issues in the past was due wrong values set in VBT so this would be a quick and easy way to ask users or for us to check if the issue is due VBT values. Cc: Gwan-gyeong Mun Signed-off-by: José Roberto de Souza Reviewed-by: Gwan-gyeong Mun Link: https://patchwork.freedesktop.org/patch/msgid/20200520212756.354623-1-jose.souza@intel.com --- drivers/gpu/drm/i915/display/intel_psr.c | 39 ++++++++++++++++++------ drivers/gpu/drm/i915/i915_params.c | 5 +++ drivers/gpu/drm/i915/i915_params.h | 1 + 3 files changed, 35 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_psr.c b/drivers/gpu/drm/i915/display/intel_psr.c index 714c590b39f5..7a0011e42e00 100644 --- a/drivers/gpu/drm/i915/display/intel_psr.c +++ b/drivers/gpu/drm/i915/display/intel_psr.c @@ -426,6 +426,12 @@ static u32 intel_psr1_get_tp_time(struct intel_dp *intel_dp) if (INTEL_GEN(dev_priv) >= 11) val |= EDP_PSR_TP4_TIME_0US; + if (i915_modparams.psr_safest_params) { + val |= EDP_PSR_TP1_TIME_2500us; + val |= EDP_PSR_TP2_TP3_TIME_2500us; + goto check_tp3_sel; + } + if (dev_priv->vbt.psr.tp1_wakeup_time_us == 0) val |= EDP_PSR_TP1_TIME_0us; else if (dev_priv->vbt.psr.tp1_wakeup_time_us <= 100) @@ -444,6 +450,7 @@ static u32 intel_psr1_get_tp_time(struct intel_dp *intel_dp) else val |= EDP_PSR_TP2_TP3_TIME_2500us; +check_tp3_sel: if (intel_dp_source_supports_hbr2(intel_dp) && 
drm_dp_tps3_supported(intel_dp->dpcd)) val |= EDP_PSR_TP1_TP3_SEL; @@ -495,6 +502,27 @@ static void hsw_activate_psr1(struct intel_dp *intel_dp) intel_de_write(dev_priv, EDP_PSR_CTL(dev_priv->psr.transcoder), val); } +static u32 intel_psr2_get_tp_time(struct intel_dp *intel_dp) +{ + struct drm_i915_private *dev_priv = dp_to_i915(intel_dp); + u32 val = 0; + + if (i915_modparams.psr_safest_params) + return EDP_PSR2_TP2_TIME_2500us; + + if (dev_priv->vbt.psr.psr2_tp2_tp3_wakeup_time_us >= 0 && + dev_priv->vbt.psr.psr2_tp2_tp3_wakeup_time_us <= 50) + val |= EDP_PSR2_TP2_TIME_50us; + else if (dev_priv->vbt.psr.psr2_tp2_tp3_wakeup_time_us <= 100) + val |= EDP_PSR2_TP2_TIME_100us; + else if (dev_priv->vbt.psr.psr2_tp2_tp3_wakeup_time_us <= 500) + val |= EDP_PSR2_TP2_TIME_500us; + else + val |= EDP_PSR2_TP2_TIME_2500us; + + return val; +} + static void hsw_activate_psr2(struct intel_dp *intel_dp) { struct drm_i915_private *dev_priv = dp_to_i915(intel_dp); @@ -507,16 +535,7 @@ static void hsw_activate_psr2(struct intel_dp *intel_dp) val |= EDP_Y_COORDINATE_ENABLE; val |= EDP_PSR2_FRAME_BEFORE_SU(dev_priv->psr.sink_sync_latency + 1); - - if (dev_priv->vbt.psr.psr2_tp2_tp3_wakeup_time_us >= 0 && - dev_priv->vbt.psr.psr2_tp2_tp3_wakeup_time_us <= 50) - val |= EDP_PSR2_TP2_TIME_50us; - else if (dev_priv->vbt.psr.psr2_tp2_tp3_wakeup_time_us <= 100) - val |= EDP_PSR2_TP2_TIME_100us; - else if (dev_priv->vbt.psr.psr2_tp2_tp3_wakeup_time_us <= 500) - val |= EDP_PSR2_TP2_TIME_500us; - else - val |= EDP_PSR2_TP2_TIME_2500us; + val |= intel_psr2_get_tp_time(intel_dp); /* * PSR2 HW is incorrectly using EDP_PSR_TP1_TP3_SEL and BSpec is diff --git a/drivers/gpu/drm/i915/i915_params.c b/drivers/gpu/drm/i915/i915_params.c index fd3b14caf4ce..a7b61e6ec508 100644 --- a/drivers/gpu/drm/i915/i915_params.c +++ b/drivers/gpu/drm/i915/i915_params.c @@ -97,6 +97,11 @@ i915_param_named_unsafe(enable_psr, int, 0400, "(0=disabled, 1=enabled) " "Default: -1 (use per-chip default)"); 
+i915_param_named(psr_safest_params, bool, 0400, + "Replace PSR VBT parameters by the safest and not optimal ones. This " + "is helpfull to detect if PSR issues are related to bad values set in " + " VBT. (0=use VBT paramters, 1=use safest parameters)"); + i915_param_named_unsafe(force_probe, charp, 0400, "Force probe the driver for specified devices. " "See CONFIG_DRM_I915_FORCE_PROBE for details."); diff --git a/drivers/gpu/drm/i915/i915_params.h b/drivers/gpu/drm/i915/i915_params.h index 4f21bfffbf0e..53fb5ba8fbed 100644 --- a/drivers/gpu/drm/i915/i915_params.h +++ b/drivers/gpu/drm/i915/i915_params.h @@ -53,6 +53,7 @@ struct drm_printer; param(int, enable_dc, -1, 0400) \ param(int, enable_fbc, -1, 0600) \ param(int, enable_psr, -1, 0600) \ + param(bool, psr_safest_params, false, 0600) \ param(int, disable_power_well, -1, 0400) \ param(int, enable_ips, 1, 0600) \ param(int, invert_brightness, 0, 0600) \ From 9e0f9464e2ab36b864359a59b0e9058fdef0ce47 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 4 Jun 2020 22:14:57 +0100 Subject: [PATCH 133/222] drm/i915/gem: Async GPU relocations only Reduce the 3 relocation paths down to the single path that accommodates all. The primary motivation for this is to guard the relocations with a natural fence (derived from the i915_request used to write the relocation from the GPU). The tradeoff in using async gpu relocations is that it increases latency over using direct CPU relocations, for the cases where the target is idle and accessible by the CPU. The benefit is greatly reduced lock contention and improved concurrency by pipelining. Note that forcing the async gpu relocations does reveal a few issues they have. Firstly, they are visible as writes to gem_busy, causing some buffers to be marked as being written to by the GPU even though userspace only reads them. Secondly, in combination with the cmdparser, they can cause priority inversions.
This should be the case where the work is being put into a common workqueue losing our priority information and so being executed in FIFO from the worker, denying us the opportunity to reorder the requests afterwards. Signed-off-by: Chris Wilson Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20200604211457.19696-1-chris@chris-wilson.co.uk --- .../gpu/drm/i915/gem/i915_gem_execbuffer.c | 293 ++---------------- .../i915/gem/selftests/i915_gem_execbuffer.c | 21 +- 2 files changed, 26 insertions(+), 288 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index 340e7f108baf..cfe6d2cdbef1 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -45,13 +45,6 @@ struct eb_vma_array { struct eb_vma vma[]; }; -enum { - FORCE_CPU_RELOC = 1, - FORCE_GTT_RELOC, - FORCE_GPU_RELOC, -#define DBG_FORCE_RELOC 0 /* choose one of the above! */ -}; - #define __EXEC_OBJECT_HAS_PIN BIT(31) #define __EXEC_OBJECT_HAS_FENCE BIT(30) #define __EXEC_OBJECT_NEEDS_MAP BIT(29) @@ -260,8 +253,6 @@ struct i915_execbuffer { */ struct reloc_cache { struct drm_mm_node node; /** temporary GTT binding */ - unsigned long vaddr; /** Current kmap address */ - unsigned long page; /** Currently mapped page index */ unsigned int gen; /** Cached value of INTEL_GEN */ bool use_64bit_reloc : 1; bool has_llc : 1; @@ -605,23 +596,6 @@ eb_add_vma(struct i915_execbuffer *eb, } } -static inline int use_cpu_reloc(const struct reloc_cache *cache, - const struct drm_i915_gem_object *obj) -{ - if (!i915_gem_object_has_struct_page(obj)) - return false; - - if (DBG_FORCE_RELOC == FORCE_CPU_RELOC) - return true; - - if (DBG_FORCE_RELOC == FORCE_GTT_RELOC) - return false; - - return (cache->has_llc || - obj->cache_dirty || - obj->cache_level != I915_CACHE_NONE); -} - static int eb_reserve_vma(const struct i915_execbuffer *eb, struct eb_vma *ev, u64 pin_flags) @@ 
-945,8 +919,6 @@ relocation_target(const struct drm_i915_gem_relocation_entry *reloc, static void reloc_cache_init(struct reloc_cache *cache, struct drm_i915_private *i915) { - cache->page = -1; - cache->vaddr = 0; /* Must be a variable in the struct to allow GCC to unroll. */ cache->gen = INTEL_GEN(i915); cache->has_llc = HAS_LLC(i915); @@ -1089,181 +1061,6 @@ static int reloc_gpu_flush(struct reloc_cache *cache) return err; } -static void reloc_cache_reset(struct reloc_cache *cache) -{ - void *vaddr; - - if (!cache->vaddr) - return; - - vaddr = unmask_page(cache->vaddr); - if (cache->vaddr & KMAP) { - if (cache->vaddr & CLFLUSH_AFTER) - mb(); - - kunmap_atomic(vaddr); - i915_gem_object_finish_access((struct drm_i915_gem_object *)cache->node.mm); - } else { - struct i915_ggtt *ggtt = cache_to_ggtt(cache); - - intel_gt_flush_ggtt_writes(ggtt->vm.gt); - io_mapping_unmap_atomic((void __iomem *)vaddr); - - if (drm_mm_node_allocated(&cache->node)) { - ggtt->vm.clear_range(&ggtt->vm, - cache->node.start, - cache->node.size); - mutex_lock(&ggtt->vm.mutex); - drm_mm_remove_node(&cache->node); - mutex_unlock(&ggtt->vm.mutex); - } else { - i915_vma_unpin((struct i915_vma *)cache->node.mm); - } - } - - cache->vaddr = 0; - cache->page = -1; -} - -static void *reloc_kmap(struct drm_i915_gem_object *obj, - struct reloc_cache *cache, - unsigned long page) -{ - void *vaddr; - - if (cache->vaddr) { - kunmap_atomic(unmask_page(cache->vaddr)); - } else { - unsigned int flushes; - int err; - - err = i915_gem_object_prepare_write(obj, &flushes); - if (err) - return ERR_PTR(err); - - BUILD_BUG_ON(KMAP & CLFLUSH_FLAGS); - BUILD_BUG_ON((KMAP | CLFLUSH_FLAGS) & PAGE_MASK); - - cache->vaddr = flushes | KMAP; - cache->node.mm = (void *)obj; - if (flushes) - mb(); - } - - vaddr = kmap_atomic(i915_gem_object_get_dirty_page(obj, page)); - cache->vaddr = unmask_flags(cache->vaddr) | (unsigned long)vaddr; - cache->page = page; - - return vaddr; -} - -static void *reloc_iomap(struct 
drm_i915_gem_object *obj, - struct reloc_cache *cache, - unsigned long page) -{ - struct i915_ggtt *ggtt = cache_to_ggtt(cache); - unsigned long offset; - void *vaddr; - - if (cache->vaddr) { - intel_gt_flush_ggtt_writes(ggtt->vm.gt); - io_mapping_unmap_atomic((void __force __iomem *) unmask_page(cache->vaddr)); - } else { - struct i915_vma *vma; - int err; - - if (i915_gem_object_is_tiled(obj)) - return ERR_PTR(-EINVAL); - - if (use_cpu_reloc(cache, obj)) - return NULL; - - i915_gem_object_lock(obj); - err = i915_gem_object_set_to_gtt_domain(obj, true); - i915_gem_object_unlock(obj); - if (err) - return ERR_PTR(err); - - vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, - PIN_MAPPABLE | - PIN_NONBLOCK /* NOWARN */ | - PIN_NOEVICT); - if (IS_ERR(vma)) { - memset(&cache->node, 0, sizeof(cache->node)); - mutex_lock(&ggtt->vm.mutex); - err = drm_mm_insert_node_in_range - (&ggtt->vm.mm, &cache->node, - PAGE_SIZE, 0, I915_COLOR_UNEVICTABLE, - 0, ggtt->mappable_end, - DRM_MM_INSERT_LOW); - mutex_unlock(&ggtt->vm.mutex); - if (err) /* no inactive aperture space, use cpu reloc */ - return NULL; - } else { - cache->node.start = vma->node.start; - cache->node.mm = (void *)vma; - } - } - - offset = cache->node.start; - if (drm_mm_node_allocated(&cache->node)) { - ggtt->vm.insert_page(&ggtt->vm, - i915_gem_object_get_dma_address(obj, page), - offset, I915_CACHE_NONE, 0); - } else { - offset += page << PAGE_SHIFT; - } - - vaddr = (void __force *)io_mapping_map_atomic_wc(&ggtt->iomap, - offset); - cache->page = page; - cache->vaddr = (unsigned long)vaddr; - - return vaddr; -} - -static void *reloc_vaddr(struct drm_i915_gem_object *obj, - struct reloc_cache *cache, - unsigned long page) -{ - void *vaddr; - - if (cache->page == page) { - vaddr = unmask_page(cache->vaddr); - } else { - vaddr = NULL; - if ((cache->vaddr & KMAP) == 0) - vaddr = reloc_iomap(obj, cache, page); - if (!vaddr) - vaddr = reloc_kmap(obj, cache, page); - } - - return vaddr; -} - -static void 
clflush_write32(u32 *addr, u32 value, unsigned int flushes) -{ - if (unlikely(flushes & (CLFLUSH_BEFORE | CLFLUSH_AFTER))) { - if (flushes & CLFLUSH_BEFORE) { - clflushopt(addr); - mb(); - } - - *addr = value; - - /* - * Writes to the same cacheline are serialised by the CPU - * (including clflush). On the write path, we only require - * that it hits memory in an orderly fashion and place - * mb barriers at the start and end of the relocation phase - * to ensure ordering of clflush wrt to the system. - */ - if (flushes & CLFLUSH_AFTER) - clflushopt(addr); - } else - *addr = value; -} - static int reloc_move_to_gpu(struct i915_request *rq, struct i915_vma *vma) { struct drm_i915_gem_object *obj = vma->obj; @@ -1429,17 +1226,6 @@ static u32 *reloc_gpu(struct i915_execbuffer *eb, return cmd; } -static inline bool use_reloc_gpu(struct i915_vma *vma) -{ - if (DBG_FORCE_RELOC == FORCE_GPU_RELOC) - return true; - - if (DBG_FORCE_RELOC) - return false; - - return !dma_resv_test_signaled_rcu(vma->resv, true); -} - static unsigned long vma_phys_addr(struct i915_vma *vma, u32 offset) { struct page *page; @@ -1454,10 +1240,10 @@ static unsigned long vma_phys_addr(struct i915_vma *vma, u32 offset) return addr + offset_in_page(offset); } -static bool __reloc_entry_gpu(struct i915_execbuffer *eb, - struct i915_vma *vma, - u64 offset, - u64 target_addr) +static int __reloc_entry_gpu(struct i915_execbuffer *eb, + struct i915_vma *vma, + u64 offset, + u64 target_addr) { const unsigned int gen = eb->reloc_cache.gen; unsigned int len; @@ -1473,7 +1259,7 @@ static bool __reloc_entry_gpu(struct i915_execbuffer *eb, batch = reloc_gpu(eb, vma, len); if (IS_ERR(batch)) - return false; + return PTR_ERR(batch); addr = gen8_canonical_addr(vma->node.start + offset); if (gen >= 8) { @@ -1522,55 +1308,21 @@ static bool __reloc_entry_gpu(struct i915_execbuffer *eb, *batch++ = target_addr; } - return true; -} - -static bool reloc_entry_gpu(struct i915_execbuffer *eb, - struct i915_vma *vma, - u64 
offset, - u64 target_addr) -{ - if (eb->reloc_cache.vaddr) - return false; - - if (!use_reloc_gpu(vma)) - return false; - - return __reloc_entry_gpu(eb, vma, offset, target_addr); + return 0; } static u64 -relocate_entry(struct i915_vma *vma, +relocate_entry(struct i915_execbuffer *eb, + struct i915_vma *vma, const struct drm_i915_gem_relocation_entry *reloc, - struct i915_execbuffer *eb, const struct i915_vma *target) { u64 target_addr = relocation_target(reloc, target); - u64 offset = reloc->offset; + int err; - if (!reloc_entry_gpu(eb, vma, offset, target_addr)) { - bool wide = eb->reloc_cache.use_64bit_reloc; - void *vaddr; - -repeat: - vaddr = reloc_vaddr(vma->obj, - &eb->reloc_cache, - offset >> PAGE_SHIFT); - if (IS_ERR(vaddr)) - return PTR_ERR(vaddr); - - GEM_BUG_ON(!IS_ALIGNED(offset, sizeof(u32))); - clflush_write32(vaddr + offset_in_page(offset), - lower_32_bits(target_addr), - eb->reloc_cache.vaddr); - - if (wide) { - offset += sizeof(u32); - target_addr >>= 32; - wide = false; - goto repeat; - } - } + err = __reloc_entry_gpu(eb, vma, reloc->offset, target_addr); + if (err) + return err; return target->node.start | UPDATE; } @@ -1635,8 +1387,7 @@ eb_relocate_entry(struct i915_execbuffer *eb, * If the relocation already has the right value in it, no * more work needs to be done. */ - if (!DBG_FORCE_RELOC && - gen8_canonical_addr(target->vma->node.start) == reloc->presumed_offset) + if (gen8_canonical_addr(target->vma->node.start) == reloc->presumed_offset) return 0; /* Check that the relocation address is valid... 
*/ @@ -1668,7 +1419,7 @@ eb_relocate_entry(struct i915_execbuffer *eb, ev->flags &= ~EXEC_OBJECT_ASYNC; /* and update the user's relocation entry */ - return relocate_entry(ev->vma, reloc, eb, target->vma); + return relocate_entry(eb, ev->vma, reloc, target->vma); } static int eb_relocate_vma(struct i915_execbuffer *eb, struct eb_vma *ev) @@ -1706,10 +1457,8 @@ static int eb_relocate_vma(struct i915_execbuffer *eb, struct eb_vma *ev) * this is bad and so lockdep complains vehemently. */ copied = __copy_from_user(r, urelocs, count * sizeof(r[0])); - if (unlikely(copied)) { - remain = -EFAULT; - goto out; - } + if (unlikely(copied)) + return -EFAULT; remain -= count; do { @@ -1717,8 +1466,7 @@ static int eb_relocate_vma(struct i915_execbuffer *eb, struct eb_vma *ev) if (likely(offset == 0)) { } else if ((s64)offset < 0) { - remain = (int)offset; - goto out; + return (int)offset; } else { /* * Note that reporting an error now @@ -1748,9 +1496,8 @@ static int eb_relocate_vma(struct i915_execbuffer *eb, struct eb_vma *ev) } while (r++, --count); urelocs += ARRAY_SIZE(stack); } while (remain); -out: - reloc_cache_reset(&eb->reloc_cache); - return remain; + + return 0; } static int eb_relocate(struct i915_execbuffer *eb) @@ -2658,7 +2405,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, eb.i915 = i915; eb.file = file; eb.args = args; - if (DBG_FORCE_RELOC || !(args->flags & I915_EXEC_NO_RELOC)) + if (!(args->flags & I915_EXEC_NO_RELOC)) args->flags |= __EXEC_HAS_RELOC; eb.exec = exec; diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_execbuffer.c index a49016f8ee0d..57c14d3340cd 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_execbuffer.c @@ -37,20 +37,14 @@ static int __igt_gpu_reloc(struct i915_execbuffer *eb, return err; /* 8-Byte aligned */ - if (!__reloc_entry_gpu(eb, vma, - offsets[0] * sizeof(u32), - 0)) { - err = -EIO; + err = 
__reloc_entry_gpu(eb, vma, offsets[0] * sizeof(u32), 0); + if (err) goto unpin_vma; - } /* !8-Byte aligned */ - if (!__reloc_entry_gpu(eb, vma, - offsets[1] * sizeof(u32), - 1)) { - err = -EIO; + err = __reloc_entry_gpu(eb, vma, offsets[1] * sizeof(u32), 1); + if (err) goto unpin_vma; - } /* Skip to the end of the cmd page */ i = PAGE_SIZE / sizeof(u32) - RELOC_TAIL - 1; @@ -60,12 +54,9 @@ static int __igt_gpu_reloc(struct i915_execbuffer *eb, eb->reloc_cache.rq_size += i; /* Force batch chaining */ - if (!__reloc_entry_gpu(eb, vma, - offsets[2] * sizeof(u32), - 2)) { - err = -EIO; + err = __reloc_entry_gpu(eb, vma, offsets[2] * sizeof(u32), 2); + if (err) goto unpin_vma; - } GEM_BUG_ON(!eb->reloc_cache.rq); rq = i915_request_get(eb->reloc_cache.rq); From 684f1a1bf92e6d0406fac1007da5ceee4c6960a3 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 5 Jun 2020 07:19:53 -0700 Subject: [PATCH 134/222] drm/i915: Fix comments mentioning typo in IS_ENABLED() This has no code changes, but the typo is clearly getting copy/pasted, so better to avoid this now and fix the typo. IS_ENABLED() takes full names, and must have the "CONFIG_" prefix. Reported-by: Joe Perches Link: https://lore.kernel.org/lkml/b08611018fdb6d88757c6008a5c02fa0e07b32fb.camel@perches.com Signed-off-by: Kees Cook Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/202006050718.9D4FCFC2E@keescook --- drivers/dma-buf/selftests.h | 2 +- drivers/gpu/drm/i915/selftests/i915_live_selftests.h | 4 ++-- drivers/gpu/drm/i915/selftests/i915_mock_selftests.h | 4 ++-- drivers/gpu/drm/i915/selftests/i915_perf_selftests.h | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/dma-buf/selftests.h b/drivers/dma-buf/selftests.h index 5320386f02e5..42fc17a88b4b 100644 --- a/drivers/dma-buf/selftests.h +++ b/drivers/dma-buf/selftests.h @@ -5,7 +5,7 @@ * a module parameter. It must be unique and legal for a C identifier. 
* * The function should be of type int function(void). It may be conditionally - * compiled using #if IS_ENABLED(DRM_I915_SELFTEST). + * compiled using #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST). * * Tests are executed in order by igt/dmabuf_selftest */ diff --git a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h index 5dd5d81646c4..a92c0e9b7e6b 100644 --- a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h +++ b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h @@ -11,9 +11,9 @@ * a module parameter. It must be unique and legal for a C identifier. * * The function should be of type int function(void). It may be conditionally - * compiled using #if IS_ENABLED(DRM_I915_SELFTEST). + * compiled using #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST). * - * Tests are executed in order by igt/drv_selftest + * Tests are executed in order by igt/i915_selftest */ selftest(sanitycheck, i915_live_sanitycheck) /* keep first (igt selfcheck) */ selftest(uncore, intel_uncore_live_selftests) diff --git a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h index 6a2be7d0dd95..1929feba4e8e 100644 --- a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h +++ b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h @@ -11,9 +11,9 @@ * a module parameter. It must be unique and legal for a C identifier. * * The function should be of type int function(void). It may be conditionally - * compiled using #if IS_ENABLED(DRM_I915_SELFTEST). + * compiled using #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST). 
* - * Tests are executed in order by igt/drv_selftest + * Tests are executed in order by igt/i915_selftest */ selftest(sanitycheck, i915_mock_sanitycheck) /* keep first (igt selfcheck) */ selftest(shmem, shmem_utils_mock_selftests) diff --git a/drivers/gpu/drm/i915/selftests/i915_perf_selftests.h b/drivers/gpu/drm/i915/selftests/i915_perf_selftests.h index d8da142985eb..c2389f8a257d 100644 --- a/drivers/gpu/drm/i915/selftests/i915_perf_selftests.h +++ b/drivers/gpu/drm/i915/selftests/i915_perf_selftests.h @@ -11,7 +11,7 @@ * a module parameter. It must be unique and legal for a C identifier. * * The function should be of type int function(void). It may be conditionally - * compiled using #if IS_ENABLED(DRM_I915_SELFTEST). + * compiled using #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST). * * Tests are executed in order by igt/i915_selftest */ From ac4fc5b38d1abc039d61eecd15c12fc6fbeb7f33 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 5 Jun 2020 15:47:05 +0100 Subject: [PATCH 135/222] drm/i915/gt: Include the engine's fw-domains in the debug info Add engine->fw_domain/active to the pretty printer for debug dumps and debugfs. 
Signed-off-by: Chris Wilson Cc: Mika Kuoppala Cc: Venkata Sandeep Dhanalakota Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20200605144705.31127-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/intel_engine_cs.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index e37490d459c2..e5141a897786 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -1519,6 +1519,8 @@ void intel_engine_dump(struct intel_engine_cs *engine, yesno(!llist_empty(&engine->barrier_tasks))); drm_printf(m, "\tLatency: %luus\n", ewma__engine_latency_read(&engine->latency)); + drm_printf(m, "\tForcewake: %x domains, %d active\n", + engine->fw_domain, atomic_read(&engine->fw_active)); rcu_read_lock(); rq = READ_ONCE(engine->heartbeat.systole); From fdd4f9416511d7a1f0a84a75052a3476dd163438 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 5 Jun 2020 13:23:25 +0100 Subject: [PATCH 136/222] drm/i915/gt: Set timeslicing priority from queue If we only submit the first port, leaving the second empty yet have ready requests pending in the queue, use that to set the timeslicing priority (i.e. the priority at which we will decide to enable timeslicing and evict the currently active context if the queue is of equal priority after its quantum expired).
Signed-off-by: Chris Wilson Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20200605122334.2798-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/intel_lrc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index 33b7173b7195..92c3368ffcbd 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -1966,7 +1966,7 @@ static int switch_prio(struct intel_engine_cs *engine, const struct i915_request *rq) { if (list_is_last(&rq->sched.link, &engine->active.requests)) - return INT_MIN; + return engine->execlists.queue_priority_hint; return rq_prio(list_next_entry(rq, sched.link)); } From 12b67c2e9c58f6c716efcd5cfc9411d7e5f45930 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 5 Jun 2020 13:23:26 +0100 Subject: [PATCH 137/222] drm/i915/gt: Always check to enable timeslicing if not submitting We may choose not to submit for a number of reasons, yet not fill both ELSP. In which case we must start timeslicing (there will be no ACK event on which to hook the start) if the queue would benefit from the currently active context being evicted. 
Signed-off-by: Chris Wilson Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20200605122334.2798-2-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/intel_lrc.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index 92c3368ffcbd..d55a5e0466e5 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -2362,10 +2362,8 @@ static void execlists_dequeue(struct intel_engine_cs *engine) if (last->context == rq->context) goto done; - if (i915_request_has_sentinel(last)) { - start_timeslice(engine, rq_prio(rq)); + if (i915_request_has_sentinel(last)) goto done; - } /* * If GVT overrides us we only ever submit @@ -2446,6 +2444,7 @@ done: set_preempt_timeout(engine, *active); execlists_submit_ports(engine); } else { + start_timeslice(engine, execlists->queue_priority_hint); skip_submit: ring_set_paused(engine, 0); } From 9bdcaa5e3a2fb0a18d8c7a49bd64a52bce105bd2 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 5 Jun 2020 17:52:58 +0100 Subject: [PATCH 138/222] drm/i915: Discard a misplaced GGTT vma Across the many users of the GGTT vma (internal objects, mmapings, display etc), we may end up with conflicting requirements for the placement. Currently, we try to resolve the conflict by unbinding the vma and rebinding it to match the new constraints; over time we will end up with a GGTT that matches the most strict constraints over all concurrent users. However, this causes a problem if the vma is currently in use as we must wait until it is idle before moving it. But there is no restriction on the number of views we may use (apart from the limited size of the GGTT itself), and so if the active vma does not meet our requirements, try and build a new one! 
Signed-off-by: Chris Wilson Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20200605165258.1483-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem.c | 45 +++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 0cbcb9f54e7d..f1acd1889d37 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -933,6 +933,45 @@ void i915_gem_runtime_suspend(struct drm_i915_private *i915) } } +static bool +discard_ggtt_vma(struct i915_vma *vma, const struct i915_ggtt_view *view) +{ + const struct i915_ggtt_view discard = { + .type = I915_GGTT_VIEW_PARTIAL, + }; + struct drm_i915_gem_object *obj = vma->obj; + + spin_lock(&obj->vma.lock); + if (i915_vma_compare(vma, vma->vm, &discard)) { + struct rb_node *rb, **p; + + rb_erase(&vma->obj_node, &obj->vma.tree); + vma->ggtt_view = discard; + GEM_BUG_ON(i915_vma_compare(vma, vma->vm, view)); + + rb = NULL; + p = &obj->vma.tree.rb_node; + while (*p) { + struct i915_vma *pos; + long cmp; + + rb = *p; + pos = rb_entry(rb, struct i915_vma, obj_node); + + cmp = i915_vma_compare(pos, vma->vm, &discard); + if (cmp < 0) + p = &rb->rb_right; + else + p = &rb->rb_left; + } + rb_link_node(&vma->obj_node, rb, p); + rb_insert_color(&vma->obj_node, &obj->vma.tree); + } + spin_unlock(&obj->vma.lock); + + return i915_vma_compare(vma, vma->vm, view); +} + struct i915_vma * i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj, const struct i915_ggtt_view *view, @@ -979,6 +1018,7 @@ i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj, return ERR_PTR(-ENOSPC); } +new_vma: vma = i915_vma_instance(obj, &ggtt->vm, view); if (IS_ERR(vma)) return vma; @@ -993,6 +1033,11 @@ i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj, return ERR_PTR(-ENOSPC); } + if (i915_vma_is_pinned(vma) || i915_vma_is_active(vma)) { + if (discard_ggtt_vma(vma, view)) + goto new_vma; + } + ret = 
i915_vma_unbind(vma); if (ret) return ERR_PTR(ret); From 84d24cb5247a356a4310a25761f8aa56b8814538 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 5 Jun 2020 19:48:44 +0100 Subject: [PATCH 139/222] drm/i915: Correct discard i915_vma_compare assertion As a last minute addition, I added an assertion to make sure that the new i915_vma view would be equal to the discard. However, the positive encouragement from CI only goes to show that we rarely take this path, and it wasn't until the post-merge run did we hit the assert -- because it compared the wrong view. Fixup the copy'n'paste error and compare against both the old view and the expected new view. Fixes: 9bdcaa5e3a2f ("drm/i915: Discard a misplaced GGTT vma") Signed-off-by: Chris Wilson Cc: Matthew Auld Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20200605184844.24644-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index f1acd1889d37..41553e9e57a9 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -947,7 +947,8 @@ discard_ggtt_vma(struct i915_vma *vma, const struct i915_ggtt_view *view) rb_erase(&vma->obj_node, &obj->vma.tree); vma->ggtt_view = discard; - GEM_BUG_ON(i915_vma_compare(vma, vma->vm, view)); + GEM_BUG_ON(i915_vma_compare(vma, vma->vm, &discard)); + GEM_BUG_ON(i915_vma_compare(vma, vma->vm, view) == 0); rb = NULL; p = &obj->vma.tree.rb_node; From 7ac2d2536dfa71c275a74813345779b1e7522c91 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 5 Jun 2020 21:03:57 +0100 Subject: [PATCH 140/222] drm/i915/gem: Delete unused code Unused as of commit 9e0f9464e2ab ("drm/i915/gem: Async GPU relocations only"), but left behind. 
>> drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c:933:21: error: unused function 'unmask_page' [-Werror,-Wunused-function] static inline void *unmask_page(unsigned long p) ^ >> drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c:938:28: error: unused function 'unmask_flags' [-Werror,-Wunused-function] static inline unsigned int unmask_flags(unsigned long p) ^ >> drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c:945:33: error: unused function 'cache_to_ggtt' [-Werror,-Wunused-function] static inline struct i915_ggtt *cache_to_ggtt(struct reloc_cache *cache) Reported-by: kernel test robot Signed-off-by: Chris Wilson Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20200605200357.13069-1-chris@chris-wilson.co.uk --- .../gpu/drm/i915/gem/i915_gem_execbuffer.c | 19 ------------------- 1 file changed, 19 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index cfe6d2cdbef1..23db79b806db 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -930,25 +930,6 @@ static void reloc_cache_init(struct reloc_cache *cache, cache->target = NULL; } -static inline void *unmask_page(unsigned long p) -{ - return (void *)(uintptr_t)(p & PAGE_MASK); -} - -static inline unsigned int unmask_flags(unsigned long p) -{ - return p & ~PAGE_MASK; -} - -#define KMAP 0x4 /* after CLFLUSH_FLAGS */ - -static inline struct i915_ggtt *cache_to_ggtt(struct reloc_cache *cache) -{ - struct drm_i915_private *i915 = - container_of(cache, struct i915_execbuffer, reloc_cache)->i915; - return &i915->ggtt; -} - #define RELOC_TAIL 4 static int reloc_gpu_chain(struct reloc_cache *cache) From 8c1a8f12f4a3b4223348a109f2f5c8d46819a691 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Fri, 5 Jun 2020 20:18:03 -0700 Subject: [PATCH 141/222] drm/i915: Restore DP-E to VBT mapping table MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 
8bit We accidentally dropped matching for DVO_PORT_DPE from the VBT mapping table when we refactored the function. Restore it. Fixes: 4628142aeccc ("drm/i915/rkl: provide port/phy mapping for vbt") Cc: Lucas De Marchi Cc: Matt Roper Cc: Ville Syrjälä Cc: Chris Wilson Signed-off-by: Matt Roper Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20200606031803.3309624-1-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/display/intel_bios.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/intel_bios.c b/drivers/gpu/drm/i915/display/intel_bios.c index 4f1a72a90b8f..c974c716f859 100644 --- a/drivers/gpu/drm/i915/display/intel_bios.c +++ b/drivers/gpu/drm/i915/display/intel_bios.c @@ -1650,7 +1650,7 @@ static enum port dvo_port_to_port(struct drm_i915_private *dev_priv, [PORT_B] = { DVO_PORT_HDMIB, DVO_PORT_DPB, -1 }, [PORT_C] = { DVO_PORT_HDMIC, DVO_PORT_DPC, -1 }, [PORT_D] = { DVO_PORT_HDMID, DVO_PORT_DPD, -1 }, - [PORT_E] = { DVO_PORT_CRT, DVO_PORT_HDMIE, -1 }, + [PORT_E] = { DVO_PORT_HDMIE, DVO_PORT_DPE, DVO_PORT_CRT }, [PORT_F] = { DVO_PORT_HDMIF, DVO_PORT_DPF, -1 }, [PORT_G] = { DVO_PORT_HDMIG, DVO_PORT_DPG, -1 }, }; From 64cf40a125ffd293a37b0a299372e3602dd62932 Mon Sep 17 00:00:00 2001 From: Gwan-gyeong Mun Date: Sun, 7 Jun 2020 17:36:14 +0300 Subject: [PATCH 142/222] drm/i915/psr: Program default IO buffer Wake and Fast Wake MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The IO buffer Wake and Fast Wake bit size and value have been changed from Gen12+. It programs the default value of IO buffer Wake and Fast Wake on Gen12+. It adds definitions of IO buffer Wake and Fast Wake for pre Gen12 and Gen12+. And it aligns PSR2 definition macros. v2: Fix macro definitions. 
(José) v3: Addressed review comments from José - Add missing default values of IO_BUFFER_WAKE and FAST_WAKE for GEN9+ - Change a style of macro naming in order to use lines as input. - Update Todo comments. v4: Add parentheses to macros to avoid precedence issues. Cc: José Roberto de Souza Signed-off-by: Gwan-gyeong Mun Reviewed-by: José Roberto de Souza Signed-off-by: José Roberto de Souza Link: https://patchwork.freedesktop.org/patch/msgid/20200607143614.185246-1-gwan-gyeong.mun@intel.com --- drivers/gpu/drm/i915/display/intel_psr.c | 16 ++++++++ drivers/gpu/drm/i915/i915_reg.h | 52 +++++++++++++++--------- 2 files changed, 49 insertions(+), 19 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_psr.c b/drivers/gpu/drm/i915/display/intel_psr.c index 7a0011e42e00..ab380e6dc674 100644 --- a/drivers/gpu/drm/i915/display/intel_psr.c +++ b/drivers/gpu/drm/i915/display/intel_psr.c @@ -537,6 +537,22 @@ static void hsw_activate_psr2(struct intel_dp *intel_dp) val |= EDP_PSR2_FRAME_BEFORE_SU(dev_priv->psr.sink_sync_latency + 1); val |= intel_psr2_get_tp_time(intel_dp); + if (INTEL_GEN(dev_priv) >= 12) { + /* + * TODO: 7 lines of IO_BUFFER_WAKE and FAST_WAKE are default + * values from BSpec. In order to set an optimal power + * consumption, lower than 4K resolution mode needs to decrease + * IO_BUFFER_WAKE and FAST_WAKE. And higher than 4K resolution + * mode needs to increase IO_BUFFER_WAKE and FAST_WAKE. + */ + val |= TGL_EDP_PSR2_BLOCK_COUNT_NUM_2; + val |= TGL_EDP_PSR2_IO_BUFFER_WAKE(7); + val |= TGL_EDP_PSR2_FAST_WAKE(7); + } else if (INTEL_GEN(dev_priv) >= 9) { + val |= EDP_PSR2_IO_BUFFER_WAKE(7); + val |= EDP_PSR2_FAST_WAKE(7); + } + /* * PSR2 HW is incorrectly using EDP_PSR_TP1_TP3_SEL and BSpec is * recommending keep this bit unset while PSR2 is enabled.
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 814a70945468..4066f67175dc 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -4511,25 +4511,39 @@ enum { #define EDP_PSR_DEBUG_MASK_DISP_REG_WRITE (1 << 16) /* Reserved in ICL+ */ #define EDP_PSR_DEBUG_EXIT_ON_PIXEL_UNDERRUN (1 << 15) /* SKL+ */ -#define _PSR2_CTL_A 0x60900 -#define _PSR2_CTL_EDP 0x6f900 -#define EDP_PSR2_CTL(tran) _MMIO_TRANS2(tran, _PSR2_CTL_A) -#define EDP_PSR2_ENABLE (1 << 31) -#define EDP_SU_TRACK_ENABLE (1 << 30) -#define EDP_Y_COORDINATE_VALID (1 << 26) /* GLK and CNL+ */ -#define EDP_Y_COORDINATE_ENABLE (1 << 25) /* GLK and CNL+ */ -#define EDP_MAX_SU_DISABLE_TIME(t) ((t) << 20) -#define EDP_MAX_SU_DISABLE_TIME_MASK (0x1f << 20) -#define EDP_PSR2_TP2_TIME_500us (0 << 8) -#define EDP_PSR2_TP2_TIME_100us (1 << 8) -#define EDP_PSR2_TP2_TIME_2500us (2 << 8) -#define EDP_PSR2_TP2_TIME_50us (3 << 8) -#define EDP_PSR2_TP2_TIME_MASK (3 << 8) -#define EDP_PSR2_FRAME_BEFORE_SU_SHIFT 4 -#define EDP_PSR2_FRAME_BEFORE_SU_MASK (0xf << 4) -#define EDP_PSR2_FRAME_BEFORE_SU(a) ((a) << 4) -#define EDP_PSR2_IDLE_FRAME_MASK 0xf -#define EDP_PSR2_IDLE_FRAME_SHIFT 0 +#define _PSR2_CTL_A 0x60900 +#define _PSR2_CTL_EDP 0x6f900 +#define EDP_PSR2_CTL(tran) _MMIO_TRANS2(tran, _PSR2_CTL_A) +#define EDP_PSR2_ENABLE (1 << 31) +#define EDP_SU_TRACK_ENABLE (1 << 30) +#define TGL_EDP_PSR2_BLOCK_COUNT_NUM_2 (0 << 28) +#define TGL_EDP_PSR2_BLOCK_COUNT_NUM_3 (1 << 28) +#define EDP_Y_COORDINATE_VALID (1 << 26) /* GLK and CNL+ */ +#define EDP_Y_COORDINATE_ENABLE (1 << 25) /* GLK and CNL+ */ +#define EDP_MAX_SU_DISABLE_TIME(t) ((t) << 20) +#define EDP_MAX_SU_DISABLE_TIME_MASK (0x1f << 20) +#define EDP_PSR2_IO_BUFFER_WAKE_MAX_LINES 8 +#define EDP_PSR2_IO_BUFFER_WAKE(lines) ((EDP_PSR2_IO_BUFFER_WAKE_MAX_LINES - (lines)) << 13) +#define EDP_PSR2_IO_BUFFER_WAKE_MASK (3 << 13) +#define TGL_EDP_PSR2_IO_BUFFER_WAKE_MIN_LINES 5 +#define 
TGL_EDP_PSR2_IO_BUFFER_WAKE(lines) (((lines) - TGL_EDP_PSR2_IO_BUFFER_WAKE_MIN_LINES) << 13) +#define TGL_EDP_PSR2_IO_BUFFER_WAKE_MASK (7 << 13) +#define EDP_PSR2_FAST_WAKE_MAX_LINES 8 +#define EDP_PSR2_FAST_WAKE(lines) ((EDP_PSR2_FAST_WAKE_MAX_LINES - (lines)) << 11) +#define EDP_PSR2_FAST_WAKE_MASK (3 << 11) +#define TGL_EDP_PSR2_FAST_WAKE_MIN_LINES 5 +#define TGL_EDP_PSR2_FAST_WAKE(lines) (((lines) - TGL_EDP_PSR2_FAST_WAKE_MIN_LINES) << 10) +#define TGL_EDP_PSR2_FAST_WAKE_MASK (7 << 10) +#define EDP_PSR2_TP2_TIME_500us (0 << 8) +#define EDP_PSR2_TP2_TIME_100us (1 << 8) +#define EDP_PSR2_TP2_TIME_2500us (2 << 8) +#define EDP_PSR2_TP2_TIME_50us (3 << 8) +#define EDP_PSR2_TP2_TIME_MASK (3 << 8) +#define EDP_PSR2_FRAME_BEFORE_SU_SHIFT 4 +#define EDP_PSR2_FRAME_BEFORE_SU_MASK (0xf << 4) +#define EDP_PSR2_FRAME_BEFORE_SU(a) ((a) << 4) +#define EDP_PSR2_IDLE_FRAME_MASK 0xf +#define EDP_PSR2_IDLE_FRAME_SHIFT 0 #define _PSR_EVENT_TRANS_A 0x60848 #define _PSR_EVENT_TRANS_B 0x61848 From 46d53e271cea5740a19384c4365217c9cb86bdfc Mon Sep 17 00:00:00 2001 From: Stanislav Lisovskiy Date: Mon, 8 Jun 2020 09:55:52 +0300 Subject: [PATCH 143/222] Revert "drm/i915: Remove unneeded hack now for CDCLK" This reverts commit 82ea174dc5425d4e85e25d0c4ba961a2e494392a. Unfortunately according to our recent findings there is still some unidentified factor, requiring CDCLK to be set higher - otherwise we still get underruns on some multipipe configurations, despite CDCLK being set according to BSpec formula. So we are going back into debug mode to identify the cause, meanwhile setting CDCLK=Pixel rate back in order to remove the regression seen in 10% of the cases due to FIFO underruns.
Signed-off-by: Stanislav Lisovskiy Fixes: cd1915460861 ("drm/i915: Adjust CDCLK accordingly to our DBuf bw needs") Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20200608065552.21728-1-stanislav.lisovskiy@intel.com --- drivers/gpu/drm/i915/display/intel_cdclk.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/gpu/drm/i915/display/intel_cdclk.c b/drivers/gpu/drm/i915/display/intel_cdclk.c index 08468b121d02..45f7f33d1144 100644 --- a/drivers/gpu/drm/i915/display/intel_cdclk.c +++ b/drivers/gpu/drm/i915/display/intel_cdclk.c @@ -2071,6 +2071,18 @@ int intel_crtc_compute_min_cdclk(const struct intel_crtc_state *crtc_state) /* Account for additional needs from the planes */ min_cdclk = max(intel_planes_min_cdclk(crtc_state), min_cdclk); + /* + * HACK. Currently for TGL platforms we calculate + * min_cdclk initially based on pixel_rate divided + * by 2, accounting for also plane requirements, + * however in some cases the lowest possible CDCLK + * doesn't work and causing the underruns. + * Explicitly stating here that this seems to be currently + * rather a Hack, than final solution. + */ + if (IS_TIGERLAKE(dev_priv)) + min_cdclk = max(min_cdclk, (int)crtc_state->pixel_rate); + if (min_cdclk > dev_priv->max_cdclk_freq) { drm_dbg_kms(&dev_priv->drm, "required cdclk (%d kHz) exceeds max (%d kHz)\n", From 8733a06323d40ecfb8208ee5f85bf9d39ce6fd34 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Sun, 7 Jun 2020 23:20:41 +0100 Subject: [PATCH 144/222] drm/i915: Adjust the sentinel assert to match implementation Sentinels are supposed to be last requests in the elsp queue, not the only one, so adjust the assert accordingly. 
Signed-off-by: Tvrtko Ursulin Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20200607222108.14401-1-chris@chris-wilson.co.uk Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/gt/intel_lrc.c | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index d55a5e0466e5..a057f7a2a521 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -1635,9 +1635,9 @@ assert_pending_valid(const struct intel_engine_execlists *execlists, ccid = ce->lrc.ccid; /* - * Sentinels are supposed to be lonely so they flush the - * current exection off the HW. Check that they are the - * only request in the pending submission. + * Sentinels are supposed to be the last request so they flush + * the current execution off the HW. Check that no request follows + * them in the pending submission. */ if (sentinel) { GEM_TRACE_ERR("%s: context:%llx after sentinel in pending[%zd]\n", @@ -1646,15 +1646,7 @@ assert_pending_valid(const struct intel_engine_execlists *execlists, port - execlists->pending); return false; } - sentinel = i915_request_has_sentinel(rq); - if (sentinel && port != execlists->pending) { - GEM_TRACE_ERR("%s: sentinel context:%llx not in prime position[%zd]\n", - engine->name, - ce->timeline->fence_context, - port - execlists->pending); - return false; - } /* Hold tightly onto the lock to prevent concurrent retires! */ if (!spin_trylock_irqsave(&rq->lock, flags)) From 94ed47531d7cd9c60109f52bba582aa85b7c4415 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sun, 7 Jun 2020 23:20:42 +0100 Subject: [PATCH 145/222] drm/i915/selftests: Make the hanging request non-preemptible In some of our hangtests, we try to reset an active engine while it is spinning inside the recursive spinner.
However, we also try to flood the engine with requests that preempt the hang, and so should disable the preemption to be sure that we reset the right request. Signed-off-by: Chris Wilson Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20200607222108.14401-2-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/selftest_hangcheck.c | 36 ++++++++++++++------ 1 file changed, 26 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c index 4aa4cc917d8b..035f363fb0f8 100644 --- a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c +++ b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c @@ -203,12 +203,12 @@ hang_create_request(struct hang *h, struct intel_engine_cs *engine) *batch++ = lower_32_bits(hws_address(hws, rq)); *batch++ = upper_32_bits(hws_address(hws, rq)); *batch++ = rq->fence.seqno; - *batch++ = MI_ARB_CHECK; + *batch++ = MI_NOOP; memset(batch, 0, 1024); batch += 1024 / sizeof(*batch); - *batch++ = MI_ARB_CHECK; + *batch++ = MI_NOOP; *batch++ = MI_BATCH_BUFFER_START | 1 << 8 | 1; *batch++ = lower_32_bits(vma->node.start); *batch++ = upper_32_bits(vma->node.start); @@ -217,12 +217,12 @@ hang_create_request(struct hang *h, struct intel_engine_cs *engine) *batch++ = 0; *batch++ = lower_32_bits(hws_address(hws, rq)); *batch++ = rq->fence.seqno; - *batch++ = MI_ARB_CHECK; + *batch++ = MI_NOOP; memset(batch, 0, 1024); batch += 1024 / sizeof(*batch); - *batch++ = MI_ARB_CHECK; + *batch++ = MI_NOOP; *batch++ = MI_BATCH_BUFFER_START | 1 << 8; *batch++ = lower_32_bits(vma->node.start); } else if (INTEL_GEN(gt->i915) >= 4) { @@ -230,24 +230,24 @@ hang_create_request(struct hang *h, struct intel_engine_cs *engine) *batch++ = 0; *batch++ = lower_32_bits(hws_address(hws, rq)); *batch++ = rq->fence.seqno; - *batch++ = MI_ARB_CHECK; + *batch++ = MI_NOOP; memset(batch, 0, 1024); batch += 1024 / sizeof(*batch); - *batch++ = MI_ARB_CHECK; + *batch++ = MI_NOOP; *batch++ = 
MI_BATCH_BUFFER_START | 2 << 6; *batch++ = lower_32_bits(vma->node.start); } else { *batch++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL; *batch++ = lower_32_bits(hws_address(hws, rq)); *batch++ = rq->fence.seqno; - *batch++ = MI_ARB_CHECK; + *batch++ = MI_NOOP; memset(batch, 0, 1024); batch += 1024 / sizeof(*batch); - *batch++ = MI_ARB_CHECK; + *batch++ = MI_NOOP; *batch++ = MI_BATCH_BUFFER_START | 2 << 6; *batch++ = lower_32_bits(vma->node.start); } @@ -866,13 +866,29 @@ static int __igt_reset_engines(struct intel_gt *gt, count++; if (rq) { + if (rq->fence.error != -EIO) { + pr_err("i915_reset_engine(%s:%s):" + " failed to reset request %llx:%lld\n", + engine->name, test_name, + rq->fence.context, + rq->fence.seqno); + i915_request_put(rq); + + GEM_TRACE_DUMP(); + intel_gt_set_wedged(gt); + err = -EIO; + break; + } + if (i915_request_wait(rq, 0, HZ / 5) < 0) { struct drm_printer p = drm_info_printer(gt->i915->drm.dev); pr_err("i915_reset_engine(%s:%s):" - " failed to complete request after reset\n", - engine->name, test_name); + " failed to complete request %llx:%lld after reset\n", + engine->name, test_name, + rq->fence.context, + rq->fence.seqno); intel_engine_dump(engine, &p, "%s\n", engine->name); i915_request_put(rq); From 62afef2811e4171ddde5bf4985008f06e57a8431 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Fri, 5 Jun 2020 19:57:34 -0700 Subject: [PATCH 146/222] drm/i915/rkl: RKL uses ABOX0 for pixel transfers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Rocket Lake uses the same 'abox0' mechanism to handle pixel data transfers from memory that gen11 platforms used, rather than the abox1/abox2 interfaces used by TGL/DG1. For the most part this is a hardware implementation detail that's transparent to driver software, but we do have to program a couple of tuning registers (MBUS_ABOX_CTL and BW_BUDDY registers) according to which ABOX instances are used by a platform. 
Let's track the platform's ABOX usage in the device info structure and use that to determine which instances of these registers to program. As an exception to this rule is that even though TGL/DG1 use ABOX1+ABOX2 for data transfers, we're still directed to program the ABOX_CTL register for ABOX0; so we'll handle that as a special case. v2: - Store the mask of platform-specific abox registers in the device info structure. - Add a TLB_REQ_TIMER() helper macro. (Aditya) v3: - Squash ABOX and BW_BUDDY patches together and use a single mask for both of them, plus a special-case for programming the ABOX0 instance on all gen12. (Ville) Bspec: 50096 Bspec: 49218 Cc: Ville Syrjälä Cc: Aditya Swarup Signed-off-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20200606025740.3308880-2-matthew.d.roper@intel.com Reviewed-by: Ville Syrjälä --- .../drm/i915/display/intel_display_power.c | 53 ++++++++++--------- drivers/gpu/drm/i915/i915_pci.c | 3 ++ drivers/gpu/drm/i915/i915_reg.h | 24 ++++++--- drivers/gpu/drm/i915/intel_device_info.h | 2 + 4 files changed, 51 insertions(+), 31 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_display_power.c b/drivers/gpu/drm/i915/display/intel_display_power.c index 72312b67b57a..24a2aa1fdc9c 100644 --- a/drivers/gpu/drm/i915/display/intel_display_power.c +++ b/drivers/gpu/drm/i915/display/intel_display_power.c @@ -4760,7 +4760,8 @@ static void gen9_dbuf_disable(struct drm_i915_private *dev_priv) static void icl_mbus_init(struct drm_i915_private *dev_priv) { - u32 mask, val; + unsigned long abox_regs = INTEL_INFO(dev_priv)->abox_mask; + u32 mask, val, i; mask = MBUS_ABOX_BT_CREDIT_POOL1_MASK | MBUS_ABOX_BT_CREDIT_POOL2_MASK | @@ -4771,11 +4772,16 @@ static void icl_mbus_init(struct drm_i915_private *dev_priv) MBUS_ABOX_B_CREDIT(1) | MBUS_ABOX_BW_CREDIT(1); - intel_de_rmw(dev_priv, MBUS_ABOX_CTL, mask, val); - if (INTEL_GEN(dev_priv) >= 12) { - intel_de_rmw(dev_priv, MBUS_ABOX1_CTL, mask, val); - 
intel_de_rmw(dev_priv, MBUS_ABOX2_CTL, mask, val); - } + /* + * gen12 platforms that use abox1 and abox2 for pixel data reads still + * expect us to program the abox_ctl0 register as well, even though + * we don't have to program other instance-0 registers like BW_BUDDY. + */ + if (IS_GEN(dev_priv, 12)) + abox_regs |= BIT(0); + + for_each_set_bit(i, &abox_regs, sizeof(abox_regs)) + intel_de_rmw(dev_priv, MBUS_ABOX_CTL(i), mask, val); } static void hsw_assert_cdclk(struct drm_i915_private *dev_priv) @@ -5254,7 +5260,8 @@ static void tgl_bw_buddy_init(struct drm_i915_private *dev_priv) enum intel_dram_type type = dev_priv->dram_info.type; u8 num_channels = dev_priv->dram_info.num_channels; const struct buddy_page_mask *table; - int i; + unsigned long abox_mask = INTEL_INFO(dev_priv)->abox_mask; + int config, i; if (IS_TGL_REVID(dev_priv, TGL_REVID_A0, TGL_REVID_B0)) /* Wa_1409767108: tgl */ @@ -5262,29 +5269,27 @@ static void tgl_bw_buddy_init(struct drm_i915_private *dev_priv) else table = tgl_buddy_page_masks; - for (i = 0; table[i].page_mask != 0; i++) - if (table[i].num_channels == num_channels && - table[i].type == type) + for (config = 0; table[config].page_mask != 0; config++) + if (table[config].num_channels == num_channels && + table[config].type == type) break; - if (table[i].page_mask == 0) { + if (table[config].page_mask == 0) { drm_dbg(&dev_priv->drm, "Unknown memory configuration; disabling address buddy logic.\n"); - intel_de_write(dev_priv, BW_BUDDY1_CTL, BW_BUDDY_DISABLE); - intel_de_write(dev_priv, BW_BUDDY2_CTL, BW_BUDDY_DISABLE); + for_each_set_bit(i, &abox_mask, sizeof(abox_mask)) + intel_de_write(dev_priv, BW_BUDDY_CTL(i), + BW_BUDDY_DISABLE); } else { - intel_de_write(dev_priv, BW_BUDDY1_PAGE_MASK, - table[i].page_mask); - intel_de_write(dev_priv, BW_BUDDY2_PAGE_MASK, - table[i].page_mask); + for_each_set_bit(i, &abox_mask, sizeof(abox_mask)) { + intel_de_write(dev_priv, BW_BUDDY_PAGE_MASK(i), + table[config].page_mask); - /* Wa_22010178259:tgl 
*/ - intel_de_rmw(dev_priv, BW_BUDDY1_CTL, - BW_BUDDY_TLB_REQ_TIMER_MASK, - REG_FIELD_PREP(BW_BUDDY_TLB_REQ_TIMER_MASK, 0x8)); - intel_de_rmw(dev_priv, BW_BUDDY2_CTL, - BW_BUDDY_TLB_REQ_TIMER_MASK, - REG_FIELD_PREP(BW_BUDDY_TLB_REQ_TIMER_MASK, 0x8)); + /* Wa_22010178259:tgl,rkl */ + intel_de_rmw(dev_priv, BW_BUDDY_CTL(i), + BW_BUDDY_TLB_REQ_TIMER_MASK, + BW_BUDDY_TLB_REQ_TIMER(0x8)); + } } } diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index 8d0212d65828..498d8c982540 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -804,6 +804,7 @@ static const struct intel_device_info cnl_info = { #define GEN11_FEATURES \ GEN10_FEATURES, \ GEN11_DEFAULT_PAGE_SIZES, \ + .abox_mask = BIT(0), \ .cpu_transcoder_mask = BIT(TRANSCODER_A) | BIT(TRANSCODER_B) | \ BIT(TRANSCODER_C) | BIT(TRANSCODER_EDP) | \ BIT(TRANSCODER_DSI_0) | BIT(TRANSCODER_DSI_1), \ @@ -847,6 +848,7 @@ static const struct intel_device_info ehl_info = { #define GEN12_FEATURES \ GEN11_FEATURES, \ GEN(12), \ + .abox_mask = GENMASK(2, 1), \ .pipe_mask = BIT(PIPE_A) | BIT(PIPE_B) | BIT(PIPE_C) | BIT(PIPE_D), \ .cpu_transcoder_mask = BIT(TRANSCODER_A) | BIT(TRANSCODER_B) | \ BIT(TRANSCODER_C) | BIT(TRANSCODER_D) | \ @@ -882,6 +884,7 @@ static const struct intel_device_info tgl_info = { static const struct intel_device_info rkl_info = { GEN12_FEATURES, PLATFORM(INTEL_ROCKETLAKE), + .abox_mask = BIT(0), .pipe_mask = BIT(PIPE_A) | BIT(PIPE_B) | BIT(PIPE_C), .cpu_transcoder_mask = BIT(TRANSCODER_A) | BIT(TRANSCODER_B) | BIT(TRANSCODER_C), diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 4066f67175dc..9aca6d778220 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -2879,9 +2879,12 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define LM_FIFO_WATERMARK 0x0000001F #define MI_ARB_STATE _MMIO(0x20e4) /* 915+ only */ -#define MBUS_ABOX_CTL _MMIO(0x45038) -#define MBUS_ABOX1_CTL 
_MMIO(0x45048) -#define MBUS_ABOX2_CTL _MMIO(0x4504C) +#define _MBUS_ABOX0_CTL 0x45038 +#define _MBUS_ABOX1_CTL 0x45048 +#define _MBUS_ABOX2_CTL 0x4504C +#define MBUS_ABOX_CTL(x) _MMIO(_PICK(x, _MBUS_ABOX0_CTL, \ + _MBUS_ABOX1_CTL, \ + _MBUS_ABOX2_CTL)) #define MBUS_ABOX_BW_CREDIT_MASK (3 << 20) #define MBUS_ABOX_BW_CREDIT(x) ((x) << 20) #define MBUS_ABOX_B_CREDIT_MASK (0xF << 16) @@ -7853,13 +7856,20 @@ enum { #define WAIT_FOR_PCH_RESET_ACK (1 << 1) #define WAIT_FOR_PCH_FLR_ACK (1 << 0) -#define BW_BUDDY1_CTL _MMIO(0x45140) -#define BW_BUDDY2_CTL _MMIO(0x45150) +#define _BW_BUDDY0_CTL 0x45130 +#define _BW_BUDDY1_CTL 0x45140 +#define BW_BUDDY_CTL(x) _MMIO(_PICK_EVEN(x, \ + _BW_BUDDY0_CTL, \ + _BW_BUDDY1_CTL)) #define BW_BUDDY_DISABLE REG_BIT(31) #define BW_BUDDY_TLB_REQ_TIMER_MASK REG_GENMASK(21, 16) +#define BW_BUDDY_TLB_REQ_TIMER(x) REG_FIELD_PREP(BW_BUDDY_TLB_REQ_TIMER_MASK, x) -#define BW_BUDDY1_PAGE_MASK _MMIO(0x45144) -#define BW_BUDDY2_PAGE_MASK _MMIO(0x45154) +#define _BW_BUDDY0_PAGE_MASK 0x45134 +#define _BW_BUDDY1_PAGE_MASK 0x45144 +#define BW_BUDDY_PAGE_MASK(x) _MMIO(_PICK_EVEN(x, \ + _BW_BUDDY0_PAGE_MASK, \ + _BW_BUDDY1_PAGE_MASK)) #define HSW_NDE_RSTWRN_OPT _MMIO(0x46408) #define RESET_PCH_HANDSHAKE_ENABLE (1 << 4) diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h index 34dbffd65bad..8d62b8538585 100644 --- a/drivers/gpu/drm/i915/intel_device_info.h +++ b/drivers/gpu/drm/i915/intel_device_info.h @@ -175,6 +175,8 @@ struct intel_device_info { u8 pipe_mask; u8 cpu_transcoder_mask; + u8 abox_mask; + #define DEFINE_FLAG(name) u8 name:1 DEV_INFO_FOR_EACH_FLAG(DEFINE_FLAG); #undef DEFINE_FLAG From 2cf122070c5681ea78d384c4e86a7d80f16bd1b7 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Fri, 5 Jun 2020 19:57:36 -0700 Subject: [PATCH 147/222] drm/i915/rkl: Update TGP's pin mapping when paired with RKL MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit HPD pin handling for 
RKL+TGP is a special case; we effectively select the HPD pin based on the DDI (A,B,D,E) rather than the PHY (A,B,C,D). This differs from the regular behavior of RKL+CMP (and also TGL+TGP). v2: - Rather than providing a custom hpd_pin mapping table, just assign encoder->hpd_pin in a custom manner for this setup. (Ville) Cc: Ville Syrjälä Signed-off-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20200606025740.3308880-4-matthew.d.roper@intel.com Reviewed-by: Ville Syrjälä --- drivers/gpu/drm/i915/display/intel_hotplug.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/gpu/drm/i915/display/intel_hotplug.c b/drivers/gpu/drm/i915/display/intel_hotplug.c index 4f6f560e093e..d794dd5f170c 100644 --- a/drivers/gpu/drm/i915/display/intel_hotplug.c +++ b/drivers/gpu/drm/i915/display/intel_hotplug.c @@ -89,6 +89,15 @@ enum hpd_pin intel_hpd_pin_default(struct drm_i915_private *dev_priv, { enum phy phy = intel_port_to_phy(dev_priv, port); + /* + * RKL + TGP PCH is a special case; we effectively choose the hpd_pin + * based on the DDI rather than the PHY (i.e., the last two outputs + * should be HPD_PORT_{D,E} rather than {C,D}). Note that this differs + * from the behavior of both TGL+TGP and RKL+CMP. + */ + if (IS_ROCKETLAKE(dev_priv) && HAS_PCH_TGP(dev_priv)) + return HPD_PORT_A + port - PORT_A; + switch (phy) { case PHY_F: return IS_CNL_WITH_PORT_F(dev_priv) ? HPD_PORT_E : HPD_PORT_F; From 617458cdc366c1c00e5934b6513f3d2b2c264c14 Mon Sep 17 00:00:00 2001 From: Aditya Swarup Date: Fri, 5 Jun 2020 19:57:37 -0700 Subject: [PATCH 148/222] drm/i915/rkl: Don't try to read out DSI transcoders MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit RKL doesn't have DSI outputs, so we shouldn't try to read out the DSI transcoder registers.
v2(MattR): - Just set the 'extra panel mask' to edp | dsi0 | dsi1 and then mask against the platform's cpu_transcoder_mask to filter out the ones that don't exist on a given platform. (Ville) v3(MattR): - Only include DSI transcoders on gen11+ again. (Ville) - Use for_each_cpu_transcoder_masked() for loop. (Ville) Cc: Ville Syrjälä Signed-off-by: Aditya Swarup Signed-off-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20200606025740.3308880-5-matthew.d.roper@intel.com Reviewed-by: Ville Syrjälä --- drivers/gpu/drm/i915/display/intel_display.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 3e6ef5bf1284..34d1b7f1b140 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -10897,7 +10897,7 @@ static bool hsw_get_transcoder_state(struct intel_crtc *crtc, struct drm_device *dev = crtc->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); enum intel_display_power_domain power_domain; - unsigned long panel_transcoder_mask = 0; + unsigned long panel_transcoder_mask = BIT(TRANSCODER_EDP); unsigned long enabled_panel_transcoders = 0; enum transcoder panel_transcoder; intel_wakeref_t wf; @@ -10907,9 +10907,6 @@ static bool hsw_get_transcoder_state(struct intel_crtc *crtc, panel_transcoder_mask |= BIT(TRANSCODER_DSI_0) | BIT(TRANSCODER_DSI_1); - if (HAS_TRANSCODER(dev_priv, TRANSCODER_EDP)) - panel_transcoder_mask |= BIT(TRANSCODER_EDP); - /* * The pipe->transcoder mapping is fixed with the exception of the eDP * and DSI transcoders handled below. @@ -10920,9 +10917,8 @@ static bool hsw_get_transcoder_state(struct intel_crtc *crtc, * XXX: Do intel_display_power_get_if_enabled before reading this (for * consistency and less surprising code; it's in always on power). 
*/ - for_each_set_bit(panel_transcoder, - &panel_transcoder_mask, - ARRAY_SIZE(INTEL_INFO(dev_priv)->trans_offsets)) { + for_each_cpu_transcoder_masked(dev_priv, panel_transcoder, + panel_transcoder_mask) { bool force_thru = false; enum pipe trans_pipe; From 4f72a8ee819d57d7329d88f487a2fc9b45153177 Mon Sep 17 00:00:00 2001 From: Khaled Almahallawy Date: Mon, 8 Jun 2020 13:45:37 -0700 Subject: [PATCH 149/222] drm/i915/tc: fix the reset of ln0 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Setting ln0 similar to ln1 Fixes: 3b51be4e4061b ("drm/i915/tc: Update DP_MODE programming") Cc: # v5.5+ Signed-off-by: Khaled Almahallawy Reviewed-by: José Roberto de Souza Signed-off-by: Imre Deak Link: https://patchwork.freedesktop.org/patch/msgid/20200608204537.28468-1-khaled.almahallawy@intel.com --- drivers/gpu/drm/i915/display/intel_ddi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c index 96eaa4b39c68..1c0c369573e7 100644 --- a/drivers/gpu/drm/i915/display/intel_ddi.c +++ b/drivers/gpu/drm/i915/display/intel_ddi.c @@ -3025,7 +3025,7 @@ icl_program_mg_dp_mode(struct intel_digital_port *intel_dig_port, ln1 = intel_de_read(dev_priv, MG_DP_MODE(1, tc_port)); } - ln0 &= ~(MG_DP_MODE_CFG_DP_X1_MODE | MG_DP_MODE_CFG_DP_X1_MODE); + ln0 &= ~(MG_DP_MODE_CFG_DP_X1_MODE | MG_DP_MODE_CFG_DP_X2_MODE); ln1 &= ~(MG_DP_MODE_CFG_DP_X1_MODE | MG_DP_MODE_CFG_DP_X2_MODE); /* DPPATC */ From e36ba817fa966f81fb1c8d16f3721b5a644b2fa9 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 9 Jun 2020 16:17:23 +0100 Subject: [PATCH 150/222] drm/i915/gt: Incrementally check for rewinding In commit 5ba32c7be81e ("drm/i915/execlists: Always force a context reload when rewinding RING_TAIL"), we placed the check for rewinding a context on actually submitting the next request in that context. 
This was so that we only had to check once, and could do so with precision, avoiding as many forced restores as possible. For example, to ensure that we can resubmit the same request a couple of times, we include a small wa_tail such that on the next submission, the ring->tail will appear to move forwards when resubmitting the same request. This is very common as it will happen for every lite-restore to fill the second port after a context switch. However, intel_ring_direction() is limited in precision to movements of up to half the ring size. The consequence is that if we tried to unwind many requests, we could exceed half the ring and flip the sense of the direction, so missing a force restore. As no request can be greater than half the ring (i.e. 2048 bytes in the smallest case), we can check for rollback incrementally. As we check against the tail that would be submitted, we do not lose any sensitivity and allow lite restores for the simple case. We still need to double check upon submitting the context, to allow for multiple preemptions and resubmissions.
Fixes: 5ba32c7be81e ("drm/i915/execlists: Always force a context reload when rewinding RING_TAIL") Signed-off-by: Chris Wilson Cc: Mika Kuoppala Cc: # v5.4+ Reviewed-by: Bruce Chang Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20200609151723.12971-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/intel_engine_cs.c | 4 +- drivers/gpu/drm/i915/gt/intel_lrc.c | 21 +++- drivers/gpu/drm/i915/gt/intel_ring.c | 4 + drivers/gpu/drm/i915/gt/selftest_mocs.c | 18 ++- drivers/gpu/drm/i915/gt/selftest_ring.c | 110 ++++++++++++++++++ .../drm/i915/selftests/i915_mock_selftests.h | 1 + 6 files changed, 154 insertions(+), 4 deletions(-) create mode 100644 drivers/gpu/drm/i915/gt/selftest_ring.c diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index e5141a897786..0a05301e00fb 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -646,7 +646,7 @@ static int engine_setup_common(struct intel_engine_cs *engine) struct measure_breadcrumb { struct i915_request rq; struct intel_ring ring; - u32 cs[1024]; + u32 cs[2048]; }; static int measure_breadcrumb_dw(struct intel_context *ce) @@ -667,6 +667,8 @@ static int measure_breadcrumb_dw(struct intel_context *ce) frame->ring.vaddr = frame->cs; frame->ring.size = sizeof(frame->cs); + frame->ring.wrap = + BITS_PER_TYPE(frame->ring.size) - ilog2(frame->ring.size); frame->ring.effective_size = frame->ring.size; intel_ring_update_space(&frame->ring); frame->rq.ring = &frame->ring; diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index a057f7a2a521..5ab0ed35af84 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -1137,6 +1137,13 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine) list_move(&rq->sched.link, pl); set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); + /* Check in case we rollback so far we wrap [size/2] */ + 
if (intel_ring_direction(rq->ring, + intel_ring_wrap(rq->ring, + rq->tail), + rq->ring->tail) > 0) + rq->context->lrc.desc |= CTX_DESC_FORCE_RESTORE; + active = rq; } else { struct intel_engine_cs *owner = rq->context->engine; @@ -1505,8 +1512,9 @@ static u64 execlists_update_context(struct i915_request *rq) * HW has a tendency to ignore us rewinding the TAIL to the end of * an earlier request. */ + GEM_BUG_ON(ce->lrc_reg_state[CTX_RING_TAIL] != rq->ring->tail); + prev = rq->ring->tail; tail = intel_ring_set_tail(rq->ring, rq->tail); - prev = ce->lrc_reg_state[CTX_RING_TAIL]; if (unlikely(intel_ring_direction(rq->ring, tail, prev) <= 0)) desc |= CTX_DESC_FORCE_RESTORE; ce->lrc_reg_state[CTX_RING_TAIL] = tail; @@ -4758,6 +4766,14 @@ static int gen12_emit_flush(struct i915_request *request, u32 mode) return 0; } +static void assert_request_valid(struct i915_request *rq) +{ + struct intel_ring *ring __maybe_unused = rq->ring; + + /* Can we unwind this request without appearing to go forwards? 
*/ + GEM_BUG_ON(intel_ring_direction(ring, rq->wa_tail, rq->head) <= 0); +} + /* * Reserve space for 2 NOOPs at the end of each request to be * used as a workaround for not being allowed to do lite @@ -4770,6 +4786,9 @@ static u32 *gen8_emit_wa_tail(struct i915_request *request, u32 *cs) *cs++ = MI_NOOP; request->wa_tail = intel_ring_offset(request, cs); + /* Check that entire request is less than half the ring */ + assert_request_valid(request); + return cs; } diff --git a/drivers/gpu/drm/i915/gt/intel_ring.c b/drivers/gpu/drm/i915/gt/intel_ring.c index 8cda1b7e17ba..bdb324167ef3 100644 --- a/drivers/gpu/drm/i915/gt/intel_ring.c +++ b/drivers/gpu/drm/i915/gt/intel_ring.c @@ -315,3 +315,7 @@ int intel_ring_cacheline_align(struct i915_request *rq) GEM_BUG_ON(rq->ring->emit & (CACHELINE_BYTES - 1)); return 0; } + +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) +#include "selftest_ring.c" +#endif diff --git a/drivers/gpu/drm/i915/gt/selftest_mocs.c b/drivers/gpu/drm/i915/gt/selftest_mocs.c index 7bae64018ad9..b25eba50c88e 100644 --- a/drivers/gpu/drm/i915/gt/selftest_mocs.c +++ b/drivers/gpu/drm/i915/gt/selftest_mocs.c @@ -18,6 +18,20 @@ struct live_mocs { void *vaddr; }; +static struct intel_context *mocs_context_create(struct intel_engine_cs *engine) +{ + struct intel_context *ce; + + ce = intel_context_create(engine); + if (IS_ERR(ce)) + return ce; + + /* We build large requests to read the registers from the ring */ + ce->ring = __intel_context_ring_size(SZ_16K); + + return ce; +} + static int request_add_sync(struct i915_request *rq, int err) { i915_request_get(rq); @@ -301,7 +315,7 @@ static int live_mocs_clean(void *arg) for_each_engine(engine, gt, id) { struct intel_context *ce; - ce = intel_context_create(engine); + ce = mocs_context_create(engine); if (IS_ERR(ce)) { err = PTR_ERR(ce); break; @@ -395,7 +409,7 @@ static int live_mocs_reset(void *arg) for_each_engine(engine, gt, id) { struct intel_context *ce; - ce = intel_context_create(engine); + ce = 
mocs_context_create(engine); if (IS_ERR(ce)) { err = PTR_ERR(ce); break; diff --git a/drivers/gpu/drm/i915/gt/selftest_ring.c b/drivers/gpu/drm/i915/gt/selftest_ring.c new file mode 100644 index 000000000000..2a8c534dc125 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/selftest_ring.c @@ -0,0 +1,110 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright © 2020 Intel Corporation + */ + +static struct intel_ring *mock_ring(unsigned long sz) +{ + struct intel_ring *ring; + + ring = kzalloc(sizeof(*ring) + sz, GFP_KERNEL); + if (!ring) + return NULL; + + kref_init(&ring->ref); + ring->size = sz; + ring->wrap = BITS_PER_TYPE(ring->size) - ilog2(sz); + ring->effective_size = sz; + ring->vaddr = (void *)(ring + 1); + atomic_set(&ring->pin_count, 1); + + intel_ring_update_space(ring); + + return ring; +} + +static void mock_ring_free(struct intel_ring *ring) +{ + kfree(ring); +} + +static int check_ring_direction(struct intel_ring *ring, + u32 next, u32 prev, + int expected) +{ + int result; + + result = intel_ring_direction(ring, next, prev); + if (result < 0) + result = -1; + else if (result > 0) + result = 1; + + if (result != expected) { + pr_err("intel_ring_direction(%u, %u):%d != %d\n", + next, prev, result, expected); + return -EINVAL; + } + + return 0; +} + +static int check_ring_step(struct intel_ring *ring, u32 x, u32 step) +{ + u32 prev = x, next = intel_ring_wrap(ring, x + step); + int err = 0; + + err |= check_ring_direction(ring, next, next, 0); + err |= check_ring_direction(ring, prev, prev, 0); + err |= check_ring_direction(ring, next, prev, 1); + err |= check_ring_direction(ring, prev, next, -1); + + return err; +} + +static int check_ring_offset(struct intel_ring *ring, u32 x, u32 step) +{ + int err = 0; + + err |= check_ring_step(ring, x, step); + err |= check_ring_step(ring, intel_ring_wrap(ring, x + 1), step); + err |= check_ring_step(ring, intel_ring_wrap(ring, x - 1), step); + + return err; +} + +static int igt_ring_direction(void *dummy) +{ + struct 
intel_ring *ring; + unsigned int half = 2048; + int step, err = 0; + + ring = mock_ring(2 * half); + if (!ring) + return -ENOMEM; + + GEM_BUG_ON(ring->size != 2 * half); + + /* Precision of wrap detection is limited to ring->size / 2 */ + for (step = 1; step < half; step <<= 1) { + err |= check_ring_offset(ring, 0, step); + err |= check_ring_offset(ring, half, step); + } + err |= check_ring_step(ring, 0, half - 64); + + /* And check unwrapped handling for good measure */ + err |= check_ring_offset(ring, 0, 2 * half + 64); + err |= check_ring_offset(ring, 3 * half, 1); + + mock_ring_free(ring); + return err; +} + +int intel_ring_mock_selftests(void) +{ + static const struct i915_subtest tests[] = { + SUBTEST(igt_ring_direction), + }; + + return i915_subtests(tests, NULL); +} diff --git a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h index 1929feba4e8e..3db34d3eea58 100644 --- a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h +++ b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h @@ -21,6 +21,7 @@ selftest(fence, i915_sw_fence_mock_selftests) selftest(scatterlist, scatterlist_mock_selftests) selftest(syncmap, i915_syncmap_mock_selftests) selftest(uncore, intel_uncore_mock_selftests) +selftest(ring, intel_ring_mock_selftests) selftest(engine, intel_engine_cs_mock_selftests) selftest(timelines, intel_timeline_mock_selftests) selftest(requests, i915_request_mock_selftests) From 174b976d56a8b57983bad43b798a56e7883c276f Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sun, 7 Jun 2020 23:20:43 +0100 Subject: [PATCH 151/222] drm/i915/selftests: Teach hang-self to target only itself We have a test case to exercise resetting an engine while the other engines are busy, all the TEST_SELF adds on top is that the target engine also has background activity. In this case it is useful to first test resetting the engine while there is background activity, as a separate flag from exercising all others. 
Signed-off-by: Chris Wilson Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20200607222108.14401-3-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/selftest_hangcheck.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c index 035f363fb0f8..2af66f8ffbd2 100644 --- a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c +++ b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c @@ -805,10 +805,10 @@ static int __igt_reset_engines(struct intel_gt *gt, threads[tmp].resets = i915_reset_engine_count(global, other); - if (!(flags & TEST_OTHERS)) + if (other == engine && !(flags & TEST_SELF)) continue; - if (other == engine && !(flags & TEST_SELF)) + if (other != engine && !(flags & TEST_OTHERS)) continue; threads[tmp].engine = other; @@ -999,7 +999,7 @@ static int igt_reset_engines(void *arg) }, { "self-priority", - TEST_OTHERS | TEST_ACTIVE | TEST_PRIORITY | TEST_SELF, + TEST_ACTIVE | TEST_PRIORITY | TEST_SELF, }, { } }; From 3e48e836cf063b815d4ec39dde6e1f78cf401879 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 10 Jun 2020 16:40:46 +0100 Subject: [PATCH 152/222] drm/i915/gt: Include context status in debug dumps This may be useful to identify contexts that are running even though they are supposed to be closed or banned. 
Signed-off-by: Chris Wilson Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20200610154046.22449-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/intel_engine_cs.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index 0a05301e00fb..d613cf31970c 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -1421,9 +1421,11 @@ static void intel_engine_print_registers(struct intel_engine_cs *engine, int len; len = scnprintf(hdr, sizeof(hdr), - "\t\tActive[%d]: ccid:%08x, ", + "\t\tActive[%d]: ccid:%08x%s%s, ", (int)(port - execlists->active), - rq->context->lrc.ccid); + rq->context->lrc.ccid, + intel_context_is_closed(rq->context) ? "!" : "", + intel_context_is_banned(rq->context) ? "*" : ""); len += print_ring(hdr + len, sizeof(hdr) - len, rq); scnprintf(hdr + len, sizeof(hdr) - len, "rq: "); print_request(m, rq, hdr); @@ -1433,9 +1435,11 @@ static void intel_engine_print_registers(struct intel_engine_cs *engine, int len; len = scnprintf(hdr, sizeof(hdr), - "\t\tPending[%d]: ccid:%08x, ", + "\t\tPending[%d]: ccid:%08x%s%s, ", (int)(port - execlists->pending), - rq->context->lrc.ccid); + rq->context->lrc.ccid, + intel_context_is_closed(rq->context) ? "!" : "", + intel_context_is_banned(rq->context) ? "*" : ""); len += print_ring(hdr + len, sizeof(hdr) - len, rq); scnprintf(hdr + len, sizeof(hdr) - len, "rq: "); print_request(m, rq, hdr); From c980216dd224c52b5c70172753c209b653d84958 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Wed, 10 Jun 2020 01:06:16 +0300 Subject: [PATCH 153/222] drm/i915/icl: Disable DIP on MST ports with the transcoder clock still on According to BSpec the Data Island Packet should be disabled after disabling the transcoder, but before the transcoder clock select is set to none. 
On an ICL RVP, daisy-chained MST config not following this leads to a hang with the following MCE when disabling the output: [ 870.948739] mce: [Hardware Error]: CPU 0: Machine Check Exception: 5 Bank 6: ba00000011000402 [ 871.019212] mce: [Hardware Error]: RIP !INEXACT! 10: {poll_idle+0x92/0xb0} [ 871.019212] mce: [Hardware Error]: TSC 135a261fe61 [ 871.019212] mce: [Hardware Error]: PROCESSOR 0:706e5 TIME 1591739604 SOCKET 0 APIC 0 microcode 20 [ 871.019212] mce: [Hardware Error]: Run the above through 'mcelog --ascii' [ 871.019212] mce: [Hardware Error]: Machine check: Processor context corrupt [ 871.019212] Kernel panic - not syncing: Fatal machine check [ 871.019212] Kernel Offset: disabled Bspec: 4287 Fixes: fa37a213275c ("drm/i915: Stop sending DP SDPs on ddi disable") Cc: Gwan-gyeong Mun Cc: Uma Shankar Signed-off-by: Imre Deak Reviewed-by: Uma Shankar Link: https://patchwork.freedesktop.org/patch/msgid/20200609220616.6015-1-imre.deak@intel.com --- drivers/gpu/drm/i915/display/intel_ddi.c | 4 +++- drivers/gpu/drm/i915/display/intel_dp_mst.c | 8 ++++++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c index 1c0c369573e7..d1acc39cdc11 100644 --- a/drivers/gpu/drm/i915/display/intel_ddi.c +++ b/drivers/gpu/drm/i915/display/intel_ddi.c @@ -3510,7 +3510,9 @@ static void intel_ddi_post_disable_dp(struct intel_atomic_state *state, INTEL_OUTPUT_DP_MST); enum phy phy = intel_port_to_phy(dev_priv, encoder->port); - intel_dp_set_infoframes(encoder, false, old_crtc_state, old_conn_state); + if (!is_mst) + intel_dp_set_infoframes(encoder, false, + old_crtc_state, old_conn_state); /* * Power down sink before disabling the port, otherwise we end diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.c b/drivers/gpu/drm/i915/display/intel_dp_mst.c index 8c732418a33f..c5cda3e24fd5 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_mst.c +++ 
b/drivers/gpu/drm/i915/display/intel_dp_mst.c @@ -399,6 +399,14 @@ static void intel_mst_post_disable_dp(struct intel_atomic_state *state, */ drm_dp_send_power_updown_phy(&intel_dp->mst_mgr, connector->port, false); + + /* + * BSpec 4287: disable DIP after the transcoder is disabled and before + * the transcoder clock select is set to none. + */ + if (last_mst_stream) + intel_dp_set_infoframes(&intel_dig_port->base, false, + old_crtc_state, NULL); /* * From TGL spec: "If multi-stream slave transcoder: Configure * Transcoder Clock Select to direct no clock to the transcoder" From f99fb309568dd561ad9c42b94a762247ac80325f Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Tue, 9 Jun 2020 21:41:40 +0300 Subject: [PATCH 154/222] drm/i915: Fix the i915_dsc_fec_support debugfs file for DP MST connectors MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit DSC is not supported on DP MST streams so just don't add this entry for MST connectors. This also fixes an OOPS, caused by the encoder->digport cast, which is not valid for MST encoders. v2: - Check encoder, which is unset for an MST connector, before it gets enabled. v3: - Just don't add this debugfs file for MST connectors. 
(Ville) Cc: Ville Syrjälä Signed-off-by: Imre Deak Reviewed-by: Manasi Navare Link: https://patchwork.freedesktop.org/patch/msgid/20200609184140.4937-1-imre.deak@intel.com --- drivers/gpu/drm/i915/display/intel_display_debugfs.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/intel_display_debugfs.c b/drivers/gpu/drm/i915/display/intel_display_debugfs.c index 3d9dc27478b3..852f64946b96 100644 --- a/drivers/gpu/drm/i915/display/intel_display_debugfs.c +++ b/drivers/gpu/drm/i915/display/intel_display_debugfs.c @@ -2224,7 +2224,8 @@ int intel_connector_debugfs_add(struct drm_connector *connector) } if (INTEL_GEN(dev_priv) >= 10 && - (connector->connector_type == DRM_MODE_CONNECTOR_DisplayPort || + ((connector->connector_type == DRM_MODE_CONNECTOR_DisplayPort && + !to_intel_connector(connector)->mst_port) || connector->connector_type == DRM_MODE_CONNECTOR_eDP)) debugfs_create_file("i915_dsc_fec_support", S_IRUGO, root, connector, &i915_dsc_fec_support_fops); From 8d712a7e01c8a94fb95c23722e383fe758ad586b Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Fri, 5 Jun 2020 12:48:01 +0300 Subject: [PATCH 155/222] drm/i915/dp_mst: Fix disabling MST on a port MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently MST on a port can get enabled/disabled from the hotplug work and get disabled from the short pulse work in a racy way. Fix this by relying on the MST state checking in the hotplug work and just schedule a hotplug work from the short pulse handler if some problem happened during the MST interrupt handling. This removes the explicit MST disabling in case of an AUX failure, but if AUX fails, then probably the detection will also fail during the scheduled hotplug work and it's not guaranteed that we'll see intermittent errors anyway. While at it also simplify the error checking of the MST interrupt handler. v2: - Convert intel_dp_check_mst_status() to return bool. 
(Ville) - Change the intel_dp->is_mst check to an assert, since after this patch the condition can't change after we checked it previously. - Document the return value from intel_dp_check_mst_status(). v3: - Remove the intel_dp->is_mst check from intel_dp_check_mst_status(). There is no point in checking the same condition twice, even though there is a chance that the hotplug work running concurrently changes it. Cc: José Roberto de Souza Cc: Ville Syrjälä Signed-off-by: Imre Deak Reviewed-by: José Roberto de Souza Link: https://patchwork.freedesktop.org/patch/msgid/20200605094801.17709-1-imre.deak@intel.com --- drivers/gpu/drm/i915/display/intel_dp.c | 66 ++++++++++--------------- 1 file changed, 26 insertions(+), 40 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index 707791489122..709ab7407a96 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -5556,35 +5556,46 @@ update_status: "Could not write test response to sink\n"); } -static int +/** + * intel_dp_check_mst_status - service any pending MST interrupts, check link status + * @intel_dp: Intel DP struct + * + * Read any pending MST interrupts, call MST core to handle these and ack the + * interrupts. Check if the main and AUX link state is ok. + * + * Returns: + * - %true if pending interrupts were serviced (or no interrupts were + * pending) w/o detecting an error condition. + * - %false if an error condition - like AUX failure or a loss of link - is + * detected, which needs servicing from the hotplug work. 
+ */ +static bool intel_dp_check_mst_status(struct intel_dp *intel_dp) { struct drm_i915_private *i915 = dp_to_i915(intel_dp); - bool need_retrain = false; - - if (!intel_dp->is_mst) - return -EINVAL; + bool link_ok = true; drm_WARN_ON_ONCE(&i915->drm, intel_dp->active_mst_links < 0); for (;;) { u8 esi[DP_DPRX_ESI_LEN] = {}; - bool bret, handled; + bool handled; int retry; - bret = intel_dp_get_sink_irq_esi(intel_dp, esi); - if (!bret) { + if (!intel_dp_get_sink_irq_esi(intel_dp, esi)) { drm_dbg_kms(&i915->drm, "failed to get ESI - device may have failed\n"); - return -EINVAL; + link_ok = false; + + break; } /* check link status - esi[10] = 0x200c */ - if (intel_dp->active_mst_links > 0 && !need_retrain && + if (intel_dp->active_mst_links > 0 && link_ok && !drm_dp_channel_eq_ok(&esi[10], intel_dp->lane_count)) { drm_dbg_kms(&i915->drm, "channel EQ not ok, retraining\n"); - need_retrain = true; + link_ok = false; } drm_dbg_kms(&i915->drm, "got esi %3ph\n", esi); @@ -5604,7 +5615,7 @@ intel_dp_check_mst_status(struct intel_dp *intel_dp) } } - return need_retrain; + return link_ok; } static bool @@ -7255,35 +7266,10 @@ intel_dp_hpd_pulse(struct intel_digital_port *intel_dig_port, bool long_hpd) } if (intel_dp->is_mst) { - switch (intel_dp_check_mst_status(intel_dp)) { - case -EINVAL: - /* - * If we were in MST mode, and device is not - * there, get out of MST mode - */ - drm_dbg_kms(&i915->drm, - "MST device may have disappeared %d vs %d\n", - intel_dp->is_mst, - intel_dp->mst_mgr.mst_state); - intel_dp->is_mst = false; - drm_dp_mst_topology_mgr_set_mst(&intel_dp->mst_mgr, - intel_dp->is_mst); - - return IRQ_NONE; - case 1: - return IRQ_NONE; - default: - break; - } - } - - if (!intel_dp->is_mst) { - bool handled; - - handled = intel_dp_short_pulse(intel_dp); - - if (!handled) + if (!intel_dp_check_mst_status(intel_dp)) return IRQ_NONE; + } else if (!intel_dp_short_pulse(intel_dp)) { + return IRQ_NONE; } return IRQ_HANDLED; From 
ad2ad80e646242bc9b0bb91f72a1dd211b7d8b4c Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sun, 7 Jun 2020 23:20:44 +0100 Subject: [PATCH 156/222] drm/i915/selftests: Remove live_suppress_wait_preempt With the removal of the internal wait-priority boosting, we can also remove the selftest that ensured those waits were suppressed from causing preemptions. Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20200607222108.14401-4-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/selftest_lrc.c | 178 ------------------------- 1 file changed, 178 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c index 67d74e6432a8..e838e38a262c 100644 --- a/drivers/gpu/drm/i915/gt/selftest_lrc.c +++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c @@ -2379,183 +2379,6 @@ err_wedged: goto err_client_b; } -static int __i915_sw_fence_call -dummy_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state) -{ - return NOTIFY_DONE; -} - -static struct i915_request *dummy_request(struct intel_engine_cs *engine) -{ - struct i915_request *rq; - - rq = kzalloc(sizeof(*rq), GFP_KERNEL); - if (!rq) - return NULL; - - rq->engine = engine; - - spin_lock_init(&rq->lock); - INIT_LIST_HEAD(&rq->fence.cb_list); - rq->fence.lock = &rq->lock; - rq->fence.ops = &i915_fence_ops; - - i915_sched_node_init(&rq->sched); - - /* mark this request as permanently incomplete */ - rq->fence.seqno = 1; - BUILD_BUG_ON(sizeof(rq->fence.seqno) != 8); /* upper 32b == 0 */ - rq->hwsp_seqno = (u32 *)&rq->fence.seqno + 1; - GEM_BUG_ON(i915_request_completed(rq)); - - i915_sw_fence_init(&rq->submit, dummy_notify); - set_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags); - - spin_lock_init(&rq->lock); - rq->fence.lock = &rq->lock; - INIT_LIST_HEAD(&rq->fence.cb_list); - - return rq; -} - -static void dummy_request_free(struct i915_request *dummy) -{ - /* We have to fake the CS interrupt to kick the next
request */ - i915_sw_fence_commit(&dummy->submit); - - i915_request_mark_complete(dummy); - dma_fence_signal(&dummy->fence); - - i915_sched_node_fini(&dummy->sched); - i915_sw_fence_fini(&dummy->submit); - - dma_fence_free(&dummy->fence); -} - -static int live_suppress_wait_preempt(void *arg) -{ - struct intel_gt *gt = arg; - struct preempt_client client[4]; - struct i915_request *rq[ARRAY_SIZE(client)] = {}; - struct intel_engine_cs *engine; - enum intel_engine_id id; - int err = -ENOMEM; - int i; - - /* - * Waiters are given a little priority nudge, but not enough - * to actually cause any preemption. Double check that we do - * not needlessly generate preempt-to-idle cycles. - */ - - if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915)) - return 0; - - if (preempt_client_init(gt, &client[0])) /* ELSP[0] */ - return -ENOMEM; - if (preempt_client_init(gt, &client[1])) /* ELSP[1] */ - goto err_client_0; - if (preempt_client_init(gt, &client[2])) /* head of queue */ - goto err_client_1; - if (preempt_client_init(gt, &client[3])) /* bystander */ - goto err_client_2; - - for_each_engine(engine, gt, id) { - int depth; - - if (!intel_engine_has_preemption(engine)) - continue; - - if (!engine->emit_init_breadcrumb) - continue; - - for (depth = 0; depth < ARRAY_SIZE(client); depth++) { - struct i915_request *dummy; - - engine->execlists.preempt_hang.count = 0; - - dummy = dummy_request(engine); - if (!dummy) - goto err_client_3; - - for (i = 0; i < ARRAY_SIZE(client); i++) { - struct i915_request *this; - - this = spinner_create_request(&client[i].spin, - client[i].ctx, engine, - MI_NOOP); - if (IS_ERR(this)) { - err = PTR_ERR(this); - goto err_wedged; - } - - /* Disable NEWCLIENT promotion */ - __i915_active_fence_set(&i915_request_timeline(this)->last_request, - &dummy->fence); - - rq[i] = i915_request_get(this); - i915_request_add(this); - } - - dummy_request_free(dummy); - - GEM_BUG_ON(i915_request_completed(rq[0])); - if (!igt_wait_for_spinner(&client[0].spin, rq[0])) { - 
pr_err("%s: First client failed to start\n", - engine->name); - goto err_wedged; - } - GEM_BUG_ON(!i915_request_started(rq[0])); - - if (i915_request_wait(rq[depth], - I915_WAIT_PRIORITY, - 1) != -ETIME) { - pr_err("%s: Waiter depth:%d completed!\n", - engine->name, depth); - goto err_wedged; - } - - for (i = 0; i < ARRAY_SIZE(client); i++) { - igt_spinner_end(&client[i].spin); - i915_request_put(rq[i]); - rq[i] = NULL; - } - - if (igt_flush_test(gt->i915)) - goto err_wedged; - - if (engine->execlists.preempt_hang.count) { - pr_err("%s: Preemption recorded x%d, depth %d; should have been suppressed!\n", - engine->name, - engine->execlists.preempt_hang.count, - depth); - err = -EINVAL; - goto err_client_3; - } - } - } - - err = 0; -err_client_3: - preempt_client_fini(&client[3]); -err_client_2: - preempt_client_fini(&client[2]); -err_client_1: - preempt_client_fini(&client[1]); -err_client_0: - preempt_client_fini(&client[0]); - return err; - -err_wedged: - for (i = 0; i < ARRAY_SIZE(client); i++) { - igt_spinner_end(&client[i].spin); - i915_request_put(rq[i]); - } - intel_gt_set_wedged(gt); - err = -EIO; - goto err_client_3; -} - static int live_chain_preempt(void *arg) { struct intel_gt *gt = arg; @@ -4592,7 +4415,6 @@ int intel_execlists_live_selftests(struct drm_i915_private *i915) SUBTEST(live_nopreempt), SUBTEST(live_preempt_cancel), SUBTEST(live_suppress_self_preempt), - SUBTEST(live_suppress_wait_preempt), SUBTEST(live_chain_preempt), SUBTEST(live_preempt_gang), SUBTEST(live_preempt_timeout), From f93ec5fb563779bda4501890b1854526de58e0f1 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 11 Jun 2020 10:30:15 +0100 Subject: [PATCH 157/222] drm/i915/gt: Move hsw GT workarounds from init_clock_gating to workarounds Rescue the GT workarounds from being buried inside init_clock_gating so that we remember to apply them after a GT reset, and that they are included in our verification that the workarounds are applied. 
v2: Leave HSW_SCRATCH to set an explicit value, rather than OR-ing in our disable bit. Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/2011 Signed-off-by: Chris Wilson Cc: Mika Kuoppala Reviewed-by: Mika Kuoppala Cc: stable@vger.kernel.org Link: https://patchwork.freedesktop.org/patch/msgid/20200611093015.11370-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/intel_workarounds.c | 48 +++++++++++++++++++++ drivers/gpu/drm/i915/intel_pm.c | 39 +---------------- 2 files changed, 50 insertions(+), 37 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index 3eec31c5a714..fb337e2d8a27 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -178,6 +178,12 @@ wa_write_or(struct i915_wa_list *wal, i915_reg_t reg, u32 set) wa_write_masked_or(wal, reg, set, set); } +static void +wa_write_clr(struct i915_wa_list *wal, i915_reg_t reg, u32 clr) +{ + wa_write_masked_or(wal, reg, clr, 0); +} + static void wa_masked_en(struct i915_wa_list *wal, i915_reg_t reg, u32 val) { @@ -708,6 +714,46 @@ int intel_engine_emit_ctx_wa(struct i915_request *rq) return 0; } +static void +hsw_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) +{ + /* L3 caching of data atomics doesn't work -- disable it. */ + wa_write(wal, HSW_SCRATCH1, HSW_SCRATCH1_L3_DATA_ATOMICS_DISABLE); + + wa_add(wal, + HSW_ROW_CHICKEN3, 0, + _MASKED_BIT_ENABLE(HSW_ROW_CHICKEN3_L3_GLOBAL_ATOMICS_DISABLE), + 0 /* XXX does this reg exist? 
*/); + + /* WaVSRefCountFullforceMissDisable:hsw */ + wa_write_clr(wal, GEN7_FF_THREAD_MODE, GEN7_FF_VS_REF_CNT_FFME); + + wa_masked_dis(wal, + CACHE_MODE_0_GEN7, + /* WaDisable_RenderCache_OperationalFlush:hsw */ + RC_OP_FLUSH_ENABLE | + /* enable HiZ Raw Stall Optimization */ + HIZ_RAW_STALL_OPT_DISABLE); + + /* WaDisable4x2SubspanOptimization:hsw */ + wa_masked_en(wal, CACHE_MODE_1, PIXEL_SUBSPAN_COLLECT_OPT_DISABLE); + + /* + * BSpec recommends 8x4 when MSAA is used, + * however in practice 16x4 seems fastest. + * + * Note that PS/WM thread counts depend on the WIZ hashing + * disable bit, which we don't touch here, but it's good + * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM). + */ + wa_add(wal, GEN7_GT_MODE, 0, + _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4), + GEN6_WIZ_HASHING_16x4); + + /* WaSampleCChickenBitEnable:hsw */ + wa_masked_en(wal, HALF_SLICE_CHICKEN3, HSW_SAMPLE_C_PERFORMANCE); +} + static void gen9_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) { @@ -985,6 +1031,8 @@ gt_init_workarounds(struct drm_i915_private *i915, struct i915_wa_list *wal) bxt_gt_workarounds_init(i915, wal); else if (IS_SKYLAKE(i915)) skl_gt_workarounds_init(i915, wal); + else if (IS_HASWELL(i915)) + hsw_gt_workarounds_init(i915, wal); else if (INTEL_GEN(i915) <= 8) return; else diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 26b670fa3f88..249ee720874c 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -7321,45 +7321,10 @@ static void bdw_init_clock_gating(struct drm_i915_private *dev_priv) static void hsw_init_clock_gating(struct drm_i915_private *dev_priv) { - /* L3 caching of data atomics doesn't work -- disable it. 
*/ - I915_WRITE(HSW_SCRATCH1, HSW_SCRATCH1_L3_DATA_ATOMICS_DISABLE); - I915_WRITE(HSW_ROW_CHICKEN3, - _MASKED_BIT_ENABLE(HSW_ROW_CHICKEN3_L3_GLOBAL_ATOMICS_DISABLE)); - /* This is required by WaCatErrorRejectionIssue:hsw */ I915_WRITE(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG, - I915_READ(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG) | - GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB); - - /* WaVSRefCountFullforceMissDisable:hsw */ - I915_WRITE(GEN7_FF_THREAD_MODE, - I915_READ(GEN7_FF_THREAD_MODE) & ~GEN7_FF_VS_REF_CNT_FFME); - - /* WaDisable_RenderCache_OperationalFlush:hsw */ - I915_WRITE(CACHE_MODE_0_GEN7, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE)); - - /* enable HiZ Raw Stall Optimization */ - I915_WRITE(CACHE_MODE_0_GEN7, - _MASKED_BIT_DISABLE(HIZ_RAW_STALL_OPT_DISABLE)); - - /* WaDisable4x2SubspanOptimization:hsw */ - I915_WRITE(CACHE_MODE_1, - _MASKED_BIT_ENABLE(PIXEL_SUBSPAN_COLLECT_OPT_DISABLE)); - - /* - * BSpec recommends 8x4 when MSAA is used, - * however in practice 16x4 seems fastest. - * - * Note that PS/WM thread counts depend on the WIZ hashing - * disable bit, which we don't touch here, but it's good - * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM). - */ - I915_WRITE(GEN7_GT_MODE, - _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4)); - - /* WaSampleCChickenBitEnable:hsw */ - I915_WRITE(HALF_SLICE_CHICKEN3, - _MASKED_BIT_ENABLE(HSW_SAMPLE_C_PERFORMANCE)); + I915_READ(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG) | + GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB); /* WaSwitchSolVfFArbitrationPriority:hsw */ I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) | HSW_ECOCHK_ARB_PRIO_SOL); From 19f1f627b33385a2f0855cbc7d33d86d7f4a1e78 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 11 Jun 2020 09:01:36 +0100 Subject: [PATCH 158/222] drm/i915/gt: Move ivb GT workarounds from init_clock_gating to workarounds Rescue the GT workarounds from being buried inside init_clock_gating so that we remember to apply them after a GT reset, and that they are included in our verification that the workarounds are applied. 
Signed-off-by: Chris Wilson Reviewed-by: Mika Kuoppala Cc: stable@vger.kernel.org Link: https://patchwork.freedesktop.org/patch/msgid/20200611080140.30228-2-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/intel_workarounds.c | 62 +++++++++++++++++++++ drivers/gpu/drm/i915/i915_reg.h | 2 +- drivers/gpu/drm/i915/intel_pm.c | 48 ---------------- 3 files changed, 63 insertions(+), 49 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index fb337e2d8a27..8efd337eb481 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -714,6 +714,66 @@ int intel_engine_emit_ctx_wa(struct i915_request *rq) return 0; } +static void +ivb_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) +{ + /* WaDisableEarlyCull:ivb */ + wa_masked_en(wal, _3D_CHICKEN3, _3D_CHICKEN_SF_DISABLE_OBJEND_CULL); + + /* WaDisablePSDDualDispatchEnable:ivb */ + if (IS_IVB_GT1(i915)) + wa_masked_en(wal, + GEN7_HALF_SLICE_CHICKEN1, + GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE); + + /* WaDisable_RenderCache_OperationalFlush:ivb */ + wa_masked_dis(wal, CACHE_MODE_0_GEN7, RC_OP_FLUSH_ENABLE); + + /* Apply the WaDisableRHWOOptimizationForRenderHang:ivb workaround. */ + wa_masked_dis(wal, + GEN7_COMMON_SLICE_CHICKEN1, + GEN7_CSC1_RHWO_OPT_DISABLE_IN_RCC); + + /* WaApplyL3ControlAndL3ChickenMode:ivb */ + wa_write(wal, GEN7_L3CNTLREG1, GEN7_WA_FOR_GEN7_L3_CONTROL); + wa_write(wal, GEN7_L3_CHICKEN_MODE_REGISTER, GEN7_WA_L3_CHICKEN_MODE); + + /* WaForceL3Serialization:ivb */ + wa_write_clr(wal, GEN7_L3SQCREG4, L3SQ_URB_READ_CAM_MATCH_DISABLE); + + /* + * WaVSThreadDispatchOverride:ivb,vlv + * + * This actually overrides the dispatch + * mode for all thread types. 
+ */ + wa_write_masked_or(wal, GEN7_FF_THREAD_MODE, + GEN7_FF_SCHED_MASK, + GEN7_FF_TS_SCHED_HW | + GEN7_FF_VS_SCHED_HW | + GEN7_FF_DS_SCHED_HW); + + if (0) { /* causes HiZ corruption on ivb:gt1 */ + /* enable HiZ Raw Stall Optimization */ + wa_masked_dis(wal, CACHE_MODE_0_GEN7, HIZ_RAW_STALL_OPT_DISABLE); + } + + /* WaDisable4x2SubspanOptimization:ivb */ + wa_masked_en(wal, CACHE_MODE_1, PIXEL_SUBSPAN_COLLECT_OPT_DISABLE); + + /* + * BSpec recommends 8x4 when MSAA is used, + * however in practice 16x4 seems fastest. + * + * Note that PS/WM thread counts depend on the WIZ hashing + * disable bit, which we don't touch here, but it's good + * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM). + */ + wa_add(wal, GEN7_GT_MODE, 0, + _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4), + GEN6_WIZ_HASHING_16x4); +} + static void hsw_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) { @@ -1033,6 +1093,8 @@ gt_init_workarounds(struct drm_i915_private *i915, struct i915_wa_list *wal) skl_gt_workarounds_init(i915, wal); else if (IS_HASWELL(i915)) hsw_gt_workarounds_init(i915, wal); + else if (IS_IVYBRIDGE(i915)) + ivb_gt_workarounds_init(i915, wal); else if (INTEL_GEN(i915) <= 8) return; else diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 9aca6d778220..19e1fed198c3 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -7924,7 +7924,7 @@ enum { /* GEN7 chicken */ #define GEN7_COMMON_SLICE_CHICKEN1 _MMIO(0x7010) - #define GEN7_CSC1_RHWO_OPT_DISABLE_IN_RCC ((1 << 10) | (1 << 26)) + #define GEN7_CSC1_RHWO_OPT_DISABLE_IN_RCC (1 << 10) #define GEN9_RHWO_OPTIMIZATION_DISABLE (1 << 14) #define COMMON_SLICE_CHICKEN2 _MMIO(0x7014) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 249ee720874c..b835e5e97515 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -7338,32 +7338,11 @@ static void 
ivb_init_clock_gating(struct drm_i915_private *dev_priv) I915_WRITE(ILK_DSPCLK_GATE_D, ILK_VRHUNIT_CLOCK_GATE_DISABLE); - /* WaDisableEarlyCull:ivb */ - I915_WRITE(_3D_CHICKEN3, - _MASKED_BIT_ENABLE(_3D_CHICKEN_SF_DISABLE_OBJEND_CULL)); - /* WaDisableBackToBackFlipFix:ivb */ I915_WRITE(IVB_CHICKEN3, CHICKEN3_DGMG_REQ_OUT_FIX_DISABLE | CHICKEN3_DGMG_DONE_FIX_DISABLE); - /* WaDisablePSDDualDispatchEnable:ivb */ - if (IS_IVB_GT1(dev_priv)) - I915_WRITE(GEN7_HALF_SLICE_CHICKEN1, - _MASKED_BIT_ENABLE(GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE)); - - /* WaDisable_RenderCache_OperationalFlush:ivb */ - I915_WRITE(CACHE_MODE_0_GEN7, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE)); - - /* Apply the WaDisableRHWOOptimizationForRenderHang:ivb workaround. */ - I915_WRITE(GEN7_COMMON_SLICE_CHICKEN1, - GEN7_CSC1_RHWO_OPT_DISABLE_IN_RCC); - - /* WaApplyL3ControlAndL3ChickenMode:ivb */ - I915_WRITE(GEN7_L3CNTLREG1, - GEN7_WA_FOR_GEN7_L3_CONTROL); - I915_WRITE(GEN7_L3_CHICKEN_MODE_REGISTER, - GEN7_WA_L3_CHICKEN_MODE); if (IS_IVB_GT1(dev_priv)) I915_WRITE(GEN7_ROW_CHICKEN2, _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE)); @@ -7375,10 +7354,6 @@ static void ivb_init_clock_gating(struct drm_i915_private *dev_priv) _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE)); } - /* WaForceL3Serialization:ivb */ - I915_WRITE(GEN7_L3SQCREG4, I915_READ(GEN7_L3SQCREG4) & - ~L3SQ_URB_READ_CAM_MATCH_DISABLE); - /* * According to the spec, bit 13 (RCZUNIT) must be set on IVB. * This implements the WaDisableRCZUnitClockGating:ivb workaround. 
@@ -7393,29 +7368,6 @@ static void ivb_init_clock_gating(struct drm_i915_private *dev_priv) g4x_disable_trickle_feed(dev_priv); - gen7_setup_fixed_func_scheduler(dev_priv); - - if (0) { /* causes HiZ corruption on ivb:gt1 */ - /* enable HiZ Raw Stall Optimization */ - I915_WRITE(CACHE_MODE_0_GEN7, - _MASKED_BIT_DISABLE(HIZ_RAW_STALL_OPT_DISABLE)); - } - - /* WaDisable4x2SubspanOptimization:ivb */ - I915_WRITE(CACHE_MODE_1, - _MASKED_BIT_ENABLE(PIXEL_SUBSPAN_COLLECT_OPT_DISABLE)); - - /* - * BSpec recommends 8x4 when MSAA is used, - * however in practice 16x4 seems fastest. - * - * Note that PS/WM thread counts depend on the WIZ hashing - * disable bit, which we don't touch here, but it's good - * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM). - */ - I915_WRITE(GEN7_GT_MODE, - _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4)); - snpcr = I915_READ(GEN6_MBCUNIT_SNPCR); snpcr &= ~GEN6_MBC_SNPCR_MASK; snpcr |= GEN6_MBC_SNPCR_MED; From 7331c356b6d2d8a01422cacab27478a1dba9fa2a Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 11 Jun 2020 09:01:37 +0100 Subject: [PATCH 159/222] drm/i915/gt: Move vlv GT workarounds from init_clock_gating to workarounds Rescue the GT workarounds from being buried inside init_clock_gating so that we remember to apply them after a GT reset, and that they are included in our verification that the workarounds are applied. 
Signed-off-by: Chris Wilson Reviewed-by: Mika Kuoppala Cc: stable@vger.kernel.org Link: https://patchwork.freedesktop.org/patch/msgid/20200611080140.30228-3-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/intel_workarounds.c | 59 ++++++++++++++++++++ drivers/gpu/drm/i915/intel_pm.c | 61 --------------------- 2 files changed, 59 insertions(+), 61 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index 8efd337eb481..6f78c2f02ca1 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -774,6 +774,63 @@ ivb_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) GEN6_WIZ_HASHING_16x4); } +static void +vlv_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) +{ + /* WaDisableEarlyCull:vlv */ + wa_masked_en(wal, _3D_CHICKEN3, _3D_CHICKEN_SF_DISABLE_OBJEND_CULL); + + /* WaPsdDispatchEnable:vlv */ + /* WaDisablePSDDualDispatchEnable:vlv */ + wa_masked_en(wal, + GEN7_HALF_SLICE_CHICKEN1, + GEN7_MAX_PS_THREAD_DEP | + GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE); + + /* WaDisable_RenderCache_OperationalFlush:vlv */ + wa_masked_dis(wal, CACHE_MODE_0_GEN7, RC_OP_FLUSH_ENABLE); + + /* WaForceL3Serialization:vlv */ + wa_write_clr(wal, GEN7_L3SQCREG4, L3SQ_URB_READ_CAM_MATCH_DISABLE); + + /* + * WaVSThreadDispatchOverride:ivb,vlv + * + * This actually overrides the dispatch + * mode for all thread types. + */ + wa_write_masked_or(wal, + GEN7_FF_THREAD_MODE, + GEN7_FF_SCHED_MASK, + GEN7_FF_TS_SCHED_HW | + GEN7_FF_VS_SCHED_HW | + GEN7_FF_DS_SCHED_HW); + + /* + * BSpec says this must be set, even though + * WaDisable4x2SubspanOptimization isn't listed for VLV. + */ + wa_masked_en(wal, CACHE_MODE_1, PIXEL_SUBSPAN_COLLECT_OPT_DISABLE); + + /* + * BSpec recommends 8x4 when MSAA is used, + * however in practice 16x4 seems fastest. 
+ * + * Note that PS/WM thread counts depend on the WIZ hashing + * disable bit, which we don't touch here, but it's good + * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM). + */ + wa_add(wal, GEN7_GT_MODE, 0, + _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4), + GEN6_WIZ_HASHING_16x4); + + /* + * WaIncreaseL3CreditsForVLVB0:vlv + * This is the hardware default actually. + */ + wa_write(wal, GEN7_L3SQCREG1, VLV_B0_WA_L3SQCREG1_VALUE); +} + static void hsw_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) { @@ -1093,6 +1150,8 @@ gt_init_workarounds(struct drm_i915_private *i915, struct i915_wa_list *wal) skl_gt_workarounds_init(i915, wal); else if (IS_HASWELL(i915)) hsw_gt_workarounds_init(i915, wal); + else if (IS_VALLEYVIEW(i915)) + vlv_gt_workarounds_init(i915, wal); else if (IS_IVYBRIDGE(i915)) ivb_gt_workarounds_init(i915, wal); else if (INTEL_GEN(i915) <= 8) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index b835e5e97515..29abde47e987 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -7077,24 +7077,6 @@ static void gen6_init_clock_gating(struct drm_i915_private *dev_priv) gen6_check_mch_setup(dev_priv); } -static void gen7_setup_fixed_func_scheduler(struct drm_i915_private *dev_priv) -{ - u32 reg = I915_READ(GEN7_FF_THREAD_MODE); - - /* - * WaVSThreadDispatchOverride:ivb,vlv - * - * This actually overrides the dispatch - * mode for all thread types. 
- */ - reg &= ~GEN7_FF_SCHED_MASK; - reg |= GEN7_FF_TS_SCHED_HW; - reg |= GEN7_FF_VS_SCHED_HW; - reg |= GEN7_FF_DS_SCHED_HW; - - I915_WRITE(GEN7_FF_THREAD_MODE, reg); -} - static void lpt_init_clock_gating(struct drm_i915_private *dev_priv) { /* @@ -7381,28 +7363,11 @@ static void ivb_init_clock_gating(struct drm_i915_private *dev_priv) static void vlv_init_clock_gating(struct drm_i915_private *dev_priv) { - /* WaDisableEarlyCull:vlv */ - I915_WRITE(_3D_CHICKEN3, - _MASKED_BIT_ENABLE(_3D_CHICKEN_SF_DISABLE_OBJEND_CULL)); - /* WaDisableBackToBackFlipFix:vlv */ I915_WRITE(IVB_CHICKEN3, CHICKEN3_DGMG_REQ_OUT_FIX_DISABLE | CHICKEN3_DGMG_DONE_FIX_DISABLE); - /* WaPsdDispatchEnable:vlv */ - /* WaDisablePSDDualDispatchEnable:vlv */ - I915_WRITE(GEN7_HALF_SLICE_CHICKEN1, - _MASKED_BIT_ENABLE(GEN7_MAX_PS_THREAD_DEP | - GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE)); - - /* WaDisable_RenderCache_OperationalFlush:vlv */ - I915_WRITE(CACHE_MODE_0_GEN7, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE)); - - /* WaForceL3Serialization:vlv */ - I915_WRITE(GEN7_L3SQCREG4, I915_READ(GEN7_L3SQCREG4) & - ~L3SQ_URB_READ_CAM_MATCH_DISABLE); - /* WaDisableDopClockGating:vlv */ I915_WRITE(GEN7_ROW_CHICKEN2, _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE)); @@ -7412,8 +7377,6 @@ static void vlv_init_clock_gating(struct drm_i915_private *dev_priv) I915_READ(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG) | GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB); - gen7_setup_fixed_func_scheduler(dev_priv); - /* * According to the spec, bit 13 (RCZUNIT) must be set on IVB. * This implements the WaDisableRCZUnitClockGating:vlv workaround. @@ -7427,30 +7390,6 @@ static void vlv_init_clock_gating(struct drm_i915_private *dev_priv) I915_WRITE(GEN7_UCGCTL4, I915_READ(GEN7_UCGCTL4) | GEN7_L3BANK2X_CLOCK_GATE_DISABLE); - /* - * BSpec says this must be set, even though - * WaDisable4x2SubspanOptimization isn't listed for VLV. 
- */ - I915_WRITE(CACHE_MODE_1, - _MASKED_BIT_ENABLE(PIXEL_SUBSPAN_COLLECT_OPT_DISABLE)); - - /* - * BSpec recommends 8x4 when MSAA is used, - * however in practice 16x4 seems fastest. - * - * Note that PS/WM thread counts depend on the WIZ hashing - * disable bit, which we don't touch here, but it's good - * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM). - */ - I915_WRITE(GEN7_GT_MODE, - _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4)); - - /* - * WaIncreaseL3CreditsForVLVB0:vlv - * This is the hardware default actually. - */ - I915_WRITE(GEN7_L3SQCREG1, VLV_B0_WA_L3SQCREG1_VALUE); - /* * WaDisableVLVClockGating_VBIIssue:vlv * Disable clock gating on th GCFG unit to prevent a delay From c3b93a943f2c9ee4a106db100a2fc3b2f126bfc5 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 11 Jun 2020 09:01:38 +0100 Subject: [PATCH 160/222] drm/i915/gt: Move snb GT workarounds from init_clock_gating to workarounds Rescue the GT workarounds from being buried inside init_clock_gating so that we remember to apply them after a GT reset, and that they are included in our verification that the workarounds are applied. 
Signed-off-by: Chris Wilson Reviewed-by: Mika Kuoppala Cc: stable@vger.kernel.org Link: https://patchwork.freedesktop.org/patch/msgid/20200611080140.30228-4-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/intel_workarounds.c | 41 +++++++++++++++++++++ drivers/gpu/drm/i915/intel_pm.c | 33 ----------------- 2 files changed, 41 insertions(+), 33 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index 6f78c2f02ca1..239ce935653b 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -714,6 +714,45 @@ int intel_engine_emit_ctx_wa(struct i915_request *rq) return 0; } +static void +snb_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) +{ + /* WaDisableHiZPlanesWhenMSAAEnabled:snb */ + wa_masked_en(wal, + _3D_CHICKEN, + _3D_CHICKEN_HIZ_PLANE_DISABLE_MSAA_4X_SNB); + + /* WaDisable_RenderCache_OperationalFlush:snb */ + wa_masked_dis(wal, CACHE_MODE_0, RC_OP_FLUSH_ENABLE); + + /* + * BSpec recommends 8x4 when MSAA is used, + * however in practice 16x4 seems fastest. + * + * Note that PS/WM thread counts depend on the WIZ hashing + * disable bit, which we don't touch here, but it's good + * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM). + */ + wa_add(wal, + GEN6_GT_MODE, 0, + _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4), + GEN6_WIZ_HASHING_16x4); + + wa_masked_dis(wal, CACHE_MODE_0, CM0_STC_EVICT_DISABLE_LRA_SNB); + + wa_masked_en(wal, + _3D_CHICKEN3, + /* WaStripsFansDisableFastClipPerformanceFix:snb */ + _3D_CHICKEN3_SF_DISABLE_FASTCLIP_CULL | + /* + * Bspec says: + * "This bit must be set if 3DSTATE_CLIP clip mode is set + * to normal and 3DSTATE_SF number of SF output attributes + * is more than 16." 
+ */ + _3D_CHICKEN3_SF_DISABLE_PIPELINED_ATTR_FETCH); +} + static void ivb_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) { @@ -1154,6 +1193,8 @@ gt_init_workarounds(struct drm_i915_private *i915, struct i915_wa_list *wal) vlv_gt_workarounds_init(i915, wal); else if (IS_IVYBRIDGE(i915)) ivb_gt_workarounds_init(i915, wal); + else if (IS_GEN(i915, 6)) + snb_gt_workarounds_init(i915, wal); else if (INTEL_GEN(i915) <= 8) return; else diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 29abde47e987..b4bea6451418 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -6993,27 +6993,6 @@ static void gen6_init_clock_gating(struct drm_i915_private *dev_priv) I915_READ(ILK_DISPLAY_CHICKEN2) | ILK_ELPIN_409_SELECT); - /* WaDisableHiZPlanesWhenMSAAEnabled:snb */ - I915_WRITE(_3D_CHICKEN, - _MASKED_BIT_ENABLE(_3D_CHICKEN_HIZ_PLANE_DISABLE_MSAA_4X_SNB)); - - /* WaDisable_RenderCache_OperationalFlush:snb */ - I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE)); - - /* - * BSpec recoomends 8x4 when MSAA is used, - * however in practice 16x4 seems fastest. - * - * Note that PS/WM thread counts depend on the WIZ hashing - * disable bit, which we don't touch here, but it's good - * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM). 
- */ - I915_WRITE(GEN6_GT_MODE, - _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4)); - - I915_WRITE(CACHE_MODE_0, - _MASKED_BIT_DISABLE(CM0_STC_EVICT_DISABLE_LRA_SNB)); - I915_WRITE(GEN6_UCGCTL1, I915_READ(GEN6_UCGCTL1) | GEN6_BLBUNIT_CLOCK_GATE_DISABLE | @@ -7036,18 +7015,6 @@ static void gen6_init_clock_gating(struct drm_i915_private *dev_priv) GEN6_RCPBUNIT_CLOCK_GATE_DISABLE | GEN6_RCCUNIT_CLOCK_GATE_DISABLE); - /* WaStripsFansDisableFastClipPerformanceFix:snb */ - I915_WRITE(_3D_CHICKEN3, - _MASKED_BIT_ENABLE(_3D_CHICKEN3_SF_DISABLE_FASTCLIP_CULL)); - - /* - * Bspec says: - * "This bit must be set if 3DSTATE_CLIP clip mode is set to normal and - * 3DSTATE_SF number of SF output attributes is more than 16." - */ - I915_WRITE(_3D_CHICKEN3, - _MASKED_BIT_ENABLE(_3D_CHICKEN3_SF_DISABLE_PIPELINED_ATTR_FETCH)); - /* * According to the spec the following bits should be * set in order to enable memory self-refresh and fbc: From 806a45c0838d253e306a6384057e851b65d11099 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 11 Jun 2020 09:01:39 +0100 Subject: [PATCH 161/222] drm/i915/gt: Move ilk GT workarounds from init_clock_gating to workarounds Rescue the GT workarounds from being buried inside init_clock_gating so that we remember to apply them after a GT reset, and that they are included in our verification that the workarounds are applied. 
Signed-off-by: Chris Wilson Reviewed-by: Mika Kuoppala Cc: stable@vger.kernel.org Link: https://patchwork.freedesktop.org/patch/msgid/20200611080140.30228-5-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/intel_workarounds.c | 14 ++++++++++++++ drivers/gpu/drm/i915/intel_pm.c | 10 ---------- 2 files changed, 14 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index 239ce935653b..2a27f2550e15 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -714,6 +714,18 @@ int intel_engine_emit_ctx_wa(struct i915_request *rq) return 0; } +static void +ilk_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) +{ + wa_masked_en(wal, _3D_CHICKEN2, _3D_CHICKEN2_WM_READ_PIPELINED); + + /* WaDisableRenderCachePipelinedFlush:ilk */ + wa_masked_en(wal, CACHE_MODE_0, CM0_PIPELINED_RENDER_FLUSH_DISABLE); + + /* WaDisable_RenderCache_OperationalFlush:ilk */ + wa_masked_dis(wal, CACHE_MODE_0, RC_OP_FLUSH_ENABLE); +} + static void snb_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) { @@ -1195,6 +1207,8 @@ gt_init_workarounds(struct drm_i915_private *i915, struct i915_wa_list *wal) ivb_gt_workarounds_init(i915, wal); else if (IS_GEN(i915, 6)) snb_gt_workarounds_init(i915, wal); + else if (IS_GEN(i915, 5)) + ilk_gt_workarounds_init(i915, wal); else if (INTEL_GEN(i915) <= 8) return; else diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index b4bea6451418..7d82a7144a13 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -6921,16 +6921,6 @@ static void ilk_init_clock_gating(struct drm_i915_private *dev_priv) I915_WRITE(ILK_DISPLAY_CHICKEN2, I915_READ(ILK_DISPLAY_CHICKEN2) | ILK_ELPIN_409_SELECT); - I915_WRITE(_3D_CHICKEN2, - _3D_CHICKEN2_WM_READ_PIPELINED << 16 | - _3D_CHICKEN2_WM_READ_PIPELINED); - - /* 
WaDisableRenderCachePipelinedFlush:ilk */ - I915_WRITE(CACHE_MODE_0, - _MASKED_BIT_ENABLE(CM0_PIPELINED_RENDER_FLUSH_DISABLE)); - - /* WaDisable_RenderCache_OperationalFlush:ilk */ - I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE)); g4x_disable_trickle_feed(dev_priv); From 2bcefd0d263ab4a72f0d61921ae6b0dc81606551 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 11 Jun 2020 09:01:40 +0100 Subject: [PATCH 162/222] drm/i915/gt: Move gen4 GT workarounds from init_clock_gating to workarounds Rescue the GT workarounds from being buried inside init_clock_gating so that we remember to apply them after a GT reset, and that they are included in our verification that the workarounds are applied. Signed-off-by: Chris Wilson Reviewed-by: Mika Kuoppala Cc: stable@vger.kernel.org Link: https://patchwork.freedesktop.org/patch/msgid/20200611080140.30228-6-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/intel_workarounds.c | 29 ++++++++++++++++----- drivers/gpu/drm/i915/intel_pm.c | 15 ----------- 2 files changed, 23 insertions(+), 21 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index 2a27f2550e15..2da366821dda 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -714,16 +714,29 @@ int intel_engine_emit_ctx_wa(struct i915_request *rq) return 0; } +static void +gen4_gt_workarounds_init(struct drm_i915_private *i915, + struct i915_wa_list *wal) +{ + /* WaDisable_RenderCache_OperationalFlush:gen4,ilk */ + wa_masked_dis(wal, CACHE_MODE_0, RC_OP_FLUSH_ENABLE); +} + +static void +g4x_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) +{ + gen4_gt_workarounds_init(i915, wal); + + /* WaDisableRenderCachePipelinedFlush:g4x,ilk */ + wa_masked_en(wal, CACHE_MODE_0, CM0_PIPELINED_RENDER_FLUSH_DISABLE); +} + static void ilk_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) { + 
g4x_gt_workarounds_init(i915, wal); + wa_masked_en(wal, _3D_CHICKEN2, _3D_CHICKEN2_WM_READ_PIPELINED); - - /* WaDisableRenderCachePipelinedFlush:ilk */ - wa_masked_en(wal, CACHE_MODE_0, CM0_PIPELINED_RENDER_FLUSH_DISABLE); - - /* WaDisable_RenderCache_OperationalFlush:ilk */ - wa_masked_dis(wal, CACHE_MODE_0, RC_OP_FLUSH_ENABLE); } static void @@ -1209,6 +1222,10 @@ gt_init_workarounds(struct drm_i915_private *i915, struct i915_wa_list *wal) snb_gt_workarounds_init(i915, wal); else if (IS_GEN(i915, 5)) ilk_gt_workarounds_init(i915, wal); + else if (IS_G4X(i915)) + g4x_gt_workarounds_init(i915, wal); + else if (IS_GEN(i915, 4)) + gen4_gt_workarounds_init(i915, wal); else if (INTEL_GEN(i915) <= 8) return; else diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 7d82a7144a13..2a32d6230795 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -7399,13 +7399,6 @@ static void g4x_init_clock_gating(struct drm_i915_private *dev_priv) dspclk_gate |= DSSUNIT_CLOCK_GATE_DISABLE; I915_WRITE(DSPCLK_GATE_D, dspclk_gate); - /* WaDisableRenderCachePipelinedFlush */ - I915_WRITE(CACHE_MODE_0, - _MASKED_BIT_ENABLE(CM0_PIPELINED_RENDER_FLUSH_DISABLE)); - - /* WaDisable_RenderCache_OperationalFlush:g4x */ - I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE)); - g4x_disable_trickle_feed(dev_priv); } @@ -7421,11 +7414,6 @@ static void i965gm_init_clock_gating(struct drm_i915_private *dev_priv) intel_uncore_write(uncore, MI_ARB_STATE, _MASKED_BIT_ENABLE(MI_ARB_DISPLAY_TRICKLE_FEED_DISABLE)); - - /* WaDisable_RenderCache_OperationalFlush:gen4 */ - intel_uncore_write(uncore, - CACHE_MODE_0, - _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE)); } static void i965g_init_clock_gating(struct drm_i915_private *dev_priv) @@ -7438,9 +7426,6 @@ static void i965g_init_clock_gating(struct drm_i915_private *dev_priv) I915_WRITE(RENCLK_GATE_D2, 0); I915_WRITE(MI_ARB_STATE, _MASKED_BIT_ENABLE(MI_ARB_DISPLAY_TRICKLE_FEED_DISABLE)); - - 
/* WaDisable_RenderCache_OperationalFlush:gen4 */ - I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE)); } static void gen3_init_clock_gating(struct drm_i915_private *dev_priv) From 94641eb6c69682884abbecf22fe5b7c185af6a06 Mon Sep 17 00:00:00 2001 From: Vandita Kulkarni Date: Fri, 12 Jun 2020 13:52:37 +0530 Subject: [PATCH 163/222] drm/i915/display: Fix the encoder type check For all ddi, encoder->type holds output type as ddi, assigning it to individual o/p types is no more valid. Fixes: 362bfb995b78 ("drm/i915/tgl: Add DKL PHY vswing table for HDMI") v2: Rebase, no functional change. Signed-off-by: Vandita Kulkarni Reviewed-by: Uma Shankar Signed-off-by: Uma Shankar Link: https://patchwork.freedesktop.org/patch/msgid/20200612082237.11886-1-vandita.kulkarni@intel.com --- drivers/gpu/drm/i915/display/intel_ddi.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c index d1acc39cdc11..ca7bb2294d2b 100644 --- a/drivers/gpu/drm/i915/display/intel_ddi.c +++ b/drivers/gpu/drm/i915/display/intel_ddi.c @@ -2615,7 +2615,7 @@ static void icl_ddi_vswing_sequence(struct intel_encoder *encoder, static void tgl_dkl_phy_ddi_vswing_sequence(struct intel_encoder *encoder, int link_clock, - u32 level) + u32 level, enum intel_output_type type) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); enum tc_port tc_port = intel_port_to_tc(dev_priv, encoder->port); @@ -2623,7 +2623,7 @@ tgl_dkl_phy_ddi_vswing_sequence(struct intel_encoder *encoder, int link_clock, u32 n_entries, val, ln, dpcnt_mask, dpcnt_val; int rate = 0; - if (encoder->type != INTEL_OUTPUT_HDMI) { + if (type != INTEL_OUTPUT_HDMI) { struct intel_dp *intel_dp = enc_to_intel_dp(encoder); rate = intel_dp->link_rate; @@ -2676,7 +2676,7 @@ static void tgl_ddi_vswing_sequence(struct intel_encoder *encoder, if (intel_phy_is_combo(dev_priv, phy)) icl_combo_phy_ddi_vswing_sequence(encoder, level, 
type); else - tgl_dkl_phy_ddi_vswing_sequence(encoder, link_clock, level); + tgl_dkl_phy_ddi_vswing_sequence(encoder, link_clock, level, type); } static u32 translate_signal_level(struct intel_dp *intel_dp, int signal_levels) From 51dc276dd2af20ae9468afbd864fa8b0e1c75827 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 11 Jun 2020 19:04:21 +0100 Subject: [PATCH 164/222] drm/i915: Leave vma intact as they are discarded If we find ourselves trying to reuse a misplaced but active vma, we currently try to discard it to avoid having to wait to unbind it (upsetting the current user of the vma). An alternative to marking it as a discarded vma and keeping it in both the obj->vma.list and obj->vma.tree, is to simply remove it from the lookup rbtree. While it remains in the list of vma, it will be unbound under eviction pressure and freed along with the object. We will never reuse it again for new instances. As before, with no pruning, the list may continually grow, but eventually we will have the most constrained version of the ggtt view that meets all requirements -- so the list of vma should not grow without bound. 
Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/2012 Fixes: 9bdcaa5e3a2f ("drm/i915: Discard a misplaced GGTT vma") Signed-off-by: Chris Wilson Cc: Matthew Auld Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20200611180421.23262-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem.c | 38 +++++---------------------------- drivers/gpu/drm/i915/i915_vma.c | 3 ++- 2 files changed, 7 insertions(+), 34 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 41553e9e57a9..9aa3066cb75d 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -933,44 +933,16 @@ void i915_gem_runtime_suspend(struct drm_i915_private *i915) } } -static bool -discard_ggtt_vma(struct i915_vma *vma, const struct i915_ggtt_view *view) +static void discard_ggtt_vma(struct i915_vma *vma) { - const struct i915_ggtt_view discard = { - .type = I915_GGTT_VIEW_PARTIAL, - }; struct drm_i915_gem_object *obj = vma->obj; spin_lock(&obj->vma.lock); - if (i915_vma_compare(vma, vma->vm, &discard)) { - struct rb_node *rb, **p; - + if (!RB_EMPTY_NODE(&vma->obj_node)) { rb_erase(&vma->obj_node, &obj->vma.tree); - vma->ggtt_view = discard; - GEM_BUG_ON(i915_vma_compare(vma, vma->vm, &discard)); - GEM_BUG_ON(i915_vma_compare(vma, vma->vm, view) == 0); - - rb = NULL; - p = &obj->vma.tree.rb_node; - while (*p) { - struct i915_vma *pos; - long cmp; - - rb = *p; - pos = rb_entry(rb, struct i915_vma, obj_node); - - cmp = i915_vma_compare(pos, vma->vm, &discard); - if (cmp < 0) - p = &rb->rb_right; - else - p = &rb->rb_left; - } - rb_link_node(&vma->obj_node, rb, p); - rb_insert_color(&vma->obj_node, &obj->vma.tree); + RB_CLEAR_NODE(&vma->obj_node); } spin_unlock(&obj->vma.lock); - - return i915_vma_compare(vma, vma->vm, view); } struct i915_vma * @@ -1035,8 +1007,8 @@ new_vma: } if (i915_vma_is_pinned(vma) || i915_vma_is_active(vma)) { - if (discard_ggtt_vma(vma, view)) - goto new_vma; + 
discard_ggtt_vma(vma); + goto new_vma; } ret = i915_vma_unbind(vma); diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index 9b30ddc49e4b..1f63c4a1f055 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -1087,7 +1087,8 @@ void i915_vma_release(struct kref *ref) spin_lock(&obj->vma.lock); list_del(&vma->obj_link); - rb_erase(&vma->obj_node, &obj->vma.tree); + if (!RB_EMPTY_NODE(&vma->obj_node)) + rb_erase(&vma->obj_node, &obj->vma.tree); spin_unlock(&obj->vma.lock); } From 3d09677a07f017c42c80d04ba70ef23d7499fc46 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 12 Jun 2020 23:11:13 +0100 Subject: [PATCH 165/222] drm/i915/execlists: Lift opportunistic process_csb to before engine lock Since the process_csb() does not require us to hold the engine->active.lock, we can move the opportunistic flush before direct submission to outside of the lock. Signed-off-by: Chris Wilson Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20200612221113.9129-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/intel_lrc.c | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index 5ab0ed35af84..e866b8d721ed 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -3170,13 +3170,6 @@ static void __submit_queue_imm(struct intel_engine_cs *engine) if (reset_in_progress(execlists)) return; /* defer until we restart the engine following reset */ - /* Hopefully we clear execlists->pending[] to let us through */ - if (READ_ONCE(execlists->pending[0]) && - tasklet_trylock(&execlists->tasklet)) { - process_csb(engine); - tasklet_unlock(&execlists->tasklet); - } - __execlists_submission_tasklet(engine); } @@ -3199,11 +3192,25 @@ static bool ancestor_on_hold(const struct intel_engine_cs *engine, return !list_empty(&engine->active.hold) && 
hold_request(rq); } +static void flush_csb(struct intel_engine_cs *engine) +{ + struct intel_engine_execlists *el = &engine->execlists; + + if (READ_ONCE(el->pending[0]) && tasklet_trylock(&el->tasklet)) { + if (!reset_in_progress(el)) + process_csb(engine); + tasklet_unlock(&el->tasklet); + } +} + static void execlists_submit_request(struct i915_request *request) { struct intel_engine_cs *engine = request->engine; unsigned long flags; + /* Hopefully we clear execlists->pending[] to let us through */ + flush_csb(engine); + /* Will be called from irq-context when using foreign fences. */ spin_lock_irqsave(&engine->active.lock, flags); From d4b02a4c613e82a4c47bf9dd228c38a2c3c1a6d2 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sun, 7 Jun 2020 23:20:45 +0100 Subject: [PATCH 166/222] drm/i915/selftests: Trim execlists runtime Reduce the smoke depth by trimming the number of contexts, repetitions and wait times. This is in preparation for a less greedy scheduler that tries to be fair across contexts, resulting in a great many more context switches. A thousand context switches may be 50-100ms, causing us to timeout as the HW is not fast enough to complete the deep smoketests. 
Signed-off-by: Chris Wilson Reviewed-by: Andi Shyti Link: https://patchwork.freedesktop.org/patch/msgid/20200607222108.14401-5-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/selftest_lrc.c | 66 ++++++-------------- drivers/gpu/drm/i915/selftests/igt_spinner.c | 4 +- 2 files changed, 21 insertions(+), 49 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c index e838e38a262c..f651bdf7f191 100644 --- a/drivers/gpu/drm/i915/gt/selftest_lrc.c +++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c @@ -845,10 +845,11 @@ static int live_timeslice_preempt(void *arg) { struct intel_gt *gt = arg; struct drm_i915_gem_object *obj; + struct intel_engine_cs *engine; + enum intel_engine_id id; struct i915_vma *vma; void *vaddr; int err = 0; - int count; /* * If a request takes too long, we would like to give other users @@ -885,26 +886,21 @@ static int live_timeslice_preempt(void *arg) if (err) goto err_pin; - for_each_prime_number_from(count, 1, 16) { - struct intel_engine_cs *engine; - enum intel_engine_id id; + for_each_engine(engine, gt, id) { + if (!intel_engine_has_preemption(engine)) + continue; - for_each_engine(engine, gt, id) { - if (!intel_engine_has_preemption(engine)) - continue; + memset(vaddr, 0, PAGE_SIZE); - memset(vaddr, 0, PAGE_SIZE); + engine_heartbeat_disable(engine); + err = slice_semaphore_queue(engine, vma, 5); + engine_heartbeat_enable(engine); + if (err) + goto err_pin; - engine_heartbeat_disable(engine); - err = slice_semaphore_queue(engine, vma, count); - engine_heartbeat_enable(engine); - if (err) - goto err_pin; - - if (igt_flush_test(gt->i915)) { - err = -EIO; - goto err_pin; - } + if (igt_flush_test(gt->i915)) { + err = -EIO; + goto err_pin; } } @@ -1251,22 +1247,6 @@ static int live_timeslice_queue(void *arg) intel_engine_flush_submission(engine); } while (READ_ONCE(engine->execlists.pending[0])); - if (!READ_ONCE(engine->execlists.timer.expires) && - execlists_active(&engine->execlists) == rq && - 
!i915_request_completed(rq)) { - struct drm_printer p = - drm_info_printer(gt->i915->drm.dev); - - GEM_TRACE_ERR("%s: Failed to enable timeslicing!\n", - engine->name); - intel_engine_dump(engine, &p, - "%s\n", engine->name); - GEM_TRACE_DUMP(); - - memset(vaddr, 0xff, PAGE_SIZE); - err = -EINVAL; - } - /* Timeslice every jiffy, so within 2 we should signal */ if (i915_request_wait(rq, 0, slice_timeout(engine)) < 0) { struct drm_printer p = @@ -2671,16 +2651,8 @@ static int live_preempt_gang(void *arg) /* Submit each spinner at increasing priority */ engine->schedule(rq, &attr); - - if (prio < attr.priority) - break; - - if (prio <= I915_PRIORITY_MAX) - continue; - - if (__igt_timeout(end_time, NULL)) - break; - } while (1); + } while (prio <= I915_PRIORITY_MAX && + !__igt_timeout(end_time, NULL)); pr_debug("%s: Preempt chain of %d requests\n", engine->name, prio); @@ -3248,7 +3220,7 @@ static int smoke_crescendo_thread(void *arg) return err; count++; - } while (!__igt_timeout(end_time, NULL)); + } while (count < smoke->ncontext && !__igt_timeout(end_time, NULL)); smoke->count = count; return 0; @@ -3324,7 +3296,7 @@ static int smoke_random(struct preempt_smoke *smoke, unsigned int flags) count++; } - } while (!__igt_timeout(end_time, NULL)); + } while (count < smoke->ncontext && !__igt_timeout(end_time, NULL)); pr_info("Submitted %lu random:%x requests across %d engines and %d contexts\n", count, flags, @@ -3337,7 +3309,7 @@ static int live_preempt_smoke(void *arg) struct preempt_smoke smoke = { .gt = arg, .prng = I915_RND_STATE_INITIALIZER(i915_selftest.random_seed), - .ncontext = 1024, + .ncontext = 256, }; const unsigned int phase[] = { 0, BATCH }; struct igt_live_test t; diff --git a/drivers/gpu/drm/i915/selftests/igt_spinner.c b/drivers/gpu/drm/i915/selftests/igt_spinner.c index 699bfe0328fb..ec0ecb4e4ca6 100644 --- a/drivers/gpu/drm/i915/selftests/igt_spinner.c +++ b/drivers/gpu/drm/i915/selftests/igt_spinner.c @@ -221,8 +221,8 @@ bool 
igt_wait_for_spinner(struct igt_spinner *spin, struct i915_request *rq) { return !(wait_for_us(i915_seqno_passed(hws_seqno(spin, rq), rq->fence.seqno), - 10) && + 100) && wait_for(i915_seqno_passed(hws_seqno(spin, rq), rq->fence.seqno), - 1000)); + 50)); } From 2267f68404d48b99ddda1728ceeedf6157b493fa Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 12 Jun 2020 13:39:49 +0100 Subject: [PATCH 167/222] drm/i915/gt: Flush gen3 relocs harder, again gen3 does not fully flush MI stores to memory on MI_FLUSH, such that a subsequent read from e.g. the sampler can bypass the store and read the stale value from memory. This is a serious issue when we are using MI stores to rewrite the batches for relocation, as it means that the batch is reading from random user/kernel memory. While it is particularly sensitive [and detectable] for relocations, reading stale data at any time is a worry. Having started with a small number of delaying stores and doubling until no more incoherency was seen over a few hours (with and without background memory pressure), 32 was the magic number. Note that it definitely doesn't fix the issue, merely adds a long delay between requests, sufficient to mostly hide the problem, enough to raise the mtbf to several hours. This is merely a stop gap. v2: Follow the gen5 w/a more closely and include some post-invalidate flushes as well. 
Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/2018 References: a889580c087a ("drm/i915: Flush GPU relocs harder for gen3") Signed-off-by: Chris Wilson Cc: Mika Kuoppala Cc: Joonas Lahtinen Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20200612123949.7093-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/gen2_engine_cs.c | 59 ++++++++++-------------- 1 file changed, 24 insertions(+), 35 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/gen2_engine_cs.c b/drivers/gpu/drm/i915/gt/gen2_engine_cs.c index 3fb0dc1fb910..b491a64919c8 100644 --- a/drivers/gpu/drm/i915/gt/gen2_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/gen2_engine_cs.c @@ -13,28 +13,25 @@ int gen2_emit_flush(struct i915_request *rq, u32 mode) { - unsigned int num_store_dw; + unsigned int num_store_dw = 12; u32 cmd, *cs; cmd = MI_FLUSH; - num_store_dw = 0; if (mode & EMIT_INVALIDATE) cmd |= MI_READ_FLUSH; - if (mode & EMIT_FLUSH) - num_store_dw = 4; - cs = intel_ring_begin(rq, 2 + 3 * num_store_dw); + cs = intel_ring_begin(rq, 2 + 4 * num_store_dw); if (IS_ERR(cs)) return PTR_ERR(cs); *cs++ = cmd; while (num_store_dw--) { - *cs++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL; - *cs++ = intel_gt_scratch_offset(rq->engine->gt, - INTEL_GT_SCRATCH_FIELD_DEFAULT); + *cs++ = MI_STORE_DWORD_INDEX; + *cs++ = I915_GEM_HWS_SCRATCH * sizeof(u32); *cs++ = 0; + *cs++ = MI_FLUSH | MI_NO_WRITE_FLUSH; } - *cs++ = MI_FLUSH | MI_NO_WRITE_FLUSH; + *cs++ = cmd; intel_ring_advance(rq, cs); @@ -142,38 +139,21 @@ int gen4_emit_flush_vcs(struct i915_request *rq, u32 mode) return 0; } -u32 *gen3_emit_breadcrumb(struct i915_request *rq, u32 *cs) +static u32 *__gen2_emit_breadcrumb(struct i915_request *rq, u32 *cs, + int flush, int post) { GEM_BUG_ON(i915_request_active_timeline(rq)->hwsp_ggtt != rq->engine->status_page.vma); GEM_BUG_ON(offset_in_page(i915_request_active_timeline(rq)->hwsp_offset) != I915_GEM_HWS_SEQNO_ADDR); *cs++ = MI_FLUSH; - *cs++ = MI_STORE_DWORD_INDEX; - *cs++ = 
I915_GEM_HWS_SEQNO_ADDR; - *cs++ = rq->fence.seqno; + while (flush--) { + *cs++ = MI_STORE_DWORD_INDEX; + *cs++ = I915_GEM_HWS_SCRATCH * sizeof(u32); + *cs++ = rq->fence.seqno; + } - *cs++ = MI_USER_INTERRUPT; - *cs++ = MI_NOOP; - - rq->tail = intel_ring_offset(rq, cs); - assert_ring_tail_valid(rq->ring, rq->tail); - - return cs; -} - -#define GEN5_WA_STORES 8 /* must be at least 1! */ -u32 *gen5_emit_breadcrumb(struct i915_request *rq, u32 *cs) -{ - int i; - - GEM_BUG_ON(i915_request_active_timeline(rq)->hwsp_ggtt != rq->engine->status_page.vma); - GEM_BUG_ON(offset_in_page(i915_request_active_timeline(rq)->hwsp_offset) != I915_GEM_HWS_SEQNO_ADDR); - - *cs++ = MI_FLUSH; - - BUILD_BUG_ON(GEN5_WA_STORES < 1); - for (i = 0; i < GEN5_WA_STORES; i++) { + while (post--) { *cs++ = MI_STORE_DWORD_INDEX; *cs++ = I915_GEM_HWS_SEQNO_ADDR; *cs++ = rq->fence.seqno; @@ -186,7 +166,16 @@ u32 *gen5_emit_breadcrumb(struct i915_request *rq, u32 *cs) return cs; } -#undef GEN5_WA_STORES + +u32 *gen3_emit_breadcrumb(struct i915_request *rq, u32 *cs) +{ + return __gen2_emit_breadcrumb(rq, cs, 16, 8); +} + +u32 *gen5_emit_breadcrumb(struct i915_request *rq, u32 *cs) +{ + return __gen2_emit_breadcrumb(rq, cs, 8, 8); +} /* Just userspace ABI convention to limit the wa batch bo to a resonable size */ #define I830_BATCH_LIMIT SZ_256K From 587a87b9d7e94927edcdea018565bc1939381eb1 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Fri, 12 Jun 2020 15:17:31 +0300 Subject: [PATCH 168/222] drm/i915/icl+: Fix hotplug interrupt disabling after storm detection MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Atm, hotplug interrupts on TypeC ports are left enabled after detecting an interrupt storm, fix this. 
Reported-by: Kunal Joshi References: https://gitlab.freedesktop.org/drm/intel/-/issues/351 Bugzilla: https://gitlab.freedesktop.org/drm/intel/-/issues/1964 Cc: Kunal Joshi Cc: stable@vger.kernel.org Signed-off-by: Imre Deak Reviewed-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20200612121731.19596-1-imre.deak@intel.com --- drivers/gpu/drm/i915/i915_irq.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 8e823ba25f5f..710224d930c5 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -3132,6 +3132,7 @@ static void gen11_hpd_irq_setup(struct drm_i915_private *dev_priv) val = I915_READ(GEN11_DE_HPD_IMR); val &= ~hotplug_irqs; + val |= ~enabled_irqs & hotplug_irqs; I915_WRITE(GEN11_DE_HPD_IMR, val); POSTING_READ(GEN11_DE_HPD_IMR); From 7102a76043eb520d3e2856697b9407b01d246d8b Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 15 Jun 2020 17:50:11 +0100 Subject: [PATCH 169/222] drm/i915/selftests: Disable preemptive heartbeats over preemption tests Since the heartbeat may cause a preemption event, disable it over the preemption suppression tests. 
Signed-off-by: Chris Wilson Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20200615165013.22973-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/selftest_lrc.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c index f651bdf7f191..91543494f595 100644 --- a/drivers/gpu/drm/i915/gt/selftest_lrc.c +++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c @@ -2282,7 +2282,7 @@ static int live_suppress_self_preempt(void *arg) if (igt_flush_test(gt->i915)) goto err_wedged; - intel_engine_pm_get(engine); + engine_heartbeat_disable(engine); engine->execlists.preempt_hang.count = 0; rq_a = spinner_create_request(&a.spin, @@ -2290,14 +2290,14 @@ static int live_suppress_self_preempt(void *arg) MI_NOOP); if (IS_ERR(rq_a)) { err = PTR_ERR(rq_a); - intel_engine_pm_put(engine); + engine_heartbeat_enable(engine); goto err_client_b; } i915_request_add(rq_a); if (!igt_wait_for_spinner(&a.spin, rq_a)) { pr_err("First client failed to start\n"); - intel_engine_pm_put(engine); + engine_heartbeat_enable(engine); goto err_wedged; } @@ -2309,7 +2309,7 @@ static int live_suppress_self_preempt(void *arg) MI_NOOP); if (IS_ERR(rq_b)) { err = PTR_ERR(rq_b); - intel_engine_pm_put(engine); + engine_heartbeat_enable(engine); goto err_client_b; } i915_request_add(rq_b); @@ -2320,7 +2320,7 @@ static int live_suppress_self_preempt(void *arg) if (!igt_wait_for_spinner(&b.spin, rq_b)) { pr_err("Second client failed to start\n"); - intel_engine_pm_put(engine); + engine_heartbeat_enable(engine); goto err_wedged; } @@ -2334,12 +2334,12 @@ static int live_suppress_self_preempt(void *arg) engine->name, engine->execlists.preempt_hang.count, depth); - intel_engine_pm_put(engine); + engine_heartbeat_enable(engine); err = -EINVAL; goto err_client_b; } - intel_engine_pm_put(engine); + engine_heartbeat_enable(engine); if (igt_flush_test(gt->i915)) goto err_wedged; } From 
f2e85e57367e8ac3a6106e3919f450354a2cf725 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 15 Jun 2020 17:50:12 +0100 Subject: [PATCH 170/222] drm/i915/selftests: Dump engine state and trace upon hanging after reset If the engine dies after a reset, and so we fail to submit a request but need to be interrupted by the CI runner, dump the engine state. Signed-off-by: Chris Wilson Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20200615165013.22973-2-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/selftest_hangcheck.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c index 2af66f8ffbd2..7461936d549d 100644 --- a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c +++ b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c @@ -499,6 +499,20 @@ static int igt_reset_nop_engine(void *arg) rq = intel_context_create_request(ce); if (IS_ERR(rq)) { + struct drm_printer p = + drm_info_printer(gt->i915->drm.dev); + intel_engine_dump(engine, &p, + "%s(%s): failed to submit request\n", + __func__, + engine->name); + + GEM_TRACE("%s(%s): failed to submit request\n", + __func__, + engine->name); + GEM_TRACE_DUMP(); + + intel_gt_set_wedged(gt); + err = PTR_ERR(rq); break; } From 5948938700446ac0eb21248cb354f602d8d7b237 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 15 Jun 2020 17:50:13 +0100 Subject: [PATCH 171/222] drm/i915/gt: Add a safety submission flush in the heartbeat Just in case everything fails (like for example "missed interrupt syndrome" on Sandybridge), always flush the submission tasklet from the heartbeat. This papers over such issues, but will still appear as a second long glitch, and prevents us from detecting it unless we happen to be performing a timed test. v2: We rely on flush_submission() synchronizing with the tasklet on another CPU. 
Signed-off-by: Chris Wilson Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20200615165013.22973-3-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/intel_engine_cs.c | 23 +++++++++---------- .../gpu/drm/i915/gt/intel_engine_heartbeat.c | 3 +++ 2 files changed, 14 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index d613cf31970c..31049e0bdb57 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -1094,19 +1094,18 @@ void intel_engine_flush_submission(struct intel_engine_cs *engine) { struct tasklet_struct *t = &engine->execlists.tasklet; - if (__tasklet_is_scheduled(t)) { - local_bh_disable(); - if (tasklet_trylock(t)) { - /* Must wait for any GPU reset in progress. */ - if (__tasklet_is_enabled(t)) - t->func(t->data); - tasklet_unlock(t); - } - local_bh_enable(); - } + /* Synchronise and wait for the tasklet on another CPU */ + tasklet_kill(t); - /* Otherwise flush the tasklet if it was running on another cpu */ - tasklet_unlock_wait(t); + /* Having cancelled the tasklet, ensure that is run */ + local_bh_disable(); + if (tasklet_trylock(t)) { + /* Must wait for any GPU reset in progress. 
*/ + if (__tasklet_is_enabled(t)) + t->func(t->data); + tasklet_unlock(t); + } + local_bh_enable(); } /** diff --git a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c index f67ad937eefb..cd20fb549b38 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c @@ -65,6 +65,9 @@ static void heartbeat(struct work_struct *wrk) struct intel_context *ce = engine->kernel_context; struct i915_request *rq; + /* Just in case everything has gone horribly wrong, give it a kick */ + intel_engine_flush_submission(engine); + rq = engine->heartbeat.systole; if (rq && i915_request_completed(rq)) { i915_request_put(rq); From 5a7eeb8ba143d860050ecea924a8f074f02d8023 Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Wed, 10 Jun 2020 13:18:07 -0700 Subject: [PATCH 172/222] drm/i915: Include asm sources for {ivb, hsw}_clear_kernel.c Alexandre Oliva has recently removed these files from Linux Libre with concerns that the sources weren't available. The sources are available on IGT repository, and only open source tools are used to generate the {ivb,hsw}_clear_kernel.c files. However, the remaining concern from Alexandre Oliva was around GPL license and the source not been present when distributing the code. So, it looks like 2 alternatives are possible, the use of linux-firmware.git repository to store the blob or making sure that the source is also present in our tree. Since the goal is to limit the i915 firmware to only the micro-controller blobs let's make sure that we do include the asm sources here in our tree. 
Btw, I tried to have some diligence here and make sure that the asms that these commits are adding are truly the source for the mentioned files: igt$ ./scripts/generate_clear_kernel.sh -g ivb \ -m ~/mesa/build/src/intel/tools/i965_asm Output file not specified - using default file "ivb-cb_assembled" Generating gen7 CB Kernel assembled file "ivb_clear_kernel.c" for i915 driver... igt$ diff ~/i915/drm-tip/drivers/gpu/drm/i915/gt/ivb_clear_kernel.c \ ivb_clear_kernel.c < * Generated by: IGT Gpu Tools on Fri 21 Feb 2020 05:29:32 AM UTC > * Generated by: IGT Gpu Tools on Mon 08 Jun 2020 10:00:54 AM PDT 61c61 < }; > }; \ No newline at end of file igt$ ./scripts/generate_clear_kernel.sh -g hsw \ -m ~/mesa/build/src/intel/tools/i965_asm Output file not specified - using default file "hsw-cb_assembled" Generating gen7.5 CB Kernel assembled file "hsw_clear_kernel.c" for i915 driver... igt$ diff ~/i915/drm-tip/drivers/gpu/drm/i915/gt/hsw_clear_kernel.c \ hsw_clear_kernel.c 5c5 < * Generated by: IGT Gpu Tools on Fri 21 Feb 2020 05:30:13 AM UTC > * Generated by: IGT Gpu Tools on Mon 08 Jun 2020 10:01:42 AM PDT 61c61 < }; > }; \ No newline at end of file Used IGT and Mesa master repositories from Fri Jun 5 2020) IGT: 53e8c878a6fb ("tests/kms_chamelium: Force reprobe after replugging the connector") Mesa: 5d13c7477eb1 ("radv: set keep_statistic_info with RADV_DEBUG=shaderstats") Mesa built with: meson build -D platforms=drm,x11 -D dri-drivers=i965 \ -D gallium-drivers=iris -D prefix=/usr \ -D libdir=/usr/lib64/ -Dtools=intel \ -Dkulkan-drivers=intel && ninja -C build v2: Header clean-up and include build instructions in a readme (Chris) Modified commit message to respect check-patch Reference: http://www.fsfla.org/pipermail/linux-libre/2020-June/003374.html Reference: http://www.fsfla.org/pipermail/linux-libre/2020-June/003375.html Fixes: 47f8253d2b89 ("drm/i915/gen7: Clear all EU/L3 residual contexts") Cc: # v5.7+ Cc: Alexandre Oliva Cc: Prathap Kumar Valsan Cc: Akeem G 
Abodunrin Cc: Mika Kuoppala Cc: Chris Wilson Cc: Jani Nikula Cc: Joonas Lahtinen Signed-off-by: Rodrigo Vivi Reviewed-by: Jon Bloomfield Link: https://patchwork.freedesktop.org/patch/msgid/20200610201807.191440-1-rodrigo.vivi@intel.com --- drivers/gpu/drm/i915/gt/shaders/README | 46 +++++++ .../drm/i915/gt/shaders/clear_kernel/hsw.asm | 119 ++++++++++++++++++ .../drm/i915/gt/shaders/clear_kernel/ivb.asm | 117 +++++++++++++++++ 3 files changed, 282 insertions(+) create mode 100644 drivers/gpu/drm/i915/gt/shaders/README create mode 100644 drivers/gpu/drm/i915/gt/shaders/clear_kernel/hsw.asm create mode 100644 drivers/gpu/drm/i915/gt/shaders/clear_kernel/ivb.asm diff --git a/drivers/gpu/drm/i915/gt/shaders/README b/drivers/gpu/drm/i915/gt/shaders/README new file mode 100644 index 000000000000..e7e96d7073c7 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/shaders/README @@ -0,0 +1,46 @@ +ASM sources for auto generated shaders +====================================== + +The i915/gt/hsw_clear_kernel.c and i915/gt/ivb_clear_kernel.c files contain +pre-compiled batch chunks that will clear any residual render cache during +context switch. + +They are generated from their respective platform ASM files present on +i915/gt/shaders/clear_kernel directory. + +The generated .c files should never be modified directly. Instead, any modification +needs to be done on their respective ASM files and the build instructions below +need to be followed. + +Building +======== + +Environment +----------- + +IGT GPU tool scripts and the Mesa's i965 instruction assembler tool are used +on building.
+ +Please make sure your Mesa tool is compiled with "-Dtools=intel" and +"-Ddri-drivers=i965", and run this script from the IGT source root directory. + +The instructions below assume: + * IGT gpu tools source code is located on your home directory (~) as ~/igt + * Mesa source code is located on your home directory (~) as ~/mesa + and built under the ~/mesa/build directory + * Linux kernel source code is under your home directory (~) as ~/linux + +Instructions +------------ + +~ $ cp ~/linux/drivers/gpu/drm/i915/gt/shaders/clear_kernel/ivb.asm \ + ~/igt/lib/i915/shaders/clear_kernel/ivb.asm +~ $ cd ~/igt +igt $ ./scripts/generate_clear_kernel.sh -g ivb \ + -m ~/mesa/build/src/intel/tools/i965_asm + +~ $ cp ~/linux/drivers/gpu/drm/i915/gt/shaders/clear_kernel/hsw.asm \ + ~/igt/lib/i915/shaders/clear_kernel/hsw.asm +~ $ cd ~/igt +igt $ ./scripts/generate_clear_kernel.sh -g hsw \ + -m ~/mesa/build/src/intel/tools/i965_asm \ No newline at end of file diff --git a/drivers/gpu/drm/i915/gt/shaders/clear_kernel/hsw.asm b/drivers/gpu/drm/i915/gt/shaders/clear_kernel/hsw.asm new file mode 100644 index 000000000000..5fdf384bb621 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/shaders/clear_kernel/hsw.asm @@ -0,0 +1,119 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2020 Intel Corporation + */ + +/* + * Kernel for PAVP buffer clear. + * + * 1. Clear all 64 GRF registers assigned to the kernel with designated value; + * 2. Write 32x16 block of all "0" to render target buffer which indirectly clears + * 512 bytes of Render Cache.
+ */ + +/* Store designated "clear GRF" value */ +mov(1) f0.1<1>UW g1.2<0,1,0>UW { align1 1N }; + +/** + * Curbe Format + * + * DW 1.0 - Block Offset to write Render Cache + * DW 1.1 [15:0] - Clear Word + * DW 1.2 - Delay iterations + * DW 1.3 - Enable Instrumentation (only for debug) + * DW 1.4 - Rsvd (intended for context ID) + * DW 1.5 - [31:16]:SliceCount, [15:0]:SubSlicePerSliceCount + * DW 1.6 - Rsvd MBZ (intended for Enable Wait on Total Thread Count) + * DW 1.7 - Rsvd MBZ (inteded for Total Thread Count) + * + * Binding Table + * + * BTI 0: 2D Surface to help clear L3 (Render/Data Cache) + * BTI 1: Wait/Instrumentation Buffer + * Size : (SliceCount * SubSliceCount * 16 EUs/SubSlice) rows * (16 threads/EU) cols (Format R32_UINT) + * Expected to be initialized to 0 by driver/another kernel + * Layout: + * RowN: Histogram for EU-N: (SliceID*SubSlicePerSliceCount + SSID)*16 + EUID [assume max 16 EUs / SS] + * Col-k[DW-k]: Threads Executed on ThreadID-k for EU-N + */ +add(1) g1.2<1>UD g1.2<0,1,0>UD 0x00000001UD { align1 1N }; /* Loop count to delay kernel: Init to (g1.2 + 1) */ +cmp.z.f0.0(1) null<1>UD g1.3<0,1,0>UD 0x00000000UD { align1 1N }; +(+f0.0) jmpi(1) 352D { align1 WE_all 1N }; + +/** + * State Register has info on where this thread is running + * IVB: sr0.0 :: [15:13]: MBZ, 12: HSID (Half-Slice ID), [11:8]EUID, [2:0] ThreadSlotID + * HSW: sr0.0 :: 15: MBZ, [14:13]: SliceID, 12: HSID (Half-Slice ID), [11:8]EUID, [2:0] ThreadSlotID + */ +mov(8) g3<1>UD 0x00000000UD { align1 1Q }; +shr(1) g3<1>D sr0<0,1,0>D 12D { align1 1N }; +and(1) g3<1>D g3<0,1,0>D 1D { align1 1N }; /* g3 has HSID */ +shr(1) g3.1<1>D sr0<0,1,0>D 13D { align1 1N }; +and(1) g3.1<1>D g3.1<0,1,0>D 3D { align1 1N }; /* g3.1 has sliceID */ +mul(1) g3.5<1>D g3.1<0,1,0>D g1.10<0,1,0>UW { align1 1N }; +add(1) g3<1>D g3<0,1,0>D g3.5<0,1,0>D { align1 1N }; /* g3 = sliceID * SubSlicePerSliceCount + HSID */ +shr(1) g3.2<1>D sr0<0,1,0>D 8D { align1 1N }; +and(1) g3.2<1>D g3.2<0,1,0>D 15D { align1 1N 
}; /* g3.2 = EUID */ +mul(1) g3.4<1>D g3<0,1,0>D 16D { align1 1N }; +add(1) g3.2<1>D g3.2<0,1,0>D g3.4<0,1,0>D { align1 1N }; /* g3.2 now points to EU row number (Y-pixel = V address ) in instrumentation surf */ + +mov(8) g5<1>UD 0x00000000UD { align1 1Q }; +and(1) g3.3<1>D sr0<0,1,0>D 7D { align1 1N }; +mul(1) g3.3<1>D g3.3<0,1,0>D 4D { align1 1N }; + +mov(8) g4<1>UD g0<8,8,1>UD { align1 1Q }; /* Initialize message header with g0 */ +mov(1) g4<1>UD g3.3<0,1,0>UD { align1 1N }; /* Block offset */ +mov(1) g4.1<1>UD g3.2<0,1,0>UD { align1 1N }; /* Block offset */ +mov(1) g4.2<1>UD 0x00000003UD { align1 1N }; /* Block size (1 row x 4 bytes) */ +and(1) g4.3<1>UD g4.3<0,1,0>UW 0xffffffffUD { align1 1N }; + +/* Media block read to fetch current value at specified location in instrumentation buffer */ +sendc(8) g5<1>UD g4<8,8,1>F 0x02190001 + + render MsgDesc: media block read MsgCtrl = 0x0 Surface = 1 mlen 1 rlen 1 { align1 1Q }; +add(1) g5<1>D g5<0,1,0>D 1D { align1 1N }; + +/* Media block write for updated value at specified location in instrumentation buffer */ +sendc(8) g5<1>UD g4<8,8,1>F 0x040a8001 + render MsgDesc: media block write MsgCtrl = 0x0 Surface = 1 mlen 2 rlen 0 { align1 1Q }; + +/* Delay thread for specified parameter */ +add.nz.f0.0(1) g1.2<1>UD g1.2<0,1,0>UD -1D { align1 1N }; +(+f0.0) jmpi(1) -32D { align1 WE_all 1N }; + +/* Store designated "clear GRF" value */ +mov(1) f0.1<1>UW g1.2<0,1,0>UW { align1 1N }; + +/* Initialize looping parameters */ +mov(1) a0<1>D 0D { align1 1N }; /* Initialize a0.0:w=0 */ +mov(1) a0.4<1>W 127W { align1 1N }; /* Loop count. 
Each loop contains 16 GRF's */ + +/* Write 32x16 all "0" block */ +mov(8) g2<1>UD g0<8,8,1>UD { align1 1Q }; +mov(8) g127<1>UD g0<8,8,1>UD { align1 1Q }; +mov(2) g2<1>UD g1<2,2,1>UW { align1 1N }; +mov(1) g2.2<1>UD 0x000f000fUD { align1 1N }; /* Block size (16x16) */ +and(1) g2.3<1>UD g2.3<0,1,0>UW 0xffffffefUD { align1 1N }; +mov(16) g3<1>UD 0x00000000UD { align1 1H }; +mov(16) g4<1>UD 0x00000000UD { align1 1H }; +mov(16) g5<1>UD 0x00000000UD { align1 1H }; +mov(16) g6<1>UD 0x00000000UD { align1 1H }; +mov(16) g7<1>UD 0x00000000UD { align1 1H }; +mov(16) g8<1>UD 0x00000000UD { align1 1H }; +mov(16) g9<1>UD 0x00000000UD { align1 1H }; +mov(16) g10<1>UD 0x00000000UD { align1 1H }; +sendc(8) null<1>UD g2<8,8,1>F 0x120a8000 + render MsgDesc: media block write MsgCtrl = 0x0 Surface = 0 mlen 9 rlen 0 { align1 1Q }; +add(1) g2<1>UD g1<0,1,0>UW 0x0010UW { align1 1N }; +sendc(8) null<1>UD g2<8,8,1>F 0x120a8000 + render MsgDesc: media block write MsgCtrl = 0x0 Surface = 0 mlen 9 rlen 0 { align1 1Q }; + +/* Now, clear all GRF registers */ +add.nz.f0.0(1) a0.4<1>W a0.4<0,1,0>W -1W { align1 1N }; +mov(16) g[a0]<1>UW f0.1<0,1,0>UW { align1 1H }; +add(1) a0<1>D a0<0,1,0>D 32D { align1 1N }; +(+f0.0) jmpi(1) -64D { align1 WE_all 1N }; + +/* Terminante the thread */ +sendc(8) null<1>UD g127<8,8,1>F 0x82000010 + thread_spawner MsgDesc: mlen 1 rlen 0 { align1 1Q EOT }; diff --git a/drivers/gpu/drm/i915/gt/shaders/clear_kernel/ivb.asm b/drivers/gpu/drm/i915/gt/shaders/clear_kernel/ivb.asm new file mode 100644 index 000000000000..97c7ac9e3854 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/shaders/clear_kernel/ivb.asm @@ -0,0 +1,117 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2020 Intel Corporation + */ + +/* + * Kernel for PAVP buffer clear. + * + * 1. Clear all 64 GRF registers assigned to the kernel with designated value; + * 2. Write 32x16 block of all "0" to render target buffer which indirectly clears + * 512 bytes of Render Cache. 
+ */ + +/* Store designated "clear GRF" value */ +mov(1) f0.1<1>UW g1.2<0,1,0>UW { align1 1N }; + +/** + * Curbe Format + * + * DW 1.0 - Block Offset to write Render Cache + * DW 1.1 [15:0] - Clear Word + * DW 1.2 - Delay iterations + * DW 1.3 - Enable Instrumentation (only for debug) + * DW 1.4 - Rsvd (intended for context ID) + * DW 1.5 - [31:16]:SliceCount, [15:0]:SubSlicePerSliceCount + * DW 1.6 - Rsvd MBZ (intended for Enable Wait on Total Thread Count) + * DW 1.7 - Rsvd MBZ (inteded for Total Thread Count) + * + * Binding Table + * + * BTI 0: 2D Surface to help clear L3 (Render/Data Cache) + * BTI 1: Wait/Instrumentation Buffer + * Size : (SliceCount * SubSliceCount * 16 EUs/SubSlice) rows * (16 threads/EU) cols (Format R32_UINT) + * Expected to be initialized to 0 by driver/another kernel + * Layout : + * RowN: Histogram for EU-N: (SliceID*SubSlicePerSliceCount + SSID)*16 + EUID [assume max 16 EUs / SS] + * Col-k[DW-k]: Threads Executed on ThreadID-k for EU-N + */ +add(1) g1.2<1>UD g1.2<0,1,0>UD 0x00000001UD { align1 1N }; /* Loop count to delay kernel: Init to (g1.2 + 1) */ +cmp.z.f0.0(1) null<1>UD g1.3<0,1,0>UD 0x00000000UD { align1 1N }; +(+f0.0) jmpi(1) 44D { align1 WE_all 1N }; + +/** + * State Register has info on where this thread is running + * IVB: sr0.0 :: [15:13]: MBZ, 12: HSID (Half-Slice ID), [11:8]EUID, [2:0] ThreadSlotID + * HSW: sr0.0 :: 15: MBZ, [14:13]: SliceID, 12: HSID (Half-Slice ID), [11:8]EUID, [2:0] ThreadSlotID + */ +mov(8) g3<1>UD 0x00000000UD { align1 1Q }; +shr(1) g3<1>D sr0<0,1,0>D 12D { align1 1N }; +and(1) g3<1>D g3<0,1,0>D 1D { align1 1N }; /* g3 has HSID */ +shr(1) g3.1<1>D sr0<0,1,0>D 13D { align1 1N }; +and(1) g3.1<1>D g3.1<0,1,0>D 3D { align1 1N }; /* g3.1 has sliceID */ +mul(1) g3.5<1>D g3.1<0,1,0>D g1.10<0,1,0>UW { align1 1N }; +add(1) g3<1>D g3<0,1,0>D g3.5<0,1,0>D { align1 1N }; /* g3 = sliceID * SubSlicePerSliceCount + HSID */ +shr(1) g3.2<1>D sr0<0,1,0>D 8D { align1 1N }; +and(1) g3.2<1>D g3.2<0,1,0>D 15D { align1 1N 
}; /* g3.2 = EUID */ +mul(1) g3.4<1>D g3<0,1,0>D 16D { align1 1N }; +add(1) g3.2<1>D g3.2<0,1,0>D g3.4<0,1,0>D { align1 1N }; /* g3.2 now points to EU row number (Y-pixel = V address ) in instrumentation surf */ + +mov(8) g5<1>UD 0x00000000UD { align1 1Q }; +and(1) g3.3<1>D sr0<0,1,0>D 7D { align1 1N }; +mul(1) g3.3<1>D g3.3<0,1,0>D 4D { align1 1N }; + +mov(8) g4<1>UD g0<8,8,1>UD { align1 1Q }; /* Initialize message header with g0 */ +mov(1) g4<1>UD g3.3<0,1,0>UD { align1 1N }; /* Block offset */ +mov(1) g4.1<1>UD g3.2<0,1,0>UD { align1 1N }; /* Block offset */ +mov(1) g4.2<1>UD 0x00000003UD { align1 1N }; /* Block size (1 row x 4 bytes) */ +and(1) g4.3<1>UD g4.3<0,1,0>UW 0xffffffffUD { align1 1N }; + +/* Media block read to fetch current value at specified location in instrumentation buffer */ +sendc(8) g5<1>UD g4<8,8,1>F 0x02190001 + render MsgDesc: media block read MsgCtrl = 0x0 Surface = 1 mlen 1 rlen 1 { align1 1Q }; +add(1) g5<1>D g5<0,1,0>D 1D { align1 1N }; + +/* Media block write for updated value at specified location in instrumentation buffer */ +sendc(8) g5<1>UD g4<8,8,1>F 0x040a8001 + render MsgDesc: media block write MsgCtrl = 0x0 Surface = 1 mlen 2 rlen 0 { align1 1Q }; +/* Delay thread for specified parameter */ +add.nz.f0.0(1) g1.2<1>UD g1.2<0,1,0>UD -1D { align1 1N }; +(+f0.0) jmpi(1) -4D { align1 WE_all 1N }; + +/* Store designated "clear GRF" value */ +mov(1) f0.1<1>UW g1.2<0,1,0>UW { align1 1N }; + +/* Initialize looping parameters */ +mov(1) a0<1>D 0D { align1 1N }; /* Initialize a0.0:w=0 */ +mov(1) a0.4<1>W 127W { align1 1N }; /* Loop count. 
Each loop contains 16 GRF's */ + +/* Write 32x16 all "0" block */ +mov(8) g2<1>UD g0<8,8,1>UD { align1 1Q }; +mov(8) g127<1>UD g0<8,8,1>UD { align1 1Q }; +mov(2) g2<1>UD g1<2,2,1>UW { align1 1N }; +mov(1) g2.2<1>UD 0x000f000fUD { align1 1N }; /* Block size (16x16) */ +and(1) g2.3<1>UD g2.3<0,1,0>UW 0xffffffefUD { align1 1N }; +mov(16) g3<1>UD 0x00000000UD { align1 1H }; +mov(16) g4<1>UD 0x00000000UD { align1 1H }; +mov(16) g5<1>UD 0x00000000UD { align1 1H }; +mov(16) g6<1>UD 0x00000000UD { align1 1H }; +mov(16) g7<1>UD 0x00000000UD { align1 1H }; +mov(16) g8<1>UD 0x00000000UD { align1 1H }; +mov(16) g9<1>UD 0x00000000UD { align1 1H }; +mov(16) g10<1>UD 0x00000000UD { align1 1H }; +sendc(8) null<1>UD g2<8,8,1>F 0x120a8000 + render MsgDesc: media block write MsgCtrl = 0x0 Surface = 0 mlen 9 rlen 0 { align1 1Q }; +add(1) g2<1>UD g1<0,1,0>UW 0x0010UW { align1 1N }; +sendc(8) null<1>UD g2<8,8,1>F 0x120a8000 + render MsgDesc: media block write MsgCtrl = 0x0 Surface = 0 mlen 9 rlen 0 { align1 1Q }; + +/* Now, clear all GRF registers */ +add.nz.f0.0(1) a0.4<1>W a0.4<0,1,0>W -1W { align1 1N }; +mov(16) g[a0]<1>UW f0.1<0,1,0>UW { align1 1H }; +add(1) a0<1>D a0<0,1,0>D 32D { align1 1N }; +(+f0.0) jmpi(1) -8D { align1 WE_all 1N }; + +/* Terminante the thread */ +sendc(8) null<1>UD g127<8,8,1>F 0x82000010 + thread_spawner MsgDesc: mlen 1 rlen 0 { align1 1Q EOT }; From 570af07d794fc746c42f0aeaaa82362416f33a54 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 15 Jun 2020 19:39:35 +0100 Subject: [PATCH 173/222] drm/i915/gt: Don't flush the tasklet if not setup If the tasklet is not being used, don't try and flush it. 
Fixes: 594893870044 ("drm/i915/gt: Add a safety submission flush in the heartbeat") Signed-off-by: Chris Wilson Cc: Mika Kuoppala Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20200615183935.17389-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/intel_engine_cs.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index 31049e0bdb57..045179c65c44 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -1094,6 +1094,9 @@ void intel_engine_flush_submission(struct intel_engine_cs *engine) { struct tasklet_struct *t = &engine->execlists.tasklet; + if (!t->func) + return; + /* Synchronise and wait for the tasklet on another CPU */ tasklet_kill(t); From f29e08800b6d4d80906d7d33f3da39e1d05fab38 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Tue, 16 Jun 2020 09:54:52 -0500 Subject: [PATCH 174/222] drm/i915/selftests: Fix inconsistent IS_ERR and PTR_ERR Fix inconsistent IS_ERR and PTR_ERR in live_timeslice_nopreempt(). The proper pointer to be passed as argument to PTR_ERR() is ce. This bug was detected with the help of Coccinelle. Fixes: b72f02d78e4f ("drm/i915/gt: Prevent timeslicing into unpreemptable requests") Signed-off-by: Gustavo A. R. 
Silva Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20200616145452.GA25291@embeddedor --- drivers/gpu/drm/i915/gt/selftest_lrc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c index 91543494f595..393339de0910 100644 --- a/drivers/gpu/drm/i915/gt/selftest_lrc.c +++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c @@ -1337,7 +1337,7 @@ static int live_timeslice_nopreempt(void *arg) ce = intel_context_create(engine); if (IS_ERR(ce)) { - err = PTR_ERR(rq); + err = PTR_ERR(ce); goto out_spin; } From e971fe9128a742537a1d8803b852c69777a885eb Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 16 Jun 2020 19:31:39 +0100 Subject: [PATCH 175/222] drm/i915: Mark up inline getters as taking a const i915_request Since these inline routines only return the desired pointer from the i915_request(after checking the preconditions for acquiring said pointer), they can be const. Signed-off-by: Chris Wilson Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20200616183139.4061-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_request.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h index 118ab6650d1f..590762820761 100644 --- a/drivers/gpu/drm/i915/i915_request.h +++ b/drivers/gpu/drm/i915/i915_request.h @@ -561,7 +561,7 @@ static inline void i915_request_clear_hold(struct i915_request *rq) } static inline struct intel_timeline * -i915_request_timeline(struct i915_request *rq) +i915_request_timeline(const struct i915_request *rq) { /* Valid only while the request is being constructed (or retired). 
*/ return rcu_dereference_protected(rq->timeline, @@ -569,14 +569,14 @@ i915_request_timeline(struct i915_request *rq) } static inline struct i915_gem_context * -i915_request_gem_context(struct i915_request *rq) +i915_request_gem_context(const struct i915_request *rq) { /* Valid only while the request is being constructed (or retired). */ return rcu_dereference_protected(rq->context->gem_context, true); } static inline struct intel_timeline * -i915_request_active_timeline(struct i915_request *rq) +i915_request_active_timeline(const struct i915_request *rq) { /* * When in use during submission, we are protected by a guarantee that From 223128f767102127bf651a509df745fb6d56b3fb Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Mon, 15 Jun 2020 16:14:49 +0100 Subject: [PATCH 176/222] drm/i915: Remove redundant i915_request_await_object in blit clears One i915_request_await_object is enough and we keep the one under the object lock so it is final. At the same time move async clflushing setup under the same locked section and consolidate common code into a helper function. v2: * Emit initial breadcrumbs after aways are set up. (Chris) Signed-off-by: Tvrtko Ursulin Cc: Matthew Auld Cc: Chris Wilson Cc: Michael J. Ruhl Reviewed-by: Chris Wilson Reviewed-by: Michael J. 
Ruhl Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20200615151449.32605-1-tvrtko.ursulin@linux.intel.com --- .../gpu/drm/i915/gem/i915_gem_object_blt.c | 52 ++++++++----------- 1 file changed, 21 insertions(+), 31 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c b/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c index f457d7130491..bfdb32d46877 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c @@ -126,6 +126,17 @@ void intel_emit_vma_release(struct intel_context *ce, struct i915_vma *vma) intel_engine_pm_put(ce->engine); } +static int +move_obj_to_gpu(struct drm_i915_gem_object *obj, + struct i915_request *rq, + bool write) +{ + if (obj->cache_dirty & ~obj->cache_coherent) + i915_gem_clflush_object(obj, 0); + + return i915_request_await_object(rq, obj, write); +} + int i915_gem_object_fill_blt(struct drm_i915_gem_object *obj, struct intel_context *ce, u32 value) @@ -143,12 +154,6 @@ int i915_gem_object_fill_blt(struct drm_i915_gem_object *obj, if (unlikely(err)) return err; - if (obj->cache_dirty & ~obj->cache_coherent) { - i915_gem_object_lock(obj); - i915_gem_clflush_object(obj, 0); - i915_gem_object_unlock(obj); - } - batch = intel_emit_vma_fill_blt(ce, vma, value); if (IS_ERR(batch)) { err = PTR_ERR(batch); @@ -165,27 +170,22 @@ int i915_gem_object_fill_blt(struct drm_i915_gem_object *obj, if (unlikely(err)) goto out_request; - err = i915_request_await_object(rq, obj, true); - if (unlikely(err)) - goto out_request; - - if (ce->engine->emit_init_breadcrumb) { - err = ce->engine->emit_init_breadcrumb(rq); - if (unlikely(err)) - goto out_request; - } - i915_vma_lock(vma); - err = i915_request_await_object(rq, vma->obj, true); + err = move_obj_to_gpu(vma->obj, rq, true); if (err == 0) err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); i915_vma_unlock(vma); if (unlikely(err)) goto out_request; - err = ce->engine->emit_bb_start(rq, - 
batch->node.start, batch->node.size, - 0); + if (ce->engine->emit_init_breadcrumb) + err = ce->engine->emit_init_breadcrumb(rq); + + if (likely(!err)) + err = ce->engine->emit_bb_start(rq, + batch->node.start, + batch->node.size, + 0); out_request: if (unlikely(err)) i915_request_set_error_once(rq, err); @@ -317,16 +317,6 @@ out_pm: return ERR_PTR(err); } -static int move_to_gpu(struct i915_vma *vma, struct i915_request *rq, bool write) -{ - struct drm_i915_gem_object *obj = vma->obj; - - if (obj->cache_dirty & ~obj->cache_coherent) - i915_gem_clflush_object(obj, 0); - - return i915_request_await_object(rq, obj, write); -} - int i915_gem_object_copy_blt(struct drm_i915_gem_object *src, struct drm_i915_gem_object *dst, struct intel_context *ce) @@ -375,7 +365,7 @@ int i915_gem_object_copy_blt(struct drm_i915_gem_object *src, goto out_request; for (i = 0; i < ARRAY_SIZE(vma); i++) { - err = move_to_gpu(vma[i], rq, i); + err = move_obj_to_gpu(vma[i]->obj, rq, i); if (unlikely(err)) goto out_unlock; } From 9199c070cdde5ef5959c81d76608f992846fe894 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 16 Jun 2020 19:55:17 +0100 Subject: [PATCH 177/222] drm/i915/selftests: Exercise far preemption rollbacks Not too long ago, we realised we had issues with rolling back a context so far for a preemption request that we considered the resubmit not to be a rollback but a forward roll. This means we would issue a lite restore instead of forcing a full restore, continuing execution of the old requests rather than causing a preemption. Add a selftest to exercise such a far rollback, such that if we were to skip the full restore, we would execute invalid instructions in the ring and hang. Note that while I was able to confirm that this causes us to do a lite-restore preemption rollback (with commit e36ba817fa96 ("drm/i915/gt: Incrementally check for rewinding") disabled), it did not trick the HW into rolling past the old RING_TAIL. Maybe on other HW.
References: e36ba817fa96 ("drm/i915/gt: Incrementally check for rewinding") Signed-off-by: Chris Wilson Cc: Mika Kuoppala Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20200616185518.11948-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/selftest_lrc.c | 151 +++++++++++++++++++++++++ 1 file changed, 151 insertions(+) diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c index 393339de0910..ad8692eccd65 100644 --- a/drivers/gpu/drm/i915/gt/selftest_lrc.c +++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c @@ -363,6 +363,156 @@ static int live_unlite_preempt(void *arg) return live_unlite_restore(arg, I915_USER_PRIORITY(I915_PRIORITY_MAX)); } +static int live_unlite_ring(void *arg) +{ + struct intel_gt *gt = arg; + struct intel_engine_cs *engine; + struct igt_spinner spin; + enum intel_engine_id id; + int err = 0; + + /* + * Setup a preemption event that will cause almost the entire ring + * to be unwound, potentially fooling our intel_ring_direction() + * into emitting a forward lite-restore instead of the rollback. 
+ */ + + if (igt_spinner_init(&spin, gt)) + return -ENOMEM; + + for_each_engine(engine, gt, id) { + struct intel_context *ce[2] = {}; + struct i915_request *rq; + struct igt_live_test t; + int n; + + if (!intel_engine_has_preemption(engine)) + continue; + + if (!intel_engine_can_store_dword(engine)) + continue; + + if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) { + err = -EIO; + break; + } + engine_heartbeat_disable(engine); + + for (n = 0; n < ARRAY_SIZE(ce); n++) { + struct intel_context *tmp; + + tmp = intel_context_create(engine); + if (IS_ERR(tmp)) { + err = PTR_ERR(tmp); + goto err_ce; + } + + err = intel_context_pin(tmp); + if (err) { + intel_context_put(tmp); + goto err_ce; + } + + memset32(tmp->ring->vaddr, + 0xdeadbeef, /* trigger a hang if executed */ + tmp->ring->vma->size / sizeof(u32)); + + ce[n] = tmp; + } + + /* Create max prio spinner, followed by N low prio nops */ + rq = igt_spinner_create_request(&spin, ce[0], MI_ARB_CHECK); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto err_ce; + } + + i915_request_get(rq); + rq->sched.attr.priority = I915_PRIORITY_BARRIER; + i915_request_add(rq); + + if (!igt_wait_for_spinner(&spin, rq)) { + intel_gt_set_wedged(gt); + i915_request_put(rq); + err = -ETIME; + goto err_ce; + } + + /* Fill the ring, until we will cause a wrap */ + n = 0; + while (intel_ring_direction(ce[0]->ring, + rq->wa_tail, + ce[0]->ring->tail) <= 0) { + struct i915_request *tmp; + + tmp = intel_context_create_request(ce[0]); + if (IS_ERR(tmp)) { + err = PTR_ERR(tmp); + i915_request_put(rq); + goto err_ce; + } + + i915_request_add(tmp); + intel_engine_flush_submission(engine); + n++; + } + intel_engine_flush_submission(engine); + pr_debug("%s: Filled ring with %d nop tails {size:%x, tail:%x, emit:%x, rq.tail:%x}\n", + engine->name, n, + ce[0]->ring->size, + ce[0]->ring->tail, + ce[0]->ring->emit, + rq->tail); + GEM_BUG_ON(intel_ring_direction(ce[0]->ring, + rq->tail, + ce[0]->ring->tail) <= 0); + i915_request_put(rq); + + /* 
Create a second ring to preempt the first ring after rq[0] */ + rq = intel_context_create_request(ce[1]); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto err_ce; + } + + rq->sched.attr.priority = I915_PRIORITY_BARRIER; + i915_request_get(rq); + i915_request_add(rq); + + err = wait_for_submit(engine, rq, HZ / 2); + i915_request_put(rq); + if (err) { + pr_err("%s: preemption request was not submited\n", + engine->name); + err = -ETIME; + } + + pr_debug("%s: ring[0]:{ tail:%x, emit:%x }, ring[1]:{ tail:%x, emit:%x }\n", + engine->name, + ce[0]->ring->tail, ce[0]->ring->emit, + ce[1]->ring->tail, ce[1]->ring->emit); + +err_ce: + intel_engine_flush_submission(engine); + igt_spinner_end(&spin); + for (n = 0; n < ARRAY_SIZE(ce); n++) { + if (IS_ERR_OR_NULL(ce[n])) + break; + + intel_context_unpin(ce[n]); + intel_context_put(ce[n]); + } + engine_heartbeat_enable(engine); + if (igt_live_test_end(&t)) + err = -EIO; + if (err) + break; + } + + igt_spinner_fini(&spin); + return err; +} + static int live_pin_rewind(void *arg) { struct intel_gt *gt = arg; @@ -4374,6 +4524,7 @@ int intel_execlists_live_selftests(struct drm_i915_private *i915) SUBTEST(live_sanitycheck), SUBTEST(live_unlite_switch), SUBTEST(live_unlite_preempt), + SUBTEST(live_unlite_ring), SUBTEST(live_pin_rewind), SUBTEST(live_hold_reset), SUBTEST(live_error_interrupt), From ba0cada97656fa963a97b476816c0dc4185e4aa8 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 16 Jun 2020 19:55:18 +0100 Subject: [PATCH 178/222] drm/i915/selftests: Use friendly request names for live_timeslice_rewind Rather than mixing [012] and (A1, A2, B2) for the request indices, use the enums throughout. 
Signed-off-by: Chris Wilson Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20200616185518.11948-2-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/selftest_lrc.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c index ad8692eccd65..b8b7b91019f4 100644 --- a/drivers/gpu/drm/i915/gt/selftest_lrc.c +++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c @@ -1177,18 +1177,18 @@ static int live_timeslice_rewind(void *arg) goto err; } - rq[0] = create_rewinder(ce, NULL, slot, X); - if (IS_ERR(rq[0])) { + rq[A1] = create_rewinder(ce, NULL, slot, X); + if (IS_ERR(rq[A1])) { intel_context_put(ce); goto err; } - rq[1] = create_rewinder(ce, NULL, slot, Y); + rq[A2] = create_rewinder(ce, NULL, slot, Y); intel_context_put(ce); - if (IS_ERR(rq[1])) + if (IS_ERR(rq[A2])) goto err; - err = wait_for_submit(engine, rq[1], HZ / 2); + err = wait_for_submit(engine, rq[A2], HZ / 2); if (err) { pr_err("%s: failed to submit first context\n", engine->name); @@ -1201,12 +1201,12 @@ static int live_timeslice_rewind(void *arg) goto err; } - rq[2] = create_rewinder(ce, rq[0], slot, Z); + rq[B1] = create_rewinder(ce, rq[A1], slot, Z); intel_context_put(ce); if (IS_ERR(rq[2])) goto err; - err = wait_for_submit(engine, rq[2], HZ / 2); + err = wait_for_submit(engine, rq[B1], HZ / 2); if (err) { pr_err("%s: failed to submit second context\n", engine->name); @@ -1214,6 +1214,7 @@ static int live_timeslice_rewind(void *arg) } /* ELSP[] = { { A:rq1, A:rq2 }, { B:rq1 } } */ + ENGINE_TRACE(engine, "forcing tasklet for rewind\n"); if (i915_request_is_active(rq[A2])) { /* semaphore yielded! 
*/ /* Wait for the timeslice to kick in */ del_timer(&engine->execlists.timer); From dfdfbd382348d53aeedaeee10eeea9a5958a3977 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 17 Jun 2020 00:37:33 +0100 Subject: [PATCH 179/222] drm/i915/selftests: Check preemption rollback of different ring queue depths Like live_unlite_ring, but instead of simply looking at the impact of intel_ring_direction(), check that preemption more generally works with different depths of queued requests in the ring. Signed-off-by: Chris Wilson Cc: Mika Kuoppala Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20200616233733.18050-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/selftest_lrc.c | 163 +++++++++++++++++++++++++ 1 file changed, 163 insertions(+) diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c index b8b7b91019f4..4f3758a1cbcf 100644 --- a/drivers/gpu/drm/i915/gt/selftest_lrc.c +++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c @@ -2758,6 +2758,168 @@ err_ce: return err; } +static int __live_preempt_ring(struct intel_engine_cs *engine, + struct igt_spinner *spin, + int queue_sz, int ring_sz) +{ + struct intel_context *ce[2] = {}; + struct i915_request *rq; + struct igt_live_test t; + int err = 0; + int n; + + if (igt_live_test_begin(&t, engine->i915, __func__, engine->name)) + return -EIO; + + for (n = 0; n < ARRAY_SIZE(ce); n++) { + struct intel_context *tmp; + + tmp = intel_context_create(engine); + if (IS_ERR(tmp)) { + err = PTR_ERR(tmp); + goto err_ce; + } + + tmp->ring = __intel_context_ring_size(ring_sz); + + err = intel_context_pin(tmp); + if (err) { + intel_context_put(tmp); + goto err_ce; + } + + memset32(tmp->ring->vaddr, + 0xdeadbeef, /* trigger a hang if executed */ + tmp->ring->vma->size / sizeof(u32)); + + ce[n] = tmp; + } + + rq = igt_spinner_create_request(spin, ce[0], MI_ARB_CHECK); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto err_ce; + } + + i915_request_get(rq); + 
rq->sched.attr.priority = I915_PRIORITY_BARRIER; + i915_request_add(rq); + + if (!igt_wait_for_spinner(spin, rq)) { + intel_gt_set_wedged(engine->gt); + i915_request_put(rq); + err = -ETIME; + goto err_ce; + } + + /* Fill the ring, until we will cause a wrap */ + n = 0; + while (ce[0]->ring->tail - rq->wa_tail <= queue_sz) { + struct i915_request *tmp; + + tmp = intel_context_create_request(ce[0]); + if (IS_ERR(tmp)) { + err = PTR_ERR(tmp); + i915_request_put(rq); + goto err_ce; + } + + i915_request_add(tmp); + intel_engine_flush_submission(engine); + n++; + } + intel_engine_flush_submission(engine); + pr_debug("%s: Filled %d with %d nop tails {size:%x, tail:%x, emit:%x, rq.tail:%x}\n", + engine->name, queue_sz, n, + ce[0]->ring->size, + ce[0]->ring->tail, + ce[0]->ring->emit, + rq->tail); + i915_request_put(rq); + + /* Create a second request to preempt the first ring */ + rq = intel_context_create_request(ce[1]); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto err_ce; + } + + rq->sched.attr.priority = I915_PRIORITY_BARRIER; + i915_request_get(rq); + i915_request_add(rq); + + err = wait_for_submit(engine, rq, HZ / 2); + i915_request_put(rq); + if (err) { + pr_err("%s: preemption request was not submited\n", + engine->name); + err = -ETIME; + } + + pr_debug("%s: ring[0]:{ tail:%x, emit:%x }, ring[1]:{ tail:%x, emit:%x }\n", + engine->name, + ce[0]->ring->tail, ce[0]->ring->emit, + ce[1]->ring->tail, ce[1]->ring->emit); + +err_ce: + intel_engine_flush_submission(engine); + igt_spinner_end(spin); + for (n = 0; n < ARRAY_SIZE(ce); n++) { + if (IS_ERR_OR_NULL(ce[n])) + break; + + intel_context_unpin(ce[n]); + intel_context_put(ce[n]); + } + if (igt_live_test_end(&t)) + err = -EIO; + return err; +} + +static int live_preempt_ring(void *arg) +{ + struct intel_gt *gt = arg; + struct intel_engine_cs *engine; + struct igt_spinner spin; + enum intel_engine_id id; + int err = 0; + + /* + * Check that we rollback large chunks of a ring in order to do a + * preemption event. 
Similar to live_unlite_ring, but looking at + * ring size rather than the impact of intel_ring_direction(). + */ + + if (igt_spinner_init(&spin, gt)) + return -ENOMEM; + + for_each_engine(engine, gt, id) { + int n; + + if (!intel_engine_has_preemption(engine)) + continue; + + if (!intel_engine_can_store_dword(engine)) + continue; + + engine_heartbeat_disable(engine); + + for (n = 0; n <= 3; n++) { + err = __live_preempt_ring(engine, &spin, + n * SZ_4K / 4, SZ_4K); + if (err) + break; + } + + engine_heartbeat_enable(engine); + if (err) + break; + } + + igt_spinner_fini(&spin); + return err; +} + static int live_preempt_gang(void *arg) { struct intel_gt *gt = arg; @@ -4540,6 +4702,7 @@ int intel_execlists_live_selftests(struct drm_i915_private *i915) SUBTEST(live_preempt_cancel), SUBTEST(live_suppress_self_preempt), SUBTEST(live_chain_preempt), + SUBTEST(live_preempt_ring), SUBTEST(live_preempt_gang), SUBTEST(live_preempt_timeout), SUBTEST(live_preempt_user), From 0ff0fc97d3074c447a40e8b4375ba6a88cd571be Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 17 Jun 2020 09:52:07 +0100 Subject: [PATCH 180/222] drm/i915/selftests: fix spelling mistake "submited" -> "submitted" There is a spelling mistake in a pr_err message. Fix it. 
Signed-off-by: Colin Ian King Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20200617085207.167552-1-colin.king@canonical.com --- drivers/gpu/drm/i915/gt/selftest_lrc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c index 4f3758a1cbcf..58e4e9aafe94 100644 --- a/drivers/gpu/drm/i915/gt/selftest_lrc.c +++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c @@ -482,7 +482,7 @@ static int live_unlite_ring(void *arg) err = wait_for_submit(engine, rq, HZ / 2); i915_request_put(rq); if (err) { - pr_err("%s: preemption request was not submited\n", + pr_err("%s: preemption request was not submitted\n", engine->name); err = -ETIME; } From 1b90e4a43b7444fc4159c6dd338a44e8808e8718 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 17 Jun 2020 14:09:15 +0100 Subject: [PATCH 181/222] drm/i915/selftests: Enable selftesting of busy-stats A couple of very simple tests to ensure that the basic properties of per-engine busyness accounting [0% and 100% busy] are faithful. 
Signed-off-by: Chris Wilson Reviewed-by: Mika Kuoppala Cc: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20200617130916.15261-1-chris@chris-wilson.co.uk --- .../drm/i915/gt/selftest_engine_heartbeat.c | 47 ++++---- .../drm/i915/gt/selftest_engine_heartbeat.h | 14 +++ drivers/gpu/drm/i915/gt/selftest_engine_pm.c | 103 ++++++++++++++++++ drivers/gpu/drm/i915/gt/selftest_hangcheck.c | 29 ++--- drivers/gpu/drm/i915/gt/selftest_lrc.c | 79 ++++++-------- drivers/gpu/drm/i915/gt/selftest_rps.c | 68 +++++------- drivers/gpu/drm/i915/gt/selftest_timeline.c | 21 +--- drivers/gpu/drm/i915/selftests/i915_request.c | 21 +--- 8 files changed, 212 insertions(+), 170 deletions(-) create mode 100644 drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.h diff --git a/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c b/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c index 697114dd1f47..f3034c613bc0 100644 --- a/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c +++ b/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c @@ -10,6 +10,7 @@ #include "intel_gt_requests.h" #include "i915_selftest.h" +#include "selftest_engine_heartbeat.h" static int timeline_sync(struct intel_timeline *tl) { @@ -142,24 +143,6 @@ out: return err; } -static void engine_heartbeat_disable(struct intel_engine_cs *engine, - unsigned long *saved) -{ - *saved = engine->props.heartbeat_interval_ms; - engine->props.heartbeat_interval_ms = 0; - - intel_engine_pm_get(engine); - intel_engine_park_heartbeat(engine); -} - -static void engine_heartbeat_enable(struct intel_engine_cs *engine, - unsigned long saved) -{ - intel_engine_pm_put(engine); - - engine->props.heartbeat_interval_ms = saved; -} - static int live_idle_flush(void *arg) { struct intel_gt *gt = arg; @@ -170,11 +153,9 @@ static int live_idle_flush(void *arg) /* Check that we can flush the idle barriers */ for_each_engine(engine, gt, id) { - unsigned long heartbeat; - - engine_heartbeat_disable(engine, &heartbeat); + 
st_engine_heartbeat_disable(engine); err = __live_idle_pulse(engine, intel_engine_flush_barriers); - engine_heartbeat_enable(engine, heartbeat); + st_engine_heartbeat_enable(engine); if (err) break; } @@ -192,11 +173,9 @@ static int live_idle_pulse(void *arg) /* Check that heartbeat pulses flush the idle barriers */ for_each_engine(engine, gt, id) { - unsigned long heartbeat; - - engine_heartbeat_disable(engine, &heartbeat); + st_engine_heartbeat_disable(engine); err = __live_idle_pulse(engine, intel_engine_pulse); - engine_heartbeat_enable(engine, heartbeat); + st_engine_heartbeat_enable(engine); if (err && err != -ENODEV) break; @@ -394,3 +373,19 @@ int intel_heartbeat_live_selftests(struct drm_i915_private *i915) i915_modparams.enable_hangcheck = saved_hangcheck; return err; } + +void st_engine_heartbeat_disable(struct intel_engine_cs *engine) +{ + engine->props.heartbeat_interval_ms = 0; + + intel_engine_pm_get(engine); + intel_engine_park_heartbeat(engine); +} + +void st_engine_heartbeat_enable(struct intel_engine_cs *engine) +{ + intel_engine_pm_put(engine); + + engine->props.heartbeat_interval_ms = + engine->defaults.heartbeat_interval_ms; +} diff --git a/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.h b/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.h new file mode 100644 index 000000000000..cd27113d5400 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2020 Intel Corporation + */ + +#ifndef SELFTEST_ENGINE_HEARTBEAT_H +#define SELFTEST_ENGINE_HEARTBEAT_H + +struct intel_engine_cs; + +void st_engine_heartbeat_disable(struct intel_engine_cs *engine); +void st_engine_heartbeat_enable(struct intel_engine_cs *engine); + +#endif /* SELFTEST_ENGINE_HEARTBEAT_H */ diff --git a/drivers/gpu/drm/i915/gt/selftest_engine_pm.c b/drivers/gpu/drm/i915/gt/selftest_engine_pm.c index cbf6b0735272..dd54dcb5cca2 100644 --- a/drivers/gpu/drm/i915/gt/selftest_engine_pm.c +++ 
b/drivers/gpu/drm/i915/gt/selftest_engine_pm.c @@ -6,7 +6,109 @@ #include "i915_selftest.h" #include "selftest_engine.h" +#include "selftest_engine_heartbeat.h" #include "selftests/igt_atomic.h" +#include "selftests/igt_flush_test.h" +#include "selftests/igt_spinner.h" + +static int live_engine_busy_stats(void *arg) +{ + struct intel_gt *gt = arg; + struct intel_engine_cs *engine; + enum intel_engine_id id; + struct igt_spinner spin; + int err = 0; + + /* + * Check that if an engine supports busy-stats, they tell the truth. + */ + + if (igt_spinner_init(&spin, gt)) + return -ENOMEM; + + GEM_BUG_ON(intel_gt_pm_is_awake(gt)); + for_each_engine(engine, gt, id) { + struct i915_request *rq; + ktime_t de; + u64 dt; + + if (!intel_engine_supports_stats(engine)) + continue; + + if (!intel_engine_can_store_dword(engine)) + continue; + + if (intel_gt_pm_wait_for_idle(gt)) { + err = -EBUSY; + break; + } + + st_engine_heartbeat_disable(engine); + + ENGINE_TRACE(engine, "measuring idle time\n"); + preempt_disable(); + dt = ktime_to_ns(ktime_get()); + de = intel_engine_get_busy_time(engine); + udelay(100); + de = ktime_sub(intel_engine_get_busy_time(engine), de); + dt = ktime_to_ns(ktime_get()) - dt; + preempt_enable(); + if (de < 0 || de > 10) { + pr_err("%s: reported %lldns [%d%%] busyness while sleeping [for %lldns]\n", + engine->name, + de, (int)div64_u64(100 * de, dt), dt); + GEM_TRACE_DUMP(); + err = -EINVAL; + goto end; + } + + /* 100% busy */ + rq = igt_spinner_create_request(&spin, + engine->kernel_context, + MI_NOOP); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto end; + } + i915_request_add(rq); + + if (!igt_wait_for_spinner(&spin, rq)) { + intel_gt_set_wedged(engine->gt); + err = -ETIME; + goto end; + } + + ENGINE_TRACE(engine, "measuring busy time\n"); + preempt_disable(); + dt = ktime_to_ns(ktime_get()); + de = intel_engine_get_busy_time(engine); + udelay(100); + de = ktime_sub(intel_engine_get_busy_time(engine), de); + dt = ktime_to_ns(ktime_get()) - dt; + 
preempt_enable(); + if (100 * de < 95 * dt || 95 * de > 100 * dt) { + pr_err("%s: reported %lldns [%d%%] busyness while spinning [for %lldns]\n", + engine->name, + de, (int)div64_u64(100 * de, dt), dt); + GEM_TRACE_DUMP(); + err = -EINVAL; + goto end; + } + +end: + st_engine_heartbeat_enable(engine); + igt_spinner_end(&spin); + if (igt_flush_test(gt->i915)) + err = -EIO; + if (err) + break; + } + + igt_spinner_fini(&spin); + if (igt_flush_test(gt->i915)) + err = -EIO; + return err; +} static int live_engine_pm(void *arg) { @@ -77,6 +179,7 @@ static int live_engine_pm(void *arg) int live_engine_pm_selftests(struct intel_gt *gt) { static const struct i915_subtest tests[] = { + SUBTEST(live_engine_busy_stats), SUBTEST(live_engine_pm), }; diff --git a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c index 7461936d549d..fb5ebf930ab2 100644 --- a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c +++ b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c @@ -29,6 +29,7 @@ #include "intel_gt.h" #include "intel_engine_heartbeat.h" #include "intel_engine_pm.h" +#include "selftest_engine_heartbeat.h" #include "i915_selftest.h" #include "selftests/i915_random.h" @@ -310,22 +311,6 @@ static bool wait_until_running(struct hang *h, struct i915_request *rq) 1000)); } -static void engine_heartbeat_disable(struct intel_engine_cs *engine) -{ - engine->props.heartbeat_interval_ms = 0; - - intel_engine_pm_get(engine); - intel_engine_park_heartbeat(engine); -} - -static void engine_heartbeat_enable(struct intel_engine_cs *engine) -{ - intel_engine_pm_put(engine); - - engine->props.heartbeat_interval_ms = - engine->defaults.heartbeat_interval_ms; -} - static int igt_hang_sanitycheck(void *arg) { struct intel_gt *gt = arg; @@ -482,7 +467,7 @@ static int igt_reset_nop_engine(void *arg) reset_engine_count = i915_reset_engine_count(global, engine); count = 0; - engine_heartbeat_disable(engine); + st_engine_heartbeat_disable(engine); set_bit(I915_RESET_ENGINE + 
id, &gt->reset.flags); do { int i; @@ -540,7 +525,7 @@ static int igt_reset_nop_engine(void *arg) } } while (time_before(jiffies, end_time)); clear_bit(I915_RESET_ENGINE + id, &gt->reset.flags); - engine_heartbeat_enable(engine); + st_engine_heartbeat_enable(engine); pr_info("%s(%s): %d resets\n", __func__, engine->name, count); @@ -590,7 +575,7 @@ static int __igt_reset_engine(struct intel_gt *gt, bool active) reset_count = i915_reset_count(global); reset_engine_count = i915_reset_engine_count(global, engine); - engine_heartbeat_disable(engine); + st_engine_heartbeat_disable(engine); set_bit(I915_RESET_ENGINE + id, &gt->reset.flags); do { if (active) { @@ -642,7 +627,7 @@ static int __igt_reset_engine(struct intel_gt *gt, bool active) } } while (time_before(jiffies, end_time)); clear_bit(I915_RESET_ENGINE + id, &gt->reset.flags); - engine_heartbeat_enable(engine); + st_engine_heartbeat_enable(engine); if (err) break; @@ -841,7 +826,7 @@ static int __igt_reset_engines(struct intel_gt *gt, yield(); /* start all threads before we begin */ - engine_heartbeat_disable(engine); + st_engine_heartbeat_disable(engine); set_bit(I915_RESET_ENGINE + id, &gt->reset.flags); do { struct i915_request *rq = NULL; @@ -931,7 +916,7 @@ static int __igt_reset_engines(struct intel_gt *gt, } } while (time_before(jiffies, end_time)); clear_bit(I915_RESET_ENGINE + id, &gt->reset.flags); - engine_heartbeat_enable(engine); + st_engine_heartbeat_enable(engine); pr_info("i915_reset_engine(%s:%s): %lu resets\n", engine->name, test_name, count); diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c index 58e4e9aafe94..daa4aabab9a7 100644 --- a/drivers/gpu/drm/i915/gt/selftest_lrc.c +++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c @@ -9,6 +9,7 @@ #include "gem/i915_gem_pm.h" #include "gt/intel_engine_heartbeat.h" #include "gt/intel_reset.h" +#include "gt/selftest_engine_heartbeat.h" #include "i915_selftest.h" #include "selftests/i915_random.h" @@ -51,22 +52,6 @@ static
struct i915_vma *create_scratch(struct intel_gt *gt) return vma; } -static void engine_heartbeat_disable(struct intel_engine_cs *engine) -{ - engine->props.heartbeat_interval_ms = 0; - - intel_engine_pm_get(engine); - intel_engine_park_heartbeat(engine); -} - -static void engine_heartbeat_enable(struct intel_engine_cs *engine) -{ - intel_engine_pm_put(engine); - - engine->props.heartbeat_interval_ms = - engine->defaults.heartbeat_interval_ms; -} - static bool is_active(struct i915_request *rq) { if (i915_request_is_active(rq)) @@ -234,7 +219,7 @@ static int live_unlite_restore(struct intel_gt *gt, int prio) err = -EIO; break; } - engine_heartbeat_disable(engine); + st_engine_heartbeat_disable(engine); for (n = 0; n < ARRAY_SIZE(ce); n++) { struct intel_context *tmp; @@ -342,7 +327,7 @@ err_ce: intel_context_put(ce[n]); } - engine_heartbeat_enable(engine); + st_engine_heartbeat_enable(engine); if (igt_live_test_end(&t)) err = -EIO; if (err) @@ -396,7 +381,7 @@ static int live_unlite_ring(void *arg) err = -EIO; break; } - engine_heartbeat_disable(engine); + st_engine_heartbeat_disable(engine); for (n = 0; n < ARRAY_SIZE(ce); n++) { struct intel_context *tmp; @@ -502,7 +487,7 @@ err_ce: intel_context_unpin(ce[n]); intel_context_put(ce[n]); } - engine_heartbeat_enable(engine); + st_engine_heartbeat_enable(engine); if (igt_live_test_end(&t)) err = -EIO; if (err) @@ -621,7 +606,7 @@ static int live_hold_reset(void *arg) break; } - engine_heartbeat_disable(engine); + st_engine_heartbeat_disable(engine); rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK); if (IS_ERR(rq)) { @@ -681,7 +666,7 @@ static int live_hold_reset(void *arg) i915_request_put(rq); out: - engine_heartbeat_enable(engine); + st_engine_heartbeat_enable(engine); intel_context_put(ce); if (err) break; @@ -728,7 +713,7 @@ static int live_error_interrupt(void *arg) const struct error_phase *p; int err = 0; - engine_heartbeat_disable(engine); + st_engine_heartbeat_disable(engine); for (p = phases; 
p->error[0] != GOOD; p++) { struct i915_request *client[ARRAY_SIZE(phases->error)]; @@ -827,7 +812,7 @@ out: } } - engine_heartbeat_enable(engine); + st_engine_heartbeat_enable(engine); if (err) { intel_gt_set_wedged(gt); return err; @@ -1042,9 +1027,9 @@ static int live_timeslice_preempt(void *arg) memset(vaddr, 0, PAGE_SIZE); - engine_heartbeat_disable(engine); + st_engine_heartbeat_disable(engine); err = slice_semaphore_queue(engine, vma, 5); - engine_heartbeat_enable(engine); + st_engine_heartbeat_enable(engine); if (err) goto err_pin; @@ -1166,7 +1151,7 @@ static int live_timeslice_rewind(void *arg) * Expect execution/evaluation order XZY */ - engine_heartbeat_disable(engine); + st_engine_heartbeat_disable(engine); timeslice = xchg(&engine->props.timeslice_duration_ms, 1); slot = memset32(engine->status_page.addr + 1000, 0, 4); @@ -1261,7 +1246,7 @@ err: wmb(); engine->props.timeslice_duration_ms = timeslice; - engine_heartbeat_enable(engine); + st_engine_heartbeat_enable(engine); for (i = 0; i < 3; i++) i915_request_put(rq[i]); if (igt_flush_test(gt->i915)) @@ -1353,7 +1338,7 @@ static int live_timeslice_queue(void *arg) if (!intel_engine_has_preemption(engine)) continue; - engine_heartbeat_disable(engine); + st_engine_heartbeat_disable(engine); memset(vaddr, 0, PAGE_SIZE); /* ELSP[0]: semaphore wait */ @@ -1414,7 +1399,7 @@ static int live_timeslice_queue(void *arg) err_rq: i915_request_put(rq); err_heartbeat: - engine_heartbeat_enable(engine); + st_engine_heartbeat_enable(engine); if (err) break; } @@ -1460,7 +1445,7 @@ static int live_timeslice_nopreempt(void *arg) break; } - engine_heartbeat_disable(engine); + st_engine_heartbeat_disable(engine); timeslice = xchg(&engine->props.timeslice_duration_ms, 1); /* Create an unpreemptible spinner */ @@ -1529,7 +1514,7 @@ out_spin: igt_spinner_end(&spin); out_heartbeat: xchg(&engine->props.timeslice_duration_ms, timeslice); - engine_heartbeat_enable(engine); + st_engine_heartbeat_enable(engine); if (err) break; @@ 
-2433,7 +2418,7 @@ static int live_suppress_self_preempt(void *arg) if (igt_flush_test(gt->i915)) goto err_wedged; - engine_heartbeat_disable(engine); + st_engine_heartbeat_disable(engine); engine->execlists.preempt_hang.count = 0; rq_a = spinner_create_request(&a.spin, @@ -2441,14 +2426,14 @@ static int live_suppress_self_preempt(void *arg) MI_NOOP); if (IS_ERR(rq_a)) { err = PTR_ERR(rq_a); - engine_heartbeat_enable(engine); + st_engine_heartbeat_enable(engine); goto err_client_b; } i915_request_add(rq_a); if (!igt_wait_for_spinner(&a.spin, rq_a)) { pr_err("First client failed to start\n"); - engine_heartbeat_enable(engine); + st_engine_heartbeat_enable(engine); goto err_wedged; } @@ -2460,7 +2445,7 @@ static int live_suppress_self_preempt(void *arg) MI_NOOP); if (IS_ERR(rq_b)) { err = PTR_ERR(rq_b); - engine_heartbeat_enable(engine); + st_engine_heartbeat_enable(engine); goto err_client_b; } i915_request_add(rq_b); @@ -2471,7 +2456,7 @@ static int live_suppress_self_preempt(void *arg) if (!igt_wait_for_spinner(&b.spin, rq_b)) { pr_err("Second client failed to start\n"); - engine_heartbeat_enable(engine); + st_engine_heartbeat_enable(engine); goto err_wedged; } @@ -2485,12 +2470,12 @@ static int live_suppress_self_preempt(void *arg) engine->name, engine->execlists.preempt_hang.count, depth); - engine_heartbeat_enable(engine); + st_engine_heartbeat_enable(engine); err = -EINVAL; goto err_client_b; } - engine_heartbeat_enable(engine); + st_engine_heartbeat_enable(engine); if (igt_flush_test(gt->i915)) goto err_wedged; } @@ -2902,7 +2887,7 @@ static int live_preempt_ring(void *arg) if (!intel_engine_can_store_dword(engine)) continue; - engine_heartbeat_disable(engine); + st_engine_heartbeat_disable(engine); for (n = 0; n <= 3; n++) { err = __live_preempt_ring(engine, &spin, @@ -2911,7 +2896,7 @@ static int live_preempt_ring(void *arg) break; } - engine_heartbeat_enable(engine); + st_engine_heartbeat_enable(engine); if (err) break; } @@ -4568,7 +4553,7 @@ static int 
reset_virtual_engine(struct intel_gt *gt, } for (n = 0; n < nsibling; n++) - engine_heartbeat_disable(siblings[n]); + st_engine_heartbeat_disable(siblings[n]); rq = igt_spinner_create_request(&spin, ve, MI_ARB_CHECK); if (IS_ERR(rq)) { @@ -4639,7 +4624,7 @@ out_rq: i915_request_put(rq); out_heartbeat: for (n = 0; n < nsibling; n++) - engine_heartbeat_enable(siblings[n]); + st_engine_heartbeat_enable(siblings[n]); intel_context_put(ve); out_spin: @@ -5314,7 +5299,7 @@ static int live_lrc_gpr(void *arg) return PTR_ERR(scratch); for_each_engine(engine, gt, id) { - engine_heartbeat_disable(engine); + st_engine_heartbeat_disable(engine); err = __live_lrc_gpr(engine, scratch, false); if (err) @@ -5325,7 +5310,7 @@ static int live_lrc_gpr(void *arg) goto err; err: - engine_heartbeat_enable(engine); + st_engine_heartbeat_enable(engine); if (igt_flush_test(gt->i915)) err = -EIO; if (err) @@ -5474,7 +5459,7 @@ static int live_lrc_timestamp(void *arg) for_each_engine(data.engine, gt, id) { int i, err = 0; - engine_heartbeat_disable(data.engine); + st_engine_heartbeat_disable(data.engine); for (i = 0; i < ARRAY_SIZE(data.ce); i++) { struct intel_context *tmp; @@ -5507,7 +5492,7 @@ static int live_lrc_timestamp(void *arg) } err: - engine_heartbeat_enable(data.engine); + st_engine_heartbeat_enable(data.engine); for (i = 0; i < ARRAY_SIZE(data.ce); i++) { if (!data.ce[i]) break; diff --git a/drivers/gpu/drm/i915/gt/selftest_rps.c b/drivers/gpu/drm/i915/gt/selftest_rps.c index 5049c3dd08a6..bb753f0c12eb 100644 --- a/drivers/gpu/drm/i915/gt/selftest_rps.c +++ b/drivers/gpu/drm/i915/gt/selftest_rps.c @@ -12,6 +12,7 @@ #include "intel_gt_clock_utils.h" #include "intel_gt_pm.h" #include "intel_rc6.h" +#include "selftest_engine_heartbeat.h" #include "selftest_rps.h" #include "selftests/igt_flush_test.h" #include "selftests/igt_spinner.h" @@ -20,22 +21,6 @@ /* Try to isolate the impact of cstates from determing frequency response */ #define CPU_LATENCY 0 /* -1 to disable pm_qos, 0 to 
disable cstates */ -static void engine_heartbeat_disable(struct intel_engine_cs *engine) -{ - engine->props.heartbeat_interval_ms = 0; - - intel_engine_pm_get(engine); - intel_engine_park_heartbeat(engine); -} - -static void engine_heartbeat_enable(struct intel_engine_cs *engine) -{ - intel_engine_pm_put(engine); - - engine->props.heartbeat_interval_ms = - engine->defaults.heartbeat_interval_ms; -} - static void dummy_rps_work(struct work_struct *wrk) { } @@ -249,13 +234,13 @@ int live_rps_clock_interval(void *arg) if (!intel_engine_can_store_dword(engine)) continue; - engine_heartbeat_disable(engine); + st_engine_heartbeat_disable(engine); rq = igt_spinner_create_request(&spin, engine->kernel_context, MI_NOOP); if (IS_ERR(rq)) { - engine_heartbeat_enable(engine); + st_engine_heartbeat_enable(engine); err = PTR_ERR(rq); break; } @@ -266,7 +251,7 @@ int live_rps_clock_interval(void *arg) pr_err("%s: RPS spinner did not start\n", engine->name); igt_spinner_end(&spin); - engine_heartbeat_enable(engine); + st_engine_heartbeat_enable(engine); intel_gt_set_wedged(engine->gt); err = -EIO; break; @@ -322,7 +307,7 @@ int live_rps_clock_interval(void *arg) intel_uncore_forcewake_put(gt->uncore, FORCEWAKE_ALL); igt_spinner_end(&spin); - engine_heartbeat_enable(engine); + st_engine_heartbeat_enable(engine); if (err == 0) { u64 time = intel_gt_pm_interval_to_ns(gt, cycles); @@ -408,7 +393,7 @@ int live_rps_control(void *arg) if (!intel_engine_can_store_dword(engine)) continue; - engine_heartbeat_disable(engine); + st_engine_heartbeat_disable(engine); rq = igt_spinner_create_request(&spin, engine->kernel_context, @@ -424,7 +409,7 @@ int live_rps_control(void *arg) pr_err("%s: RPS spinner did not start\n", engine->name); igt_spinner_end(&spin); - engine_heartbeat_enable(engine); + st_engine_heartbeat_enable(engine); intel_gt_set_wedged(engine->gt); err = -EIO; break; @@ -434,7 +419,7 @@ int live_rps_control(void *arg) pr_err("%s: could not set minimum frequency [%x], only %x!\n", 
engine->name, rps->min_freq, read_cagf(rps)); igt_spinner_end(&spin); - engine_heartbeat_enable(engine); + st_engine_heartbeat_enable(engine); show_pstate_limits(rps); err = -EINVAL; break; @@ -451,7 +436,7 @@ int live_rps_control(void *arg) pr_err("%s: could not restore minimum frequency [%x], only %x!\n", engine->name, rps->min_freq, read_cagf(rps)); igt_spinner_end(&spin); - engine_heartbeat_enable(engine); + st_engine_heartbeat_enable(engine); show_pstate_limits(rps); err = -EINVAL; break; @@ -466,7 +451,7 @@ int live_rps_control(void *arg) min_dt = ktime_sub(ktime_get(), min_dt); igt_spinner_end(&spin); - engine_heartbeat_enable(engine); + st_engine_heartbeat_enable(engine); pr_info("%s: range:[%x:%uMHz, %x:%uMHz] limit:[%x:%uMHz], %x:%x response %lluns:%lluns\n", engine->name, @@ -637,14 +622,14 @@ int live_rps_frequency_cs(void *arg) int freq; } min, max; - engine_heartbeat_disable(engine); + st_engine_heartbeat_disable(engine); vma = create_spin_counter(engine, engine->kernel_context->vm, false, &cancel, &cntr); if (IS_ERR(vma)) { err = PTR_ERR(vma); - engine_heartbeat_enable(engine); + st_engine_heartbeat_enable(engine); break; } @@ -725,7 +710,7 @@ err_vma: i915_vma_unpin(vma); i915_vma_put(vma); - engine_heartbeat_enable(engine); + st_engine_heartbeat_enable(engine); if (igt_flush_test(gt->i915)) err = -EIO; if (err) @@ -779,14 +764,14 @@ int live_rps_frequency_srm(void *arg) int freq; } min, max; - engine_heartbeat_disable(engine); + st_engine_heartbeat_disable(engine); vma = create_spin_counter(engine, engine->kernel_context->vm, true, &cancel, &cntr); if (IS_ERR(vma)) { err = PTR_ERR(vma); - engine_heartbeat_enable(engine); + st_engine_heartbeat_enable(engine); break; } @@ -866,7 +851,7 @@ err_vma: i915_vma_unpin(vma); i915_vma_put(vma); - engine_heartbeat_enable(engine); + st_engine_heartbeat_enable(engine); if (igt_flush_test(gt->i915)) err = -EIO; if (err) @@ -1061,11 +1046,11 @@ int live_rps_interrupt(void *arg) 
intel_gt_pm_wait_for_idle(engine->gt); GEM_BUG_ON(intel_rps_is_active(rps)); - engine_heartbeat_disable(engine); + st_engine_heartbeat_disable(engine); err = __rps_up_interrupt(rps, engine, &spin); - engine_heartbeat_enable(engine); + st_engine_heartbeat_enable(engine); if (err) goto out; @@ -1074,13 +1059,13 @@ int live_rps_interrupt(void *arg) /* Keep the engine awake but idle and check for DOWN */ if (pm_events & GEN6_PM_RP_DOWN_THRESHOLD) { - engine_heartbeat_disable(engine); + st_engine_heartbeat_disable(engine); intel_rc6_disable(&gt->rc6); err = __rps_down_interrupt(rps, engine); intel_rc6_enable(&gt->rc6); - engine_heartbeat_enable(engine); + st_engine_heartbeat_enable(engine); if (err) goto out; } @@ -1165,13 +1150,13 @@ int live_rps_power(void *arg) if (!intel_engine_can_store_dword(engine)) continue; - engine_heartbeat_disable(engine); + st_engine_heartbeat_disable(engine); rq = igt_spinner_create_request(&spin, engine->kernel_context, MI_NOOP); if (IS_ERR(rq)) { - engine_heartbeat_enable(engine); + st_engine_heartbeat_enable(engine); err = PTR_ERR(rq); break; } @@ -1182,7 +1167,7 @@ int live_rps_power(void *arg) pr_err("%s: RPS spinner did not start\n", engine->name); igt_spinner_end(&spin); - engine_heartbeat_enable(engine); + st_engine_heartbeat_enable(engine); intel_gt_set_wedged(engine->gt); err = -EIO; break; @@ -1195,7 +1180,7 @@ int live_rps_power(void *arg) min.power = measure_power_at(rps, &min.freq); igt_spinner_end(&spin); - engine_heartbeat_enable(engine); + st_engine_heartbeat_enable(engine); pr_info("%s: min:%llumW @ %uMHz, max:%llumW @ %uMHz\n", engine->name, @@ -1252,6 +1237,11 @@ int live_rps_dynamic(void *arg) if (igt_spinner_init(&spin, gt)) return -ENOMEM; + if (intel_rps_has_interrupts(rps)) + pr_info("RPS has interrupt support\n"); + if (intel_rps_uses_timer(rps)) + pr_info("RPS has timer support\n"); + for_each_engine(engine, gt, id) { struct i915_request *rq; struct { diff --git a/drivers/gpu/drm/i915/gt/selftest_timeline.c
b/drivers/gpu/drm/i915/gt/selftest_timeline.c index b2aad7ef046a..fcdee951579b 100644 --- a/drivers/gpu/drm/i915/gt/selftest_timeline.c +++ b/drivers/gpu/drm/i915/gt/selftest_timeline.c @@ -12,6 +12,7 @@ #include "intel_gt.h" #include "intel_gt_requests.h" #include "intel_ring.h" +#include "selftest_engine_heartbeat.h" #include "../selftests/i915_random.h" #include "../i915_selftest.h" @@ -751,22 +752,6 @@ out_free: return err; } -static void engine_heartbeat_disable(struct intel_engine_cs *engine) -{ - engine->props.heartbeat_interval_ms = 0; - - intel_engine_pm_get(engine); - intel_engine_park_heartbeat(engine); -} - -static void engine_heartbeat_enable(struct intel_engine_cs *engine) -{ - intel_engine_pm_put(engine); - - engine->props.heartbeat_interval_ms = - engine->defaults.heartbeat_interval_ms; -} - static int live_hwsp_rollover_kernel(void *arg) { struct intel_gt *gt = arg; @@ -785,7 +770,7 @@ static int live_hwsp_rollover_kernel(void *arg) struct i915_request *rq[3] = {}; int i; - engine_heartbeat_disable(engine); + st_engine_heartbeat_disable(engine); if (intel_gt_wait_for_idle(gt, HZ / 2)) { err = -EIO; goto out; @@ -836,7 +821,7 @@ static int live_hwsp_rollover_kernel(void *arg) out: for (i = 0; i < ARRAY_SIZE(rq); i++) i915_request_put(rq[i]); - engine_heartbeat_enable(engine); + st_engine_heartbeat_enable(engine); if (err) break; } diff --git a/drivers/gpu/drm/i915/selftests/i915_request.c b/drivers/gpu/drm/i915/selftests/i915_request.c index 92c628f18c60..06d18aae070b 100644 --- a/drivers/gpu/drm/i915/selftests/i915_request.c +++ b/drivers/gpu/drm/i915/selftests/i915_request.c @@ -34,6 +34,7 @@ #include "gt/intel_engine_user.h" #include "gt/intel_gt.h" #include "gt/intel_gt_requests.h" +#include "gt/selftest_engine_heartbeat.h" #include "i915_random.h" #include "i915_selftest.h" @@ -2270,22 +2271,6 @@ static void rps_unpin(struct intel_gt *gt) atomic_dec(&gt->rps.num_waiters); } -static void engine_heartbeat_disable(struct intel_engine_cs *engine) -{
- engine->props.heartbeat_interval_ms = 0; - - intel_engine_pm_get(engine); - intel_engine_park_heartbeat(engine); -} - -static void engine_heartbeat_enable(struct intel_engine_cs *engine) -{ - intel_engine_pm_put(engine); - - engine->props.heartbeat_interval_ms = - engine->defaults.heartbeat_interval_ms; -} - static int perf_request_latency(void *arg) { struct drm_i915_private *i915 = arg; @@ -2311,7 +2296,7 @@ static int perf_request_latency(void *arg) goto out; } - engine_heartbeat_disable(engine); + st_engine_heartbeat_disable(engine); rps_pin(engine->gt); if (err == 0) @@ -2330,7 +2315,7 @@ static int perf_request_latency(void *arg) err = measure_completion(ce); rps_unpin(engine->gt); - engine_heartbeat_enable(engine); + st_engine_heartbeat_enable(engine); intel_context_unpin(ce); intel_context_put(ce); From 810b7ee3008ab2ac94791f75857815484c2f9cce Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 17 Jun 2020 14:09:16 +0100 Subject: [PATCH 182/222] drm/i915/gt: Always report the sample time for busy-stats Return the monotonic timestamp (ktime_get()) at the time of sampling the busy-time. This is used in preference to taking ktime_get() separately before or after the read seqlock as there can be some large variance in reported timestamps. For selftests trying to ascertain that we are reporting accurate to within a few microseconds, even a small delay leads to the test failing. 
Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20200617130916.15261-2-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/intel_engine.h | 3 +- drivers/gpu/drm/i915/gt/intel_engine_cs.c | 12 ++-- drivers/gpu/drm/i915/gt/intel_rps.c | 9 ++- drivers/gpu/drm/i915/gt/selftest_engine_pm.c | 18 +++--- drivers/gpu/drm/i915/i915_pmu.c | 5 +- drivers/gpu/drm/i915/selftests/i915_request.c | 63 ++++++++++++------- 6 files changed, 66 insertions(+), 44 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h index 791897f8d847..a9249a23903a 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine.h +++ b/drivers/gpu/drm/i915/gt/intel_engine.h @@ -334,7 +334,8 @@ void intel_engine_dump(struct intel_engine_cs *engine, struct drm_printer *m, const char *header, ...); -ktime_t intel_engine_get_busy_time(struct intel_engine_cs *engine); +ktime_t intel_engine_get_busy_time(struct intel_engine_cs *engine, + ktime_t *now); struct i915_request * intel_engine_find_active_request(struct intel_engine_cs *engine); diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index 045179c65c44..c62b3cbdbbf9 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -1595,7 +1595,8 @@ void intel_engine_dump(struct intel_engine_cs *engine, intel_engine_print_breadcrumbs(engine, m); } -static ktime_t __intel_engine_get_busy_time(struct intel_engine_cs *engine) +static ktime_t __intel_engine_get_busy_time(struct intel_engine_cs *engine, + ktime_t *now) { ktime_t total = engine->stats.total; @@ -1603,9 +1604,9 @@ static ktime_t __intel_engine_get_busy_time(struct intel_engine_cs *engine) * If the engine is executing something at the moment * add it to the total. 
*/ + *now = ktime_get(); if (atomic_read(&engine->stats.active)) - total = ktime_add(total, - ktime_sub(ktime_get(), engine->stats.start)); + total = ktime_add(total, ktime_sub(*now, engine->stats.start)); return total; } @@ -1613,17 +1614,18 @@ static ktime_t __intel_engine_get_busy_time(struct intel_engine_cs *engine) /** * intel_engine_get_busy_time() - Return current accumulated engine busyness * @engine: engine to report on + * @now: monotonic timestamp of sampling * * Returns accumulated time @engine was busy since engine stats were enabled. */ -ktime_t intel_engine_get_busy_time(struct intel_engine_cs *engine) +ktime_t intel_engine_get_busy_time(struct intel_engine_cs *engine, ktime_t *now) { unsigned int seq; ktime_t total; do { seq = read_seqbegin(&engine->stats.lock); - total = __intel_engine_get_busy_time(engine); + total = __intel_engine_get_busy_time(engine, now); } while (read_seqretry(&engine->stats.lock, seq)); return total; diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c index 2f59fc6df3c2..bdece932592b 100644 --- a/drivers/gpu/drm/i915/gt/intel_rps.c +++ b/drivers/gpu/drm/i915/gt/intel_rps.c @@ -53,13 +53,13 @@ static void rps_timer(struct timer_list *t) struct intel_engine_cs *engine; enum intel_engine_id id; s64 max_busy[3] = {}; - ktime_t dt, last; + ktime_t dt, timestamp, last; for_each_engine(engine, rps_to_gt(rps), id) { s64 busy; int i; - dt = intel_engine_get_busy_time(engine); + dt = intel_engine_get_busy_time(engine, &timestamp); last = engine->stats.rps; engine->stats.rps = dt; @@ -70,15 +70,14 @@ static void rps_timer(struct timer_list *t) } } - dt = ktime_get(); last = rps->pm_timestamp; - rps->pm_timestamp = dt; + rps->pm_timestamp = timestamp; if (intel_rps_is_active(rps)) { s64 busy; int i; - dt = ktime_sub(dt, last); + dt = ktime_sub(timestamp, last); /* * Our goal is to evaluate each engine independently, so we run diff --git a/drivers/gpu/drm/i915/gt/selftest_engine_pm.c
b/drivers/gpu/drm/i915/gt/selftest_engine_pm.c index dd54dcb5cca2..b08fc5390e8a 100644 --- a/drivers/gpu/drm/i915/gt/selftest_engine_pm.c +++ b/drivers/gpu/drm/i915/gt/selftest_engine_pm.c @@ -29,8 +29,8 @@ static int live_engine_busy_stats(void *arg) GEM_BUG_ON(intel_gt_pm_is_awake(gt)); for_each_engine(engine, gt, id) { struct i915_request *rq; - ktime_t de; - u64 dt; + ktime_t de, dt; + ktime_t t[2]; if (!intel_engine_supports_stats(engine)) continue; @@ -47,12 +47,11 @@ static int live_engine_busy_stats(void *arg) ENGINE_TRACE(engine, "measuring idle time\n"); preempt_disable(); - dt = ktime_to_ns(ktime_get()); - de = intel_engine_get_busy_time(engine); + de = intel_engine_get_busy_time(engine, &t[0]); udelay(100); - de = ktime_sub(intel_engine_get_busy_time(engine), de); - dt = ktime_to_ns(ktime_get()) - dt; + de = ktime_sub(intel_engine_get_busy_time(engine, &t[1]), de); preempt_enable(); + dt = ktime_sub(t[1], t[0]); if (de < 0 || de > 10) { pr_err("%s: reported %lldns [%d%%] busyness while sleeping [for %lldns]\n", engine->name, @@ -80,12 +79,11 @@ static int live_engine_busy_stats(void *arg) ENGINE_TRACE(engine, "measuring busy time\n"); preempt_disable(); - dt = ktime_to_ns(ktime_get()); - de = intel_engine_get_busy_time(engine); + de = intel_engine_get_busy_time(engine, &t[0]); udelay(100); - de = ktime_sub(intel_engine_get_busy_time(engine), de); - dt = ktime_to_ns(ktime_get()) - dt; + de = ktime_sub(intel_engine_get_busy_time(engine, &t[1]), de); preempt_enable(); + dt = ktime_sub(t[1], t[0]); if (100 * de < 95 * dt || 95 * de > 100 * dt) { pr_err("%s: reported %lldns [%d%%] busyness while spinning [for %lldns]\n", engine->name, diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c index 802837de1767..28bc5f13ae52 100644 --- a/drivers/gpu/drm/i915/i915_pmu.c +++ b/drivers/gpu/drm/i915/i915_pmu.c @@ -565,7 +565,10 @@ static u64 __i915_pmu_event_read(struct perf_event *event) /* Do nothing */ } else if (sample == I915_SAMPLE_BUSY 
&& intel_engine_supports_stats(engine)) { - val = ktime_to_ns(intel_engine_get_busy_time(engine)); + ktime_t unused; + + val = ktime_to_ns(intel_engine_get_busy_time(engine, + &unused)); } else { val = engine->pmu.sample[sample].cur; } diff --git a/drivers/gpu/drm/i915/selftests/i915_request.c b/drivers/gpu/drm/i915/selftests/i915_request.c index 06d18aae070b..9271aad7f779 100644 --- a/drivers/gpu/drm/i915/selftests/i915_request.c +++ b/drivers/gpu/drm/i915/selftests/i915_request.c @@ -2492,9 +2492,11 @@ static int perf_series_engines(void *arg) intel_engine_pm_get(p->engine); if (intel_engine_supports_stats(p->engine)) - p->busy = intel_engine_get_busy_time(p->engine) + 1; + p->busy = intel_engine_get_busy_time(p->engine, + &p->time) + 1; + else + p->time = ktime_get(); p->runtime = -intel_context_get_total_runtime_ns(ce); - p->time = ktime_get(); } err = (*fn)(ps); @@ -2505,13 +2507,15 @@ static int perf_series_engines(void *arg) struct perf_stats *p = &stats[idx]; struct intel_context *ce = ps->ce[idx]; int integer, decimal; - u64 busy, dt; + u64 busy, dt, now; - p->time = ktime_sub(ktime_get(), p->time); - if (p->busy) { - p->busy = ktime_sub(intel_engine_get_busy_time(p->engine), + if (p->busy) + p->busy = ktime_sub(intel_engine_get_busy_time(p->engine, + &now), p->busy - 1); - } + else + now = ktime_get(); + p->time = ktime_sub(now, p->time); err = switch_to_kernel_sync(ce, err); p->runtime += intel_context_get_total_runtime_ns(ce); @@ -2571,13 +2575,14 @@ static int p_sync0(void *arg) return err; } - busy = false; if (intel_engine_supports_stats(engine)) { - p->busy = intel_engine_get_busy_time(engine); + p->busy = intel_engine_get_busy_time(engine, &p->time); busy = true; + } else { + p->time = ktime_get(); + busy = false; } - p->time = ktime_get(); count = 0; do { struct i915_request *rq; @@ -2600,11 +2605,15 @@ static int p_sync0(void *arg) count++; } while (!__igt_timeout(end_time, NULL)); - p->time = ktime_sub(ktime_get(), p->time); if (busy) { - 
p->busy = ktime_sub(intel_engine_get_busy_time(engine), + ktime_t now; + + p->busy = ktime_sub(intel_engine_get_busy_time(engine, &now), p->busy); + p->time = ktime_sub(now, p->time); + } else { + p->time = ktime_sub(ktime_get(), p->time); } err = switch_to_kernel_sync(ce, err); @@ -2637,13 +2646,14 @@ static int p_sync1(void *arg) return err; } - busy = false; if (intel_engine_supports_stats(engine)) { - p->busy = intel_engine_get_busy_time(engine); + p->busy = intel_engine_get_busy_time(engine, &p->time); busy = true; + } else { + p->time = ktime_get(); + busy = false; } - p->time = ktime_get(); count = 0; do { struct i915_request *rq; @@ -2668,11 +2678,15 @@ static int p_sync1(void *arg) count++; } while (!__igt_timeout(end_time, NULL)); i915_request_put(prev); - p->time = ktime_sub(ktime_get(), p->time); if (busy) { - p->busy = ktime_sub(intel_engine_get_busy_time(engine), + ktime_t now; + + p->busy = ktime_sub(intel_engine_get_busy_time(engine, &now), p->busy); + p->time = ktime_sub(now, p->time); + } else { + p->time = ktime_sub(ktime_get(), p->time); } err = switch_to_kernel_sync(ce, err); @@ -2704,14 +2718,15 @@ static int p_many(void *arg) return err; } - busy = false; if (intel_engine_supports_stats(engine)) { - p->busy = intel_engine_get_busy_time(engine); + p->busy = intel_engine_get_busy_time(engine, &p->time); busy = true; + } else { + p->time = ktime_get(); + busy = false; } count = 0; - p->time = ktime_get(); do { struct i915_request *rq; @@ -2724,11 +2739,15 @@ static int p_many(void *arg) i915_request_add(rq); count++; } while (!__igt_timeout(end_time, NULL)); - p->time = ktime_sub(ktime_get(), p->time); if (busy) { - p->busy = ktime_sub(intel_engine_get_busy_time(engine), + ktime_t now; + + p->busy = ktime_sub(intel_engine_get_busy_time(engine, &now), p->busy); + p->time = ktime_sub(now, p->time); + } else { + p->time = ktime_sub(ktime_get(), p->time); } err = switch_to_kernel_sync(ce, err); From bf3c508375069c9e0b57d8453f99ec26e95b6548 Mon Sep 
17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Wed, 17 Jun 2020 17:03:31 -0500 Subject: [PATCH 183/222] drm/i915/query: Use struct_size() helper Make use of the struct_size() helper instead of an open-coded version in order to avoid any potential type mistakes. This code was detected with the help of Coccinelle, and audited and fixed manually. Signed-off-by: Gustavo A. R. Silva Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20200617220331.GA19550@embeddedor --- drivers/gpu/drm/i915/i915_query.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_query.c b/drivers/gpu/drm/i915/i915_query.c index ef25ce6e395e..9a8fdd3ac6bd 100644 --- a/drivers/gpu/drm/i915/i915_query.c +++ b/drivers/gpu/drm/i915/i915_query.c @@ -113,8 +113,7 @@ query_engine_info(struct drm_i915_private *i915, for_each_uabi_engine(engine, i915) num_uabi_engines++; - len = sizeof(struct drm_i915_query_engine_info) + - num_uabi_engines * sizeof(struct drm_i915_engine_info); + len = struct_size(query_ptr, engines, num_uabi_engines); ret = copy_query_item(&query, sizeof(query), len, query_item); if (ret != 0) From 5a15550e56cc57bb5b953b0f6629c6805f4724fe Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 19 Jun 2020 16:19:38 +0100 Subject: [PATCH 184/222] drm/i915/gt: Initialise rps timestamp Smatch warns that we may iterate over an empty array of gt->engines[]. One hopes that this is impossible, but nevertheless we can simply appease smatch by initialising the timestamp to zero before we start probing the busy-time from the engines.
Signed-off-by: Chris Wilson Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20200619151938.21740-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/intel_rps.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c index bdece932592b..296391deeb94 100644 --- a/drivers/gpu/drm/i915/gt/intel_rps.c +++ b/drivers/gpu/drm/i915/gt/intel_rps.c @@ -51,10 +51,11 @@ static void rps_timer(struct timer_list *t) { struct intel_rps *rps = from_timer(rps, t, timer); struct intel_engine_cs *engine; + ktime_t dt, last, timestamp; enum intel_engine_id id; s64 max_busy[3] = {}; - ktime_t dt, timestamp, last; + timestamp = 0; for_each_engine(engine, rps_to_gt(rps), id) { s64 busy; int i; @@ -69,7 +70,6 @@ static void rps_timer(struct timer_list *t) swap(busy, max_busy[i]); } } - last = rps->pm_timestamp; rps->pm_timestamp = timestamp; From 4fb33953438bf3c67c655a4d187f1645446863fb Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 19 Jun 2020 20:10:53 +0100 Subject: [PATCH 185/222] drm/i915/gt: Show the cumulative runtime as part of the engine info Since we always enable the busy-stats, the cumulative runtime should be accurate, and might be useful for diagnosing issues with the engine.
Signed-off-by: Chris Wilson Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20200619191053.9654-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/intel_engine_cs.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index c62b3cbdbbf9..7bf2f76212f0 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -1510,6 +1510,7 @@ void intel_engine_dump(struct intel_engine_cs *engine, struct i915_request *rq; intel_wakeref_t wakeref; unsigned long flags; + ktime_t dummy; if (header) { va_list ap; @@ -1527,6 +1528,10 @@ void intel_engine_dump(struct intel_engine_cs *engine, yesno(!llist_empty(&engine->barrier_tasks))); drm_printf(m, "\tLatency: %luus\n", ewma__engine_latency_read(&engine->latency)); + if (intel_engine_supports_stats(engine)) + drm_printf(m, "\tRuntime: %llums\n", + ktime_to_ms(intel_engine_get_busy_time(engine, + &dummy))); drm_printf(m, "\tForcewake: %x domains, %d active\n", engine->fw_domain, atomic_read(&engine->fw_active)); From 033ef711bbfb671ae58a1acb0f8440889c6000c6 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sat, 20 Jun 2020 00:45:42 +0100 Subject: [PATCH 186/222] drm/i915/gvt: Drop redundant prepare_write/pin_pages Since gvt calls pin_map for the shadow batch buffer, this makes the action of prepare_write [+pin_pages] redundant. We can write into the obj->mm.mapping directly and the flush_map routine knows when it has to flush the cpu cache afterwards.
Signed-off-by: Chris Wilson Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20200619234543.17499-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gvt/cmd_parser.c | 14 +------- drivers/gpu/drm/i915/gvt/scheduler.c | 51 ++++++++------------------- drivers/gpu/drm/i915/gvt/scheduler.h | 2 -- 3 files changed, 15 insertions(+), 52 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/cmd_parser.c b/drivers/gpu/drm/i915/gvt/cmd_parser.c index 8b87f130f7f1..f1940939260a 100644 --- a/drivers/gpu/drm/i915/gvt/cmd_parser.c +++ b/drivers/gpu/drm/i915/gvt/cmd_parser.c @@ -1904,19 +1904,10 @@ static int perform_bb_shadow(struct parser_exec_state *s) goto err_free_bb; } - ret = i915_gem_object_prepare_write(bb->obj, &bb->clflush); - if (ret) - goto err_free_obj; - bb->va = i915_gem_object_pin_map(bb->obj, I915_MAP_WB); if (IS_ERR(bb->va)) { ret = PTR_ERR(bb->va); - goto err_finish_shmem_access; - } - - if (bb->clflush & CLFLUSH_BEFORE) { - drm_clflush_virt_range(bb->va, bb->obj->base.size); - bb->clflush &= ~CLFLUSH_BEFORE; + goto err_free_obj; } ret = copy_gma_to_hva(s->vgpu, mm, @@ -1935,7 +1926,6 @@ static int perform_bb_shadow(struct parser_exec_state *s) INIT_LIST_HEAD(&bb->list); list_add(&bb->list, &s->workload->shadow_bb); - bb->accessing = true; bb->bb_start_cmd_va = s->ip_va; if ((s->buf_type == BATCH_BUFFER_INSTRUCTION) && (!s->is_ctx_wa)) @@ -1956,8 +1946,6 @@ static int perform_bb_shadow(struct parser_exec_state *s) return 0; err_unmap: i915_gem_object_unpin_map(bb->obj); -err_finish_shmem_access: - i915_gem_object_finish_access(bb->obj); err_free_obj: i915_gem_object_put(bb->obj); err_free_bb: diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c index 1e4dd4544dcf..f5dc52a80fe5 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.c +++ b/drivers/gpu/drm/i915/gvt/scheduler.c @@ -505,26 +505,18 @@ static int prepare_shadow_batch_buffer(struct intel_vgpu_workload *workload) bb->bb_start_cmd_va = 
workload->shadow_ring_buffer_va + bb->bb_offset; - if (bb->ppgtt) { - /* for non-priv bb, scan&shadow is only for - * debugging purpose, so the content of shadow bb - * is the same as original bb. Therefore, - * here, rather than switch to shadow bb's gma - * address, we directly use original batch buffer's - * gma address, and send original bb to hardware - * directly - */ - if (bb->clflush & CLFLUSH_AFTER) { - drm_clflush_virt_range(bb->va, - bb->obj->base.size); - bb->clflush &= ~CLFLUSH_AFTER; - } - i915_gem_object_finish_access(bb->obj); - bb->accessing = false; - - } else { + /* + * For non-priv bb, scan&shadow is only for + * debugging purpose, so the content of shadow bb + * is the same as original bb. Therefore, + * here, rather than switch to shadow bb's gma + * address, we directly use original batch buffer's + * gma address, and send original bb to hardware + * directly + */ + if (!bb->ppgtt) { bb->vma = i915_gem_object_ggtt_pin(bb->obj, - NULL, 0, 0, 0); + NULL, 0, 0, 0); if (IS_ERR(bb->vma)) { ret = PTR_ERR(bb->vma); goto err; @@ -535,27 +527,15 @@ static int prepare_shadow_batch_buffer(struct intel_vgpu_workload *workload) if (gmadr_bytes == 8) bb->bb_start_cmd_va[2] = 0; - /* No one is going to touch shadow bb from now on. */ - if (bb->clflush & CLFLUSH_AFTER) { - drm_clflush_virt_range(bb->va, - bb->obj->base.size); - bb->clflush &= ~CLFLUSH_AFTER; - } - - ret = i915_gem_object_set_to_gtt_domain(bb->obj, - false); - if (ret) - goto err; - ret = i915_vma_move_to_active(bb->vma, workload->req, 0); if (ret) goto err; - - i915_gem_object_finish_access(bb->obj); - bb->accessing = false; } + + /* No one is going to touch shadow bb from now on. 
*/ + i915_gem_object_flush_map(bb->obj); } return 0; err: @@ -626,9 +606,6 @@ static void release_shadow_batch_buffer(struct intel_vgpu_workload *workload) list_for_each_entry_safe(bb, pos, &workload->shadow_bb, list) { if (bb->obj) { - if (bb->accessing) - i915_gem_object_finish_access(bb->obj); - if (bb->va && !IS_ERR(bb->va)) i915_gem_object_unpin_map(bb->obj); diff --git a/drivers/gpu/drm/i915/gvt/scheduler.h b/drivers/gpu/drm/i915/gvt/scheduler.h index 15d317f2a4a4..64e7a0b791c3 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.h +++ b/drivers/gpu/drm/i915/gvt/scheduler.h @@ -124,8 +124,6 @@ struct intel_vgpu_shadow_bb { struct i915_vma *vma; void *va; u32 *bb_start_cmd_va; - unsigned int clflush; - bool accessing; unsigned long bb_offset; bool ppgtt; }; From cf46143fe2c90d08554031b64529c8c1c0f9cb08 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sat, 20 Jun 2020 00:45:43 +0100 Subject: [PATCH 187/222] drm/i915/gt: Replace manual kmap_atomic() with pin_map for renderstate We only emit the renderstate once now during module load, it is no longer a concern that we are delaying context creation and so do not need to so eagerly optimise. Since the last time we have looked at the renderstate, we have a pin_map / flush_map facility that supports simple single mappings, replacing the open-coded kmap_atomic() and prepare_write. As it should be a single page, of which we only write a small portion, we stick to a simple WB [kmap] and use clflush on !llc platforms, rather than creating a temporary WC vmapping for the single page. 
Signed-off-by: Chris Wilson Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20200619234543.17499-2-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/intel_renderstate.c | 29 +++++++-------------- 1 file changed, 9 insertions(+), 20 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_renderstate.c b/drivers/gpu/drm/i915/gt/intel_renderstate.c index f59e7875cc5e..6db23389e427 100644 --- a/drivers/gpu/drm/i915/gt/intel_renderstate.c +++ b/drivers/gpu/drm/i915/gt/intel_renderstate.c @@ -61,7 +61,7 @@ render_state_get_rodata(const struct intel_engine_cs *engine) #define OUT_BATCH(batch, i, val) \ do { \ if ((i) >= PAGE_SIZE / sizeof(u32)) \ - goto err; \ + goto out; \ (batch)[(i)++] = (val); \ } while(0) @@ -70,15 +70,12 @@ static int render_state_setup(struct intel_renderstate *so, { const struct intel_renderstate_rodata *rodata = so->rodata; unsigned int i = 0, reloc_index = 0; - unsigned int needs_clflush; + int ret = -EINVAL; u32 *d; - int ret; - ret = i915_gem_object_prepare_write(so->vma->obj, &needs_clflush); - if (ret) - return ret; - - d = kmap_atomic(i915_gem_object_get_dirty_page(so->vma->obj, 0)); + d = i915_gem_object_pin_map(so->vma->obj, I915_MAP_WB); + if (IS_ERR(d)) + return PTR_ERR(d); while (i < rodata->batch_items) { u32 s = rodata->batch[i]; @@ -89,7 +86,7 @@ static int render_state_setup(struct intel_renderstate *so, if (HAS_64BIT_RELOC(i915)) { if (i + 1 >= rodata->batch_items || rodata->batch[i + 1] != 0) - goto err; + goto out; d[i++] = s; s = upper_32_bits(r); @@ -103,7 +100,7 @@ static int render_state_setup(struct intel_renderstate *so, if (rodata->reloc[reloc_index] != -1) { drm_err(&i915->drm, "only %d relocs resolved\n", reloc_index); - goto err; + goto out; } so->batch_offset = i915_ggtt_offset(so->vma); @@ -150,19 +147,11 @@ static int render_state_setup(struct intel_renderstate *so, */ so->aux_size = ALIGN(so->aux_size, 8); - if (needs_clflush) - drm_clflush_virt_range(d, i * sizeof(u32)); - 
kunmap_atomic(d); - ret = 0; out: - i915_gem_object_finish_access(so->vma->obj); + __i915_gem_object_flush_map(so->vma->obj, 0, i * sizeof(u32)); + i915_gem_object_unpin_map(so->vma->obj); return ret; - -err: - kunmap_atomic(d); - ret = -EINVAL; - goto out; } #undef OUT_BATCH From 8a25c4be583d8a9e38fbadcc49d4de32b1aa8906 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Thu, 18 Jun 2020 18:04:02 +0300 Subject: [PATCH 188/222] drm/i915/params: switch to device specific parameters MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Start using device specific parameters instead of module parameters for most things. The module parameters become the immutable initial values for i915 parameters. The device specific parameters in i915->params start life as a copy of i915_modparams. Any later changes are only reflected in the debugfs. The stragglers are: * i915.force_probe and i915.modeset. Needed before dev_priv is available. This is fine because the parameters are read-only and never modified. * i915.verbose_state_checks. Passing dev_priv to I915_STATE_WARN and I915_STATE_WARN_ON would result in massive and ugly churn. This is handled by not exposing the parameter via debugfs, and leaving the parameter writable in sysfs. This may be fixed up in follow-up work. * i915.inject_probe_failure. Only makes sense in terms of the module, not the device. This is handled by not exposing the parameter via debugfs. 
v2: Fix uc i915 lookup code (Michał Winiarski) Cc: Juha-Pekka Heikkilä Cc: Venkata Sandeep Dhanalakota Cc: Michał Winiarski Reviewed-by: Rodrigo Vivi Acked-by: Michał Winiarski Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/20200618150402.14022-1-jani.nikula@intel.com --- drivers/gpu/drm/i915/display/intel_bios.c | 6 ++-- drivers/gpu/drm/i915/display/intel_crt.c | 4 +-- drivers/gpu/drm/i915/display/intel_csr.c | 6 ++-- drivers/gpu/drm/i915/display/intel_display.c | 12 +++---- .../drm/i915/display/intel_display_debugfs.c | 2 +- .../drm/i915/display/intel_display_power.c | 14 ++++---- drivers/gpu/drm/i915/display/intel_dp.c | 8 +++-- .../drm/i915/display/intel_dp_aux_backlight.c | 4 +-- drivers/gpu/drm/i915/display/intel_fbc.c | 12 +++---- drivers/gpu/drm/i915/display/intel_lvds.c | 4 +-- drivers/gpu/drm/i915/display/intel_opregion.c | 2 +- drivers/gpu/drm/i915/display/intel_panel.c | 4 +-- drivers/gpu/drm/i915/display/intel_psr.c | 10 +++--- drivers/gpu/drm/i915/gem/i915_gem_context.c | 4 +-- .../gpu/drm/i915/gt/intel_engine_heartbeat.c | 3 +- drivers/gpu/drm/i915/gt/intel_reset.c | 6 ++-- .../drm/i915/gt/selftest_engine_heartbeat.c | 6 ++-- drivers/gpu/drm/i915/gt/uc/intel_guc_log.c | 15 +++++---- .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 4 ++- drivers/gpu/drm/i915/gt/uc/intel_uc.c | 20 ++++++------ drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c | 32 +++++++++---------- drivers/gpu/drm/i915/i915_debugfs.c | 2 +- drivers/gpu/drm/i915/i915_debugfs_params.c | 7 +--- drivers/gpu/drm/i915/i915_drv.c | 9 ++++-- drivers/gpu/drm/i915/i915_drv.h | 5 ++- drivers/gpu/drm/i915/i915_getparam.c | 2 +- drivers/gpu/drm/i915/i915_gpu_error.c | 4 +-- drivers/gpu/drm/i915/intel_gvt.c | 8 ++--- drivers/gpu/drm/i915/intel_region_lmem.c | 6 ++-- drivers/gpu/drm/i915/intel_uncore.c | 8 ++--- 30 files changed, 120 insertions(+), 109 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_bios.c b/drivers/gpu/drm/i915/display/intel_bios.c index 
c974c716f859..6593e2c38043 100644 --- a/drivers/gpu/drm/i915/display/intel_bios.c +++ b/drivers/gpu/drm/i915/display/intel_bios.c @@ -479,7 +479,7 @@ parse_sdvo_panel_data(struct drm_i915_private *dev_priv, struct drm_display_mode *panel_fixed_mode; int index; - index = i915_modparams.vbt_sdvo_panel_type; + index = dev_priv->params.vbt_sdvo_panel_type; if (index == -2) { drm_dbg_kms(&dev_priv->drm, "Ignore SDVO panel mode from BIOS VBT tables.\n"); @@ -829,9 +829,9 @@ parse_edp(struct drm_i915_private *dev_priv, const struct bdb_header *bdb) u8 vswing; /* Don't read from VBT if module parameter has valid value*/ - if (i915_modparams.edp_vswing) { + if (dev_priv->params.edp_vswing) { dev_priv->vbt.edp.low_vswing = - i915_modparams.edp_vswing == 1; + dev_priv->params.edp_vswing == 1; } else { vswing = (edp->edp_vswing_preemph >> (panel_type * 4)) & 0xF; dev_priv->vbt.edp.low_vswing = vswing == 0; diff --git a/drivers/gpu/drm/i915/display/intel_crt.c b/drivers/gpu/drm/i915/display/intel_crt.c index 2f5b9a4baafd..5b4510ce5693 100644 --- a/drivers/gpu/drm/i915/display/intel_crt.c +++ b/drivers/gpu/drm/i915/display/intel_crt.c @@ -833,7 +833,7 @@ intel_crt_detect(struct drm_connector *connector, connector->base.id, connector->name, force); - if (i915_modparams.load_detect_test) { + if (dev_priv->params.load_detect_test) { wakeref = intel_display_power_get(dev_priv, intel_encoder->power_domain); goto load_detect; @@ -889,7 +889,7 @@ load_detect: else if (INTEL_GEN(dev_priv) < 4) status = intel_crt_load_detect(crt, to_intel_crtc(connector->state->crtc)->pipe); - else if (i915_modparams.load_detect_test) + else if (dev_priv->params.load_detect_test) status = connector_status_disconnected; else status = connector_status_unknown; diff --git a/drivers/gpu/drm/i915/display/intel_csr.c b/drivers/gpu/drm/i915/display/intel_csr.c index 9843c9af6c13..f22a7645c249 100644 --- a/drivers/gpu/drm/i915/display/intel_csr.c +++ b/drivers/gpu/drm/i915/display/intel_csr.c @@ -723,15 +723,15 
@@ void intel_csr_ucode_init(struct drm_i915_private *dev_priv) csr->max_fw_size = BXT_CSR_MAX_FW_SIZE; } - if (i915_modparams.dmc_firmware_path) { - if (strlen(i915_modparams.dmc_firmware_path) == 0) { + if (dev_priv->params.dmc_firmware_path) { + if (strlen(dev_priv->params.dmc_firmware_path) == 0) { csr->fw_path = NULL; drm_info(&dev_priv->drm, "Disabling CSR firmware and runtime PM\n"); return; } - csr->fw_path = i915_modparams.dmc_firmware_path; + csr->fw_path = dev_priv->params.dmc_firmware_path; /* Bypass version check for firmware override. */ csr->required_version = 0; } diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 34d1b7f1b140..77681356505b 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -4886,7 +4886,7 @@ void intel_prepare_reset(struct drm_i915_private *dev_priv) int ret; /* reset doesn't touch the display */ - if (!i915_modparams.force_reset_modeset_test && + if (!dev_priv->params.force_reset_modeset_test && !gpu_reset_clobbers_display(dev_priv)) return; @@ -7882,7 +7882,7 @@ bool hsw_crtc_state_ips_capable(const struct intel_crtc_state *crtc_state) if (!hsw_crtc_supports_ips(crtc)) return false; - if (!i915_modparams.enable_ips) + if (!dev_priv->params.enable_ips) return false; if (crtc_state->pipe_bpp > 24) @@ -8153,8 +8153,8 @@ static void intel_panel_sanitize_ssc(struct drm_i915_private *dev_priv) static bool intel_panel_use_ssc(struct drm_i915_private *dev_priv) { - if (i915_modparams.panel_use_ssc >= 0) - return i915_modparams.panel_use_ssc != 0; + if (dev_priv->params.panel_use_ssc >= 0) + return dev_priv->params.panel_use_ssc != 0; return dev_priv->vbt.lvds_use_ssc && !(dev_priv->quirks & QUIRK_LVDS_SSC_DISABLE); } @@ -13585,8 +13585,8 @@ pipe_config_mismatch(bool fastset, const struct intel_crtc *crtc, static bool fastboot_enabled(struct drm_i915_private *dev_priv) { - if (i915_modparams.fastboot != -1) - return 
i915_modparams.fastboot; + if (dev_priv->params.fastboot != -1) + return dev_priv->params.fastboot; /* Enable fastboot by default on Skylake and newer */ if (INTEL_GEN(dev_priv) >= 9) diff --git a/drivers/gpu/drm/i915/display/intel_display_debugfs.c b/drivers/gpu/drm/i915/display/intel_display_debugfs.c index 852f64946b96..cfe2517e0088 100644 --- a/drivers/gpu/drm/i915/display/intel_display_debugfs.c +++ b/drivers/gpu/drm/i915/display/intel_display_debugfs.c @@ -125,7 +125,7 @@ static int i915_ips_status(struct seq_file *m, void *unused) wakeref = intel_runtime_pm_get(&dev_priv->runtime_pm); seq_printf(m, "Enabled by kernel parameter: %s\n", - yesno(i915_modparams.enable_ips)); + yesno(dev_priv->params.enable_ips)); if (INTEL_GEN(dev_priv) >= 8) { seq_puts(m, "Currently: unknown\n"); diff --git a/drivers/gpu/drm/i915/display/intel_display_power.c b/drivers/gpu/drm/i915/display/intel_display_power.c index 24a2aa1fdc9c..834162bc5a3f 100644 --- a/drivers/gpu/drm/i915/display/intel_display_power.c +++ b/drivers/gpu/drm/i915/display/intel_display_power.c @@ -4513,7 +4513,7 @@ static u32 get_allowed_dc_mask(const struct drm_i915_private *dev_priv, mask = 0; } - if (!i915_modparams.disable_power_well) + if (!dev_priv->params.disable_power_well) max_dc = 0; if (enable_dc >= 0 && enable_dc <= max_dc) { @@ -4602,11 +4602,11 @@ int intel_power_domains_init(struct drm_i915_private *dev_priv) struct i915_power_domains *power_domains = &dev_priv->power_domains; int err; - i915_modparams.disable_power_well = + dev_priv->params.disable_power_well = sanitize_disable_power_well_option(dev_priv, - i915_modparams.disable_power_well); + dev_priv->params.disable_power_well); dev_priv->csr.allowed_dc_mask = - get_allowed_dc_mask(dev_priv, i915_modparams.enable_dc); + get_allowed_dc_mask(dev_priv, dev_priv->params.enable_dc); dev_priv->csr.target_dc_state = sanitize_target_dc_state(dev_priv, DC_STATE_EN_UPTO_DC6); @@ -5568,7 +5568,7 @@ void intel_power_domains_init_hw(struct 
drm_i915_private *i915, bool resume) intel_display_power_get(i915, POWER_DOMAIN_INIT); /* Disable power support if the user asked so. */ - if (!i915_modparams.disable_power_well) + if (!i915->params.disable_power_well) intel_display_power_get(i915, POWER_DOMAIN_INIT); intel_power_domains_sync_hw(i915); @@ -5592,7 +5592,7 @@ void intel_power_domains_driver_remove(struct drm_i915_private *i915) fetch_and_zero(&i915->power_domains.wakeref); /* Remove the refcount we took to keep power well support disabled. */ - if (!i915_modparams.disable_power_well) + if (!i915->params.disable_power_well) intel_display_power_put_unchecked(i915, POWER_DOMAIN_INIT); intel_display_power_flush_work_sync(i915); @@ -5681,7 +5681,7 @@ void intel_power_domains_suspend(struct drm_i915_private *i915, * Even if power well support was disabled we still want to disable * power wells if power domains must be deinitialized for suspend. */ - if (!i915_modparams.disable_power_well) + if (!i915->params.disable_power_well) intel_display_power_put_unchecked(i915, POWER_DOMAIN_INIT); intel_display_power_flush_work(i915); diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index 709ab7407a96..bbba97c956a2 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -4707,7 +4707,9 @@ intel_dp_sink_can_mst(struct intel_dp *intel_dp) static bool intel_dp_can_mst(struct intel_dp *intel_dp) { - return i915_modparams.enable_dp_mst && + struct drm_i915_private *i915 = dp_to_i915(intel_dp); + + return i915->params.enable_dp_mst && intel_dp->can_mst && intel_dp_sink_can_mst(intel_dp); } @@ -4724,13 +4726,13 @@ intel_dp_configure_mst(struct intel_dp *intel_dp) "[ENCODER:%d:%s] MST support: port: %s, sink: %s, modparam: %s\n", encoder->base.base.id, encoder->base.name, yesno(intel_dp->can_mst), yesno(sink_can_mst), - yesno(i915_modparams.enable_dp_mst)); + yesno(i915->params.enable_dp_mst)); if (!intel_dp->can_mst) return; 
intel_dp->is_mst = sink_can_mst && - i915_modparams.enable_dp_mst; + i915->params.enable_dp_mst; drm_dp_mst_topology_mgr_set_mst(&intel_dp->mst_mgr, intel_dp->is_mst); diff --git a/drivers/gpu/drm/i915/display/intel_dp_aux_backlight.c b/drivers/gpu/drm/i915/display/intel_dp_aux_backlight.c index 0722540d64ad..acbd7eb66cbe 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_aux_backlight.c +++ b/drivers/gpu/drm/i915/display/intel_dp_aux_backlight.c @@ -348,7 +348,7 @@ int intel_dp_aux_init_backlight_funcs(struct intel_connector *intel_connector) struct intel_dp *intel_dp = enc_to_intel_dp(intel_connector->encoder); struct drm_i915_private *i915 = dp_to_i915(intel_dp); - if (i915_modparams.enable_dpcd_backlight == 0 || + if (i915->params.enable_dpcd_backlight == 0 || !intel_dp_aux_display_control_capable(intel_connector)) return -ENODEV; @@ -358,7 +358,7 @@ int intel_dp_aux_init_backlight_funcs(struct intel_connector *intel_connector) */ if (i915->vbt.backlight.type != INTEL_BACKLIGHT_VESA_EDP_AUX_INTERFACE && - i915_modparams.enable_dpcd_backlight != 1 && + i915->params.enable_dpcd_backlight != 1 && !drm_dp_has_quirk(&intel_dp->desc, intel_dp->edid_quirks, DP_QUIRK_FORCE_DPCD_BACKLIGHT)) { drm_info(&i915->drm, diff --git a/drivers/gpu/drm/i915/display/intel_fbc.c b/drivers/gpu/drm/i915/display/intel_fbc.c index 1c26673acb2d..30649e17cfb7 100644 --- a/drivers/gpu/drm/i915/display/intel_fbc.c +++ b/drivers/gpu/drm/i915/display/intel_fbc.c @@ -740,7 +740,7 @@ static bool intel_fbc_can_enable(struct drm_i915_private *dev_priv) return false; } - if (!i915_modparams.enable_fbc) { + if (!dev_priv->params.enable_fbc) { fbc->no_fbc_reason = "disabled per module param or by default"; return false; } @@ -1017,7 +1017,7 @@ static void __intel_fbc_post_update(struct intel_crtc *crtc) fbc->flip_pending = false; - if (!i915_modparams.enable_fbc) { + if (!dev_priv->params.enable_fbc) { intel_fbc_deactivate(dev_priv, "disabled at runtime per module param"); 
__intel_fbc_disable(dev_priv); @@ -1370,8 +1370,8 @@ void intel_fbc_handle_fifo_underrun_irq(struct drm_i915_private *dev_priv) */ static int intel_sanitize_fbc_option(struct drm_i915_private *dev_priv) { - if (i915_modparams.enable_fbc >= 0) - return !!i915_modparams.enable_fbc; + if (dev_priv->params.enable_fbc >= 0) + return !!dev_priv->params.enable_fbc; if (!HAS_FBC(dev_priv)) return 0; @@ -1415,9 +1415,9 @@ void intel_fbc_init(struct drm_i915_private *dev_priv) if (need_fbc_vtd_wa(dev_priv)) mkwrite_device_info(dev_priv)->display.has_fbc = false; - i915_modparams.enable_fbc = intel_sanitize_fbc_option(dev_priv); + dev_priv->params.enable_fbc = intel_sanitize_fbc_option(dev_priv); drm_dbg_kms(&dev_priv->drm, "Sanitized enable_fbc value: %d\n", - i915_modparams.enable_fbc); + dev_priv->params.enable_fbc); if (!HAS_FBC(dev_priv)) { fbc->no_fbc_reason = "unsupported by this chipset"; diff --git a/drivers/gpu/drm/i915/display/intel_lvds.c b/drivers/gpu/drm/i915/display/intel_lvds.c index 872f2a489339..1888611244db 100644 --- a/drivers/gpu/drm/i915/display/intel_lvds.c +++ b/drivers/gpu/drm/i915/display/intel_lvds.c @@ -784,8 +784,8 @@ static bool compute_is_dual_link_lvds(struct intel_lvds_encoder *lvds_encoder) struct drm_i915_private *dev_priv = to_i915(dev); /* use the module option value if specified */ - if (i915_modparams.lvds_channel_mode > 0) - return i915_modparams.lvds_channel_mode == 2; + if (dev_priv->params.lvds_channel_mode > 0) + return dev_priv->params.lvds_channel_mode == 2; /* single channel LVDS is limited to 112 MHz */ if (lvds_encoder->attached_connector->panel.fixed_mode->clock > 112999) diff --git a/drivers/gpu/drm/i915/display/intel_opregion.c b/drivers/gpu/drm/i915/display/intel_opregion.c index cc6b00959586..de995362f428 100644 --- a/drivers/gpu/drm/i915/display/intel_opregion.c +++ b/drivers/gpu/drm/i915/display/intel_opregion.c @@ -801,7 +801,7 @@ static int intel_load_vbt_firmware(struct drm_i915_private *dev_priv) { struct 
intel_opregion *opregion = &dev_priv->opregion; const struct firmware *fw = NULL; - const char *name = i915_modparams.vbt_firmware; + const char *name = dev_priv->params.vbt_firmware; int ret; if (!name || !*name) diff --git a/drivers/gpu/drm/i915/display/intel_panel.c b/drivers/gpu/drm/i915/display/intel_panel.c index 3c5056dbf607..aaed9eb3b56c 100644 --- a/drivers/gpu/drm/i915/display/intel_panel.c +++ b/drivers/gpu/drm/i915/display/intel_panel.c @@ -521,10 +521,10 @@ static u32 intel_panel_compute_brightness(struct intel_connector *connector, drm_WARN_ON(&dev_priv->drm, panel->backlight.max == 0); - if (i915_modparams.invert_brightness < 0) + if (dev_priv->params.invert_brightness < 0) return val; - if (i915_modparams.invert_brightness > 0 || + if (dev_priv->params.invert_brightness > 0 || dev_priv->quirks & QUIRK_INVERT_BRIGHTNESS) { return panel->backlight.max - val + panel->backlight.min; } diff --git a/drivers/gpu/drm/i915/display/intel_psr.c b/drivers/gpu/drm/i915/display/intel_psr.c index ab380e6dc674..86bf7a76f93d 100644 --- a/drivers/gpu/drm/i915/display/intel_psr.c +++ b/drivers/gpu/drm/i915/display/intel_psr.c @@ -83,7 +83,7 @@ static bool psr_global_enabled(struct drm_i915_private *i915) { switch (i915->psr.debug & I915_PSR_DEBUG_MODE_MASK) { case I915_PSR_DEBUG_DEFAULT: - return i915_modparams.enable_psr; + return i915->params.enable_psr; case I915_PSR_DEBUG_DISABLE: return false; default: @@ -426,7 +426,7 @@ static u32 intel_psr1_get_tp_time(struct intel_dp *intel_dp) if (INTEL_GEN(dev_priv) >= 11) val |= EDP_PSR_TP4_TIME_0US; - if (i915_modparams.psr_safest_params) { + if (dev_priv->params.psr_safest_params) { val |= EDP_PSR_TP1_TIME_2500us; val |= EDP_PSR_TP2_TP3_TIME_2500us; goto check_tp3_sel; @@ -507,7 +507,7 @@ static u32 intel_psr2_get_tp_time(struct intel_dp *intel_dp) struct drm_i915_private *dev_priv = dp_to_i915(intel_dp); u32 val = 0; - if (i915_modparams.psr_safest_params) + if (dev_priv->params.psr_safest_params) return 
EDP_PSR2_TP2_TIME_2500us; if (dev_priv->vbt.psr.psr2_tp2_tp3_wakeup_time_us >= 0 && @@ -1500,9 +1500,9 @@ void intel_psr_init(struct drm_i915_private *dev_priv) */ dev_priv->hsw_psr_mmio_adjust = _SRD_CTL_EDP - _HSW_EDP_PSR_BASE; - if (i915_modparams.enable_psr == -1) + if (dev_priv->params.enable_psr == -1) if (INTEL_GEN(dev_priv) < 9 || !dev_priv->vbt.psr.enable) - i915_modparams.enable_psr = 0; + dev_priv->params.enable_psr = 0; /* Set link_standby x link_off defaults */ if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index f5d59d18cd5b..4d88faeb4d4c 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -650,7 +650,7 @@ static void context_close(struct i915_gem_context *ctx) * context close. */ if (!i915_gem_context_is_persistent(ctx) || - !i915_modparams.enable_hangcheck) + !ctx->i915->params.enable_hangcheck) kill_context(ctx); i915_gem_context_put(ctx); @@ -667,7 +667,7 @@ static int __context_set_persistence(struct i915_gem_context *ctx, bool state) * reset] are allowed to survive past termination. We require * hangcheck to ensure that the persistent requests are healthy. 
*/ - if (!i915_modparams.enable_hangcheck) + if (!ctx->i915->params.enable_hangcheck) return -EINVAL; i915_gem_context_set_persistence(ctx); diff --git a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c index cd20fb549b38..8db7e93abde5 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c @@ -4,6 +4,7 @@ * Copyright © 2019 Intel Corporation */ +#include "i915_drv.h" #include "i915_request.h" #include "intel_context.h" @@ -133,7 +134,7 @@ static void heartbeat(struct work_struct *wrk) goto unlock; idle_pulse(engine, rq); - if (i915_modparams.enable_hangcheck) + if (engine->i915->params.enable_hangcheck) engine->heartbeat.systole = i915_request_get(rq); __i915_request_commit(rq); diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c index 39070b514e65..0156f1f5c736 100644 --- a/drivers/gpu/drm/i915/gt/intel_reset.c +++ b/drivers/gpu/drm/i915/gt/intel_reset.c @@ -638,7 +638,7 @@ int __intel_gt_reset(struct intel_gt *gt, intel_engine_mask_t engine_mask) bool intel_has_gpu_reset(const struct intel_gt *gt) { - if (!i915_modparams.reset) + if (!gt->i915->params.reset) return NULL; return intel_get_gpu_reset(gt); @@ -646,7 +646,7 @@ bool intel_has_gpu_reset(const struct intel_gt *gt) bool intel_has_reset_engine(const struct intel_gt *gt) { - if (i915_modparams.reset < 2) + if (gt->i915->params.reset < 2) return false; return INTEL_INFO(gt->i915)->has_reset_engine; @@ -1038,7 +1038,7 @@ void intel_gt_reset(struct intel_gt *gt, awake = reset_prepare(gt); if (!intel_has_gpu_reset(gt)) { - if (i915_modparams.reset) + if (gt->i915->params.reset) drm_err(&gt->i915->drm, "GPU reset not supported\n"); else drm_dbg(&gt->i915->drm, "GPU reset disabled\n"); diff --git a/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c b/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c index f3034c613bc0..73243ba59c7d 100644 ---
a/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c +++ b/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c @@ -365,12 +365,12 @@ int intel_heartbeat_live_selftests(struct drm_i915_private *i915) if (intel_gt_is_wedged(&i915->gt)) return 0; - saved_hangcheck = i915_modparams.enable_hangcheck; - i915_modparams.enable_hangcheck = INT_MAX; + saved_hangcheck = i915->params.enable_hangcheck; + i915->params.enable_hangcheck = INT_MAX; err = intel_gt_live_subtests(tests, &i915->gt); - i915_modparams.enable_hangcheck = saved_hangcheck; + i915->params.enable_hangcheck = saved_hangcheck; return err; } diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c index fb10f3597ea5..9bbe8a795cb8 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c @@ -424,25 +424,28 @@ static void guc_log_capture_logs(struct intel_guc_log *log) static u32 __get_default_log_level(struct intel_guc_log *log) { + struct intel_guc *guc = log_to_guc(log); + struct drm_i915_private *i915 = guc_to_gt(guc)->i915; + /* A negative value means "use platform/config default" */ - if (i915_modparams.guc_log_level < 0) { + if (i915->params.guc_log_level < 0) { return (IS_ENABLED(CONFIG_DRM_I915_DEBUG) || IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) ? GUC_LOG_LEVEL_MAX : GUC_LOG_LEVEL_NON_VERBOSE; } - if (i915_modparams.guc_log_level > GUC_LOG_LEVEL_MAX) { + if (i915->params.guc_log_level > GUC_LOG_LEVEL_MAX) { DRM_WARN("Incompatible option detected: %s=%d, %s!\n", - "guc_log_level", i915_modparams.guc_log_level, + "guc_log_level", i915->params.guc_log_level, "verbosity too high"); return (IS_ENABLED(CONFIG_DRM_I915_DEBUG) || IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) ? 
GUC_LOG_LEVEL_MAX : GUC_LOG_LEVEL_DISABLED; } - GEM_BUG_ON(i915_modparams.guc_log_level < GUC_LOG_LEVEL_DISABLED); - GEM_BUG_ON(i915_modparams.guc_log_level > GUC_LOG_LEVEL_MAX); - return i915_modparams.guc_log_level; + GEM_BUG_ON(i915->params.guc_log_level < GUC_LOG_LEVEL_DISABLED); + GEM_BUG_ON(i915->params.guc_log_level > GUC_LOG_LEVEL_MAX); + return i915->params.guc_log_level; } int intel_guc_log_create(struct intel_guc_log *log) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index 94eb63f309ce..fdfeb4b9b0f5 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -660,10 +660,12 @@ void intel_guc_submission_disable(struct intel_guc *guc) static bool __guc_submission_selected(struct intel_guc *guc) { + struct drm_i915_private *i915 = guc_to_gt(guc)->i915; + if (!intel_guc_submission_is_supported(guc)) return false; - return i915_modparams.enable_guc & ENABLE_GUC_SUBMISSION; + return i915->params.enable_guc & ENABLE_GUC_SUBMISSION; } void intel_guc_submission_init_early(struct intel_guc *guc) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.c b/drivers/gpu/drm/i915/gt/uc/intel_uc.c index f518fe05c6f9..1c2d6358826c 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.c @@ -47,15 +47,15 @@ static void __confirm_options(struct intel_uc *uc) drm_dbg(&i915->drm, "enable_guc=%d (guc:%s submission:%s huc:%s)\n", - i915_modparams.enable_guc, + i915->params.enable_guc, yesno(intel_uc_wants_guc(uc)), yesno(intel_uc_wants_guc_submission(uc)), yesno(intel_uc_wants_huc(uc))); - if (i915_modparams.enable_guc == -1) + if (i915->params.enable_guc == -1) return; - if (i915_modparams.enable_guc == 0) { + if (i915->params.enable_guc == 0) { GEM_BUG_ON(intel_uc_wants_guc(uc)); GEM_BUG_ON(intel_uc_wants_guc_submission(uc)); GEM_BUG_ON(intel_uc_wants_huc(uc)); @@ -65,25 +65,25 @@ static void 
__confirm_options(struct intel_uc *uc) if (!intel_uc_supports_guc(uc)) drm_info(&i915->drm, "Incompatible option enable_guc=%d - %s\n", - i915_modparams.enable_guc, "GuC is not supported!"); + i915->params.enable_guc, "GuC is not supported!"); - if (i915_modparams.enable_guc & ENABLE_GUC_LOAD_HUC && + if (i915->params.enable_guc & ENABLE_GUC_LOAD_HUC && !intel_uc_supports_huc(uc)) drm_info(&i915->drm, "Incompatible option enable_guc=%d - %s\n", - i915_modparams.enable_guc, "HuC is not supported!"); + i915->params.enable_guc, "HuC is not supported!"); - if (i915_modparams.enable_guc & ENABLE_GUC_SUBMISSION && + if (i915->params.enable_guc & ENABLE_GUC_SUBMISSION && !intel_uc_supports_guc_submission(uc)) drm_info(&i915->drm, "Incompatible option enable_guc=%d - %s\n", - i915_modparams.enable_guc, "GuC submission is N/A"); + i915->params.enable_guc, "GuC submission is N/A"); - if (i915_modparams.enable_guc & ~(ENABLE_GUC_SUBMISSION | + if (i915->params.enable_guc & ~(ENABLE_GUC_SUBMISSION | ENABLE_GUC_LOAD_HUC)) drm_info(&i915->drm, "Incompatible option enable_guc=%d - %s\n", - i915_modparams.enable_guc, "undocumented flag"); + i915->params.enable_guc, "undocumented flag"); } void intel_uc_init_early(struct intel_uc *uc) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c index e75be3999358..59b27aba15c6 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c @@ -115,11 +115,13 @@ struct __packed uc_fw_platform_requirement { }, static void -__uc_fw_auto_select(struct intel_uc_fw *uc_fw, enum intel_platform p, u8 rev) +__uc_fw_auto_select(struct drm_i915_private *i915, struct intel_uc_fw *uc_fw) { static const struct uc_fw_platform_requirement fw_blobs[] = { INTEL_UC_FIRMWARE_DEFS(MAKE_FW_LIST, GUC_FW_BLOB, HUC_FW_BLOB) }; + enum intel_platform p = INTEL_INFO(i915)->platform; + u8 rev = INTEL_REVID(i915); int i; for (i = 0; i < ARRAY_SIZE(fw_blobs) && p <= fw_blobs[i].p; i++) { @@ 
-154,35 +156,35 @@ __uc_fw_auto_select(struct intel_uc_fw *uc_fw, enum intel_platform p, u8 rev) } /* We don't want to enable GuC/HuC on pre-Gen11 by default */ - if (i915_modparams.enable_guc == -1 && p < INTEL_ICELAKE) + if (i915->params.enable_guc == -1 && p < INTEL_ICELAKE) uc_fw->path = NULL; } -static const char *__override_guc_firmware_path(void) +static const char *__override_guc_firmware_path(struct drm_i915_private *i915) { - if (i915_modparams.enable_guc & (ENABLE_GUC_SUBMISSION | - ENABLE_GUC_LOAD_HUC)) - return i915_modparams.guc_firmware_path; + if (i915->params.enable_guc & (ENABLE_GUC_SUBMISSION | + ENABLE_GUC_LOAD_HUC)) + return i915->params.guc_firmware_path; return ""; } -static const char *__override_huc_firmware_path(void) +static const char *__override_huc_firmware_path(struct drm_i915_private *i915) { - if (i915_modparams.enable_guc & ENABLE_GUC_LOAD_HUC) - return i915_modparams.huc_firmware_path; + if (i915->params.enable_guc & ENABLE_GUC_LOAD_HUC) + return i915->params.huc_firmware_path; return ""; } -static void __uc_fw_user_override(struct intel_uc_fw *uc_fw) +static void __uc_fw_user_override(struct drm_i915_private *i915, struct intel_uc_fw *uc_fw) { const char *path = NULL; switch (uc_fw->type) { case INTEL_UC_FW_TYPE_GUC: - path = __override_guc_firmware_path(); + path = __override_guc_firmware_path(i915); break; case INTEL_UC_FW_TYPE_HUC: - path = __override_huc_firmware_path(); + path = __override_huc_firmware_path(i915); break; } @@ -216,10 +218,8 @@ void intel_uc_fw_init_early(struct intel_uc_fw *uc_fw, uc_fw->type = type; if (HAS_GT_UC(i915)) { - __uc_fw_auto_select(uc_fw, - INTEL_INFO(i915)->platform, - INTEL_REVID(i915)); - __uc_fw_user_override(uc_fw); + __uc_fw_auto_select(i915, uc_fw); + __uc_fw_user_override(i915, uc_fw); } intel_uc_fw_change_status(uc_fw, uc_fw->path ? *uc_fw->path ? 
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 4481feb34bc5..242f59910c19 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -64,7 +64,7 @@ static int i915_capabilities(struct seq_file *m, void *data) intel_driver_caps_print(&i915->caps, &p); kernel_param_lock(THIS_MODULE); - i915_params_dump(&i915_modparams, &p); + i915_params_dump(&i915->params, &p); kernel_param_unlock(THIS_MODULE); return 0; diff --git a/drivers/gpu/drm/i915/i915_debugfs_params.c b/drivers/gpu/drm/i915/i915_debugfs_params.c index 62b2c5f0495d..4e2b077692cb 100644 --- a/drivers/gpu/drm/i915/i915_debugfs_params.c +++ b/drivers/gpu/drm/i915/i915_debugfs_params.c @@ -138,9 +138,6 @@ static ssize_t i915_param_charp_write(struct file *file, char **s = m->private; char *new, *old; - /* FIXME: remove locking after params aren't the module params */ - kernel_param_lock(THIS_MODULE); - old = *s; new = strndup_user(ubuf, PAGE_SIZE); if (IS_ERR(new)) { @@ -152,8 +149,6 @@ static ssize_t i915_param_charp_write(struct file *file, kfree(old); out: - kernel_param_unlock(THIS_MODULE); - return len; } @@ -229,7 +224,7 @@ _i915_param_create_file(struct dentry *parent, const char *name, struct dentry *i915_debugfs_params(struct drm_i915_private *i915) { struct drm_minor *minor = i915->drm.primary; - struct i915_params *params = &i915_modparams; + struct i915_params *params = &i915->params; struct dentry *dir; dir = debugfs_create_dir("i915_params", minor->debugfs_root); diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index c08b165a9cb4..da991d1967a2 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -500,6 +500,8 @@ static void i915_driver_late_release(struct drm_i915_private *dev_priv) cpu_latency_qos_remove_request(&dev_priv->sb_qos); mutex_destroy(&dev_priv->sb_lock); + + i915_params_free(&dev_priv->params); } /** @@ -920,6 +922,9 @@ i915_driver_create(struct 
pci_dev *pdev, const struct pci_device_id *ent) i915->drm.pdev = pdev; pci_set_drvdata(pdev, i915); + /* Device parameters start as a copy of module parameters. */ + i915_params_copy(&i915->params, &i915_modparams); + /* Setup the write-once "constant" device info */ device_info = mkwrite_device_info(i915); memcpy(device_info, match_info, sizeof(*device_info)); @@ -964,7 +969,7 @@ int i915_driver_probe(struct pci_dev *pdev, const struct pci_device_id *ent) return PTR_ERR(i915); /* Disable nuclear pageflip by default on pre-ILK */ - if (!i915_modparams.nuclear_pageflip && match_info->gen < 5) + if (!i915->params.nuclear_pageflip && match_info->gen < 5) i915->drm.driver_features &= ~DRIVER_ATOMIC; /* @@ -974,7 +979,7 @@ int i915_driver_probe(struct pci_dev *pdev, const struct pci_device_id *ent) #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) if (IS_ENABLED(CONFIG_DRM_I915_UNSTABLE_FAKE_LMEM)) { if (INTEL_GEN(i915) >= 9 && i915_selftest.live < 0 && - i915_modparams.fake_lmem_start) { + i915->params.fake_lmem_start) { mkwrite_device_info(i915)->memory_regions = REGION_SMEM | REGION_LMEM | REGION_STOLEN; mkwrite_device_info(i915)->is_dgfx = true; diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 7a9ea43bab66..2697960f15a9 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -827,6 +827,9 @@ struct i915_selftest_stash { struct drm_i915_private { struct drm_device drm; + /* i915 device parameters */ + struct i915_params params; + const struct intel_device_info __info; /* Use INTEL_INFO() to access. */ struct intel_runtime_info __runtime; /* Use RUNTIME_INFO() to access. 
*/ struct intel_driver_caps caps; @@ -1688,7 +1691,7 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915, /* Only valid when HAS_DISPLAY() is true */ #define INTEL_DISPLAY_ENABLED(dev_priv) \ - (drm_WARN_ON(&(dev_priv)->drm, !HAS_DISPLAY(dev_priv)), !i915_modparams.disable_display) + (drm_WARN_ON(&(dev_priv)->drm, !HAS_DISPLAY(dev_priv)), !(dev_priv)->params.disable_display) static inline bool intel_vtd_active(void) { diff --git a/drivers/gpu/drm/i915/i915_getparam.c b/drivers/gpu/drm/i915/i915_getparam.c index d042644b9cd2..40390b2352b1 100644 --- a/drivers/gpu/drm/i915/i915_getparam.c +++ b/drivers/gpu/drm/i915/i915_getparam.c @@ -80,7 +80,7 @@ int i915_getparam_ioctl(struct drm_device *dev, void *data, return -ENODEV; break; case I915_PARAM_HAS_GPU_RESET: - value = i915_modparams.enable_hangcheck && + value = i915->params.enable_hangcheck && intel_has_gpu_reset(&i915->gt); if (value && intel_has_reset_engine(&i915->gt)) value = 2; diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index eec292d06f11..866166ada10e 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -1698,7 +1698,7 @@ static void capture_gen(struct i915_gpu_coredump *error) error->reset_count = i915_reset_count(&i915->gpu_error); error->suspend_count = i915->suspend_count; - i915_params_copy(&error->params, &i915_modparams); + i915_params_copy(&error->params, &i915->params); memcpy(&error->device_info, INTEL_INFO(i915), sizeof(error->device_info)); @@ -1713,7 +1713,7 @@ i915_gpu_coredump_alloc(struct drm_i915_private *i915, gfp_t gfp) { struct i915_gpu_coredump *error; - if (!i915_modparams.error_capture) + if (!i915->params.error_capture) return NULL; error = kzalloc(sizeof(*error), gfp); diff --git a/drivers/gpu/drm/i915/intel_gvt.c b/drivers/gpu/drm/i915/intel_gvt.c index dd8981340d6e..99fe8aef1c67 100644 --- a/drivers/gpu/drm/i915/intel_gvt.c +++ b/drivers/gpu/drm/i915/intel_gvt.c @@ -66,7 +66,7 @@ static 
bool is_supported_device(struct drm_i915_private *dev_priv) */ void intel_gvt_sanitize_options(struct drm_i915_private *dev_priv) { - if (!i915_modparams.enable_gvt) + if (!dev_priv->params.enable_gvt) return; if (intel_vgpu_active(dev_priv)) { @@ -82,7 +82,7 @@ void intel_gvt_sanitize_options(struct drm_i915_private *dev_priv) return; bail: - i915_modparams.enable_gvt = 0; + dev_priv->params.enable_gvt = 0; } /** @@ -102,7 +102,7 @@ int intel_gvt_init(struct drm_i915_private *dev_priv) if (i915_inject_probe_failure(dev_priv)) return -ENODEV; - if (!i915_modparams.enable_gvt) { + if (!dev_priv->params.enable_gvt) { drm_dbg(&dev_priv->drm, "GVT-g is disabled by kernel params\n"); return 0; @@ -123,7 +123,7 @@ int intel_gvt_init(struct drm_i915_private *dev_priv) return 0; bail: - i915_modparams.enable_gvt = 0; + dev_priv->params.enable_gvt = 0; return 0; } diff --git a/drivers/gpu/drm/i915/intel_region_lmem.c b/drivers/gpu/drm/i915/intel_region_lmem.c index 14b59b899c9b..40d8f1a95df6 100644 --- a/drivers/gpu/drm/i915/intel_region_lmem.c +++ b/drivers/gpu/drm/i915/intel_region_lmem.c @@ -76,7 +76,7 @@ region_lmem_init(struct intel_memory_region *mem) { int ret; - if (i915_modparams.fake_lmem_start) { + if (mem->i915->params.fake_lmem_start) { ret = init_fake_lmem_bar(mem); GEM_BUG_ON(ret); } @@ -111,12 +111,12 @@ intel_setup_fake_lmem(struct drm_i915_private *i915) resource_size_t start; GEM_BUG_ON(i915_ggtt_has_aperture(&i915->ggtt)); - GEM_BUG_ON(!i915_modparams.fake_lmem_start); + GEM_BUG_ON(!i915->params.fake_lmem_start); /* Your mappable aperture belongs to me now! 
*/ mappable_end = pci_resource_len(pdev, 2); io_start = pci_resource_start(pdev, 2), - start = i915_modparams.fake_lmem_start; + start = i915->params.fake_lmem_start; mem = intel_memory_region_create(i915, start, diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index 7d6b9ae7403c..592364aed2da 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -1185,7 +1185,7 @@ __unclaimed_reg_debug(struct intel_uncore *uncore, read ? "read from" : "write to", i915_mmio_reg_offset(reg))) /* Only report the first N failures */ - i915_modparams.mmio_debug--; + uncore->i915->params.mmio_debug--; } static inline void @@ -1194,7 +1194,7 @@ unclaimed_reg_debug(struct intel_uncore *uncore, const bool read, const bool before) { - if (likely(!i915_modparams.mmio_debug)) + if (likely(!uncore->i915->params.mmio_debug)) return; /* interrupts are disabled and re-enabled around uncore->lock usage */ @@ -2093,12 +2093,12 @@ intel_uncore_arm_unclaimed_mmio_detection(struct intel_uncore *uncore) goto out; if (unlikely(check_for_unclaimed_mmio(uncore))) { - if (!i915_modparams.mmio_debug) { + if (!uncore->i915->params.mmio_debug) { drm_dbg(&uncore->i915->drm, "Unclaimed register detected, " "enabling oneshot unclaimed register reporting. " "Please use i915.mmio_debug=N for more information.\n"); - i915_modparams.mmio_debug++; + uncore->i915->params.mmio_debug++; } uncore->debug->unclaimed_mmio_check--; ret = true; From f153478de4b259afb0a383de83e817b4c237401b Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Wed, 17 Jun 2020 00:11:44 +0300 Subject: [PATCH 189/222] drm/i915/tgl+: Use the correct DP_TP_* register instances in MST encoders MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit MST encoders must use the master MST transcoder's DP_TP_STATUS and DP_TP_CONTROL registers. 
Atm, during the HW readout of an MST encoder connected to a slave transcoder we reset these register addresses in intel_dp::regs.dp_tp_* to the slave transcoder's DP_TP_* register addresses incorrectly; fix this. One example where the above overwrite happens is the encoder HW state validation after enabling multiple streams; see intel_dp_mst_enc_get_config(). After that during disabling any stream we'll get a 'Timed out waiting for ACT sent when disabling' error, due to reading from the incorrect DP_TP_STATUS register. This change replaces https://patchwork.freedesktop.org/patch/369577/?series=78193&rev=1 which just papered over the problem. v2: - Correct the failure scenario in the commit log. (José) Cc: Ville Syrjälä Cc: José Roberto de Souza Signed-off-by: Imre Deak Reviewed-by: Ville Syrjälä Reviewed-by: José Roberto de Souza Link: https://patchwork.freedesktop.org/patch/msgid/20200616211146.23027-1-imre.deak@intel.com --- drivers/gpu/drm/i915/display/intel_ddi.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c index ca7bb2294d2b..73d6cc29291a 100644 --- a/drivers/gpu/drm/i915/display/intel_ddi.c +++ b/drivers/gpu/drm/i915/display/intel_ddi.c @@ -4193,11 +4193,6 @@ void intel_ddi_get_config(struct intel_encoder *encoder, if (drm_WARN_ON(&dev_priv->drm, transcoder_is_dsi(cpu_transcoder))) return; - if (INTEL_GEN(dev_priv) >= 12) { - intel_dp->regs.dp_tp_ctl = TGL_DP_TP_CTL(cpu_transcoder); - intel_dp->regs.dp_tp_status = TGL_DP_TP_STATUS(cpu_transcoder); - } - intel_dsc_get_config(encoder, pipe_config); temp = intel_de_read(dev_priv, TRANS_DDI_FUNC_CTL(cpu_transcoder)); @@ -4299,6 +4294,16 @@ void intel_ddi_get_config(struct intel_encoder *encoder, break; } + if (INTEL_GEN(dev_priv) >= 12) { + enum transcoder transcoder = + intel_dp_mst_is_slave_trans(pipe_config) ?
+ pipe_config->mst_master_transcoder : + pipe_config->cpu_transcoder; + + intel_dp->regs.dp_tp_ctl = TGL_DP_TP_CTL(transcoder); + intel_dp->regs.dp_tp_status = TGL_DP_TP_STATUS(transcoder); + } + pipe_config->has_audio = intel_ddi_is_audio_enabled(dev_priv, cpu_transcoder); From 80a8cecf62a5917e27c85d13430f7346df57883f Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Wed, 17 Jun 2020 00:11:45 +0300 Subject: [PATCH 190/222] drm/i915/dp_mst: Disable link training fallback on MST links MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit During the initial probing of an MST sink, MST core will determine the sink's link bandwidth based on its own version of the sink link rate/lane count caps it reads from the DPCD. At a later point (after probing and 1 or more modesets) i915 may limit the link parameters wrt. the original source/sink common caps above due to link training failures during a modeset and the resulting link training fallback logic. Based on the above a modeset following another modeset with a link training error will compute the i915 HW specific and DP protocol timing parameters (data/link M/N and MST TU values) taking into account only the unlimited source/sink common caps, but not taking into account the fallback limits. This will also let DRM core oversubscribe the actual link bandwidth during the MST payload allocation. Prevent the above problem by disabling the link training fallback on MST links for now, until the MST probe time initialization and the MST compute config logic can deal with changing link parameters. The misconfigured timings lead at least to a 'Timed out waiting for DP idle patterns' error. v2: (Ville) - Print link training error message on the MST path too. - Clarify the problem in the commit log. 
Cc: Ville Syrjälä Cc: Manasi Navare Signed-off-by: Imre Deak Reviewed-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20200616211146.23027-2-imre.deak@intel.com --- drivers/gpu/drm/i915/display/intel_dp.c | 27 ++++++++++++++++++------- 1 file changed, 20 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index bbba97c956a2..7765a8b95b9d 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -468,6 +468,15 @@ int intel_dp_get_link_train_fallback_values(struct intel_dp *intel_dp, struct drm_i915_private *i915 = dp_to_i915(intel_dp); int index; + /* + * TODO: Enable fallback on MST links once MST link compute can handle + * the fallback params. + */ + if (intel_dp->is_mst) { + drm_err(&i915->drm, "Link Training Unsuccessful\n"); + return -1; + } + index = intel_dp_rate_index(intel_dp->common_rates, intel_dp->num_common_rates, link_rate); @@ -6165,7 +6174,17 @@ intel_dp_detect(struct drm_connector *connector, goto out; } - if (intel_dp->reset_link_params) { + /* Read DP Sink DSC Cap DPCD regs for DP v1.4 */ + if (INTEL_GEN(dev_priv) >= 11) + intel_dp_get_dsc_sink_cap(intel_dp); + + intel_dp_configure_mst(intel_dp); + + /* + * TODO: Reset link params when switching to MST mode, until MST + * supports link training fallback params. 
+ */ + if (intel_dp->reset_link_params || intel_dp->is_mst) { /* Initial max link lane count */ intel_dp->max_link_lane_count = intel_dp_max_common_lane_count(intel_dp); @@ -6177,12 +6196,6 @@ intel_dp_detect(struct drm_connector *connector, intel_dp_print_rates(intel_dp); - /* Read DP Sink DSC Cap DPCD regs for DP v1.4 */ - if (INTEL_GEN(dev_priv) >= 11) - intel_dp_get_dsc_sink_cap(intel_dp); - - intel_dp_configure_mst(intel_dp); - if (intel_dp->is_mst) { /* * If we are in MST mode then this connector From e60b8672526cc0e1a033380998eff6978b8d3e09 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Tue, 16 Jun 2020 17:18:52 +0300 Subject: [PATCH 191/222] drm/i915/dp_mst: Move clearing the ACT sent flag closer to its polling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit During transcoder enabling we'll configure the transcoder in MST mode and enable the VC payload allocation, which will start the ACT sequence. Before waiting for the ACT sequence completion, we need to clear the ACT sent flag, but based on the above we can do this right before enabling the transcoder. For clarity, move the flag clearing closer to where we wait for it. While at it also factor out some common code. 
Cc: Ville Syrjälä Signed-off-by: Imre Deak Reviewed-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20200616141855.746-3-imre.deak@intel.com --- drivers/gpu/drm/i915/display/intel_dp_mst.c | 36 +++++++++++++-------- 1 file changed, 23 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.c b/drivers/gpu/drm/i915/display/intel_dp_mst.c index c5cda3e24fd5..33eb15478f40 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_mst.c +++ b/drivers/gpu/drm/i915/display/intel_dp_mst.c @@ -318,6 +318,25 @@ intel_dp_mst_atomic_check(struct drm_connector *connector, return ret; } +static void clear_act_sent(struct intel_dp *intel_dp) +{ + struct drm_i915_private *i915 = dp_to_i915(intel_dp); + + intel_de_write(i915, intel_dp->regs.dp_tp_status, + intel_de_read(i915, intel_dp->regs.dp_tp_status)); +} + +static void wait_for_act_sent(struct intel_dp *intel_dp) +{ + struct drm_i915_private *i915 = dp_to_i915(intel_dp); + + if (intel_de_wait_for_set(i915, intel_dp->regs.dp_tp_status, + DP_TP_STATUS_ACT_SENT, 1)) + drm_err(&i915->drm, "Timed out waiting for ACT sent\n"); + + drm_dp_check_act_status(&intel_dp->mst_mgr); +} + static void intel_mst_disable_dp(struct intel_atomic_state *state, struct intel_encoder *encoder, const struct intel_crtc_state *old_crtc_state, @@ -378,11 +397,7 @@ static void intel_mst_post_disable_dp(struct intel_atomic_state *state, TRANS_DDI_FUNC_CTL(old_crtc_state->cpu_transcoder), val); - if (intel_de_wait_for_set(dev_priv, intel_dp->regs.dp_tp_status, - DP_TP_STATUS_ACT_SENT, 1)) - drm_err(&dev_priv->drm, - "Timed out waiting for ACT sent when disabling\n"); - drm_dp_check_act_status(&intel_dp->mst_mgr); + wait_for_act_sent(intel_dp); drm_dp_mst_deallocate_vcpi(&intel_dp->mst_mgr, connector->port); @@ -453,7 +468,6 @@ static void intel_mst_pre_enable_dp(struct intel_atomic_state *state, struct intel_connector *connector = to_intel_connector(conn_state->connector); int ret; - u32 temp; bool 
first_mst_stream; /* MST encoders are bound to a crtc, not to a connector, @@ -486,8 +500,6 @@ static void intel_mst_pre_enable_dp(struct intel_atomic_state *state, drm_err(&dev_priv->drm, "failed to allocate vcpi\n"); intel_dp->active_mst_links++; - temp = intel_de_read(dev_priv, intel_dp->regs.dp_tp_status); - intel_de_write(dev_priv, intel_dp->regs.dp_tp_status, temp); ret = drm_dp_update_payload_part1(&intel_dp->mst_mgr); @@ -518,16 +530,14 @@ static void intel_mst_enable_dp(struct intel_atomic_state *state, drm_WARN_ON(&dev_priv->drm, pipe_config->has_pch_encoder); + clear_act_sent(intel_dp); + intel_ddi_enable_transcoder_func(encoder, pipe_config); drm_dbg_kms(&dev_priv->drm, "active links %d\n", intel_dp->active_mst_links); - if (intel_de_wait_for_set(dev_priv, intel_dp->regs.dp_tp_status, - DP_TP_STATUS_ACT_SENT, 1)) - drm_err(&dev_priv->drm, "Timed out waiting for ACT sent\n"); - - drm_dp_check_act_status(&intel_dp->mst_mgr); + wait_for_act_sent(intel_dp); drm_dp_update_payload_part2(&intel_dp->mst_mgr); From 3d289d2578eff2a274fc5e63fca2a611404a3003 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Tue, 16 Jun 2020 17:18:53 +0300 Subject: [PATCH 192/222] drm/i915/dp_mst: Clear only the ACT sent flag from DP_TP_STATUS MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It's not clear if the DP_TP_STATUS flags other than the ACT sent flag have some side-effect, so don't clear those; we don't depend on the state of these flags anyway. 
Suggested-by: Ville Syrjälä Cc: Ville Syrjälä Signed-off-by: Imre Deak Reviewed-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20200616141855.746-4-imre.deak@intel.com --- drivers/gpu/drm/i915/display/intel_dp_mst.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.c b/drivers/gpu/drm/i915/display/intel_dp_mst.c index 33eb15478f40..2ee538d99ba7 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_mst.c +++ b/drivers/gpu/drm/i915/display/intel_dp_mst.c @@ -323,7 +323,7 @@ static void clear_act_sent(struct intel_dp *intel_dp) struct drm_i915_private *i915 = dp_to_i915(intel_dp); intel_de_write(i915, intel_dp->regs.dp_tp_status, - intel_de_read(i915, intel_dp->regs.dp_tp_status)); + DP_TP_STATUS_ACT_SENT); } static void wait_for_act_sent(struct intel_dp *intel_dp) From 90d4f99ac56b4aadf4e87871a4e9128474da259b Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Tue, 16 Jun 2020 17:18:54 +0300 Subject: [PATCH 193/222] drm/i915/dp_mst: Clear the ACT sent flag during encoder disabling too MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit During encoder enabling we clear the flag before starting the ACT sequence and wait for the flag, but the clearing is missing during encoder disabling, add it there too. Since nothing cleared the flag automatically we could've run subsequent disabling steps too early. 
Cc: Ville Syrjälä Signed-off-by: Imre Deak Reviewed-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20200616141855.746-5-imre.deak@intel.com --- drivers/gpu/drm/i915/display/intel_dp_mst.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.c b/drivers/gpu/drm/i915/display/intel_dp_mst.c index 2ee538d99ba7..e1bc14139f46 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_mst.c +++ b/drivers/gpu/drm/i915/display/intel_dp_mst.c @@ -390,6 +390,8 @@ static void intel_mst_post_disable_dp(struct intel_atomic_state *state, drm_dp_update_payload_part2(&intel_dp->mst_mgr); + clear_act_sent(intel_dp); + val = intel_de_read(dev_priv, TRANS_DDI_FUNC_CTL(old_crtc_state->cpu_transcoder)); val &= ~TRANS_DDI_DP_VC_PAYLOAD_ALLOC; From 580fbdc5136822208f107500682e50a1cb232e94 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Tue, 23 Jun 2020 11:24:11 +0300 Subject: [PATCH 194/222] drm/i915/dp_mst: Enable VC payload allocation after transcoder is enabled MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The spec requires enabling the MST Virtual Channel payload allocation - in a separate step - after the transcoder is enabled, follow this. 
Cc: Ville Syrjälä Cc: José Roberto de Souza Signed-off-by: Imre Deak Reviewed-by: José Roberto de Souza Link: https://patchwork.freedesktop.org/patch/msgid/20200623082411.3889-1-imre.deak@intel.com --- drivers/gpu/drm/i915/display/intel_ddi.c | 8 +++----- drivers/gpu/drm/i915/display/intel_dp_mst.c | 8 ++++++++ 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c index 73d6cc29291a..884b507c5f55 100644 --- a/drivers/gpu/drm/i915/display/intel_ddi.c +++ b/drivers/gpu/drm/i915/display/intel_ddi.c @@ -1653,7 +1653,6 @@ void intel_ddi_enable_transcoder_func(struct intel_encoder *encoder, struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); enum transcoder cpu_transcoder = crtc_state->cpu_transcoder; - u32 ctl; if (INTEL_GEN(dev_priv) >= 11) { enum transcoder master_transcoder = crtc_state->master_transcoder; @@ -1671,10 +1670,9 @@ void intel_ddi_enable_transcoder_func(struct intel_encoder *encoder, TRANS_DDI_FUNC_CTL2(cpu_transcoder), ctl2); } - ctl = intel_ddi_transcoder_func_reg_val_get(encoder, crtc_state); - if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_DP_MST)) - ctl |= TRANS_DDI_DP_VC_PAYLOAD_ALLOC; - intel_de_write(dev_priv, TRANS_DDI_FUNC_CTL(cpu_transcoder), ctl); + intel_de_write(dev_priv, TRANS_DDI_FUNC_CTL(cpu_transcoder), + intel_ddi_transcoder_func_reg_val_get(encoder, + crtc_state)); } /* diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.c b/drivers/gpu/drm/i915/display/intel_dp_mst.c index e1bc14139f46..f7df7a5b7c13 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_mst.c +++ b/drivers/gpu/drm/i915/display/intel_dp_mst.c @@ -529,6 +529,7 @@ static void intel_mst_enable_dp(struct intel_atomic_state *state, struct intel_digital_port *intel_dig_port = intel_mst->primary; struct intel_dp *intel_dp = &intel_dig_port->dp; struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + 
u32 val; drm_WARN_ON(&dev_priv->drm, pipe_config->has_pch_encoder); @@ -536,6 +537,13 @@ static void intel_mst_enable_dp(struct intel_atomic_state *state, intel_ddi_enable_transcoder_func(encoder, pipe_config); + val = intel_de_read(dev_priv, + TRANS_DDI_FUNC_CTL(pipe_config->cpu_transcoder)); + val |= TRANS_DDI_DP_VC_PAYLOAD_ALLOC; + intel_de_write(dev_priv, + TRANS_DDI_FUNC_CTL(pipe_config->cpu_transcoder), + val); + drm_dbg_kms(&dev_priv->drm, "active links %d\n", intel_dp->active_mst_links); From 3625a1f5bf6ba47fbd903be1d63dade9a48e8e72 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 12 May 2020 20:41:44 +0300 Subject: [PATCH 195/222] drm/i915: Fix DP_TRAIN_MAX_{PRE_EMPHASIS,SWING}_REACHED handling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The DP spec says: "The transmitter shall support at least three levels of voltage swing (Levels 0, 1, and 2). If only three levels of voltage swing are supported (VOLTAGE SWING SET field (bits 1:0) are programmed to 10 (Level 2)), this bit shall be set to 1, and cleared in all other cases. If all four levels of voltage swing are supported (VOLTAGE SWING SET field (bits 1:0) are programmed to 11 (Level 3)), this bit shall be set to 1,and cleared in all other cases." Let's follow that exactly instead of the current approach where we can set those also for vswing/preemph levels 0 or 1 (or 2 when the platform max is 3).
Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20200512174145.3186-7-ville.syrjala@linux.intel.com Reviewed-by: Imre Deak --- drivers/gpu/drm/i915/display/intel_dp_link_training.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_dp_link_training.c b/drivers/gpu/drm/i915/display/intel_dp_link_training.c index b9e4ee2dbddc..2493142a70e9 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_link_training.c +++ b/drivers/gpu/drm/i915/display/intel_dp_link_training.c @@ -67,8 +67,9 @@ void intel_dp_get_adjust_train(struct intel_dp *intel_dp, if (p >= preemph_max) p = preemph_max | DP_TRAIN_MAX_PRE_EMPHASIS_REACHED; - voltage_max = min(intel_dp->voltage_max(intel_dp), - dp_voltage_max(p)); + v = min(v, dp_voltage_max(p)); + + voltage_max = intel_dp->voltage_max(intel_dp); if (v >= voltage_max) v = voltage_max | DP_TRAIN_MAX_SWING_REACHED; From 1e8110a6c347afdd8f2629815d691f0505b5023e Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Wed, 17 Jun 2020 11:00:06 -0700 Subject: [PATCH 196/222] drm/i915: Extend Wa_14010685332 to all ICP+ PCH's This workaround now also applies to TGL and RKL, so extend the PCH test to just capture everything ICP and beyond.
Signed-off-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20200617180006.4130501-1-matthew.d.roper@intel.com Reviewed-by: Matt Atwood --- drivers/gpu/drm/i915/i915_irq.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 710224d930c5..562b43ed077f 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -2907,10 +2907,8 @@ static void gen11_display_irq_reset(struct drm_i915_private *dev_priv) if (INTEL_PCH_TYPE(dev_priv) >= PCH_ICP) GEN3_IRQ_RESET(uncore, SDE); - /* Wa_14010685332:icl,jsl,ehl */ - if (INTEL_PCH_TYPE(dev_priv) == PCH_ICP || - INTEL_PCH_TYPE(dev_priv) == PCH_JSP || - INTEL_PCH_TYPE(dev_priv) == PCH_MCC) { + /* Wa_14010685332:icl,jsl,ehl,tgl,rkl */ + if (INTEL_PCH_TYPE(dev_priv) >= PCH_ICP) { intel_uncore_rmw(uncore, SOUTH_CHICKEN1, SBCLK_RUN_REFCLK_DIS, SBCLK_RUN_REFCLK_DIS); intel_uncore_rmw(uncore, SOUTH_CHICKEN1, From af9e1032ff6ead528f5e7284a4b8e207550e63bc Mon Sep 17 00:00:00 2001 From: Matt Atwood Date: Wed, 24 Jun 2020 14:57:23 -0700 Subject: [PATCH 197/222] drm/i915/gen12: implement Wa_14011508470 Update code to reflect recent bspec changes Bspec: 52890 Bspec: 53508 Signed-off-by: Matt Atwood Reviewed-by: Radhakrishna Sripada Signed-off-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20200624215723.2316-1-matthew.s.atwood@intel.com --- drivers/gpu/drm/i915/display/intel_display_power.c | 8 ++++++++ drivers/gpu/drm/i915/i915_reg.h | 6 ++++++ 2 files changed, 14 insertions(+) diff --git a/drivers/gpu/drm/i915/display/intel_display_power.c b/drivers/gpu/drm/i915/display/intel_display_power.c index 834162bc5a3f..8a277dfbc070 100644 --- a/drivers/gpu/drm/i915/display/intel_display_power.c +++ b/drivers/gpu/drm/i915/display/intel_display_power.c @@ -5298,6 +5298,7 @@ static void icl_display_core_init(struct drm_i915_private *dev_priv, { struct i915_power_domains *power_domains = 
&dev_priv->power_domains; struct i915_power_well *well; + u32 val; gen9_set_dc_state(dev_priv, DC_STATE_DISABLE); @@ -5331,6 +5332,13 @@ static void icl_display_core_init(struct drm_i915_private *dev_priv, if (resume && dev_priv->csr.dmc_payload) intel_csr_load_program(dev_priv); + + /* Wa_14011508470 */ + if (IS_GEN(dev_priv, 12)) { + val = DCPR_CLEAR_MEMSTAT_DIS | DCPR_SEND_RESP_IMM | + DCPR_MASK_LPMODE | DCPR_MASK_MAXLATENCY_MEMUP_CLR; + intel_uncore_rmw(&dev_priv->uncore, GEN11_CHICKEN_DCPR_2, 0, val); + } } static void icl_display_core_uninit(struct drm_i915_private *dev_priv) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index f09120cac89a..284af0c6439c 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -7880,6 +7880,12 @@ enum { #define MASK_WAKEMEM (1 << 13) #define CNL_DDI_CLOCK_REG_ACCESS_ON (1 << 7) +#define GEN11_CHICKEN_DCPR_2 _MMIO(0x46434) +#define DCPR_MASK_MAXLATENCY_MEMUP_CLR REG_BIT(27) +#define DCPR_MASK_LPMODE REG_BIT(26) +#define DCPR_SEND_RESP_IMM REG_BIT(25) +#define DCPR_CLEAR_MEMSTAT_DIS REG_BIT(24) + #define SKL_DFSM _MMIO(0x51000) #define SKL_DFSM_DISPLAY_PM_DISABLE (1 << 27) #define SKL_DFSM_DISPLAY_HDCP_DISABLE (1 << 25) From 4003dac180d059ebd6f39b63dd33df408c1e4914 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Thu, 25 Jun 2020 23:00:03 +0300 Subject: [PATCH 198/222] drm/i915: Clamp linetime wm to <64usec MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The linetime watermark is a 9 bit value, which gives us a maximum linetime of just below 64 usec. If the linetime exceeds that value we currently just discard the high bits and program the rest into the register, which angers the state checker. To avoid that let's just clamp the value to the max. 
I believe it should be perfectly fine to program a smaller linetime wm than strictly required, just means the hardware may fetch data sooner than strictly needed. We are further reassured by the fact that with DRRS the spec tells us to program the smaller of the two linetimes corresponding to the two refresh rates. Cc: Stanislav Lisovskiy Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20200625200003.12436-1-ville.syrjala@linux.intel.com Reviewed-by: Stanislav Lisovskiy --- drivers/gpu/drm/i915/display/intel_display.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index a11bb675f9b3..d486d675166f 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -12581,12 +12581,15 @@ static u16 hsw_linetime_wm(const struct intel_crtc_state *crtc_state) { const struct drm_display_mode *adjusted_mode = &crtc_state->hw.adjusted_mode; + int linetime_wm; if (!crtc_state->hw.enable) return 0; - return DIV_ROUND_CLOSEST(adjusted_mode->crtc_htotal * 1000 * 8, - adjusted_mode->crtc_clock); + linetime_wm = DIV_ROUND_CLOSEST(adjusted_mode->crtc_htotal * 1000 * 8, + adjusted_mode->crtc_clock); + + return min(linetime_wm, 0x1ff); } static u16 hsw_ips_linetime_wm(const struct intel_crtc_state *crtc_state, @@ -12594,12 +12597,15 @@ static u16 hsw_ips_linetime_wm(const struct intel_crtc_state *crtc_state, { const struct drm_display_mode *adjusted_mode = &crtc_state->hw.adjusted_mode; + int linetime_wm; if (!crtc_state->hw.enable) return 0; - return DIV_ROUND_CLOSEST(adjusted_mode->crtc_htotal * 1000 * 8, - cdclk_state->logical.cdclk); + linetime_wm = DIV_ROUND_CLOSEST(adjusted_mode->crtc_htotal * 1000 * 8, + cdclk_state->logical.cdclk); + + return min(linetime_wm, 0x1ff); } static u16 skl_linetime_wm(const struct intel_crtc_state *crtc_state) @@ -12608,7 +12614,7 @@ static 
u16 skl_linetime_wm(const struct intel_crtc_state *crtc_state) struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); const struct drm_display_mode *adjusted_mode = &crtc_state->hw.adjusted_mode; - u16 linetime_wm; + int linetime_wm; if (!crtc_state->hw.enable) return 0; @@ -12620,7 +12626,7 @@ static u16 skl_linetime_wm(const struct intel_crtc_state *crtc_state) if (IS_GEN9_LP(dev_priv) && dev_priv->ipc_enabled) linetime_wm /= 2; - return linetime_wm; + return min(linetime_wm, 0x1ff); } static int hsw_compute_linetime_wm(struct intel_atomic_state *state, From b08239b2f47190981309c29149e31f111f699aab Mon Sep 17 00:00:00 2001 From: Oliver Barta Date: Mon, 4 May 2020 14:35:24 +0200 Subject: [PATCH 199/222] drm/i915: HDCP: retry link integrity check on failure A single Ri mismatch doesn't automatically mean that the link integrity is broken. Update and check of Ri and Ri' are done asynchronously. In case an update happens just between the read of Ri' and the check against Ri there will be a mismatch even if the link integrity is fine otherwise. 
Signed-off-by: Oliver Barta Reviewed-by: Sean Paul Reviewed-by: Ramalingam C Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20200504123524.7731-1-oliver.barta@aptiv.com --- drivers/gpu/drm/i915/display/intel_hdmi.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_hdmi.c b/drivers/gpu/drm/i915/display/intel_hdmi.c index a31a98d26882..864a1642e81c 100644 --- a/drivers/gpu/drm/i915/display/intel_hdmi.c +++ b/drivers/gpu/drm/i915/display/intel_hdmi.c @@ -1540,7 +1540,7 @@ int intel_hdmi_hdcp_toggle_signalling(struct intel_digital_port *intel_dig_port, } static -bool intel_hdmi_hdcp_check_link(struct intel_digital_port *intel_dig_port) +bool intel_hdmi_hdcp_check_link_once(struct intel_digital_port *intel_dig_port) { struct drm_i915_private *i915 = to_i915(intel_dig_port->base.base.dev); struct intel_connector *connector = @@ -1563,8 +1563,7 @@ bool intel_hdmi_hdcp_check_link(struct intel_digital_port *intel_dig_port) if (wait_for((intel_de_read(i915, HDCP_STATUS(i915, cpu_transcoder, port)) & (HDCP_STATUS_RI_MATCH | HDCP_STATUS_ENC)) == (HDCP_STATUS_RI_MATCH | HDCP_STATUS_ENC), 1)) { - drm_err(&i915->drm, - "Ri' mismatch detected, link check failed (%x)\n", + drm_dbg_kms(&i915->drm, "Ri' mismatch detected (%x)\n", intel_de_read(i915, HDCP_STATUS(i915, cpu_transcoder, port))); return false; @@ -1572,6 +1571,20 @@ bool intel_hdmi_hdcp_check_link(struct intel_digital_port *intel_dig_port) return true; } +static +bool intel_hdmi_hdcp_check_link(struct intel_digital_port *intel_dig_port) +{ + struct drm_i915_private *i915 = to_i915(intel_dig_port->base.base.dev); + int retry; + + for (retry = 0; retry < 3; retry++) + if (intel_hdmi_hdcp_check_link_once(intel_dig_port)) + return true; + + drm_err(&i915->drm, "Link check failed\n"); + return false; +} + struct hdcp2_hdmi_msg_timeout { u8 msg_id; u16 timeout; From 0e2feab51b38fbcd40427254d7e599832e73b797 Mon Sep 17 00:00:00 
2001 From: Colin Ian King Date: Tue, 16 Jun 2020 09:21:29 +0100 Subject: [PATCH 200/222] drm/i915: fix a couple of spelling mistakes in kernel parameter help text There are a couple of spelling mistakes in kernel parameter help text, namely "helpfull" and "paramters". Fix them. Signed-off-by: Colin Ian King Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20200616082129.65517-1-colin.king@canonical.com --- drivers/gpu/drm/i915/i915_params.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_params.c b/drivers/gpu/drm/i915/i915_params.c index a7b61e6ec508..8d8db9ff0a48 100644 --- a/drivers/gpu/drm/i915/i915_params.c +++ b/drivers/gpu/drm/i915/i915_params.c @@ -99,8 +99,8 @@ i915_param_named_unsafe(enable_psr, int, 0400, i915_param_named(psr_safest_params, bool, 0400, "Replace PSR VBT parameters by the safest and not optimal ones. This " - "is helpfull to detect if PSR issues are related to bad values set in " - " VBT. (0=use VBT paramters, 1=use safest parameters)"); + "is helpful to detect if PSR issues are related to bad values set in " + " VBT. (0=use VBT parameters, 1=use safest parameters)"); i915_param_named_unsafe(force_probe, charp, 0400, "Force probe the driver for specified devices. " From 607856a835539c465015882328f1e37e95de8ee7 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 16 Jun 2020 12:42:21 +0100 Subject: [PATCH 201/222] drm/i915/display: fix missing null check on allocated dsb object Currently there is no null check for a failed memory allocation on the dsb object and without this a null pointer dereference error can occur. Fix this by adding a null check. Note: added a drm_err message in keeping with the error message style in the function. 
Addresses-Coverity: ("Dereference null return") Fixes: afeda4f3b1c8 ("drm/i915/dsb: Pre allocate and late cleanup of cmd buffer") Signed-off-by: Colin Ian King Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20200616114221.73971-1-colin.king@canonical.com --- drivers/gpu/drm/i915/display/intel_dsb.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/i915/display/intel_dsb.c b/drivers/gpu/drm/i915/display/intel_dsb.c index 24e6d63e2d47..566fa72427b3 100644 --- a/drivers/gpu/drm/i915/display/intel_dsb.c +++ b/drivers/gpu/drm/i915/display/intel_dsb.c @@ -271,6 +271,10 @@ void intel_dsb_prepare(struct intel_crtc_state *crtc_state) return; dsb = kmalloc(sizeof(*dsb), GFP_KERNEL); + if (!dsb) { + drm_err(&i915->drm, "DSB object creation failed\n"); + return; + } wakeref = intel_runtime_pm_get(&i915->runtime_pm); From c1b9fd3d310177b31621d5e661f06885869cae12 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 30 Jun 2020 16:27:24 +0100 Subject: [PATCH 202/222] drm/i915: Skip stale object handle for debugfs per-file-stats As we close a handle GEM object, we update the drm_file's idr with an error^W NULL pointer to indicate the in-progress closure, and finally removing it. If we read the idr directly, we may then see an invalid object pointer, and in our debugfs per_file_stats() we therefore need to protect against the entry being invalid. 
[ 1016.651637] RIP: 0010:per_file_stats+0xe/0x16e [ 1016.651646] Code: d2 41 0f b6 8e 69 8c 00 00 48 89 df 48 c7 c6 7b 74 8c be 31 c0 e8 0c 89 cf ff eb d2 0f 1f 44 00 00 55 48 89 e5 41 57 41 56 53 <8b> 06 85 c0 0f 84 4d 01 00 00 49 89 d6 48 89 f3 3d ff ff ff 7f 73 [ 1016.651651] RSP: 0018:ffffad3a01337ba0 EFLAGS: 00010293 [ 1016.651656] RAX: 0000000000000018 RBX: ffff96fe040d65e0 RCX: 0000000000000002 [ 1016.651660] RDX: ffffad3a01337c50 RSI: 0000000000000000 RDI: 00000000000001e8 [ 1016.651663] RBP: ffffad3a01337bb8 R08: 0000000000000000 R09: 00000000000001c0 [ 1016.651667] R10: 0000000000000000 R11: ffffffffbdbe5fce R12: 0000000000000000 [ 1016.651671] R13: ffffffffbdbe5fce R14: ffffad3a01337c50 R15: 0000000000000001 [ 1016.651676] FS: 00007a597e2d7480(0000) GS:ffff96ff3bb00000(0000) knlGS:0000000000000000 [ 1016.651680] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 1016.651683] CR2: 0000000000000000 CR3: 0000000171fc2001 CR4: 00000000003606e0 [ 1016.651687] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 1016.651690] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 1016.651693] Call Trace: [ 1016.651693] Call Trace: [ 1016.651703] idr_for_each+0x8a/0xe8 [ 1016.651711] i915_gem_object_info+0x2a3/0x3eb [ 1016.651720] seq_read+0x162/0x3ca [ 1016.651727] full_proxy_read+0x5b/0x8d [ 1016.651733] __vfs_read+0x45/0x1bb [ 1016.651741] vfs_read+0xc9/0x15e [ 1016.651746] ksys_read+0x7e/0xde [ 1016.651752] do_syscall_64+0x54/0x68 [ 1016.651758] entry_SYSCALL_64_after_hwframe+0x44/0xa9 Reported-by: Guenter Roeck Fixes: a8c15954d64a ("drm/i915: Protect debugfs per_file_stats with RCU lock") Signed-off-by: Chris Wilson Cc: Mika Kuoppala Cc: Guenter Roeck Cc: stable@vger.kernel.org Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20200630152724.3734-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_debugfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 8594a8ef08ce..9ca94a435b75 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -230,7 +230,7 @@ static int per_file_stats(int id, void *ptr, void *data) struct file_stats *stats = data; struct i915_vma *vma; - if (!kref_get_unless_zero(&obj->base.refcount)) + if (IS_ERR_OR_NULL(obj) || !kref_get_unless_zero(&obj->base.refcount)) return 0; stats->count++; From 5331889b5ffb11d6257953e418291a9f04c02bed Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Wed, 29 Apr 2020 13:10:25 +0300 Subject: [PATCH 203/222] drm/i915/fbc: Fix fence_y_offset handling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The current fence_y_offset calculation is broken. I think it more or less used to do the right thing, but then I changed the plane code to put the final x/y source offsets back into the src rectangle so now it's just subtracting the same value from itself. The code would never have worked if we allowed the framebuffer to have a non-zero offset. Let's do this in a better way by just calculating the fence_y_offset from the final plane surface offset. Note that we don't align the plane surface address to fence rows so with horizontal panning there's often a horizontal offset from the fence start to the surface address as well. We have no way to tell the hardware about that so we just ignore it. Based on some quick tests the invalidation still happens correctly. I presume due to the invalidation nuking at least the full line (or a segment of multiple lines).
Fixes: 54d4d719fa11 ("drm/i915: Overcome display engine stride limits via GTT remapping") Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20200429101034.8208-4-ville.syrjala@linux.intel.com Reviewed-by: Matt Roper --- drivers/gpu/drm/i915/display/intel_display.c | 11 +++++++ drivers/gpu/drm/i915/display/intel_display.h | 1 + drivers/gpu/drm/i915/display/intel_fbc.c | 32 ++++++-------------- drivers/gpu/drm/i915/i915_drv.h | 6 ++-- 4 files changed, 25 insertions(+), 25 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index d486d675166f..4fdc9ccfea2a 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -3822,6 +3822,17 @@ skl_check_main_ccs_coordinates(struct intel_plane_state *plane_state, return true; } +unsigned int +intel_plane_fence_y_offset(const struct intel_plane_state *plane_state) +{ + int x = 0, y = 0; + + intel_plane_adjust_aligned_offset(&x, &y, plane_state, 0, + plane_state->color_plane[0].offset, 0); + + return y; +} + static int skl_check_main_surface(struct intel_plane_state *plane_state) { struct drm_i915_private *dev_priv = to_i915(plane_state->uapi.plane->dev); diff --git a/drivers/gpu/drm/i915/display/intel_display.h b/drivers/gpu/drm/i915/display/intel_display.h index b7a6d56bac5f..f68007ff8a13 100644 --- a/drivers/gpu/drm/i915/display/intel_display.h +++ b/drivers/gpu/drm/i915/display/intel_display.h @@ -615,6 +615,7 @@ unsigned int i9xx_plane_max_stride(struct intel_plane *plane, u32 pixel_format, u64 modifier, unsigned int rotation); int bdw_get_pipemisc_bpp(struct intel_crtc *crtc); +unsigned int intel_plane_fence_y_offset(const struct intel_plane_state *plane_state); struct intel_display_error_state * intel_display_capture_error_state(struct drm_i915_private *dev_priv); diff --git a/drivers/gpu/drm/i915/display/intel_fbc.c b/drivers/gpu/drm/i915/display/intel_fbc.c index 
30649e17cfb7..2312e70e2e5e 100644 --- a/drivers/gpu/drm/i915/display/intel_fbc.c +++ b/drivers/gpu/drm/i915/display/intel_fbc.c @@ -47,19 +47,6 @@ #include "intel_fbc.h" #include "intel_frontbuffer.h" -/* - * In some platforms where the CRTC's x:0/y:0 coordinates doesn't match the - * frontbuffer's x:0/y:0 coordinates we lie to the hardware about the plane's - * origin so the x and y offsets can actually fit the registers. As a - * consequence, the fence doesn't really start exactly at the display plane - * address we program because it starts at the real start of the buffer, so we - * have to take this into consideration here. - */ -static unsigned int get_crtc_fence_y_offset(struct intel_fbc *fbc) -{ - return fbc->state_cache.plane.y - fbc->state_cache.plane.adjusted_y; -} - /* * For SKL+, the plane source size used by the hardware is based on the value we * write to the PLANE_SIZE register. For BDW-, the hardware looks at the value @@ -141,7 +128,7 @@ static void i8xx_fbc_activate(struct drm_i915_private *dev_priv) fbc_ctl2 |= FBC_CTL_CPU_FENCE; intel_de_write(dev_priv, FBC_CONTROL2, fbc_ctl2); intel_de_write(dev_priv, FBC_FENCE_OFF, - params->crtc.fence_y_offset); + params->fence_y_offset); } /* enable it... 
*/ @@ -175,7 +162,7 @@ static void g4x_fbc_activate(struct drm_i915_private *dev_priv) if (params->fence_id >= 0) { dpfc_ctl |= DPFC_CTL_FENCE_EN | params->fence_id; intel_de_write(dev_priv, DPFC_FENCE_YOFF, - params->crtc.fence_y_offset); + params->fence_y_offset); } else { intel_de_write(dev_priv, DPFC_FENCE_YOFF, 0); } @@ -243,7 +230,7 @@ static void ilk_fbc_activate(struct drm_i915_private *dev_priv) intel_de_write(dev_priv, SNB_DPFC_CTL_SA, SNB_CPU_FENCE_ENABLE | params->fence_id); intel_de_write(dev_priv, DPFC_CPU_FENCE_OFFSET, - params->crtc.fence_y_offset); + params->fence_y_offset); } } else { if (IS_GEN(dev_priv, 6)) { @@ -253,7 +240,7 @@ static void ilk_fbc_activate(struct drm_i915_private *dev_priv) } intel_de_write(dev_priv, ILK_DPFC_FENCE_YOFF, - params->crtc.fence_y_offset); + params->fence_y_offset); /* enable it... */ intel_de_write(dev_priv, ILK_DPFC_CONTROL, dpfc_ctl | DPFC_CTL_EN); @@ -320,7 +307,7 @@ static void gen7_fbc_activate(struct drm_i915_private *dev_priv) intel_de_write(dev_priv, SNB_DPFC_CTL_SA, SNB_CPU_FENCE_ENABLE | params->fence_id); intel_de_write(dev_priv, DPFC_CPU_FENCE_OFFSET, - params->crtc.fence_y_offset); + params->fence_y_offset); } else if (dev_priv->ggtt.num_fences) { intel_de_write(dev_priv, SNB_DPFC_CTL_SA, 0); intel_de_write(dev_priv, DPFC_CPU_FENCE_OFFSET, 0); @@ -631,8 +618,8 @@ static bool rotation_is_valid(struct drm_i915_private *dev_priv, /* * For some reason, the hardware tracking starts looking at whatever we * programmed as the display plane base address register. It does not look at - * the X and Y offset registers. That's why we look at the crtc->adjusted{x,y} - * variables instead of just looking at the pipe/plane size. + * the X and Y offset registers. That's why we include the src x/y offsets + * instead of just looking at the plane size. 
*/ static bool intel_fbc_hw_tracking_covers_screen(struct intel_crtc *crtc) { @@ -705,7 +692,6 @@ static void intel_fbc_update_state_cache(struct intel_crtc *crtc, cache->plane.src_h = drm_rect_height(&plane_state->uapi.src) >> 16; cache->plane.adjusted_x = plane_state->color_plane[0].x; cache->plane.adjusted_y = plane_state->color_plane[0].y; - cache->plane.y = plane_state->uapi.src.y1 >> 16; cache->plane.pixel_blend_mode = plane_state->hw.pixel_blend_mode; @@ -713,6 +699,8 @@ static void intel_fbc_update_state_cache(struct intel_crtc *crtc, cache->fb.stride = fb->pitches[0]; cache->fb.modifier = fb->modifier; + cache->fence_y_offset = intel_plane_fence_y_offset(plane_state); + drm_WARN_ON(&dev_priv->drm, plane_state->flags & PLANE_HAS_FENCE && !plane_state->vma->fence); @@ -883,10 +871,10 @@ static void intel_fbc_get_reg_params(struct intel_crtc *crtc, memset(params, 0, sizeof(*params)); params->fence_id = cache->fence_id; + params->fence_y_offset = cache->fence_y_offset; params->crtc.pipe = crtc->pipe; params->crtc.i9xx_plane = to_intel_plane(crtc->base.primary)->i9xx_plane; - params->crtc.fence_y_offset = get_crtc_fence_y_offset(fbc); params->fb.format = cache->fb.format; params->fb.stride = cache->fb.stride; diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 9aad3ec979bd..087608d98f09 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -411,8 +411,6 @@ struct intel_fbc { int adjusted_x; int adjusted_y; - int y; - u16 pixel_blend_mode; } plane; @@ -421,6 +419,8 @@ struct intel_fbc { unsigned int stride; u64 modifier; } fb; + + unsigned int fence_y_offset; u16 gen9_wa_cfb_stride; s8 fence_id; } state_cache; @@ -436,7 +436,6 @@ struct intel_fbc { struct { enum pipe pipe; enum i9xx_plane_id i9xx_plane; - unsigned int fence_y_offset; } crtc; struct { @@ -445,6 +444,7 @@ struct intel_fbc { } fb; int cfb_size; + unsigned int fence_y_offset; u16 gen9_wa_cfb_stride; s8 fence_id; bool plane_visible; 
From d838962f165c4bdbed9047b0f4d9ecd783e04b71 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Wed, 29 Apr 2020 13:10:28 +0300 Subject: [PATCH 204/222] drm/i915/fbc: Don't clear busy_bits for origin==GTT MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The hardware host tracking won't nuke the entire cfb (unless the entire fb is written through the gtt) so don't clear the busy_bits for gtt tracking. Not that it really matters anymore since we've lost ORIGIN_GTT usage everywhere. Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20200429101034.8208-7-ville.syrjala@linux.intel.com Reviewed-by: José Roberto de Souza --- drivers/gpu/drm/i915/display/intel_fbc.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/intel_fbc.c b/drivers/gpu/drm/i915/display/intel_fbc.c index 2312e70e2e5e..90088c77c059 100644 --- a/drivers/gpu/drm/i915/display/intel_fbc.c +++ b/drivers/gpu/drm/i915/display/intel_fbc.c @@ -1078,11 +1078,19 @@ void intel_fbc_flush(struct drm_i915_private *dev_priv, if (!HAS_FBC(dev_priv)) return; + /* + * GTT tracking does not nuke the entire cfb + * so don't clear busy_bits set for some other + * reason. + */ + if (origin == ORIGIN_GTT) + return; + mutex_lock(&fbc->lock); fbc->busy_bits &= ~frontbuffer_bits; - if (origin == ORIGIN_GTT || origin == ORIGIN_FLIP) + if (origin == ORIGIN_FLIP) goto out; if (!fbc->busy_bits && fbc->crtc && From a4c74b297448e64b1ae2b017e4b7efec37ef7592 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Wed, 29 Apr 2020 13:10:30 +0300 Subject: [PATCH 205/222] drm/i915/fbc: Parametrize FBC_CONTROL MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Parametrize the FBC_CONTROL bits for neater code. Also add the one missing bit: "stop compression on modification". 
Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20200429101034.8208-9-ville.syrjala@linux.intel.com Reviewed-by: José Roberto de Souza --- drivers/gpu/drm/i915/display/intel_fbc.c | 8 ++++---- drivers/gpu/drm/i915/i915_reg.h | 18 +++++++++++------- 2 files changed, 15 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_fbc.c b/drivers/gpu/drm/i915/display/intel_fbc.c index 90088c77c059..9cdc1221e760 100644 --- a/drivers/gpu/drm/i915/display/intel_fbc.c +++ b/drivers/gpu/drm/i915/display/intel_fbc.c @@ -133,13 +133,13 @@ static void i8xx_fbc_activate(struct drm_i915_private *dev_priv) /* enable it... */ fbc_ctl = intel_de_read(dev_priv, FBC_CONTROL); - fbc_ctl &= 0x3fff << FBC_CTL_INTERVAL_SHIFT; + fbc_ctl &= FBC_CTL_INTERVAL(0x3fff); fbc_ctl |= FBC_CTL_EN | FBC_CTL_PERIODIC; if (IS_I945GM(dev_priv)) fbc_ctl |= FBC_CTL_C3_IDLE; /* 945 needs special SR handling */ - fbc_ctl |= (cfb_pitch & 0xff) << FBC_CTL_STRIDE_SHIFT; + fbc_ctl |= FBC_CTL_STRIDE(cfb_pitch & 0xff); if (params->fence_id >= 0) - fbc_ctl |= params->fence_id; + fbc_ctl |= FBC_CTL_FENCENO(params->fence_id); intel_de_write(dev_priv, FBC_CONTROL, fbc_ctl); } @@ -1423,7 +1423,7 @@ void intel_fbc_init(struct drm_i915_private *dev_priv) /* This value was pulled out of someone's hat */ if (INTEL_GEN(dev_priv) <= 4 && !IS_GM45(dev_priv)) intel_de_write(dev_priv, FBC_CONTROL, - 500 << FBC_CTL_INTERVAL_SHIFT); + FBC_CTL_INTERVAL(500)); /* We still don't have any sort of hardware state readout for FBC, so * deactivate it in case the BIOS activated it to make sure software diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 284af0c6439c..2ecde5c2e357 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -3208,13 +3208,17 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define FBC_CFB_BASE _MMIO(0x3200) /* 4k page aligned */ #define FBC_LL_BASE _MMIO(0x3204) /* 4k page aligned */ 
#define FBC_CONTROL _MMIO(0x3208) -#define FBC_CTL_EN (1 << 31) -#define FBC_CTL_PERIODIC (1 << 30) -#define FBC_CTL_INTERVAL_SHIFT (16) -#define FBC_CTL_UNCOMPRESSIBLE (1 << 14) -#define FBC_CTL_C3_IDLE (1 << 13) -#define FBC_CTL_STRIDE_SHIFT (5) -#define FBC_CTL_FENCENO_SHIFT (0) +#define FBC_CTL_EN REG_BIT(31) +#define FBC_CTL_PERIODIC REG_BIT(30) +#define FBC_CTL_INTERVAL_MASK REG_GENMASK(29, 16) +#define FBC_CTL_INTERVAL(x) REG_FIELD_PREP(FBC_CTL_INTERVAL_MASK, (x)) +#define FBC_CTL_STOP_ON_MOD REG_BIT(15) +#define FBC_CTL_UNCOMPRESSIBLE REG_BIT(14) /* i915+ */ +#define FBC_CTL_C3_IDLE REG_BIT(13) /* i945gm */ +#define FBC_CTL_STRIDE_MASK REG_GENMASK(12, 5) +#define FBC_CTL_STRIDE(x) REG_FIELD_PREP(FBC_CTL_STRIDE_MASK, (x)) +#define FBC_CTL_FENCENO_MASK REG_GENMASK(3, 0) +#define FBC_CTL_FENCENO(x) REG_FIELD_PREP(FBC_CTL_FENCENO_MASK, (x)) #define FBC_COMMAND _MMIO(0x320c) #define FBC_CMD_COMPRESS (1 << 0) #define FBC_STATUS _MMIO(0x3210) From a68ce21ba0c44f5504e1e5bb0e5151b239540dc5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Wed, 29 Apr 2020 13:10:31 +0300 Subject: [PATCH 206/222] drm/i915/fbc: Store the fbc1 compression interval in the params MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Avoid the FBC_CONTROL rmw and just store the fbc compression interval in the params. Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20200429101034.8208-10-ville.syrjala@linux.intel.com Reviewed-by: José Roberto de Souza --- drivers/gpu/drm/i915/display/intel_fbc.c | 13 ++++++------- drivers/gpu/drm/i915/i915_drv.h | 2 ++ 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_fbc.c b/drivers/gpu/drm/i915/display/intel_fbc.c index 9cdc1221e760..09b424611548 100644 --- a/drivers/gpu/drm/i915/display/intel_fbc.c +++ b/drivers/gpu/drm/i915/display/intel_fbc.c @@ -132,8 +132,7 @@ static void i8xx_fbc_activate(struct
drm_i915_private *dev_priv) } /* enable it... */ - fbc_ctl = intel_de_read(dev_priv, FBC_CONTROL); - fbc_ctl &= FBC_CTL_INTERVAL(0x3fff); + fbc_ctl = FBC_CTL_INTERVAL(params->interval); fbc_ctl |= FBC_CTL_EN | FBC_CTL_PERIODIC; if (IS_I945GM(dev_priv)) fbc_ctl |= FBC_CTL_C3_IDLE; /* 945 needs special SR handling */ @@ -699,6 +698,9 @@ static void intel_fbc_update_state_cache(struct intel_crtc *crtc, cache->fb.stride = fb->pitches[0]; cache->fb.modifier = fb->modifier; + /* This value was pulled out of someone's hat */ + cache->interval = 500; + cache->fence_y_offset = intel_plane_fence_y_offset(plane_state); drm_WARN_ON(&dev_priv->drm, plane_state->flags & PLANE_HAS_FENCE && @@ -873,6 +875,8 @@ static void intel_fbc_get_reg_params(struct intel_crtc *crtc, params->fence_id = cache->fence_id; params->fence_y_offset = cache->fence_y_offset; + params->interval = cache->interval; + params->crtc.pipe = crtc->pipe; params->crtc.i9xx_plane = to_intel_plane(crtc->base.primary)->i9xx_plane; @@ -1420,11 +1424,6 @@ void intel_fbc_init(struct drm_i915_private *dev_priv) return; } - /* This value was pulled out of someone's hat */ - if (INTEL_GEN(dev_priv) <= 4 && !IS_GM45(dev_priv)) - intel_de_write(dev_priv, FBC_CONTROL, - FBC_CTL_INTERVAL(500)); - /* We still don't have any sort of hardware state readout for FBC, so * deactivate it in case the BIOS activated it to make sure software * matches the hardware state. 
*/ diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 087608d98f09..a574bdfbc599 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -422,6 +422,7 @@ struct intel_fbc { unsigned int fence_y_offset; u16 gen9_wa_cfb_stride; + u16 interval; s8 fence_id; } state_cache; @@ -446,6 +447,7 @@ struct intel_fbc { int cfb_size; unsigned int fence_y_offset; u16 gen9_wa_cfb_stride; + u16 interval; s8 fence_id; bool plane_visible; } params; From 42ae1f88feacc8643bc56dcfa55e0722967e424f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Wed, 29 Apr 2020 13:10:32 +0300 Subject: [PATCH 207/222] drm/i915/fbc: Reduce fbc1 compression interval to 1 second MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The default fbc1 compression interval we use is 500 frames. That translates to over 8 seconds typically. That's rather excessive so let's drop it to 1 second. The hardware will not attempt recompression unless at least one line has been modified, so a shorter compression interval should not cause extra bandwidth use in the purely idle scenario. Of course in the mostly idle case we are possibly going to recompress a bit more. Should really try to find some kind of sweet spot to minimize the energy usage... 
Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20200429101034.8208-11-ville.syrjala@linux.intel.com Reviewed-by: José Roberto de Souza --- drivers/gpu/drm/i915/display/intel_fbc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_fbc.c b/drivers/gpu/drm/i915/display/intel_fbc.c index 09b424611548..69a0682ddb6a 100644 --- a/drivers/gpu/drm/i915/display/intel_fbc.c +++ b/drivers/gpu/drm/i915/display/intel_fbc.c @@ -698,8 +698,8 @@ static void intel_fbc_update_state_cache(struct intel_crtc *crtc, cache->fb.stride = fb->pitches[0]; cache->fb.modifier = fb->modifier; - /* This value was pulled out of someone's hat */ - cache->interval = 500; + /* FBC1 compression interval: arbitrary choice of 1 second */ + cache->interval = drm_mode_vrefresh(&crtc_state->hw.adjusted_mode); cache->fence_y_offset = intel_plane_fence_y_offset(plane_state); From f6a7d3952cbed2c1e07d945dbb36f5c34200e9c1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Wed, 29 Apr 2020 13:10:34 +0300 Subject: [PATCH 208/222] drm/i915: Suppress spurious underruns on gen2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Often we seem to detect an underrun right after modeset on gen2. It seems to be a spurious detection (potentially the pipe is still in a wonky state when we enable the planes). An extra vblank wait seems to cure it. 
Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20200429101034.8208-13-ville.syrjala@linux.intel.com Reviewed-by: José Roberto de Souza --- drivers/gpu/drm/i915/display/intel_display.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 4fdc9ccfea2a..182cef0dc2fd 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -7526,6 +7526,10 @@ static void i9xx_crtc_enable(struct intel_atomic_state *state, intel_crtc_vblank_on(new_crtc_state); intel_encoders_enable(state, crtc); + + /* prevents spurious underruns */ + if (IS_GEN(dev_priv, 2)) + intel_wait_for_vblank(dev_priv, pipe); } static void i9xx_pfit_disable(const struct intel_crtc_state *old_crtc_state) From 040e123c0b7305a490adb4168b459c342ae7e9b2 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 19 Jun 2020 20:40:38 +0100 Subject: [PATCH 209/222] drm/i915/gem: Avoid kmalloc under i915->mm_lock Rearrange the allocation of the mm_struct registration to avoid allocating underneath the i915->mm_lock, so that we avoid tainting the lock (and in turn many other locks that may be held as i915->mm_lock is taken, and those locks we may want on the free [shrinker] paths). In doing so, we convert the lookup to be RCU protected by courtesy of converting the free-worker to be an rcu_work. v2: Remember to use hash_rcu variants to protect the list iteration from concurrent add/del. 
Signed-off-by: Chris Wilson Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20200619194038.5088-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gem/i915_gem_userptr.c | 131 ++++++++++---------- drivers/gpu/drm/i915/i915_drv.h | 2 +- 2 files changed, 65 insertions(+), 68 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c index 9c53eb883400..e946032b13e4 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c @@ -21,7 +21,7 @@ struct i915_mm_struct { struct i915_mmu_notifier *mn; struct hlist_node node; struct kref kref; - struct work_struct work; + struct rcu_work work; }; #if defined(CONFIG_MMU_NOTIFIER) @@ -189,40 +189,31 @@ i915_gem_userptr_release__mmu_notifier(struct drm_i915_gem_object *obj) static struct i915_mmu_notifier * i915_mmu_notifier_find(struct i915_mm_struct *mm) { - struct i915_mmu_notifier *mn; - int err = 0; + struct i915_mmu_notifier *mn, *old; + int err; - mn = mm->mn; - if (mn) + mn = READ_ONCE(mm->mn); + if (likely(mn)) return mn; mn = i915_mmu_notifier_create(mm); if (IS_ERR(mn)) - err = PTR_ERR(mn); + return mn; - mmap_write_lock(mm->mm); - mutex_lock(&mm->i915->mm_lock); - if (mm->mn == NULL && !err) { - /* Protected by mmap_lock (write-lock) */ - err = __mmu_notifier_register(&mn->mn, mm->mm); - if (!err) { - /* Protected by mm_lock */ - mm->mn = fetch_and_zero(&mn); - } - } else if (mm->mn) { - /* - * Someone else raced and successfully installed the mmu - * notifier, we can cancel our own errors. - */ - err = 0; - } - mutex_unlock(&mm->i915->mm_lock); - mmap_write_unlock(mm->mm); - - if (mn && !IS_ERR(mn)) + err = mmu_notifier_register(&mn->mn, mm->mm); + if (err) { kfree(mn); + return ERR_PTR(err); + } - return err ? 
ERR_PTR(err) : mm->mn; + old = cmpxchg(&mm->mn, NULL, mn); + if (old) { + mmu_notifier_unregister(&mn->mn, mm->mm); + kfree(mn); + mn = old; + } + + return mn; } static int @@ -301,23 +292,28 @@ i915_mmu_notifier_free(struct i915_mmu_notifier *mn, #endif static struct i915_mm_struct * -__i915_mm_struct_find(struct drm_i915_private *dev_priv, struct mm_struct *real) +__i915_mm_struct_find(struct drm_i915_private *i915, struct mm_struct *real) { - struct i915_mm_struct *mm; + struct i915_mm_struct *it, *mm = NULL; - /* Protected by dev_priv->mm_lock */ - hash_for_each_possible(dev_priv->mm_structs, mm, node, (unsigned long)real) - if (mm->mm == real) - return mm; + rcu_read_lock(); + hash_for_each_possible_rcu(i915->mm_structs, + it, node, + (unsigned long)real) + if (it->mm == real && kref_get_unless_zero(&it->kref)) { + mm = it; + break; + } + rcu_read_unlock(); - return NULL; + return mm; } static int i915_gem_userptr_init__mm_struct(struct drm_i915_gem_object *obj) { - struct drm_i915_private *dev_priv = to_i915(obj->base.dev); - struct i915_mm_struct *mm; + struct drm_i915_private *i915 = to_i915(obj->base.dev); + struct i915_mm_struct *mm, *new; int ret = 0; /* During release of the GEM object we hold the struct_mutex. This @@ -330,39 +326,42 @@ i915_gem_userptr_init__mm_struct(struct drm_i915_gem_object *obj) * struct_mutex, i.e. we need to schedule a worker to do the clean * up. 
*/ - mutex_lock(&dev_priv->mm_lock); - mm = __i915_mm_struct_find(dev_priv, current->mm); - if (mm == NULL) { - mm = kmalloc(sizeof(*mm), GFP_KERNEL); - if (mm == NULL) { - ret = -ENOMEM; - goto out; - } + mm = __i915_mm_struct_find(i915, current->mm); + if (mm) + goto out; - kref_init(&mm->kref); - mm->i915 = to_i915(obj->base.dev); + new = kmalloc(sizeof(*mm), GFP_KERNEL); + if (!new) + return -ENOMEM; - mm->mm = current->mm; + kref_init(&new->kref); + new->i915 = to_i915(obj->base.dev); + new->mm = current->mm; + new->mn = NULL; + + spin_lock(&i915->mm_lock); + mm = __i915_mm_struct_find(i915, current->mm); + if (!mm) { + hash_add_rcu(i915->mm_structs, + &new->node, + (unsigned long)new->mm); mmgrab(current->mm); + mm = new; + } + spin_unlock(&i915->mm_lock); + if (mm != new) + kfree(new); - mm->mn = NULL; - - /* Protected by dev_priv->mm_lock */ - hash_add(dev_priv->mm_structs, - &mm->node, (unsigned long)mm->mm); - } else - kref_get(&mm->kref); - - obj->userptr.mm = mm; out: - mutex_unlock(&dev_priv->mm_lock); + obj->userptr.mm = mm; return ret; } static void __i915_mm_struct_free__worker(struct work_struct *work) { - struct i915_mm_struct *mm = container_of(work, typeof(*mm), work); + struct i915_mm_struct *mm = container_of(work, typeof(*mm), work.work); + i915_mmu_notifier_free(mm->mn, mm->mm); mmdrop(mm->mm); kfree(mm); @@ -373,12 +372,12 @@ __i915_mm_struct_free(struct kref *kref) { struct i915_mm_struct *mm = container_of(kref, typeof(*mm), kref); - /* Protected by dev_priv->mm_lock */ - hash_del(&mm->node); - mutex_unlock(&mm->i915->mm_lock); + spin_lock(&mm->i915->mm_lock); + hash_del_rcu(&mm->node); + spin_unlock(&mm->i915->mm_lock); - INIT_WORK(&mm->work, __i915_mm_struct_free__worker); - queue_work(mm->i915->mm.userptr_wq, &mm->work); + INIT_RCU_WORK(&mm->work, __i915_mm_struct_free__worker); + queue_rcu_work(system_wq, &mm->work); } static void @@ -387,9 +386,7 @@ i915_gem_userptr_release__mm_struct(struct drm_i915_gem_object *obj) if 
(obj->userptr.mm == NULL) return; - kref_put_mutex(&obj->userptr.mm->kref, - __i915_mm_struct_free, - &to_i915(obj->base.dev)->mm_lock); + kref_put(&obj->userptr.mm->kref, __i915_mm_struct_free); obj->userptr.mm = NULL; } @@ -851,7 +848,7 @@ i915_gem_userptr_ioctl(struct drm_device *dev, int i915_gem_init_userptr(struct drm_i915_private *dev_priv) { - mutex_init(&dev_priv->mm_lock); + spin_lock_init(&dev_priv->mm_lock); hash_init(dev_priv->mm_structs); dev_priv->mm.userptr_wq = diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index a574bdfbc599..6e9072ab30a1 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -993,7 +993,7 @@ struct drm_i915_private { struct i915_gem_mm mm; DECLARE_HASHTABLE(mm_structs, 7); - struct mutex mm_lock; + spinlock_t mm_lock; /* Kernel Modesetting */ From 093a3a30000926b8bda9eef773e4ed5079053350 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Roberto=20de=20Souza?= Date: Thu, 25 Jun 2020 18:01:47 -0700 Subject: [PATCH 210/222] drm/i915: Add plane damage clips property MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This property will be used by PSR2 software tracking, adding it to GEN12+. 
Reviewed-by: Gwan-gyeong Mun Signed-off-by: José Roberto de Souza Link: https://patchwork.freedesktop.org/patch/msgid/20200626010151.221388-1-jose.souza@intel.com --- drivers/gpu/drm/i915/display/intel_display.c | 4 ++++ drivers/gpu/drm/i915/display/intel_sprite.c | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 182cef0dc2fd..84e2a17b5ecb 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -35,6 +35,7 @@ #include #include #include +#include #include #include #include @@ -16496,6 +16497,9 @@ intel_cursor_plane_create(struct drm_i915_private *dev_priv, zpos = RUNTIME_INFO(dev_priv)->num_sprites[pipe] + 1; drm_plane_create_zpos_immutable_property(&cursor->base, zpos); + if (INTEL_GEN(dev_priv) >= 12) + drm_plane_enable_fb_damage_clips(&cursor->base); + drm_plane_helper_add(&cursor->base, &intel_plane_helper_funcs); return cursor; diff --git a/drivers/gpu/drm/i915/display/intel_sprite.c b/drivers/gpu/drm/i915/display/intel_sprite.c index 3cd461bf9131..d03860fef2d7 100644 --- a/drivers/gpu/drm/i915/display/intel_sprite.c +++ b/drivers/gpu/drm/i915/display/intel_sprite.c @@ -34,6 +34,7 @@ #include #include #include +#include #include #include #include @@ -3156,6 +3157,9 @@ skl_universal_plane_create(struct drm_i915_private *dev_priv, drm_plane_create_zpos_immutable_property(&plane->base, plane_id); + if (INTEL_GEN(dev_priv) >= 12) + drm_plane_enable_fb_damage_clips(&plane->base); + drm_plane_helper_add(&plane->base, &intel_plane_helper_funcs); return plane; From 19167eb064da81ca7c837ecef61b23921606acd4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Roberto=20de=20Souza?= Date: Thu, 25 Jun 2020 18:01:48 -0700 Subject: [PATCH 211/222] drm/i915: Reorder intel_psr2_config_valid() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Future patches will bring PSR2 selective 
fetch configuration validation but most of the configuration checks will be used for HW tracking and selective fetch so the reorder was necessary. Reviewed-by: Gwan-gyeong Mun Signed-off-by: José Roberto de Souza Link: https://patchwork.freedesktop.org/patch/msgid/20200626010151.221388-2-jose.souza@intel.com --- drivers/gpu/drm/i915/display/intel_psr.c | 50 ++++++++++++------------ 1 file changed, 25 insertions(+), 25 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_psr.c b/drivers/gpu/drm/i915/display/intel_psr.c index 86bf7a76f93d..611cb8d74811 100644 --- a/drivers/gpu/drm/i915/display/intel_psr.c +++ b/drivers/gpu/drm/i915/display/intel_psr.c @@ -681,21 +681,6 @@ static bool intel_psr2_config_valid(struct intel_dp *intel_dp, return false; } - /* - * Some platforms lack PSR2 HW tracking and instead require manual - * tracking by software. In this case, the driver is required to track - * the areas that need updates and program hardware to send selective - * updates. - * - * So until the software tracking is implemented, PSR2 needs to be - * disabled for platforms without PSR2 HW tracking. - */ - if (!HAS_PSR_HW_TRACKING(dev_priv)) { - drm_dbg_kms(&dev_priv->drm, - "No PSR2 HW tracking in the platform\n"); - return false; - } - /* * DSC and PSR2 cannot be enabled simultaneously.
If a requested * resolution requires DSC to be enabled, priority is given to DSC @@ -707,6 +692,12 @@ static bool intel_psr2_config_valid(struct intel_dp *intel_dp, return false; } + if (crtc_state->crc_enabled) { + drm_dbg_kms(&dev_priv->drm, + "PSR2 not enabled because it would inhibit pipe CRC calculation\n"); + return false; + } + if (INTEL_GEN(dev_priv) >= 12) { psr_max_h = 5120; psr_max_v = 3200; @@ -721,14 +712,6 @@ static bool intel_psr2_config_valid(struct intel_dp *intel_dp, max_bpp = 24; } - if (crtc_hdisplay > psr_max_h || crtc_vdisplay > psr_max_v) { - drm_dbg_kms(&dev_priv->drm, - "PSR2 not enabled, resolution %dx%d > max supported %dx%d\n", - crtc_hdisplay, crtc_vdisplay, - psr_max_h, psr_max_v); - return false; - } - if (crtc_state->pipe_bpp > max_bpp) { drm_dbg_kms(&dev_priv->drm, "PSR2 not enabled, pipe bpp %d > max supported %d\n", @@ -749,9 +732,26 @@ static bool intel_psr2_config_valid(struct intel_dp *intel_dp, return false; } - if (crtc_state->crc_enabled) { + /* + * Some platforms lack PSR2 HW tracking and instead require manual + * tracking by software. In this case, the driver is required to track + * the areas that need updates and program hardware to send selective + * updates. + * + * So until the software tracking is implemented, PSR2 needs to be + * disabled for platforms without PSR2 HW tracking. 
+ */ + if (!HAS_PSR_HW_TRACKING(dev_priv)) { drm_dbg_kms(&dev_priv->drm, - "PSR2 not enabled because it would inhibit pipe CRC calculation\n"); + "No PSR2 HW tracking in the platform\n"); + return false; + } + + if (crtc_hdisplay > psr_max_h || crtc_vdisplay > psr_max_v) { + drm_dbg_kms(&dev_priv->drm, + "PSR2 not enabled, resolution %dx%d > max supported %dx%d\n", + crtc_hdisplay, crtc_vdisplay, + psr_max_h, psr_max_v); return false; } From a5523e2ff074a5a44b778f7c6483a882c2c88ecc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Roberto=20de=20Souza?= Date: Thu, 25 Jun 2020 18:01:49 -0700 Subject: [PATCH 212/222] drm/i915: Add PSR2 selective fetch registers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit These registers will be used to implement PSR2 manual tracking/selective fetch. v2: - Fixed typo in _PLANE_SEL_FETCH_BASE - Renamed PSR2_MAN_TRK_CTL bits to better match spec names - Renamed _PLANE_SEL_FETCH_* to better match spec names BSpec: 55229 BSpec: 50424 BSpec: 50420 Cc: Gwan-gyeong Mun Signed-off-by: José Roberto de Souza Reviewed-by: Gwan-gyeong Mun Link: https://patchwork.freedesktop.org/patch/msgid/20200626010151.221388-3-jose.souza@intel.com --- drivers/gpu/drm/i915/i915_reg.h | 68 ++++++++++++++++++++++++++++++--- 1 file changed, 63 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 2ecde5c2e357..9d6536afc94b 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -4589,6 +4589,18 @@ enum { #define PSR2_SU_STATUS_MASK(frame) (0x3ff << PSR2_SU_STATUS_SHIFT(frame)) #define PSR2_SU_STATUS_FRAMES 8 +#define _PSR2_MAN_TRK_CTL_A 0x60910 +#define _PSR2_MAN_TRK_CTL_EDP 0x6f910 +#define PSR2_MAN_TRK_CTL(tran) _MMIO_TRANS2(tran, _PSR2_MAN_TRK_CTL_A) +#define PSR2_MAN_TRK_CTL_ENABLE REG_BIT(31) +#define PSR2_MAN_TRK_CTL_SU_REGION_START_ADDR_MASK REG_GENMASK(30, 21) +#define PSR2_MAN_TRK_CTL_SU_REGION_START_ADDR(val) 
REG_FIELD_PREP(PSR2_MAN_TRK_CTL_SU_REGION_START_ADDR_MASK, val) +#define PSR2_MAN_TRK_CTL_SU_REGION_END_ADDR_MASK REG_GENMASK(20, 11) +#define PSR2_MAN_TRK_CTL_SU_REGION_END_ADDR(val) REG_FIELD_PREP(PSR2_MAN_TRK_CTL_SU_REGION_END_ADDR_MASK, val) +#define PSR2_MAN_TRK_CTL_SF_SINGLE_FULL_FRAME REG_BIT(3) +#define PSR2_MAN_TRK_CTL_SF_CONTINUOS_FULL_FRAME REG_BIT(2) +#define PSR2_MAN_TRK_CTL_SF_PARTIAL_FRAME_UPDATE REG_BIT(1) + /* VGA port control */ #define ADPA _MMIO(0x61100) #define PCH_ADPA _MMIO(0xe1100) @@ -7152,7 +7164,52 @@ enum { #define PLANE_COLOR_CTL(pipe, plane) \ _MMIO_PLANE(plane, _PLANE_COLOR_CTL_1(pipe), _PLANE_COLOR_CTL_2(pipe)) -#/* SKL new cursor registers */ +#define _SEL_FETCH_PLANE_BASE_1_A 0x70890 +#define _SEL_FETCH_PLANE_BASE_2_A 0x708B0 +#define _SEL_FETCH_PLANE_BASE_3_A 0x708D0 +#define _SEL_FETCH_PLANE_BASE_4_A 0x708F0 +#define _SEL_FETCH_PLANE_BASE_5_A 0x70920 +#define _SEL_FETCH_PLANE_BASE_6_A 0x70940 +#define _SEL_FETCH_PLANE_BASE_7_A 0x70960 +#define _SEL_FETCH_PLANE_BASE_CUR_A 0x70880 +#define _SEL_FETCH_PLANE_BASE_1_B 0x70990 + +#define _SEL_FETCH_PLANE_BASE_A(plane) _PICK(plane, \ + _SEL_FETCH_PLANE_BASE_1_A, \ + _SEL_FETCH_PLANE_BASE_2_A, \ + _SEL_FETCH_PLANE_BASE_3_A, \ + _SEL_FETCH_PLANE_BASE_4_A, \ + _SEL_FETCH_PLANE_BASE_5_A, \ + _SEL_FETCH_PLANE_BASE_6_A, \ + _SEL_FETCH_PLANE_BASE_7_A, \ + _SEL_FETCH_PLANE_BASE_CUR_A) +#define _SEL_FETCH_PLANE_BASE_1(pipe) _PIPE(pipe, _SEL_FETCH_PLANE_BASE_1_A, _SEL_FETCH_PLANE_BASE_1_B) +#define _SEL_FETCH_PLANE_BASE(pipe, plane) (_SEL_FETCH_PLANE_BASE_1(pipe) - \ + _SEL_FETCH_PLANE_BASE_1_A + \ + _SEL_FETCH_PLANE_BASE_A(plane)) + +#define _SEL_FETCH_PLANE_CTL_1_A 0x70890 +#define PLANE_SEL_FETCH_CTL(pipe, plane) _MMIO(_SEL_FETCH_PLANE_BASE(pipe, plane) + \ + _SEL_FETCH_PLANE_CTL_1_A - \ + _SEL_FETCH_PLANE_BASE_1_A) +#define PLANE_SEL_FETCH_CTL_ENABLE REG_BIT(31) + +#define _SEL_FETCH_PLANE_POS_1_A 0x70894 +#define PLANE_SEL_FETCH_POS(pipe, plane) _MMIO(_SEL_FETCH_PLANE_BASE(pipe, plane) + \ + 
_SEL_FETCH_PLANE_POS_1_A - \ + _SEL_FETCH_PLANE_BASE_1_A) + +#define _SEL_FETCH_PLANE_SIZE_1_A 0x70898 +#define PLANE_SEL_FETCH_SIZE(pipe, plane) _MMIO(_SEL_FETCH_PLANE_BASE(pipe, plane) + \ + _SEL_FETCH_PLANE_SIZE_1_A - \ + _SEL_FETCH_PLANE_BASE_1_A) + +#define _SEL_FETCH_PLANE_OFFSET_1_A 0x7089C +#define PLANE_SEL_FETCH_OFFSET(pipe, plane) _MMIO(_SEL_FETCH_PLANE_BASE(pipe, plane) + \ + _SEL_FETCH_PLANE_OFFSET_1_A - \ + _SEL_FETCH_PLANE_BASE_1_A) + +/* SKL new cursor registers */ #define _CUR_BUF_CFG_A 0x7017c #define _CUR_BUF_CFG_B 0x7117c #define CUR_BUF_CFG(pipe) _MMIO_PIPE(pipe, _CUR_BUF_CFG_A, _CUR_BUF_CFG_B) @@ -7798,11 +7855,12 @@ enum { # define CHICKEN3_DGMG_REQ_OUT_FIX_DISABLE (1 << 5) # define CHICKEN3_DGMG_DONE_FIX_DISABLE (1 << 2) -#define CHICKEN_PAR1_1 _MMIO(0x42080) +#define CHICKEN_PAR1_1 _MMIO(0x42080) #define SKL_DE_COMPRESSED_HASH_MODE (1 << 15) -#define DPA_MASK_VBLANK_SRD (1 << 15) -#define FORCE_ARB_IDLE_PLANES (1 << 14) -#define SKL_EDP_PSR_FIX_RDWRAP (1 << 3) +#define DPA_MASK_VBLANK_SRD (1 << 15) +#define FORCE_ARB_IDLE_PLANES (1 << 14) +#define SKL_EDP_PSR_FIX_RDWRAP (1 << 3) +#define IGNORE_PSR2_HW_TRACKING (1 << 1) #define CHICKEN_PAR2_1 _MMIO(0x42090) #define KVM_CONFIG_CHANGE_NOTIFICATION_SELECT (1 << 14) From 0ba7ffea2d118af7ca0df26cad418f71486b93ad Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Fri, 26 Jun 2020 16:48:32 -0700 Subject: [PATCH 213/222] drm/i915/display: remove alias to dig_port We don't need intel_dig_port and dig_port to refer to the same thing. Prefer the latter. 
v2: fix coding style Signed-off-by: Lucas De Marchi Reviewed-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20200626234834.26864-2-lucas.demarchi@intel.com --- drivers/gpu/drm/i915/display/intel_ddi.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c index 884b507c5f55..025d4052f6f8 100644 --- a/drivers/gpu/drm/i915/display/intel_ddi.c +++ b/drivers/gpu/drm/i915/display/intel_ddi.c @@ -3380,11 +3380,10 @@ static void intel_ddi_pre_enable_hdmi(struct intel_atomic_state *state, const struct intel_crtc_state *crtc_state, const struct drm_connector_state *conn_state) { - struct intel_digital_port *intel_dig_port = enc_to_dig_port(encoder); - struct intel_hdmi *intel_hdmi = &intel_dig_port->hdmi; + struct intel_digital_port *dig_port = enc_to_dig_port(encoder); + struct intel_hdmi *intel_hdmi = &dig_port->hdmi; struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); int level = intel_ddi_hdmi_level(encoder); - struct intel_digital_port *dig_port = enc_to_dig_port(encoder); intel_dp_dual_mode_set_tmds_output(intel_hdmi, true); intel_ddi_clk_select(encoder, crtc_state); @@ -3411,9 +3410,9 @@ static void intel_ddi_pre_enable_hdmi(struct intel_atomic_state *state, intel_ddi_enable_pipe_clock(encoder, crtc_state); - intel_dig_port->set_infoframes(encoder, - crtc_state->has_infoframe, - crtc_state, conn_state); + dig_port->set_infoframes(encoder, + crtc_state->has_infoframe, + crtc_state, conn_state); } static void intel_ddi_pre_enable(struct intel_atomic_state *state, From 096a42dd1998a966c1b9e0cf489103d77d6473a5 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 1 Jul 2020 09:44:39 +0100 Subject: [PATCH 214/222] drm/i915/gem: Move obj->lut_list under its own lock The obj->lut_list is traversed when the object is closed as the file table is destroyed during process termination. 
As this occurs before we kill any outstanding context if, due to some bug or another, the closure is blocked, then we fail to shootdown any inflight operations potentially leaving the GPU spinning forever. As we only need to guard the list against concurrent closures and insertions, the hold is short and merits being treated as a simple spinlock. Signed-off-by: Chris Wilson Reviewed-by: Michael J. Ruhl Link: https://patchwork.freedesktop.org/patch/msgid/20200701084439.17025-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gem/i915_gem_context.c | 6 ++---- .../gpu/drm/i915/gem/i915_gem_execbuffer.c | 4 ++-- drivers/gpu/drm/i915/gem/i915_gem_object.c | 21 +++++++++++++------ .../gpu/drm/i915/gem/i915_gem_object_types.h | 1 + 4 files changed, 20 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index 5c13809dc3c8..6675447a47b9 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -112,8 +112,7 @@ static void lut_close(struct i915_gem_context *ctx) if (!kref_get_unless_zero(&obj->base.refcount)) continue; - rcu_read_unlock(); - i915_gem_object_lock(obj); + spin_lock(&obj->lut_lock); list_for_each_entry(lut, &obj->lut_list, obj_link) { if (lut->ctx != ctx) continue; @@ -124,8 +123,7 @@ static void lut_close(struct i915_gem_context *ctx) list_del(&lut->obj_link); break; } - i915_gem_object_unlock(obj); - rcu_read_lock(); + spin_unlock(&obj->lut_lock); if (&lut->obj_link != &obj->lut_list) { i915_lut_handle_free(lut); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index c38ab51e82f0..b4862afaaf28 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -789,14 +789,14 @@ static int __eb_add_lut(struct i915_execbuffer *eb, if (err == 0) { /* And nor has this handle */ struct drm_i915_gem_object *obj = vma->obj; - 
i915_gem_object_lock(obj); + spin_lock(&obj->lut_lock); if (idr_find(&eb->file->object_idr, handle) == obj) { list_add(&lut->obj_link, &obj->lut_list); } else { radix_tree_delete(&ctx->handles_vma, handle); err = -ENOENT; } - i915_gem_object_unlock(obj); + spin_unlock(&obj->lut_lock); } mutex_unlock(&ctx->mutex); } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c index b6ec5b50d93b..6b69191c5543 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c @@ -61,6 +61,7 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj, INIT_LIST_HEAD(&obj->mm.link); INIT_LIST_HEAD(&obj->lut_list); + spin_lock_init(&obj->lut_lock); spin_lock_init(&obj->mmo.lock); obj->mmo.offsets = RB_ROOT; @@ -104,21 +105,29 @@ void i915_gem_close_object(struct drm_gem_object *gem, struct drm_file *file) { struct drm_i915_gem_object *obj = to_intel_bo(gem); struct drm_i915_file_private *fpriv = file->driver_priv; + struct i915_lut_handle bookmark = {}; struct i915_mmap_offset *mmo, *mn; struct i915_lut_handle *lut, *ln; LIST_HEAD(close); - i915_gem_object_lock(obj); + spin_lock(&obj->lut_lock); list_for_each_entry_safe(lut, ln, &obj->lut_list, obj_link) { struct i915_gem_context *ctx = lut->ctx; - if (ctx->file_priv != fpriv) - continue; + if (ctx && ctx->file_priv == fpriv) { + i915_gem_context_get(ctx); + list_move(&lut->obj_link, &close); + } - i915_gem_context_get(ctx); - list_move(&lut->obj_link, &close); + /* Break long locks, and carefully continue on from this spot */ + if (&ln->obj_link != &obj->lut_list) { + list_add_tail(&bookmark.obj_link, &ln->obj_link); + if (cond_resched_lock(&obj->lut_lock)) + list_safe_reset_next(&bookmark, ln, obj_link); + __list_del_entry(&bookmark.obj_link); + } } - i915_gem_object_unlock(obj); + spin_unlock(&obj->lut_lock); spin_lock(&obj->mmo.lock); rbtree_postorder_for_each_entry_safe(mmo, mn, &obj->mmo.offsets, offset) diff --git 
a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h index b1f82a11aef2..5335f799b548 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h @@ -121,6 +121,7 @@ struct drm_i915_gem_object { * this translation from object to context->handles_vma. */ struct list_head lut_list; + spinlock_t lut_lock; /* guards lut_list */ /** Stolen memory for this object, instead of being backed by shmem. */ struct drm_mm_node *stolen; From 09eac8277262bea10a52159f90dcb55beffe0714 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Mon, 29 Jun 2020 21:58:47 +0300 Subject: [PATCH 215/222] drm/i915/tgl+: Fix TBT DPLL fractional divider for 38.4MHz ref clock When the reference clock is 38.4MHz, using the current TBT PLL fractional divider value results in a slightly off TBT link frequency. This causes an endless loop of link training success followed by a bad link signaling and retraining at least on a Dell WD19TB TBT dock. The workaround provided by the HW team is to divide the fractional divider value by two. This fixed the link training problem on the ThinkPad dock. The same workaround is needed on some EHL platforms and for combo PHY PLLs, these will be addressed in a follow-up. 
Bspec: 49204 References: HSDES#22010772725 References: HSDES#14011861142 Reported-and-tested-by: Khaled Almahallawy Signed-off-by: Imre Deak Reviewed-by: Khaled Almahallawy Link: https://patchwork.freedesktop.org/patch/msgid/20200629185848.20550-1-imre.deak@intel.com --- drivers/gpu/drm/i915/display/intel_dpll_mgr.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/intel_dpll_mgr.c b/drivers/gpu/drm/i915/display/intel_dpll_mgr.c index b45185b80bec..720aff8b9313 100644 --- a/drivers/gpu/drm/i915/display/intel_dpll_mgr.c +++ b/drivers/gpu/drm/i915/display/intel_dpll_mgr.c @@ -2934,6 +2934,15 @@ static const struct skl_wrpll_params tgl_tbt_pll_19_2MHz_values = { static const struct skl_wrpll_params tgl_tbt_pll_24MHz_values = { .dco_integer = 0x43, .dco_fraction = 0x4000, /* the following params are unused */ +}; + +/* + * Display WA #22010492432: tgl + * Divide the nominal .dco_fraction value by 2. + */ +static const struct skl_wrpll_params tgl_tbt_pll_38_4MHz_values = { + .dco_integer = 0x54, .dco_fraction = 0x1800, + /* the following params are unused */ .pdiv = 0, .kdiv = 0, .qdiv_mode = 0, .qdiv_ratio = 0, }; @@ -2970,12 +2979,14 @@ static bool icl_calc_tbt_pll(struct intel_crtc_state *crtc_state, MISSING_CASE(dev_priv->dpll.ref_clks.nssc); /* fall-through */ case 19200: - case 38400: *pll_params = tgl_tbt_pll_19_2MHz_values; break; case 24000: *pll_params = tgl_tbt_pll_24MHz_values; break; + case 38400: + *pll_params = tgl_tbt_pll_38_4MHz_values; + break; } } else { switch (dev_priv->dpll.ref_clks.nssc) { From fc6200948275383ca214bb30dd175dfd0d61149b Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Mon, 29 Jun 2020 21:58:48 +0300 Subject: [PATCH 216/222] drm/i915/icl+: Simplify combo/TBT PLL calculation call-chain MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit To simplify things, call the combo PHY/TBT PLL calculation functions directly from the corresponding 
combo/TypeC PLL get functions, instead of calling the same calculation functions after having to recheck if the given PHY is combo or TypeC. Signed-off-by: Imre Deak Reviewed-by: José Roberto de Souza Link: https://patchwork.freedesktop.org/patch/msgid/20200629185848.20550-2-imre.deak@intel.com --- drivers/gpu/drm/i915/display/intel_dpll_mgr.c | 66 ++++++++----------- 1 file changed, 28 insertions(+), 38 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_dpll_mgr.c b/drivers/gpu/drm/i915/display/intel_dpll_mgr.c index 720aff8b9313..aeb6ee395cce 100644 --- a/drivers/gpu/drm/i915/display/intel_dpll_mgr.c +++ b/drivers/gpu/drm/i915/display/intel_dpll_mgr.c @@ -3049,49 +3049,26 @@ static int icl_ddi_combo_pll_get_freq(struct drm_i915_private *i915, icl_wrpll_ref_clock(i915)); } -static bool icl_calc_dpll_state(struct intel_crtc_state *crtc_state, - struct intel_encoder *encoder, +static void icl_calc_dpll_state(struct drm_i915_private *i915, + const struct skl_wrpll_params *pll_params, struct intel_dpll_hw_state *pll_state) { - struct drm_i915_private *dev_priv = to_i915(crtc_state->uapi.crtc->dev); - u32 cfgcr0, cfgcr1; - struct skl_wrpll_params pll_params = { 0 }; - bool ret; - - if (intel_phy_is_tc(dev_priv, intel_port_to_phy(dev_priv, - encoder->port))) - ret = icl_calc_tbt_pll(crtc_state, &pll_params); - else if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_HDMI) || - intel_crtc_has_type(crtc_state, INTEL_OUTPUT_DSI)) - ret = icl_calc_wrpll(crtc_state, &pll_params); - else - ret = icl_calc_dp_combo_pll(crtc_state, &pll_params); - - if (!ret) - return false; - - cfgcr0 = DPLL_CFGCR0_DCO_FRACTION(pll_params.dco_fraction) | - pll_params.dco_integer; - - cfgcr1 = DPLL_CFGCR1_QDIV_RATIO(pll_params.qdiv_ratio) | - DPLL_CFGCR1_QDIV_MODE(pll_params.qdiv_mode) | - DPLL_CFGCR1_KDIV(pll_params.kdiv) | - DPLL_CFGCR1_PDIV(pll_params.pdiv); - - if (INTEL_GEN(dev_priv) >= 12) - cfgcr1 |= TGL_DPLL_CFGCR1_CFSELOVRD_NORMAL_XTAL; - else - cfgcr1 |= 
DPLL_CFGCR1_CENTRAL_FREQ_8400; - memset(pll_state, 0, sizeof(*pll_state)); - pll_state->cfgcr0 = cfgcr0; - pll_state->cfgcr1 = cfgcr1; + pll_state->cfgcr0 = DPLL_CFGCR0_DCO_FRACTION(pll_params->dco_fraction) | + pll_params->dco_integer; - return true; + pll_state->cfgcr1 = DPLL_CFGCR1_QDIV_RATIO(pll_params->qdiv_ratio) | + DPLL_CFGCR1_QDIV_MODE(pll_params->qdiv_mode) | + DPLL_CFGCR1_KDIV(pll_params->kdiv) | + DPLL_CFGCR1_PDIV(pll_params->pdiv); + + if (INTEL_GEN(i915) >= 12) + pll_state->cfgcr1 |= TGL_DPLL_CFGCR1_CFSELOVRD_NORMAL_XTAL; + else + pll_state->cfgcr1 |= DPLL_CFGCR1_CENTRAL_FREQ_8400; } - static enum tc_port icl_pll_id_to_tc_port(enum intel_dpll_id id) { return id - DPLL_ID_ICL_MGPLL1; @@ -3504,19 +3481,29 @@ static bool icl_get_combo_phy_dpll(struct intel_atomic_state *state, { struct intel_crtc_state *crtc_state = intel_atomic_get_new_crtc_state(state, crtc); + struct skl_wrpll_params pll_params = { }; struct icl_port_dpll *port_dpll = &crtc_state->icl_port_dplls[ICL_PORT_DPLL_DEFAULT]; struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); enum port port = encoder->port; unsigned long dpll_mask; + int ret; - if (!icl_calc_dpll_state(crtc_state, encoder, &port_dpll->hw_state)) { + if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_HDMI) || + intel_crtc_has_type(crtc_state, INTEL_OUTPUT_DSI)) + ret = icl_calc_wrpll(crtc_state, &pll_params); + else + ret = icl_calc_dp_combo_pll(crtc_state, &pll_params); + + if (!ret) { drm_dbg_kms(&dev_priv->drm, "Could not calculate combo PHY PLL state.\n"); return false; } + icl_calc_dpll_state(dev_priv, &pll_params, &port_dpll->hw_state); + if (IS_ELKHARTLAKE(dev_priv) && port != PORT_A) dpll_mask = BIT(DPLL_ID_EHL_DPLL4) | @@ -3550,16 +3537,19 @@ static bool icl_get_tc_phy_dplls(struct intel_atomic_state *state, struct drm_i915_private *dev_priv = to_i915(state->base.dev); struct intel_crtc_state *crtc_state = intel_atomic_get_new_crtc_state(state, crtc); + struct skl_wrpll_params pll_params = { }; struct 
icl_port_dpll *port_dpll; enum intel_dpll_id dpll_id; port_dpll = &crtc_state->icl_port_dplls[ICL_PORT_DPLL_DEFAULT]; - if (!icl_calc_dpll_state(crtc_state, encoder, &port_dpll->hw_state)) { + if (!icl_calc_tbt_pll(crtc_state, &pll_params)) { drm_dbg_kms(&dev_priv->drm, "Could not calculate TBT PLL state.\n"); return false; } + icl_calc_dpll_state(dev_priv, &pll_params, &port_dpll->hw_state); + port_dpll->pll = intel_find_shared_dpll(state, crtc, &port_dpll->hw_state, BIT(DPLL_ID_ICL_TBTPLL)); From fd7a9d8fa1aa4aed276ae3f12db74db9b5b33455 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Wed, 29 Apr 2020 13:10:33 +0300 Subject: [PATCH 217/222] drm/i915: Fix g4x fbc watermark enable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 'level' here means the highest level we can't use, so when checking the fbc watermarks we need a -1 to get at the last enabled level. While at it, refactor the code a bit to declutter g4x_compute_pipe_wm(). 
Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20200429101034.8208-12-ville.syrjala@linux.intel.com Reviewed-by: José Roberto de Souza --- drivers/gpu/drm/i915/intel_pm.c | 33 +++++++++++++++++++++------------ 1 file changed, 21 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 2a32d6230795..565a2b9da3b3 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -1345,6 +1345,23 @@ static void g4x_invalidate_wms(struct intel_crtc *crtc, } } +static bool g4x_compute_fbc_en(const struct g4x_wm_state *wm_state, + int level) +{ + if (level < G4X_WM_LEVEL_SR) + return false; + + if (level >= G4X_WM_LEVEL_SR && + wm_state->sr.fbc > g4x_fbc_fifo_size(G4X_WM_LEVEL_SR)) + return false; + + if (level >= G4X_WM_LEVEL_HPLL && + wm_state->hpll.fbc > g4x_fbc_fifo_size(G4X_WM_LEVEL_HPLL)) + return false; + + return true; +} + static int g4x_compute_pipe_wm(struct intel_crtc_state *crtc_state) { struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); @@ -1384,7 +1401,6 @@ static int g4x_compute_pipe_wm(struct intel_crtc_state *crtc_state) wm_state->wm.plane[plane_id] = raw->plane[plane_id]; level = G4X_WM_LEVEL_SR; - if (!g4x_raw_crtc_wm_is_valid(crtc_state, level)) goto out; @@ -1396,7 +1412,6 @@ static int g4x_compute_pipe_wm(struct intel_crtc_state *crtc_state) wm_state->cxsr = num_active_planes == BIT(PLANE_PRIMARY); level = G4X_WM_LEVEL_HPLL; - if (!g4x_raw_crtc_wm_is_valid(crtc_state, level)) goto out; @@ -1419,17 +1434,11 @@ static int g4x_compute_pipe_wm(struct intel_crtc_state *crtc_state) /* * Determine if the FBC watermark(s) can be used. IF * this isn't the case we prefer to disable the FBC - ( watermark(s) rather than disable the SR/HPLL - * level(s) entirely. + * watermark(s) rather than disable the SR/HPLL + * level(s) entirely. 'level-1' is the highest valid + * level here. 
*/ - wm_state->fbc_en = level > G4X_WM_LEVEL_NORMAL; - - if (level >= G4X_WM_LEVEL_SR && - wm_state->sr.fbc > g4x_fbc_fifo_size(G4X_WM_LEVEL_SR)) - wm_state->fbc_en = false; - else if (level >= G4X_WM_LEVEL_HPLL && - wm_state->hpll.fbc > g4x_fbc_fifo_size(G4X_WM_LEVEL_HPLL)) - wm_state->fbc_en = false; + wm_state->fbc_en = g4x_compute_fbc_en(wm_state, level - 1); return 0; } From 680c45c767f63e35f063d3ea04f388a9f7ae7079 Mon Sep 17 00:00:00 2001 From: Matt Atwood Date: Tue, 30 Jun 2020 16:33:10 -0700 Subject: [PATCH 218/222] drm/i915/dp: Correctly advertise HBR3 for GEN11+ MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit intel_dp_set_source_rates() calls intel_dp_is_edp(), which is unsafe to use before encoder_type is set. This caused GEN11+ to incorrectly strip HBR3 from source rates for edp. Move intel_dp_set_source_rates() to after encoder_type is set. Add comment to intel_dp_is_edp() describing unsafe usages. v2: Alter intel_dp_set_source_rates final position (Ville/Manasi). Remove outdated comment (Ville). Slight optimization of control flow in intel_dp_init_connector. Slight rewording in commit message. Signed-off-by: Matt Atwood Reviewed-by: Ville Syrjälä Signed-off-by: José Roberto de Souza Link: https://patchwork.freedesktop.org/patch/msgid/20200630233310.10191-1-matthew.s.atwood@intel.com --- drivers/gpu/drm/i915/display/intel_dp.c | 28 ++++++++++--------------- 1 file changed, 11 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index 3df5d901dd9d..c9b93c5706af 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -137,6 +137,8 @@ static const u8 valid_dsc_slicecount[] = {1, 2, 4}; * * If a CPU or PCH DP output is attached to an eDP panel, this function * will return true, and false otherwise. + * + * This function is not safe to use prior to encoder type being set. 
*/ bool intel_dp_is_edp(struct intel_dp *intel_dp) { @@ -8157,8 +8159,6 @@ intel_dp_init_connector(struct intel_digital_port *intel_dig_port, intel_encoder->base.name)) return false; - intel_dp_set_source_rates(intel_dp); - intel_dp->reset_link_params = true; intel_dp->pps_pipe = INVALID_PIPE; intel_dp->active_pipe = INVALID_PIPE; @@ -8174,28 +8174,22 @@ intel_dp_init_connector(struct intel_digital_port *intel_dig_port, */ drm_WARN_ON(dev, intel_phy_is_tc(dev_priv, phy)); type = DRM_MODE_CONNECTOR_eDP; + intel_encoder->type = INTEL_OUTPUT_EDP; + + /* eDP only on port B and/or C on vlv/chv */ + if (drm_WARN_ON(dev, (IS_VALLEYVIEW(dev_priv) || + IS_CHERRYVIEW(dev_priv)) && + port != PORT_B && port != PORT_C)) + return false; } else { type = DRM_MODE_CONNECTOR_DisplayPort; } + intel_dp_set_source_rates(intel_dp); + if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) intel_dp->active_pipe = vlv_active_pipe(intel_dp); - /* - * For eDP we always set the encoder type to INTEL_OUTPUT_EDP, but - * for DP the encoder type can be set by the caller to - * INTEL_OUTPUT_UNKNOWN for DDI, so don't rewrite it. - */ - if (type == DRM_MODE_CONNECTOR_eDP) - intel_encoder->type = INTEL_OUTPUT_EDP; - - /* eDP only on port B and/or C on vlv/chv */ - if (drm_WARN_ON(dev, (IS_VALLEYVIEW(dev_priv) || - IS_CHERRYVIEW(dev_priv)) && - intel_dp_is_edp(intel_dp) && - port != PORT_B && port != PORT_C)) - return false; - drm_dbg_kms(&dev_priv->drm, "Adding %s connector on [ENCODER:%d:%s]\n", type == DRM_MODE_CONNECTOR_eDP ? "eDP" : "DP", From aab4707fdd754d4c4f0df718f3c7546b6eb40d20 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 2 Jul 2020 10:52:18 +0100 Subject: [PATCH 219/222] drm/i915/gt: Harden the heartbeat against a stuck driver If the driver gets stuck holding the kernel timeline, we cannot issue a heartbeat and so fail to discover that the driver is indeed stuck and do not issue a GPU reset (which would hopefully unstick the driver!). 
Switch to using a trylock so that we can query if the heartbeat's timeline mutex is locked elsewhere, and then use the timer to probe if it remains stuck at the same spot for consecutive heartbeats, indicating that the mutex has not been released and the engine has not progressed. Signed-off-by: Chris Wilson Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20200702095219.963-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c | 14 ++++++++++++-- drivers/gpu/drm/i915/gt/intel_engine_types.h | 1 + 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c index 8db7e93abde5..1c6c6692dd17 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c @@ -65,6 +65,7 @@ static void heartbeat(struct work_struct *wrk) container_of(wrk, typeof(*engine), heartbeat.work.work); struct intel_context *ce = engine->kernel_context; struct i915_request *rq; + unsigned long serial; /* Just in case everything has gone horribly wrong, give it a kick */ intel_engine_flush_submission(engine); @@ -122,10 +123,19 @@ static void heartbeat(struct work_struct *wrk) goto out; } - if (engine->wakeref_serial == engine->serial) + serial = READ_ONCE(engine->serial); + if (engine->wakeref_serial == serial) goto out; - mutex_lock(&ce->timeline->mutex); + if (!mutex_trylock(&ce->timeline->mutex)) { + /* Unable to lock the kernel timeline, is the engine stuck? 
*/ + if (xchg(&engine->heartbeat.blocked, serial) == serial) + intel_gt_handle_error(engine->gt, engine->mask, + I915_ERROR_CAPTURE, + "no heartbeat on %s", + engine->name); + goto out; + } intel_context_enter(ce); rq = __i915_request_create(ce, GFP_NOWAIT | __GFP_NOWARN); diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h index 073c3769e8cc..490af81bd6f3 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h @@ -348,6 +348,7 @@ struct intel_engine_cs { struct { struct delayed_work work; struct i915_request *systole; + unsigned long blocked; } heartbeat; unsigned long serial; From 8f125dafb390162723884510af074a3ef0dc667a Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 2 Jul 2020 10:52:19 +0100 Subject: [PATCH 220/222] drm/i915/gt: Move the heartbeat into the high priority system wq As we ensure that the heartbeat is reasonably fast (and should not block), move the heartbeat work into the system_highpri_wq to avoid having this essential task be blocked behind other slow work, such as our own retire_work_handler. 
References: https://gitlab.freedesktop.org/drm/intel/-/issues/2119 Signed-off-by: Chris Wilson Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20200702095219.963-2-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c index 1c6c6692dd17..8ffdf676c0a0 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c @@ -32,7 +32,7 @@ static bool next_heartbeat(struct intel_engine_cs *engine) delay = msecs_to_jiffies_timeout(delay); if (delay >= HZ) delay = round_jiffies_up_relative(delay); - mod_delayed_work(system_wq, &engine->heartbeat.work, delay); + mod_delayed_work(system_highpri_wq, &engine->heartbeat.work, delay); return true; } From 2377427cdd2b7514eb4c40241cf5c4dec63c1bec Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 2 Jul 2020 09:32:03 +0100 Subject: [PATCH 221/222] drm/i915: Drop vm.ref for duplicate vma on construction As we allow for parallel threads to create the same vma instance concurrently, and we only filter out the duplicates upon reacquiring the spinlock for the rbtree, we have to free the loser of the constructors' race. When freeing, we should also drop any resource references acquired for the redundant vma. 
Fixes: 2850748ef876 ("drm/i915: Pull i915_vma_pin under the vm->mutex") Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Cc: # v5.5+ Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20200702083225.20044-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_vma.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index 1f63c4a1f055..7fe1f317cd2b 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -198,6 +198,7 @@ vma_create(struct drm_i915_gem_object *obj, cmp = i915_vma_compare(pos, vm, view); if (cmp == 0) { spin_unlock(&obj->vma.lock); + i915_vm_put(vm); i915_vma_free(vma); return pos; } From d524b87f77364db096855d7eb714ffacec974ddf Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Thu, 2 Jul 2020 21:25:28 +0300 Subject: [PATCH 222/222] drm/i915: Update DRIVER_DATE to 20200702 Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/i915_drv.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 6e9072ab30a1..2c2e88d49f3e 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -108,8 +108,8 @@ #define DRIVER_NAME "i915" #define DRIVER_DESC "Intel Graphics" -#define DRIVER_DATE "20200515" -#define DRIVER_TIMESTAMP 1589543364 +#define DRIVER_DATE "20200702" +#define DRIVER_TIMESTAMP 1593714328 struct drm_i915_gem_object;