From 858bc21f0637c407601a05626854ae58b242f75d Mon Sep 17 00:00:00 2001 From: Jesse Barnes Date: Tue, 4 Jan 2011 10:46:49 -0800 Subject: [PATCH 01/37] drm/i915: check eDP encoder correctly when setting modes We were using a stale pointer in the check which caused us to use CPU attached DP params when we should have been using PCH attached params. Signed-off-by: Jesse Barnes Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=31988 Tested-by: Jan-Hendrik Zab Tested-by: Christoph Lukas Signed-off-by: Chris Wilson Cc: stable@kernel.org --- drivers/gpu/drm/i915/intel_display.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 0abe79fb6385..f4d1797350f6 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -3951,7 +3951,7 @@ static int intel_crtc_mode_set(struct drm_crtc *crtc, int lane = 0, link_bw, bpp; /* CPU eDP doesn't require FDI link, so just set DP M/N according to current link config */ - if (has_edp_encoder && !intel_encoder_is_pch_edp(&encoder->base)) { + if (has_edp_encoder && !intel_encoder_is_pch_edp(&has_edp_encoder->base)) { target_clock = mode->clock; intel_edp_link_config(has_edp_encoder, &lane, &link_bw); From 37f809755845cc3e18e8216c04525bdb885fa13b Mon Sep 17 00:00:00 2001 From: Jesse Barnes Date: Wed, 5 Jan 2011 14:45:24 -0800 Subject: [PATCH 02/37] drm/i915: make DP training try a little harder When trying to do channel equalization, we need to make sure we still have clock recovery on all lanes while training. We also need to try clock recovery again if we lose the clock or if channel eq fails 5 times. We'll try clock recovery up to 5 more times before giving up entirely. Gets suspend/resume working on my Vaio again and brings us back into compliance with the DP training sequence spec. Signed-off-by: Jesse Barnes Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/intel_dp.c | 27 +++++++++++++++++++++++---- 1 file changed, 23 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index 1dc60408d5b8..c768e30e5e85 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -1334,17 +1334,24 @@ intel_dp_complete_link_train(struct intel_dp *intel_dp) struct drm_device *dev = intel_dp->base.base.dev; struct drm_i915_private *dev_priv = dev->dev_private; bool channel_eq = false; - int tries; + int tries, cr_tries; u32 reg; uint32_t DP = intel_dp->DP; /* channel equalization */ tries = 0; + cr_tries = 0; channel_eq = false; for (;;) { /* Use intel_dp->train_set[0] to set the voltage and pre emphasis values */ uint32_t signal_levels; + if (cr_tries > 5) { + DRM_ERROR("failed to train DP, aborting\n"); + intel_dp_link_down(intel_dp); + break; + } + if (IS_GEN6(dev) && is_edp(intel_dp)) { signal_levels = intel_gen6_edp_signal_levels(intel_dp->train_set[0]); DP = (DP & ~EDP_LINK_TRAIN_VOL_EMP_MASK_SNB) | signal_levels; @@ -1367,14 +1374,26 @@ intel_dp_complete_link_train(struct intel_dp *intel_dp) if (!intel_dp_get_link_status(intel_dp)) break; + /* Make sure clock is still ok */ + if (!intel_clock_recovery_ok(intel_dp->link_status, intel_dp->lane_count)) { + intel_dp_start_link_train(intel_dp); + cr_tries++; + continue; + } + if (intel_channel_eq_ok(intel_dp)) { channel_eq = true; break; } - /* Try 5 times */ - if (tries > 5) - break; + /* Try 5 times, then try clock recovery if that fails */ + if (tries > 5) { + intel_dp_link_down(intel_dp); + intel_dp_start_link_train(intel_dp); + tries = 0; + cr_tries++; + continue; + } /* Compute new intel_dp->train_set as requested by target */ intel_get_adjust_train(intel_dp); From f5afcd3dd0dca7fe869311c51da54d5a889191ba Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?David=20M=C3=BCller?= Date: Thu, 6 Jan 2011 12:29:32 +0000 Subject: [PATCH 03/37] drm/i915/crt: Check for a analog monitor in case of DVI-I MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since Linux 2.6.36 the digital output on my system (855GME + DVI-I) is not working any longer. The analog output is always activated regardless of the type of monitor attached. The culprit seems to be intel_crt_detect_ddc(), which returns true as soon as an ACK from the EDID device is received. Obviously this approach does not work with DVI-I where the analog and digital outputs share a common DDC bus. In a similar manner to the shared DDC wire, ala the "Mac Mini Hack", we need an additional check to make sure that there really is an analog device attached to the DDC. Signed-off-by: David Müller Signed-off-by: Chris Wilson Cc: stable@kernel.org --- drivers/gpu/drm/i915/intel_crt.c | 30 +++++++++++++++++++++++++----- 1 file changed, 25 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_crt.c b/drivers/gpu/drm/i915/intel_crt.c index 8df574316063..17035b87ee46 100644 --- a/drivers/gpu/drm/i915/intel_crt.c +++ b/drivers/gpu/drm/i915/intel_crt.c @@ -30,6 +30,7 @@ #include "drm.h" #include "drm_crtc.h" #include "drm_crtc_helper.h" +#include "drm_edid.h" #include "intel_drv.h" #include "i915_drm.h" #include "i915_drv.h" @@ -287,8 +288,9 @@ static bool intel_crt_ddc_probe(struct drm_i915_private *dev_priv, int ddc_bus) return i2c_transfer(&dev_priv->gmbus[ddc_bus].adapter, msgs, 1) == 1; } -static bool intel_crt_detect_ddc(struct intel_crt *crt) +static bool intel_crt_detect_ddc(struct drm_connector *connector) { + struct intel_crt *crt = intel_attached_crt(connector); struct drm_i915_private *dev_priv = crt->base.base.dev->dev_private; /* CRT should always be at 0, but check anyway */ @@ -301,8 +303,26 @@ static bool intel_crt_detect_ddc(struct intel_crt *crt) } if (intel_ddc_probe(&crt->base, dev_priv->crt_ddc_pin)) { - DRM_DEBUG_KMS("CRT detected via DDC:0x50 [EDID]\n"); - return true; + struct edid *edid; + bool is_digital = false; + + edid = drm_get_edid(connector, + &dev_priv->gmbus[dev_priv->crt_ddc_pin].adapter); + /* + * This may be a DVI-I connector with a shared DDC + * link between analog and digital outputs, so we + * have to check the EDID input spec of the attached device. + */ + if (edid != NULL) { + is_digital = edid->input & DRM_EDID_INPUT_DIGITAL; + connector->display_info.raw_edid = NULL; + kfree(edid); + } + + if (!is_digital) { + DRM_DEBUG_KMS("CRT detected via DDC:0x50 [EDID]\n"); + return true; + } } return false; @@ -458,7 +478,7 @@ intel_crt_detect(struct drm_connector *connector, bool force) } } - if (intel_crt_detect_ddc(crt)) + if (intel_crt_detect_ddc(connector)) return connector_status_connected; if (!force) @@ -472,7 +492,7 @@ intel_crt_detect(struct drm_connector *connector, bool force) crtc = intel_get_load_detect_pipe(&crt->base, connector, NULL, &dpms_mode); if (crtc) { - if (intel_crt_detect_ddc(crt)) + if (intel_crt_detect_ddc(connector)) status = connector_status_connected; else status = intel_crt_load_detect(crtc, crt); From 3c5a62b5226ca5db993660281e9c2a7275d9fb02 Mon Sep 17 00:00:00 2001 From: Yuanhan Liu Date: Thu, 6 Jan 2011 18:26:08 +0800 Subject: [PATCH 04/37] drm/i915: fix calculation of eDP signal levels on Sandybridge Some voltage swing/pre-emphasis level use the same value on eDP Sandybridge, like 400mv_0db and 600mv_0db are with the same value of (0x0 << 22). So, fix them, and point out the value if it isn't a supported voltage swing/pre-emphasis level. Signed-off-by: Yuanhan Liu Signed-off-by: Chris Wilson Cc: stable@kernel.org --- drivers/gpu/drm/i915/i915_reg.h | 9 +++++---- drivers/gpu/drm/i915/intel_dp.c | 23 ++++++++++++++++------- 2 files changed, 21 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 8f948a6fbc1c..677eca65a4bc 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -3174,10 +3174,11 @@ #define EDP_LINK_TRAIN_600MV_3_5DB_SNB_A (0x01<<22) #define EDP_LINK_TRAIN_800MV_0DB_SNB_A (0x0<<22) /* SNB B-stepping */ -#define EDP_LINK_TRAIN_400MV_0DB_SNB_B (0x0<<22) -#define EDP_LINK_TRAIN_400MV_6DB_SNB_B (0x3a<<22) -#define EDP_LINK_TRAIN_600MV_3_5DB_SNB_B (0x39<<22) -#define EDP_LINK_TRAIN_800MV_0DB_SNB_B (0x38<<22) +#define EDP_LINK_TRAIN_400_600MV_0DB_SNB_B (0x0<<22) +#define EDP_LINK_TRAIN_400MV_3_5DB_SNB_B (0x1<<22) +#define EDP_LINK_TRAIN_400_600MV_6DB_SNB_B (0x3a<<22) +#define EDP_LINK_TRAIN_600_800MV_3_5DB_SNB_B (0x39<<22) +#define EDP_LINK_TRAIN_800_1200MV_0DB_SNB_B (0x38<<22) #define EDP_LINK_TRAIN_VOL_EMP_MASK_SNB (0x3f<<22) #define FORCEWAKE 0xA18C diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index c768e30e5e85..1f4242b682c8 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -1153,18 +1153,27 @@ intel_dp_signal_levels(uint8_t train_set, int lane_count) static uint32_t intel_gen6_edp_signal_levels(uint8_t train_set) { - switch (train_set & (DP_TRAIN_VOLTAGE_SWING_MASK|DP_TRAIN_PRE_EMPHASIS_MASK)) { + int signal_levels = train_set & (DP_TRAIN_VOLTAGE_SWING_MASK | + DP_TRAIN_PRE_EMPHASIS_MASK); + switch (signal_levels) { case DP_TRAIN_VOLTAGE_SWING_400 | DP_TRAIN_PRE_EMPHASIS_0: - return EDP_LINK_TRAIN_400MV_0DB_SNB_B; + case DP_TRAIN_VOLTAGE_SWING_600 | DP_TRAIN_PRE_EMPHASIS_0: + return EDP_LINK_TRAIN_400_600MV_0DB_SNB_B; + case DP_TRAIN_VOLTAGE_SWING_400 | DP_TRAIN_PRE_EMPHASIS_3_5: + return EDP_LINK_TRAIN_400MV_3_5DB_SNB_B; case DP_TRAIN_VOLTAGE_SWING_400 | DP_TRAIN_PRE_EMPHASIS_6: - return EDP_LINK_TRAIN_400MV_6DB_SNB_B; + case DP_TRAIN_VOLTAGE_SWING_600 | DP_TRAIN_PRE_EMPHASIS_6: + return EDP_LINK_TRAIN_400_600MV_6DB_SNB_B; case DP_TRAIN_VOLTAGE_SWING_600 | DP_TRAIN_PRE_EMPHASIS_3_5: - return EDP_LINK_TRAIN_600MV_3_5DB_SNB_B; + case DP_TRAIN_VOLTAGE_SWING_800 | DP_TRAIN_PRE_EMPHASIS_3_5: + return EDP_LINK_TRAIN_600_800MV_3_5DB_SNB_B; case DP_TRAIN_VOLTAGE_SWING_800 | DP_TRAIN_PRE_EMPHASIS_0: - return EDP_LINK_TRAIN_800MV_0DB_SNB_B; + case DP_TRAIN_VOLTAGE_SWING_1200 | DP_TRAIN_PRE_EMPHASIS_0: + return EDP_LINK_TRAIN_800_1200MV_0DB_SNB_B; default: - DRM_DEBUG_KMS("Unsupported voltage swing/pre-emphasis level\n"); - return EDP_LINK_TRAIN_400MV_0DB_SNB_B; + DRM_DEBUG_KMS("Unsupported voltage swing/pre-emphasis level:" + "0x%x\n", signal_levels); + return EDP_LINK_TRAIN_400_600MV_0DB_SNB_B; } } From 97aaf910731b03b27b1c4c8a58006a1dc99dcd9a Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 4 Jan 2011 20:10:52 +0000 Subject: [PATCH 05/37] drm/i915/sdvo: Defer detection of output capabilities until probing Alex Fiestas reported an issue with his HDMI connector being misdetected as DVI unless he had something connected upon boot. By moving the decision as to whether to use HDMI or DVI encoding for the HDMI capable output until we probe the monitor means that we should avoid sending a HDMI signal to a DVI monitor and also correctly detect hardware like Alex's. However, to really determine what connector is soldered onto the wire we need to inspect the VBT sdvo child devices - but can we trust it? Reported-by: Alex Fiestas Tested-by: Alex Fiestas Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=32828 Signed-off-by: Chris Wilson Cc: stable@kernel.org --- drivers/gpu/drm/i915/intel_sdvo.c | 33 +++++++++++-------------------- 1 file changed, 11 insertions(+), 22 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_sdvo.c b/drivers/gpu/drm/i915/intel_sdvo.c index 9d0af36a13ec..45cd37652a37 100644 --- a/drivers/gpu/drm/i915/intel_sdvo.c +++ b/drivers/gpu/drm/i915/intel_sdvo.c @@ -1024,9 +1024,13 @@ static void intel_sdvo_mode_set(struct drm_encoder *encoder, if (!intel_sdvo_set_target_input(intel_sdvo)) return; - if (intel_sdvo->has_hdmi_monitor && - !intel_sdvo_set_avi_infoframe(intel_sdvo)) - return; + if (intel_sdvo->has_hdmi_monitor) { + intel_sdvo_set_encode(intel_sdvo, SDVO_ENCODE_HDMI); + intel_sdvo_set_colorimetry(intel_sdvo, + SDVO_COLORIMETRY_RGB256); + intel_sdvo_set_avi_infoframe(intel_sdvo); + } else + intel_sdvo_set_encode(intel_sdvo, SDVO_ENCODE_DVI); if (intel_sdvo->is_tv && !intel_sdvo_set_tv_format(intel_sdvo)) @@ -1398,6 +1402,9 @@ intel_sdvo_detect(struct drm_connector *connector, bool force) intel_sdvo->attached_output = response; + intel_sdvo->has_hdmi_monitor = false; + intel_sdvo->has_hdmi_audio = false; + if ((intel_sdvo_connector->output_flag & response) == 0) ret = connector_status_disconnected; else if (response & SDVO_TMDS_MASK) @@ -1922,20 +1929,7 @@ intel_sdvo_select_i2c_bus(struct drm_i915_private *dev_priv, static bool intel_sdvo_is_hdmi_connector(struct intel_sdvo *intel_sdvo, int device) { - int is_hdmi; - - if (!intel_sdvo_check_supp_encode(intel_sdvo)) - return false; - - if (!intel_sdvo_set_target_output(intel_sdvo, - device == 0 ? SDVO_OUTPUT_TMDS0 : SDVO_OUTPUT_TMDS1)) - return false; - - is_hdmi = 0; - if (!intel_sdvo_get_value(intel_sdvo, SDVO_CMD_GET_ENCODE, &is_hdmi, 1)) - return false; - - return !!is_hdmi; + return intel_sdvo_check_supp_encode(intel_sdvo); } static u8 @@ -2037,12 +2031,7 @@ intel_sdvo_dvi_init(struct intel_sdvo *intel_sdvo, int device) connector->connector_type = DRM_MODE_CONNECTOR_DVID; if (intel_sdvo_is_hdmi_connector(intel_sdvo, device)) { - /* enable hdmi encoding mode if supported */ - intel_sdvo_set_encode(intel_sdvo, SDVO_ENCODE_HDMI); - intel_sdvo_set_colorimetry(intel_sdvo, - SDVO_COLORIMETRY_RGB256); connector->connector_type = DRM_MODE_CONNECTOR_HDMIA; - intel_sdvo->is_hdmi = true; } intel_sdvo->base.clone_mask = ((1 << INTEL_SDVO_NON_TV_CLONE_BIT) | From 47356eb67285014527a5ab87543ba1fae3d1e10a Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 11 Jan 2011 17:06:04 +0000 Subject: [PATCH 06/37] drm/i915/panel: Only record the backlight level when it is enabled By tracking the current status of the backlight we can prevent recording the value of the current backlight when we have disabled it. And so prevent restoring it to 'off' after an unbalanced sequence of intel_lvds_disable/enable. Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=22672 Tested-by: Alex Riesen Tested-by: Larry Finger Signed-off-by: Chris Wilson Cc: stable@kernel.org --- drivers/gpu/drm/i915/i915_drv.h | 1 + drivers/gpu/drm/i915/intel_display.c | 2 ++ drivers/gpu/drm/i915/intel_drv.h | 3 +++ drivers/gpu/drm/i915/intel_lvds.c | 10 ++------- drivers/gpu/drm/i915/intel_panel.c | 31 ++++++++++++++++++++++++++++ 5 files changed, 39 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index aac1bf332f75..972e08e4e054 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -332,6 +332,7 @@ typedef struct drm_i915_private { /* LVDS info */ int backlight_level; /* restore backlight to this value */ + bool backlight_enabled; struct drm_display_mode *panel_fixed_mode; struct drm_display_mode *lfp_lvds_vbt_mode; /* if any */ struct drm_display_mode *sdvo_lvds_vbt_mode; /* if any */ diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index f4d1797350f6..bc829bbc14c8 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -5802,6 +5802,8 @@ static void intel_setup_outputs(struct drm_device *dev) encoder->base.possible_clones = intel_encoder_clones(dev, encoder->clone_mask); } + + intel_panel_setup_backlight(dev); } static void intel_user_framebuffer_destroy(struct drm_framebuffer *fb) diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index d782ad9fd6db..74db2557d644 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -257,6 +257,9 @@ extern void intel_pch_panel_fitting(struct drm_device *dev, extern u32 intel_panel_get_max_backlight(struct drm_device *dev); extern u32 intel_panel_get_backlight(struct drm_device *dev); extern void intel_panel_set_backlight(struct drm_device *dev, u32 level); +extern void intel_panel_setup_backlight(struct drm_device *dev); +extern void intel_panel_enable_backlight(struct drm_device *dev); +extern void intel_panel_disable_backlight(struct drm_device *dev); extern void intel_crtc_load_lut(struct drm_crtc *crtc); extern void intel_encoder_prepare (struct drm_encoder *encoder); diff --git a/drivers/gpu/drm/i915/intel_lvds.c b/drivers/gpu/drm/i915/intel_lvds.c index aa2307080be2..4e7dcb330dad 100644 --- a/drivers/gpu/drm/i915/intel_lvds.c +++ b/drivers/gpu/drm/i915/intel_lvds.c @@ -106,7 +106,7 @@ static void intel_lvds_enable(struct intel_lvds *intel_lvds) I915_WRITE(ctl_reg, I915_READ(ctl_reg) | POWER_TARGET_ON); POSTING_READ(lvds_reg); - intel_panel_set_backlight(dev, dev_priv->backlight_level); + intel_panel_enable_backlight(dev); } static void intel_lvds_disable(struct intel_lvds *intel_lvds) @@ -123,8 +123,7 @@ static void intel_lvds_disable(struct intel_lvds *intel_lvds) lvds_reg = LVDS; } - dev_priv->backlight_level = intel_panel_get_backlight(dev); - intel_panel_set_backlight(dev, 0); + intel_panel_disable_backlight(dev); I915_WRITE(ctl_reg, I915_READ(ctl_reg) & ~POWER_TARGET_ON); @@ -398,8 +397,6 @@ static void intel_lvds_prepare(struct drm_encoder *encoder) struct drm_i915_private *dev_priv = dev->dev_private; struct intel_lvds *intel_lvds = to_intel_lvds(encoder); - dev_priv->backlight_level = intel_panel_get_backlight(dev); - /* We try to do the minimum that is necessary in order to unlock * the registers for mode setting. * @@ -430,9 +427,6 @@ static void intel_lvds_commit(struct drm_encoder *encoder) struct drm_i915_private *dev_priv = dev->dev_private; struct intel_lvds *intel_lvds = to_intel_lvds(encoder); - if (dev_priv->backlight_level == 0) - dev_priv->backlight_level = intel_panel_get_max_backlight(dev); - /* Undo any unlocking done in prepare to prevent accidental * adjustment of the registers. */ diff --git a/drivers/gpu/drm/i915/intel_panel.c b/drivers/gpu/drm/i915/intel_panel.c index 7350ec2515c6..e00d200df3db 100644 --- a/drivers/gpu/drm/i915/intel_panel.c +++ b/drivers/gpu/drm/i915/intel_panel.c @@ -250,3 +250,34 @@ void intel_panel_set_backlight(struct drm_device *dev, u32 level) tmp &= ~BACKLIGHT_DUTY_CYCLE_MASK; I915_WRITE(BLC_PWM_CTL, tmp | level); } + +void intel_panel_disable_backlight(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + + if (dev_priv->backlight_enabled) { + dev_priv->backlight_level = intel_panel_get_backlight(dev); + dev_priv->backlight_enabled = false; + } + + intel_panel_set_backlight(dev, 0); +} + +void intel_panel_enable_backlight(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + + if (dev_priv->backlight_level == 0) + dev_priv->backlight_level = intel_panel_get_max_backlight(dev); + + intel_panel_set_backlight(dev, dev_priv->backlight_level); + dev_priv->backlight_enabled = true; +} + +void intel_panel_setup_backlight(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + + dev_priv->backlight_level = intel_panel_get_max_backlight(dev); + dev_priv->backlight_enabled = dev_priv->backlight_level != 0; +} From bee17e5ae6b68d21b9d193f34ccefeef9d4fffe0 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 11 Jan 2011 18:09:58 +0000 Subject: [PATCH 07/37] drm/i915/lvds: Always use 0 to disable the pfit controller ... and just any combination of bits & ~PFIT_ENABLE. This way we do not attempt disable to the panel fitter controller uselessly upon intel_lvds_disable(). Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/intel_lvds.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_lvds.c b/drivers/gpu/drm/i915/intel_lvds.c index 4e7dcb330dad..8f4f6bd33ee9 100644 --- a/drivers/gpu/drm/i915/intel_lvds.c +++ b/drivers/gpu/drm/i915/intel_lvds.c @@ -374,6 +374,10 @@ static bool intel_lvds_mode_fixup(struct drm_encoder *encoder, } out: + if ((pfit_control & PFIT_ENABLE) == 0) { + pfit_control = 0; + pfit_pgm_ratios = 0; + } if (pfit_control != intel_lvds->pfit_control || pfit_pgm_ratios != intel_lvds->pfit_pgm_ratios) { intel_lvds->pfit_control = pfit_control; From a6044e23b784544fe567db75dbf9c4f684bd6d5b Mon Sep 17 00:00:00 2001 From: Jesse Barnes Date: Mon, 20 Dec 2010 11:34:20 -0800 Subject: [PATCH 08/37] drm/i915: support overclocking on Sandy Bridge In some configuration, the PCU may allow us to overclock the GPU. Check for this case and adjust the max frequency as appropriate. Also initialize the min/max frequencies to default values as indicated by hardware. Signed-off-by: Jesse Barnes Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_reg.h | 1 + drivers/gpu/drm/i915/intel_display.c | 27 +++++++++++++++++++++++++++ 2 files changed, 28 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 677eca65a4bc..1bc816f3934b 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -3240,6 +3240,7 @@ #define GEN6_PCODE_MAILBOX 0x138124 #define GEN6_PCODE_READY (1<<31) +#define GEN6_READ_OC_PARAMS 0xc #define GEN6_PCODE_WRITE_MIN_FREQ_TABLE 0x9 #define GEN6_PCODE_DATA 0x138128 diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index bc829bbc14c8..efa88551cbe4 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -6147,6 +6147,10 @@ void intel_init_emon(struct drm_device *dev) void gen6_enable_rps(struct drm_i915_private *dev_priv) { + u32 rp_state_cap = I915_READ(GEN6_RP_STATE_CAP); + u32 gt_perf_status = I915_READ(GEN6_GT_PERF_STATUS); + u32 pcu_mbox; + int cur_freq, min_freq, max_freq; int i; /* Here begins a magic sequence of register writes to enable @@ -6218,6 +6222,29 @@ void gen6_enable_rps(struct drm_i915_private *dev_priv) 500)) DRM_ERROR("timeout waiting for pcode mailbox to finish\n"); + min_freq = (rp_state_cap & 0xff0000) >> 16; + max_freq = rp_state_cap & 0xff; + cur_freq = (gt_perf_status & 0xff00) >> 8; + + /* Check for overclock support */ + if (wait_for((I915_READ(GEN6_PCODE_MAILBOX) & GEN6_PCODE_READY) == 0, + 500)) + DRM_ERROR("timeout waiting for pcode mailbox to become idle\n"); + I915_WRITE(GEN6_PCODE_MAILBOX, GEN6_READ_OC_PARAMS); + pcu_mbox = I915_READ(GEN6_PCODE_DATA); + if (wait_for((I915_READ(GEN6_PCODE_MAILBOX) & GEN6_PCODE_READY) == 0, + 500)) + DRM_ERROR("timeout waiting for pcode mailbox to finish\n"); + if (pcu_mbox & (1<<31)) { /* OC supported */ + max_freq = pcu_mbox & 0xff; + DRM_DEBUG_DRIVER("overclocking supported, adjusting frequency max to %dMHz\n", pcu_mbox * 100); + } + + /* In units of 100MHz */ + dev_priv->max_delay = max_freq; + dev_priv->min_delay = min_freq; + dev_priv->cur_delay = cur_freq; + /* requires MSI enabled */ I915_WRITE(GEN6_PMIER, GEN6_PM_MBOX_EVENT | From 35c3047ad15849335242b847c94f180ef45db490 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 22 Dec 2010 14:07:12 +0000 Subject: [PATCH 09/37] drm/i915: Use the mappable sizes determined by GTT for consistency. There should be no difference, but we can eliminate redundant code. Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/intel_display.c | 7 +------ drivers/gpu/drm/i915/intel_fb.c | 20 +++++++++----------- 2 files changed, 10 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index efa88551cbe4..9d97fd4e5558 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -6694,12 +6694,7 @@ void intel_modeset_init(struct drm_device *dev) dev->mode_config.max_width = 8192; dev->mode_config.max_height = 8192; } - - /* set memory base */ - if (IS_GEN2(dev)) - dev->mode_config.fb_base = pci_resource_start(dev->pdev, 0); - else - dev->mode_config.fb_base = pci_resource_start(dev->pdev, 2); + dev->mode_config.fb_base = dev->agp->base; if (IS_MOBILE(dev) || !IS_GEN2(dev)) dev_priv->num_pipe = 2; diff --git a/drivers/gpu/drm/i915/intel_fb.c b/drivers/gpu/drm/i915/intel_fb.c index 701e830d0012..ee145a257287 100644 --- a/drivers/gpu/drm/i915/intel_fb.c +++ b/drivers/gpu/drm/i915/intel_fb.c @@ -62,6 +62,7 @@ static int intelfb_create(struct intel_fbdev *ifbdev, struct drm_fb_helper_surface_size *sizes) { struct drm_device *dev = ifbdev->helper.dev; + struct drm_i915_private *dev_priv = dev->dev_private; struct fb_info *info; struct drm_framebuffer *fb; struct drm_mode_fb_cmd mode_cmd; @@ -77,7 +78,7 @@ static int intelfb_create(struct intel_fbdev *ifbdev, mode_cmd.height = sizes->surface_height; mode_cmd.bpp = sizes->surface_bpp; - mode_cmd.pitch = ALIGN(mode_cmd.width * ((mode_cmd.bpp + 1) / 8), 64); + mode_cmd.pitch = ALIGN(mode_cmd.width * ((mode_cmd.bpp + 7) / 8), 64); mode_cmd.depth = sizes->surface_depth; size = mode_cmd.pitch * mode_cmd.height; @@ -120,6 +121,11 @@ static int intelfb_create(struct intel_fbdev *ifbdev, info->flags = FBINFO_DEFAULT | FBINFO_CAN_FORCE_OUTPUT; info->fbops = &intelfb_ops; + ret = fb_alloc_cmap(&info->cmap, 256, 0); + if (ret) { + ret = -ENOMEM; + goto out_unpin; + } /* setup aperture base/size for vesafb takeover */ info->apertures = alloc_apertures(1); if (!info->apertures) { @@ -127,10 +133,8 @@ static int intelfb_create(struct intel_fbdev *ifbdev, goto out_unpin; } info->apertures->ranges[0].base = dev->mode_config.fb_base; - if (!IS_GEN2(dev)) - info->apertures->ranges[0].size = pci_resource_len(dev->pdev, 2); - else - info->apertures->ranges[0].size = pci_resource_len(dev->pdev, 0); + info->apertures->ranges[0].size = + dev_priv->mm.gtt->gtt_mappable_entries << PAGE_SHIFT; info->fix.smem_start = dev->mode_config.fb_base + obj->gtt_offset; info->fix.smem_len = size; @@ -140,12 +144,6 @@ static int intelfb_create(struct intel_fbdev *ifbdev, ret = -ENOSPC; goto out_unpin; } - - ret = fb_alloc_cmap(&info->cmap, 256, 0); - if (ret) { - ret = -ENOMEM; - goto out_unpin; - } info->screen_size = size; // memset(info->screen_base, 0, size); From 55249baaa5cd188ebd9acdb047eeaed8092e4a93 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 22 Dec 2010 14:04:47 +0000 Subject: [PATCH 10/37] drm/i915: Workaround erratum on i830 for TAIL pointer within last 2 cachelines On i830 if the tail pointer is set to within 2 cachelines of the end of the buffer, the chip may hang. So instead if the tail were to land in that location, we pad the end of the buffer with NOPs, and start again at the beginning. Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/intel_ringbuffer.c | 13 ++++++++++--- drivers/gpu/drm/i915/intel_ringbuffer.h | 1 + 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 56bc95c056dd..2de0e45464c5 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -785,6 +785,14 @@ int intel_init_ring_buffer(struct drm_device *dev, if (ret) goto err_unmap; + /* Workaround an erratum on the i830 which causes a hang if + * the TAIL pointer points to within the last 2 cachelines + * of the buffer. + */ + ring->effective_size = ring->size; + if (IS_I830(ring->dev)) + ring->effective_size -= 128; + return 0; err_unmap: @@ -827,8 +835,7 @@ void intel_cleanup_ring_buffer(struct intel_ring_buffer *ring) static int intel_wrap_ring_buffer(struct intel_ring_buffer *ring) { unsigned int *virt; - int rem; - rem = ring->size - ring->tail; + int rem = ring->size - ring->tail; if (ring->space < rem) { int ret = intel_wait_ring_buffer(ring, rem); @@ -895,7 +902,7 @@ int intel_ring_begin(struct intel_ring_buffer *ring, int n = 4*num_dwords; int ret; - if (unlikely(ring->tail + n > ring->size)) { + if (unlikely(ring->tail + n > ring->effective_size)) { ret = intel_wrap_ring_buffer(ring); if (unlikely(ret)) return ret; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 8e2e357ad6ee..bbbf505c8b56 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -49,6 +49,7 @@ struct intel_ring_buffer { u32 tail; int space; int size; + int effective_size; struct intel_hw_status_page status_page; u32 irq_seqno; /* last seq seem at irq time */ From c97689d8860f086125e7ff9cd730027a0057ca4f Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 23 Dec 2010 10:40:38 +0000 Subject: [PATCH 11/37] agp/intel: Flush the chipset write buffers when changing GTT base Flush the chipset write buffers before and after adjusting the GTT base register, just in case. We only modify this value upon initialisation (boot and resume) so there should be no outstanding writes, however there are always those persistent PGTBL_ER that keep getting reported upon resume. Signed-off-by: Chris Wilson --- drivers/char/agp/intel-agp.h | 2 ++ drivers/char/agp/intel-gtt.c | 9 +++++++++ 2 files changed, 11 insertions(+) diff --git a/drivers/char/agp/intel-agp.h b/drivers/char/agp/intel-agp.h index 010e3defd6c3..c195bfeade11 100644 --- a/drivers/char/agp/intel-agp.h +++ b/drivers/char/agp/intel-agp.h @@ -94,6 +94,8 @@ #define G4x_GMCH_SIZE_VT_1_5M (0xa << 8) #define G4x_GMCH_SIZE_VT_2M (0xc << 8) +#define GFX_FLSH_CNTL 0x2170 /* 915+ */ + #define I810_DRAM_CTL 0x3000 #define I810_DRAM_ROW_0 0x00000001 #define I810_DRAM_ROW_0_SDRAM 0x00000001 diff --git a/drivers/char/agp/intel-gtt.c b/drivers/char/agp/intel-gtt.c index 356f73e0d17e..da8161806f39 100644 --- a/drivers/char/agp/intel-gtt.c +++ b/drivers/char/agp/intel-gtt.c @@ -814,6 +814,12 @@ static bool intel_enable_gtt(void) } } + /* On the resume path we may be adjusting the PGTBL value, so + * be paranoid and flush all chipset write buffers... + */ + if (INTEL_GTT_GEN >= 3) + writel(0, intel_private.registers+GFX_FLSH_CNTL); + reg = intel_private.registers+I810_PGETBL_CTL; writel(intel_private.PGETBL_save, reg); if (HAS_PGTBL_EN && (readl(reg) & I810_PGETBL_ENABLED) == 0) { @@ -823,6 +829,9 @@ static bool intel_enable_gtt(void) return false; } + if (INTEL_GTT_GEN >= 3) + writel(0, intel_private.registers+GFX_FLSH_CNTL); + return true; } From b79d4990226defc3789f9ba492b27e9e56790857 Mon Sep 17 00:00:00 2001 From: Jesse Barnes Date: Tue, 21 Dec 2010 13:10:23 -0800 Subject: [PATCH 12/37] drm/i915: support low power watermarks on Ironlake This patch actually makes the watermark code even uglier (if that's possible), but has the advantage of sharing code between SNB and ILK at least. Longer term we should refactor the watermark stuff into its own file and clean it up now that we know how it's supposed to work. Supporting WM2 on my Vaio reduced power consumption by around 0.5W, so this patch is definitely worthwhile (though it also needs lots of test coverage). Signed-off-by: Jesse Barnes [ickle: pass the watermark structs arounds] Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_reg.h | 5 + drivers/gpu/drm/i915/intel_display.c | 300 +++++++++++++-------------- 2 files changed, 154 insertions(+), 151 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 1bc816f3934b..ecfb0023f60d 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -2345,8 +2345,13 @@ /* Memory latency timer register */ #define MLTR_ILK 0x11222 +#define MLTR_WM1_SHIFT 0 +#define MLTR_WM2_SHIFT 8 /* the unit of memory self-refresh latency time is 0.5us */ #define ILK_SRLT_MASK 0x3f +#define ILK_LATENCY(shift) (I915_READ(MLTR_ILK) >> (shift) & ILK_SRLT_MASK) +#define ILK_READ_WM1_LATENCY() ILK_LATENCY(MLTR_WM1_SHIFT) +#define ILK_READ_WM2_LATENCY() ILK_LATENCY(MLTR_WM2_SHIFT) /* define the fifo size on Ironlake */ #define ILK_DISPLAY_FIFO 128 diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 9d97fd4e5558..79753b8ac797 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -3456,14 +3456,109 @@ static bool ironlake_compute_wm0(struct drm_device *dev, return true; } +/* + * Check the wm result. + * + * If any calculated watermark values is larger than the maximum value that + * can be programmed into the associated watermark register, that watermark + * must be disabled. + */ +static bool ironlake_check_srwm(struct drm_device *dev, int level, + int fbc_wm, int display_wm, int cursor_wm, + const struct intel_watermark_params *display, + const struct intel_watermark_params *cursor) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + + DRM_DEBUG_KMS("watermark %d: display plane %d, fbc lines %d," + " cursor %d\n", level, display_wm, fbc_wm, cursor_wm); + + if (fbc_wm > SNB_FBC_MAX_SRWM) { + DRM_DEBUG_KMS("fbc watermark(%d) is too large(%d), disabling wm%d+\n", + fbc_wm, SNB_FBC_MAX_SRWM, level); + + /* fbc has it's own way to disable FBC WM */ + I915_WRITE(DISP_ARB_CTL, + I915_READ(DISP_ARB_CTL) | DISP_FBC_WM_DIS); + return false; + } + + if (display_wm > display->max_wm) { + DRM_DEBUG_KMS("display watermark(%d) is too large(%d), disabling wm%d+\n", + display_wm, SNB_DISPLAY_MAX_SRWM, level); + return false; + } + + if (cursor_wm > cursor->max_wm) { + DRM_DEBUG_KMS("cursor watermark(%d) is too large(%d), disabling wm%d+\n", + cursor_wm, SNB_CURSOR_MAX_SRWM, level); + return false; + } + + if (!(fbc_wm || display_wm || cursor_wm)) { + DRM_DEBUG_KMS("latency %d is 0, disabling wm%d+\n", level, level); + return false; + } + + return true; +} + +/* + * Compute watermark values of WM[1-3], + */ +static bool ironlake_compute_srwm(struct drm_device *dev, int level, + int hdisplay, int htotal, + int pixel_size, int clock, int latency_ns, + const struct intel_watermark_params *display, + const struct intel_watermark_params *cursor, + int *fbc_wm, int *display_wm, int *cursor_wm) +{ + + unsigned long line_time_us; + int line_count, line_size; + int small, large; + int entries; + + if (!latency_ns) { + *fbc_wm = *display_wm = *cursor_wm = 0; + return false; + } + + line_time_us = (htotal * 1000) / clock; + line_count = (latency_ns / line_time_us + 1000) / 1000; + line_size = hdisplay * pixel_size; + + /* Use the minimum of the small and large buffer method for primary */ + small = ((clock * pixel_size / 1000) * latency_ns) / 1000; + large = line_count * line_size; + + entries = DIV_ROUND_UP(min(small, large), display->cacheline_size); + *display_wm = entries + display->guard_size; + + /* + * Spec says: + * FBC WM = ((Final Primary WM * 64) / number of bytes per line) + 2 + */ + *fbc_wm = DIV_ROUND_UP(*display_wm * 64, line_size) + 2; + + /* calculate the self-refresh watermark for display cursor */ + entries = line_count * pixel_size * 64; + entries = DIV_ROUND_UP(entries, cursor->cacheline_size); + *cursor_wm = entries + cursor->guard_size; + + return ironlake_check_srwm(dev, level, + *fbc_wm, *display_wm, *cursor_wm, + display, cursor); +} + static void ironlake_update_wm(struct drm_device *dev, int planea_clock, int planeb_clock, - int sr_hdisplay, int sr_htotal, + int hdisplay, int htotal, int pixel_size) { struct drm_i915_private *dev_priv = dev->dev_private; - int plane_wm, cursor_wm, enabled; - int tmp; + int fbc_wm, plane_wm, cursor_wm, enabled; + int clock; enabled = 0; if (ironlake_compute_wm0(dev, 0, @@ -3498,152 +3593,49 @@ static void ironlake_update_wm(struct drm_device *dev, * Calculate and update the self-refresh watermark only when one * display plane is used. */ - tmp = 0; - if (enabled == 1) { - unsigned long line_time_us; - int small, large, plane_fbc; - int sr_clock, entries; - int line_count, line_size; - /* Read the self-refresh latency. The unit is 0.5us */ - int ilk_sr_latency = I915_READ(MLTR_ILK) & ILK_SRLT_MASK; + I915_WRITE(WM3_LP_ILK, 0); + I915_WRITE(WM2_LP_ILK, 0); + I915_WRITE(WM1_LP_ILK, 0); - sr_clock = planea_clock ? planea_clock : planeb_clock; - line_time_us = (sr_htotal * 1000) / sr_clock; + if (enabled != 1) + return; - /* Use ns/us then divide to preserve precision */ - line_count = ((ilk_sr_latency * 500) / line_time_us + 1000) - / 1000; - line_size = sr_hdisplay * pixel_size; + clock = planea_clock ? planea_clock : planeb_clock; - /* Use the minimum of the small and large buffer method for primary */ - small = ((sr_clock * pixel_size / 1000) * (ilk_sr_latency * 500)) / 1000; - large = line_count * line_size; + /* WM1 */ + if (!ironlake_compute_srwm(dev, 1, hdisplay, htotal, pixel_size, + clock, ILK_READ_WM1_LATENCY() * 500, + &ironlake_display_srwm_info, + &ironlake_cursor_srwm_info, + &fbc_wm, &plane_wm, &cursor_wm)) + return; - entries = DIV_ROUND_UP(min(small, large), - ironlake_display_srwm_info.cacheline_size); + I915_WRITE(WM1_LP_ILK, + WM1_LP_SR_EN | + (ILK_READ_WM1_LATENCY() << WM1_LP_LATENCY_SHIFT) | + (fbc_wm << WM1_LP_FBC_SHIFT) | + (plane_wm << WM1_LP_SR_SHIFT) | + cursor_wm); - plane_fbc = entries * 64; - plane_fbc = DIV_ROUND_UP(plane_fbc, line_size); + /* WM2 */ + if (!ironlake_compute_srwm(dev, 2, hdisplay, htotal, pixel_size, + clock, ILK_READ_WM2_LATENCY() * 500, + &ironlake_display_srwm_info, + &ironlake_cursor_srwm_info, + &fbc_wm, &plane_wm, &cursor_wm)) + return; - plane_wm = entries + ironlake_display_srwm_info.guard_size; - if (plane_wm > (int)ironlake_display_srwm_info.max_wm) - plane_wm = ironlake_display_srwm_info.max_wm; - - /* calculate the self-refresh watermark for display cursor */ - entries = line_count * pixel_size * 64; - entries = DIV_ROUND_UP(entries, - ironlake_cursor_srwm_info.cacheline_size); - - cursor_wm = entries + ironlake_cursor_srwm_info.guard_size; - if (cursor_wm > (int)ironlake_cursor_srwm_info.max_wm) - cursor_wm = ironlake_cursor_srwm_info.max_wm; - - /* configure watermark and enable self-refresh */ - tmp = (WM1_LP_SR_EN | - (ilk_sr_latency << WM1_LP_LATENCY_SHIFT) | - (plane_fbc << WM1_LP_FBC_SHIFT) | - (plane_wm << WM1_LP_SR_SHIFT) | - cursor_wm); - DRM_DEBUG_KMS("self-refresh watermark: display plane %d, fbc lines %d," - " cursor %d\n", plane_wm, plane_fbc, cursor_wm); - } - I915_WRITE(WM1_LP_ILK, tmp); - /* XXX setup WM2 and WM3 */ -} - -/* - * Check the wm result. - * - * If any calculated watermark values is larger than the maximum value that - * can be programmed into the associated watermark register, that watermark - * must be disabled. - * - * Also return true if all of those watermark values is 0, which is set by - * sandybridge_compute_srwm, to indicate the latency is ZERO. - */ -static bool sandybridge_check_srwm(struct drm_device *dev, int level, - int fbc_wm, int display_wm, int cursor_wm) -{ - struct drm_i915_private *dev_priv = dev->dev_private; - - DRM_DEBUG_KMS("watermark %d: display plane %d, fbc lines %d," - " cursor %d\n", level, display_wm, fbc_wm, cursor_wm); - - if (fbc_wm > SNB_FBC_MAX_SRWM) { - DRM_DEBUG_KMS("fbc watermark(%d) is too large(%d), disabling wm%d+\n", - fbc_wm, SNB_FBC_MAX_SRWM, level); - - /* fbc has it's own way to disable FBC WM */ - I915_WRITE(DISP_ARB_CTL, - I915_READ(DISP_ARB_CTL) | DISP_FBC_WM_DIS); - return false; - } - - if (display_wm > SNB_DISPLAY_MAX_SRWM) { - DRM_DEBUG_KMS("display watermark(%d) is too large(%d), disabling wm%d+\n", - display_wm, SNB_DISPLAY_MAX_SRWM, level); - return false; - } - - if (cursor_wm > SNB_CURSOR_MAX_SRWM) { - DRM_DEBUG_KMS("cursor watermark(%d) is too large(%d), disabling wm%d+\n", - cursor_wm, SNB_CURSOR_MAX_SRWM, level); - return false; - } - - if (!(fbc_wm || display_wm || cursor_wm)) { - DRM_DEBUG_KMS("latency %d is 0, disabling wm%d+\n", level, level); - return false; - } - - return true; -} - -/* - * Compute watermark values of WM[1-3], - */ -static bool sandybridge_compute_srwm(struct drm_device *dev, int level, - int hdisplay, int htotal, int pixel_size, - int clock, int latency_ns, int *fbc_wm, - int *display_wm, int *cursor_wm) -{ - - unsigned long line_time_us; - int small, large; - int entries; - int line_count, line_size; - - if (!latency_ns) { - *fbc_wm = *display_wm = *cursor_wm = 0; - return false; - } - - line_time_us = (htotal * 1000) / clock; - line_count = (latency_ns / line_time_us + 1000) / 1000; - line_size = hdisplay * pixel_size; - - /* Use the minimum of the small and large buffer method for primary */ - small = ((clock * pixel_size / 1000) * latency_ns) / 1000; - large = line_count * line_size; - - entries = DIV_ROUND_UP(min(small, large), - sandybridge_display_srwm_info.cacheline_size); - *display_wm = entries + sandybridge_display_srwm_info.guard_size; + I915_WRITE(WM2_LP_ILK, + WM2_LP_EN | + (ILK_READ_WM2_LATENCY() << WM1_LP_LATENCY_SHIFT) | + (fbc_wm << WM1_LP_FBC_SHIFT) | + (plane_wm << WM1_LP_SR_SHIFT) | + cursor_wm); /* - * Spec said: - * FBC WM = ((Final Primary WM * 64) / number of bytes per line) + 2 + * WM3 is unsupported on ILK, probably because we don't have latency + * data for that power state */ - *fbc_wm = DIV_ROUND_UP(*display_wm * 64, line_size) + 2; - - /* calculate the self-refresh watermark for display cursor */ - entries = line_count * pixel_size * 64; - entries = DIV_ROUND_UP(entries, - sandybridge_cursor_srwm_info.cacheline_size); - *cursor_wm = entries + sandybridge_cursor_srwm_info.guard_size; - - return sandybridge_check_srwm(dev, level, - *fbc_wm, *display_wm, *cursor_wm); } static void sandybridge_update_wm(struct drm_device *dev, @@ -3701,9 +3693,11 @@ static void sandybridge_update_wm(struct drm_device *dev, clock = planea_clock ? planea_clock : planeb_clock; /* WM1 */ - if (!sandybridge_compute_srwm(dev, 1, hdisplay, htotal, pixel_size, - clock, SNB_READ_WM1_LATENCY() * 500, - &fbc_wm, &plane_wm, &cursor_wm)) + if (!ironlake_compute_srwm(dev, 1, hdisplay, htotal, pixel_size, + clock, SNB_READ_WM1_LATENCY() * 500, + &sandybridge_display_srwm_info, + &sandybridge_cursor_srwm_info, + &fbc_wm, &plane_wm, &cursor_wm)) return; I915_WRITE(WM1_LP_ILK, @@ -3714,10 +3708,12 @@ static void sandybridge_update_wm(struct drm_device *dev, cursor_wm); /* WM2 */ - if (!sandybridge_compute_srwm(dev, 2, - hdisplay, htotal, pixel_size, - clock, SNB_READ_WM2_LATENCY() * 500, - &fbc_wm, &plane_wm, &cursor_wm)) + if (!ironlake_compute_srwm(dev, 2, + hdisplay, htotal, pixel_size, + clock, SNB_READ_WM2_LATENCY() * 500, + &sandybridge_display_srwm_info, + &sandybridge_cursor_srwm_info, + &fbc_wm, &plane_wm, &cursor_wm)) return; I915_WRITE(WM2_LP_ILK, @@ -3728,10 +3724,12 @@ static void sandybridge_update_wm(struct drm_device *dev, cursor_wm); /* WM3 */ - if (!sandybridge_compute_srwm(dev, 3, - hdisplay, htotal, pixel_size, - clock, SNB_READ_WM3_LATENCY() * 500, - &fbc_wm, &plane_wm, &cursor_wm)) + if (!ironlake_compute_srwm(dev, 3, + hdisplay, htotal, pixel_size, + clock, SNB_READ_WM3_LATENCY() * 500, + &sandybridge_display_srwm_info, + &sandybridge_cursor_srwm_info, + &fbc_wm, &plane_wm, &cursor_wm)) return; I915_WRITE(WM3_LP_ILK, From a0fa62d3b6afaa260cad8ccd6944e81ad01c7cf3 Mon Sep 17 00:00:00 2001 From: Yuanhan Liu Date: Thu, 23 Dec 2010 16:35:40 +0800 Subject: [PATCH 13/37] drm/i915: fix the wrong latency value while computing wm0 On Ironlake, the LP0 latency is hardcoded and in ns unit, while on Sandybridge, it comes from a register and with unit 0.1 us. So, fix the wrong latency value while computing wm0 on Ironlake and Sandybridge. Signed-off-by: Yuanhan Liu Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/intel_display.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 79753b8ac797..75c433e6f457 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -3418,9 +3418,9 @@ static void i830_update_wm(struct drm_device *dev, int planea_clock, int unused, static bool ironlake_compute_wm0(struct drm_device *dev, int pipe, const struct intel_watermark_params *display, - int display_latency, + int display_latency_ns, const struct intel_watermark_params *cursor, - int cursor_latency, + int cursor_latency_ns, int *plane_wm, int *cursor_wm) { @@ -3438,7 +3438,7 @@ static bool ironlake_compute_wm0(struct drm_device *dev, pixel_size = crtc->fb->bits_per_pixel / 8; /* Use the small buffer method to calculate plane watermark */ - entries = ((clock * pixel_size / 1000) * display_latency * 100) / 1000; + entries = ((clock * pixel_size / 1000) * display_latency_ns) / 1000; entries = DIV_ROUND_UP(entries, display->cacheline_size); *plane_wm = entries + display->guard_size; if (*plane_wm > (int)display->max_wm) @@ -3446,7 +3446,7 @@ static bool ironlake_compute_wm0(struct drm_device *dev, /* Use the large buffer method to calculate cursor watermark */ line_time_us = ((htotal * 1000) / clock); - line_count = (cursor_latency * 100 / line_time_us + 1000) / 1000; + line_count = (cursor_latency_ns / line_time_us + 1000) / 1000; entries = line_count * 64 * pixel_size; entries = DIV_ROUND_UP(entries, cursor->cacheline_size); *cursor_wm = entries + cursor->guard_size; @@ -3644,7 +3644,7 @@ static void sandybridge_update_wm(struct drm_device *dev, int pixel_size) { struct drm_i915_private *dev_priv = dev->dev_private; - int latency = SNB_READ_WM0_LATENCY(); + int latency = SNB_READ_WM0_LATENCY() * 100; /* In unit 0.1us */ int fbc_wm, plane_wm, cursor_wm, enabled; int clock; From d78cb50baa9177353d6719612b83558a9bf2d59b Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 23 Dec 2010 13:33:15 +0000 Subject: [PATCH 14/37] drm/i915: add 'reset' parameter When bringing up new hardware, or otherwise experimenting, GPU hangs are a way of life. However, the automatic GPU reset can do more harm than good under these circumstances, as we may wish to capture a full trace for debugging. Based on a patch by Zhenyu Wang. Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_drv.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 872493331988..2913496e8716 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -49,6 +49,9 @@ module_param_named(powersave, i915_powersave, int, 0600); unsigned int i915_lvds_downclock = 0; module_param_named(lvds_downclock, i915_lvds_downclock, int, 0400); +bool i915_try_reset = true; +module_param_named(reset, i915_try_reset, bool, 0600); + static struct drm_driver driver; extern int intel_agp_enabled; @@ -475,6 +478,9 @@ int i915_reset(struct drm_device *dev, u8 flags) bool need_display = true; int ret; + if (!i915_try_reset) + return 0; + if (!mutex_trylock(&dev->struct_mutex)) return -EBUSY; From dbdc647927a0f4b34e7cf486889d8f671f73d2e5 Mon Sep 17 00:00:00 2001 From: Jesse Barnes Date: Thu, 30 Dec 2010 09:36:39 -0800 Subject: [PATCH 15/37] drm/i915: avoid reading non-existent PLL reg on Ironlake+ These functions need to be reworked for Ironlake and above, but until then at least avoid reading non-existent registers. Signed-off-by: Jesse Barnes [ickle: combine with a gratuitous tidy] Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/intel_display.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 75c433e6f457..043825c9ddcf 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -5036,8 +5036,8 @@ static void intel_increase_pllclock(struct drm_crtc *crtc) drm_i915_private_t *dev_priv = dev->dev_private; struct intel_crtc *intel_crtc = to_intel_crtc(crtc); int pipe = intel_crtc->pipe; - int dpll_reg = (pipe == 0) ? DPLL_A : DPLL_B; - int dpll = I915_READ(dpll_reg); + int dpll_reg = DPLL(pipe); + int dpll; if (HAS_PCH_SPLIT(dev)) return; @@ -5045,17 +5045,19 @@ static void intel_increase_pllclock(struct drm_crtc *crtc) if (!dev_priv->lvds_downclock_avail) return; + dpll = I915_READ(dpll_reg); if (!HAS_PIPE_CXSR(dev) && (dpll & DISPLAY_RATE_SELECT_FPA1)) { DRM_DEBUG_DRIVER("upclocking LVDS\n"); /* Unlock panel regs */ - I915_WRITE(PP_CONTROL, I915_READ(PP_CONTROL) | - PANEL_UNLOCK_REGS); + I915_WRITE(PP_CONTROL, + I915_READ(PP_CONTROL) | PANEL_UNLOCK_REGS); dpll &= ~DISPLAY_RATE_SELECT_FPA1; I915_WRITE(dpll_reg, dpll); - dpll = I915_READ(dpll_reg); + POSTING_READ(dpll_reg); intel_wait_for_vblank(dev, pipe); + dpll = I915_READ(dpll_reg); if (dpll & DISPLAY_RATE_SELECT_FPA1) DRM_DEBUG_DRIVER("failed to upclock LVDS!\n"); From 759010728b1323aec03c5baae13fde8f76e44a99 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 3 Jan 2011 16:39:21 +0000 Subject: [PATCH 16/37] drm/i915: Remove impossible test As has_gem is unconditionally set to true, the conditional immediately following that assignment is superfluous. Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_dma.c | 8 -------- 1 file changed, 8 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index 0568dbdc10ef..844f3c972b04 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -1962,13 +1962,6 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags) /* enable GEM by default */ dev_priv->has_gem = 1; - if (dev_priv->has_gem == 0 && - drm_core_check_feature(dev, DRIVER_MODESET)) { - DRM_ERROR("kernel modesetting requires GEM, disabling driver.\n"); - ret = -ENODEV; - goto out_workqueue_free; - } - dev->driver->get_vblank_counter = i915_get_vblank_counter; dev->max_vblank_count = 0xffffff; /* only 24 bits of frame count */ if (IS_G4X(dev) || IS_GEN5(dev) || IS_GEN6(dev)) { @@ -2055,7 +2048,6 @@ out_gem_unload: intel_teardown_gmbus(dev); intel_teardown_mchbar(dev); -out_workqueue_free: destroy_workqueue(dev_priv->wq); out_iomapfree: io_mapping_free(dev_priv->mm.gtt_mapping); From 63256ec5347fb2344a42adbae732b90603c92f35 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 4 Jan 2011 18:42:07 +0000 Subject: [PATCH 17/37] drm/i915: Enforce write ordering through the GTT We need to ensure that writes through the GTT land before any modification to the MMIO registers and so must impose a mandatory write barrier when flushing the GTT domain. This was revealed by relaxing the write ordering by experimentally mapping the registers and the GATT as write-combining. Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_gem.c | 14 +++++++++++++- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 3 +++ 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index c79c0b62ef60..f9c093c08d58 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2393,6 +2393,12 @@ i915_gem_object_flush_fence(struct drm_i915_gem_object *obj, obj->last_fenced_ring = NULL; } + /* Ensure that all CPU reads are completed before installing a fence + * and all writes before removing the fence. + */ + if (obj->base.read_domains & I915_GEM_DOMAIN_GTT) + mb(); + return 0; } @@ -2833,10 +2839,16 @@ i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj) if (obj->base.write_domain != I915_GEM_DOMAIN_GTT) return; - /* No actual flushing is required for the GTT write domain. Writes + /* No actual flushing is required for the GTT write domain. Writes * to it immediately go to main memory as far as we know, so there's * no chipset flush. It also doesn't land in render cache. + * + * However, we do have to enforce the order so that all writes through + * the GTT land before any writes to the device, such as updates to + * the GATT itself. */ + wmb(); + i915_gem_release_mmap(obj); old_write_domain = obj->base.write_domain; diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 61129e6759eb..0d42de42868c 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -725,6 +725,9 @@ i915_gem_execbuffer_flush(struct drm_device *dev, if (flush_domains & I915_GEM_DOMAIN_CPU) intel_gtt_chipset_flush(); + if (flush_domains & I915_GEM_DOMAIN_GTT) + wmb(); + if ((flush_domains | invalidate_domains) & I915_GEM_GPU_DOMAINS) { for (i = 0; i < I915_NUM_RINGS; i++) if (flush_rings & (1 << i)) From b72f3acb71646de073abdc070fe1108866c96634 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 4 Jan 2011 17:34:02 +0000 Subject: [PATCH 18/37] drm/i915: Handle ringbuffer stalls when flushing Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_gem.c | 4 +- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 12 +-- drivers/gpu/drm/i915/intel_ringbuffer.c | 92 +++++++++++++--------- drivers/gpu/drm/i915/intel_ringbuffer.h | 6 +- 4 files changed, 68 insertions(+), 46 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index f9c093c08d58..07b62449b9e1 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2148,8 +2148,8 @@ i915_gem_flush_ring(struct drm_device *dev, uint32_t invalidate_domains, uint32_t flush_domains) { - ring->flush(ring, invalidate_domains, flush_domains); - i915_gem_process_flushing_list(dev, flush_domains, ring); + if (ring->flush(ring, invalidate_domains, flush_domains) == 0) + i915_gem_process_flushing_list(dev, flush_domains, ring); } static int i915_ring_idle(struct drm_device *dev, diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 0d42de42868c..1b78b66dd77e 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -924,7 +924,7 @@ i915_gem_execbuffer_retire_commands(struct drm_device *dev, struct intel_ring_buffer *ring) { struct drm_i915_gem_request *request; - u32 flush_domains; + u32 invalidate; /* * Ensure that the commands in the batch buffer are @@ -932,11 +932,13 @@ i915_gem_execbuffer_retire_commands(struct drm_device *dev, * * The sampler always gets flushed on i965 (sigh). */ - flush_domains = 0; + invalidate = I915_GEM_DOMAIN_COMMAND; if (INTEL_INFO(dev)->gen >= 4) - flush_domains |= I915_GEM_DOMAIN_SAMPLER; - - ring->flush(ring, I915_GEM_DOMAIN_COMMAND, flush_domains); + invalidate |= I915_GEM_DOMAIN_SAMPLER; + if (ring->flush(ring, invalidate, 0)) { + i915_gem_next_request_seqno(dev, ring); + return; + } /* Add a breadcrumb for the completion of the batch buffer */ request = kzalloc(sizeof(*request), GFP_KERNEL); diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 2de0e45464c5..aa8f6abf16f2 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -48,7 +48,7 @@ static u32 i915_gem_get_seqno(struct drm_device *dev) return seqno; } -static void +static int render_ring_flush(struct intel_ring_buffer *ring, u32 invalidate_domains, u32 flush_domains) @@ -56,6 +56,7 @@ render_ring_flush(struct intel_ring_buffer *ring, struct drm_device *dev = ring->dev; drm_i915_private_t *dev_priv = dev->dev_private; u32 cmd; + int ret; #if WATCH_EXEC DRM_INFO("%s: invalidate %08x flush %08x\n", __func__, @@ -116,12 +117,16 @@ render_ring_flush(struct intel_ring_buffer *ring, #if WATCH_EXEC DRM_INFO("%s: queue flush %08x to ring\n", __func__, cmd); #endif - if (intel_ring_begin(ring, 2) == 0) { - intel_ring_emit(ring, cmd); - intel_ring_emit(ring, MI_NOOP); - intel_ring_advance(ring); - } + ret = intel_ring_begin(ring, 2); + if (ret) + return ret; + + intel_ring_emit(ring, cmd); + intel_ring_emit(ring, MI_NOOP); + intel_ring_advance(ring); } + + return 0; } static void ring_write_tail(struct intel_ring_buffer *ring, @@ -534,19 +539,24 @@ void intel_ring_setup_status_page(struct intel_ring_buffer *ring) POSTING_READ(mmio); } -static void +static int bsd_ring_flush(struct intel_ring_buffer *ring, u32 invalidate_domains, u32 flush_domains) { - if ((flush_domains & I915_GEM_DOMAIN_RENDER) == 0) - return; + int ret; - if (intel_ring_begin(ring, 2) == 0) { - intel_ring_emit(ring, MI_FLUSH); - intel_ring_emit(ring, MI_NOOP); - intel_ring_advance(ring); - } + if ((flush_domains & I915_GEM_DOMAIN_RENDER) == 0) + return 0; + + ret = intel_ring_begin(ring, 2); + if (ret) + return ret; + + intel_ring_emit(ring, MI_FLUSH); + intel_ring_emit(ring, MI_NOOP); + intel_ring_advance(ring); + return 0; } static int @@ -980,20 +990,25 @@ static void gen6_bsd_ring_write_tail(struct intel_ring_buffer *ring, GEN6_BSD_SLEEP_PSMI_CONTROL_RC_ILDL_MESSAGE_ENABLE); } -static void gen6_ring_flush(struct intel_ring_buffer *ring, - u32 invalidate_domains, - u32 flush_domains) +static int gen6_ring_flush(struct intel_ring_buffer *ring, + u32 invalidate_domains, + u32 flush_domains) { - if ((flush_domains & I915_GEM_DOMAIN_RENDER) == 0) - return; + int ret; - if (intel_ring_begin(ring, 4) == 0) { - intel_ring_emit(ring, MI_FLUSH_DW); - intel_ring_emit(ring, 0); - intel_ring_emit(ring, 0); - intel_ring_emit(ring, 0); - intel_ring_advance(ring); - } + if ((flush_domains & I915_GEM_DOMAIN_RENDER) == 0) + return 0; + + ret = intel_ring_begin(ring, 4); + if (ret) + return ret; + + intel_ring_emit(ring, MI_FLUSH_DW); + intel_ring_emit(ring, 0); + intel_ring_emit(ring, 0); + intel_ring_emit(ring, 0); + intel_ring_advance(ring); + return 0; } static int @@ -1122,20 +1137,25 @@ static int blt_ring_begin(struct intel_ring_buffer *ring, return intel_ring_begin(ring, 4); } -static void blt_ring_flush(struct intel_ring_buffer *ring, +static int blt_ring_flush(struct intel_ring_buffer *ring, u32 invalidate_domains, u32 flush_domains) { - if ((flush_domains & I915_GEM_DOMAIN_RENDER) == 0) - return; + int ret; - if (blt_ring_begin(ring, 4) == 0) { - intel_ring_emit(ring, MI_FLUSH_DW); - intel_ring_emit(ring, 0); - intel_ring_emit(ring, 0); - intel_ring_emit(ring, 0); - intel_ring_advance(ring); - } + if ((flush_domains & I915_GEM_DOMAIN_RENDER) == 0) + return 0; + + ret = blt_ring_begin(ring, 4); + if (ret) + return ret; + + intel_ring_emit(ring, MI_FLUSH_DW); + intel_ring_emit(ring, 0); + intel_ring_emit(ring, 0); + intel_ring_emit(ring, 0); + intel_ring_advance(ring); + return 0; } static void blt_ring_cleanup(struct intel_ring_buffer *ring) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index bbbf505c8b56..5969c2ed1028 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -63,9 +63,9 @@ struct intel_ring_buffer { void (*write_tail)(struct intel_ring_buffer *ring, u32 value); - void (*flush)(struct intel_ring_buffer *ring, - u32 invalidate_domains, - u32 flush_domains); + int __must_check (*flush)(struct intel_ring_buffer *ring, + u32 invalidate_domains, + u32 flush_domains); int (*add_request)(struct intel_ring_buffer *ring, u32 *seqno); u32 (*get_seqno)(struct intel_ring_buffer *ring); From 0f46832fab779a9a3314ce5e833155fe4cf18f6c Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 4 Jan 2011 17:35:21 +0000 Subject: [PATCH 19/37] drm/i915: Mask USER interrupts on gen6 (until required) Otherwise we may consume 20% of the CPU just handling IRQs whilst rendering. Ouch. Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_drv.h | 6 -- drivers/gpu/drm/i915/i915_irq.c | 45 --------- drivers/gpu/drm/i915/i915_reg.h | 1 + drivers/gpu/drm/i915/intel_ringbuffer.c | 120 +++++++++++++++++++++--- drivers/gpu/drm/i915/intel_ringbuffer.h | 3 + 5 files changed, 113 insertions(+), 62 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 972e08e4e054..1f77d8c6c6a2 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1007,12 +1007,6 @@ extern u32 i915_get_vblank_counter(struct drm_device *dev, int crtc); extern u32 gm45_get_vblank_counter(struct drm_device *dev, int crtc); extern int i915_vblank_swap(struct drm_device *dev, void *data, struct drm_file *file_priv); -extern void i915_enable_irq(drm_i915_private_t *dev_priv, u32 mask); -extern void i915_disable_irq(drm_i915_private_t *dev_priv, u32 mask); -extern void ironlake_enable_graphics_irq(drm_i915_private_t *dev_priv, - u32 mask); -extern void ironlake_disable_graphics_irq(drm_i915_private_t *dev_priv, - u32 mask); void i915_enable_pipestat(drm_i915_private_t *dev_priv, int pipe, u32 mask); diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 0dadc025b77b..826873a23db0 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -64,26 +64,6 @@ #define DRM_I915_VBLANK_PIPE_ALL (DRM_I915_VBLANK_PIPE_A | \ DRM_I915_VBLANK_PIPE_B) -void -ironlake_enable_graphics_irq(drm_i915_private_t *dev_priv, u32 mask) -{ - if ((dev_priv->gt_irq_mask & mask) != 0) { - dev_priv->gt_irq_mask &= ~mask; - I915_WRITE(GTIMR, dev_priv->gt_irq_mask); - POSTING_READ(GTIMR); - } -} - -void -ironlake_disable_graphics_irq(drm_i915_private_t *dev_priv, u32 mask) -{ - if ((dev_priv->gt_irq_mask & mask) != mask) { - dev_priv->gt_irq_mask |= mask; - I915_WRITE(GTIMR, dev_priv->gt_irq_mask); - POSTING_READ(GTIMR); - } -} - /* For display hotplug interrupt */ static void ironlake_enable_display_irq(drm_i915_private_t *dev_priv, u32 mask) @@ -105,26 +85,6 @@ ironlake_disable_display_irq(drm_i915_private_t *dev_priv, u32 mask) } } -void -i915_enable_irq(drm_i915_private_t *dev_priv, u32 mask) -{ - if ((dev_priv->irq_mask & mask) != 0) { - dev_priv->irq_mask &= ~mask; - I915_WRITE(IMR, dev_priv->irq_mask); - POSTING_READ(IMR); - } -} - -void -i915_disable_irq(drm_i915_private_t *dev_priv, u32 mask) -{ - if ((dev_priv->irq_mask & mask) != mask) { - dev_priv->irq_mask |= mask; - I915_WRITE(IMR, dev_priv->irq_mask); - POSTING_READ(IMR); - } -} - static inline u32 i915_pipestat(int pipe) { @@ -1673,11 +1633,6 @@ static int ironlake_irq_postinstall(struct drm_device *dev) I915_WRITE(GTIIR, I915_READ(GTIIR)); I915_WRITE(GTIMR, dev_priv->gt_irq_mask); - if (IS_GEN6(dev)) { - I915_WRITE(GEN6_RENDER_IMR, ~GEN6_RENDER_USER_INTERRUPT); - I915_WRITE(GEN6_BSD_IMR, ~GEN6_BSD_USER_INTERRUPT); - I915_WRITE(GEN6_BLITTER_IMR, ~GEN6_BLITTER_USER_INTERRUPT); - } if (IS_GEN6(dev)) render_irqs = diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index ecfb0023f60d..b0ab4247ce48 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -288,6 +288,7 @@ #define RING_HWS_PGA_GEN6(base) ((base)+0x2080) #define RING_ACTHD(base) ((base)+0x74) #define RING_NOPID(base) ((base)+0x94) +#define RING_IMR(base) ((base)+0xa8) #define TAIL_ADDR 0x001FFFF8 #define HEAD_WRAP_COUNT 0xFFE00000 #define HEAD_WRAP_ONE 0x00200000 diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index aa8f6abf16f2..3bff7fb72341 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -485,6 +485,38 @@ pc_render_get_seqno(struct intel_ring_buffer *ring) return pc->cpu_page[0]; } +static void +ironlake_enable_irq(drm_i915_private_t *dev_priv, u32 mask) +{ + dev_priv->gt_irq_mask &= ~mask; + I915_WRITE(GTIMR, dev_priv->gt_irq_mask); + POSTING_READ(GTIMR); +} + +static void +ironlake_disable_irq(drm_i915_private_t *dev_priv, u32 mask) +{ + dev_priv->gt_irq_mask |= mask; + I915_WRITE(GTIMR, dev_priv->gt_irq_mask); + POSTING_READ(GTIMR); +} + +static void +i915_enable_irq(drm_i915_private_t *dev_priv, u32 mask) +{ + dev_priv->irq_mask &= ~mask; + I915_WRITE(IMR, dev_priv->irq_mask); + POSTING_READ(IMR); +} + +static void +i915_disable_irq(drm_i915_private_t *dev_priv, u32 mask) +{ + dev_priv->irq_mask |= mask; + I915_WRITE(IMR, dev_priv->irq_mask); + POSTING_READ(IMR); +} + static bool render_ring_get_irq(struct intel_ring_buffer *ring) { @@ -499,8 +531,8 @@ render_ring_get_irq(struct intel_ring_buffer *ring) spin_lock_irqsave(&dev_priv->irq_lock, irqflags); if (HAS_PCH_SPLIT(dev)) - ironlake_enable_graphics_irq(dev_priv, - GT_PIPE_NOTIFY | GT_USER_INTERRUPT); + ironlake_enable_irq(dev_priv, + GT_PIPE_NOTIFY | GT_USER_INTERRUPT); else i915_enable_irq(dev_priv, I915_USER_INTERRUPT); spin_unlock_irqrestore(&dev_priv->irq_lock, irqflags); @@ -520,9 +552,9 @@ render_ring_put_irq(struct intel_ring_buffer *ring) spin_lock_irqsave(&dev_priv->irq_lock, irqflags); if (HAS_PCH_SPLIT(dev)) - ironlake_disable_graphics_irq(dev_priv, - GT_USER_INTERRUPT | - GT_PIPE_NOTIFY); + ironlake_disable_irq(dev_priv, + GT_USER_INTERRUPT | + GT_PIPE_NOTIFY); else i915_disable_irq(dev_priv, I915_USER_INTERRUPT); spin_unlock_irqrestore(&dev_priv->irq_lock, irqflags); @@ -596,7 +628,7 @@ ring_get_irq(struct intel_ring_buffer *ring, u32 flag) unsigned long irqflags; spin_lock_irqsave(&dev_priv->irq_lock, irqflags); - ironlake_enable_graphics_irq(dev_priv, flag); + ironlake_enable_irq(dev_priv, flag); spin_unlock_irqrestore(&dev_priv->irq_lock, irqflags); } @@ -613,7 +645,46 @@ ring_put_irq(struct intel_ring_buffer *ring, u32 flag) unsigned long irqflags; spin_lock_irqsave(&dev_priv->irq_lock, irqflags); - ironlake_disable_graphics_irq(dev_priv, flag); + ironlake_disable_irq(dev_priv, flag); + spin_unlock_irqrestore(&dev_priv->irq_lock, irqflags); + } +} + +static bool +gen6_ring_get_irq(struct intel_ring_buffer *ring, u32 gflag, u32 rflag) +{ + struct drm_device *dev = ring->dev; + + if (!dev->irq_enabled) + return false; + + if (atomic_inc_return(&ring->irq_refcount) == 1) { + drm_i915_private_t *dev_priv = dev->dev_private; + unsigned long irqflags; + + spin_lock_irqsave(&dev_priv->irq_lock, irqflags); + ring->irq_mask &= ~rflag; + I915_WRITE_IMR(ring, ring->irq_mask); + ironlake_enable_irq(dev_priv, gflag); + spin_unlock_irqrestore(&dev_priv->irq_lock, irqflags); + } + + return true; +} + +static void +gen6_ring_put_irq(struct intel_ring_buffer *ring, u32 gflag, u32 rflag) +{ + struct drm_device *dev = ring->dev; + + if (atomic_dec_and_test(&ring->irq_refcount)) { + drm_i915_private_t *dev_priv = dev->dev_private; + unsigned long irqflags; + + spin_lock_irqsave(&dev_priv->irq_lock, irqflags); + ring->irq_mask |= rflag; + I915_WRITE_IMR(ring, ring->irq_mask); + ironlake_disable_irq(dev_priv, gflag); spin_unlock_irqrestore(&dev_priv->irq_lock, irqflags); } } @@ -757,6 +828,7 @@ int intel_init_ring_buffer(struct drm_device *dev, INIT_LIST_HEAD(&ring->active_list); INIT_LIST_HEAD(&ring->request_list); INIT_LIST_HEAD(&ring->gpu_write_list); + ring->irq_mask = ~0; if (I915_NEED_GFX_HWS(dev)) { ret = init_status_page(ring); @@ -1029,16 +1101,36 @@ gen6_ring_dispatch_execbuffer(struct intel_ring_buffer *ring, return 0; } +static bool +gen6_render_ring_get_irq(struct intel_ring_buffer *ring) +{ + return gen6_ring_get_irq(ring, + GT_USER_INTERRUPT, + GEN6_RENDER_USER_INTERRUPT); +} + +static void +gen6_render_ring_put_irq(struct intel_ring_buffer *ring) +{ + return gen6_ring_put_irq(ring, + GT_USER_INTERRUPT, + GEN6_RENDER_USER_INTERRUPT); +} + static bool gen6_bsd_ring_get_irq(struct intel_ring_buffer *ring) { - return ring_get_irq(ring, GT_GEN6_BSD_USER_INTERRUPT); + return gen6_ring_get_irq(ring, + GT_GEN6_BSD_USER_INTERRUPT, + GEN6_BSD_USER_INTERRUPT); } static void gen6_bsd_ring_put_irq(struct intel_ring_buffer *ring) { - ring_put_irq(ring, GT_GEN6_BSD_USER_INTERRUPT); + return gen6_ring_put_irq(ring, + GT_GEN6_BSD_USER_INTERRUPT, + GEN6_BSD_USER_INTERRUPT); } /* ring buffer for Video Codec for Gen6+ */ @@ -1062,13 +1154,17 @@ static const struct intel_ring_buffer gen6_bsd_ring = { static bool blt_ring_get_irq(struct intel_ring_buffer *ring) { - return ring_get_irq(ring, GT_BLT_USER_INTERRUPT); + return gen6_ring_get_irq(ring, + GT_BLT_USER_INTERRUPT, + GEN6_BLITTER_USER_INTERRUPT); } static void blt_ring_put_irq(struct intel_ring_buffer *ring) { - ring_put_irq(ring, GT_BLT_USER_INTERRUPT); + gen6_ring_put_irq(ring, + GT_BLT_USER_INTERRUPT, + GEN6_BLITTER_USER_INTERRUPT); } @@ -1192,6 +1288,8 @@ int intel_init_render_ring_buffer(struct drm_device *dev) *ring = render_ring; if (INTEL_INFO(dev)->gen >= 6) { ring->add_request = gen6_add_request; + ring->irq_get = gen6_render_ring_get_irq; + ring->irq_put = gen6_render_ring_put_irq; } else if (IS_GEN5(dev)) { ring->add_request = pc_render_add_request; ring->get_seqno = pc_render_get_seqno; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 5969c2ed1028..634f6f84cb57 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -28,6 +28,8 @@ struct intel_hw_status_page { #define I915_READ_CTL(ring) I915_RING_READ(RING_CTL(ring->mmio_base)) #define I915_WRITE_CTL(ring, val) I915_WRITE(RING_CTL(ring->mmio_base), val) +#define I915_WRITE_IMR(ring, val) I915_WRITE(RING_IMR(ring->mmio_base), val) + #define I915_READ_NOPID(ring) I915_RING_READ(RING_NOPID(ring->mmio_base)) #define I915_READ_SYNC_0(ring) I915_RING_READ(RING_SYNC_0(ring->mmio_base)) #define I915_READ_SYNC_1(ring) I915_RING_READ(RING_SYNC_1(ring->mmio_base)) @@ -52,6 +54,7 @@ struct intel_ring_buffer { int effective_size; struct intel_hw_status_page status_page; + u32 irq_mask; u32 irq_seqno; /* last seq seem at irq time */ u32 waiting_seqno; u32 sync_seqno[I915_NUM_RINGS-1]; From 9862e600cef87de0e301bad7d1435b87e03ea84d Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 4 Jan 2011 22:22:17 +0000 Subject: [PATCH 20/37] drm/i915/debugfs: Show the per-ring IMR Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_debugfs.c | 8 +++++++- drivers/gpu/drm/i915/i915_irq.c | 5 ++++- drivers/gpu/drm/i915/intel_ringbuffer.h | 25 +++++++++++++------------ 3 files changed, 24 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 92f75782c332..7243d6418651 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -456,8 +456,14 @@ static int i915_interrupt_info(struct seq_file *m, void *data) } seq_printf(m, "Interrupts received: %d\n", atomic_read(&dev_priv->irq_received)); - for (i = 0; i < I915_NUM_RINGS; i++) + for (i = 0; i < I915_NUM_RINGS; i++) { + if (IS_GEN6(dev)) { + seq_printf(m, "Graphics Interrupt mask (%s): %08x\n", + dev_priv->ring[i].name, + I915_READ_IMR(&dev_priv->ring[i])); + } i915_ring_seqno_info(m, &dev_priv->ring[i]); + } mutex_unlock(&dev->struct_mutex); return 0; diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 826873a23db0..c9adcdd6ad6a 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -349,9 +349,12 @@ static void notify_ring(struct drm_device *dev, { struct drm_i915_private *dev_priv = dev->dev_private; u32 seqno = ring->get_seqno(ring); - ring->irq_seqno = seqno; + trace_i915_gem_request_complete(dev, seqno); + + ring->irq_seqno = seqno; wake_up_all(&ring->irq_queue); + dev_priv->hangcheck_count = 0; mod_timer(&dev_priv->hangcheck_timer, jiffies + msecs_to_jiffies(DRM_I915_HANGCHECK_PERIOD)); diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 634f6f84cb57..9b134b8643cb 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -16,23 +16,24 @@ struct intel_hw_status_page { #define I915_RING_READ(reg) i915_safe_read(dev_priv, reg) -#define I915_READ_TAIL(ring) I915_RING_READ(RING_TAIL(ring->mmio_base)) -#define I915_WRITE_TAIL(ring, val) I915_WRITE(RING_TAIL(ring->mmio_base), val) +#define I915_READ_TAIL(ring) I915_RING_READ(RING_TAIL((ring)->mmio_base)) +#define I915_WRITE_TAIL(ring, val) I915_WRITE(RING_TAIL((ring)->mmio_base), val) -#define I915_READ_START(ring) I915_RING_READ(RING_START(ring->mmio_base)) -#define I915_WRITE_START(ring, val) I915_WRITE(RING_START(ring->mmio_base), val) +#define I915_READ_START(ring) I915_RING_READ(RING_START((ring)->mmio_base)) +#define I915_WRITE_START(ring, val) I915_WRITE(RING_START((ring)->mmio_base), val) -#define I915_READ_HEAD(ring) I915_RING_READ(RING_HEAD(ring->mmio_base)) -#define I915_WRITE_HEAD(ring, val) I915_WRITE(RING_HEAD(ring->mmio_base), val) +#define I915_READ_HEAD(ring) I915_RING_READ(RING_HEAD((ring)->mmio_base)) +#define I915_WRITE_HEAD(ring, val) I915_WRITE(RING_HEAD((ring)->mmio_base), val) -#define I915_READ_CTL(ring) I915_RING_READ(RING_CTL(ring->mmio_base)) -#define I915_WRITE_CTL(ring, val) I915_WRITE(RING_CTL(ring->mmio_base), val) +#define I915_READ_CTL(ring) I915_RING_READ(RING_CTL((ring)->mmio_base)) +#define I915_WRITE_CTL(ring, val) I915_WRITE(RING_CTL((ring)->mmio_base), val) -#define I915_WRITE_IMR(ring, val) I915_WRITE(RING_IMR(ring->mmio_base), val) +#define I915_WRITE_IMR(ring, val) I915_WRITE(RING_IMR((ring)->mmio_base), val) +#define I915_READ_IMR(ring) I915_RING_READ(RING_IMR((ring)->mmio_base)) -#define I915_READ_NOPID(ring) I915_RING_READ(RING_NOPID(ring->mmio_base)) -#define I915_READ_SYNC_0(ring) I915_RING_READ(RING_SYNC_0(ring->mmio_base)) -#define I915_READ_SYNC_1(ring) I915_RING_READ(RING_SYNC_1(ring->mmio_base)) +#define I915_READ_NOPID(ring) I915_RING_READ(RING_NOPID((ring)->mmio_base)) +#define I915_READ_SYNC_0(ring) I915_RING_READ(RING_SYNC_0((ring)->mmio_base)) +#define I915_READ_SYNC_1(ring) I915_RING_READ(RING_SYNC_1((ring)->mmio_base)) struct intel_ring_buffer { const char *name; From 01a03331e5fe91861937f8b8e72c259f5e9eae67 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 4 Jan 2011 22:22:56 +0000 Subject: [PATCH 21/37] drm/i915/ringbuffer: Simplify the ring irq refcounting ... and move it under the spinlock to gain the appropriate memory barriers. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=32752 Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/intel_ringbuffer.c | 62 ++++++++++--------------- drivers/gpu/drm/i915/intel_ringbuffer.h | 2 +- 2 files changed, 25 insertions(+), 39 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 3bff7fb72341..13cad981713b 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -521,22 +521,20 @@ static bool render_ring_get_irq(struct intel_ring_buffer *ring) { struct drm_device *dev = ring->dev; + drm_i915_private_t *dev_priv = dev->dev_private; if (!dev->irq_enabled) return false; - if (atomic_inc_return(&ring->irq_refcount) == 1) { - drm_i915_private_t *dev_priv = dev->dev_private; - unsigned long irqflags; - - spin_lock_irqsave(&dev_priv->irq_lock, irqflags); + spin_lock(&dev_priv->irq_lock); + if (ring->irq_refcount++ == 0) { if (HAS_PCH_SPLIT(dev)) ironlake_enable_irq(dev_priv, GT_PIPE_NOTIFY | GT_USER_INTERRUPT); else i915_enable_irq(dev_priv, I915_USER_INTERRUPT); - spin_unlock_irqrestore(&dev_priv->irq_lock, irqflags); } + spin_unlock(&dev_priv->irq_lock); return true; } @@ -545,20 +543,18 @@ static void render_ring_put_irq(struct intel_ring_buffer *ring) { struct drm_device *dev = ring->dev; + drm_i915_private_t *dev_priv = dev->dev_private; - if (atomic_dec_and_test(&ring->irq_refcount)) { - drm_i915_private_t *dev_priv = dev->dev_private; - unsigned long irqflags; - - spin_lock_irqsave(&dev_priv->irq_lock, irqflags); + spin_lock(&dev_priv->irq_lock); + if (--ring->irq_refcount == 0) { if (HAS_PCH_SPLIT(dev)) ironlake_disable_irq(dev_priv, GT_USER_INTERRUPT | GT_PIPE_NOTIFY); else i915_disable_irq(dev_priv, I915_USER_INTERRUPT); - spin_unlock_irqrestore(&dev_priv->irq_lock, irqflags); } + spin_unlock(&dev_priv->irq_lock); } void intel_ring_setup_status_page(struct intel_ring_buffer *ring) @@ -619,18 +615,15 @@ static bool ring_get_irq(struct intel_ring_buffer *ring, u32 flag) { struct drm_device *dev = ring->dev; + drm_i915_private_t *dev_priv = dev->dev_private; if (!dev->irq_enabled) return false; - if (atomic_inc_return(&ring->irq_refcount) == 1) { - drm_i915_private_t *dev_priv = dev->dev_private; - unsigned long irqflags; - - spin_lock_irqsave(&dev_priv->irq_lock, irqflags); + spin_lock(&dev_priv->irq_lock); + if (ring->irq_refcount++ == 0) ironlake_enable_irq(dev_priv, flag); - spin_unlock_irqrestore(&dev_priv->irq_lock, irqflags); - } + spin_unlock(&dev_priv->irq_lock); return true; } @@ -639,35 +632,30 @@ static void ring_put_irq(struct intel_ring_buffer *ring, u32 flag) { struct drm_device *dev = ring->dev; + drm_i915_private_t *dev_priv = dev->dev_private; - if (atomic_dec_and_test(&ring->irq_refcount)) { - drm_i915_private_t *dev_priv = dev->dev_private; - unsigned long irqflags; - - spin_lock_irqsave(&dev_priv->irq_lock, irqflags); + spin_lock(&dev_priv->irq_lock); + if (--ring->irq_refcount == 0) ironlake_disable_irq(dev_priv, flag); - spin_unlock_irqrestore(&dev_priv->irq_lock, irqflags); - } + spin_unlock(&dev_priv->irq_lock); } static bool gen6_ring_get_irq(struct intel_ring_buffer *ring, u32 gflag, u32 rflag) { struct drm_device *dev = ring->dev; + drm_i915_private_t *dev_priv = dev->dev_private; if (!dev->irq_enabled) return false; - if (atomic_inc_return(&ring->irq_refcount) == 1) { - drm_i915_private_t *dev_priv = dev->dev_private; - unsigned long irqflags; - - spin_lock_irqsave(&dev_priv->irq_lock, irqflags); + spin_lock(&dev_priv->irq_lock); + if (ring->irq_refcount++ == 0) { ring->irq_mask &= ~rflag; I915_WRITE_IMR(ring, ring->irq_mask); ironlake_enable_irq(dev_priv, gflag); - spin_unlock_irqrestore(&dev_priv->irq_lock, irqflags); } + spin_unlock(&dev_priv->irq_lock); return true; } @@ -676,17 +664,15 @@ static void gen6_ring_put_irq(struct intel_ring_buffer *ring, u32 gflag, u32 rflag) { struct drm_device *dev = ring->dev; + drm_i915_private_t *dev_priv = dev->dev_private; - if (atomic_dec_and_test(&ring->irq_refcount)) { - drm_i915_private_t *dev_priv = dev->dev_private; - unsigned long irqflags; - - spin_lock_irqsave(&dev_priv->irq_lock, irqflags); + spin_lock(&dev_priv->irq_lock); + if (--ring->irq_refcount == 0) { ring->irq_mask |= rflag; I915_WRITE_IMR(ring, ring->irq_mask); ironlake_disable_irq(dev_priv, gflag); - spin_unlock_irqrestore(&dev_priv->irq_lock, irqflags); } + spin_unlock(&dev_priv->irq_lock); } static bool diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 9b134b8643cb..6b1d9a5a7d07 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -55,11 +55,11 @@ struct intel_ring_buffer { int effective_size; struct intel_hw_status_page status_page; + u32 irq_refcount; u32 irq_mask; u32 irq_seqno; /* last seq seem at irq time */ u32 waiting_seqno; u32 sync_seqno[I915_NUM_RINGS-1]; - atomic_t irq_refcount; bool __must_check (*irq_get)(struct intel_ring_buffer *ring); void (*irq_put)(struct intel_ring_buffer *ring); From 0dc79fb2a36efcadbb39bd8b28933d8aa40408b1 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 5 Jan 2011 10:32:24 +0000 Subject: [PATCH 22/37] drm/i915: Make the ring IMR handling private As the IMR for the USER interrupts are not modified elsewhere, we can separate the spinlock used for these from that of hpd and pipestats. Those two IMR are manipulated under an IRQ and so need heavier locking. Reported-and-tested-by: Alexey Fisher Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/intel_ringbuffer.c | 26 +++++++++++++------------ drivers/gpu/drm/i915/intel_ringbuffer.h | 1 + 2 files changed, 15 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 13cad981713b..03e337072517 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -526,7 +526,7 @@ render_ring_get_irq(struct intel_ring_buffer *ring) if (!dev->irq_enabled) return false; - spin_lock(&dev_priv->irq_lock); + spin_lock(&ring->irq_lock); if (ring->irq_refcount++ == 0) { if (HAS_PCH_SPLIT(dev)) ironlake_enable_irq(dev_priv, @@ -534,7 +534,7 @@ render_ring_get_irq(struct intel_ring_buffer *ring) else i915_enable_irq(dev_priv, I915_USER_INTERRUPT); } - spin_unlock(&dev_priv->irq_lock); + spin_unlock(&ring->irq_lock); return true; } @@ -545,7 +545,7 @@ render_ring_put_irq(struct intel_ring_buffer *ring) struct drm_device *dev = ring->dev; drm_i915_private_t *dev_priv = dev->dev_private; - spin_lock(&dev_priv->irq_lock); + spin_lock(&ring->irq_lock); if (--ring->irq_refcount == 0) { if (HAS_PCH_SPLIT(dev)) ironlake_disable_irq(dev_priv, @@ -554,7 +554,7 @@ render_ring_put_irq(struct intel_ring_buffer *ring) else i915_disable_irq(dev_priv, I915_USER_INTERRUPT); } - spin_unlock(&dev_priv->irq_lock); + spin_unlock(&ring->irq_lock); } void intel_ring_setup_status_page(struct intel_ring_buffer *ring) @@ -620,10 +620,10 @@ ring_get_irq(struct intel_ring_buffer *ring, u32 flag) if (!dev->irq_enabled) return false; - spin_lock(&dev_priv->irq_lock); + spin_lock(&ring->irq_lock); if (ring->irq_refcount++ == 0) ironlake_enable_irq(dev_priv, flag); - spin_unlock(&dev_priv->irq_lock); + spin_unlock(&ring->irq_lock); return true; } @@ -634,10 +634,10 @@ ring_put_irq(struct intel_ring_buffer *ring, u32 flag) struct drm_device *dev = ring->dev; drm_i915_private_t *dev_priv = dev->dev_private; - spin_lock(&dev_priv->irq_lock); + spin_lock(&ring->irq_lock); if (--ring->irq_refcount == 0) ironlake_disable_irq(dev_priv, flag); - spin_unlock(&dev_priv->irq_lock); + spin_unlock(&ring->irq_lock); } static bool @@ -649,13 +649,13 @@ gen6_ring_get_irq(struct intel_ring_buffer *ring, u32 gflag, u32 rflag) if (!dev->irq_enabled) return false; - spin_lock(&dev_priv->irq_lock); + spin_lock(&ring->irq_lock); if (ring->irq_refcount++ == 0) { ring->irq_mask &= ~rflag; I915_WRITE_IMR(ring, ring->irq_mask); ironlake_enable_irq(dev_priv, gflag); } - spin_unlock(&dev_priv->irq_lock); + spin_unlock(&ring->irq_lock); return true; } @@ -666,13 +666,13 @@ gen6_ring_put_irq(struct intel_ring_buffer *ring, u32 gflag, u32 rflag) struct drm_device *dev = ring->dev; drm_i915_private_t *dev_priv = dev->dev_private; - spin_lock(&dev_priv->irq_lock); + spin_lock(&ring->irq_lock); if (--ring->irq_refcount == 0) { ring->irq_mask |= rflag; I915_WRITE_IMR(ring, ring->irq_mask); ironlake_disable_irq(dev_priv, gflag); } - spin_unlock(&dev_priv->irq_lock); + spin_unlock(&ring->irq_lock); } static bool @@ -814,6 +814,8 @@ int intel_init_ring_buffer(struct drm_device *dev, INIT_LIST_HEAD(&ring->active_list); INIT_LIST_HEAD(&ring->request_list); INIT_LIST_HEAD(&ring->gpu_write_list); + + spin_lock_init(&ring->irq_lock); ring->irq_mask = ~0; if (I915_NEED_GFX_HWS(dev)) { diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 6b1d9a5a7d07..be9087e4c9be 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -55,6 +55,7 @@ struct intel_ring_buffer { int effective_size; struct intel_hw_status_page status_page; + spinlock_t irq_lock; u32 irq_refcount; u32 irq_mask; u32 irq_seqno; /* last seq seem at irq time */ From 88271da3f3da75d6eaef5e768c82a1627edf7088 Mon Sep 17 00:00:00 2001 From: Jesse Barnes Date: Wed, 5 Jan 2011 12:01:24 -0800 Subject: [PATCH 23/37] drm/i915: re-enable rc6 support for Ironlake+ Re-enable rc6 support on Ironlake for power savings. Adds a debugfs file to check current RC state, adds a missing workaround for Ironlake MI_SET_CONTEXT instructions, and renames MCHBAR_RENDER_STANDBY to RSTDBYCTL to match the docs. Keep RC6 and the power context disabled on pre-ILK. It only seems to hang and doesn't save any power. Signed-off-by: Jesse Barnes Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_debugfs.c | 26 ++++++++++++++- drivers/gpu/drm/i915/i915_reg.h | 50 ++++++++++++++++++++++++++-- drivers/gpu/drm/i915/i915_suspend.c | 4 +-- drivers/gpu/drm/i915/intel_display.c | 32 +++++++++--------- 4 files changed, 91 insertions(+), 21 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 7243d6418651..9c4cdc143be9 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -896,7 +896,7 @@ static int i915_drpc_info(struct seq_file *m, void *unused) struct drm_device *dev = node->minor->dev; drm_i915_private_t *dev_priv = dev->dev_private; u32 rgvmodectl = I915_READ(MEMMODECTL); - u32 rstdbyctl = I915_READ(MCHBAR_RENDER_STANDBY); + u32 rstdbyctl = I915_READ(RSTDBYCTL); u16 crstandvid = I915_READ16(CRSTANDVID); seq_printf(m, "HD boost: %s\n", (rgvmodectl & MEMMODE_BOOST_EN) ? @@ -919,6 +919,30 @@ static int i915_drpc_info(struct seq_file *m, void *unused) seq_printf(m, "RS2 VID: %d\n", ((crstandvid >> 8) & 0x3f)); seq_printf(m, "Render standby enabled: %s\n", (rstdbyctl & RCX_SW_EXIT) ? "no" : "yes"); + seq_printf(m, "Current RS state: "); + switch (rstdbyctl & RSX_STATUS_MASK) { + case RSX_STATUS_ON: + seq_printf(m, "on\n"); + break; + case RSX_STATUS_RC1: + seq_printf(m, "RC1\n"); + break; + case RSX_STATUS_RC1E: + seq_printf(m, "RC1E\n"); + break; + case RSX_STATUS_RS1: + seq_printf(m, "RS1\n"); + break; + case RSX_STATUS_RS2: + seq_printf(m, "RS2 (RC6)\n"); + break; + case RSX_STATUS_RS3: + seq_printf(m, "RC3 (RC6+)\n"); + break; + default: + seq_printf(m, "unknown\n"); + break; + } return 0; } diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index b0ab4247ce48..33ddf3120f2f 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -145,6 +145,8 @@ #define MI_END_SCENE (1 << 4) /* flush binner and incr scene count */ #define MI_INVALIDATE_ISP (1 << 5) /* invalidate indirect state pointers */ #define MI_BATCH_BUFFER_END MI_INSTR(0x0a, 0) +#define MI_SUSPEND_FLUSH MI_INSTR(0x0b, 0) +#define MI_SUSPEND_FLUSH_EN (1<<0) #define MI_REPORT_HEAD MI_INSTR(0x07, 0) #define MI_OVERLAY_FLIP MI_INSTR(0x11,0) #define MI_OVERLAY_CONTINUE (0x0<<21) @@ -159,6 +161,7 @@ #define MI_MM_SPACE_PHYSICAL (0<<8) #define MI_SAVE_EXT_STATE_EN (1<<3) #define MI_RESTORE_EXT_STATE_EN (1<<2) +#define MI_FORCE_RESTORE (1<<1) #define MI_RESTORE_INHIBIT (1<<0) #define MI_STORE_DWORD_IMM MI_INSTR(0x20, 1) #define MI_MEM_VIRTUAL (1 << 22) /* 965+ only */ @@ -1131,9 +1134,50 @@ #define RCBMINAVG 0x111a0 #define RCUPEI 0x111b0 #define RCDNEI 0x111b4 -#define MCHBAR_RENDER_STANDBY 0x111b8 -#define RCX_SW_EXIT (1<<23) -#define RSX_STATUS_MASK 0x00700000 +#define RSTDBYCTL 0x111b8 +#define RS1EN (1<<31) +#define RS2EN (1<<30) +#define RS3EN (1<<29) +#define D3RS3EN (1<<28) /* Display D3 imlies RS3 */ +#define SWPROMORSX (1<<27) /* RSx promotion timers ignored */ +#define RCWAKERW (1<<26) /* Resetwarn from PCH causes wakeup */ +#define DPRSLPVREN (1<<25) /* Fast voltage ramp enable */ +#define GFXTGHYST (1<<24) /* Hysteresis to allow trunk gating */ +#define RCX_SW_EXIT (1<<23) /* Leave RSx and prevent re-entry */ +#define RSX_STATUS_MASK (7<<20) +#define RSX_STATUS_ON (0<<20) +#define RSX_STATUS_RC1 (1<<20) +#define RSX_STATUS_RC1E (2<<20) +#define RSX_STATUS_RS1 (3<<20) +#define RSX_STATUS_RS2 (4<<20) /* aka rc6 */ +#define RSX_STATUS_RSVD (5<<20) /* deep rc6 unsupported on ilk */ +#define RSX_STATUS_RS3 (6<<20) /* rs3 unsupported on ilk */ +#define RSX_STATUS_RSVD2 (7<<20) +#define UWRCRSXE (1<<19) /* wake counter limit prevents rsx */ +#define RSCRP (1<<18) /* rs requests control on rs1/2 reqs */ +#define JRSC (1<<17) /* rsx coupled to cpu c-state */ +#define RS2INC0 (1<<16) /* allow rs2 in cpu c0 */ +#define RS1CONTSAV_MASK (3<<14) +#define RS1CONTSAV_NO_RS1 (0<<14) /* rs1 doesn't save/restore context */ +#define RS1CONTSAV_RSVD (1<<14) +#define RS1CONTSAV_SAVE_RS1 (2<<14) /* rs1 saves context */ +#define RS1CONTSAV_FULL_RS1 (3<<14) /* rs1 saves and restores context */ +#define NORMSLEXLAT_MASK (3<<12) +#define SLOW_RS123 (0<<12) +#define SLOW_RS23 (1<<12) +#define SLOW_RS3 (2<<12) +#define NORMAL_RS123 (3<<12) +#define RCMODE_TIMEOUT (1<<11) /* 0 is eval interval method */ +#define IMPROMOEN (1<<10) /* promo is immediate or delayed until next idle interval (only for timeout method above) */ +#define RCENTSYNC (1<<9) /* rs coupled to cpu c-state (3/6/7) */ +#define STATELOCK (1<<7) /* locked to rs_cstate if 0 */ +#define RS_CSTATE_MASK (3<<4) +#define RS_CSTATE_C367_RS1 (0<<4) +#define RS_CSTATE_C36_RS1_C7_RS2 (1<<4) +#define RS_CSTATE_RSVD (2<<4) +#define RS_CSTATE_C367_RS2 (3<<4) +#define REDSAVES (1<<3) /* no context save if was idle during rs0 */ +#define REDRESTORES (1<<2) /* no restore if was idle during rs0 */ #define VIDCTL 0x111c0 #define VIDSTS 0x111c8 #define VIDSTART 0x111cc /* 8 bits */ diff --git a/drivers/gpu/drm/i915/i915_suspend.c b/drivers/gpu/drm/i915/i915_suspend.c index 410772466fa7..af530637777a 100644 --- a/drivers/gpu/drm/i915/i915_suspend.c +++ b/drivers/gpu/drm/i915/i915_suspend.c @@ -740,7 +740,7 @@ void i915_restore_display(struct drm_device *dev) I915_WRITE(PCH_PP_OFF_DELAYS, dev_priv->savePP_OFF_DELAYS); I915_WRITE(PCH_PP_DIVISOR, dev_priv->savePP_DIVISOR); I915_WRITE(PCH_PP_CONTROL, dev_priv->savePP_CONTROL); - I915_WRITE(MCHBAR_RENDER_STANDBY, + I915_WRITE(RSTDBYCTL, dev_priv->saveMCHBAR_RENDER_STANDBY); } else { I915_WRITE(PFIT_PGM_RATIOS, dev_priv->savePFIT_PGM_RATIOS); @@ -811,7 +811,7 @@ int i915_save_state(struct drm_device *dev) dev_priv->saveFDI_RXA_IMR = I915_READ(FDI_RXA_IMR); dev_priv->saveFDI_RXB_IMR = I915_READ(FDI_RXB_IMR); dev_priv->saveMCHBAR_RENDER_STANDBY = - I915_READ(MCHBAR_RENDER_STANDBY); + I915_READ(RSTDBYCTL); } else { dev_priv->saveIER = I915_READ(IER); dev_priv->saveIMR = I915_READ(IMR); diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 043825c9ddcf..f3c0525d5328 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -6420,35 +6420,37 @@ void intel_enable_clock_gating(struct drm_device *dev) * GPU can automatically power down the render unit if given a page * to save state. */ - if (IS_IRONLAKE_M(dev) && 0) { /* XXX causes a failure during suspend */ + if (IS_IRONLAKE_M(dev)) { if (dev_priv->renderctx == NULL) dev_priv->renderctx = intel_alloc_context_page(dev); if (dev_priv->renderctx) { struct drm_i915_gem_object *obj = dev_priv->renderctx; - if (BEGIN_LP_RING(4) == 0) { - OUT_RING(MI_SET_CONTEXT); - OUT_RING(obj->gtt_offset | - MI_MM_SPACE_GTT | - MI_SAVE_EXT_STATE_EN | - MI_RESTORE_EXT_STATE_EN | - MI_RESTORE_INHIBIT); - OUT_RING(MI_NOOP); - OUT_RING(MI_FLUSH); - ADVANCE_LP_RING(); + if (BEGIN_LP_RING(6) != 0) { + i915_gem_object_unpin(obj); + drm_gem_object_unreference(&obj->base); + dev_priv->renderctx = NULL; + return; } + OUT_RING(MI_SUSPEND_FLUSH | MI_SUSPEND_FLUSH_EN); + OUT_RING(MI_SET_CONTEXT); + OUT_RING(obj->gtt_offset | + MI_MM_SPACE_GTT | + MI_SAVE_EXT_STATE_EN | + MI_RESTORE_EXT_STATE_EN | + MI_RESTORE_INHIBIT); + OUT_RING(MI_SUSPEND_FLUSH); + OUT_RING(MI_NOOP); + OUT_RING(MI_FLUSH); + ADVANCE_LP_RING(); } else DRM_DEBUG_KMS("Failed to allocate render context." "Disable RC6\n"); - } - if (IS_GEN4(dev) && IS_MOBILE(dev)) { if (dev_priv->pwrctx == NULL) dev_priv->pwrctx = intel_alloc_context_page(dev); if (dev_priv->pwrctx) { struct drm_i915_gem_object *obj = dev_priv->pwrctx; I915_WRITE(PWRCTXA, obj->gtt_offset | PWRCTX_EN); - I915_WRITE(MCHBAR_RENDER_STANDBY, - I915_READ(MCHBAR_RENDER_STANDBY) & ~RCX_SW_EXIT); } } } From 1daed3fb8324d517a1f9da43f1a1d3619d1b0ddc Mon Sep 17 00:00:00 2001 From: Jesse Barnes Date: Wed, 5 Jan 2011 12:01:25 -0800 Subject: [PATCH 24/37] drm/i915: fix rc6 enabling around suspend/resume Enabling RC6 implies setting a graphics context. Make sure we do that only after the ring has been enabled, otherwise our ring commands will hang. Signed-off-by: Jesse Barnes Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_drv.c | 3 +++ drivers/gpu/drm/i915/i915_suspend.c | 3 --- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 2913496e8716..02fce7fbcd8a 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -357,6 +357,9 @@ static int i915_drm_thaw(struct drm_device *dev) drm_helper_resume_force_mode(dev); } + /* Clock gating state */ + intel_enable_clock_gating(dev); + intel_opregion_init(dev); dev_priv->modeset_on_lid = 0; diff --git a/drivers/gpu/drm/i915/i915_suspend.c b/drivers/gpu/drm/i915/i915_suspend.c index af530637777a..147cd9666700 100644 --- a/drivers/gpu/drm/i915/i915_suspend.c +++ b/drivers/gpu/drm/i915/i915_suspend.c @@ -868,9 +868,6 @@ int i915_restore_state(struct drm_device *dev) I915_WRITE (IMR, dev_priv->saveIMR); } - /* Clock gating state */ - intel_enable_clock_gating(dev); - if (IS_IRONLAKE_M(dev)) { ironlake_enable_drps(dev); intel_init_emon(dev); From d5bb081b027b520f9e59b4fb8faea83a136ec15e Mon Sep 17 00:00:00 2001 From: Jesse Barnes Date: Wed, 5 Jan 2011 12:01:26 -0800 Subject: [PATCH 25/37] drm/i915: cleanup rc6 code Cleanup several aspects of the rc6 code: - misnamed intel_disable_clock_gating function (was only about rc6) - remove commented call to intel_disable_clock_gating - rc6 enabling code belongs in its own function (allows us to move the actual clock gating enable call back into restore_state) - allocate power & render contexts up front, only free on unload (avoids ugly lazy init at rc6 enable time) Signed-off-by: Jesse Barnes [ickle: checkpatch cleanup] Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_drv.c | 6 +- drivers/gpu/drm/i915/i915_drv.h | 1 + drivers/gpu/drm/i915/i915_suspend.c | 7 +- drivers/gpu/drm/i915/intel_display.c | 107 +++++++++++++++++---------- 4 files changed, 75 insertions(+), 46 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 02fce7fbcd8a..0de75a23f8e7 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -355,10 +355,10 @@ static int i915_drm_thaw(struct drm_device *dev) /* Resume the modeset for every activated CRTC */ drm_helper_resume_force_mode(dev); - } - /* Clock gating state */ - intel_enable_clock_gating(dev); + if (dev_priv->renderctx && dev_priv->pwrctx) + ironlake_enable_rc6(dev); + } intel_opregion_init(dev); diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 1f77d8c6c6a2..455260067ff7 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1260,6 +1260,7 @@ extern void intel_disable_fbc(struct drm_device *dev); extern void intel_enable_fbc(struct drm_crtc *crtc, unsigned long interval); extern bool intel_fbc_enabled(struct drm_device *dev); extern bool ironlake_set_drps(struct drm_device *dev, u8 val); +extern void ironlake_enable_rc6(struct drm_device *dev); extern void gen6_set_rps(struct drm_device *dev, u8 val); extern void intel_detect_pch (struct drm_device *dev); extern int intel_trans_dp_port_sel (struct drm_crtc *crtc); diff --git a/drivers/gpu/drm/i915/i915_suspend.c b/drivers/gpu/drm/i915/i915_suspend.c index 147cd9666700..0521ecf26017 100644 --- a/drivers/gpu/drm/i915/i915_suspend.c +++ b/drivers/gpu/drm/i915/i915_suspend.c @@ -822,10 +822,6 @@ int i915_save_state(struct drm_device *dev) if (IS_GEN6(dev)) gen6_disable_rps(dev); - /* XXX disabling the clock gating breaks suspend on gm45 - intel_disable_clock_gating(dev); - */ - /* Cache mode state */ dev_priv->saveCACHE_MODE_0 = I915_READ(CACHE_MODE_0); @@ -868,6 +864,9 @@ int i915_restore_state(struct drm_device *dev) I915_WRITE (IMR, dev_priv->saveIMR); } + /* Clock gating state */ + intel_enable_clock_gating(dev); + if (IS_IRONLAKE_M(dev)) { ironlake_enable_drps(dev); intel_init_emon(dev); diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index f3c0525d5328..1190efa390bd 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -6415,44 +6415,6 @@ void intel_enable_clock_gating(struct drm_device *dev) } else if (IS_I830(dev)) { I915_WRITE(DSPCLK_GATE_D, OVRUNIT_CLOCK_GATE_DISABLE); } - - /* - * GPU can automatically power down the render unit if given a page - * to save state. - */ - if (IS_IRONLAKE_M(dev)) { - if (dev_priv->renderctx == NULL) - dev_priv->renderctx = intel_alloc_context_page(dev); - if (dev_priv->renderctx) { - struct drm_i915_gem_object *obj = dev_priv->renderctx; - if (BEGIN_LP_RING(6) != 0) { - i915_gem_object_unpin(obj); - drm_gem_object_unreference(&obj->base); - dev_priv->renderctx = NULL; - return; - } - OUT_RING(MI_SUSPEND_FLUSH | MI_SUSPEND_FLUSH_EN); - OUT_RING(MI_SET_CONTEXT); - OUT_RING(obj->gtt_offset | - MI_MM_SPACE_GTT | - MI_SAVE_EXT_STATE_EN | - MI_RESTORE_EXT_STATE_EN | - MI_RESTORE_INHIBIT); - OUT_RING(MI_SUSPEND_FLUSH); - OUT_RING(MI_NOOP); - OUT_RING(MI_FLUSH); - ADVANCE_LP_RING(); - } else - DRM_DEBUG_KMS("Failed to allocate render context." - "Disable RC6\n"); - - if (dev_priv->pwrctx == NULL) - dev_priv->pwrctx = intel_alloc_context_page(dev); - if (dev_priv->pwrctx) { - struct drm_i915_gem_object *obj = dev_priv->pwrctx; - I915_WRITE(PWRCTXA, obj->gtt_offset | PWRCTX_EN); - } - } } void intel_disable_clock_gating(struct drm_device *dev) @@ -6482,6 +6444,57 @@ void intel_disable_clock_gating(struct drm_device *dev) } } +static void ironlake_disable_rc6(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + + /* Wake the GPU, prevent RC6, then restore RSTDBYCTL */ + I915_WRITE(RSTDBYCTL, I915_READ(RSTDBYCTL) | RCX_SW_EXIT); + wait_for(((I915_READ(RSTDBYCTL) & RSX_STATUS_MASK) == RSX_STATUS_ON), + 10); + POSTING_READ(CCID); + I915_WRITE(PWRCTXA, 0); + POSTING_READ(PWRCTXA); + I915_WRITE(RSTDBYCTL, I915_READ(RSTDBYCTL) & ~RCX_SW_EXIT); + POSTING_READ(RSTDBYCTL); + i915_gem_object_unpin(dev_priv->renderctx); + drm_gem_object_unreference(&dev_priv->renderctx->base); + dev_priv->renderctx = NULL; + i915_gem_object_unpin(dev_priv->pwrctx); + drm_gem_object_unreference(&dev_priv->pwrctx->base); + dev_priv->pwrctx = NULL; +} + +void ironlake_enable_rc6(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + int ret; + + /* + * GPU can automatically power down the render unit if given a page + * to save state. + */ + ret = BEGIN_LP_RING(6); + if (ret) { + ironlake_disable_rc6(dev); + return; + } + OUT_RING(MI_SUSPEND_FLUSH | MI_SUSPEND_FLUSH_EN); + OUT_RING(MI_SET_CONTEXT); + OUT_RING(dev_priv->renderctx->gtt_offset | + MI_MM_SPACE_GTT | + MI_SAVE_EXT_STATE_EN | + MI_RESTORE_EXT_STATE_EN | + MI_RESTORE_INHIBIT); + OUT_RING(MI_SUSPEND_FLUSH); + OUT_RING(MI_NOOP); + OUT_RING(MI_FLUSH); + ADVANCE_LP_RING(); + + I915_WRITE(PWRCTXA, dev_priv->pwrctx->gtt_offset | PWRCTX_EN); + I915_WRITE(RSTDBYCTL, I915_READ(RSTDBYCTL) & ~RCX_SW_EXIT); +} + /* Set up chip specific display functions */ static void intel_init_display(struct drm_device *dev) { @@ -6724,6 +6737,21 @@ void intel_modeset_init(struct drm_device *dev) if (IS_GEN6(dev)) gen6_enable_rps(dev_priv); + if (IS_IRONLAKE_M(dev)) { + dev_priv->renderctx = intel_alloc_context_page(dev); + if (!dev_priv->renderctx) + goto skip_rc6; + dev_priv->pwrctx = intel_alloc_context_page(dev); + if (!dev_priv->pwrctx) { + i915_gem_object_unpin(dev_priv->renderctx); + drm_gem_object_unreference(&dev_priv->renderctx->base); + dev_priv->renderctx = NULL; + goto skip_rc6; + } + ironlake_enable_rc6(dev); + } + +skip_rc6: INIT_WORK(&dev_priv->idle_work, intel_idle_update); setup_timer(&dev_priv->idle_timer, intel_gpu_idle_timer, (unsigned long)dev); @@ -6760,7 +6788,8 @@ void intel_modeset_cleanup(struct drm_device *dev) if (IS_GEN6(dev)) gen6_disable_rps(dev); - intel_disable_clock_gating(dev); + if (IS_IRONLAKE_M(dev)) + ironlake_disable_rc6(dev); mutex_unlock(&dev->struct_mutex); From 776ad8062bb77697b8728a9794e3a394b28cf885 Mon Sep 17 00:00:00 2001 From: Jesse Barnes Date: Tue, 4 Jan 2011 15:09:39 -0800 Subject: [PATCH 26/37] drm/i915: detect & report PCH display error interrupts FDI and the transcoders can fail for various reasons, so detect those conditions and report on them. Signed-off-by: Jesse Barnes Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_irq.c | 54 +++++++++++++++++++++++++++++++-- drivers/gpu/drm/i915/i915_reg.h | 29 ++++++++++++++++++ 2 files changed, 81 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index c9adcdd6ad6a..d431fc4fb84b 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -398,6 +398,50 @@ static void gen6_pm_irq_handler(struct drm_device *dev) I915_WRITE(GEN6_PMIIR, pm_iir); } +static void pch_irq_handler(struct drm_device *dev) +{ + drm_i915_private_t *dev_priv = (drm_i915_private_t *) dev->dev_private; + u32 pch_iir; + + pch_iir = I915_READ(SDEIIR); + + if (pch_iir & SDE_AUDIO_POWER_MASK) + DRM_DEBUG_DRIVER("PCH audio power change on port %d\n", + (pch_iir & SDE_AUDIO_POWER_MASK) >> + SDE_AUDIO_POWER_SHIFT); + + if (pch_iir & SDE_GMBUS) + DRM_DEBUG_DRIVER("PCH GMBUS interrupt\n"); + + if (pch_iir & SDE_AUDIO_HDCP_MASK) + DRM_DEBUG_DRIVER("PCH HDCP audio interrupt\n"); + + if (pch_iir & SDE_AUDIO_TRANS_MASK) + DRM_DEBUG_DRIVER("PCH transcoder audio interrupt\n"); + + if (pch_iir & SDE_POISON) + DRM_ERROR("PCH poison interrupt\n"); + + if (pch_iir & SDE_FDI_MASK) { + u32 fdia, fdib; + + fdia = I915_READ(FDI_RXA_IIR); + fdib = I915_READ(FDI_RXB_IIR); + DRM_DEBUG_DRIVER("PCH FDI RX interrupt; FDI RXA IIR: 0x%08x, FDI RXB IIR: 0x%08x\n", fdia, fdib); + } + + if (pch_iir & (SDE_TRANSB_CRC_DONE | SDE_TRANSA_CRC_DONE)) + DRM_DEBUG_DRIVER("PCH transcoder CRC done interrupt\n"); + + if (pch_iir & (SDE_TRANSB_CRC_ERR | SDE_TRANSA_CRC_ERR)) + DRM_DEBUG_DRIVER("PCH transcoder CRC error interrupt\n"); + + if (pch_iir & SDE_TRANSB_FIFO_UNDER) + DRM_DEBUG_DRIVER("PCH transcoder B underrun interrupt\n"); + if (pch_iir & SDE_TRANSA_FIFO_UNDER) + DRM_DEBUG_DRIVER("PCH transcoder A underrun interrupt\n"); +} + static irqreturn_t ironlake_irq_handler(struct drm_device *dev) { drm_i915_private_t *dev_priv = (drm_i915_private_t *) dev->dev_private; @@ -465,8 +509,11 @@ static irqreturn_t ironlake_irq_handler(struct drm_device *dev) drm_handle_vblank(dev, 1); /* check event from PCH */ - if ((de_iir & DE_PCH_EVENT) && (pch_iir & hotplug_mask)) - queue_work(dev_priv->wq, &dev_priv->hotplug_work); + if (de_iir & DE_PCH_EVENT) { + if (pch_iir & hotplug_mask) + queue_work(dev_priv->wq, &dev_priv->hotplug_work); + pch_irq_handler(dev); + } if (de_iir & DE_PCU_EVENT) { I915_WRITE16(MEMINTRSTS, I915_READ(MEMINTRSTS)); @@ -1656,6 +1703,9 @@ static int ironlake_irq_postinstall(struct drm_device *dev) } else { hotplug_mask = SDE_CRT_HOTPLUG | SDE_PORTB_HOTPLUG | SDE_PORTC_HOTPLUG | SDE_PORTD_HOTPLUG; + hotplug_mask |= SDE_AUX_MASK | SDE_FDI_MASK | SDE_TRANS_MASK; + I915_WRITE(FDI_RXA_IMR, 0); + I915_WRITE(FDI_RXB_IMR, 0); } dev_priv->pch_irq_mask = ~hotplug_mask; diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 33ddf3120f2f..40a407f41f61 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -2778,12 +2778,41 @@ /* PCH */ /* south display engine interrupt */ +#define SDE_AUDIO_POWER_D (1 << 27) +#define SDE_AUDIO_POWER_C (1 << 26) +#define SDE_AUDIO_POWER_B (1 << 25) +#define SDE_AUDIO_POWER_SHIFT (25) +#define SDE_AUDIO_POWER_MASK (7 << SDE_AUDIO_POWER_SHIFT) +#define SDE_GMBUS (1 << 24) +#define SDE_AUDIO_HDCP_TRANSB (1 << 23) +#define SDE_AUDIO_HDCP_TRANSA (1 << 22) +#define SDE_AUDIO_HDCP_MASK (3 << 22) +#define SDE_AUDIO_TRANSB (1 << 21) +#define SDE_AUDIO_TRANSA (1 << 20) +#define SDE_AUDIO_TRANS_MASK (3 << 20) +#define SDE_POISON (1 << 19) +/* 18 reserved */ +#define SDE_FDI_RXB (1 << 17) +#define SDE_FDI_RXA (1 << 16) +#define SDE_FDI_MASK (3 << 16) +#define SDE_AUXD (1 << 15) +#define SDE_AUXC (1 << 14) +#define SDE_AUXB (1 << 13) +#define SDE_AUX_MASK (7 << 13) +/* 12 reserved */ #define SDE_CRT_HOTPLUG (1 << 11) #define SDE_PORTD_HOTPLUG (1 << 10) #define SDE_PORTC_HOTPLUG (1 << 9) #define SDE_PORTB_HOTPLUG (1 << 8) #define SDE_SDVOB_HOTPLUG (1 << 6) #define SDE_HOTPLUG_MASK (0xf << 8) +#define SDE_TRANSB_CRC_DONE (1 << 5) +#define SDE_TRANSB_CRC_ERR (1 << 4) +#define SDE_TRANSB_FIFO_UNDER (1 << 3) +#define SDE_TRANSA_CRC_DONE (1 << 2) +#define SDE_TRANSA_CRC_ERR (1 << 1) +#define SDE_TRANSA_FIFO_UNDER (1 << 0) +#define SDE_TRANS_MASK (0x3f) /* CPT */ #define SDE_CRT_HOTPLUG_CPT (1 << 19) #define SDE_PORTD_HOTPLUG_CPT (1 << 23) From 882417851a0f2e09e110038a13e88e9b5a100800 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 7 Jan 2011 17:09:48 +0000 Subject: [PATCH 27/37] drm/i915: Propagate error from flushing the ring ... in order to avoid a BUG() and potential unbounded waits. Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_drv.h | 8 +- drivers/gpu/drm/i915/i915_gem.c | 102 ++++++++++++++------- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 28 ++++-- 3 files changed, 90 insertions(+), 48 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 455260067ff7..3e78314514a2 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1086,10 +1086,10 @@ int i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); void i915_gem_load(struct drm_device *dev); int i915_gem_init_object(struct drm_gem_object *obj); -void i915_gem_flush_ring(struct drm_device *dev, - struct intel_ring_buffer *ring, - uint32_t invalidate_domains, - uint32_t flush_domains); +int __must_check i915_gem_flush_ring(struct drm_device *dev, + struct intel_ring_buffer *ring, + uint32_t invalidate_domains, + uint32_t flush_domains); struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev, size_t size); void i915_gem_free_object(struct drm_gem_object *obj); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 07b62449b9e1..2873d068eb1f 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -35,18 +35,18 @@ #include #include -static void i915_gem_object_flush_gpu_write_domain(struct drm_i915_gem_object *obj); +static __must_check int i915_gem_object_flush_gpu_write_domain(struct drm_i915_gem_object *obj); static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj); static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj); -static int i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, - bool write); -static int i915_gem_object_set_cpu_read_domain_range(struct drm_i915_gem_object *obj, - uint64_t offset, - uint64_t size); +static __must_check int i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, + bool write); +static __must_check int i915_gem_object_set_cpu_read_domain_range(struct drm_i915_gem_object *obj, + uint64_t offset, + uint64_t size); static void i915_gem_object_set_to_full_cpu_read_domain(struct drm_i915_gem_object *obj); -static int i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj, - unsigned alignment, - bool map_and_fenceable); +static __must_check int i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj, + unsigned alignment, + bool map_and_fenceable); static void i915_gem_clear_fence_reg(struct drm_device *dev, struct drm_i915_fence_reg *reg); static int i915_gem_phys_pwrite(struct drm_device *dev, @@ -2142,25 +2142,37 @@ i915_gem_object_unbind(struct drm_i915_gem_object *obj) return ret; } -void +int i915_gem_flush_ring(struct drm_device *dev, struct intel_ring_buffer *ring, uint32_t invalidate_domains, uint32_t flush_domains) { - if (ring->flush(ring, invalidate_domains, flush_domains) == 0) - i915_gem_process_flushing_list(dev, flush_domains, ring); + int ret; + + ret = ring->flush(ring, invalidate_domains, flush_domains); + if (ret) + return ret; + + i915_gem_process_flushing_list(dev, flush_domains, ring); + return 0; } static int i915_ring_idle(struct drm_device *dev, struct intel_ring_buffer *ring) { + int ret; + if (list_empty(&ring->gpu_write_list) && list_empty(&ring->active_list)) return 0; - if (!list_empty(&ring->gpu_write_list)) - i915_gem_flush_ring(dev, ring, + if (!list_empty(&ring->gpu_write_list)) { + ret = i915_gem_flush_ring(dev, ring, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS); + if (ret) + return ret; + } + return i915_wait_request(dev, i915_gem_next_request_seqno(dev, ring), ring); @@ -2370,10 +2382,13 @@ i915_gem_object_flush_fence(struct drm_i915_gem_object *obj, int ret; if (obj->fenced_gpu_access) { - if (obj->base.write_domain & I915_GEM_GPU_DOMAINS) - i915_gem_flush_ring(obj->base.dev, - obj->last_fenced_ring, - 0, obj->base.write_domain); + if (obj->base.write_domain & I915_GEM_GPU_DOMAINS) { + ret = i915_gem_flush_ring(obj->base.dev, + obj->last_fenced_ring, + 0, obj->base.write_domain); + if (ret) + return ret; + } obj->fenced_gpu_access = false; } @@ -2529,9 +2544,12 @@ i915_gem_object_get_fence(struct drm_i915_gem_object *obj, return ret; } else if (obj->tiling_changed) { if (obj->fenced_gpu_access) { - if (obj->base.write_domain & I915_GEM_GPU_DOMAINS) - i915_gem_flush_ring(obj->base.dev, obj->ring, - 0, obj->base.write_domain); + if (obj->base.write_domain & I915_GEM_GPU_DOMAINS) { + ret = i915_gem_flush_ring(obj->base.dev, obj->ring, + 0, obj->base.write_domain); + if (ret) + return ret; + } obj->fenced_gpu_access = false; } @@ -2817,17 +2835,16 @@ i915_gem_clflush_object(struct drm_i915_gem_object *obj) } /** Flushes any GPU write domain for the object if it's dirty. */ -static void +static int i915_gem_object_flush_gpu_write_domain(struct drm_i915_gem_object *obj) { struct drm_device *dev = obj->base.dev; if ((obj->base.write_domain & I915_GEM_GPU_DOMAINS) == 0) - return; + return 0; /* Queue the GPU write cache flushing we need. */ - i915_gem_flush_ring(dev, obj->ring, 0, obj->base.write_domain); - BUG_ON(obj->base.write_domain); + return i915_gem_flush_ring(dev, obj->ring, 0, obj->base.write_domain); } /** Flushes the GTT write domain for the object if it's dirty. */ @@ -2894,7 +2911,10 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write) if (obj->gtt_space == NULL) return -EINVAL; - i915_gem_object_flush_gpu_write_domain(obj); + ret = i915_gem_object_flush_gpu_write_domain(obj); + if (ret) + return ret; + if (obj->pending_gpu_write || write) { ret = i915_gem_object_wait_rendering(obj, true); if (ret) @@ -2939,7 +2959,10 @@ i915_gem_object_set_to_display_plane(struct drm_i915_gem_object *obj, if (obj->gtt_space == NULL) return -EINVAL; - i915_gem_object_flush_gpu_write_domain(obj); + ret = i915_gem_object_flush_gpu_write_domain(obj); + if (ret) + return ret; + /* Currently, we are always called from an non-interruptible context. */ if (pipelined != obj->ring) { @@ -2964,12 +2987,17 @@ int i915_gem_object_flush_gpu(struct drm_i915_gem_object *obj, bool interruptible) { + int ret; + if (!obj->active) return 0; - if (obj->base.write_domain & I915_GEM_GPU_DOMAINS) - i915_gem_flush_ring(obj->base.dev, obj->ring, - 0, obj->base.write_domain); + if (obj->base.write_domain & I915_GEM_GPU_DOMAINS) { + ret = i915_gem_flush_ring(obj->base.dev, obj->ring, + 0, obj->base.write_domain); + if (ret) + return ret; + } return i915_gem_object_wait_rendering(obj, interruptible); } @@ -2986,7 +3014,10 @@ i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write) uint32_t old_write_domain, old_read_domains; int ret; - i915_gem_object_flush_gpu_write_domain(obj); + ret = i915_gem_object_flush_gpu_write_domain(obj); + if (ret) + return ret; + ret = i915_gem_object_wait_rendering(obj, true); if (ret) return ret; @@ -3081,7 +3112,10 @@ i915_gem_object_set_cpu_read_domain_range(struct drm_i915_gem_object *obj, if (offset == 0 && size == obj->base.size) return i915_gem_object_set_to_cpu_domain(obj, 0); - i915_gem_object_flush_gpu_write_domain(obj); + ret = i915_gem_object_flush_gpu_write_domain(obj); + if (ret) + return ret; + ret = i915_gem_object_wait_rendering(obj, true); if (ret) return ret; @@ -3374,8 +3408,8 @@ i915_gem_busy_ioctl(struct drm_device *dev, void *data, * flush earlier is beneficial. */ if (obj->base.write_domain & I915_GEM_GPU_DOMAINS) { - i915_gem_flush_ring(dev, obj->ring, - 0, obj->base.write_domain); + ret = i915_gem_flush_ring(dev, obj->ring, + 0, obj->base.write_domain); } else if (obj->ring->outstanding_lazy_request == obj->last_rendering_seqno) { struct drm_i915_gem_request *request; diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 1b78b66dd77e..97d5fbd8ea13 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -713,14 +713,14 @@ err: return ret; } -static void +static int i915_gem_execbuffer_flush(struct drm_device *dev, uint32_t invalidate_domains, uint32_t flush_domains, uint32_t flush_rings) { drm_i915_private_t *dev_priv = dev->dev_private; - int i; + int i, ret; if (flush_domains & I915_GEM_DOMAIN_CPU) intel_gtt_chipset_flush(); @@ -730,11 +730,17 @@ i915_gem_execbuffer_flush(struct drm_device *dev, if ((flush_domains | invalidate_domains) & I915_GEM_GPU_DOMAINS) { for (i = 0; i < I915_NUM_RINGS; i++) - if (flush_rings & (1 << i)) - i915_gem_flush_ring(dev, &dev_priv->ring[i], - invalidate_domains, - flush_domains); + if (flush_rings & (1 << i)) { + ret = i915_gem_flush_ring(dev, + &dev_priv->ring[i], + invalidate_domains, + flush_domains); + if (ret) + return ret; + } } + + return 0; } static int @@ -798,10 +804,12 @@ i915_gem_execbuffer_move_to_gpu(struct intel_ring_buffer *ring, cd.invalidate_domains, cd.flush_domains); #endif - i915_gem_execbuffer_flush(ring->dev, - cd.invalidate_domains, - cd.flush_domains, - cd.flush_rings); + ret = i915_gem_execbuffer_flush(ring->dev, + cd.invalidate_domains, + cd.flush_domains, + cd.flush_rings); + if (ret) + return ret; } list_for_each_entry(obj, objects, exec_list) { From db66e37d239b45f36a3f6495cf4ec49391b2c089 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sat, 8 Jan 2011 09:02:21 +0000 Subject: [PATCH 28/37] drm/i915: Include TLB miss overhead for computing WM The docs recommend that if 8 display lines fit inside the FIFO buffer, then the number of watermark entries should be increased to hide the latency of filling the rest of the FIFO buffer. Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/intel_display.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 1190efa390bd..25d96889d7d2 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -3425,8 +3425,9 @@ static bool ironlake_compute_wm0(struct drm_device *dev, int *cursor_wm) { struct drm_crtc *crtc; - int htotal, hdisplay, clock, pixel_size = 0; - int line_time_us, line_count, entries; + int htotal, hdisplay, clock, pixel_size; + int line_time_us, line_count; + int entries, tlb_miss; crtc = intel_get_crtc_for_pipe(dev, pipe); if (crtc->fb == NULL || !crtc->enabled) @@ -3439,6 +3440,9 @@ static bool ironlake_compute_wm0(struct drm_device *dev, /* Use the small buffer method to calculate plane watermark */ entries = ((clock * pixel_size / 1000) * display_latency_ns) / 1000; + tlb_miss = display->fifo_size*display->cacheline_size - hdisplay * 8; + if (tlb_miss > 0) + entries += tlb_miss; entries = DIV_ROUND_UP(entries, display->cacheline_size); *plane_wm = entries + display->guard_size; if (*plane_wm > (int)display->max_wm) @@ -3448,6 +3452,9 @@ static bool ironlake_compute_wm0(struct drm_device *dev, line_time_us = ((htotal * 1000) / clock); line_count = (cursor_latency_ns / line_time_us + 1000) / 1000; entries = line_count * 64 * pixel_size; + tlb_miss = cursor->fifo_size*cursor->cacheline_size - hdisplay * 8; + if (tlb_miss > 0) + entries += tlb_miss; entries = DIV_ROUND_UP(entries, cursor->cacheline_size); *cursor_wm = entries + cursor->guard_size; if (*cursor_wm > (int)cursor->max_wm) From bcfb2e285827bf0cfea8bbfad18a4fca57fbabae Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 7 Jan 2011 21:06:07 +0000 Subject: [PATCH 29/37] drm/i915: Record the error batchbuffer on each ring Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_debugfs.c | 4 +- drivers/gpu/drm/i915/i915_drv.h | 2 +- drivers/gpu/drm/i915/i915_irq.c | 164 ++++++++-------------------- 3 files changed, 50 insertions(+), 120 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 9c4cdc143be9..a7c194a837a3 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -750,7 +750,9 @@ static int i915_error_state(struct seq_file *m, void *unused) if (error->batchbuffer[i]) { struct drm_i915_error_object *obj = error->batchbuffer[i]; - seq_printf(m, "--- gtt_offset = 0x%08x\n", obj->gtt_offset); + seq_printf(m, "%s --- gtt_offset = 0x%08x\n", + dev_priv->ring[i].name, + obj->gtt_offset); offset = 0; for (page = 0; page < obj->page_count; page++) { for (elt = 0; elt < PAGE_SIZE/4; elt++) { diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 3e78314514a2..6c9a042737d6 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -172,7 +172,7 @@ struct drm_i915_error_state { int page_count; u32 gtt_offset; u32 *pages[0]; - } *ringbuffer, *batchbuffer[2]; + } *ringbuffer, *batchbuffer[I915_NUM_RINGS]; struct drm_i915_error_buffer { size_t size; u32 name; diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index d431fc4fb84b..cf61235b858f 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -566,10 +566,9 @@ static void i915_error_work_func(struct work_struct *work) #ifdef CONFIG_DEBUG_FS static struct drm_i915_error_object * -i915_error_object_create(struct drm_device *dev, +i915_error_object_create(struct drm_i915_private *dev_priv, struct drm_i915_gem_object *src) { - drm_i915_private_t *dev_priv = dev->dev_private; struct drm_i915_error_object *dst; int page, page_count; u32 reloc_offset; @@ -642,52 +641,6 @@ i915_error_state_free(struct drm_device *dev, kfree(error); } -static u32 -i915_get_bbaddr(struct drm_device *dev, u32 *ring) -{ - u32 cmd; - - if (IS_I830(dev) || IS_845G(dev)) - cmd = MI_BATCH_BUFFER; - else if (INTEL_INFO(dev)->gen >= 4) - cmd = (MI_BATCH_BUFFER_START | (2 << 6) | - MI_BATCH_NON_SECURE_I965); - else - cmd = (MI_BATCH_BUFFER_START | (2 << 6)); - - return ring[0] == cmd ? ring[1] : 0; -} - -static u32 -i915_ringbuffer_last_batch(struct drm_device *dev, - struct intel_ring_buffer *ring) -{ - struct drm_i915_private *dev_priv = dev->dev_private; - u32 head, bbaddr; - u32 *val; - - /* Locate the current position in the ringbuffer and walk back - * to find the most recently dispatched batch buffer. - */ - head = I915_READ_HEAD(ring) & HEAD_ADDR; - - val = (u32 *)(ring->virtual_start + head); - while (--val >= (u32 *)ring->virtual_start) { - bbaddr = i915_get_bbaddr(dev, val); - if (bbaddr) - return bbaddr; - } - - val = (u32 *)(ring->virtual_start + ring->size); - while (--val >= (u32 *)ring->virtual_start) { - bbaddr = i915_get_bbaddr(dev, val); - if (bbaddr) - return bbaddr; - } - - return 0; -} - static u32 capture_bo_list(struct drm_i915_error_buffer *err, int count, struct list_head *head) @@ -751,6 +704,36 @@ static void i915_gem_record_fences(struct drm_device *dev, } } +static struct drm_i915_error_object * +i915_error_first_batchbuffer(struct drm_i915_private *dev_priv, + struct intel_ring_buffer *ring) +{ + struct drm_i915_gem_object *obj; + u32 seqno; + + if (!ring->get_seqno) + return NULL; + + seqno = ring->get_seqno(ring); + list_for_each_entry(obj, &dev_priv->mm.active_list, mm_list) { + if (obj->ring != ring) + continue; + + if (!i915_seqno_passed(obj->last_rendering_seqno, seqno)) + continue; + + if ((obj->base.read_domains & I915_GEM_DOMAIN_COMMAND) == 0) + continue; + + /* We need to copy these to an anonymous buffer as the simplest + * method to avoid being overwritten by userspace. + */ + return i915_error_object_create(dev_priv, obj); + } + + return NULL; +} + /** * i915_capture_error_state - capture an error record for later analysis * @dev: drm device @@ -765,10 +748,8 @@ static void i915_capture_error_state(struct drm_device *dev) struct drm_i915_private *dev_priv = dev->dev_private; struct drm_i915_gem_object *obj; struct drm_i915_error_state *error; - struct drm_i915_gem_object *batchbuffer[2]; unsigned long flags; - u32 bbaddr; - int count; + int i; spin_lock_irqsave(&dev_priv->error_lock, flags); error = dev_priv->first_error; @@ -827,83 +808,30 @@ static void i915_capture_error_state(struct drm_device *dev) } i915_gem_record_fences(dev, error); - bbaddr = i915_ringbuffer_last_batch(dev, &dev_priv->ring[RCS]); - - /* Grab the current batchbuffer, most likely to have crashed. */ - batchbuffer[0] = NULL; - batchbuffer[1] = NULL; - count = 0; - list_for_each_entry(obj, &dev_priv->mm.active_list, mm_list) { - if (batchbuffer[0] == NULL && - bbaddr >= obj->gtt_offset && - bbaddr < obj->gtt_offset + obj->base.size) - batchbuffer[0] = obj; - - if (batchbuffer[1] == NULL && - error->acthd >= obj->gtt_offset && - error->acthd < obj->gtt_offset + obj->base.size) - batchbuffer[1] = obj; - - count++; - } - /* Scan the other lists for completeness for those bizarre errors. */ - if (batchbuffer[0] == NULL || batchbuffer[1] == NULL) { - list_for_each_entry(obj, &dev_priv->mm.flushing_list, mm_list) { - if (batchbuffer[0] == NULL && - bbaddr >= obj->gtt_offset && - bbaddr < obj->gtt_offset + obj->base.size) - batchbuffer[0] = obj; - - if (batchbuffer[1] == NULL && - error->acthd >= obj->gtt_offset && - error->acthd < obj->gtt_offset + obj->base.size) - batchbuffer[1] = obj; - - if (batchbuffer[0] && batchbuffer[1]) - break; - } - } - if (batchbuffer[0] == NULL || batchbuffer[1] == NULL) { - list_for_each_entry(obj, &dev_priv->mm.inactive_list, mm_list) { - if (batchbuffer[0] == NULL && - bbaddr >= obj->gtt_offset && - bbaddr < obj->gtt_offset + obj->base.size) - batchbuffer[0] = obj; - - if (batchbuffer[1] == NULL && - error->acthd >= obj->gtt_offset && - error->acthd < obj->gtt_offset + obj->base.size) - batchbuffer[1] = obj; - - if (batchbuffer[0] && batchbuffer[1]) - break; - } - } - - /* We need to copy these to an anonymous buffer as the simplest - * method to avoid being overwritten by userspace. - */ - error->batchbuffer[0] = i915_error_object_create(dev, batchbuffer[0]); - if (batchbuffer[1] != batchbuffer[0]) - error->batchbuffer[1] = i915_error_object_create(dev, batchbuffer[1]); - else - error->batchbuffer[1] = NULL; + /* Record the active batchbuffers */ + for (i = 0; i < I915_NUM_RINGS; i++) + error->batchbuffer[i] = + i915_error_first_batchbuffer(dev_priv, + &dev_priv->ring[i]); /* Record the ringbuffer */ - error->ringbuffer = i915_error_object_create(dev, + error->ringbuffer = i915_error_object_create(dev_priv, dev_priv->ring[RCS].obj); /* Record buffers on the active and pinned lists. */ error->active_bo = NULL; error->pinned_bo = NULL; - error->active_bo_count = count; + i = 0; + list_for_each_entry(obj, &dev_priv->mm.active_list, mm_list) + i++; + error->active_bo_count = i; list_for_each_entry(obj, &dev_priv->mm.pinned_list, mm_list) - count++; - error->pinned_bo_count = count - error->active_bo_count; + i++; + error->pinned_bo_count = i - error->active_bo_count; - if (count) { - error->active_bo = kmalloc(sizeof(*error->active_bo)*count, + if (i) { + error->active_bo = kmalloc(sizeof(*error->active_bo)*i, GFP_ATOMIC); if (error->active_bo) error->pinned_bo = From d9126400580e2caada85fa68799952956a6062fd Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 11 Jan 2011 11:07:54 +0000 Subject: [PATCH 30/37] drm/i915/gtt: Unmap the PCI pages after unbinding them from the GTT Dave Airlie spotted that his ILK laptop with DMAR enabled was generating the occasional DMAR warning. "The ordering in the previous code was to rewrite the GTT table before unmapping the pages and that makes sense to me." This is his stable patch ported to d-i-n. Reported-by: Dave Airlie Original-patch-by: Dave Airlie Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_gem_gtt.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 86673e77d7cb..70433ae50ac8 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -85,15 +85,11 @@ int i915_gem_gtt_bind_object(struct drm_i915_gem_object *obj) void i915_gem_gtt_unbind_object(struct drm_i915_gem_object *obj) { - struct drm_device *dev = obj->base.dev; - struct drm_i915_private *dev_priv = dev->dev_private; - - if (dev_priv->mm.gtt->needs_dmar) { - intel_gtt_unmap_memory(obj->sg_list, obj->num_sg); - obj->sg_list = NULL; - obj->num_sg = 0; - } - intel_gtt_clear_range(obj->gtt_space->start >> PAGE_SHIFT, obj->base.size >> PAGE_SHIFT); + + if (obj->sg_list) { + intel_gtt_unmap_memory(obj->sg_list, obj->num_sg); + obj->sg_list = NULL; + } } From 0a58705b2fc3fa29525cf2fdae3d4276a5771280 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sun, 9 Jan 2011 21:05:44 +0000 Subject: [PATCH 31/37] drm/i915: Periodically flush the active lists and requests In order to retire active buffers whilst no client is active, we need to insert our own flush requests onto the ring. This is useful for servers that queue up some rendering and then go to sleep as it allows us to the complete processing of those requests, potentially making that memory available again much earlier. Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_gem.c | 30 ++++++++++++++++++++++++++---- 1 file changed, 26 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 2873d068eb1f..87c2df714f66 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1935,6 +1935,8 @@ i915_gem_retire_work_handler(struct work_struct *work) { drm_i915_private_t *dev_priv; struct drm_device *dev; + bool idle; + int i; dev_priv = container_of(work, drm_i915_private_t, mm.retire_work.work); @@ -1948,11 +1950,31 @@ i915_gem_retire_work_handler(struct work_struct *work) i915_gem_retire_requests(dev); - if (!dev_priv->mm.suspended && - (!list_empty(&dev_priv->ring[RCS].request_list) || - !list_empty(&dev_priv->ring[VCS].request_list) || - !list_empty(&dev_priv->ring[BCS].request_list))) + /* Send a periodic flush down the ring so we don't hold onto GEM + * objects indefinitely. + */ + idle = true; + for (i = 0; i < I915_NUM_RINGS; i++) { + struct intel_ring_buffer *ring = &dev_priv->ring[i]; + + if (!list_empty(&ring->gpu_write_list)) { + struct drm_i915_gem_request *request; + int ret; + + ret = i915_gem_flush_ring(dev, ring, 0, + I915_GEM_GPU_DOMAINS); + request = kzalloc(sizeof(*request), GFP_KERNEL); + if (ret || request == NULL || + i915_add_request(dev, NULL, request, ring)) + kfree(request); + } + + idle &= list_empty(&ring->request_list); + } + + if (!dev_priv->mm.suspended && !idle) queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, HZ); + mutex_unlock(&dev->struct_mutex); } From a779e5abda0367aa9d53c0931d9687743afe503d Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sun, 9 Jan 2011 21:07:49 +0000 Subject: [PATCH 32/37] drm/i915: Record AGP memory type upon error Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_debugfs.c | 14 ++++++++++++-- drivers/gpu/drm/i915/i915_drv.h | 5 +++-- drivers/gpu/drm/i915/i915_irq.c | 1 + 3 files changed, 16 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index a7c194a837a3..73914d841856 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -624,6 +624,15 @@ static const char *ring_str(int ring) } } +static const char *agp_type_str(int type) +{ + switch (type) { + case 0: return " uncached"; + case 1: return " snooped"; + default: return ""; + } +} + static const char *pin_flag(int pinned) { if (pinned > 0) @@ -662,7 +671,7 @@ static void print_error_buffers(struct seq_file *m, seq_printf(m, "%s [%d]:\n", name, count); while (count--) { - seq_printf(m, " %08x %8zd %04x %04x %08x%s%s%s%s%s", + seq_printf(m, " %08x %8zd %04x %04x %08x%s%s%s%s%s%s", err->gtt_offset, err->size, err->read_domains, @@ -672,7 +681,8 @@ static void print_error_buffers(struct seq_file *m, tiling_flag(err->tiling), dirty_flag(err->dirty), purgeable_flag(err->purgeable), - ring_str(err->ring)); + ring_str(err->ring), + agp_type_str(err->agp_type)); if (err->name) seq_printf(m, " (name: %d)", err->name); diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 6c9a042737d6..6130f77c26bf 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -174,18 +174,19 @@ struct drm_i915_error_state { u32 *pages[0]; } *ringbuffer, *batchbuffer[I915_NUM_RINGS]; struct drm_i915_error_buffer { - size_t size; + u32 size; u32 name; u32 seqno; u32 gtt_offset; u32 read_domains; u32 write_domain; - u32 fence_reg; + s32 fence_reg:5; s32 pinned:2; u32 tiling:2; u32 dirty:1; u32 purgeable:1; u32 ring:4; + u32 agp_type:1; } *active_bo, *pinned_bo; u32 active_bo_count, pinned_bo_count; struct intel_overlay_error_state *overlay; diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index cf61235b858f..e418e8bb61e6 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -665,6 +665,7 @@ static u32 capture_bo_list(struct drm_i915_error_buffer *err, err->dirty = obj->dirty; err->purgeable = obj->madv != I915_MADV_WILLNEED; err->ring = obj->ring ? obj->ring->id : 0; + err->agp_type = obj->agp_type == AGP_USER_CACHED_MEMORY; if (++i == count) break; From 08c18323547ce6d70eab3b37eca894baf114ad85 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 10 Jan 2011 00:00:24 +0000 Subject: [PATCH 33/37] drm/i915/debugfs: Show all objects in the gtt Useful for determining the layout. Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_debugfs.c | 53 +++++++++++++++++++++++------ 1 file changed, 43 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 73914d841856..19a3d58044dd 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -106,10 +106,19 @@ static const char *get_tiling_flag(struct drm_i915_gem_object *obj) } } +static const char *agp_type_str(int type) +{ + switch (type) { + case 0: return " uncached"; + case 1: return " snooped"; + default: return ""; + } +} + static void describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj) { - seq_printf(m, "%p: %s%s %8zd %04x %04x %d %d%s%s", + seq_printf(m, "%p: %s%s %8zd %04x %04x %d %d%s%s%s", &obj->base, get_pin_flag(obj), get_tiling_flag(obj), @@ -118,6 +127,7 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj) obj->base.write_domain, obj->last_rendering_seqno, obj->last_fenced_seqno, + agp_type_str(obj->agp_type == AGP_USER_CACHED_MEMORY), obj->dirty ? " dirty" : "", obj->madv == I915_MADV_DONTNEED ? " purgeable" : ""); if (obj->base.name) @@ -276,6 +286,37 @@ static int i915_gem_object_info(struct seq_file *m, void* data) return 0; } +static int i915_gem_gtt_info(struct seq_file *m, void* data) +{ + struct drm_info_node *node = (struct drm_info_node *) m->private; + struct drm_device *dev = node->minor->dev; + struct drm_i915_private *dev_priv = dev->dev_private; + struct drm_i915_gem_object *obj; + size_t total_obj_size, total_gtt_size; + int count, ret; + + ret = mutex_lock_interruptible(&dev->struct_mutex); + if (ret) + return ret; + + total_obj_size = total_gtt_size = count = 0; + list_for_each_entry(obj, &dev_priv->mm.gtt_list, gtt_list) { + seq_printf(m, " "); + describe_obj(m, obj); + seq_printf(m, "\n"); + total_obj_size += obj->base.size; + total_gtt_size += obj->gtt_space->size; + count++; + } + + mutex_unlock(&dev->struct_mutex); + + seq_printf(m, "Total %d objects, %zu bytes, %zu GTT size\n", + count, total_obj_size, total_gtt_size); + + return 0; +} + static int i915_gem_pageflip_info(struct seq_file *m, void *data) { @@ -624,15 +665,6 @@ static const char *ring_str(int ring) } } -static const char *agp_type_str(int type) -{ - switch (type) { - case 0: return " uncached"; - case 1: return " snooped"; - default: return ""; - } -} - static const char *pin_flag(int pinned) { if (pinned > 0) @@ -1229,6 +1261,7 @@ static int i915_wedged_create(struct dentry *root, struct drm_minor *minor) static struct drm_info_list i915_debugfs_list[] = { {"i915_capabilities", i915_capabilities, 0, 0}, {"i915_gem_objects", i915_gem_object_info, 0}, + {"i915_gem_gtt", i915_gem_gtt_info, 0}, {"i915_gem_active", i915_gem_object_list_info, 0, (void *) ACTIVE_LIST}, {"i915_gem_flushing", i915_gem_object_list_info, 0, (void *) FLUSHING_LIST}, {"i915_gem_inactive", i915_gem_object_list_info, 0, (void *) INACTIVE_LIST}, From 36cf17423095882ec0f8f2c04d1bd0ee812149df Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 10 Jan 2011 12:09:12 +0000 Subject: [PATCH 34/37] drm/i915/execbuffer: Correctly clear the current object list upon EFAULT Before releasing the lock in order to copy the relocation list from user pages, we need to drop all the object references as another thread may usurp and execute another batchbuffer before we reacquire the lock. However, the code was buggy and failed to clear the list... Signed-off-by: Chris Wilson Cc: stable@kernel.org --- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 97d5fbd8ea13..0445770cc23c 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -622,7 +622,7 @@ i915_gem_execbuffer_relocate_slow(struct drm_device *dev, int i, total, ret; /* We may process another execbuffer during the unlock... */ - while (list_empty(objects)) { + while (!list_empty(objects)) { obj = list_first_entry(objects, struct drm_i915_gem_object, exec_list); @@ -665,7 +665,6 @@ i915_gem_execbuffer_relocate_slow(struct drm_device *dev, } /* reacquire the objects */ - INIT_LIST_HEAD(objects); eb_reset(eb); for (i = 0; i < count; i++) { struct drm_i915_gem_object *obj; @@ -1353,4 +1352,3 @@ i915_gem_execbuffer2(struct drm_device *dev, void *data, drm_free_large(exec2_list); return ret; } - From 092de6f225638ec300936bfcbdc67805733cc78c Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 10 Jan 2011 14:21:05 +0000 Subject: [PATCH 35/37] drm/i915/evict: Ensure we completely cleanup on failure ... and not leave the objects in a inconsistent state. Signed-off-by: Chris Wilson Cc: stable@kernel.org --- drivers/gpu/drm/i915/i915_gem_evict.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c index 78b8cf90c922..3d39005540aa 100644 --- a/drivers/gpu/drm/i915/i915_gem_evict.c +++ b/drivers/gpu/drm/i915/i915_gem_evict.c @@ -127,9 +127,15 @@ i915_gem_evict_something(struct drm_device *dev, int min_size, } /* Nothing found, clean up and bail out! */ - list_for_each_entry(obj, &unwind_list, exec_list) { + while (!list_empty(&unwind_list)) { + obj = list_first_entry(&unwind_list, + struct drm_i915_gem_object, + exec_list); + ret = drm_mm_scan_remove_block(obj->gtt_space); BUG_ON(ret); + + list_del_init(&obj->exec_list); drm_gem_object_unreference(&obj->base); } @@ -162,6 +168,7 @@ found: exec_list); if (ret == 0) ret = i915_gem_object_unbind(obj); + list_del_init(&obj->exec_list); drm_gem_object_unreference(&obj->base); } From 809b63349ce6eb6603e7dab482c642f28135a2db Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 10 Jan 2011 17:33:15 +0000 Subject: [PATCH 36/37] drm/i915: If we hit OOM when allocating GTT pages, clear the aperture Rather than evicting an object at random, which is unlikely to alleviate the memory pressure sufficient to allow us to continue, zap the entire aperture. That should give the system long enough to recover and reap some pages from the evicted objects, forestalling the allocation error for the new object. Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_gem.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 87c2df714f66..3dfc848ff755 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2782,10 +2782,8 @@ i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj, obj->gtt_space = NULL; if (ret == -ENOMEM) { - /* first try to clear up some space from the GTT */ - ret = i915_gem_evict_something(dev, size, - alignment, - map_and_fenceable); + /* first try to reclaim some memory by clearing the GTT */ + ret = i915_gem_evict_everything(dev, false); if (ret) { /* now try to shrink everyone else */ if (gfpmask) { @@ -2793,7 +2791,7 @@ i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj, goto search_free; } - return ret; + return -ENOMEM; } goto search_free; @@ -2808,9 +2806,7 @@ i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj, drm_mm_put_block(obj->gtt_space); obj->gtt_space = NULL; - ret = i915_gem_evict_something(dev, size, - alignment, map_and_fenceable); - if (ret) + if (i915_gem_evict_everything(dev, false)) return ret; goto search_free; From 6fe4f14044f181e146cdc15485428f95fa541ce8 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 10 Jan 2011 17:35:37 +0000 Subject: [PATCH 37/37] drm/i915/execbuffer: Reorder binding of objects to favour restrictions As the mappable portion of the aperture is always a small subset at the start of the GTT, it is allocated preferentially by drm_mm. This is useful in case we ever need to map an object later. However, if you have a large object that can consume the entire mappable region of the GTT this prevents the batchbuffer from fitting and so causing an error. Instead allocate all those that require a mapping up front in order to improve the likelihood of finding sufficient space to bind them. Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_drv.h | 1 + drivers/gpu/drm/i915/i915_gem_execbuffer.c | 72 ++++++++++++++-------- 2 files changed, 47 insertions(+), 26 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 6130f77c26bf..385fc7ec39d3 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -796,6 +796,7 @@ struct drm_i915_gem_object { */ struct hlist_node exec_node; unsigned long exec_handle; + struct drm_i915_gem_exec_object2 *exec_entry; /** * Current offset of the object in GTT space. diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 0445770cc23c..e69834341ef0 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -268,7 +268,6 @@ eb_destroy(struct eb_objects *eb) static int i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj, struct eb_objects *eb, - struct drm_i915_gem_exec_object2 *entry, struct drm_i915_gem_relocation_entry *reloc) { struct drm_device *dev = obj->base.dev; @@ -411,10 +410,10 @@ i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj, static int i915_gem_execbuffer_relocate_object(struct drm_i915_gem_object *obj, - struct eb_objects *eb, - struct drm_i915_gem_exec_object2 *entry) + struct eb_objects *eb) { struct drm_i915_gem_relocation_entry __user *user_relocs; + struct drm_i915_gem_exec_object2 *entry = obj->exec_entry; int i, ret; user_relocs = (void __user *)(uintptr_t)entry->relocs_ptr; @@ -426,7 +425,7 @@ i915_gem_execbuffer_relocate_object(struct drm_i915_gem_object *obj, sizeof(reloc))) return -EFAULT; - ret = i915_gem_execbuffer_relocate_entry(obj, eb, entry, &reloc); + ret = i915_gem_execbuffer_relocate_entry(obj, eb, &reloc); if (ret) return ret; @@ -442,13 +441,13 @@ i915_gem_execbuffer_relocate_object(struct drm_i915_gem_object *obj, static int i915_gem_execbuffer_relocate_object_slow(struct drm_i915_gem_object *obj, struct eb_objects *eb, - struct drm_i915_gem_exec_object2 *entry, struct drm_i915_gem_relocation_entry *relocs) { + const struct drm_i915_gem_exec_object2 *entry = obj->exec_entry; int i, ret; for (i = 0; i < entry->relocation_count; i++) { - ret = i915_gem_execbuffer_relocate_entry(obj, eb, entry, &relocs[i]); + ret = i915_gem_execbuffer_relocate_entry(obj, eb, &relocs[i]); if (ret) return ret; } @@ -459,8 +458,7 @@ i915_gem_execbuffer_relocate_object_slow(struct drm_i915_gem_object *obj, static int i915_gem_execbuffer_relocate(struct drm_device *dev, struct eb_objects *eb, - struct list_head *objects, - struct drm_i915_gem_exec_object2 *exec) + struct list_head *objects) { struct drm_i915_gem_object *obj; int ret; @@ -468,7 +466,7 @@ i915_gem_execbuffer_relocate(struct drm_device *dev, list_for_each_entry(obj, objects, exec_list) { obj->base.pending_read_domains = 0; obj->base.pending_write_domain = 0; - ret = i915_gem_execbuffer_relocate_object(obj, eb, exec++); + ret = i915_gem_execbuffer_relocate_object(obj, eb); if (ret) return ret; } @@ -479,13 +477,36 @@ i915_gem_execbuffer_relocate(struct drm_device *dev, static int i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring, struct drm_file *file, - struct list_head *objects, - struct drm_i915_gem_exec_object2 *exec) + struct list_head *objects) { struct drm_i915_gem_object *obj; - struct drm_i915_gem_exec_object2 *entry; int ret, retry; bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4; + struct list_head ordered_objects; + + INIT_LIST_HEAD(&ordered_objects); + while (!list_empty(objects)) { + struct drm_i915_gem_exec_object2 *entry; + bool need_fence, need_mappable; + + obj = list_first_entry(objects, + struct drm_i915_gem_object, + exec_list); + entry = obj->exec_entry; + + need_fence = + has_fenced_gpu_access && + entry->flags & EXEC_OBJECT_NEEDS_FENCE && + obj->tiling_mode != I915_TILING_NONE; + need_mappable = + entry->relocation_count ? true : need_fence; + + if (need_mappable) + list_move(&obj->exec_list, &ordered_objects); + else + list_move_tail(&obj->exec_list, &ordered_objects); + } + list_splice(&ordered_objects, objects); /* Attempt to pin all of the buffers into the GTT. * This is done in 3 phases: @@ -504,14 +525,11 @@ i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring, ret = 0; /* Unbind any ill-fitting objects or pin. */ - entry = exec; list_for_each_entry(obj, objects, exec_list) { + struct drm_i915_gem_exec_object2 *entry = obj->exec_entry; bool need_fence, need_mappable; - - if (!obj->gtt_space) { - entry++; + if (!obj->gtt_space) continue; - } need_fence = has_fenced_gpu_access && @@ -534,8 +552,8 @@ i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring, } /* Bind fresh objects */ - entry = exec; list_for_each_entry(obj, objects, exec_list) { + struct drm_i915_gem_exec_object2 *entry = obj->exec_entry; bool need_fence; need_fence = @@ -570,7 +588,6 @@ i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring, } entry->offset = obj->gtt_offset; - entry++; } /* Decrement pin count for bound objects */ @@ -680,10 +697,11 @@ i915_gem_execbuffer_relocate_slow(struct drm_device *dev, list_add_tail(&obj->exec_list, objects); obj->exec_handle = exec[i].handle; + obj->exec_entry = &exec[i]; eb_add_object(eb, obj); } - ret = i915_gem_execbuffer_reserve(ring, file, objects, exec); + ret = i915_gem_execbuffer_reserve(ring, file, objects); if (ret) goto err; @@ -692,7 +710,6 @@ i915_gem_execbuffer_relocate_slow(struct drm_device *dev, obj->base.pending_read_domains = 0; obj->base.pending_write_domain = 0; ret = i915_gem_execbuffer_relocate_object_slow(obj, eb, - exec, reloc + total); if (ret) goto err; @@ -1110,16 +1127,22 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, list_add_tail(&obj->exec_list, &objects); obj->exec_handle = exec[i].handle; + obj->exec_entry = &exec[i]; eb_add_object(eb, obj); } + /* take note of the batch buffer before we might reorder the lists */ + batch_obj = list_entry(objects.prev, + struct drm_i915_gem_object, + exec_list); + /* Move the objects en-masse into the GTT, evicting if necessary. */ - ret = i915_gem_execbuffer_reserve(ring, file, &objects, exec); + ret = i915_gem_execbuffer_reserve(ring, file, &objects); if (ret) goto err; /* The objects are in their final locations, apply the relocations. */ - ret = i915_gem_execbuffer_relocate(dev, eb, &objects, exec); + ret = i915_gem_execbuffer_relocate(dev, eb, &objects); if (ret) { if (ret == -EFAULT) { ret = i915_gem_execbuffer_relocate_slow(dev, file, ring, @@ -1133,9 +1156,6 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, } /* Set the pending read domains for the batch buffer to COMMAND */ - batch_obj = list_entry(objects.prev, - struct drm_i915_gem_object, - exec_list); if (batch_obj->base.pending_write_domain) { DRM_ERROR("Attempting to use self-modifying batch buffer\n"); ret = -EINVAL;